From e4a60c3e2d1704cda83320cc7a5bf3e2b931af92 Mon Sep 17 00:00:00 2001 From: Ina Date: Mon, 23 Sep 2024 00:18:38 +0200 Subject: [PATCH 1/9] current progress on noise --- analysis/NoiseAnalysis.jl | 13 +++ analysis/noise.jl | 174 ++++++++++++++++++++++++++++++++++ examples/sod_shock_tube_1d.jl | 22 ++++- 3 files changed, 205 insertions(+), 4 deletions(-) create mode 100644 analysis/NoiseAnalysis.jl create mode 100644 analysis/noise.jl diff --git a/analysis/NoiseAnalysis.jl b/analysis/NoiseAnalysis.jl new file mode 100644 index 0000000..67fe601 --- /dev/null +++ b/analysis/NoiseAnalysis.jl @@ -0,0 +1,13 @@ +module NoiseAnalysis + +using Distributions +using LinearAlgebra +using ShockwaveDetection #: FlowData + +export NoiseData +export apply_noise, apply_noise_to_flow +export compare_shock_clusters_over_time, compare_shock_positions_over_time, compare_shock_fits_over_time + +include("noise.jl") + +end diff --git a/analysis/noise.jl b/analysis/noise.jl new file mode 100644 index 0000000..9b8be2c --- /dev/null +++ b/analysis/noise.jl @@ -0,0 +1,174 @@ +using Euler2D +using Distributions +using LinearAlgebra +using ShockwaveProperties +using Unitful +using ShockwaveDetection +#using .NoiseAnalysis +export NoiseData + +""" + struct NoiseData + +A structure to store noise configuration for testing and analysis purposes. + +# Fields +- `intensity::Float64`: The intensity of the noise. +- `distribution::T`: The probability distribution used to generate noise. +""" +struct NoiseData{T} + intensity::Float64 + distribution::T +end + +""" + NoiseData(intensity::Float64, distribution::T) -> NoiseData{T} + +Constructs a `NoiseData` instance with the specified intensity and distribution. + +# Arguments +- `intensity`: A `Float64` representing the intensity of the noise. +- `distribution`: The probability distribution used to generate noise. + +# Returns +- A `NoiseData` instance. +""" +function NoiseData(intensity::Float64, distribution::T) where T + return NoiseData{T}(intensity, distribution) +end + +""" + apply_noise(data::Array{T}, noise_data::NoiseData) -> Array{T} + +Applies noise to a given data array using the noise intensity and distribution from `NoiseData`. + +# Arguments +- `data`: The original data array to which noise will be applied. +- `noise_data`: A `NoiseData` struct containing the noise configuration. + +# Returns +- An array with noise added to the original data. +""" +function apply_noise(data::Array{T}, noise_data::NoiseData) where T + return data .+ noise_data.intensity * rand(noise_data.distribution, size(data)) +end + +""" + apply_noise_to_flow(flow_data::FlowData, noise_data::NoiseData) + +Applies noise to the fields of the `FlowData` struct (density, velocity, pressure) using the noise specified in `NoiseData`. + +# Arguments +- `flow_data`: The original `FlowData` to which noise will be added. +- `noise_data`: A `NoiseData` struct containing the noise configuration. + +# Returns +- A new `FlowData` struct with noise added to the relevant fields. +""" +function apply_noise_to_flow(flow_data::FlowData, noise_data::NoiseData) + noisy_density_field = apply_noise(flow_data.density_field, noise_data) + noisy_velocity_field = apply_noise(flow_data.velocity_field, noise_data) + noisy_pressure_field = apply_noise(flow_data.pressure_field, noise_data) + + # Create a new FlowData struct with noisy fields + return FlowData( + flow_data.ncells, + flow_data.nsteps, + flow_data.bounds, + flow_data.tsteps, + flow_data.u, + noisy_density_field, + noisy_velocity_field, + noisy_pressure_field, + flow_data.mach_to_m_s, + flow_data.cell_ids + ) +end + +""" + compare_shock_clusters_over_time(original_clusters, noisy_clusters) + +Compares shock clusters over time between original and noisy data. + +# Arguments +- `original_clusters`: Shock clusters from the original `FlowData`. +- `noisy_clusters`: Shock clusters from the noisy `FlowData`. + +# Returns +- A measure of the difference between the original and noisy clusters. +""" +function compare_shock_clusters_over_time(original_clusters, noisy_clusters) + diff = original_clusters .- noisy_clusters + return sqrt(mean(diff.^2)) +end + +""" + compare_shock_positions_over_time(original_positions, noisy_positions) + +Compares shock positions over time between original and noisy data. + +# Arguments +- `original_positions`: Shock positions from the original `FlowData`. +- `noisy_positions`: Shock positions from the noisy `FlowData`. + +# Returns +- A measure of the difference between the original and noisy positions. +""" +function compare_shock_positions_over_time(original_positions, noisy_positions) + diffs = Float64[] + + println("Length of original_positions: ", length(original_positions)) + println("Length of noisy_positions: ", length(noisy_positions)) + + for (orig, noisy) in zip(original_positions, noisy_positions) + println("Length of current original: ", length(orig)) + println("Length of current noisy: ", length(noisy)) + + if isempty(orig) && isempty(noisy) + println("Both arrays are empty. Skipping comparison.") + continue + elseif isempty(orig) || isempty(noisy) + println("One of the arrays is empty. Skipping comparison. Original: ", orig, " Noisy: ", noisy) + continue + end + + # Calculate the differences + if length(orig) == length(noisy) + diff = mean((orig .- noisy).^2) + push!(diffs, diff) + else + println("Arrays have different lengths. Original: ", length(orig), " Noisy: ", length(noisy)) + # Optionally handle this case, e.g., calculate differences for the overlapping indices + min_len = min(length(orig), length(noisy)) + diff = mean((orig[1:min_len] .- noisy[1:min_len]).^2) + push!(diffs, diff) + end + end + + if isempty(diffs) + println("No valid differences calculated.") + return NaN + end + + println("Calculated differences: ", diffs) + + return mean(diffs) +end + +""" + compare_shock_fits_over_time(original_fits, noisy_fits) + +Compares shock fits over time between original and noisy data. + +# Arguments +- `original_fits`: Shock fits from the original `FlowData`. +- `noisy_fits`: Shock fits from the noisy `FlowData`. + +# Returns +- A measure of the difference between the original and noisy fits. +""" +function compare_shock_fits_over_time(original_fits, noisy_fits) + diff = original_fits .- noisy_fits + return sqrt(mean(diff.^2)) +end + diff --git a/examples/sod_shock_tube_1d.jl b/examples/sod_shock_tube_1d.jl index 73166c9..2350aec 100644 --- a/examples/sod_shock_tube_1d.jl +++ b/examples/sod_shock_tube_1d.jl @@ -3,17 +3,31 @@ using LinearAlgebra using ShockwaveProperties using Unitful using ShockwaveDetection +include("../analysis/NoiseAnalysis.jl") +using .NoiseAnalysis +using Distributions + # Create a NoiseData instance -#noise_data = NoiseData(0.01, Normal(0, 1)) # 1% noise intensity, Gaussian distribution +noise_data = NoiseAnalysis.NoiseData(0.01, Normal(0, 1)) # 1% noise intensity, Gaussian distribution flow_data = FlowData("examples/data/sod_shock_left_1d.tape", false) +# Apply noise to flow data +noisy_flow_data = NoiseAnalysis.apply_noise_to_flow(flow_data, noise_data) + point_detect_algo = GradientShockDetectionAlgo(0.5) -detection = detect(flow_data, point_detect_algo) +#detection = detect(flow_data, point_detect_algo) #plot_shock_fits_over_time(flow_data, detection, true) -shock_positions_over_time = detect(flow_data, point_detect_algo) -anim = create_wave_animation_with_shock(flow_data, detection) \ No newline at end of file +#shock_positions_over_time = detect(flow_data, point_detect_algo) +original_shock_positions = detect(flow_data, point_detect_algo) +noisy_shock_positions = detect(noisy_flow_data, point_detect_algo) +#println("original_shock_positions: ", original_shock_positions) +#println("noisy_shock_positions: ", noisy_shock_positions) +#anim = create_wave_animation_with_shock(flow_data, detection) + +# Detect and compare shock positions with and without noise +shock_diff = NoiseAnalysis.compare_shock_positions_over_time(original_shock_positions, noisy_shock_positions) \ No newline at end of file From a54c9b083c114d02e860a3d7918f89a86b8c481b Mon Sep 17 00:00:00 2001 From: Ina Date: Mon, 23 Sep 2024 21:41:42 +0200 Subject: [PATCH 2/9] implement function for comparing the shock positions over time in 1D and 2D --- analysis/examples/sod_shock_tube_1d.jl | 27 ++++++++ analysis/examples/sod_shock_tube_2d.jl | 19 +++++ analysis/noise.jl | 96 +++++++++++++++++++++----- examples/sod_shock_tube_1d.jl | 23 +----- examples/sod_shock_tube_2d.jl | 2 +- 5 files changed, 127 insertions(+), 40 deletions(-) create mode 100644 analysis/examples/sod_shock_tube_1d.jl create mode 100644 analysis/examples/sod_shock_tube_2d.jl diff --git a/analysis/examples/sod_shock_tube_1d.jl b/analysis/examples/sod_shock_tube_1d.jl new file mode 100644 index 0000000..955e62a --- /dev/null +++ b/analysis/examples/sod_shock_tube_1d.jl @@ -0,0 +1,27 @@ +using Euler2D +using LinearAlgebra +using ShockwaveProperties +using Unitful +using ShockwaveDetection +include("../NoiseAnalysis.jl") +using .NoiseAnalysis +using Distributions + + +# Create a NoiseData instance +noise_data = NoiseAnalysis.NoiseData(0.01, Normal(0, 0.1)) # 1% noise intensity, Gaussian distribution + +flow_data = FlowData("examples/data/sod_shock_left_1d.tape", false) + +# Apply noise to flow data +noisy_flow_data = NoiseAnalysis.apply_noise_to_flow(flow_data, noise_data) + +point_detect_algo = GradientShockDetectionAlgo(0.5) + +original_shock_positions_over_time = detect(flow_data, point_detect_algo) +noisy_shock_positions_over_time = detect(noisy_flow_data, point_detect_algo) +#println("original_shock_positions_over_time: ", original_shock_positions_over_time) +#println("noisy_shock_positions_over_time: ", noisy_shock_positions_over_time) + +# Detect and compare shock positions with and without noise +shock_diff = NoiseAnalysis.compare_shock_positions_over_time_1d(original_shock_positions_over_time, noisy_shock_positions_over_time) \ No newline at end of file diff --git a/analysis/examples/sod_shock_tube_2d.jl b/analysis/examples/sod_shock_tube_2d.jl new file mode 100644 index 0000000..9db33bf --- /dev/null +++ b/analysis/examples/sod_shock_tube_2d.jl @@ -0,0 +1,19 @@ +using ShockwaveDetection +include("../NoiseAnalysis.jl") +using .NoiseAnalysis +using Distributions +ENV["JULIA_NUM_THREADS"] = "4" + +flow_data = FlowData("examples/data/sod_shock_right_2d.tape", false) +noise_data = NoiseAnalysis.NoiseData(0.01, Normal(0, 0.1)) +noisy_flow_data = NoiseAnalysis.apply_noise_to_flow(flow_data, noise_data) + +point_detect_algo = ImageProcessingShockDetectionAlgo(0.5, :prewitt) +fitting_algo = FittingAlgo(0.1, true) +dbscan_algo = DBSCANAlgo(0.25, 3, 10) + +original_shock_positions_over_time = ShockwaveDetection.detect_shock_points(flow_data, point_detect_algo) +noisy_shock_positions_over_time = ShockwaveDetection.detect_shock_points(noisy_flow_data, point_detect_algo) + +shock_position_difference = NoiseAnalysis.compare_shock_positions_over_time_2d(original_shock_positions_over_time, noisy_shock_positions_over_time) +println("Shock position difference: ", shock_position_difference) diff --git a/analysis/noise.jl b/analysis/noise.jl index 9b8be2c..195104d 100644 --- a/analysis/noise.jl +++ b/analysis/noise.jl @@ -4,8 +4,6 @@ using LinearAlgebra using ShockwaveProperties using Unitful using ShockwaveDetection -#using .NoiseAnalysis -export NoiseData """ struct NoiseData @@ -86,26 +84,64 @@ function apply_noise_to_flow(flow_data::FlowData, noise_data::NoiseData) end """ - compare_shock_clusters_over_time(original_clusters, noisy_clusters) + compare_shock_positions_over_time_1d(original_positions, noisy_positions) -Compares shock clusters over time between original and noisy data. +Compares shock positions over time between original and noisy data. The function checks if both arrays are empty and skips comparisons accordingly. If one array is empty, it also skips that comparison. +When the lengths differ, it computes the mean difference only for the overlapping elements. # Arguments -- `original_clusters`: Shock clusters from the original `FlowData`. -- `noisy_clusters`: Shock clusters from the noisy `FlowData`. +- `original_positions`: Shock positions from the original `FlowData`. +- `noisy_positions`: Shock positions from the noisy `FlowData`. # Returns -- A measure of the difference between the original and noisy clusters. +- A measure of the difference between the original and noisy positions. """ -function compare_shock_clusters_over_time(original_clusters, noisy_clusters) - diff = original_clusters .- noisy_clusters - return sqrt(mean(diff.^2)) +function compare_shock_positions_over_time_1d(original_positions, noisy_positions) + diffs = Float64[] + + println("Length of original_positions: ", length(original_positions)) + println("Length of noisy_positions: ", length(noisy_positions)) + + for (orig, noisy) in zip(original_positions, noisy_positions) + println("Length of current original: ", length(orig)) + println("Length of current noisy: ", length(noisy)) + + if isempty(orig) && isempty(noisy) + println("Both arrays are empty. Skipping comparison.") + continue + elseif isempty(orig) || isempty(noisy) + println("One of the arrays is empty. Skipping comparison. Original: ", orig, " Noisy: ", noisy) + continue + end + + # Calculate the differences + if length(orig) == length(noisy) + diff = mean((orig .- noisy).^2) + push!(diffs, diff) + else + println("Arrays have different lengths. Original: ", length(orig), " Noisy: ", length(noisy)) + # Optionally handle this case, e.g., calculate differences for the overlapping indices + min_len = min(length(orig), length(noisy)) + diff = mean((orig[1:min_len] .- noisy[1:min_len]).^2) + push!(diffs, diff) + end + end + + if isempty(diffs) + println("No valid differences calculated.") + return NaN + end + + println("Calculated differences: ", diffs) + + return mean(diffs) end """ - compare_shock_positions_over_time(original_positions, noisy_positions) + compare_shock_positions_over_time_2d(original_positions, noisy_positions) -Compares shock positions over time between original and noisy data. +Compares shock positions over time between original and noisy data. The function checks if both arrays are empty and skips comparisons accordingly. If one array is empty, it also skips that comparison. +When the lengths differ, it computes the mean difference only for the overlapping elements. Additionally, the Tuple(ci) converts a CartesianIndex object like CartesianIndex into a tuple, allowing for easy element-wise operations. # Arguments - `original_positions`: Shock positions from the original `FlowData`. @@ -114,7 +150,7 @@ Compares shock positions over time between original and noisy data. # Returns - A measure of the difference between the original and noisy positions. """ -function compare_shock_positions_over_time(original_positions, noisy_positions) +function compare_shock_positions_over_time_2d(original_positions, noisy_positions) diffs = Float64[] println("Length of original_positions: ", length(original_positions)) @@ -132,15 +168,18 @@ function compare_shock_positions_over_time(original_positions, noisy_positions) continue end + # Convert CartesianIndex objects to tuples + orig_positions = [Tuple(ci) for ci in orig] + noisy_positions = [Tuple(ci) for ci in noisy] + # Calculate the differences - if length(orig) == length(noisy) - diff = mean((orig .- noisy).^2) + if length(orig_positions) == length(noisy_positions) + diff = mean(sum((orig_pos .- noisy_pos).^2) for (orig_pos, noisy_pos) in zip(orig_positions, noisy_positions)) push!(diffs, diff) else - println("Arrays have different lengths. Original: ", length(orig), " Noisy: ", length(noisy)) - # Optionally handle this case, e.g., calculate differences for the overlapping indices - min_len = min(length(orig), length(noisy)) - diff = mean((orig[1:min_len] .- noisy[1:min_len]).^2) + println("Arrays have different lengths. Original: ", length(orig_positions), " Noisy: ", length(noisy_positions)) + min_len = min(length(orig_positions), length(noisy_positions)) + diff = mean(sum((orig_positions[i] .- noisy_positions[i]).^2) for i in 1:min_len) push!(diffs, diff) end end @@ -155,6 +194,25 @@ function compare_shock_positions_over_time(original_positions, noisy_positions) return mean(diffs) end + + +""" + compare_shock_clusters_over_time(original_clusters, noisy_clusters) + +Compares shock clusters over time between original and noisy data. + +# Arguments +- `original_clusters`: Shock clusters from the original `FlowData`. +- `noisy_clusters`: Shock clusters from the noisy `FlowData`. + +# Returns +- A measure of the difference between the original and noisy clusters. +""" +function compare_shock_clusters_over_time(original_clusters, noisy_clusters) + diff = original_clusters .- noisy_clusters + return sqrt(mean(diff.^2)) +end + """ compare_shock_fits_over_time(original_fits, noisy_fits) diff --git a/examples/sod_shock_tube_1d.jl b/examples/sod_shock_tube_1d.jl index 2350aec..816775b 100644 --- a/examples/sod_shock_tube_1d.jl +++ b/examples/sod_shock_tube_1d.jl @@ -3,31 +3,14 @@ using LinearAlgebra using ShockwaveProperties using Unitful using ShockwaveDetection -include("../analysis/NoiseAnalysis.jl") -using .NoiseAnalysis -using Distributions - - -# Create a NoiseData instance -noise_data = NoiseAnalysis.NoiseData(0.01, Normal(0, 1)) # 1% noise intensity, Gaussian distribution flow_data = FlowData("examples/data/sod_shock_left_1d.tape", false) -# Apply noise to flow data -noisy_flow_data = NoiseAnalysis.apply_noise_to_flow(flow_data, noise_data) - point_detect_algo = GradientShockDetectionAlgo(0.5) -#detection = detect(flow_data, point_detect_algo) +detection = detect(flow_data, point_detect_algo) #plot_shock_fits_over_time(flow_data, detection, true) -#shock_positions_over_time = detect(flow_data, point_detect_algo) -original_shock_positions = detect(flow_data, point_detect_algo) -noisy_shock_positions = detect(noisy_flow_data, point_detect_algo) -#println("original_shock_positions: ", original_shock_positions) -#println("noisy_shock_positions: ", noisy_shock_positions) -#anim = create_wave_animation_with_shock(flow_data, detection) - -# Detect and compare shock positions with and without noise -shock_diff = NoiseAnalysis.compare_shock_positions_over_time(original_shock_positions, noisy_shock_positions) \ No newline at end of file +shock_positions_over_time = detect(flow_data, point_detect_algo) +anim = create_wave_animation_with_shock(flow_data, detection) \ No newline at end of file diff --git a/examples/sod_shock_tube_2d.jl b/examples/sod_shock_tube_2d.jl index 0bc0d3a..4eb7ace 100644 --- a/examples/sod_shock_tube_2d.jl +++ b/examples/sod_shock_tube_2d.jl @@ -10,4 +10,4 @@ dbscan_algo = DBSCANAlgo(0.25, 3, 10) detection = detect(flow_data, point_detect_algo, dbscan_algo, fitting_algo) plot_shock_fits_over_time(flow_data, detection, true) -#create_heatmap_evo_with_shock(flow_data, detection, :density_field, true, false) +#create_heatmap_evo_with_shock(flow_data, detection, :density_field, true, false) \ No newline at end of file From 0a0fafb5ae96381e650cf461ae46703479f59683 Mon Sep 17 00:00:00 2001 From: Ina Date: Fri, 27 Sep 2024 00:03:01 +0200 Subject: [PATCH 3/9] compare shock positions in 1d and compare shock fits in 2d --- analysis/examples/sod_shock_tube_1d.jl | 2 +- analysis/examples/sod_shock_tube_2d.jl | 31 +++- analysis/noise.jl | 202 ++++++++++++++----------- examples/sod_shock_tube_1d.jl | 3 + src/ShockwaveDetection.jl | 2 +- 5 files changed, 145 insertions(+), 95 deletions(-) diff --git a/analysis/examples/sod_shock_tube_1d.jl b/analysis/examples/sod_shock_tube_1d.jl index 955e62a..b1d7116 100644 --- a/analysis/examples/sod_shock_tube_1d.jl +++ b/analysis/examples/sod_shock_tube_1d.jl @@ -16,7 +16,7 @@ flow_data = FlowData("examples/data/sod_shock_left_1d.tape", false) # Apply noise to flow data noisy_flow_data = NoiseAnalysis.apply_noise_to_flow(flow_data, noise_data) -point_detect_algo = GradientShockDetectionAlgo(0.5) +point_detect_algo = GradientShockDetectionAlgo(0.8) original_shock_positions_over_time = detect(flow_data, point_detect_algo) noisy_shock_positions_over_time = detect(noisy_flow_data, point_detect_algo) diff --git a/analysis/examples/sod_shock_tube_2d.jl b/analysis/examples/sod_shock_tube_2d.jl index 9db33bf..77f2e62 100644 --- a/analysis/examples/sod_shock_tube_2d.jl +++ b/analysis/examples/sod_shock_tube_2d.jl @@ -8,12 +8,35 @@ flow_data = FlowData("examples/data/sod_shock_right_2d.tape", false) noise_data = NoiseAnalysis.NoiseData(0.01, Normal(0, 0.1)) noisy_flow_data = NoiseAnalysis.apply_noise_to_flow(flow_data, noise_data) -point_detect_algo = ImageProcessingShockDetectionAlgo(0.5, :prewitt) +point_detect_algo = ImageProcessingShockDetectionAlgo(0.8, :prewitt) fitting_algo = FittingAlgo(0.1, true) -dbscan_algo = DBSCANAlgo(0.25, 3, 10) +dbscan_algo = DBSCANAlgo(1.05, 3, 10) + +#original_detection = detect(flow_data, point_detect_algo, dbscan_algo, fitting_algo) +#noisy_detection = detect(noisy_flow_data, point_detect_algo, dbscan_algo, fitting_algo) original_shock_positions_over_time = ShockwaveDetection.detect_shock_points(flow_data, point_detect_algo) noisy_shock_positions_over_time = ShockwaveDetection.detect_shock_points(noisy_flow_data, point_detect_algo) +#println("original_shock_positions_over_time: ", original_shock_positions_over_time) +#println("noisy_shock_positions_over_time: ", noisy_shock_positions_over_time) + +original_shock_clusters_over_time = cluster_shock_points(dbscan_algo, original_shock_positions_over_time, flow_data) +noisy_shock_clusters_over_time = cluster_shock_points(dbscan_algo, noisy_shock_positions_over_time, flow_data) +#println("original_shock_clusters_over_time: ", original_shock_clusters_over_time) +#println("noisy_shock_clusters_over_time: ", noisy_shock_clusters_over_time) + +original_shock_fits_over_time = fit_shock_clusters_over_time(original_shock_clusters_over_time, fitting_algo) +noisy_shock_fits_over_time = fit_shock_clusters_over_time(noisy_shock_clusters_over_time, fitting_algo) +#println("original_shock_fits_over_time: ", original_shock_fits_over_time) +#println("noisy_shock_fits_over_time: ", noisy_shock_fits_over_time) + + +rmse_value = NoiseAnalysis.compare_shock_fits_over_time(original_shock_fits_over_time, noisy_shock_fits_over_time) +println("RMSE between fits: ", rmse_value) + +#shock_fits_difference = NoiseAnalysis.compare_shock_fits_over_time(original_shock_fits_over_time, noisy_shock_fits_over_time) -shock_position_difference = NoiseAnalysis.compare_shock_positions_over_time_2d(original_shock_positions_over_time, noisy_shock_positions_over_time) -println("Shock position difference: ", shock_position_difference) +#plot_shock_fits_over_time(flow_data, original_detection, true) +#plot_shock_fits_over_time(noisy_flow_data, noisy_detection, true) +#shock_position_difference = NoiseAnalysis.compare_shock_positions_over_time_2d(original_shock_positions_over_time, noisy_shock_positions_over_time) +#println("Shock position difference: ", shock_position_difference) diff --git a/analysis/noise.jl b/analysis/noise.jl index 195104d..c57bfda 100644 --- a/analysis/noise.jl +++ b/analysis/noise.jl @@ -4,6 +4,7 @@ using LinearAlgebra using ShockwaveProperties using Unitful using ShockwaveDetection +using Statistics """ struct NoiseData @@ -86,18 +87,35 @@ end """ compare_shock_positions_over_time_1d(original_positions, noisy_positions) -Compares shock positions over time between original and noisy data. The function checks if both arrays are empty and skips comparisons accordingly. If one array is empty, it also skips that comparison. -When the lengths differ, it computes the mean difference only for the overlapping elements. +Calculate the mean squared difference between the shock positions from two sets of data over time. -# Arguments -- `original_positions`: Shock positions from the original `FlowData`. -- `noisy_positions`: Shock positions from the noisy `FlowData`. +### Parameters: +- `original_positions::Vector{Vector{Int64}}`: A vector of integer vectors representing the original shock positions over time. Each inner vector corresponds to shock positions at a specific time step. +- `noisy_positions::Vector{Vector{Int64}}`: A vector of integer vectors representing the noisy shock positions over time. Each inner vector corresponds to shock positions at the same specific time step. + +### Returns: +- `Float64`: The mean squared difference between the shock positions of the two input vectors. If no valid comparisons can be made, the function returns `NaN`. + +### Details: +- The function compares each pair of shock position vectors (from `original_positions` and `noisy_positions`). +- If both vectors for a specific time step are empty, the comparison is skipped. +- If one of the vectors is empty, `Inf` is recorded as a difference. +- For vectors of different lengths, the function calculates the mean squared difference for the overlapping elements. +- After calculating differences, `Inf` values are filtered out before computing the mean of the valid differences. +- The function also tracks how many comparisons resulted in `Inf` and reports the proportion of such comparisons. + +### Example: +```julia +original = [[375], [370], [], [367], [366]] +noisy = [[375], [], [368], [367], [365]] + +mean_diff = compare_shock_positions_over_time_1d(original, noisy) +println("Mean Squared Difference: ", mean_diff) -# Returns -- A measure of the difference between the original and noisy positions. """ function compare_shock_positions_over_time_1d(original_positions, noisy_positions) diffs = Float64[] + inf_count = 0 println("Length of original_positions: ", length(original_positions)) println("Length of noisy_positions: ", length(noisy_positions)) @@ -109,124 +127,130 @@ function compare_shock_positions_over_time_1d(original_positions, noisy_position if isempty(orig) && isempty(noisy) println("Both arrays are empty. Skipping comparison.") continue - elseif isempty(orig) || isempty(noisy) - println("One of the arrays is empty. Skipping comparison. Original: ", orig, " Noisy: ", noisy) + elseif isempty(orig) + println("Original array is empty. Recording Inf as a difference.") + push!(diffs, Inf) + inf_count += 1 + continue + elseif isempty(noisy) + println("Noisy array is empty. Recording Inf as a difference.") + push!(diffs, Inf) + inf_count += 1 continue end - # Calculate the differences + # Calculate the differences for non-empty arrays if length(orig) == length(noisy) diff = mean((orig .- noisy).^2) push!(diffs, diff) else println("Arrays have different lengths. Original: ", length(orig), " Noisy: ", length(noisy)) - # Optionally handle this case, e.g., calculate differences for the overlapping indices min_len = min(length(orig), length(noisy)) diff = mean((orig[1:min_len] .- noisy[1:min_len]).^2) push!(diffs, diff) end end - if isempty(diffs) + # Remove any Inf values from diffs before calculating mean + valid_diffs = filter(!isinf, diffs) + + if isempty(valid_diffs) println("No valid differences calculated.") return NaN end + mean_diff = mean(valid_diffs) + inf_ratio = inf_count / length(original_positions) # Calculate ratio based on original count + println("Calculated differences: ", diffs) + println("Mean of valid differences: ", mean_diff) + println("Number of comparisons with missing data: ", inf_count) + println("Proportion of comparisons with missing data: ", inf_ratio) - return mean(diffs) + return mean_diff end """ - compare_shock_positions_over_time_2d(original_positions, noisy_positions) - -Compares shock positions over time between original and noisy data. The function checks if both arrays are empty and skips comparisons accordingly. If one array is empty, it also skips that comparison. -When the lengths differ, it computes the mean difference only for the overlapping elements. Additionally, the Tuple(ci) converts a CartesianIndex object like CartesianIndex into a tuple, allowing for easy element-wise operations. + compare_shock_fits_over_time(original_fit, noisy_fit) -# Arguments -- `original_positions`: Shock positions from the original `FlowData`. -- `noisy_positions`: Shock positions from the noisy `FlowData`. - -# Returns -- A measure of the difference between the original and noisy positions. -""" -function compare_shock_positions_over_time_2d(original_positions, noisy_positions) - diffs = Float64[] +Compare the parameters of original and noisy shock fits over time by calculating the root mean square error (RMSE) between them. - println("Length of original_positions: ", length(original_positions)) - println("Length of noisy_positions: ", length(noisy_positions)) +### Parameters: +- `original_fit::Vector{Vector{Union{Nothing, Fitting}}}`: A nested vector of fitting instances representing the original shock fits. Each inner vector corresponds to shock fits at a specific time step. +- `noisy_fit::Vector{Vector{Union{Nothing, Fitting}}}`: A nested vector of fitting instances representing the noisy shock fits. Each inner vector corresponds to shock fits at the same specific time step. - for (orig, noisy) in zip(original_positions, noisy_positions) - println("Length of current original: ", length(orig)) - println("Length of current noisy: ", length(noisy)) +### Returns: +- `Float64`: The root mean square error (RMSE) between the extracted parameters of the original and noisy fits. - if isempty(orig) && isempty(noisy) - println("Both arrays are empty. Skipping comparison.") - continue - elseif isempty(orig) || isempty(noisy) - println("One of the arrays is empty. Skipping comparison. Original: ", orig, " Noisy: ", noisy) - continue - end +### Details: +- The function extracts the first parameter from each fitting instance (if valid) and compares the parameters from both original and noisy fits. +- If there are any missing parameters (instances of `nothing`), they are counted and reported. +- If the lengths of the parameter vectors differ, the shorter vector is padded with zeroes to match the length of the longer vector. +- The function computes the RMSE based on the differences between the parameter vectors. - # Convert CartesianIndex objects to tuples - orig_positions = [Tuple(ci) for ci in orig] - noisy_positions = [Tuple(ci) for ci in noisy] +### Example: +```julia +original_fits = [[Fitting(params1), nothing], [Fitting(params2), Fitting(params3)]] +noisy_fits = [[Fitting(params4)], [Fitting(params5), nothing]] - # Calculate the differences - if length(orig_positions) == length(noisy_positions) - diff = mean(sum((orig_pos .- noisy_pos).^2) for (orig_pos, noisy_pos) in zip(orig_positions, noisy_positions)) - push!(diffs, diff) - else - println("Arrays have different lengths. Original: ", length(orig_positions), " Noisy: ", length(noisy_positions)) - min_len = min(length(orig_positions), length(noisy_positions)) - diff = mean(sum((orig_positions[i] .- noisy_positions[i]).^2) for i in 1:min_len) - push!(diffs, diff) +rmse = compare_shock_fits_over_time(original_fits, noisy_fits) +println("RMSE between fits: ", rmse) +""" +function compare_shock_fits_over_time(original_fit, noisy_fit) + # Initialize parameter storage + original_params = Float64[] + noisy_params = Float64[] + missing_count = 0 + + # Helper function to extract parameters from nested arrays + function extract_params(fits, params) + for fit_vector in fits + for fit in fit_vector + if fit !== nothing # Ensure we have a valid Fitting instance + push!(params, fit.parameters[1]) # Assuming you want the first parameter + end + end end end - if isempty(diffs) - println("No valid differences calculated.") - return NaN - end - - println("Calculated differences: ", diffs) - - return mean(diffs) -end - - - -""" - compare_shock_clusters_over_time(original_clusters, noisy_clusters) + # Extract parameters for original and noisy fits + extract_params(original_fit, original_params) + extract_params(noisy_fit, noisy_params) -Compares shock clusters over time between original and noisy data. + missing_count = abs(length(original_params)-length(noisy_params)) + # Check the extracted parameters + println("Extracted Original Parameters: ", original_params) + println("Extracted Noisy Parameters: ", noisy_params) -# Arguments -- `original_clusters`: Shock clusters from the original `FlowData`. -- `noisy_clusters`: Shock clusters from the noisy `FlowData`. + println("Length of Original Parameters: ", length(original_params)) + println("Length of Noisy Parameters: ", length(noisy_params)) + println("Number of Missing Parameters: ", missing_count) -# Returns -- A measure of the difference between the original and noisy clusters. -""" -function compare_shock_clusters_over_time(original_clusters, noisy_clusters) - diff = original_clusters .- noisy_clusters - return sqrt(mean(diff.^2)) -end - -""" - compare_shock_fits_over_time(original_fits, noisy_fits) + # Ensure both parameter vectors are not empty + if isempty(original_params) || isempty(noisy_params) + error("No valid Fitting instances found in the inputs.") + end -Compares shock fits over time between original and noisy data. + # Pad the shorter array with zeroes to match lengths + if length(original_params) > length(noisy_params) + padding = fill(0.0, length(original_params) - length(noisy_params)) + append!(noisy_params, padding) + elseif length(noisy_params) > length(original_params) + padding = fill(0.0, length(noisy_params) - length(original_params)) + append!(original_params, padding) + end -# Arguments -- `original_fits`: Shock fits from the original `FlowData`. -- `noisy_fits`: Shock fits from the noisy `FlowData`. + # Check the new lengths after padding + println("Padded Original Parameters: ", original_params) + println("Padded Noisy Parameters: ", noisy_params) + println("Length of Padded Original Parameters: ", length(original_params)) + println("Length of Padded Noisy Parameters: ", length(noisy_params)) -# Returns -- A measure of the difference between the original and noisy fits. -""" -function compare_shock_fits_over_time(original_fits, noisy_fits) - diff = original_fits .- noisy_fits - return sqrt(mean(diff.^2)) -end + # Calculate the differences + differences = original_params .- noisy_params + + # Calculate the RMSE (no NaN values, padded with 0s) + rmse = sqrt(mean(differences .^ 2)) + return rmse +end \ No newline at end of file diff --git a/examples/sod_shock_tube_1d.jl b/examples/sod_shock_tube_1d.jl index 816775b..73166c9 100644 --- a/examples/sod_shock_tube_1d.jl +++ b/examples/sod_shock_tube_1d.jl @@ -4,6 +4,9 @@ using ShockwaveProperties using Unitful using ShockwaveDetection +# Create a NoiseData instance +#noise_data = NoiseData(0.01, Normal(0, 1)) # 1% noise intensity, Gaussian distribution + flow_data = FlowData("examples/data/sod_shock_left_1d.tape", false) point_detect_algo = GradientShockDetectionAlgo(0.5) diff --git a/src/ShockwaveDetection.jl b/src/ShockwaveDetection.jl index 7329f08..ff8d8b8 100644 --- a/src/ShockwaveDetection.jl +++ b/src/ShockwaveDetection.jl @@ -9,7 +9,7 @@ export GradientShockDetectionAlgo export ImageProcessingShockDetectionAlgo export DBSCANAlgo export cluster_shock_points -export FittingAlgo, fit_shock_clusters_over_time, calculate_normal_vector +export Fitting, FittingAlgo, fit_shock_clusters_over_time, calculate_normal_vector export create_wave_animation, create_wave_animation_with_shock, create_heatmap_evo, create_heatmap_evo_with_shock, plot_shock_fits_over_time export ShockDetectionResult2D, detect export ShockDetectionResult1D, detect From 035d7da7be8838b10dba15fee09b93b249eb00f4 Mon Sep 17 00:00:00 2001 From: Ina Date: Sat, 28 Sep 2024 01:03:26 +0200 Subject: [PATCH 4/9] changes to comparisson functions --- analysis/examples/sod_shock_tube_1d.jl | 2 +- analysis/noise.jl | 200 +++++++++++++------------ examples/sod_shock_tube_1d.jl | 3 - 3 files changed, 104 insertions(+), 101 deletions(-) diff --git a/analysis/examples/sod_shock_tube_1d.jl b/analysis/examples/sod_shock_tube_1d.jl index b1d7116..cb3cdbb 100644 --- a/analysis/examples/sod_shock_tube_1d.jl +++ b/analysis/examples/sod_shock_tube_1d.jl @@ -24,4 +24,4 @@ noisy_shock_positions_over_time = detect(noisy_flow_data, point_detect_algo) #println("noisy_shock_positions_over_time: ", noisy_shock_positions_over_time) # Detect and compare shock positions with and without noise -shock_diff = NoiseAnalysis.compare_shock_positions_over_time_1d(original_shock_positions_over_time, noisy_shock_positions_over_time) \ No newline at end of file +shock_diff = NoiseAnalysis.compare_shock_positions_over_time_1d(original_shock_positions_over_time, noisy_shock_positions_over_time, 10.0, 100.0) \ No newline at end of file diff --git a/analysis/noise.jl b/analysis/noise.jl index c57bfda..9683ccc 100644 --- a/analysis/noise.jl +++ b/analysis/noise.jl @@ -85,91 +85,103 @@ function apply_noise_to_flow(flow_data::FlowData, noise_data::NoiseData) end """ - compare_shock_positions_over_time_1d(original_positions, noisy_positions) + compare_shock_positions_over_time_1d(original_positions::Vector{Vector{Float64}}, + noisy_positions::Vector{Vector{Float64}}) -Calculate the mean squared difference between the shock positions from two sets of data over time. +Compare shock positions between original and noisy datasets over time in 1D, using proximity-based matching to better quantify errors caused by false positives, missed detections, or noisy deviations. -### Parameters: -- `original_positions::Vector{Vector{Int64}}`: A vector of integer vectors representing the original shock positions over time. Each inner vector corresponds to shock positions at a specific time step. -- `noisy_positions::Vector{Vector{Int64}}`: A vector of integer vectors representing the noisy shock positions over time. Each inner vector corresponds to shock positions at the same specific time step. - -### Returns: -- `Float64`: The mean squared difference between the shock positions of the two input vectors. If no valid comparisons can be made, the function returns `NaN`. - -### Details: -- The function compares each pair of shock position vectors (from `original_positions` and `noisy_positions`). -- If both vectors for a specific time step are empty, the comparison is skipped. -- If one of the vectors is empty, `Inf` is recorded as a difference. -- For vectors of different lengths, the function calculates the mean squared difference for the overlapping elements. -- After calculating differences, `Inf` values are filtered out before computing the mean of the valid differences. -- The function also tracks how many comparisons resulted in `Inf` and reports the proportion of such comparisons. +# Arguments +- `original_positions::Vector{Vector{Float64}}`: + A vector of vectors where each inner vector contains shock positions for a specific time step in the original data. Each time step may have multiple shock positions. + +- `noisy_positions::Vector{Vector{Float64}}`: + A vector of vectors where each inner vector contains shock positions for a specific time step in the noisy data. Similar to `original_positions`, but potentially with noise. -### Example: -```julia -original = [[375], [370], [], [367], [366]] -noisy = [[375], [], [368], [367], [365]] +# Returns +- `rmse::Float64`: + The Root Mean Squared Error (RMSE) between the matched shock positions from the original and noisy datasets. This metric quantifies the deviation in shock positions due to noise. + +- `missing_count::Int`: + The total count of shock positions that were missing or unmatched between the original and noisy datasets. This includes cases where shocks were present in the original data but absent in the noisy data, or vice versa. + +# Methodology +1. For each time step, the function compares shock positions between the `original_positions` and `noisy_positions`. +2. Instead of assuming the shock positions correspond one-to-one in both datasets, each shock in the original data is matched to the **closest** shock in the noisy data. This accounts for variations in the number and order of shock positions due to noise. +3. **Proximity Matching**: + - The `findmin` function is used to find the closest shock in the noisy data for each shock in the original data. If the distance between the matched positions is within a certain threshold (set to `10` units), the squared difference is computed and stored. + - If no close match is found, the function applies a **large penalty** (set to `100.0`) to reflect a significant mismatch or false positive. +4. **Handling False Positives**: + - Any extra shock positions in the noisy data that don't correspond to any shocks in the original data are penalized similarly. These represent **false detections** due to noise. +5. **Handling Missed Detections**: + - Shock positions in the original data that are missing from the noisy data are also penalized with the large penalty. These represent **missed detections**. +6. After all time steps are processed, the function calculates the RMSE over the matched differences and any penalties. +7. The function also tracks the total count of unmatched (or missing) shocks for additional insight. +""" +# Helper function to find the closest match +function find_closest_match(value, noisy_positions) + if isempty(noisy_positions) + return nothing, nothing + end -mean_diff = compare_shock_positions_over_time_1d(original, noisy) -println("Mean Squared Difference: ", mean_diff) + # Find the closest position in noisy_positions + differences = abs.(noisy_positions .- value) + min_diff, min_idx = findmin(differences) + + # Return the closest noisy position and its index + return noisy_positions[min_idx], min_idx +end -""" -function compare_shock_positions_over_time_1d(original_positions, noisy_positions) +function compare_shock_positions_over_time_1d(original_positions, noisy_positions, threshold=10.0, large_penalty=100.0) diffs = Float64[] - inf_count = 0 - - println("Length of original_positions: ", length(original_positions)) - println("Length of noisy_positions: ", length(noisy_positions)) + missing_count = 0 for (orig, noisy) in zip(original_positions, noisy_positions) - println("Length of current original: ", length(orig)) - println("Length of current noisy: ", length(noisy)) - if isempty(orig) && isempty(noisy) - println("Both arrays are empty. Skipping comparison.") + # Both are empty, no shocks detected in both original and noisy continue - elseif isempty(orig) - println("Original array is empty. Recording Inf as a difference.") - push!(diffs, Inf) - inf_count += 1 + elseif isempty(noisy) && !isempty(orig) + # Noisy missed all shocks, apply large penalty for each missing shock + missing_count += length(orig) + for _ in orig + push!(diffs, large_penalty) + end continue - elseif isempty(noisy) - println("Noisy array is empty. Recording Inf as a difference.") - push!(diffs, Inf) - inf_count += 1 + elseif isempty(orig) + # No original shock, but noisy detected something (false positives) + # This may not count as a "missing", but it could be penalized differently + missing_count += length(noisy) + for _ in noisy + push!(diffs, large_penalty) + end continue end - # Calculate the differences for non-empty arrays - if length(orig) == length(noisy) - diff = mean((orig .- noisy).^2) - push!(diffs, diff) - else - println("Arrays have different lengths. Original: ", length(orig), " Noisy: ", length(noisy)) - min_len = min(length(orig), length(noisy)) - diff = mean((orig[1:min_len] .- noisy[1:min_len]).^2) - push!(diffs, diff) + # Match shocks between original and noisy + for orig_pos in orig + closest_noisy_pos, closest_idx = find_closest_match(orig_pos, noisy) + if isnothing(closest_noisy_pos) || abs(orig_pos - closest_noisy_pos) > threshold + # No match found within threshold, apply penalty + push!(diffs, large_penalty) + missing_count += 1 + else + # Calculate squared difference for valid matches + diff = (orig_pos - closest_noisy_pos)^2 + push!(diffs, diff) + # Remove the matched noisy position to avoid double matching + deleteat!(noisy, closest_idx) + end end end - # Remove any Inf values from diffs before calculating mean - valid_diffs = filter(!isinf, diffs) - - if isempty(valid_diffs) - println("No valid differences calculated.") - return NaN + if isempty(diffs) + return NaN, missing_count end - mean_diff = mean(valid_diffs) - inf_ratio = inf_count / length(original_positions) # Calculate ratio based on original count - - println("Calculated differences: ", diffs) - println("Mean of valid differences: ", mean_diff) - println("Number of comparisons with missing data: ", inf_count) - println("Proportion of comparisons with missing data: ", inf_ratio) - - return mean_diff + rmse = sqrt(mean(diffs)) + return rmse, missing_count end + """ compare_shock_fits_over_time(original_fit, noisy_fit) @@ -187,27 +199,20 @@ Compare the parameters of original and noisy shock fits over time by calculating - If there are any missing parameters (instances of `nothing`), they are counted and reported. - If the lengths of the parameter vectors differ, the shorter vector is padded with zeroes to match the length of the longer vector. - The function computes the RMSE based on the differences between the parameter vectors. - -### Example: -```julia -original_fits = [[Fitting(params1), nothing], [Fitting(params2), Fitting(params3)]] -noisy_fits = [[Fitting(params4)], [Fitting(params5), nothing]] - -rmse = compare_shock_fits_over_time(original_fits, noisy_fits) -println("RMSE between fits: ", rmse) """ function compare_shock_fits_over_time(original_fit, noisy_fit) - # Initialize parameter storage + # Initialize parameter storage and missing count original_params = Float64[] noisy_params = Float64[] - missing_count = 0 + missing_count = 0 # Count how many parameters are missing # Helper function to extract parameters from nested arrays function extract_params(fits, params) for fit_vector in fits for fit in fit_vector - if fit !== nothing # Ensure we have a valid Fitting instance - push!(params, fit.parameters[1]) # Assuming you want the first parameter + if fit !== nothing + # Append all elements of fit.parameters to params + append!(params, fit.parameters) end end end @@ -217,40 +222,41 @@ function compare_shock_fits_over_time(original_fit, noisy_fit) extract_params(original_fit, original_params) extract_params(noisy_fit, noisy_params) - missing_count = abs(length(original_params)-length(noisy_params)) # Check the extracted parameters println("Extracted Original Parameters: ", original_params) println("Extracted Noisy Parameters: ", noisy_params) - println("Length of Original Parameters: ", length(original_params)) - println("Length of Noisy Parameters: ", length(noisy_params)) - println("Number of Missing Parameters: ", missing_count) + # Find the maximum length of parameter lists (for padding) + max_len = max(length(original_params), length(noisy_params)) - # Ensure both parameter vectors are not empty - if isempty(original_params) || isempty(noisy_params) - error("No valid Fitting instances found in the inputs.") + # Pad the shorter array with NaNs (or another value like Inf) to indicate missing parameters + if length(original_params) < max_len + missing_count += max_len - length(original_params) # Increment missing count + padding = fill(NaN, max_len - length(original_params)) + append!(original_params, padding) end - # Pad the shorter array with zeroes to match lengths - if length(original_params) > length(noisy_params) - padding = fill(0.0, length(original_params) - length(noisy_params)) + if length(noisy_params) < max_len + missing_count += max_len - length(noisy_params) # Increment missing count + padding = fill(NaN, max_len - length(noisy_params)) append!(noisy_params, padding) - elseif length(noisy_params) > length(original_params) - padding = fill(0.0, length(noisy_params) - length(original_params)) - append!(original_params, padding) end # Check the new lengths after padding println("Padded Original Parameters: ", original_params) println("Padded Noisy Parameters: ", noisy_params) - println("Length of Padded Original Parameters: ", length(original_params)) - println("Length of Padded Noisy Parameters: ", length(noisy_params)) - # Calculate the differences + # Calculate the differences while ignoring NaNs differences = original_params .- noisy_params - - # Calculate the RMSE (no NaN values, padded with 0s) - rmse = sqrt(mean(differences .^ 2)) + filtered_differences = filter(!isnan, differences) # Remove NaNs for RMSE calculation - return rmse -end \ No newline at end of file + if isempty(filtered_differences) + println("No valid parameter differences to calculate RMSE.") + return NaN, missing_count # If all values were missing + end + + # Calculate the RMSE (ignoring NaN values) + rmse = sqrt(mean(filtered_differences .^ 2)) + + return rmse, missing_count +end diff --git a/examples/sod_shock_tube_1d.jl b/examples/sod_shock_tube_1d.jl index 73166c9..816775b 100644 --- a/examples/sod_shock_tube_1d.jl +++ b/examples/sod_shock_tube_1d.jl @@ -4,9 +4,6 @@ using ShockwaveProperties using Unitful using ShockwaveDetection -# Create a NoiseData instance -#noise_data = NoiseData(0.01, Normal(0, 1)) # 1% noise intensity, Gaussian distribution - flow_data = FlowData("examples/data/sod_shock_left_1d.tape", false) point_detect_algo = GradientShockDetectionAlgo(0.5) From ec4a1fbdb8244d7843d396a77efc67ac0c1b6b68 Mon Sep 17 00:00:00 2001 From: Ina Date: Sun, 29 Sep 2024 01:15:07 +0200 Subject: [PATCH 5/9] adjustment on the comparison algorithm --- analysis/examples/sod_shock_tube_1d.jl | 4 +- analysis/examples/sod_shock_tube_2d.jl | 4 +- analysis/noise.jl | 162 ++++++++++++------------- src/ShockwaveDetection.jl | 2 +- 4 files changed, 83 insertions(+), 89 deletions(-) diff --git a/analysis/examples/sod_shock_tube_1d.jl b/analysis/examples/sod_shock_tube_1d.jl index cb3cdbb..8184806 100644 --- a/analysis/examples/sod_shock_tube_1d.jl +++ b/analysis/examples/sod_shock_tube_1d.jl @@ -9,7 +9,7 @@ using Distributions # Create a NoiseData instance -noise_data = NoiseAnalysis.NoiseData(0.01, Normal(0, 0.1)) # 1% noise intensity, Gaussian distribution +noise_data = NoiseAnalysis.NoiseData(0.01, Normal(0, 0.7)) # 1% noise intensity, Gaussian distribution flow_data = FlowData("examples/data/sod_shock_left_1d.tape", false) @@ -24,4 +24,4 @@ noisy_shock_positions_over_time = detect(noisy_flow_data, point_detect_algo) #println("noisy_shock_positions_over_time: ", noisy_shock_positions_over_time) # Detect and compare shock positions with and without noise -shock_diff = NoiseAnalysis.compare_shock_positions_over_time_1d(original_shock_positions_over_time, noisy_shock_positions_over_time, 10.0, 100.0) \ No newline at end of file +shock_diff = NoiseAnalysis.compare_shock_positions_over_time_1d(original_shock_positions_over_time, noisy_shock_positions_over_time; threshold=10.0) \ No newline at end of file diff --git a/analysis/examples/sod_shock_tube_2d.jl b/analysis/examples/sod_shock_tube_2d.jl index 77f2e62..56e20de 100644 --- a/analysis/examples/sod_shock_tube_2d.jl +++ b/analysis/examples/sod_shock_tube_2d.jl @@ -10,7 +10,7 @@ noisy_flow_data = NoiseAnalysis.apply_noise_to_flow(flow_data, noise_data) point_detect_algo = ImageProcessingShockDetectionAlgo(0.8, :prewitt) fitting_algo = FittingAlgo(0.1, true) -dbscan_algo = DBSCANAlgo(1.05, 3, 10) +dbscan_algo = DBSCANAlgo(0.95, 3, 10) #original_detection = detect(flow_data, point_detect_algo, dbscan_algo, fitting_algo) #noisy_detection = detect(noisy_flow_data, point_detect_algo, dbscan_algo, fitting_algo) @@ -31,7 +31,7 @@ noisy_shock_fits_over_time = fit_shock_clusters_over_time(noisy_shock_clusters_o #println("noisy_shock_fits_over_time: ", noisy_shock_fits_over_time) -rmse_value = NoiseAnalysis.compare_shock_fits_over_time(original_shock_fits_over_time, noisy_shock_fits_over_time) +rmse_value = NoiseAnalysis.compare_shock_fits_over_time(original_shock_fits_over_time, noisy_shock_fits_over_time, original_shock_positions_over_time, noisy_shock_positions_over_time, flow_data; threshold=10.0) println("RMSE between fits: ", rmse_value) #shock_fits_difference = NoiseAnalysis.compare_shock_fits_over_time(original_shock_fits_over_time, noisy_shock_fits_over_time) diff --git a/analysis/noise.jl b/analysis/noise.jl index 9683ccc..a36a8a4 100644 --- a/analysis/noise.jl +++ b/analysis/noise.jl @@ -131,37 +131,45 @@ function find_closest_match(value, noisy_positions) return noisy_positions[min_idx], min_idx end -function compare_shock_positions_over_time_1d(original_positions, noisy_positions, threshold=10.0, large_penalty=100.0) - diffs = Float64[] - missing_count = 0 +# Helper function to find the closest match for a shock in the noisy data +function find_closest_match(value, noisy_positions) + if isempty(noisy_positions) + return nothing, nothing + end + + # Find the closest position in noisy_positions + differences = abs.(noisy_positions .- value) + min_diff, min_idx = findmin(differences) + + # Return the closest noisy position and its index + return noisy_positions[min_idx], min_idx +end + +# Main function to compare 1D shock positions over time +function compare_shock_positions_over_time_1d(original_positions, noisy_positions; threshold=10.0) + diffs = Float64[] # To store the differences between shocks + missing_count = 0 # Track the number of missing shocks for (orig, noisy) in zip(original_positions, noisy_positions) if isempty(orig) && isempty(noisy) - # Both are empty, no shocks detected in both original and noisy + # No shocks in both original and noisy, no comparison needed continue elseif isempty(noisy) && !isempty(orig) - # Noisy missed all shocks, apply large penalty for each missing shock - missing_count += length(orig) - for _ in orig - push!(diffs, large_penalty) - end + # No shocks detected in noisy but there are shocks in original + missing_count += length(orig) # Count all as missing continue - elseif isempty(orig) - # No original shock, but noisy detected something (false positives) - # This may not count as a "missing", but it could be penalized differently - missing_count += length(noisy) - for _ in noisy - push!(diffs, large_penalty) - end + elseif isempty(orig) && !isempty(noisy) + # Original has no shocks, but noisy has extra shocks (false positives) + missing_count += length(noisy) # Count all noisy as false detections continue end # Match shocks between original and noisy for orig_pos in orig closest_noisy_pos, closest_idx = find_closest_match(orig_pos, noisy) + if isnothing(closest_noisy_pos) || abs(orig_pos - closest_noisy_pos) > threshold - # No match found within threshold, apply penalty - push!(diffs, large_penalty) + # No match found within the threshold, consider it missing missing_count += 1 else # Calculate squared difference for valid matches @@ -171,92 +179,78 @@ function compare_shock_positions_over_time_1d(original_positions, noisy_position deleteat!(noisy, closest_idx) end end + + # Any remaining noisy positions are unmatched, count as false positives + if !isempty(noisy) + missing_count += length(noisy) + end end + # If there are no valid differences to compute RMSE, return NaN and the count of missing positions if isempty(diffs) return NaN, missing_count end + # Calculate RMSE only on the valid matched differences rmse = sqrt(mean(diffs)) return rmse, missing_count end - """ - compare_shock_fits_over_time(original_fit, noisy_fit) - -Compare the parameters of original and noisy shock fits over time by calculating the root mean square error (RMSE) between them. - -### Parameters: -- `original_fit::Vector{Vector{Union{Nothing, Fitting}}}`: A nested vector of fitting instances representing the original shock fits. Each inner vector corresponds to shock fits at a specific time step. -- `noisy_fit::Vector{Vector{Union{Nothing, Fitting}}}`: A nested vector of fitting instances representing the noisy shock fits. Each inner vector corresponds to shock fits at the same specific time step. - -### Returns: -- `Float64`: The root mean square error (RMSE) between the extracted parameters of the original and noisy fits. - -### Details: -- The function extracts the first parameter from each fitting instance (if valid) and compares the parameters from both original and noisy fits. -- If there are any missing parameters (instances of `nothing`), they are counted and reported. -- If the lengths of the parameter vectors differ, the shorter vector is padded with zeroes to match the length of the longer vector. -- The function computes the RMSE based on the differences between the parameter vectors. """ -function compare_shock_fits_over_time(original_fit, noisy_fit) - # Initialize parameter storage and missing count - original_params = Float64[] - noisy_params = Float64[] - missing_count = 0 # Count how many parameters are missing - - # Helper function to extract parameters from nested arrays - function extract_params(fits, params) - for fit_vector in fits - for fit in fit_vector - if fit !== nothing - # Append all elements of fit.parameters to params - append!(params, fit.parameters) - end - end - end +# Helper function to calculate residuals between fit and original data points from the shock point detection +function calculate_residuals(fit::ShockwaveDetection.Fitting, data_points) + residuals = Float64[] + + for i in eachindex(data_points) + fitted_value = fit.model(data_points[i][1], fit.parameters) # Call the model function with parameters + actual_value = data_points[i][2] # Actual y-value from the data + println("Fitted value: ", fitted_value, " Actual value: ", actual_value) + push!(residuals, abs(fitted_value - actual_value)) end - # Extract parameters for original and noisy fits - extract_params(original_fit, original_params) - extract_params(noisy_fit, noisy_params) + return residuals +end - # Check the extracted parameters - println("Extracted Original Parameters: ", original_params) - println("Extracted Noisy Parameters: ", noisy_params) +# Main function to compare fits over time by analyzing residuals rather than raw parameters +function compare_shock_fits_over_time(original_fit, noisy_fit, original_shock_positions_over_time, noisy_shock_positions_over_time, flow_data; threshold=10.0) + bounds = flow_data.bounds + ncells = flow_data.ncells + nsteps = flow_data.nsteps + rmse_results = Vector{Float64}(undef, nsteps) + data_points = Vector{Any}(undef, nsteps) - # Find the maximum length of parameter lists (for padding) - max_len = max(length(original_params), length(noisy_params)) + # Define the x-range + x = range(bounds[1][1], bounds[1][2], length=ncells[1]) - # Pad the shorter array with NaNs (or another value like Inf) to indicate missing parameters - if length(original_params) < max_len - missing_count += max_len - length(original_params) # Increment missing count - padding = fill(NaN, max_len - length(original_params)) - append!(original_params, padding) - end + # Define the y-range + y = range(bounds[2][1], bounds[2][2], length=ncells[2]) - if length(noisy_params) < max_len - missing_count += max_len - length(noisy_params) # Increment missing count - padding = fill(NaN, max_len - length(noisy_params)) - append!(noisy_params, padding) - end + for t in eachindex(nsteps) # Use eachindex for time steps + if isempty(original_shock_positions_over_time[t]) + rmse_results[t] = NaN # No shock positions, set RMSE as NaN + data_points[t] = [] # No data points to analyze + continue + end - # Check the new lengths after padding - println("Padded Original Parameters: ", original_params) - println("Padded Noisy Parameters: ", noisy_params) + # Transform shock positions to Cartesian coordinates + original_data_points[t] = ShockwaveDetection.cartesian_index_to_xy(original_shock_positions_over_time[t], x, y) + noisy_data_points[t] = ShockwaveDetection.cartesian_index_to_xy(noisy_shock_positions_over_time[t], x, y) - # Calculate the differences while ignoring NaNs - differences = original_params .- noisy_params - filtered_differences = filter(!isnan, differences) # Remove NaNs for RMSE calculation + # Calculate residuals for original and noisy fits + original_residuals = calculate_residuals(original_fit[t], original_data_points[t]) + noisy_residuals = calculate_residuals(noisy_fit[t], noisy_data_points[t]) - if isempty(filtered_differences) - println("No valid parameter differences to calculate RMSE.") - return NaN, missing_count # If all values were missing - end + if isempty(original_residuals) || isempty(noisy_residuals) + error("No valid residuals found for the input fits.") + end - # Calculate the RMSE (ignoring NaN values) - rmse = sqrt(mean(filtered_differences .^ 2)) + # Compare the residuals to see how well the fits match the actual data points + residual_diffs = abs.(original_residuals .- noisy_residuals) - return rmse, missing_count -end + # Calculate the root mean square error between residuals + rmse_results[t] = sqrt(mean(residual_diffs.^2)) + end + + return rmse_results +end \ No newline at end of file diff --git a/src/ShockwaveDetection.jl b/src/ShockwaveDetection.jl index ff8d8b8..7329f08 100644 --- a/src/ShockwaveDetection.jl +++ b/src/ShockwaveDetection.jl @@ -9,7 +9,7 @@ export GradientShockDetectionAlgo export ImageProcessingShockDetectionAlgo export DBSCANAlgo export cluster_shock_points -export Fitting, FittingAlgo, fit_shock_clusters_over_time, calculate_normal_vector +export FittingAlgo, fit_shock_clusters_over_time, calculate_normal_vector export create_wave_animation, create_wave_animation_with_shock, create_heatmap_evo, create_heatmap_evo_with_shock, plot_shock_fits_over_time export ShockDetectionResult2D, detect export ShockDetectionResult1D, detect From 9c1eb6be22290b40c06a1eeb09ca5658263d33d6 Mon Sep 17 00:00:00 2001 From: Ina Date: Sun, 29 Sep 2024 20:46:08 +0200 Subject: [PATCH 6/9] changes to the comparison algorithm --- analysis/examples/sod_shock_tube_1d.jl | 2 +- analysis/examples/sod_shock_tube_2d.jl | 6 +- analysis/noise.jl | 230 ++++++++++++++++--------- 3 files changed, 148 insertions(+), 90 deletions(-) diff --git a/analysis/examples/sod_shock_tube_1d.jl b/analysis/examples/sod_shock_tube_1d.jl index 8184806..c083bd4 100644 --- a/analysis/examples/sod_shock_tube_1d.jl +++ b/analysis/examples/sod_shock_tube_1d.jl @@ -9,7 +9,7 @@ using Distributions # Create a NoiseData instance -noise_data = NoiseAnalysis.NoiseData(0.01, Normal(0, 0.7)) # 1% noise intensity, Gaussian distribution +noise_data = NoiseAnalysis.NoiseData(0.01, Normal(0, 0.2)) #Gaussian distribution flow_data = FlowData("examples/data/sod_shock_left_1d.tape", false) diff --git a/analysis/examples/sod_shock_tube_2d.jl b/analysis/examples/sod_shock_tube_2d.jl index 56e20de..cf2ce2b 100644 --- a/analysis/examples/sod_shock_tube_2d.jl +++ b/analysis/examples/sod_shock_tube_2d.jl @@ -10,7 +10,7 @@ noisy_flow_data = NoiseAnalysis.apply_noise_to_flow(flow_data, noise_data) point_detect_algo = ImageProcessingShockDetectionAlgo(0.8, :prewitt) fitting_algo = FittingAlgo(0.1, true) -dbscan_algo = DBSCANAlgo(0.95, 3, 10) +dbscan_algo = DBSCANAlgo(5.95, 3, 10) #original_detection = detect(flow_data, point_detect_algo, dbscan_algo, fitting_algo) #noisy_detection = detect(noisy_flow_data, point_detect_algo, dbscan_algo, fitting_algo) @@ -31,8 +31,8 @@ noisy_shock_fits_over_time = fit_shock_clusters_over_time(noisy_shock_clusters_o #println("noisy_shock_fits_over_time: ", noisy_shock_fits_over_time) -rmse_value = NoiseAnalysis.compare_shock_fits_over_time(original_shock_fits_over_time, noisy_shock_fits_over_time, original_shock_positions_over_time, noisy_shock_positions_over_time, flow_data; threshold=10.0) -println("RMSE between fits: ", rmse_value) +rmse_value = NoiseAnalysis.compare_shock_fits_over_time(original_shock_fits_over_time, noisy_shock_fits_over_time, original_shock_positions_over_time, noisy_shock_positions_over_time, flow_data, noisy_flow_data, dbscan_algo; threshold=10.0) +println("RMSE between fits, cluster_fit_mismatch_count, empty_residuals_count, shock_data_missing_count, residual_length_mismatch_count: ", rmse_value) #shock_fits_difference = NoiseAnalysis.compare_shock_fits_over_time(original_shock_fits_over_time, noisy_shock_fits_over_time) diff --git a/analysis/noise.jl b/analysis/noise.jl index a36a8a4..d108f6d 100644 --- a/analysis/noise.jl +++ b/analysis/noise.jl @@ -4,7 +4,8 @@ using LinearAlgebra using ShockwaveProperties using Unitful using ShockwaveDetection -using Statistics +using Statistics +using .NoiseAnalysis """ struct NoiseData @@ -85,39 +86,18 @@ function apply_noise_to_flow(flow_data::FlowData, noise_data::NoiseData) end """ - compare_shock_positions_over_time_1d(original_positions::Vector{Vector{Float64}}, - noisy_positions::Vector{Vector{Float64}}) + find_closest_match(value, noisy_positions) -Compare shock positions between original and noisy datasets over time in 1D, using proximity-based matching to better quantify errors caused by false positives, missed detections, or noisy deviations. +Helper function to find the closest match to a given `value` in `noisy_positions`. # Arguments -- `original_positions::Vector{Vector{Float64}}`: - A vector of vectors where each inner vector contains shock positions for a specific time step in the original data. Each time step may have multiple shock positions. - -- `noisy_positions::Vector{Vector{Float64}}`: - A vector of vectors where each inner vector contains shock positions for a specific time step in the noisy data. Similar to `original_positions`, but potentially with noise. +- `value`: A numeric value representing the original position. +- `noisy_positions`: A vector of numeric values representing the noisy positions. # Returns -- `rmse::Float64`: - The Root Mean Squared Error (RMSE) between the matched shock positions from the original and noisy datasets. This metric quantifies the deviation in shock positions due to noise. - -- `missing_count::Int`: - The total count of shock positions that were missing or unmatched between the original and noisy datasets. This includes cases where shocks were present in the original data but absent in the noisy data, or vice versa. - -# Methodology -1. For each time step, the function compares shock positions between the `original_positions` and `noisy_positions`. -2. Instead of assuming the shock positions correspond one-to-one in both datasets, each shock in the original data is matched to the **closest** shock in the noisy data. This accounts for variations in the number and order of shock positions due to noise. -3. **Proximity Matching**: - - The `findmin` function is used to find the closest shock in the noisy data for each shock in the original data. If the distance between the matched positions is within a certain threshold (set to `10` units), the squared difference is computed and stored. - - If no close match is found, the function applies a **large penalty** (set to `100.0`) to reflect a significant mismatch or false positive. -4. **Handling False Positives**: - - Any extra shock positions in the noisy data that don't correspond to any shocks in the original data are penalized similarly. These represent **false detections** due to noise. -5. **Handling Missed Detections**: - - Shock positions in the original data that are missing from the noisy data are also penalized with the large penalty. These represent **missed detections**. -6. After all time steps are processed, the function calculates the RMSE over the matched differences and any penalties. -7. The function also tracks the total count of unmatched (or missing) shocks for additional insight. +- The closest matching value from `noisy_positions` and its index. +- Returns `nothing, nothing` if `noisy_positions` is empty. """ -# Helper function to find the closest match function find_closest_match(value, noisy_positions) if isempty(noisy_positions) return nothing, nothing @@ -126,41 +106,44 @@ function find_closest_match(value, noisy_positions) # Find the closest position in noisy_positions differences = abs.(noisy_positions .- value) min_diff, min_idx = findmin(differences) - + # Return the closest noisy position and its index return noisy_positions[min_idx], min_idx end -# Helper function to find the closest match for a shock in the noisy data -function find_closest_match(value, noisy_positions) - if isempty(noisy_positions) - return nothing, nothing - end +""" + compare_shock_positions_over_time_1d(original_positions, noisy_positions; threshold=10.0) - # Find the closest position in noisy_positions - differences = abs.(noisy_positions .- value) - min_diff, min_idx = findmin(differences) +Main function to compare 1D shock positions between original and noisy data over time. +It attempts to match shocks in `original_positions` to those in `noisy_positions` and computes the RMSE (Root Mean Square Error) between matched shocks. +Unmatched shocks are counted as either false positives (extra detections) or false negatives (missed detections). - # Return the closest noisy position and its index - return noisy_positions[min_idx], min_idx -end +# Arguments +- `original_positions`: A vector of vectors containing shock positions from the original data for each time step. +- `noisy_positions`: A vector of vectors containing shock positions from noisy data for each time step. +- `threshold`: A numeric value (default 10.0) specifying the maximum allowable difference between matched shocks. -# Main function to compare 1D shock positions over time +# Returns +- `rmse`: The Root Mean Square Error between matched shock positions, or `NaN` if no valid matches exist. +- `false_negatives_count`: The number of shocks in the original data that were missed in the noisy data. +- `false_positives_count`: The number of extra shocks detected in the noisy data that do not correspond to any shocks in the original data. +""" function compare_shock_positions_over_time_1d(original_positions, noisy_positions; threshold=10.0) - diffs = Float64[] # To store the differences between shocks - missing_count = 0 # Track the number of missing shocks + diffs = Float64[] # To store the differences between matched shocks + false_negatives_count = 0 # Count of missed shocks (false negatives) + false_positives_count = 0 # Count of extra shocks (false positives) for (orig, noisy) in zip(original_positions, noisy_positions) if isempty(orig) && isempty(noisy) # No shocks in both original and noisy, no comparison needed continue elseif isempty(noisy) && !isempty(orig) - # No shocks detected in noisy but there are shocks in original - missing_count += length(orig) # Count all as missing + # No shocks detected in noisy but there are shocks in original (false negatives) + false_negatives_count += length(orig) # Count all as missed continue elseif isempty(orig) && !isempty(noisy) # Original has no shocks, but noisy has extra shocks (false positives) - missing_count += length(noisy) # Count all noisy as false detections + false_positives_count += length(noisy) # Count all noisy as false detections continue end @@ -169,8 +152,8 @@ function compare_shock_positions_over_time_1d(original_positions, noisy_position closest_noisy_pos, closest_idx = find_closest_match(orig_pos, noisy) if isnothing(closest_noisy_pos) || abs(orig_pos - closest_noisy_pos) > threshold - # No match found within the threshold, consider it missing - missing_count += 1 + # No match found within the threshold, count as false negative + false_negatives_count += 1 else # Calculate squared difference for valid matches diff = (orig_pos - closest_noisy_pos)^2 @@ -182,75 +165,150 @@ function compare_shock_positions_over_time_1d(original_positions, noisy_position # Any remaining noisy positions are unmatched, count as false positives if !isempty(noisy) - missing_count += length(noisy) + false_positives_count += length(noisy) end end - # If there are no valid differences to compute RMSE, return NaN and the count of missing positions + # If there are no valid differences to compute RMSE, return NaN and the counts if isempty(diffs) - return NaN, missing_count + return NaN, false_negatives_count, false_positives_count end # Calculate RMSE only on the valid matched differences rmse = sqrt(mean(diffs)) - return rmse, missing_count + return rmse, false_negatives_count, false_positives_count end """ + calculate_residuals(fit, data_points) + +Calculates the residuals between the fitted curve and the actual data points (shock points). + +# Arguments +- `fit`: A fitted model (from the shock fit) containing a function that can predict values and its associated parameters. +- `data_points`: A collection of 2D data points where the first element of each point is the x-coordinate and the second is the actual y-coordinate. + +# Returns +- `residuals`: A vector of residuals (absolute differences between the fitted values and the actual values) for each data point. """ -# Helper function to calculate residuals between fit and original data points from the shock point detection function calculate_residuals(fit::ShockwaveDetection.Fitting, data_points) residuals = Float64[] - for i in eachindex(data_points) - fitted_value = fit.model(data_points[i][1], fit.parameters) # Call the model function with parameters - actual_value = data_points[i][2] # Actual y-value from the data - println("Fitted value: ", fitted_value, " Actual value: ", actual_value) - push!(residuals, abs(fitted_value - actual_value)) + for point in data_points + x_value = point[1] + actual_value = point[2] + + # Call the model function with parameters + fitted_value_vector = fit.model([x_value], fit.parameters) # Pass as a vector + fitted_value = fitted_value_vector[1] # Extract the first (and presumably only) element + + #println("Fitted value: ", fitted_value, " Actual value: ", actual_value) + + # Calculate residual + push!(residuals, abs(fitted_value - actual_value)) # Calculate residual end return residuals end -# Main function to compare fits over time by analyzing residuals rather than raw parameters -function compare_shock_fits_over_time(original_fit, noisy_fit, original_shock_positions_over_time, noisy_shock_positions_over_time, flow_data; threshold=10.0) +""" + compare_shock_fits_over_time(original_fit, noisy_fit, original_shock_positions_over_time, noisy_shock_positions_over_time, flow_data; threshold=10.0) + +Compares fitted curves (shock fits) to the respective clusters of shock points over time. + +# Arguments +- `original_fit`: A vector of fits from the original data over time. +- `noisy_fit`: A vector of fits from the noisy data over time. +- `original_shock_positions_over_time`: A vector of shock positions from the original data, for each time step. +- `noisy_shock_positions_over_time`: A vector of shock positions from the noisy data, for each time step. +- `flow_data`: An object containing flow data properties such as bounds, number of cells, and number of time steps. +- `threshold`: A numeric value (default 10.0) specifying the maximum allowable difference between matched shocks. + +# Returns +- `rmse_results`: A vector containing the root mean square error (RMSE) of the fit comparisons over time. +""" +function compare_shock_fits_over_time( + original_fit, noisy_fit, + original_shock_positions_over_time, noisy_shock_positions_over_time, + flow_data, noisy_flow_data, dbscan_algo; threshold=10.0) + bounds = flow_data.bounds - ncells = flow_data.ncells nsteps = flow_data.nsteps - rmse_results = Vector{Float64}(undef, nsteps) - data_points = Vector{Any}(undef, nsteps) - # Define the x-range - x = range(bounds[1][1], bounds[1][2], length=ncells[1]) + # Preallocate RMSE results for each time step + rmse_results = Vector{Float64}(undef, nsteps) + data_points = Vector{Any}(undef, nsteps) + + # Counters for different types of mismatches + cluster_fit_mismatch_count = 0 # Number of times clusters don't match fits + empty_residuals_count = 0 # Number of times residuals were empty + shock_data_missing_count = 0 # Number of times shock points were missing + residual_length_mismatch_count = 0 # Number of times residuals had different lengths + + # Cluster the shock points for original and noisy data + original_clusters = ShockwaveDetection.cluster_shock_points(dbscan_algo, original_shock_positions_over_time, flow_data) + noisy_clusters = ShockwaveDetection.cluster_shock_points(dbscan_algo, noisy_shock_positions_over_time, noisy_flow_data) + + # Check for mismatch in the number of total clusters and fits before time step iteration + if length(original_clusters) != length(original_fit) + error("Mismatch between number of clusters and fits in original data.") + end + if length(noisy_clusters) != length(noisy_fit) + error("Mismatch between number of clusters and fits in noisy data.") + end + + for t in 1:nsteps + # Handle cases where shock data is missing in one or both datasets + if isempty(original_shock_positions_over_time[t]) || isempty(noisy_shock_positions_over_time[t]) + shock_data_missing_count += 1 + rmse_results[t] = NaN + data_points[t] = [] + continue + end - # Define the y-range - y = range(bounds[2][1], bounds[2][2], length=ncells[2]) + num_original_clusters = length(original_clusters[t]) + num_noisy_clusters = length(noisy_clusters[t]) + num_fits = length(original_fit[t]) # Number of fits at time step t - for t in eachindex(nsteps) # Use eachindex for time steps - if isempty(original_shock_positions_over_time[t]) - rmse_results[t] = NaN # No shock positions, set RMSE as NaN - data_points[t] = [] # No data points to analyze + # Check if the number of clusters matches the number of fits for both datasets + if num_fits != num_original_clusters || num_fits != num_noisy_clusters + cluster_fit_mismatch_count += 1 + rmse_results[t] = NaN + data_points[t] = [] continue end - # Transform shock positions to Cartesian coordinates - original_data_points[t] = ShockwaveDetection.cartesian_index_to_xy(original_shock_positions_over_time[t], x, y) - noisy_data_points[t] = ShockwaveDetection.cartesian_index_to_xy(noisy_shock_positions_over_time[t], x, y) + for i in 1:num_fits + # Extract shock cluster data for the current fit + original_data_points = original_clusters[t][i] + noisy_data_points = noisy_clusters[t][i] - # Calculate residuals for original and noisy fits - original_residuals = calculate_residuals(original_fit[t], original_data_points[t]) - noisy_residuals = calculate_residuals(noisy_fit[t], noisy_data_points[t]) + # Calculate residuals for original and noisy fits + original_residuals = calculate_residuals(original_fit[t][i], original_data_points) + noisy_residuals = calculate_residuals(noisy_fit[t][i], noisy_data_points) - if isempty(original_residuals) || isempty(noisy_residuals) - error("No valid residuals found for the input fits.") - end + # If residuals are empty, increment the empty residuals counter + if isempty(original_residuals) || isempty(noisy_residuals) + empty_residuals_count += 1 + continue + end + + # Ensure residuals have the same length + if length(original_residuals) != length(noisy_residuals) + rmse_results[t] = NaN + data_points[t] = [] + residual_length_mismatch_count += 1 + continue + end - # Compare the residuals to see how well the fits match the actual data points - residual_diffs = abs.(original_residuals .- noisy_residuals) + # Compare the residuals between original and noisy fits + residual_diffs = abs.(original_residuals .- noisy_residuals) - # Calculate the root mean square error between residuals - rmse_results[t] = sqrt(mean(residual_diffs.^2)) + # Calculate root mean square error (RMSE) + rmse_results[t] = sqrt(mean(residual_diffs.^2)) + end end - return rmse_results -end \ No newline at end of file + # Return the RMSE results along with detailed mismatch counts + return rmse_results, cluster_fit_mismatch_count, empty_residuals_count, shock_data_missing_count, residual_length_mismatch_count +end From fccd519a237931789a3666bb500acf34cbf28a5c Mon Sep 17 00:00:00 2001 From: Ina Date: Sun, 29 Sep 2024 20:52:14 +0200 Subject: [PATCH 7/9] update documentation --- analysis/noise.jl | 44 ++++++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/analysis/noise.jl b/analysis/noise.jl index d108f6d..f6511b3 100644 --- a/analysis/noise.jl +++ b/analysis/noise.jl @@ -180,16 +180,22 @@ function compare_shock_positions_over_time_1d(original_positions, noisy_position end """ - calculate_residuals(fit, data_points) + calculate_residuals(fit::ShockwaveDetection.Fitting, data_points) -Calculates the residuals between the fitted curve and the actual data points (shock points). +Calculates the residuals between the actual data points and the fitted values generated by the model. # Arguments -- `fit`: A fitted model (from the shock fit) containing a function that can predict values and its associated parameters. -- `data_points`: A collection of 2D data points where the first element of each point is the x-coordinate and the second is the actual y-coordinate. +- `fit`: A `ShockwaveDetection.Fitting` object that contains the model and its parameters used to generate fitted values. +- `data_points`: A vector of tuples or arrays, where each element is a pair `[x_value, actual_value]`. The `x_value` represents the independent variable, and `actual_value` is the observed data point. # Returns -- `residuals`: A vector of residuals (absolute differences between the fitted values and the actual values) for each data point. +- `residuals`: A vector of residuals, where each residual is the absolute difference between the fitted value from the model and the actual observed value. + +# Method +1. For each data point, extract the `x_value` (independent variable) and `actual_value` (dependent variable). +2. Pass the `x_value` to the model using the parameters from `fit`. +3. Calculate the residual as the absolute difference between the model's fitted value and the `actual_value`. +4. Return the vector of residuals. """ function calculate_residuals(fit::ShockwaveDetection.Fitting, data_points) residuals = Float64[] @@ -212,20 +218,30 @@ function calculate_residuals(fit::ShockwaveDetection.Fitting, data_points) end """ - compare_shock_fits_over_time(original_fit, noisy_fit, original_shock_positions_over_time, noisy_shock_positions_over_time, flow_data; threshold=10.0) + compare_shock_fits_over_time(original_fit, noisy_fit, original_shock_positions_over_time, + noisy_shock_positions_over_time, flow_data, noisy_flow_data, + dbscan_algo; threshold=10.0) -Compares fitted curves (shock fits) to the respective clusters of shock points over time. +Compares fitted curves (shock fits) to the respective clusters of shock points over time, +considering potential mismatches in cluster and fit counts, and calculates the root mean square error (RMSE) between fits. # Arguments -- `original_fit`: A vector of fits from the original data over time. -- `noisy_fit`: A vector of fits from the noisy data over time. -- `original_shock_positions_over_time`: A vector of shock positions from the original data, for each time step. -- `noisy_shock_positions_over_time`: A vector of shock positions from the noisy data, for each time step. +- `original_fit`: A vector of fits from the original data over time. Each time step contains multiple fits. +- `noisy_fit`: A vector of fits from the noisy data over time. Each time step contains multiple fits. +- `original_shock_positions_over_time`: A vector of shock positions from the original data for each time step. +- `noisy_shock_positions_over_time`: A vector of shock positions from the noisy data for each time step. - `flow_data`: An object containing flow data properties such as bounds, number of cells, and number of time steps. -- `threshold`: A numeric value (default 10.0) specifying the maximum allowable difference between matched shocks. +- `noisy_flow_data`: A similar object containing the flow data for the noisy case. +- `dbscan_algo`: The DBSCAN clustering algorithm used to cluster shock points. +- `threshold`: A numeric value (default is 10.0) specifying the maximum allowable difference between matched shocks. # Returns -- `rmse_results`: A vector containing the root mean square error (RMSE) of the fit comparisons over time. +- `rmse_results`: A vector containing the root mean square error (RMSE) for the fit comparisons at each time step. If there is a mismatch or no valid comparison, the corresponding value will be `NaN`. +- `missing_counts`: A dictionary that counts different types of mismatches: + - `:missing_clusters`: Count of time steps where clusters are missing for either original or noisy data. + - `:missing_fits`: Count of time steps where there is a mismatch between the number of fits and clusters. + - `:missing_residuals`: Count of time steps where residuals between original and noisy fits are of unequal lengths. + - `:valid_comparisons`: Count of valid time steps where RMSE comparison was successfully performed. """ function compare_shock_fits_over_time( original_fit, noisy_fit, @@ -311,4 +327,4 @@ function compare_shock_fits_over_time( # Return the RMSE results along with detailed mismatch counts return rmse_results, cluster_fit_mismatch_count, empty_residuals_count, shock_data_missing_count, residual_length_mismatch_count -end +end \ No newline at end of file From 55331601cfaf8ad7ffca0e857ffa5a7bb8c5de41 Mon Sep 17 00:00:00 2001 From: Ina Date: Mon, 30 Sep 2024 17:21:39 +0200 Subject: [PATCH 8/9] small changes --- analysis/noise.jl | 59 +++++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 25 deletions(-) diff --git a/analysis/noise.jl b/analysis/noise.jl index f6511b3..9eaca9d 100644 --- a/analysis/noise.jl +++ b/analysis/noise.jl @@ -205,8 +205,8 @@ function calculate_residuals(fit::ShockwaveDetection.Fitting, data_points) actual_value = point[2] # Call the model function with parameters - fitted_value_vector = fit.model([x_value], fit.parameters) # Pass as a vector - fitted_value = fitted_value_vector[1] # Extract the first (and presumably only) element + fitted_value_vector = fit.model([x_value], fit.parameters) + fitted_value = fitted_value_vector[1] #println("Fitted value: ", fitted_value, " Actual value: ", actual_value) @@ -218,30 +218,30 @@ function calculate_residuals(fit::ShockwaveDetection.Fitting, data_points) end """ - compare_shock_fits_over_time(original_fit, noisy_fit, original_shock_positions_over_time, - noisy_shock_positions_over_time, flow_data, noisy_flow_data, - dbscan_algo; threshold=10.0) + compare_shock_fits_over_time(original_fit, noisy_fit, original_shock_positions_over_time, noisy_shock_positions_over_time, flow_data; threshold=10.0) -Compares fitted curves (shock fits) to the respective clusters of shock points over time, -considering potential mismatches in cluster and fit counts, and calculates the root mean square error (RMSE) between fits. +Compares fitted curves (shock fits) to the respective clusters of shock points over time. # Arguments -- `original_fit`: A vector of fits from the original data over time. Each time step contains multiple fits. -- `noisy_fit`: A vector of fits from the noisy data over time. Each time step contains multiple fits. -- `original_shock_positions_over_time`: A vector of shock positions from the original data for each time step. -- `noisy_shock_positions_over_time`: A vector of shock positions from the noisy data for each time step. +- `original_fit`: A vector of fits from the original data over time. +- `noisy_fit`: A vector of fits from the noisy data over time. +- `original_shock_positions_over_time`: A vector of shock positions from the original data, for each time step. +- `noisy_shock_positions_over_time`: A vector of shock positions from the noisy data, for each time step. - `flow_data`: An object containing flow data properties such as bounds, number of cells, and number of time steps. -- `noisy_flow_data`: A similar object containing the flow data for the noisy case. -- `dbscan_algo`: The DBSCAN clustering algorithm used to cluster shock points. -- `threshold`: A numeric value (default is 10.0) specifying the maximum allowable difference between matched shocks. +- `threshold`: A numeric value (default 10.0) specifying the maximum allowable difference between matched shocks. # Returns -- `rmse_results`: A vector containing the root mean square error (RMSE) for the fit comparisons at each time step. If there is a mismatch or no valid comparison, the corresponding value will be `NaN`. -- `missing_counts`: A dictionary that counts different types of mismatches: - - `:missing_clusters`: Count of time steps where clusters are missing for either original or noisy data. - - `:missing_fits`: Count of time steps where there is a mismatch between the number of fits and clusters. - - `:missing_residuals`: Count of time steps where residuals between original and noisy fits are of unequal lengths. - - `:valid_comparisons`: Count of valid time steps where RMSE comparison was successfully performed. +- `rmse_results`: A vector containing the root mean square error (RMSE) or norm differences of the fit comparisons over time. +- `missing_counts`: A dictionary containing counts of mismatches in different categories: + - `fit_mismatch_count`: Mismatches between the number of fits and clusters. + - `residual_mismatch_count`: Mismatches between the number of residuals. + - `norm_mismatch_count`: Mismatches resolved by comparing the second norm of residuals. + +# Method +1. For each time step, the function compares shock fits with the respective clusters of shock points. +2. It calculates the residuals for both the original and noisy fits. +3. If the residuals differ in length, it calculates the difference in their `L2 norms` instead of performing RMSE. +4. Returns the RMSE or norm differences for each time step, along with counts of different types of mismatches. """ function compare_shock_fits_over_time( original_fit, noisy_fit, @@ -259,7 +259,7 @@ function compare_shock_fits_over_time( cluster_fit_mismatch_count = 0 # Number of times clusters don't match fits empty_residuals_count = 0 # Number of times residuals were empty shock_data_missing_count = 0 # Number of times shock points were missing - residual_length_mismatch_count = 0 # Number of times residuals had different lengths + norm_mismatch_count = 0 # Number mismatches resolved by comparing the second norm of residuals. # Cluster the shock points for original and noisy data original_clusters = ShockwaveDetection.cluster_shock_points(dbscan_algo, original_shock_positions_over_time, flow_data) @@ -301,7 +301,7 @@ function compare_shock_fits_over_time( # Calculate residuals for original and noisy fits original_residuals = calculate_residuals(original_fit[t][i], original_data_points) - noisy_residuals = calculate_residuals(noisy_fit[t][i], noisy_data_points) + noisy_residuals = calculate_residuals(noisy_fit[t][i], original_data_points) # If residuals are empty, increment the empty residuals counter if isempty(original_residuals) || isempty(noisy_residuals) @@ -311,9 +311,18 @@ function compare_shock_fits_over_time( # Ensure residuals have the same length if length(original_residuals) != length(noisy_residuals) - rmse_results[t] = NaN + # Calculate the second norms of both residuals + original_norm = norm(original_residuals, 2) + noisy_norm = norm(noisy_residuals, 2) + + # Calculate the difference between the norms + norm_difference = abs(original_norm - noisy_norm) + + # Store the difference instead of RMSE + rmse_results[t] = norm_difference data_points[t] = [] - residual_length_mismatch_count += 1 + + norm_mismatch_count += 1 continue end @@ -326,5 +335,5 @@ function compare_shock_fits_over_time( end # Return the RMSE results along with detailed mismatch counts - return rmse_results, cluster_fit_mismatch_count, empty_residuals_count, shock_data_missing_count, residual_length_mismatch_count + return rmse_results, cluster_fit_mismatch_count, empty_residuals_count, shock_data_missing_count, norm_mismatch_count end \ No newline at end of file From b8d74169da7c3699c5114821518466113a1a0d13 Mon Sep 17 00:00:00 2001 From: Ina Date: Tue, 1 Oct 2024 01:36:09 +0200 Subject: [PATCH 9/9] final touches --- analysis/examples/sod_shock_tube_1d.jl | 3 - analysis/examples/sod_shock_tube_2d.jl | 27 ++------ analysis/noise.jl | 88 +++++++++++++------------- examples/sod_shock_tube_1d.jl | 3 + 4 files changed, 52 insertions(+), 69 deletions(-) diff --git a/analysis/examples/sod_shock_tube_1d.jl b/analysis/examples/sod_shock_tube_1d.jl index c083bd4..e181169 100644 --- a/analysis/examples/sod_shock_tube_1d.jl +++ b/analysis/examples/sod_shock_tube_1d.jl @@ -7,7 +7,6 @@ include("../NoiseAnalysis.jl") using .NoiseAnalysis using Distributions - # Create a NoiseData instance noise_data = NoiseAnalysis.NoiseData(0.01, Normal(0, 0.2)) #Gaussian distribution @@ -20,8 +19,6 @@ point_detect_algo = GradientShockDetectionAlgo(0.8) original_shock_positions_over_time = detect(flow_data, point_detect_algo) noisy_shock_positions_over_time = detect(noisy_flow_data, point_detect_algo) -#println("original_shock_positions_over_time: ", original_shock_positions_over_time) -#println("noisy_shock_positions_over_time: ", noisy_shock_positions_over_time) # Detect and compare shock positions with and without noise shock_diff = NoiseAnalysis.compare_shock_positions_over_time_1d(original_shock_positions_over_time, noisy_shock_positions_over_time; threshold=10.0) \ No newline at end of file diff --git a/analysis/examples/sod_shock_tube_2d.jl b/analysis/examples/sod_shock_tube_2d.jl index cf2ce2b..de642ee 100644 --- a/analysis/examples/sod_shock_tube_2d.jl +++ b/analysis/examples/sod_shock_tube_2d.jl @@ -12,31 +12,12 @@ point_detect_algo = ImageProcessingShockDetectionAlgo(0.8, :prewitt) fitting_algo = FittingAlgo(0.1, true) dbscan_algo = DBSCANAlgo(5.95, 3, 10) -#original_detection = detect(flow_data, point_detect_algo, dbscan_algo, fitting_algo) -#noisy_detection = detect(noisy_flow_data, point_detect_algo, dbscan_algo, fitting_algo) +# Detect shock points, clusters, and fits for original and noisy data +original_result = detect(flow_data, point_detect_algo, dbscan_algo, fitting_algo) +noisy_result = detect(noisy_flow_data, point_detect_algo, dbscan_algo, fitting_algo) -original_shock_positions_over_time = ShockwaveDetection.detect_shock_points(flow_data, point_detect_algo) -noisy_shock_positions_over_time = ShockwaveDetection.detect_shock_points(noisy_flow_data, point_detect_algo) -#println("original_shock_positions_over_time: ", original_shock_positions_over_time) -#println("noisy_shock_positions_over_time: ", noisy_shock_positions_over_time) - -original_shock_clusters_over_time = cluster_shock_points(dbscan_algo, original_shock_positions_over_time, flow_data) -noisy_shock_clusters_over_time = cluster_shock_points(dbscan_algo, noisy_shock_positions_over_time, flow_data) -#println("original_shock_clusters_over_time: ", original_shock_clusters_over_time) -#println("noisy_shock_clusters_over_time: ", noisy_shock_clusters_over_time) - -original_shock_fits_over_time = fit_shock_clusters_over_time(original_shock_clusters_over_time, fitting_algo) -noisy_shock_fits_over_time = fit_shock_clusters_over_time(noisy_shock_clusters_over_time, fitting_algo) -#println("original_shock_fits_over_time: ", original_shock_fits_over_time) -#println("noisy_shock_fits_over_time: ", noisy_shock_fits_over_time) - - -rmse_value = NoiseAnalysis.compare_shock_fits_over_time(original_shock_fits_over_time, noisy_shock_fits_over_time, original_shock_positions_over_time, noisy_shock_positions_over_time, flow_data, noisy_flow_data, dbscan_algo; threshold=10.0) +rmse_value = NoiseAnalysis.compare_shock_fits_over_time(original_result, noisy_result, flow_data; threshold=10.0) println("RMSE between fits, cluster_fit_mismatch_count, empty_residuals_count, shock_data_missing_count, residual_length_mismatch_count: ", rmse_value) -#shock_fits_difference = NoiseAnalysis.compare_shock_fits_over_time(original_shock_fits_over_time, noisy_shock_fits_over_time) - #plot_shock_fits_over_time(flow_data, original_detection, true) #plot_shock_fits_over_time(noisy_flow_data, noisy_detection, true) -#shock_position_difference = NoiseAnalysis.compare_shock_positions_over_time_2d(original_shock_positions_over_time, noisy_shock_positions_over_time) -#println("Shock position difference: ", shock_position_difference) diff --git a/analysis/noise.jl b/analysis/noise.jl index 9eaca9d..d113186 100644 --- a/analysis/noise.jl +++ b/analysis/noise.jl @@ -13,32 +13,32 @@ using .NoiseAnalysis A structure to store noise configuration for testing and analysis purposes. # Fields -- `intensity::Float64`: The intensity of the noise. -- `distribution::T`: The probability distribution used to generate noise. +- `intensity::T`: The intensity of the noise. +- `distribution::D`: The probability distribution used to generate noise. """ -struct NoiseData{T} - intensity::Float64 - distribution::T +struct NoiseData{T, D} + intensity::T + distribution::D end """ - NoiseData(intensity::Float64, distribution::T) -> NoiseData{T} + NoiseData(intensity::T, distribution::D) -> NoiseData{T} Constructs a `NoiseData` instance with the specified intensity and distribution. # Arguments -- `intensity`: A `Float64` representing the intensity of the noise. +- `intensity`: Representing the intensity of the noise. - `distribution`: The probability distribution used to generate noise. # Returns - A `NoiseData` instance. """ -function NoiseData(intensity::Float64, distribution::T) where T - return NoiseData{T}(intensity, distribution) +function NoiseData(intensity::T, distribution::D) where {T, D} + return NoiseData{T, D}(intensity, distribution) end """ - apply_noise(data::Array{T}, noise_data::NoiseData) -> Array{T} + apply_noise(data::Array{T}, noise_data::NoiseData) -> Array{T, D} Applies noise to a given data array using the noise intensity and distribution from `NoiseData`. @@ -112,7 +112,7 @@ function find_closest_match(value, noisy_positions) end """ - compare_shock_positions_over_time_1d(original_positions, noisy_positions; threshold=10.0) + compare_shock_positions_over_time_1d(original_positions, noisy_positions; threshold::T=10.0) Main function to compare 1D shock positions between original and noisy data over time. It attempts to match shocks in `original_positions` to those in `noisy_positions` and computes the RMSE (Root Mean Square Error) between matched shocks. @@ -128,8 +128,8 @@ Unmatched shocks are counted as either false positives (extra detections) or fal - `false_negatives_count`: The number of shocks in the original data that were missed in the noisy data. - `false_positives_count`: The number of extra shocks detected in the noisy data that do not correspond to any shocks in the original data. """ -function compare_shock_positions_over_time_1d(original_positions, noisy_positions; threshold=10.0) - diffs = Float64[] # To store the differences between matched shocks +function compare_shock_positions_over_time_1d(original_positions, noisy_positions; threshold::T = 10.0) where T + diffs = Any[] # To store the differences between matched shocks false_negatives_count = 0 # Count of missed shocks (false negatives) false_positives_count = 0 # Count of extra shocks (false positives) @@ -197,8 +197,8 @@ Calculates the residuals between the actual data points and the fitted values ge 3. Calculate the residual as the absolute difference between the model's fitted value and the `actual_value`. 4. Return the vector of residuals. """ -function calculate_residuals(fit::ShockwaveDetection.Fitting, data_points) - residuals = Float64[] +function calculate_residuals(fit::ShockwaveDetection.Fitting{T}, data_points::Vector{Vector{T}}) where T + residuals = Any[] for point in data_points x_value = point[1] @@ -223,59 +223,61 @@ end Compares fitted curves (shock fits) to the respective clusters of shock points over time. # Arguments -- `original_fit`: A vector of fits from the original data over time. -- `noisy_fit`: A vector of fits from the noisy data over time. -- `original_shock_positions_over_time`: A vector of shock positions from the original data, for each time step. -- `noisy_shock_positions_over_time`: A vector of shock positions from the noisy data, for each time step. -- `flow_data`: An object containing flow data properties such as bounds, number of cells, and number of time steps. +- `original_fit`: A vector of fitted curves from the original data over time. +- `noisy_fit`: A vector of fitted curves from the noisy data over time. +- `original_shock_positions_over_time`: A vector of shock positions from the original data for each time step. +- `noisy_shock_positions_over_time`: A vector of shock positions from the noisy data for each time step. +- `flow_data`: An object containing properties of flow data such as bounds, number of cells, and number of time steps. - `threshold`: A numeric value (default 10.0) specifying the maximum allowable difference between matched shocks. # Returns - `rmse_results`: A vector containing the root mean square error (RMSE) or norm differences of the fit comparisons over time. - `missing_counts`: A dictionary containing counts of mismatches in different categories: - - `fit_mismatch_count`: Mismatches between the number of fits and clusters. - - `residual_mismatch_count`: Mismatches between the number of residuals. - - `norm_mismatch_count`: Mismatches resolved by comparing the second norm of residuals. + - `fit_mismatch_count`: Number of mismatches between the number of fits and clusters. + - `residual_mismatch_count`: Number of mismatches between the number of residuals calculated. + - `norm_mismatch_count`: Number of mismatches resolved by comparing the second norm of residuals. # Method -1. For each time step, the function compares shock fits with the respective clusters of shock points. +1. For each time step, the function iterates over the fitted curves and their respective clusters of shock points. 2. It calculates the residuals for both the original and noisy fits. -3. If the residuals differ in length, it calculates the difference in their `L2 norms` instead of performing RMSE. -4. Returns the RMSE or norm differences for each time step, along with counts of different types of mismatches. +3. If the residuals differ in length, it calculates the difference in their L2 norms instead of performing RMSE. +4. It assesses the matches based on the specified `threshold` and counts mismatches in various categories. +5. The function compiles and returns the RMSE or norm differences for each time step along with counts of different types of mismatches for further analysis. """ + function compare_shock_fits_over_time( - original_fit, noisy_fit, - original_shock_positions_over_time, noisy_shock_positions_over_time, - flow_data, noisy_flow_data, dbscan_algo; threshold=10.0) + original_result::ShockwaveDetection.ShockDetectionResult2D, + noisy_result::ShockwaveDetection.ShockDetectionResult2D, + flow_data; threshold=10.0) bounds = flow_data.bounds nsteps = flow_data.nsteps # Preallocate RMSE results for each time step - rmse_results = Vector{Float64}(undef, nsteps) + rmse_results = Vector{Any}(undef, nsteps) data_points = Vector{Any}(undef, nsteps) # Counters for different types of mismatches cluster_fit_mismatch_count = 0 # Number of times clusters don't match fits empty_residuals_count = 0 # Number of times residuals were empty shock_data_missing_count = 0 # Number of times shock points were missing - norm_mismatch_count = 0 # Number mismatches resolved by comparing the second norm of residuals. + norm_mismatch_count = 0 # Number mismatches resolved by comparing the second norm of residuals. - # Cluster the shock points for original and noisy data - original_clusters = ShockwaveDetection.cluster_shock_points(dbscan_algo, original_shock_positions_over_time, flow_data) - noisy_clusters = ShockwaveDetection.cluster_shock_points(dbscan_algo, noisy_shock_positions_over_time, noisy_flow_data) + # Use the shock clusters from the ShockDetectionResult2D struct + original_clusters = original_result.shock_clusters_over_time + noisy_clusters = noisy_result.shock_clusters_over_time # Check for mismatch in the number of total clusters and fits before time step iteration - if length(original_clusters) != length(original_fit) + if length(original_clusters) != length(original_result.shock_fits_over_time) error("Mismatch between number of clusters and fits in original data.") end - if length(noisy_clusters) != length(noisy_fit) + if length(noisy_clusters) != length(noisy_result.shock_fits_over_time) error("Mismatch between number of clusters and fits in noisy data.") end for t in 1:nsteps # Handle cases where shock data is missing in one or both datasets - if isempty(original_shock_positions_over_time[t]) || isempty(noisy_shock_positions_over_time[t]) + if isempty(original_result.shock_positions_over_time[t]) || isempty(noisy_result.shock_positions_over_time[t]) shock_data_missing_count += 1 rmse_results[t] = NaN data_points[t] = [] @@ -284,7 +286,7 @@ function compare_shock_fits_over_time( num_original_clusters = length(original_clusters[t]) num_noisy_clusters = length(noisy_clusters[t]) - num_fits = length(original_fit[t]) # Number of fits at time step t + num_fits = length(original_result.shock_fits_over_time[t]) # Number of fits at time step t # Check if the number of clusters matches the number of fits for both datasets if num_fits != num_original_clusters || num_fits != num_noisy_clusters @@ -300,8 +302,8 @@ function compare_shock_fits_over_time( noisy_data_points = noisy_clusters[t][i] # Calculate residuals for original and noisy fits - original_residuals = calculate_residuals(original_fit[t][i], original_data_points) - noisy_residuals = calculate_residuals(noisy_fit[t][i], original_data_points) + original_residuals = calculate_residuals(original_result.shock_fits_over_time[t][i], original_data_points) + noisy_residuals = calculate_residuals(noisy_result.shock_fits_over_time[t][i], original_data_points) # If residuals are empty, increment the empty residuals counter if isempty(original_residuals) || isempty(noisy_residuals) @@ -314,14 +316,14 @@ function compare_shock_fits_over_time( # Calculate the second norms of both residuals original_norm = norm(original_residuals, 2) noisy_norm = norm(noisy_residuals, 2) - + # Calculate the difference between the norms norm_difference = abs(original_norm - noisy_norm) - + # Store the difference instead of RMSE rmse_results[t] = norm_difference data_points[t] = [] - + norm_mismatch_count += 1 continue end diff --git a/examples/sod_shock_tube_1d.jl b/examples/sod_shock_tube_1d.jl index 816775b..73166c9 100644 --- a/examples/sod_shock_tube_1d.jl +++ b/examples/sod_shock_tube_1d.jl @@ -4,6 +4,9 @@ using ShockwaveProperties using Unitful using ShockwaveDetection +# Create a NoiseData instance +#noise_data = NoiseData(0.01, Normal(0, 1)) # 1% noise intensity, Gaussian distribution + flow_data = FlowData("examples/data/sod_shock_left_1d.tape", false) point_detect_algo = GradientShockDetectionAlgo(0.5)