diff --git a/NEWS.md b/NEWS.md index 216d092..b3fd01d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -22,6 +22,10 @@ With this release, you can remake a `OutputVar` using an already existing `Outpu is helpful if you need to construct a new `OutputVar` from an already existing one, but only need to modify one field while leaving the other fields the same. +## Reordering categories +There is now support for reordering the categories in an `RMSEVariable`. This is helpful +when making plots using `Visualize.plot_boxplot!` and `Visualize.plot_leaderboard!`. + v0.5.12 ------- diff --git a/docs/src/api.md b/docs/src/api.md index a05c7b1..80f3e8a 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -105,6 +105,8 @@ Leaderboard.add_unit! Leaderboard.find_best_single_model Leaderboard.find_worst_single_model Leaderboard.median +Leaderboard.reorder_categories +Leaderboard.match_category_order ``` ## Utilities diff --git a/src/Leaderboard.jl b/src/Leaderboard.jl index 6aa28aa..2dc9159 100644 --- a/src/Leaderboard.jl +++ b/src/Leaderboard.jl @@ -15,7 +15,9 @@ export RMSEVariable, add_unit!, find_best_single_model, find_worst_single_model, - median + median, + reorder_categories, + match_category_order """ Holding root mean squared errors over multiple categories and models for a single @@ -452,6 +454,59 @@ function add_category(rmse_var::RMSEVariable, categories::String...) ) end +""" + reorder_categories(rmse_var::RMSEVariable, categories::Vector{String}) + +Reorder the categories in `rmse_var` to match `categories`. + +If `categories` does not contain the same set of categories as `rmse_var`, then an error +is thrown. This function is helpful when changing the order of the categories in the plot +produced by `ClimaAnalysis.Visualize.plot_boxplot!`. 
+""" +function reorder_categories(rmse_var::RMSEVariable, categories::Vector{String}) + # Both must hold the same set of category names for a reordering to be possible + rmse_var_categories = category_names(rmse_var) + same_categories = Set(categories) == Set(rmse_var_categories) + same_categories || error( + "Categories in $(rmse_var_categories) is not the same as $categories", + ) + + # `perm[i]` is the current column of `categories[i]`; reorder the RMSE columns with it + perm = indexin(categories, rmse_var_categories) + return RMSEVariable( + rmse_var.short_name, + model_names(rmse_var), + categories, + rmse_var.RMSEs[:, perm], + rmse_var.units, + ) +end + +""" + match_category_order(rmse_var_src::RMSEVariable, rmse_var_dest::RMSEVariable) + +Make the order of categories of `rmse_var_src` match the order of the categories of +`rmse_var_dest`. An error is thrown if the two do not have the same set of categories. + +This function is helpful when changing the order of the categories in the plot produced by +`ClimaAnalysis.Visualize.plot_boxplot!`. +""" +function match_category_order( + rmse_var_src::RMSEVariable, + rmse_var_dest::RMSEVariable, +) + rmse_var_src_categories = category_names(rmse_var_src) + rmse_var_dest_categories = category_names(rmse_var_dest) + + same_categories = + Set(rmse_var_src_categories) == Set(rmse_var_dest_categories) + same_categories || error( + "Categories in $rmse_var_src_categories (src) is not the same as $rmse_var_dest_categories (dest)", + ) + + return reorder_categories(rmse_var_src, rmse_var_dest_categories) +end + """ add_model(rmse_var::RMSEVariable, models::String...) 
diff --git a/test/test_Leaderboard.jl b/test/test_Leaderboard.jl index e065179..8366d66 100644 --- a/test/test_Leaderboard.jl +++ b/test/test_Leaderboard.jl @@ -336,3 +336,72 @@ end @test val == [7.0, 9.0, 11.0, 13.0, 15.0] ./ 2.0 @test val |> size == (5,) end + +@testset "Reordering categories" begin + csv_file_path = joinpath(@__DIR__, "sample_data/test_csv.csv") + rmse_var = ClimaAnalysis.read_rmses(csv_file_path, "ta") + rmse_var[:, :] = [[1.0 2.0 3.0 4.0 5.0]; [6.0 7.0 8.0 9.0 10.0]] + + # Identity reordering + rmse_var_reordered = ClimaAnalysis.reorder_categories( + rmse_var, + ["DJF", "MAM", "JJA", "SON", "ANN"], + ) + @test rmse_var_reordered.RMSEs == rmse_var.RMSEs + @test ClimaAnalysis.category_names(rmse_var_reordered) == + ClimaAnalysis.category_names(rmse_var) + @test ClimaAnalysis.model_names(rmse_var_reordered) == + ClimaAnalysis.model_names(rmse_var) + + # Shift everything by one + rmse_var_reordered = ClimaAnalysis.reorder_categories( + rmse_var, + ["ANN", "DJF", "MAM", "JJA", "SON"], + ) + @test rmse_var_reordered.RMSEs == rmse_var.RMSEs[:, [5, 1, 2, 3, 4]] + @test ClimaAnalysis.category_names(rmse_var_reordered) == + ClimaAnalysis.category_names(rmse_var)[[5, 1, 2, 3, 4]] + @test ClimaAnalysis.model_names(rmse_var_reordered) == + ClimaAnalysis.model_names(rmse_var) + + # Matching the order of two different RMSE vars + rmse_var1 = ClimaAnalysis.read_rmses(csv_file_path, "ta") + rmse_var1[:, :] = [[11.0 12.0 13.0 14.0 15.0]; [16.0 17.0 18.0 19.0 20.0]] + + # Category orders are the same + rmse_var_reordered = ClimaAnalysis.match_category_order(rmse_var, rmse_var1) + @test rmse_var_reordered.RMSEs == rmse_var.RMSEs + @test ClimaAnalysis.category_names(rmse_var_reordered) == + ClimaAnalysis.category_names(rmse_var) + @test ClimaAnalysis.model_names(rmse_var_reordered) == + ClimaAnalysis.model_names(rmse_var) + + # Category orders are different + rmse_var1 = ClimaAnalysis.reorder_categories( + rmse_var, + ["ANN", "DJF", "MAM", "JJA", "SON"], + ) + rmse_var_reordered = 
ClimaAnalysis.match_category_order(rmse_var, rmse_var1) + @test rmse_var_reordered.RMSEs == rmse_var.RMSEs[:, [5, 1, 2, 3, 4]] + @test ClimaAnalysis.category_names(rmse_var_reordered) == + ClimaAnalysis.category_names(rmse_var)[[5, 1, 2, 3, 4]] + @test ClimaAnalysis.model_names(rmse_var_reordered) == + ClimaAnalysis.model_names(rmse_var) + + # Test for error handling + @test_throws ErrorException ClimaAnalysis.reorder_categories( + rmse_var, + ["This", "should", "not", "work!"], + ) + rmse_var_diff_cats = ClimaAnalysis.Leaderboard.RMSEVariable( + rmse_var.short_name, + ClimaAnalysis.model_names(rmse_var), + ["!", "work", "not", "should", "This"], + rmse_var.RMSEs, + rmse_var.units, + ) + @test_throws ErrorException ClimaAnalysis.match_category_order( + rmse_var, + rmse_var_diff_cats, + ) +end