From 27d72a52ae64aa5a09337916e994b4fcd2ca4a9a Mon Sep 17 00:00:00 2001 From: Martijn Visser Date: Fri, 8 Sep 2023 15:58:45 +0200 Subject: [PATCH 1/4] use FixedSizeDiffCache for flows When profiling runs with the default `autodiff=true`, this line was responsible for 35% of the time and almost all allocations: https://github.com/Deltares/Ribasim/blob/b3eb044a722d1655c5465bafe50951b75fe960d6/core/src/solve.jl#L1002 `connectivity.flow` is a sparse matrix, but the DiffCache does not seem to like sparse matrixes. The `dual_du` field was a dense vector of length n x n x cache_size, and the `get_tmp` call led to further allocations trying to restructure the sparse matrix from the vector. Luckily there is the FixedSizeDiffCache that helps here: https://docs.sciml.ai/PreallocationTools/stable/#FixedSizeDiffCache This retains the sparsity in the dual, and returns a `ReinterpretArray` from `get_tmp` during autodiff. To avoid materializing this reinterpretarray I needed to additionally fill the parent array with zeros rather than the array itself. There is another unrelated performance fix here, and that is to concretely type the Parameter struct, by adding type parameters from its fields. Otherwise you have situations like ``` struct A a::Vector end ``` where the compiler doesn't know the element type of the Vector, so it can perform less optimizations. The solution: ``` struct A{T} a::Vector{T} end ``` Finally I consistently added AbstractVector/Matrix argument type annotations to ensure the ReinterpretArray could enter everywhere. And I renamed the functions to formulate flows to `formulate_flow`, to make it easier to separate them from the other `formulate!` methods. --- core/src/Ribasim.jl | 2 +- core/src/create.jl | 5 ++- core/src/solve.jl | 80 +++++++++++++++++++++---------------- core/test/run_models.jl | 5 +++ docs/contribute/addnode.qmd | 4 +- 5 files changed, 56 insertions(+), 40 deletions(-) diff --git a/core/src/Ribasim.jl b/core/src/Ribasim.jl index c1bc06678..78b615aaa 100644 --- a/core/src/Ribasim.jl +++ b/core/src/Ribasim.jl @@ -20,7 +20,7 @@ using Legolas: Legolas, @schema, @version, validate, SchemaVersion, declared using Logging: current_logger, min_enabled_level, with_logger using LoggingExtras: EarlyFilteredLogger, LevelOverrideLogger using OrdinaryDiffEq -using PreallocationTools: DiffCache, get_tmp +using PreallocationTools: DiffCache, FixedSizeDiffCache, get_tmp using SciMLBase using SparseArrays using SQLite: SQLite, DB, Query, esc_id diff --git a/core/src/create.jl b/core/src/create.jl index 8221f4504..c75a6c0e8 100644 --- a/core/src/create.jl +++ b/core/src/create.jl @@ -202,10 +202,11 @@ function Connectivity(db::DB, config::Config, chunk_size::Int)::Connectivity edge_ids_flow_inv = Dictionary(values(edge_ids_flow), keys(edge_ids_flow)) flow = adjacency_matrix(graph_flow, Float64) - nonzeros(flow) .= 0.0 + flow .= 0.0 if config.solver.autodiff - flow = DiffCache(flow, chunk_size) + # FixedSizeDiffCache performs better for sparse matrix + flow = FixedSizeDiffCache(flow, chunk_size) end return Connectivity( diff --git a/core/src/solve.jl b/core/src/solve.jl index 9c684d35f..35ad28646 100644 --- a/core/src/solve.jl +++ b/core/src/solve.jl @@ -401,21 +401,21 @@ struct PidControl{T} <: AbstractParameterNode end # TODO Automatically add all nodetypes here -struct Parameters +struct Parameters{T, TSparse, C1, C2} starttime::DateTime - connectivity::Connectivity - basin::Basin + connectivity::Connectivity{TSparse} + basin::Basin{T, C1} linear_resistance::LinearResistance manning_resistance::ManningResistance - tabulated_rating_curve::TabulatedRatingCurve + tabulated_rating_curve::TabulatedRatingCurve{C2} fractional_flow::FractionalFlow level_boundary::LevelBoundary flow_boundary::FlowBoundary - pump::Pump - outlet::Outlet + pump::Pump{T} + outlet::Outlet{T} terminal::Terminal discrete_control::DiscreteControl - pid_control::PidControl + pid_control::PidControl{T} lookup::Dict{Int, Symbol} end @@ -508,8 +508,8 @@ function formulate!( du::AbstractVector, basin::Basin, storage::AbstractVector, - current_area, - current_level, + current_area::AbstractVector, + current_level::AbstractVector, t::Float64, )::Nothing for i in eachindex(storage) @@ -553,13 +553,13 @@ end function continuous_control!( u::ComponentVector, du::ComponentVector, - current_area, + current_area::AbstractVector, pid_control::PidControl, p::Parameters, integral_value::SubArray, current_level::AbstractVector, - flow::SparseMatrixCSC, - pid_error, + flow::AbstractMatrix, + pid_error::AbstractVector, t::Float64, )::Nothing (; connectivity, pump, outlet, basin, fractional_flow) = p @@ -700,11 +700,11 @@ end """ Directed graph: outflow is positive! """ -function formulate!( +function formulate_flow!( linear_resistance::LinearResistance, p::Parameters, - current_level, - flow, + current_level::AbstractVector, + flow::AbstractMatrix, t::Float64, )::Nothing (; connectivity) = p @@ -733,11 +733,11 @@ end """ Directed graph: outflow is positive! """ -function formulate!( +function formulate_flow!( tabulated_rating_curve::TabulatedRatingCurve, p::Parameters, - current_level, - flow::SparseMatrixCSC, + current_level::AbstractVector, + flow::AbstractMatrix, t::Float64, )::Nothing (; connectivity) = p @@ -800,11 +800,11 @@ The average of the upstream and downstream water depth is used to compute cross- hydraulic radius. This ensures that a basin can receive water after it has gone dry. """ -function formulate!( +function formulate_flow!( manning_resistance::ManningResistance, p::Parameters, - current_level, - flow, + current_level::AbstractVector, + flow::AbstractMatrix, t::Float64, )::Nothing (; basin, connectivity) = p @@ -859,7 +859,11 @@ function formulate!( return nothing end -function formulate!(fractional_flow::FractionalFlow, flow, p::Parameters)::Nothing +function formulate_flow!( + fractional_flow::FractionalFlow, + flow::AbstractMatrix, + p::Parameters, +)::Nothing (; connectivity) = p (; graph_flow) = connectivity (; node_id, fraction) = fractional_flow @@ -871,7 +875,12 @@ function formulate!(fractional_flow::FractionalFlow, flow, p::Parameters)::Nothi return nothing end -function formulate!(flow_boundary::FlowBoundary, p::Parameters, flow, t::Float64)::Nothing +function formulate_flow!( + flow_boundary::FlowBoundary, + p::Parameters, + flow::AbstractMatrix, + t::Float64, +)::Nothing (; connectivity) = p (; graph_flow) = connectivity (; node_id, active, flow_rate) = flow_boundary @@ -892,10 +901,10 @@ function formulate!(flow_boundary::FlowBoundary, p::Parameters, flow, t::Float64 end end -function formulate!( +function formulate_flow!( node::Union{Pump, Outlet}, p::Parameters, - flow, + flow::AbstractMatrix, storage::AbstractVector, )::Nothing (; connectivity, basin) = p @@ -938,7 +947,7 @@ end function formulate!( du::ComponentVector, connectivity::Connectivity, - flow::SparseMatrixCSC, + flow::AbstractMatrix, basin::Basin, )::Nothing # loop over basins @@ -960,7 +969,7 @@ function formulate_flows!( p::Parameters, storage::AbstractVector, current_level, - flow::SparseMatrixCSC, + flow::AbstractMatrix, t::Float64, )::Nothing (; @@ -973,13 +982,13 @@ function formulate_flows!( outlet, ) = p - formulate!(linear_resistance, p, current_level, flow, t) - formulate!(manning_resistance, p, current_level, flow, t) - formulate!(tabulated_rating_curve, p, current_level, flow, t) - formulate!(flow_boundary, p, flow, t) - formulate!(fractional_flow, flow, p) - formulate!(pump, p, flow, storage) - formulate!(outlet, p, flow, storage) + formulate_flow!(linear_resistance, p, current_level, flow, t) + formulate_flow!(manning_resistance, p, current_level, flow, t) + formulate_flow!(tabulated_rating_curve, p, current_level, flow, t) + formulate_flow!(flow_boundary, p, flow, t) + formulate_flow!(fractional_flow, flow, p) + formulate_flow!(pump, p, flow, storage) + formulate_flow!(outlet, p, flow, storage) return nothing end @@ -1000,7 +1009,8 @@ function water_balance!( du .= 0.0 flow = get_tmp(connectivity.flow, u) - nonzeros(flow) .= 0.0 + # use parent to avoid materializing the ReinterpretArray from FixedSizeDiffCache + parent(flow) .= 0.0 current_area = get_tmp(basin.current_area, u) current_level = get_tmp(basin.current_level, u) diff --git a/core/test/run_models.jl b/core/test/run_models.jl index e95197086..db2b97f22 100644 --- a/core/test/run_models.jl +++ b/core/test/run_models.jl @@ -32,6 +32,11 @@ end end @test model isa Ribasim.Model + p = model.integrator.p + @test p isa Ribasim.Parameter + @test isconcretetype(typeof(p)) + @test all(isconcretetype, fieldtypes(typeof(p))) + @test successful_retcode(model) @test model.integrator.sol.u[end] ≈ Float32[519.8817, 519.8798, 339.3959, 1418.4331] skip = Sys.isapple() atol = 1.5 diff --git a/docs/contribute/addnode.qmd b/docs/contribute/addnode.qmd index b925824ce..3c15e841f 100644 --- a/docs/contribute/addnode.qmd +++ b/docs/contribute/addnode.qmd @@ -61,10 +61,10 @@ end ## Node behavior -In general if the new node type dictates flow, the behaviour of the new node in the Ribasim core is defined in a method of the `formulate!` function, which is called within the `water_balance!` (both in `solve.jl`) function being the right hand side of the system of differential equations solved by Ribasim. Here the details depend highly on the specifics of the node type. An example structure of a `formulate!` method is given below. +In general if the new node type dictates flow, the behaviour of the new node in the Ribasim core is defined in a method of the `formulate_flow!` function, which is called within the `water_balance!` (both in `solve.jl`) function being the right hand side of the system of differential equations solved by Ribasim. Here the details depend highly on the specifics of the node type. An example structure of a `formulate_flow!` method is given below. ```julia -function formulate!(new_node_type::NewNodeType, p::Parameters)::Nothing +function formulate_flow!(new_node_type::NewNodeType, p::Parameters)::Nothing # Retrieve relevant parameters (; connectivity) = p (; flow) = connectivity From ce69e3d9c75bd67d19df0892e9751e5b662a8858 Mon Sep 17 00:00:00 2001 From: Martijn Visser Date: Fri, 8 Sep 2023 17:05:52 +0200 Subject: [PATCH 2/4] s --- core/test/run_models.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/test/run_models.jl b/core/test/run_models.jl index db2b97f22..466bbe2bc 100644 --- a/core/test/run_models.jl +++ b/core/test/run_models.jl @@ -33,7 +33,7 @@ end @test model isa Ribasim.Model p = model.integrator.p - @test p isa Ribasim.Parameter + @test p isa Ribasim.Parameters @test isconcretetype(typeof(p)) @test all(isconcretetype, fieldtypes(typeof(p))) From 8a2f3ad1dcd8a188d4979da60440a6349803c8d1 Mon Sep 17 00:00:00 2001 From: Martijn Visser Date: Fri, 8 Sep 2023 17:49:34 +0200 Subject: [PATCH 3/4] avoid allocating value vectors in get_level `Dictionary` uses `Indices{I}` as keys, and `Vector{T}` as values. The Parameters contain both, and therefore it was free to construct a `Dictionary` in a frequently called function like `get_level`. However with autodiff, the values could be a ReinterpretArray with Duals instead of just a Vector. This meant that on Dictionary creation it would convert the ReinterpretArray to a Vector, leading to many allocations. This is on top of #581. After that, this was responsible for 94% of the time spent. With this PR that goes down to about 2%, leading to a nice little speedup. --- core/src/Ribasim.jl | 2 +- core/src/solve.jl | 2 +- core/src/utils.jl | 32 +++++++++++++++++--------------- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/core/src/Ribasim.jl b/core/src/Ribasim.jl index 78b615aaa..8821ef2c8 100644 --- a/core/src/Ribasim.jl +++ b/core/src/Ribasim.jl @@ -12,7 +12,7 @@ using ComponentArrays: ComponentVector using DataInterpolations: LinearInterpolation, derivative using Dates using DBInterface: execute, prepare -using Dictionaries: Indices, Dictionary, gettoken, gettokenvalue, dictionary +using Dictionaries: Indices, Dictionary, gettoken, dictionary using ForwardDiff: pickchunksize using DiffEqCallbacks using Graphs: DiGraph, add_edge!, adjacency_matrix, inneighbors, outneighbors diff --git a/core/src/solve.jl b/core/src/solve.jl index 35ad28646..227ced273 100644 --- a/core/src/solve.jl +++ b/core/src/solve.jl @@ -968,7 +968,7 @@ end function formulate_flows!( p::Parameters, storage::AbstractVector, - current_level, + current_level::AbstractVector, flow::AbstractMatrix, t::Float64, )::Nothing diff --git a/core/src/utils.jl b/core/src/utils.jl index 13da0e4b6..d3ebcda82 100644 --- a/core/src/utils.jl +++ b/core/src/utils.jl @@ -406,34 +406,36 @@ end Get the current water level of a node ID. The ID can belong to either a Basin or a LevelBoundary. """ -function get_level(p::Parameters, node_id::Int, current_level, t::Float64)::Real +function get_level( + p::Parameters, + node_id::Int, + current_level::AbstractVector, + t::Float64, +)::Real (; basin, level_boundary) = p - # since the node_id fields are already Indices, Dictionary creation is instant - basin = Dictionary(basin.node_id, current_level) - hasindex, token = gettoken(basin, node_id) + hasindex, i = id_index(basin.node_id, node_id) return if hasindex - gettokenvalue(basin, token) + current_level[i] else - boundary = Dictionary(level_boundary.node_id, level_boundary.level) - boundary[node_id](t) + hasindex, i = id_index(level_boundary.node_id, node_id) + level_boundary.level[i](t) end end "Get the index of an ID in a set of indices." -function id_index(ids::Indices{Int}, id::Int) - # There might be a better approach for this, this feels too internal - # the second return is the token, a Tuple{Int, Int} - hasindex, (_, idx) = gettoken(ids, id) - return hasindex, idx +function id_index(ids::Indices{Int}, id::Int)::Tuple{Bool, Int} + # We avoid creating Dictionary here since it converts the values to a Vector, + # leading to allocations when used with PreallocationTools's ReinterpretArrays. + hasindex, (_, i) = gettoken(ids, id) + return hasindex, i end "Return the bottom elevation of the basin with index i, or nothing if it doesn't exist" function basin_bottom(basin::Basin, node_id::Int)::Union{Float64, Nothing} - basin = Dictionary(basin.node_id, basin.level) - hasindex, token = gettoken(basin, node_id) + hasindex, i = id_index(basin.node_id, node_id) return if hasindex # get level(storage) interpolation function - level_discrete = gettokenvalue(basin, token) + level_discrete = basin.level[i] # and return the first level in this vector, representing the bottom first(level_discrete) else From 5205db61b80203f09d36e20fdd0630f658f2ad75 Mon Sep 17 00:00:00 2001 From: Martijn Visser Date: Fri, 8 Sep 2023 18:19:28 +0200 Subject: [PATCH 4/4] level_boundary.node_id are not Indices Perhaps we should make them Indices at some point, if the O(log n) time spent doing searchsorted is much more than the hash table lookup. --- core/src/utils.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/utils.jl b/core/src/utils.jl index d3ebcda82..d4b80baaa 100644 --- a/core/src/utils.jl +++ b/core/src/utils.jl @@ -417,7 +417,7 @@ function get_level( return if hasindex current_level[i] else - hasindex, i = id_index(level_boundary.node_id, node_id) + i = findsorted(level_boundary.node_id, node_id)::Int level_boundary.level[i](t) end end