From 1cad58ccea435a6f67c21f76ddc57fbd175de990 Mon Sep 17 00:00:00 2001
From: Jose Daniel Lara
Date: Thu, 19 Oct 2023 13:52:37 -0600
Subject: [PATCH] enable 3D dataframes

---
Reviewer notes (free text in this section is ignored by `git am`):

* What the patch does: results are now read per key. The multi-key
  `_get_store_value` asks the store for the number of dimensions of each key
  and forwards to a single-key method selected by
  `DenseAxisArray{Float64, n_dims + 1}`; 2D and 3D methods are provided, and a
  3D `make_dataframe` builds one column per index pair of the first two axes.
* Fix 1: the single-key `_get_store_value` method for
  `DenseAxisArray{Float64, 2}` used `model_name` without defining it (the 3D
  method defines it). The missing assignment is added, which is why this
  hunk's new-side count is 116 and later hunks in that file start one line
  further down.
* Fix 2: `get_number_of_dimensions` for `EmulationModelIndexType` passed
  `model_name`, which is not a parameter of that method. It now calls
  `get_column_names(store, i, key)`.
  NOTE(review): assumes a three-argument `get_column_names` method for
  `EmulationModelIndexType` exists in this file -- confirm.
* The commented-out `_add_timestamps!` call in the 3D `make_dataframe` is
  replaced by a TODO comment; behavior is unchanged.
* Whitespace of this patch was reconstructed; if context indentation does not
  match the tree, apply with `git apply --ignore-whitespace`. The post-image
  blob ids on the `index` lines were not recomputed for the edited files.

 src/core/dataset.jl                           |   2 -
 src/core/results_by_time.jl                   |  13 ++
 .../decision_model_simulation_results.jl      | 141 ++++++++++++------
 src/simulation/hdf_simulation_store.jl        |  20 ++-
 4 files changed, 129 insertions(+), 47 deletions(-)

diff --git a/src/core/dataset.jl b/src/core/dataset.jl
index af3c20368b..457f601296 100644
--- a/src/core/dataset.jl
+++ b/src/core/dataset.jl
@@ -276,9 +276,7 @@ function HDF5Dataset{2}(
     )
 end
 
-
 function get_column_names(::OptimizationContainerKey, s::HDF5Dataset)
-    s.column_names
     return s.column_names
 end
 
diff --git a/src/core/results_by_time.jl b/src/core/results_by_time.jl
index 32d77ede00..f35267e0c3 100644
--- a/src/core/results_by_time.jl
+++ b/src/core/results_by_time.jl
@@ -89,6 +89,19 @@ function make_dataframe(
     return df
 end
 
+function make_dataframe(
+    results::ResultsByTime{DenseAxisArray{Float64, 3}},
+    timestamp::Dates.DateTime,
+)
+    df = DataFrames.DataFrame()
+    array = results.data[timestamp]
+    for idx in Iterators.product(array.axes[1:2]...)
+        df[!, "$(idx)"] = array[idx..., :].data
+    end
+    # TODO: no DateTime column is added for 3D results yet (_add_timestamps! is disabled).
+    return df
+end
+
 function make_dataframe(results::ResultsByTime{Matrix{Float64}}, timestamp::Dates.DateTime)
     array = results.data[timestamp]
     df = DataFrames.DataFrame(array, results.column_names)
diff --git a/src/simulation/decision_model_simulation_results.jl b/src/simulation/decision_model_simulation_results.jl
index 96cbd04289..8739bd1ca4 100644
--- a/src/simulation/decision_model_simulation_results.jl
+++ b/src/simulation/decision_model_simulation_results.jl
@@ -114,63 +114,116 @@ function get_forecast_horizon(res::SimulationProblemResults{DecisionModelSimulat
 end
 
 function _get_store_value(
-    ::Type{T},
     res::SimulationProblemResults{DecisionModelSimulationResults},
     container_keys::Vector{<:OptimizationContainerKey},
     timestamps,
     ::Nothing,
-) where {T <: Union{Matrix{Float64}, DenseAxisArray{Float64, 2}}}
+)
     simulation_store_path = joinpath(get_execution_path(res), "data_store")
     return open_store(HdfSimulationStore, simulation_store_path, "r") do store
-        _get_store_value(T, res, container_keys, timestamps, store)
+        _get_store_value(res, container_keys, timestamps, store)
     end
 end
 
 function _get_store_value(
-    ::Type{DenseAxisArray{Float64, 2}},
     sim_results::SimulationProblemResults{DecisionModelSimulationResults},
     container_keys::Vector{<:OptimizationContainerKey},
     timestamps,
     store::SimulationStore,
 )
-    base_power = get_model_base_power(sim_results)
-    results_by_key =
-        Dict{OptimizationContainerKey, ResultsByTime{DenseAxisArray{Float64, 2}}}()
+    results_by_key = Dict{OptimizationContainerKey, ResultsByTime}()
     model_name = Symbol(get_model_name(sim_results))
+    for ckey in container_keys
+        n_dims = get_number_of_dimensions(store, DecisionModelIndexType, model_name, ckey)
+        container_type = DenseAxisArray{Float64, n_dims + 1}
+        results_by_key[ckey] = _get_store_value(container_type,
+            sim_results,
+            ckey,
+            timestamps, store)
+    end
+    return results_by_key
+end
+
+function _get_store_value(
+    ::Type{T},
+    sim_results::SimulationProblemResults{DecisionModelSimulationResults},
+    key::OptimizationContainerKey,
+    timestamps,
+    store::SimulationStore,
+) where {T <: DenseAxisArray{Float64, 2}}
     resolution = get_resolution(sim_results)
     horizon = get_forecast_horizon(sim_results)
+    base_power = get_model_base_power(sim_results)
+    model_name = Symbol(get_model_name(sim_results))
+    results_by_time = ResultsByTime(
+        key,
+        SortedDict{Dates.DateTime, T}(),
+        resolution,
+        get_column_names(store, DecisionModelIndexType, model_name, key),
+    )
+    array_size::Union{Nothing, Tuple{Int, Int}} = nothing
+    for ts in timestamps
+        array = read_result(DenseAxisArray, store, model_name, key, ts)
+        if isnothing(array_size)
+            array_size = size(array)
+        elseif size(array) != array_size
+            error(
+                "Arrays for $(encode_key_as_string(key)) at different timestamps have different sizes",
+            )
+        end
+        if convert_result_to_natural_units(key)
+            array.data .*= base_power
+        end
+        if array_size[2] != horizon
+            @warn "$(encode_key_as_string(key)) has a different horizon than the " *
+                  "problem specification. Can't assign timestamps to the resulting DataFrame."
+            results_by_time.resolution = Dates.Period(Dates.Millisecond(0))
+        end
+        results_by_time[ts] = array
+    end
 
-    for key in container_keys
-        results_by_time = ResultsByTime(
-            key,
-            SortedDict{Dates.DateTime, DenseAxisArray{Float64, 2}}(),
-            resolution,
-            get_column_names(store, DecisionModelIndexType, model_name, key),
-        )
-        array_size::Union{Nothing, Tuple{Int, Int}} = nothing
-        for ts in timestamps
-            array = read_result(DenseAxisArray, store, model_name, key, ts)
-            if isnothing(array_size)
-                array_size = size(array)
-            elseif size(array) != array_size
-                error(
-                    "Arrays for $(encode_key_as_string(key)) at different timestamps have different sizes",
-                )
-            end
-            if convert_result_to_natural_units(key)
-                array.data .*= base_power
-            end
-            if array_size[2] != horizon
-                @warn "$(encode_key_as_string(key)) has a different horizon than the " *
-                      "problem specification. Can't assign timestamps to the resulting DataFrame."
-                results_by_time.resolution = Dates.Period(Dates.Millisecond(0))
-            end
-            results_by_time[ts] = array
+    return results_by_time
+end
+
+function _get_store_value(
+    ::Type{T},
+    sim_results::SimulationProblemResults{DecisionModelSimulationResults},
+    key::OptimizationContainerKey,
+    timestamps,
+    store::SimulationStore,
+) where {T <: DenseAxisArray{Float64, 3}}
+    resolution = get_resolution(sim_results)
+    horizon = get_forecast_horizon(sim_results)
+    base_power = get_model_base_power(sim_results)
+    model_name = Symbol(get_model_name(sim_results))
+    results_by_time = ResultsByTime(
+        key,
+        SortedDict{Dates.DateTime, T}(),
+        resolution,
+        get_column_names(store, DecisionModelIndexType, model_name, key),
+    )
+    array_size::Union{Nothing, Tuple{Int, Int, Int}} = nothing
+    for ts in timestamps
+        array = read_result(DenseAxisArray, store, model_name, key, ts)
+        if isnothing(array_size)
+            array_size = size(array)
+        elseif size(array) != array_size
+            error(
+                "Arrays for $(encode_key_as_string(key)) at different timestamps have different sizes",
+            )
         end
-        results_by_key[key] = results_by_time
+        if convert_result_to_natural_units(key)
+            array.data .*= base_power
+        end
+        if array_size[3] != horizon
+            @warn "$(encode_key_as_string(key)) has a different horizon than the " *
+                  "problem specification. Can't assign timestamps to the resulting DataFrame."
+            results_by_time.resolution = Dates.Period(Dates.Millisecond(0))
+        end
+        results_by_time[ts] = array
     end
 
-    return results_by_key
+    return results_by_time
 end
 
 function _get_store_value(
@@ -186,6 +239,12 @@ function _get_store_value(
     resolution = get_resolution(sim_results)
 
     for key in container_keys
+        n_dims = get_number_of_dimensions(store, DecisionModelIndexType, model_name, key)
+        if n_dims != 1
+            error(
+                "The number of dimensions $(n_dims) is not supported for $(encode_key_as_string(key))",
+            )
+        end
         results_by_time = ResultsByTime{Matrix{Float64}, 1}(
             key,
             SortedDict{Dates.DateTime, Matrix{Float64}}(),
@@ -233,13 +292,13 @@ function _process_timestamps(
 end
 
 function _read_results(
-    ::Type{T},
     res::SimulationProblemResults{DecisionModelSimulationResults},
     result_keys,
     timestamps::Vector{Dates.DateTime},
     store::Union{Nothing, <:SimulationStore},
-) where {T <: Union{Matrix{Float64}, DenseAxisArray{Float64, 2}}}
-    isempty(result_keys) && return Dict{OptimizationContainerKey, ResultsByTime{T}}()
+)
+    isempty(result_keys) &&
+        return Dict{OptimizationContainerKey, ResultsByTime{DenseAxisArray{Float64, 2}}}()
 
     if store === nothing && res.store !== nothing
         # In this case we have an InMemorySimulationStore.
@@ -253,7 +312,7 @@ function _read_results(
         vals = Dict(k => cached_results[k] for k in result_keys)
     else
         @debug "reading results from data store"
-        vals = _get_store_value(T, res, result_keys, timestamps, store)
+        vals = _get_store_value(res, result_keys, timestamps, store)
     end
     return vals
 end
@@ -285,9 +344,7 @@ function read_variable(
 )
     key = _deserialize_key(VariableKey, res, args...)
     timestamps = _process_timestamps(res, initial_time, count)
-    return make_dataframes(
-        _read_results(DenseAxisArray{Float64, 2}, res, [key], timestamps, store)[key],
-    )
+    return make_dataframes(_read_results(res, [key], timestamps, store)[key])
 end
 
 """
diff --git a/src/simulation/hdf_simulation_store.jl b/src/simulation/hdf_simulation_store.jl
index 313759f01b..fe235e6a79 100644
--- a/src/simulation/hdf_simulation_store.jl
+++ b/src/simulation/hdf_simulation_store.jl
@@ -459,6 +459,23 @@ function get_column_names(
     return get_column_names(key, dataset)
 end
 
+function get_number_of_dimensions(
+    store::HdfSimulationStore,
+    i::Type{DecisionModelIndexType},
+    model_name::Symbol,
+    key::OptimizationContainerKey,
+)
+    return length(get_column_names(store, i, model_name, key))
+end
+
+function get_number_of_dimensions(
+    store::HdfSimulationStore,
+    i::Type{EmulationModelIndexType},
+    key::OptimizationContainerKey,
+)
+    return length(get_column_names(store, i, key))
+end
+
 function get_emulation_model_dataset_size(
     store::HdfSimulationStore,
     key::OptimizationContainerKey,
@@ -746,8 +763,6 @@ function _deserialize_attributes!(store::HdfSimulationStore)
                         get_resolution(get_decision_model_params(store, model_name))
                     dims = (horizon, size(dataset)[2:end]..., size(dataset)[1])
                     n_dims = max(1, ndims(dataset) - 2)
-                    @error n_dims
-                    @error dims
                     item = HDF5Dataset{n_dims}(
                         dataset,
                         column_dataset,
@@ -1055,7 +1070,6 @@ function _write_dataset!(
     array::Array{Float64, 4},
     row_range::UnitRange{Int64},
 )
-    @show
     dataset[:, :, :, row_range] = array
     @debug "wrote dataset" dataset row_range
     return