From 280adfeafc66570bf614b805e53eb934ab456705 Mon Sep 17 00:00:00 2001 From: guilhermebodin Date: Mon, 24 Jun 2024 22:52:13 -0300 Subject: [PATCH 01/34] minimally working --- script.jl | 51 +++++++++ src/PSRDatabaseSQLite/attribute.jl | 11 ++ src/PSRDatabaseSQLite/collection.jl | 107 +++++++++++++++++- src/PSRDatabaseSQLite/create.jl | 55 +++++++++ src/PSRDatabaseSQLite/database_sqlite.jl | 23 ++++ src/PSRDatabaseSQLite/utils.jl | 3 - src/PSRDatabaseSQLite/validate.jl | 42 ++++++- .../test_create/test_create_time_series.sql | 58 ++++++++++ 8 files changed, 339 insertions(+), 11 deletions(-) create mode 100644 script.jl create mode 100644 test/PSRDatabaseSQLite/test_create/test_create_time_series.sql diff --git a/script.jl b/script.jl new file mode 100644 index 00000000..28b40855 --- /dev/null +++ b/script.jl @@ -0,0 +1,51 @@ +using PSRClassesInterface.PSRDatabaseSQLite +using SQLite +using DataFrames +using Dates +using Test + +db_path = joinpath(@__DIR__, "test_create_time_series.sqlite") +GC.gc() +GC.gc() +if isfile(db_path) + rm(db_path) +end + +function test_create_time_series() + path_schema = raw"C:\Users\guilhermebodin\Documents\Github\PSRClassesInterface.jl\test\PSRDatabaseSQLite\test_create\test_create_time_series.sql" + db_path = joinpath(@__DIR__, "test_create_time_series.sqlite") + db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) + PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case", value1 = 1.0) + for i in 1:3 + df_timeseries_group1 = DataFrame( + date = [DateTime(2000), DateTime(2001)], + some_vector1 = [1.0, 2.0], + some_vector2 = [2.0, 3.0] + ) + df_timeseries_group2 = DataFrame( + date = [DateTime(2000), DateTime(2000), DateTime(2001), DateTime(2001)], + block = [1, 2, 1, 2], + some_vector3 = [1.0, 2.0, 3.0, 4.0], + ) + df_timeseries_group3 = DataFrame( + date = [DateTime(2000), DateTime(2000), DateTime(2000), DateTime(2000), DateTime(2001), DateTime(2001), DateTime(2001), DateTime(2001)], + block = [1, 1, 1, 1, 2, 2, 2, 2], + segment = [1, 2, 3, 4, 1, 2, 3, 4], + some_vector5 = [1.0, 2.0, 3.0, 4.0, 1, 2, 3, 4], + some_vector6 = [1.0, 2.0, 3.0, 4.0, 1, 2, 3, 4], + ) + PSRDatabaseSQLite.create_element!( + db, + "Resource"; + label = "Resource $i", + group1 = df_timeseries_group1, + group2 = df_timeseries_group2, + group3 = df_timeseries_group3 + ) + end + PSRDatabaseSQLite.close!(db) + rm(db_path) + @test true +end + +test_create_time_series() \ No newline at end of file diff --git a/src/PSRDatabaseSQLite/attribute.jl b/src/PSRDatabaseSQLite/attribute.jl index 6cf2c1f9..7f4ad7e2 100644 --- a/src/PSRDatabaseSQLite/attribute.jl +++ b/src/PSRDatabaseSQLite/attribute.jl @@ -99,6 +99,17 @@ mutable struct VectorRelation{T} <: VectorAttribute end end +mutable struct TimeSeries{T} <: VectorAttribute + id::String + type::Type{T} + default_value::Union{Missing, T} + not_null::Bool + group_id::String + parent_collection::String + table_where_is_located::String + dimension_names::Vector{String} +end + mutable struct TimeSeriesFile{T} <: ReferenceToFileAttribute id::String type::Type{T} diff --git a/src/PSRDatabaseSQLite/collection.jl b/src/PSRDatabaseSQLite/collection.jl index 080275f8..8e8b585c 100644 --- a/src/PSRDatabaseSQLite/collection.jl +++ b/src/PSRDatabaseSQLite/collection.jl @@ -10,6 +10,7 @@ mutable struct Collection scalar_relations::OrderedDict{String, ScalarRelation} vector_parameters::OrderedDict{String, VectorParameter} vector_relations::OrderedDict{String, VectorRelation} + time_series::OrderedDict{String, 
TimeSeries} time_series_files::OrderedDict{String, TimeSeriesFile} end @@ -27,7 +28,8 @@ function _create_collections_map!( scalar_relations = _create_collection_scalar_relations(db, collection_id) vector_parameters = _create_collection_vector_parameters(db, collection_id) vector_relations = _create_collection_vector_relations(db, collection_id) - time_series = _get_collection_time_series(db, collection_id) + time_series = _create_collection_time_series(db, collection_id) + time_series_files = _create_collection_time_series_files(db, collection_id) collection = Collection( collection_id, scalar_parameters, @@ -35,6 +37,7 @@ function _create_collections_map!( vector_parameters, vector_relations, time_series, + time_series_files, ) collections_map[collection_id] = collection end @@ -159,7 +162,7 @@ function _create_collection_vector_parameters(db::SQLite.DB, collection_id::Stri not_null = Bool(vector_attribute.notnull) if haskey(vector_parameters, id) psr_database_sqlite_error( - "Duplicated vector parameter \"$name\" in collection \"$collection_id\"", + "Duplicated vector parameter \"$id\" in collection \"$collection_id\"", ) end vector_parameters[id] = VectorParameter( @@ -237,8 +240,76 @@ function _create_collection_vector_relations(db::SQLite.DB, collection_id::Strin return vector_relations end -function _get_collection_time_series(db::SQLite.DB, collection_id::String) - time_series_table = _get_collection_time_series_tables(db, collection_id) +function _get_timeseries_dimension_names(df_table_infos::DataFrame) + dimension_names = Vector{String}(undef, 0) + for timeseries_attribute in eachrow(df_table_infos) + if timeseries_attribute.name == "id" + continue + end + if timeseries_attribute.pk != 0 + push!(dimension_names, timeseries_attribute.name) + end + end + return dimension_names +end + +function _create_collection_time_series(db::SQLite.DB, collection_id::String) + time_series_tables = _get_collection_time_series_tables(db, collection_id) + time_series = OrderedDict{String, TimeSeries}() + parent_collection = collection_id + for table_name in time_series_tables + group_id = _id_of_timeseries_group(table_name) + table_where_is_located = table_name + df_table_infos = table_info(db, table_name) + dimension_names = _get_timeseries_dimension_names(df_table_infos) + for timeseries_attribute in eachrow(df_table_infos) + id = timeseries_attribute.name + if id == "id" || id == "date" + # These are obligatory for every vector table + # and have no point in being stored in the database definition. + if timeseries_attribute.pk == 0 + psr_database_sqlite_error( + "Invalid table \"$(table_name)\" of timeseries attributes of collection \"$(collection_id)\". " * + "The column \"$(timeseries_attribute.name)\" is not a primary key but it should.", + ) + end + continue + end + # There is no point in storing the other primary keys of these tables + if timeseries_attribute.pk != 0 + if _sql_type_to_julia_type(id, timeseries_attribute.type) != Int64 + psr_database_sqlite_error( + "Invalid table \"$(table_name)\" of timeseries attributes of collection \"$(collection_id)\". " * + "The column \"$(timeseries_attribute.name)\" is not an integer primary key but it should." 
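                        # In other words, every primary-key column besides `id` and
                        # `date` is read as an extra time-series dimension (e.g. the
                        # `block` and `segment` columns in the test schema below) and
                        # must therefore be declared INTEGER.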
+ ) + end + continue + end + type = _sql_type_to_julia_type(id, timeseries_attribute.type) + default_value = _get_default_value(type, timeseries_attribute.dflt_value) + not_null = Bool(timeseries_attribute.notnull) + if haskey(time_series, id) + psr_database_sqlite_error( + "Duplicated timeseries attribute \"$id\" in collection \"$collection_id\"", + ) + end + time_series[id] = TimeSeries( + id, + type, + default_value, + not_null, + group_id, + parent_collection, + table_where_is_located, + dimension_names, + ) + end + end + return time_series +end + +function _create_collection_time_series_files(db::SQLite.DB, collection_id::String) + time_series_table = _get_collection_time_series_files_tables(db, collection_id) time_series = OrderedDict{String, TimeSeriesFile}() df_table_infos = table_info(db, time_series_table) for time_series_id in eachrow(df_table_infos) @@ -332,7 +403,27 @@ function _id_of_vector_group(table_name::String) return string(matches.captures[1]) end -function _get_collection_time_series_tables(::SQLite.DB, collection_id::String) +function _id_of_timeseries_group(table_name::String) + matches = match(r"_timeseries_(.*)", table_name) + return string(matches.captures[1]) +end + +function _get_collection_time_series_tables( + sqlite_db::SQLite.DB, + collection_id::String, +) + tables = SQLite.tables(sqlite_db) + time_series_tables = Vector{String}(undef, 0) + for table in tables + table_name = table.name + if _is_collection_time_series_table_name(table_name, collection_id) + push!(time_series_tables, table_name) + end + end + return time_series_tables +end + +function _get_collection_time_series_files_tables(::SQLite.DB, collection_id::String) return string(collection_id, "_timeseriesfiles") end @@ -387,6 +478,7 @@ function _validate_collections(collections_map::OrderedDict{String, Collection}) num_errors = 0 for (_, collection) in collections_map num_errors += _no_duplicated_attributes(collection) + num_errors += _no_duplicated_groups(collection) num_errors += _all_scalar_parameters_are_in_same_table(collection) num_errors += _relations_do_not_have_null_constraints(collection) num_errors += _relations_do_not_have_default_values(collection) @@ -419,6 +511,11 @@ function _no_duplicated_attributes(collection::Collection) return num_errors end +function _no_duplicated_groups(collection::Collection) + @warn "must write this function _no_duplicated_groups" + return 0 +end + function _all_scalar_parameters_are_in_same_table(collection::Collection) num_errors = 0 scalar_parameters = collection.scalar_parameters diff --git a/src/PSRDatabaseSQLite/create.jl b/src/PSRDatabaseSQLite/create.jl index 27bfdd7a..0866e55e 100644 --- a/src/PSRDatabaseSQLite/create.jl +++ b/src/PSRDatabaseSQLite/create.jl @@ -51,6 +51,7 @@ function _create_vector_group!( vector_index = collect(1:num_values) DataFrames.insertcols!(df, 1, :vector_index => vector_index) DataFrames.insertcols!(df, 1, :id => ids) + # Code to insert rows without using a transaction cols = join(string.(names(df)), ", ") num_cols = size(df, 2) for row in eachrow(df) @@ -103,6 +104,45 @@ function _create_vectors!( return nothing end +function _create_time_series!( + db::DatabaseSQLite, + collection_id::String, + id::Integer, + dict_timeseries_attributes, +) + for (group, df) in dict_timeseries_attributes + timeseries_group_table_name = _timeseries_group_table_name(collection_id, string(group)) + ids = fill(id, nrow(df)) + DataFrames.insertcols!(df, 1, :id => ids) + # Convert datetime column to string + df[!, :date] = string.(df[!, 
:date]) + # Add missing columns + missing_names_in_df = setdiff(_attributes_in_timeseries_group(db, collection_id, string(group)), string.(names(df))) + for missing_attribute in missing_names_in_df + df[!, Symbol(missing_attribute)] = fill(missing, nrow(df)) + end + + # Code to insert rows without using a transaction + cols = join(string.(names(df)), ", ") + num_cols = size(df, 2) + for row in eachrow(df) + query = "INSERT INTO $timeseries_group_table_name ($cols) VALUES (" + for (i, value) in enumerate(row) + if ismissing(value) + query *= "NULL, " + else + query *= "\'$value\', " + end + if i == num_cols + query = query[1:end-2] + query *= ")" + end + end + DBInterface.execute(db.sqlite_db, query) + end + end +end + function _create_element!( db::DatabaseSQLite, collection_id::String; @@ -111,7 +151,9 @@ function _create_element!( _throw_if_collection_does_not_exist(db, collection_id) dict_scalar_attributes = Dict{Symbol, Any}() dict_vector_attributes = Dict{Symbol, Any}() + dict_timeseries_attributes = Dict{Symbol, Any}() + # Validate that the arguments will be valid for (key, value) in kwargs if isa(value, AbstractVector) _throw_if_not_vector_attribute(db, collection_id, string(key)) @@ -121,6 +163,10 @@ function _create_element!( ) end dict_vector_attributes[key] = value + elseif isa(value, DataFrame) + _throw_if_not_timeseries_group(db, collection_id, string(key)) + @warn("Still not validating types of the time series on creation.") + dict_timeseries_attributes[key] = value else _throw_if_is_time_series_file(db, collection_id, string(key)) _throw_if_not_scalar_attribute(db, collection_id, string(key)) @@ -146,6 +192,15 @@ function _create_element!( _create_vectors!(db, collection_id, id, dict_vector_attributes) end + if !isempty(dict_timeseries_attributes) + id = get( + dict_scalar_attributes, + :id, + _get_id(db, collection_id, dict_scalar_attributes[:label]), + ) + _create_time_series!(db, collection_id, id, dict_timeseries_attributes) + end + return nothing end diff --git a/src/PSRDatabaseSQLite/database_sqlite.jl b/src/PSRDatabaseSQLite/database_sqlite.jl index cc221d9d..44fab1f2 100644 --- a/src/PSRDatabaseSQLite/database_sqlite.jl +++ b/src/PSRDatabaseSQLite/database_sqlite.jl @@ -240,10 +240,29 @@ function _map_of_groups_to_vector_attributes( return map_of_groups_to_vector_attributes end +function _attributes_in_timeseries_group( + db::DatabaseSQLite, + collection_id::String, + group_id::String +) + collection = _get_collection(db, collection_id) + attributes_in_timeseries_group = Vector{String}(undef, 0) + for (_, attribute) in collection.time_series + if attribute.group_id == group_id + push!(attributes_in_timeseries_group, attribute.id) + end + end + return attributes_in_timeseries_group +end + function _vectors_group_table_name(collection_id::String, group::String) return string(collection_id, "_vector_", group) end +function _timeseries_group_table_name(collection_id::String, group::String) + return string(collection_id, "_timeseries_", group) +end + function _is_collection_id(name::String) # Collections don't have underscores in their names return !occursin("_", name) @@ -253,6 +272,10 @@ function _is_collection_vector_table_name(name::String, collection_id::String) return startswith(name, "$(collection_id)_vector_") end +function _is_collection_time_series_table_name(name::String, collection_id::String) + return startswith(name, "$(collection_id)_timeseries_") +end + _get_collection_ids(db::DatabaseSQLite) = collect(keys(db.collections_map)) function 
_get_collection_ids(db::SQLite.DB) tables = SQLite.tables(db) diff --git a/src/PSRDatabaseSQLite/utils.jl b/src/PSRDatabaseSQLite/utils.jl index 699c93d3..5aeffe47 100644 --- a/src/PSRDatabaseSQLite/utils.jl +++ b/src/PSRDatabaseSQLite/utils.jl @@ -127,6 +127,3 @@ function table_names(db::SQLite.DB) end return tables end - -_timeseries_table_name(table::String) = table * "_timeseriesfiles" -_relation_table_name(table_1::String, table_2::String) = table_1 * "_relation_" * table_2 diff --git a/src/PSRDatabaseSQLite/validate.jl b/src/PSRDatabaseSQLite/validate.jl index 81211ae5..04d620f9 100644 --- a/src/PSRDatabaseSQLite/validate.jl +++ b/src/PSRDatabaseSQLite/validate.jl @@ -18,7 +18,15 @@ _is_valid_table_vector_name(table::String) = ), ) -_is_valid_table_timeseries_name(table::String) = +_is_valid_time_series_name(table::String) = + !isnothing( + match( + r"^(?:[A-Z][a-z]*)+_timeseries_[a-z][a-z0-9]*(?:_{1}[a-z0-9]+)*$", + table, + ), + ) + +_is_valid_table_timeseriesfiles_name(table::String) = !isnothing(match(r"^(?:[A-Z][a-z]*)+_timeseriesfiles", table)) _is_valid_time_series_attribute_value(value::String) = @@ -68,6 +76,22 @@ function _validate_table(db::SQLite.DB, table::String) end function _validate_timeseries_table(db::SQLite.DB, table::String) + attributes = column_names(db, table) + num_errors = 0 + if !("id" in attributes) + @error("Table $table is a timeseries table and does not have an \"id\" column.") + num_errors += 1 + end + if !("date" in attributes) + @error( + "Table $table is a timeseries table and does not have an \"date\" column.", + ) + num_errors += 1 + end + return num_errors +end + +function _validate_timeseriesfiles_table(db::SQLite.DB, table::String) attributes = column_names(db, table) num_errors = 0 if ("id" in attributes) @@ -124,7 +148,9 @@ function _validate_database(db::SQLite.DB) end if _is_valid_table_name(table) num_errors += _validate_table(db, table) - elseif _is_valid_table_timeseries_name(table) + elseif _is_valid_table_timeseriesfiles_name(table) + num_errors += _validate_timeseriesfiles_table(db, table) + elseif _is_valid_time_series_name(table) num_errors += _validate_timeseries_table(db, table) elseif _is_valid_table_vector_name(table) num_errors += _validate_vector_table(db, table) @@ -134,7 +160,8 @@ function _validate_database(db::SQLite.DB) Valid table name formats are: - Collections: NameOfCollection - Vector attributes: NameOfCollection_vector_group_id - - Time series: NameOfCollection_timeseriesfiles + - Time series: NameOfCollection_timeseries_group_id + - Time series files: NameOfCollection_timeseriesfiles """) num_errors += 1 end @@ -314,6 +341,15 @@ function _throw_if_relation_does_not_exist( end end +function _throw_if_not_timeseries_group( + db::DatabaseSQLite, + collection::String, + group::String, +) + @warn("We are not validating that if it is a valid group") + return nothing +end + function _throw_if_is_time_series_file( db::DatabaseSQLite, collection::String, diff --git a/test/PSRDatabaseSQLite/test_create/test_create_time_series.sql b/test/PSRDatabaseSQLite/test_create/test_create_time_series.sql new file mode 100644 index 00000000..7936b997 --- /dev/null +++ b/test/PSRDatabaseSQLite/test_create/test_create_time_series.sql @@ -0,0 +1,58 @@ +PRAGMA user_version = 1; +PRAGMA foreign_keys = ON; + +CREATE TABLE Configuration ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + label TEXT UNIQUE NOT NULL, + value1 REAL NOT NULL DEFAULT 100, + enum1 TEXT NOT NULL DEFAULT 'A' CHECK(enum1 IN ('A', 'B', 'C')) +) STRICT; + + +CREATE TABLE 
Resource ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + label TEXT UNIQUE NOT NULL, + type TEXT NOT NULL DEFAULT "D" +) STRICT; + +CREATE TABLE Resource_timeseries_group1 ( + id INTEGER, + date TEXT NOT NULL, + some_vector1 REAL, + some_vector2 REAL, + FOREIGN KEY(id) REFERENCES Resource(id) ON DELETE CASCADE ON UPDATE CASCADE, + PRIMARY KEY (id, date) +) STRICT; + +CREATE TABLE Resource_timeseries_group2 ( + id INTEGER, + date TEXT NOT NULL, + block INTEGER NOT NULL, + some_vector3 REAL, + some_vector4 REAL, + FOREIGN KEY(id) REFERENCES Resource(id) ON DELETE CASCADE ON UPDATE CASCADE, + PRIMARY KEY (id, date, block) +) STRICT; + +CREATE TABLE Resource_timeseries_group3 ( + id INTEGER, + date TEXT NOT NULL, + block INTEGER NOT NULL, + segment INTEGER NOT NULL, + some_vector5 REAL, + some_vector6 REAL, + FOREIGN KEY(id) REFERENCES Resource(id) ON DELETE CASCADE ON UPDATE CASCADE, + PRIMARY KEY (id, date, block, segment) +) STRICT; + +CREATE TABLE Resource_timeseries_group4 ( + id INTEGER, + date TEXT NOT NULL, + block INTEGER NOT NULL, + segment INTEGER NOT NULL, + some_other_dimension INTEGER NOT NULL, + some_vector7 REAL, + some_vector8 REAL, + FOREIGN KEY(id) REFERENCES Resource(id) ON DELETE CASCADE ON UPDATE CASCADE, + PRIMARY KEY (id, date, block, segment, some_other_dimension) +) STRICT; \ No newline at end of file From 8e602160afa5fe3557e42219ca4ff75b00cf1608 Mon Sep 17 00:00:00 2001 From: guilhermebodin Date: Wed, 26 Jun 2024 12:46:26 -0300 Subject: [PATCH 02/34] updates --- script.jl | 54 ++++++++-- src/PSRDatabaseSQLite/attribute.jl | 1 + src/PSRDatabaseSQLite/collection.jl | 9 +- src/PSRDatabaseSQLite/create.jl | 70 ++++++------ src/PSRDatabaseSQLite/database_sqlite.jl | 37 ++++++- src/PSRDatabaseSQLite/read.jl | 100 +++++++++++++++++- src/PSRDatabaseSQLite/validate.jl | 65 ++++++++---- .../test_create/test_create_time_series.sql | 16 +-- 8 files changed, 263 insertions(+), 89 deletions(-) diff --git a/script.jl b/script.jl index 28b40855..890e86cb 100644 --- a/script.jl +++ b/script.jl @@ -18,21 +18,21 @@ function test_create_time_series() PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case", value1 = 1.0) for i in 1:3 df_timeseries_group1 = DataFrame( - date = [DateTime(2000), DateTime(2001)], - some_vector1 = [1.0, 2.0], - some_vector2 = [2.0, 3.0] + date_time = [DateTime(2000), DateTime(2001)], + some_vector1 = [1.0, 2.0] .* i, + some_vector2 = [2.0, 3.0] .* i ) df_timeseries_group2 = DataFrame( - date = [DateTime(2000), DateTime(2000), DateTime(2001), DateTime(2001)], + date_time = [DateTime(2000), DateTime(2000), DateTime(2001), DateTime(2001)], block = [1, 2, 1, 2], - some_vector3 = [1.0, 2.0, 3.0, 4.0], + some_vector3 = [1.0, missing, 3.0, 4.0] .* i, ) df_timeseries_group3 = DataFrame( - date = [DateTime(2000), DateTime(2000), DateTime(2000), DateTime(2000), DateTime(2001), DateTime(2001), DateTime(2001), DateTime(2001)], + date_time = [DateTime(2000), DateTime(2000), DateTime(2000), DateTime(2000), DateTime(2001), DateTime(2001), DateTime(2001), DateTime(2009)], block = [1, 1, 1, 1, 2, 2, 2, 2], segment = [1, 2, 3, 4, 1, 2, 3, 4], - some_vector5 = [1.0, 2.0, 3.0, 4.0, 1, 2, 3, 4], - some_vector6 = [1.0, 2.0, 3.0, 4.0, 1, 2, 3, 4], + some_vector5 = [1.0, 2.0, 3.0, 4.0, 1, 2, 3, 4] .* i, + some_vector6 = [1.0, 2.0, 3.0, 4.0, 1, 2, 3, 4] .* i, ) PSRDatabaseSQLite.create_element!( db, @@ -43,6 +43,44 @@ function test_create_time_series() group3 = df_timeseries_group3 ) end + + results = PSRDatabaseSQLite.read_time_series_df( + db, + "Resource", + 
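        # positional arguments: collection, attribute and element label; keyword
        # arguments such as `date_time` below filter the time-series dimensions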
"some_vector1", + "Resource 1"; + date_time = DateTime(2000) + ) + @show results + + results = PSRDatabaseSQLite.read_time_series_df( + db, + "Resource", + "some_vector5", + "Resource 1"; + date_time = DateTime(2002) + ) + @show results + + results = PSRDatabaseSQLite.read_time_series_df( + db, + "Resource", + "some_vector5", + "Resource 1" + ) + @show results + + @show labels = PSRDatabaseSQLite.read_scalar_parameters(db, "Resource", "label") + + results = PSRDatabaseSQLite.read_time_series_dfs( + db, + "Resource", + "some_vector5"; + date_time = DateTime(2010) + ) + @show results + + PSRDatabaseSQLite.close!(db) rm(db_path) @test true diff --git a/src/PSRDatabaseSQLite/attribute.jl b/src/PSRDatabaseSQLite/attribute.jl index 7f4ad7e2..41e92127 100644 --- a/src/PSRDatabaseSQLite/attribute.jl +++ b/src/PSRDatabaseSQLite/attribute.jl @@ -108,6 +108,7 @@ mutable struct TimeSeries{T} <: VectorAttribute parent_collection::String table_where_is_located::String dimension_names::Vector{String} + num_dimensions::Int end mutable struct TimeSeriesFile{T} <: ReferenceToFileAttribute diff --git a/src/PSRDatabaseSQLite/collection.jl b/src/PSRDatabaseSQLite/collection.jl index 8e8b585c..372af693 100644 --- a/src/PSRDatabaseSQLite/collection.jl +++ b/src/PSRDatabaseSQLite/collection.jl @@ -264,7 +264,7 @@ function _create_collection_time_series(db::SQLite.DB, collection_id::String) dimension_names = _get_timeseries_dimension_names(df_table_infos) for timeseries_attribute in eachrow(df_table_infos) id = timeseries_attribute.name - if id == "id" || id == "date" + if id == "id" || id == "date_time" # These are obligatory for every vector table # and have no point in being stored in the database definition. if timeseries_attribute.pk == 0 @@ -302,6 +302,7 @@ function _create_collection_time_series(db::SQLite.DB, collection_id::String) parent_collection, table_where_is_located, dimension_names, + length(dimension_names), ) end end @@ -478,7 +479,6 @@ function _validate_collections(collections_map::OrderedDict{String, Collection}) num_errors = 0 for (_, collection) in collections_map num_errors += _no_duplicated_attributes(collection) - num_errors += _no_duplicated_groups(collection) num_errors += _all_scalar_parameters_are_in_same_table(collection) num_errors += _relations_do_not_have_null_constraints(collection) num_errors += _relations_do_not_have_default_values(collection) @@ -511,11 +511,6 @@ function _no_duplicated_attributes(collection::Collection) return num_errors end -function _no_duplicated_groups(collection::Collection) - @warn "must write this function _no_duplicated_groups" - return 0 -end - function _all_scalar_parameters_are_in_same_table(collection::Collection) num_errors = 0 scalar_parameters = collection.scalar_parameters diff --git a/src/PSRDatabaseSQLite/create.jl b/src/PSRDatabaseSQLite/create.jl index 0866e55e..66113e47 100644 --- a/src/PSRDatabaseSQLite/create.jl +++ b/src/PSRDatabaseSQLite/create.jl @@ -21,6 +21,32 @@ function _create_scalar_attributes!( return nothing end +function _insert_vectors_from_df( + db::DatabaseSQLite, + df::DataFrame, + table_name::String +) + # Code to insert rows without using a transaction + cols = join(string.(names(df)), ", ") + num_cols = size(df, 2) + for row in eachrow(df) + query = "INSERT INTO $table_name ($cols) VALUES (" + for (i, value) in enumerate(row) + if ismissing(value) + query *= "NULL, " + else + query *= "\'$value\', " + end + if i == num_cols + query = query[1:end-2] + query *= ")" + end + end + DBInterface.execute(db.sqlite_db, query) 
+ end + return nothing +end + function _create_vector_group!( db::DatabaseSQLite, collection_id::String, @@ -51,24 +77,7 @@ function _create_vector_group!( vector_index = collect(1:num_values) DataFrames.insertcols!(df, 1, :vector_index => vector_index) DataFrames.insertcols!(df, 1, :id => ids) - # Code to insert rows without using a transaction - cols = join(string.(names(df)), ", ") - num_cols = size(df, 2) - for row in eachrow(df) - query = "INSERT INTO $vectors_group_table_name ($cols) VALUES (" - for (i, value) in enumerate(row) - if ismissing(value) - query *= "NULL, " - else - query *= "\'$value\', " - end - if i == num_cols - query = query[1:end-2] - query *= ")" - end - end - DBInterface.execute(db.sqlite_db, query) - end + _insert_vectors_from_df(db, df, vectors_group_table_name) return nothing end @@ -115,31 +124,13 @@ function _create_time_series!( ids = fill(id, nrow(df)) DataFrames.insertcols!(df, 1, :id => ids) # Convert datetime column to string - df[!, :date] = string.(df[!, :date]) + df[!, :date_time] = string.(df[!, :date_time]) # Add missing columns missing_names_in_df = setdiff(_attributes_in_timeseries_group(db, collection_id, string(group)), string.(names(df))) for missing_attribute in missing_names_in_df df[!, Symbol(missing_attribute)] = fill(missing, nrow(df)) end - - # Code to insert rows without using a transaction - cols = join(string.(names(df)), ", ") - num_cols = size(df, 2) - for row in eachrow(df) - query = "INSERT INTO $timeseries_group_table_name ($cols) VALUES (" - for (i, value) in enumerate(row) - if ismissing(value) - query *= "NULL, " - else - query *= "\'$value\', " - end - if i == num_cols - query = query[1:end-2] - query *= ")" - end - end - DBInterface.execute(db.sqlite_db, query) - end + _insert_vectors_from_df(db, df, timeseries_group_table_name) end end @@ -165,7 +156,6 @@ function _create_element!( dict_vector_attributes[key] = value elseif isa(value, DataFrame) _throw_if_not_timeseries_group(db, collection_id, string(key)) - @warn("Still not validating types of the time series on creation.") dict_timeseries_attributes[key] = value else _throw_if_is_time_series_file(db, collection_id, string(key)) @@ -323,7 +313,7 @@ function _validate_attribute_types_on_creation!( collection_id, label_or_id, dict_scalar_attributes, - dict_vector_attributes, + dict_vector_attributes ) return nothing end diff --git a/src/PSRDatabaseSQLite/database_sqlite.jl b/src/PSRDatabaseSQLite/database_sqlite.jl index 44fab1f2..26c1abff 100644 --- a/src/PSRDatabaseSQLite/database_sqlite.jl +++ b/src/PSRDatabaseSQLite/database_sqlite.jl @@ -3,13 +3,18 @@ mutable struct DatabaseSQLite collections_map::OrderedDict{String, Collection} end +function _set_default_pragmas!(sqlite_db::SQLite.DB) + DBInterface.execute(sqlite_db, "PRAGMA busy_timeout = 5000;") + return nothing +end + function DatabaseSQLite_from_schema( database_path::String; path_schema::String = "", ) sqlite_db = SQLite.DB(database_path) - DBInterface.execute(sqlite_db, "PRAGMA busy_timeout = 5000;") + _set_default_pragmas!(sqlite_db) collections_map = try execute_statements(sqlite_db, path_schema) @@ -34,7 +39,7 @@ function DatabaseSQLite_from_migrations( ) sqlite_db = SQLite.DB(database_path) - DBInterface.execute(sqlite_db, "PRAGMA busy_timeout = 5000;") + _set_default_pragmas!(sqlite_db) collections_map = try current_version = get_user_version(sqlite_db) @@ -70,7 +75,7 @@ function DatabaseSQLite( read_only ? 
SQLite.DB("file:" * database_path * "?mode=ro&immutable=1") : SQLite.DB(database_path) - DBInterface.execute(sqlite_db, "PRAGMA busy_timeout = 5000;") + _set_default_pragmas!(sqlite_db) collections_map = try _validate_database(sqlite_db) @@ -123,6 +128,29 @@ function _is_vector_relation( return haskey(collection.vector_relations, attribute_id) end +function _is_time_series( + db::DatabaseSQLite, + collection_id::String, + attribute_id::String, +) + collection = _get_collection(db, collection_id) + return haskey(collection.time_series, attribute_id) +end + +function _is_timeseries_group( + db::DatabaseSQLite, + collection_id::String, + group_id::String, +) + collection = _get_collection(db, collection_id) + for (_, attribute) in collection.time_series + if attribute.group_id == group_id + return true + end + end + return false +end + function _is_time_series_file( db::DatabaseSQLite, collection_id::String, @@ -157,6 +185,8 @@ function _get_attribute( return collection.scalar_relations[attribute_id] elseif _is_vector_relation(db, collection_id, attribute_id) return collection.vector_relations[attribute_id] + elseif _is_time_series(db, collection_id, attribute_id) + return collection.time_series[attribute_id] elseif _is_time_series_file(db, collection_id, attribute_id) return collection.time_series_files[attribute_id] else @@ -206,6 +236,7 @@ function _attribute_exists( _is_vector_parameter(db, collection_id, attribute_id) || _is_scalar_relation(db, collection_id, attribute_id) || _is_vector_relation(db, collection_id, attribute_id) || + _is_time_series(db, collection_id, attribute_id) || _is_time_series_file(db, collection_id, attribute_id) end diff --git a/src/PSRDatabaseSQLite/read.jl b/src/PSRDatabaseSQLite/read.jl index 29fbe163..ce4830fe 100644 --- a/src/PSRDatabaseSQLite/read.jl +++ b/src/PSRDatabaseSQLite/read.jl @@ -62,9 +62,7 @@ function read_scalar_parameter( :read, ) - attribute = _get_attribute(db, collection_id, attribute_id) - table = _table_where_is_located(attribute) - id = _get_id(db, table, label) + id = _get_id(db, collection_id, label) return read_scalar_parameter(db, collection_id, attribute_id, id; default) end @@ -146,6 +144,102 @@ function _query_vector( return results end +function read_time_series_dfs( + db::DatabaseSQLite, + collection_id::String, + attribute_id::String; + read_exact_date::Bool = false, + dimensions..., +) + _throw_if_attribute_is_not_time_series( + db, + collection_id, + attribute_id, + :read, + ) + attribute = _get_attribute(db, collection_id, attribute_id) + ids_in_table = read_scalar_parameters(db, collection_id, "id") + + results = DataFrame[] + for id in ids_in_table + push!(results, _read_time_series_df(db, collection_id, attribute, id; read_exact_date, dimensions...)) + end + + return results +end + +function read_time_series_df( + db::DatabaseSQLite, + collection_id::String, + attribute_id::String, + label::String; + read_exact_date::Bool = false, + dimensions..., +) + _throw_if_attribute_is_not_time_series( + db, + collection_id, + attribute_id, + :read, + ) + attribute = _get_attribute(db, collection_id, attribute_id) + id = _get_id(db, collection_id, label) + + return _read_time_series_df( + db, + collection_id, + attribute, + id; + read_exact_date, + dimensions... 
+ ) +end + +function _read_time_series_df( + db::DatabaseSQLite, + collection_id::String, + attribute::Attribute, + id::Int; + read_exact_date::Bool = false, + dimensions..., +) + _validate_time_series_dimensions(collection_id, attribute, dimensions) + + query = string("SELECT ", join(attribute.dimension_names, ",", ", "), ", ", attribute.id) + query *= " FROM $(attribute.table_where_is_located) WHERE id = '$id'" + if !isempty(dimensions) + query *= " AND " + i = 0 + for (dim_name, dim_value) in dimensions + if dim_name == :date_time + if read_exact_date + query *= "DATE($dim_name) = DATE('$(dim_value)')" + else + # Query the nearest date before the provided date + closest_date_query = "SELECT DISTINCT $dim_name FROM $(attribute.table_where_is_located) WHERE DATE($dim_name) <= DATE('$(dim_value)') ORDER BY DATE($dim_name) DESC LIMIT 1" + closest_date = DBInterface.execute(db.sqlite_db, closest_date_query) |> DataFrame + # If there is no date query the data with date 0 (which will probably return no data.) + date_to_equal_in_query = if isempty(closest_date) + DateTime(0) + else + closest_date[!, 1][1] + end + # query the closest date and make it equal to the provided date. + query *= "DATE($dim_name) = DATE('$(date_to_equal_in_query)')" + end + else + query *= "$(dim_name) = '$dim_value'" + end + i += 1 + if i < length(dimensions) + query *= " AND " + end + end + end + + return DBInterface.execute(db.sqlite_db, query) |> DataFrame +end + """ TODO """ diff --git a/src/PSRDatabaseSQLite/validate.jl b/src/PSRDatabaseSQLite/validate.jl index 04d620f9..5e7f90f7 100644 --- a/src/PSRDatabaseSQLite/validate.jl +++ b/src/PSRDatabaseSQLite/validate.jl @@ -82,9 +82,9 @@ function _validate_timeseries_table(db::SQLite.DB, table::String) @error("Table $table is a timeseries table and does not have an \"id\" column.") num_errors += 1 end - if !("date" in attributes) + if !("date_time" in attributes) @error( - "Table $table is a timeseries table and does not have an \"date\" column.", + "Table $table is a timeseries table and does not have an \"date_time\" column.", ) num_errors += 1 end @@ -272,6 +272,27 @@ function _throw_if_attribute_is_not_vector_relation( return nothing end +function _throw_if_attribute_is_not_time_series( + db::DatabaseSQLite, + collection::String, + attribute::String, + action::Symbol, +) + _throw_if_collection_or_attribute_do_not_exist(db, collection, attribute) + + if !_is_time_series(db, collection, attribute) + correct_composity_type = + _attribute_composite_type(db, collection, attribute) + string_of_composite_types = _string_for_composite_types(correct_composity_type) + correct_method_to_use = _get_correct_method_to_use(correct_composity_type, action) + psr_database_sqlite_error( + "Attribute \"$attribute\" is not a time series. It is a $string_of_composite_types. Use `$correct_method_to_use` instead.", + ) + end + return nothing + +end + function _throw_if_attribute_is_not_time_series_file( db::DatabaseSQLite, collection::String, @@ -326,27 +347,16 @@ function _throw_if_not_vector_attribute( return nothing end -function _throw_if_relation_does_not_exist( - collection_from::String, - collection_to::String, - relation_type::String, -) - if !_scalar_relation_exists(collection_from, collection_to, relation_type) && - !_vector_relation_exists(collection_from, collection_to, relation_type) - psr_database_sqlite_error( - "relation `$relation_type` between $collection_from and $collection_to does not exist. 
\n" * - "This is the list of relations that exist: " * - "$(_show_existing_relation_types(_list_of_relation_types(collection_from, collection_to)))", - ) - end -end - function _throw_if_not_timeseries_group( db::DatabaseSQLite, collection::String, group::String, ) - @warn("We are not validating that if it is a valid group") + if !_is_timeseries_group(db, collection, group) + psr_database_sqlite_error( + "Group \"$group\" is not a time series group. " + ) + end return nothing end @@ -379,8 +389,8 @@ function _validate_attribute_types!( db::DatabaseSQLite, collection_id::String, label_or_id::Union{Integer, String}, - dict_scalar_attributes, - dict_vector_attributes, + dict_scalar_attributes::AbstractDict, + dict_vector_attributes::AbstractDict, ) for (key, value) in dict_scalar_attributes attribute = _get_attribute(db, collection_id, string(key)) @@ -453,6 +463,21 @@ function _validate_vector_relation_type( end end +function _validate_time_series_dimensions( + collection_id::String, + attribute::Attribute, + dimensions... +) + for dim_name in keys(dimensions...) + if !(string(dim_name) in attribute.dimension_names) + psr_database_sqlite_error( + "The dimension \"$dim_name\" is not defined in the time series attribute \"$(attribute.id)\" of collection \"$collection_id\". " * + "The available dimensions are: $(attribute.dimension_names).", + ) + end + end +end + function _set_default_pragmas!(db::SQLite.DB) _set_foreign_keys_on!(db) return nothing diff --git a/test/PSRDatabaseSQLite/test_create/test_create_time_series.sql b/test/PSRDatabaseSQLite/test_create/test_create_time_series.sql index 7936b997..6fd20e5c 100644 --- a/test/PSRDatabaseSQLite/test_create/test_create_time_series.sql +++ b/test/PSRDatabaseSQLite/test_create/test_create_time_series.sql @@ -17,42 +17,42 @@ CREATE TABLE Resource ( CREATE TABLE Resource_timeseries_group1 ( id INTEGER, - date TEXT NOT NULL, + date_time TEXT NOT NULL, some_vector1 REAL, some_vector2 REAL, FOREIGN KEY(id) REFERENCES Resource(id) ON DELETE CASCADE ON UPDATE CASCADE, - PRIMARY KEY (id, date) + PRIMARY KEY (id, date_time) ) STRICT; CREATE TABLE Resource_timeseries_group2 ( id INTEGER, - date TEXT NOT NULL, + date_time TEXT NOT NULL, block INTEGER NOT NULL, some_vector3 REAL, some_vector4 REAL, FOREIGN KEY(id) REFERENCES Resource(id) ON DELETE CASCADE ON UPDATE CASCADE, - PRIMARY KEY (id, date, block) + PRIMARY KEY (id, date_time, block) ) STRICT; CREATE TABLE Resource_timeseries_group3 ( id INTEGER, - date TEXT NOT NULL, + date_time TEXT NOT NULL, block INTEGER NOT NULL, segment INTEGER NOT NULL, some_vector5 REAL, some_vector6 REAL, FOREIGN KEY(id) REFERENCES Resource(id) ON DELETE CASCADE ON UPDATE CASCADE, - PRIMARY KEY (id, date, block, segment) + PRIMARY KEY (id, date_time, block, segment) ) STRICT; CREATE TABLE Resource_timeseries_group4 ( id INTEGER, - date TEXT NOT NULL, + date_time TEXT NOT NULL, block INTEGER NOT NULL, segment INTEGER NOT NULL, some_other_dimension INTEGER NOT NULL, some_vector7 REAL, some_vector8 REAL, FOREIGN KEY(id) REFERENCES Resource(id) ON DELETE CASCADE ON UPDATE CASCADE, - PRIMARY KEY (id, date, block, segment, some_other_dimension) + PRIMARY KEY (id, date_time, block, segment, some_other_dimension) ) STRICT; \ No newline at end of file From 00d7823c637eac7968900a2f30bbad1586f9187a Mon Sep 17 00:00:00 2001 From: pedroripper Date: Wed, 26 Jun 2024 19:18:16 -0300 Subject: [PATCH 03/34] Add tests --- src/PSRDatabaseSQLite/collection.jl | 2 +- src/PSRDatabaseSQLite/create.jl | 4 +- src/PSRDatabaseSQLite/database_sqlite.jl 
| 2 +- src/PSRDatabaseSQLite/read.jl | 10 +- src/PSRDatabaseSQLite/validate.jl | 11 +- .../test_create/test_create.jl | 66 ++ test/PSRDatabaseSQLite/test_read/test_read.jl | 811 ++++++++++++------ .../test_read/test_read_time_series.sql | 58 ++ test/runtests.jl | 102 +-- 9 files changed, 742 insertions(+), 324 deletions(-) create mode 100644 test/PSRDatabaseSQLite/test_read/test_read_time_series.sql diff --git a/src/PSRDatabaseSQLite/collection.jl b/src/PSRDatabaseSQLite/collection.jl index 372af693..fbea77d3 100644 --- a/src/PSRDatabaseSQLite/collection.jl +++ b/src/PSRDatabaseSQLite/collection.jl @@ -280,7 +280,7 @@ function _create_collection_time_series(db::SQLite.DB, collection_id::String) if _sql_type_to_julia_type(id, timeseries_attribute.type) != Int64 psr_database_sqlite_error( "Invalid table \"$(table_name)\" of timeseries attributes of collection \"$(collection_id)\". " * - "The column \"$(timeseries_attribute.name)\" is not an integer primary key but it should." + "The column \"$(timeseries_attribute.name)\" is not an integer primary key but it should.", ) end continue diff --git a/src/PSRDatabaseSQLite/create.jl b/src/PSRDatabaseSQLite/create.jl index 66113e47..7672f434 100644 --- a/src/PSRDatabaseSQLite/create.jl +++ b/src/PSRDatabaseSQLite/create.jl @@ -24,7 +24,7 @@ end function _insert_vectors_from_df( db::DatabaseSQLite, df::DataFrame, - table_name::String + table_name::String, ) # Code to insert rows without using a transaction cols = join(string.(names(df)), ", ") @@ -313,7 +313,7 @@ function _validate_attribute_types_on_creation!( collection_id, label_or_id, dict_scalar_attributes, - dict_vector_attributes + dict_vector_attributes, ) return nothing end diff --git a/src/PSRDatabaseSQLite/database_sqlite.jl b/src/PSRDatabaseSQLite/database_sqlite.jl index 26c1abff..c1993988 100644 --- a/src/PSRDatabaseSQLite/database_sqlite.jl +++ b/src/PSRDatabaseSQLite/database_sqlite.jl @@ -274,7 +274,7 @@ end function _attributes_in_timeseries_group( db::DatabaseSQLite, collection_id::String, - group_id::String + group_id::String, ) collection = _get_collection(db, collection_id) attributes_in_timeseries_group = Vector{String}(undef, 0) diff --git a/src/PSRDatabaseSQLite/read.jl b/src/PSRDatabaseSQLite/read.jl index ce4830fe..3826cad4 100644 --- a/src/PSRDatabaseSQLite/read.jl +++ b/src/PSRDatabaseSQLite/read.jl @@ -186,12 +186,12 @@ function read_time_series_df( id = _get_id(db, collection_id, label) return _read_time_series_df( - db, - collection_id, - attribute, + db, + collection_id, + attribute, id; read_exact_date, - dimensions... + dimensions..., ) end @@ -206,7 +206,7 @@ function _read_time_series_df( _validate_time_series_dimensions(collection_id, attribute, dimensions) query = string("SELECT ", join(attribute.dimension_names, ",", ", "), ", ", attribute.id) - query *= " FROM $(attribute.table_where_is_located) WHERE id = '$id'" + query *= " FROM $(attribute.table_where_is_located) WHERE id = '$id'" if !isempty(dimensions) query *= " AND " i = 0 diff --git a/src/PSRDatabaseSQLite/validate.jl b/src/PSRDatabaseSQLite/validate.jl index 5e7f90f7..75728c12 100644 --- a/src/PSRDatabaseSQLite/validate.jl +++ b/src/PSRDatabaseSQLite/validate.jl @@ -290,7 +290,6 @@ function _throw_if_attribute_is_not_time_series( ) end return nothing - end function _throw_if_attribute_is_not_time_series_file( @@ -354,11 +353,11 @@ function _throw_if_not_timeseries_group( ) if !_is_timeseries_group(db, collection, group) psr_database_sqlite_error( - "Group \"$group\" is not a time series group. 
" + "Group \"$group\" is not a time series group. ", ) end return nothing -end +end function _throw_if_is_time_series_file( db::DatabaseSQLite, @@ -464,9 +463,9 @@ function _validate_vector_relation_type( end function _validate_time_series_dimensions( - collection_id::String, - attribute::Attribute, - dimensions... + collection_id::String, + attribute::Attribute, + dimensions..., ) for dim_name in keys(dimensions...) if !(string(dim_name) in attribute.dimension_names) diff --git a/test/PSRDatabaseSQLite/test_create/test_create.jl b/test/PSRDatabaseSQLite/test_create/test_create.jl index 2bb8ddc8..77651c22 100644 --- a/test/PSRDatabaseSQLite/test_create/test_create.jl +++ b/test/PSRDatabaseSQLite/test_create/test_create.jl @@ -3,6 +3,7 @@ module TestCreate using PSRClassesInterface.PSRDatabaseSQLite using SQLite using Dates +using DataFrames using Test function test_create_parameters() @@ -265,6 +266,71 @@ function test_create_vectors_with_relations() return nothing end +function test_create_time_series() + path_schema = joinpath(@__DIR__, "test_create_time_series.sql") + db_path = joinpath(@__DIR__, "test_create_time_series.sqlite") + db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) + + PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case", value1 = 1.0) + + for i in 1:3 + df_timeseries_group1 = DataFrame(; + date_time = [DateTime(2000), DateTime(2001)], + some_vector1 = [1.0, 2.0] .* i, + some_vector2 = [2.0, 3.0] .* i, + ) + df_timeseries_group2 = DataFrame(; + date_time = [DateTime(2000), DateTime(2000), DateTime(2001), DateTime(2001)], + block = [1, 2, 1, 2], + some_vector3 = [1.0, missing, 3.0, 4.0] .* i, + ) + df_timeseries_group3 = DataFrame(; + date_time = [ + DateTime(2000), + DateTime(2000), + DateTime(2000), + DateTime(2000), + DateTime(2001), + DateTime(2001), + DateTime(2001), + DateTime(2009), + ], + block = [1, 1, 1, 1, 2, 2, 2, 2], + segment = [1, 2, 3, 4, 1, 2, 3, 4], + some_vector5 = [1.0, 2.0, 3.0, 4.0, 1, 2, 3, 4] .* i, + some_vector6 = [1.0, 2.0, 3.0, 4.0, 1, 2, 3, 4] .* i, + ) + PSRDatabaseSQLite.create_element!( + db, + "Resource"; + label = "Resource $i", + group1 = df_timeseries_group1, + group2 = df_timeseries_group2, + group3 = df_timeseries_group3, + ) + end + + df_timeseries_group5 = DataFrame(; + date_time = [DateTime(2000), DateTime(2001)], + some_vector1 = [1.0, 2.0], + some_vector2 = [2.0, 3.0], + ) + + @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.create_element!( + db, + "Resource"; + label = "Resource 4", + group5 = df_timeseries_group5, + ) + + PSRDatabaseSQLite.close!(db) + GC.gc() + GC.gc() + rm(db_path) + @test true + return nothing +end + function runtests() Base.GC.gc() Base.GC.gc() diff --git a/test/PSRDatabaseSQLite/test_read/test_read.jl b/test/PSRDatabaseSQLite/test_read/test_read.jl index a933bbc5..7945fd2a 100644 --- a/test/PSRDatabaseSQLite/test_read/test_read.jl +++ b/test/PSRDatabaseSQLite/test_read/test_read.jl @@ -3,272 +3,567 @@ module TestRead using PSRClassesInterface.PSRDatabaseSQLite using SQLite using Dates +using DataFrames using Test -function test_read_parameters() - path_schema = joinpath(@__DIR__, "test_read.sql") - db_path = joinpath(@__DIR__, "test_read.sqlite") - db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) - PSRDatabaseSQLite.create_element!( - db, - "Configuration"; - label = "Toy Case", - date_initial = DateTime(2020, 1, 1), - ) - PSRDatabaseSQLite.create_element!( - db, - "Resource"; - label = 
"Resource 1", - some_value = [1, 2, 3.0], - ) - PSRDatabaseSQLite.create_element!( - db, - "Resource"; - label = "Resource 2", - some_value = [1, 2, 4.0], - ) - PSRDatabaseSQLite.create_element!(db, "Cost"; label = "Cost 1") - PSRDatabaseSQLite.create_element!(db, "Cost"; label = "Cost 2", value = 10.0) - PSRDatabaseSQLite.create_element!( - db, - "Plant"; - label = "Plant 1", - capacity = 2.02, - some_factor = [1.0], - date_some_date = [DateTime(2020, 1, 1)], - ) - PSRDatabaseSQLite.create_element!( - db, - "Plant"; - label = "Plant 2", - capacity = 53.0, - some_factor = [1.0, 2.0], - date_some_date = [DateTime(2020, 1, 1), DateTime(2020, 1, 2)], - ) - PSRDatabaseSQLite.create_element!(db, "Plant"; label = "Plant 3", capacity = 54.0) - PSRDatabaseSQLite.create_element!( - db, - "Plant"; - label = "Plant 4", - capacity = 53.0, - some_factor = [1.0, 2.0], - ) - - @test PSRDatabaseSQLite.read_scalar_parameters(db, "Configuration", "label") == - ["Toy Case"] - @test PSRDatabaseSQLite.read_scalar_parameters(db, "Configuration", "date_initial") == - [DateTime(2020, 1, 1)] - @test PSRDatabaseSQLite.read_scalar_parameters(db, "Resource", "label") == - ["Resource 1", "Resource 2"] - @test PSRDatabaseSQLite.read_scalar_parameter(db, "Resource", "label", "Resource 1") == - "Resource 1" - @test PSRDatabaseSQLite.read_scalar_parameters(db, "Cost", "value") == [100.0, 10.0] - @test any( - isnan, - PSRDatabaseSQLite.read_scalar_parameters(db, "Cost", "value_without_default"), - ) - @test PSRDatabaseSQLite.read_scalar_parameters( - db, - "Cost", - "value_without_default"; - default = 2.0, - ) == [2.0, 2.0] - @test PSRDatabaseSQLite.read_scalar_parameter(db, "Plant", "capacity", "Plant 3") == - 54.0 - @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.read_scalar_parameter( - db, - "Plant", - "capacity", - "Plant 5", - ) - @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.read_scalar_parameters( - db, - "Resource", - "capacity", - ) - @test PSRDatabaseSQLite.read_scalar_parameters(db, "Plant", "label") == - ["Plant 1", "Plant 2", "Plant 3", "Plant 4"] - @test PSRDatabaseSQLite.read_scalar_parameters(db, "Plant", "capacity") == - [2.02, 53.0, 54.0, 53.0] - @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.read_scalar_parameters( - db, - "Resource", - "some_value", - ) - @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.read_vector_parameters( - db, - "Plant", - "capacity", - ) - @test PSRDatabaseSQLite.read_vector_parameters(db, "Resource", "some_value") == - [[1, 2, 3.0], [1, 2, 4.0]] - @test PSRDatabaseSQLite.read_vector_parameters(db, "Plant", "some_factor") == - [[1.0], [1.0, 2.0], Float64[], [1.0, 2.0]] - @test PSRDatabaseSQLite.read_vector_parameter(db, "Plant", "some_factor", "Plant 1") == - [1.0] - @test PSRDatabaseSQLite.read_vector_parameter(db, "Plant", "some_factor", "Plant 2") == - [1.0, 2.0] - @test PSRDatabaseSQLite.read_vector_parameter(db, "Plant", "some_factor", "Plant 3") == - Float64[] - @test PSRDatabaseSQLite.read_vector_parameter( - db, - "Plant", - "date_some_date", - "Plant 2", - ) == - [DateTime(2020, 1, 1), DateTime(2020, 1, 2)] - @test PSRDatabaseSQLite.read_vector_parameter( - db, - "Plant", - "date_some_date", - "Plant 3", - ) == - DateTime[] - @test PSRDatabaseSQLite.read_vector_parameter( - db, - "Plant", - "date_some_date", - "Plant 4", - ) == - DateTime[typemin(DateTime), typemin(DateTime)] - @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.read_vector_parameter( - db, - "Plant", - 
"some_factor", - "Plant 500", - ) - - PSRDatabaseSQLite.update_scalar_parameter!(db, "Plant", "capacity", "Plant 1", 2.0) - @test PSRDatabaseSQLite.read_scalar_parameters(db, "Plant", "capacity") == - [2.0, 53.0, 54.0, 53.0] - PSRDatabaseSQLite.delete_element!(db, "Resource", "Resource 1") - @test PSRDatabaseSQLite.read_scalar_parameters(db, "Resource", "label") == - ["Resource 2"] +# function test_read_parameters() +# path_schema = joinpath(@__DIR__, "test_read.sql") +# db_path = joinpath(@__DIR__, "test_read.sqlite") +# db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) +# PSRDatabaseSQLite.create_element!( +# db, +# "Configuration"; +# label = "Toy Case", +# date_initial = DateTime(2020, 1, 1), +# ) +# PSRDatabaseSQLite.create_element!( +# db, +# "Resource"; +# label = "Resource 1", +# some_value = [1, 2, 3.0], +# ) +# PSRDatabaseSQLite.create_element!( +# db, +# "Resource"; +# label = "Resource 2", +# some_value = [1, 2, 4.0], +# ) +# PSRDatabaseSQLite.create_element!(db, "Cost"; label = "Cost 1") +# PSRDatabaseSQLite.create_element!(db, "Cost"; label = "Cost 2", value = 10.0) +# PSRDatabaseSQLite.create_element!( +# db, +# "Plant"; +# label = "Plant 1", +# capacity = 2.02, +# some_factor = [1.0], +# date_some_date = [DateTime(2020, 1, 1)], +# ) +# PSRDatabaseSQLite.create_element!( +# db, +# "Plant"; +# label = "Plant 2", +# capacity = 53.0, +# some_factor = [1.0, 2.0], +# date_some_date = [DateTime(2020, 1, 1), DateTime(2020, 1, 2)], +# ) +# PSRDatabaseSQLite.create_element!(db, "Plant"; label = "Plant 3", capacity = 54.0) +# PSRDatabaseSQLite.create_element!( +# db, +# "Plant"; +# label = "Plant 4", +# capacity = 53.0, +# some_factor = [1.0, 2.0], +# ) - PSRDatabaseSQLite.close!(db) - return rm(db_path) -end +# @test PSRDatabaseSQLite.read_scalar_parameters(db, "Configuration", "label") == +# ["Toy Case"] +# @test PSRDatabaseSQLite.read_scalar_parameters(db, "Configuration", "date_initial") == +# [DateTime(2020, 1, 1)] +# @test PSRDatabaseSQLite.read_scalar_parameters(db, "Resource", "label") == +# ["Resource 1", "Resource 2"] +# @test PSRDatabaseSQLite.read_scalar_parameter(db, "Resource", "label", "Resource 1") == +# "Resource 1" +# @test PSRDatabaseSQLite.read_scalar_parameters(db, "Cost", "value") == [100.0, 10.0] +# @test any( +# isnan, +# PSRDatabaseSQLite.read_scalar_parameters(db, "Cost", "value_without_default"), +# ) +# @test PSRDatabaseSQLite.read_scalar_parameters( +# db, +# "Cost", +# "value_without_default"; +# default = 2.0, +# ) == [2.0, 2.0] +# @test PSRDatabaseSQLite.read_scalar_parameter(db, "Plant", "capacity", "Plant 3") == +# 54.0 +# @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.read_scalar_parameter( +# db, +# "Plant", +# "capacity", +# "Plant 5", +# ) +# @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.read_scalar_parameters( +# db, +# "Resource", +# "capacity", +# ) +# @test PSRDatabaseSQLite.read_scalar_parameters(db, "Plant", "label") == +# ["Plant 1", "Plant 2", "Plant 3", "Plant 4"] +# @test PSRDatabaseSQLite.read_scalar_parameters(db, "Plant", "capacity") == +# [2.02, 53.0, 54.0, 53.0] +# @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.read_scalar_parameters( +# db, +# "Resource", +# "some_value", +# ) +# @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.read_vector_parameters( +# db, +# "Plant", +# "capacity", +# ) +# @test PSRDatabaseSQLite.read_vector_parameters(db, "Resource", "some_value") == +# [[1, 2, 3.0], [1, 2, 4.0]] +# @test 
PSRDatabaseSQLite.read_vector_parameters(db, "Plant", "some_factor") == +# [[1.0], [1.0, 2.0], Float64[], [1.0, 2.0]] +# @test PSRDatabaseSQLite.read_vector_parameter(db, "Plant", "some_factor", "Plant 1") == +# [1.0] +# @test PSRDatabaseSQLite.read_vector_parameter(db, "Plant", "some_factor", "Plant 2") == +# [1.0, 2.0] +# @test PSRDatabaseSQLite.read_vector_parameter(db, "Plant", "some_factor", "Plant 3") == +# Float64[] +# @test PSRDatabaseSQLite.read_vector_parameter( +# db, +# "Plant", +# "date_some_date", +# "Plant 2", +# ) == +# [DateTime(2020, 1, 1), DateTime(2020, 1, 2)] +# @test PSRDatabaseSQLite.read_vector_parameter( +# db, +# "Plant", +# "date_some_date", +# "Plant 3", +# ) == +# DateTime[] +# @test PSRDatabaseSQLite.read_vector_parameter( +# db, +# "Plant", +# "date_some_date", +# "Plant 4", +# ) == +# DateTime[typemin(DateTime), typemin(DateTime)] +# @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.read_vector_parameter( +# db, +# "Plant", +# "some_factor", +# "Plant 500", +# ) -function test_read_relations() - path_schema = joinpath(@__DIR__, "test_read.sql") - db_path = joinpath(@__DIR__, "test_read.sqlite") - db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) - PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case") - PSRDatabaseSQLite.create_element!( - db, - "Resource"; - label = "Resource 1", - some_value = [1, 2, 3.0], - ) - PSRDatabaseSQLite.create_element!( - db, - "Resource"; - label = "Resource 2", - some_value = [1, 2, 4.0], - ) - PSRDatabaseSQLite.create_element!(db, "Cost"; label = "Cost 1") - PSRDatabaseSQLite.create_element!(db, "Cost"; label = "Cost 2") - PSRDatabaseSQLite.create_element!( - db, - "Plant"; - label = "Plant 1", - capacity = 2.02, - some_factor = [1.0], - ) - PSRDatabaseSQLite.create_element!( - db, - "Plant"; - label = "Plant 2", - capacity = 53.0, - some_factor = [1.0, 2.0], - ) - PSRDatabaseSQLite.create_element!(db, "Plant"; label = "Plant 3", capacity = 54.0) - - PSRDatabaseSQLite.set_scalar_relation!( - db, - "Plant", - "Resource", - "Plant 1", - "Resource 1", - "id", - ) - PSRDatabaseSQLite.set_scalar_relation!( - db, - "Plant", - "Plant", - "Plant 3", - "Plant 2", - "turbine_to", - ) - PSRDatabaseSQLite.set_vector_relation!(db, "Plant", "Cost", "Plant 1", ["Cost 1"], "id") - PSRDatabaseSQLite.set_vector_relation!( - db, - "Plant", - "Cost", - "Plant 2", - ["Cost 1", "Cost 2"], - "id", - ) - - @test PSRDatabaseSQLite.read_scalar_relations(db, "Plant", "Resource", "id") == - ["Resource 1", "", ""] - @test PSRDatabaseSQLite.read_scalar_relations(db, "Plant", "Plant", "turbine_to") == - ["", "", "Plant 2"] - @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.read_scalar_relations( - db, - "Plant", - "Cost", - "id", - ) - @test PSRDatabaseSQLite.read_vector_relations(db, "Plant", "Cost", "id") == - [["Cost 1"], ["Cost 1", "Cost 2"], String[]] - PSRDatabaseSQLite.set_vector_relation!(db, "Plant", "Cost", "Plant 1", ["Cost 2"], "id") - @test PSRDatabaseSQLite.read_vector_relations(db, "Plant", "Cost", "id") == - [["Cost 2"], ["Cost 1", "Cost 2"], String[]] - @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.read_vector_relations( - db, - "Plant", - "Resource", - "id", - ) - @test PSRDatabaseSQLite.read_vector_relation(db, "Plant", "Cost", "Plant 1", "id") == - ["Cost 2"] - @test PSRDatabaseSQLite.read_vector_relation(db, "Plant", "Cost", "Plant 2", "id") == - ["Cost 1", "Cost 2"] +# PSRDatabaseSQLite.update_scalar_parameter!(db, "Plant", 
"capacity", "Plant 1", 2.0) +# @test PSRDatabaseSQLite.read_scalar_parameters(db, "Plant", "capacity") == +# [2.0, 53.0, 54.0, 53.0] +# PSRDatabaseSQLite.delete_element!(db, "Resource", "Resource 1") +# @test PSRDatabaseSQLite.read_scalar_parameters(db, "Resource", "label") == +# ["Resource 2"] - PSRDatabaseSQLite.close!(db) - return rm(db_path) -end +# PSRDatabaseSQLite.close!(db) +# return rm(db_path) +# end + +# function test_read_relations() +# path_schema = joinpath(@__DIR__, "test_read.sql") +# db_path = joinpath(@__DIR__, "test_read.sqlite") +# db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) +# PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case") +# PSRDatabaseSQLite.create_element!( +# db, +# "Resource"; +# label = "Resource 1", +# some_value = [1, 2, 3.0], +# ) +# PSRDatabaseSQLite.create_element!( +# db, +# "Resource"; +# label = "Resource 2", +# some_value = [1, 2, 4.0], +# ) +# PSRDatabaseSQLite.create_element!(db, "Cost"; label = "Cost 1") +# PSRDatabaseSQLite.create_element!(db, "Cost"; label = "Cost 2") +# PSRDatabaseSQLite.create_element!( +# db, +# "Plant"; +# label = "Plant 1", +# capacity = 2.02, +# some_factor = [1.0], +# ) +# PSRDatabaseSQLite.create_element!( +# db, +# "Plant"; +# label = "Plant 2", +# capacity = 53.0, +# some_factor = [1.0, 2.0], +# ) +# PSRDatabaseSQLite.create_element!(db, "Plant"; label = "Plant 3", capacity = 54.0) -function test_read_time_series_files() - path_schema = joinpath(@__DIR__, "test_read.sql") - db_path = joinpath(@__DIR__, "test_read.sqlite") +# PSRDatabaseSQLite.set_scalar_relation!( +# db, +# "Plant", +# "Resource", +# "Plant 1", +# "Resource 1", +# "id", +# ) +# PSRDatabaseSQLite.set_scalar_relation!( +# db, +# "Plant", +# "Plant", +# "Plant 3", +# "Plant 2", +# "turbine_to", +# ) +# PSRDatabaseSQLite.set_vector_relation!(db, "Plant", "Cost", "Plant 1", ["Cost 1"], "id") +# PSRDatabaseSQLite.set_vector_relation!( +# db, +# "Plant", +# "Cost", +# "Plant 2", +# ["Cost 1", "Cost 2"], +# "id", +# ) + +# @test PSRDatabaseSQLite.read_scalar_relations(db, "Plant", "Resource", "id") == +# ["Resource 1", "", ""] +# @test PSRDatabaseSQLite.read_scalar_relations(db, "Plant", "Plant", "turbine_to") == +# ["", "", "Plant 2"] +# @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.read_scalar_relations( +# db, +# "Plant", +# "Cost", +# "id", +# ) +# @test PSRDatabaseSQLite.read_vector_relations(db, "Plant", "Cost", "id") == +# [["Cost 1"], ["Cost 1", "Cost 2"], String[]] +# PSRDatabaseSQLite.set_vector_relation!(db, "Plant", "Cost", "Plant 1", ["Cost 2"], "id") +# @test PSRDatabaseSQLite.read_vector_relations(db, "Plant", "Cost", "id") == +# [["Cost 2"], ["Cost 1", "Cost 2"], String[]] +# @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.read_vector_relations( +# db, +# "Plant", +# "Resource", +# "id", +# ) +# @test PSRDatabaseSQLite.read_vector_relation(db, "Plant", "Cost", "Plant 1", "id") == +# ["Cost 2"] +# @test PSRDatabaseSQLite.read_vector_relation(db, "Plant", "Cost", "Plant 2", "id") == +# ["Cost 1", "Cost 2"] + +# PSRDatabaseSQLite.close!(db) +# return rm(db_path) +# end + +# function test_read_time_series_files() +# path_schema = joinpath(@__DIR__, "test_read.sql") +# db_path = joinpath(@__DIR__, "test_read.sqlite") +# db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) +# PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case") +# PSRDatabaseSQLite.create_element!(db, "Plant"; label = 
"Plant 1") + +# PSRDatabaseSQLite.set_time_series_file!( +# db, +# "Plant"; +# wind_speed = "some_file.txt", +# wind_direction = "some_file2", +# ) +# @test PSRDatabaseSQLite.read_time_series_file(db, "Plant", "wind_speed") == +# "some_file.txt" +# @test PSRDatabaseSQLite.read_time_series_file(db, "Plant", "wind_direction") == +# "some_file2" +# @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.read_time_series_file( +# db, +# "Plant", +# "spill", +# ) +# PSRDatabaseSQLite.set_time_series_file!(db, "Plant"; wind_speed = "some_file3.txt") +# @test PSRDatabaseSQLite.read_time_series_file(db, "Plant", "wind_speed") == +# "some_file3.txt" +# @test PSRDatabaseSQLite.read_time_series_file(db, "Plant", "wind_direction") == +# "some_file2" +# PSRDatabaseSQLite.close!(db) +# return rm(db_path) +# end + +function test_read_timeseries() + path_schema = joinpath(@__DIR__, "test_read_time_series.sql") + db_path = joinpath(@__DIR__, "test_read_time_series.sqlite") db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) - PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case") - PSRDatabaseSQLite.create_element!(db, "Plant"; label = "Plant 1") - - PSRDatabaseSQLite.set_time_series_file!( - db, - "Plant"; - wind_speed = "some_file.txt", - wind_direction = "some_file2", - ) - @test PSRDatabaseSQLite.read_time_series_file(db, "Plant", "wind_speed") == - "some_file.txt" - @test PSRDatabaseSQLite.read_time_series_file(db, "Plant", "wind_direction") == - "some_file2" - @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.read_time_series_file( - db, - "Plant", - "spill", - ) - PSRDatabaseSQLite.set_time_series_file!(db, "Plant"; wind_speed = "some_file3.txt") - @test PSRDatabaseSQLite.read_time_series_file(db, "Plant", "wind_speed") == - "some_file3.txt" - @test PSRDatabaseSQLite.read_time_series_file(db, "Plant", "wind_direction") == - "some_file2" + + PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case", value1 = 1.0) + + for i in 1:3 + df_timeseries_group1 = DataFrame(; + date_time = [DateTime(2000), DateTime(2001)], + some_vector1 = [1.0, 2.0] .* i, + some_vector2 = [2.0, 3.0] .* i, + ) + df_timeseries_group2 = DataFrame(; + date_time = [DateTime(2000), DateTime(2000), DateTime(2001), DateTime(2001)], + block = [1, 2, 1, 2], + some_vector3 = [1.0, missing, 3.0, 4.0] .* i, + ) + df_timeseries_group3 = DataFrame(; + date_time = [ + DateTime(2000), + DateTime(2000), + DateTime(2000), + DateTime(2000), + DateTime(2001), + DateTime(2001), + DateTime(2001), + DateTime(2009), + ], + block = [1, 1, 1, 1, 2, 2, 2, 2], + segment = [1, 2, 3, 4, 1, 2, 3, 4], + some_vector5 = [1.0, 2.0, 3.0, 4.0, 1, 2, 3, 4] .* i, + some_vector6 = [1.0, 2.0, 3.0, 4.0, 1, 2, 3, 4] .* i, + ) + PSRDatabaseSQLite.create_element!( + db, + "Resource"; + label = "Resource $i", + group1 = df_timeseries_group1, + group2 = df_timeseries_group2, + group3 = df_timeseries_group3, + ) + end + + for i in 1:3 + df_timeseries_group1 = DataFrame(; + date_time = [DateTime(2000), DateTime(2001)], + some_vector1 = [1.0, 2.0] .* i, + some_vector2 = [2.0, 3.0] .* i, + ) + df_timeseries_group2 = DataFrame(; + date_time = [DateTime(2000), DateTime(2000), DateTime(2001), DateTime(2001)], + block = [1, 2, 1, 2], + some_vector3 = [1.0, missing, 3.0, 4.0] .* i, + ) + df_timeseries_group3 = DataFrame(; + date_time = [ + DateTime(2000), + DateTime(2000), + DateTime(2000), + DateTime(2000), + DateTime(2001), + DateTime(2001), + DateTime(2001), + DateTime(2009), + ], 
+ block = [1, 1, 1, 1, 2, 2, 2, 2], + segment = [1, 2, 3, 4, 1, 2, 3, 4], + some_vector5 = [1.0, 2.0, 3.0, 4.0, 1, 2, 3, 4] .* i, + some_vector6 = [1.0, 2.0, 3.0, 4.0, 1, 2, 3, 4] .* i, + ) + + for row in eachrow(df_timeseries_group1) + df = PSRDatabaseSQLite.read_time_series_df( + db, + "Resource", + "some_vector1", + "Resource $i"; + row.date_time, + ) + @test df.date_time == string.([row.date_time]) + @test df.some_vector1 == [row.some_vector1] + + df = PSRDatabaseSQLite.read_time_series_df( + db, + "Resource", + "some_vector2", + "Resource $i"; + row.date_time, + ) + @test df.date_time == string.([row.date_time]) + @test df.some_vector2 == [row.some_vector2] + end + + for row in eachrow(df_timeseries_group2) + + # single element query + + df = PSRDatabaseSQLite.read_time_series_df( + db, + "Resource", + "some_vector3", + "Resource $i"; + row.date_time, + block = row.block, + ) + if ismissing(row.some_vector3) + @test ismissing(df.some_vector3[1]) + else + @test df.some_vector3 == [row.some_vector3] + end + @test df.block == [row.block] + + df = PSRDatabaseSQLite.read_time_series_df( + db, + "Resource", + "some_vector4", + "Resource $i"; + row.date_time, + block = row.block, + ) + @test ismissing(df.some_vector4[1]) + + # two-element query + + df = PSRDatabaseSQLite.read_time_series_df( + db, + "Resource", + "some_vector3", + "Resource $i"; + row.date_time, + ) + df_to_compare = df_timeseries_group2[ + (df_timeseries_group2.date_time.==row.date_time), :] + @test size(df, 1) == size(df_to_compare, 1) + for df_i in 1:size(df, 1) + if ismissing(df_to_compare.some_vector3[df_i]) + @test ismissing(df.some_vector3[df_i]) + else + @test df.some_vector3[df_i] == df_to_compare.some_vector3[df_i] + end + @test df.block[df_i] == df_to_compare.block[df_i] + end + + # all elements query + + df = PSRDatabaseSQLite.read_time_series_df( + db, + "Resource", + "some_vector3", + "Resource $i"; + ) + for df_i in 1:size(df, 1) + if ismissing(df_timeseries_group2.some_vector3[df_i]) + @test ismissing(df.some_vector3[df_i]) + else + @test df.some_vector3[df_i] == df_timeseries_group2.some_vector3[df_i] + end + @test df.block[df_i] == df_timeseries_group2.block[df_i] + @test df.date_time[df_i] == string.(df_timeseries_group2.date_time[df_i]) + end + end + + for row in eachrow(df_timeseries_group3) + + # single element query + + df = PSRDatabaseSQLite.read_time_series_df( + db, + "Resource", + "some_vector5", + "Resource $i"; + row.date_time, + block = row.block, + segment = row.segment, + ) + @test df.date_time == string.([row.date_time]) + @test df.block == [row.block] + @test df.segment == [row.segment] + @test df.some_vector5 == [row.some_vector5] + + df = PSRDatabaseSQLite.read_time_series_df( + db, + "Resource", + "some_vector6", + "Resource $i"; + row.date_time, + block = row.block, + segment = row.segment, + ) + @test df.date_time == string.([row.date_time]) + @test df.block == [row.block] + @test df.segment == [row.segment] + @test df.some_vector6 == [row.some_vector6] + + # two-element query + + df = PSRDatabaseSQLite.read_time_series_df( + db, + "Resource", + "some_vector5", + "Resource $i"; + row.date_time, + block = row.block, + ) + df_to_compare = df_timeseries_group3[ + (df_timeseries_group3.date_time.==row.date_time).&(df_timeseries_group3.block.==row.block), :] + @test size(df, 1) == size(df_to_compare, 1) + for df_i in 1:size(df, 1) + @test df.some_vector5[df_i] == df_to_compare.some_vector5[df_i] + @test df.block[df_i] == df_to_compare.block[df_i] + end + + df = 
PSRDatabaseSQLite.read_time_series_df( + db, + "Resource", + "some_vector5", + "Resource $i"; + row.date_time, + segment = row.segment, + ) + + df_to_compare = df_timeseries_group3[ + (df_timeseries_group3.date_time.==row.date_time).&(df_timeseries_group3.segment.==row.segment), :] + @test size(df, 1) == size(df_to_compare, 1) + for df_i in 1:size(df, 1) + @test df.some_vector5[df_i] == df_to_compare.some_vector5[df_i] + @test df.block[df_i] == df_to_compare.block[df_i] + end + + df = PSRDatabaseSQLite.read_time_series_df( + db, + "Resource", + "some_vector6", + "Resource $i"; + row.date_time, + block = row.block, + segment = row.segment, + ) + + df_to_compare = df_timeseries_group3[ + (df_timeseries_group3.date_time.==row.date_time).&(df_timeseries_group3.block.==row.block).&(df_timeseries_group3.segment.==row.segment), + :] + @test size(df, 1) == size(df_to_compare, 1) + for df_i in 1:size(df, 1) + @test df.some_vector6[df_i] == df_to_compare.some_vector6[df_i] + @test df.block[df_i] == df_to_compare.block[df_i] + @test df.segment[df_i] == df_to_compare.segment[df_i] + @test df.date_time[df_i] == string.(df_to_compare.date_time[df_i]) + end + + # three-element query + + df = PSRDatabaseSQLite.read_time_series_df( + db, + "Resource", + "some_vector5", + "Resource $i"; + row.date_time, + ) + df_to_compare = df_timeseries_group3[ + (df_timeseries_group3.date_time.==row.date_time), :] + @test size(df, 1) == size(df_to_compare, 1) + for df_i in 1:size(df, 1) + @test df.some_vector5[df_i] == df_to_compare.some_vector5[df_i] + @test df.block[df_i] == df_to_compare.block[df_i] + @test df.segment[df_i] == df_to_compare.segment[df_i] + end + + # all elements query + + df = PSRDatabaseSQLite.read_time_series_df( + db, + "Resource", + "some_vector5", + "Resource $i"; + ) + for df_i in 1:size(df, 1) + @test df.some_vector5[df_i] == df_timeseries_group3.some_vector5[df_i] + @test df.block[df_i] == df_timeseries_group3.block[df_i] + @test df.segment[df_i] == df_timeseries_group3.segment[df_i] + @test df.date_time[df_i] == string.(df_timeseries_group3.date_time[df_i]) + end + end + end + PSRDatabaseSQLite.close!(db) - return rm(db_path) + GC.gc() + GC.gc() + rm(db_path) + @test true + return nothing end function runtests() diff --git a/test/PSRDatabaseSQLite/test_read/test_read_time_series.sql b/test/PSRDatabaseSQLite/test_read/test_read_time_series.sql new file mode 100644 index 00000000..6fd20e5c --- /dev/null +++ b/test/PSRDatabaseSQLite/test_read/test_read_time_series.sql @@ -0,0 +1,58 @@ +PRAGMA user_version = 1; +PRAGMA foreign_keys = ON; + +CREATE TABLE Configuration ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + label TEXT UNIQUE NOT NULL, + value1 REAL NOT NULL DEFAULT 100, + enum1 TEXT NOT NULL DEFAULT 'A' CHECK(enum1 IN ('A', 'B', 'C')) +) STRICT; + + +CREATE TABLE Resource ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + label TEXT UNIQUE NOT NULL, + type TEXT NOT NULL DEFAULT "D" +) STRICT; + +CREATE TABLE Resource_timeseries_group1 ( + id INTEGER, + date_time TEXT NOT NULL, + some_vector1 REAL, + some_vector2 REAL, + FOREIGN KEY(id) REFERENCES Resource(id) ON DELETE CASCADE ON UPDATE CASCADE, + PRIMARY KEY (id, date_time) +) STRICT; + +CREATE TABLE Resource_timeseries_group2 ( + id INTEGER, + date_time TEXT NOT NULL, + block INTEGER NOT NULL, + some_vector3 REAL, + some_vector4 REAL, + FOREIGN KEY(id) REFERENCES Resource(id) ON DELETE CASCADE ON UPDATE CASCADE, + PRIMARY KEY (id, date_time, block) +) STRICT; + +CREATE TABLE Resource_timeseries_group3 ( + id INTEGER, + date_time TEXT NOT NULL, + 
block INTEGER NOT NULL, + segment INTEGER NOT NULL, + some_vector5 REAL, + some_vector6 REAL, + FOREIGN KEY(id) REFERENCES Resource(id) ON DELETE CASCADE ON UPDATE CASCADE, + PRIMARY KEY (id, date_time, block, segment) +) STRICT; + +CREATE TABLE Resource_timeseries_group4 ( + id INTEGER, + date_time TEXT NOT NULL, + block INTEGER NOT NULL, + segment INTEGER NOT NULL, + some_other_dimension INTEGER NOT NULL, + some_vector7 REAL, + some_vector8 REAL, + FOREIGN KEY(id) REFERENCES Resource(id) ON DELETE CASCADE ON UPDATE CASCADE, + PRIMARY KEY (id, date_time, block, segment, some_other_dimension) +) STRICT; \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 61838460..6348e4ae 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -6,58 +6,58 @@ import DataFrames using Test const PSRI = PSRClassesInterface -@testset "File Loop" begin - @time include("loop_file.jl") -end +# @testset "File Loop" begin +# @time include("loop_file.jl") +# end @testset "PSRClassesInterface" begin - @testset "PMD Parser" begin - @time include("pmd_parser.jl") - end - @testset "Read json parameters" begin - @time include("read_json_parameters.jl") - end - @testset "Read json durations" begin - @time include("duration.jl") - end - @testset "OpenBinary file format" begin - @testset "Read and write with monthly data" begin - @time include("OpenBinary/read_and_write_blocks.jl") - end - @testset "Read and write with hourly data" begin - @time include("OpenBinary/read_and_write_hourly.jl") - end - @testset "Read hourly data from psrclasses c++" begin - @time include("OpenBinary/read_hourly.jl") - end - @testset "Read data with Nonpositive Indices" begin - @time include("OpenBinary/nonpositive_indices.jl") - end - @testset "Write file partially" begin - @time include("OpenBinary/incomplete_file.jl") - end - end - @testset "ReaderMapper" begin - @time include("reader_mapper.jl") - end - @testset "TS Utils" begin - @time include("time_series_utils.jl") - end - @testset "Modification API" begin - @time include("modification_api.jl") - @time include("custom_study.jl") - end - @testset "Model Template" begin - @time include("model_template.jl") - end - @testset "Relations" begin - @time include("relations.jl") - end - @testset "Graf Files" begin - @time include("graf_files.jl") - end - @testset "Utils" begin - @time include("utils.jl") - end + # @testset "PMD Parser" begin + # @time include("pmd_parser.jl") + # end + # @testset "Read json parameters" begin + # @time include("read_json_parameters.jl") + # end + # @testset "Read json durations" begin + # @time include("duration.jl") + # end + # @testset "OpenBinary file format" begin + # @testset "Read and write with monthly data" begin + # @time include("OpenBinary/read_and_write_blocks.jl") + # end + # @testset "Read and write with hourly data" begin + # @time include("OpenBinary/read_and_write_hourly.jl") + # end + # @testset "Read hourly data from psrclasses c++" begin + # @time include("OpenBinary/read_hourly.jl") + # end + # @testset "Read data with Nonpositive Indices" begin + # @time include("OpenBinary/nonpositive_indices.jl") + # end + # @testset "Write file partially" begin + # @time include("OpenBinary/incomplete_file.jl") + # end + # end + # @testset "ReaderMapper" begin + # @time include("reader_mapper.jl") + # end + # @testset "TS Utils" begin + # @time include("time_series_utils.jl") + # end + # @testset "Modification API" begin + # @time include("modification_api.jl") + # @time include("custom_study.jl") + # end + # @testset "Model 
Template" begin + # @time include("model_template.jl") + # end + # @testset "Relations" begin + # @time include("relations.jl") + # end + # @testset "Graf Files" begin + # @time include("graf_files.jl") + # end + # @testset "Utils" begin + # @time include("utils.jl") + # end @testset "PSRDatabaseSQLite" begin include("PSRDatabaseSQLite/runtests.jl") end From 6044361ac4db9f95a2bf3036abebc9ded04ca4ec Mon Sep 17 00:00:00 2001 From: pedroripper Date: Wed, 26 Jun 2024 19:19:01 -0300 Subject: [PATCH 04/34] Uncomment test --- test/PSRDatabaseSQLite/test_read/test_read.jl | 530 +++++++++--------- 1 file changed, 265 insertions(+), 265 deletions(-) diff --git a/test/PSRDatabaseSQLite/test_read/test_read.jl b/test/PSRDatabaseSQLite/test_read/test_read.jl index 7945fd2a..adf7a8d9 100644 --- a/test/PSRDatabaseSQLite/test_read/test_read.jl +++ b/test/PSRDatabaseSQLite/test_read/test_read.jl @@ -6,271 +6,271 @@ using Dates using DataFrames using Test -# function test_read_parameters() -# path_schema = joinpath(@__DIR__, "test_read.sql") -# db_path = joinpath(@__DIR__, "test_read.sqlite") -# db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) -# PSRDatabaseSQLite.create_element!( -# db, -# "Configuration"; -# label = "Toy Case", -# date_initial = DateTime(2020, 1, 1), -# ) -# PSRDatabaseSQLite.create_element!( -# db, -# "Resource"; -# label = "Resource 1", -# some_value = [1, 2, 3.0], -# ) -# PSRDatabaseSQLite.create_element!( -# db, -# "Resource"; -# label = "Resource 2", -# some_value = [1, 2, 4.0], -# ) -# PSRDatabaseSQLite.create_element!(db, "Cost"; label = "Cost 1") -# PSRDatabaseSQLite.create_element!(db, "Cost"; label = "Cost 2", value = 10.0) -# PSRDatabaseSQLite.create_element!( -# db, -# "Plant"; -# label = "Plant 1", -# capacity = 2.02, -# some_factor = [1.0], -# date_some_date = [DateTime(2020, 1, 1)], -# ) -# PSRDatabaseSQLite.create_element!( -# db, -# "Plant"; -# label = "Plant 2", -# capacity = 53.0, -# some_factor = [1.0, 2.0], -# date_some_date = [DateTime(2020, 1, 1), DateTime(2020, 1, 2)], -# ) -# PSRDatabaseSQLite.create_element!(db, "Plant"; label = "Plant 3", capacity = 54.0) -# PSRDatabaseSQLite.create_element!( -# db, -# "Plant"; -# label = "Plant 4", -# capacity = 53.0, -# some_factor = [1.0, 2.0], -# ) - -# @test PSRDatabaseSQLite.read_scalar_parameters(db, "Configuration", "label") == -# ["Toy Case"] -# @test PSRDatabaseSQLite.read_scalar_parameters(db, "Configuration", "date_initial") == -# [DateTime(2020, 1, 1)] -# @test PSRDatabaseSQLite.read_scalar_parameters(db, "Resource", "label") == -# ["Resource 1", "Resource 2"] -# @test PSRDatabaseSQLite.read_scalar_parameter(db, "Resource", "label", "Resource 1") == -# "Resource 1" -# @test PSRDatabaseSQLite.read_scalar_parameters(db, "Cost", "value") == [100.0, 10.0] -# @test any( -# isnan, -# PSRDatabaseSQLite.read_scalar_parameters(db, "Cost", "value_without_default"), -# ) -# @test PSRDatabaseSQLite.read_scalar_parameters( -# db, -# "Cost", -# "value_without_default"; -# default = 2.0, -# ) == [2.0, 2.0] -# @test PSRDatabaseSQLite.read_scalar_parameter(db, "Plant", "capacity", "Plant 3") == -# 54.0 -# @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.read_scalar_parameter( -# db, -# "Plant", -# "capacity", -# "Plant 5", -# ) -# @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.read_scalar_parameters( -# db, -# "Resource", -# "capacity", -# ) -# @test PSRDatabaseSQLite.read_scalar_parameters(db, "Plant", "label") == -# ["Plant 1", "Plant 2", "Plant 3", "Plant 
4"] -# @test PSRDatabaseSQLite.read_scalar_parameters(db, "Plant", "capacity") == -# [2.02, 53.0, 54.0, 53.0] -# @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.read_scalar_parameters( -# db, -# "Resource", -# "some_value", -# ) -# @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.read_vector_parameters( -# db, -# "Plant", -# "capacity", -# ) -# @test PSRDatabaseSQLite.read_vector_parameters(db, "Resource", "some_value") == -# [[1, 2, 3.0], [1, 2, 4.0]] -# @test PSRDatabaseSQLite.read_vector_parameters(db, "Plant", "some_factor") == -# [[1.0], [1.0, 2.0], Float64[], [1.0, 2.0]] -# @test PSRDatabaseSQLite.read_vector_parameter(db, "Plant", "some_factor", "Plant 1") == -# [1.0] -# @test PSRDatabaseSQLite.read_vector_parameter(db, "Plant", "some_factor", "Plant 2") == -# [1.0, 2.0] -# @test PSRDatabaseSQLite.read_vector_parameter(db, "Plant", "some_factor", "Plant 3") == -# Float64[] -# @test PSRDatabaseSQLite.read_vector_parameter( -# db, -# "Plant", -# "date_some_date", -# "Plant 2", -# ) == -# [DateTime(2020, 1, 1), DateTime(2020, 1, 2)] -# @test PSRDatabaseSQLite.read_vector_parameter( -# db, -# "Plant", -# "date_some_date", -# "Plant 3", -# ) == -# DateTime[] -# @test PSRDatabaseSQLite.read_vector_parameter( -# db, -# "Plant", -# "date_some_date", -# "Plant 4", -# ) == -# DateTime[typemin(DateTime), typemin(DateTime)] -# @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.read_vector_parameter( -# db, -# "Plant", -# "some_factor", -# "Plant 500", -# ) - -# PSRDatabaseSQLite.update_scalar_parameter!(db, "Plant", "capacity", "Plant 1", 2.0) -# @test PSRDatabaseSQLite.read_scalar_parameters(db, "Plant", "capacity") == -# [2.0, 53.0, 54.0, 53.0] -# PSRDatabaseSQLite.delete_element!(db, "Resource", "Resource 1") -# @test PSRDatabaseSQLite.read_scalar_parameters(db, "Resource", "label") == -# ["Resource 2"] - -# PSRDatabaseSQLite.close!(db) -# return rm(db_path) -# end - -# function test_read_relations() -# path_schema = joinpath(@__DIR__, "test_read.sql") -# db_path = joinpath(@__DIR__, "test_read.sqlite") -# db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) -# PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case") -# PSRDatabaseSQLite.create_element!( -# db, -# "Resource"; -# label = "Resource 1", -# some_value = [1, 2, 3.0], -# ) -# PSRDatabaseSQLite.create_element!( -# db, -# "Resource"; -# label = "Resource 2", -# some_value = [1, 2, 4.0], -# ) -# PSRDatabaseSQLite.create_element!(db, "Cost"; label = "Cost 1") -# PSRDatabaseSQLite.create_element!(db, "Cost"; label = "Cost 2") -# PSRDatabaseSQLite.create_element!( -# db, -# "Plant"; -# label = "Plant 1", -# capacity = 2.02, -# some_factor = [1.0], -# ) -# PSRDatabaseSQLite.create_element!( -# db, -# "Plant"; -# label = "Plant 2", -# capacity = 53.0, -# some_factor = [1.0, 2.0], -# ) -# PSRDatabaseSQLite.create_element!(db, "Plant"; label = "Plant 3", capacity = 54.0) - -# PSRDatabaseSQLite.set_scalar_relation!( -# db, -# "Plant", -# "Resource", -# "Plant 1", -# "Resource 1", -# "id", -# ) -# PSRDatabaseSQLite.set_scalar_relation!( -# db, -# "Plant", -# "Plant", -# "Plant 3", -# "Plant 2", -# "turbine_to", -# ) -# PSRDatabaseSQLite.set_vector_relation!(db, "Plant", "Cost", "Plant 1", ["Cost 1"], "id") -# PSRDatabaseSQLite.set_vector_relation!( -# db, -# "Plant", -# "Cost", -# "Plant 2", -# ["Cost 1", "Cost 2"], -# "id", -# ) - -# @test PSRDatabaseSQLite.read_scalar_relations(db, "Plant", "Resource", "id") == -# ["Resource 1", "", ""] 
-# @test PSRDatabaseSQLite.read_scalar_relations(db, "Plant", "Plant", "turbine_to") == -# ["", "", "Plant 2"] -# @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.read_scalar_relations( -# db, -# "Plant", -# "Cost", -# "id", -# ) -# @test PSRDatabaseSQLite.read_vector_relations(db, "Plant", "Cost", "id") == -# [["Cost 1"], ["Cost 1", "Cost 2"], String[]] -# PSRDatabaseSQLite.set_vector_relation!(db, "Plant", "Cost", "Plant 1", ["Cost 2"], "id") -# @test PSRDatabaseSQLite.read_vector_relations(db, "Plant", "Cost", "id") == -# [["Cost 2"], ["Cost 1", "Cost 2"], String[]] -# @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.read_vector_relations( -# db, -# "Plant", -# "Resource", -# "id", -# ) -# @test PSRDatabaseSQLite.read_vector_relation(db, "Plant", "Cost", "Plant 1", "id") == -# ["Cost 2"] -# @test PSRDatabaseSQLite.read_vector_relation(db, "Plant", "Cost", "Plant 2", "id") == -# ["Cost 1", "Cost 2"] - -# PSRDatabaseSQLite.close!(db) -# return rm(db_path) -# end - -# function test_read_time_series_files() -# path_schema = joinpath(@__DIR__, "test_read.sql") -# db_path = joinpath(@__DIR__, "test_read.sqlite") -# db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) -# PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case") -# PSRDatabaseSQLite.create_element!(db, "Plant"; label = "Plant 1") - -# PSRDatabaseSQLite.set_time_series_file!( -# db, -# "Plant"; -# wind_speed = "some_file.txt", -# wind_direction = "some_file2", -# ) -# @test PSRDatabaseSQLite.read_time_series_file(db, "Plant", "wind_speed") == -# "some_file.txt" -# @test PSRDatabaseSQLite.read_time_series_file(db, "Plant", "wind_direction") == -# "some_file2" -# @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.read_time_series_file( -# db, -# "Plant", -# "spill", -# ) -# PSRDatabaseSQLite.set_time_series_file!(db, "Plant"; wind_speed = "some_file3.txt") -# @test PSRDatabaseSQLite.read_time_series_file(db, "Plant", "wind_speed") == -# "some_file3.txt" -# @test PSRDatabaseSQLite.read_time_series_file(db, "Plant", "wind_direction") == -# "some_file2" -# PSRDatabaseSQLite.close!(db) -# return rm(db_path) -# end +function test_read_parameters() + path_schema = joinpath(@__DIR__, "test_read.sql") + db_path = joinpath(@__DIR__, "test_read.sqlite") + db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) + PSRDatabaseSQLite.create_element!( + db, + "Configuration"; + label = "Toy Case", + date_initial = DateTime(2020, 1, 1), + ) + PSRDatabaseSQLite.create_element!( + db, + "Resource"; + label = "Resource 1", + some_value = [1, 2, 3.0], + ) + PSRDatabaseSQLite.create_element!( + db, + "Resource"; + label = "Resource 2", + some_value = [1, 2, 4.0], + ) + PSRDatabaseSQLite.create_element!(db, "Cost"; label = "Cost 1") + PSRDatabaseSQLite.create_element!(db, "Cost"; label = "Cost 2", value = 10.0) + PSRDatabaseSQLite.create_element!( + db, + "Plant"; + label = "Plant 1", + capacity = 2.02, + some_factor = [1.0], + date_some_date = [DateTime(2020, 1, 1)], + ) + PSRDatabaseSQLite.create_element!( + db, + "Plant"; + label = "Plant 2", + capacity = 53.0, + some_factor = [1.0, 2.0], + date_some_date = [DateTime(2020, 1, 1), DateTime(2020, 1, 2)], + ) + PSRDatabaseSQLite.create_element!(db, "Plant"; label = "Plant 3", capacity = 54.0) + PSRDatabaseSQLite.create_element!( + db, + "Plant"; + label = "Plant 4", + capacity = 53.0, + some_factor = [1.0, 2.0], + ) + + @test PSRDatabaseSQLite.read_scalar_parameters(db, 
"Configuration", "label") == + ["Toy Case"] + @test PSRDatabaseSQLite.read_scalar_parameters(db, "Configuration", "date_initial") == + [DateTime(2020, 1, 1)] + @test PSRDatabaseSQLite.read_scalar_parameters(db, "Resource", "label") == + ["Resource 1", "Resource 2"] + @test PSRDatabaseSQLite.read_scalar_parameter(db, "Resource", "label", "Resource 1") == + "Resource 1" + @test PSRDatabaseSQLite.read_scalar_parameters(db, "Cost", "value") == [100.0, 10.0] + @test any( + isnan, + PSRDatabaseSQLite.read_scalar_parameters(db, "Cost", "value_without_default"), + ) + @test PSRDatabaseSQLite.read_scalar_parameters( + db, + "Cost", + "value_without_default"; + default = 2.0, + ) == [2.0, 2.0] + @test PSRDatabaseSQLite.read_scalar_parameter(db, "Plant", "capacity", "Plant 3") == + 54.0 + @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.read_scalar_parameter( + db, + "Plant", + "capacity", + "Plant 5", + ) + @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.read_scalar_parameters( + db, + "Resource", + "capacity", + ) + @test PSRDatabaseSQLite.read_scalar_parameters(db, "Plant", "label") == + ["Plant 1", "Plant 2", "Plant 3", "Plant 4"] + @test PSRDatabaseSQLite.read_scalar_parameters(db, "Plant", "capacity") == + [2.02, 53.0, 54.0, 53.0] + @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.read_scalar_parameters( + db, + "Resource", + "some_value", + ) + @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.read_vector_parameters( + db, + "Plant", + "capacity", + ) + @test PSRDatabaseSQLite.read_vector_parameters(db, "Resource", "some_value") == + [[1, 2, 3.0], [1, 2, 4.0]] + @test PSRDatabaseSQLite.read_vector_parameters(db, "Plant", "some_factor") == + [[1.0], [1.0, 2.0], Float64[], [1.0, 2.0]] + @test PSRDatabaseSQLite.read_vector_parameter(db, "Plant", "some_factor", "Plant 1") == + [1.0] + @test PSRDatabaseSQLite.read_vector_parameter(db, "Plant", "some_factor", "Plant 2") == + [1.0, 2.0] + @test PSRDatabaseSQLite.read_vector_parameter(db, "Plant", "some_factor", "Plant 3") == + Float64[] + @test PSRDatabaseSQLite.read_vector_parameter( + db, + "Plant", + "date_some_date", + "Plant 2", + ) == + [DateTime(2020, 1, 1), DateTime(2020, 1, 2)] + @test PSRDatabaseSQLite.read_vector_parameter( + db, + "Plant", + "date_some_date", + "Plant 3", + ) == + DateTime[] + @test PSRDatabaseSQLite.read_vector_parameter( + db, + "Plant", + "date_some_date", + "Plant 4", + ) == + DateTime[typemin(DateTime), typemin(DateTime)] + @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.read_vector_parameter( + db, + "Plant", + "some_factor", + "Plant 500", + ) + + PSRDatabaseSQLite.update_scalar_parameter!(db, "Plant", "capacity", "Plant 1", 2.0) + @test PSRDatabaseSQLite.read_scalar_parameters(db, "Plant", "capacity") == + [2.0, 53.0, 54.0, 53.0] + PSRDatabaseSQLite.delete_element!(db, "Resource", "Resource 1") + @test PSRDatabaseSQLite.read_scalar_parameters(db, "Resource", "label") == + ["Resource 2"] + + PSRDatabaseSQLite.close!(db) + return rm(db_path) +end + +function test_read_relations() + path_schema = joinpath(@__DIR__, "test_read.sql") + db_path = joinpath(@__DIR__, "test_read.sqlite") + db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) + PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case") + PSRDatabaseSQLite.create_element!( + db, + "Resource"; + label = "Resource 1", + some_value = [1, 2, 3.0], + ) + PSRDatabaseSQLite.create_element!( + db, + "Resource"; + label = 
"Resource 2", + some_value = [1, 2, 4.0], + ) + PSRDatabaseSQLite.create_element!(db, "Cost"; label = "Cost 1") + PSRDatabaseSQLite.create_element!(db, "Cost"; label = "Cost 2") + PSRDatabaseSQLite.create_element!( + db, + "Plant"; + label = "Plant 1", + capacity = 2.02, + some_factor = [1.0], + ) + PSRDatabaseSQLite.create_element!( + db, + "Plant"; + label = "Plant 2", + capacity = 53.0, + some_factor = [1.0, 2.0], + ) + PSRDatabaseSQLite.create_element!(db, "Plant"; label = "Plant 3", capacity = 54.0) + + PSRDatabaseSQLite.set_scalar_relation!( + db, + "Plant", + "Resource", + "Plant 1", + "Resource 1", + "id", + ) + PSRDatabaseSQLite.set_scalar_relation!( + db, + "Plant", + "Plant", + "Plant 3", + "Plant 2", + "turbine_to", + ) + PSRDatabaseSQLite.set_vector_relation!(db, "Plant", "Cost", "Plant 1", ["Cost 1"], "id") + PSRDatabaseSQLite.set_vector_relation!( + db, + "Plant", + "Cost", + "Plant 2", + ["Cost 1", "Cost 2"], + "id", + ) + + @test PSRDatabaseSQLite.read_scalar_relations(db, "Plant", "Resource", "id") == + ["Resource 1", "", ""] + @test PSRDatabaseSQLite.read_scalar_relations(db, "Plant", "Plant", "turbine_to") == + ["", "", "Plant 2"] + @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.read_scalar_relations( + db, + "Plant", + "Cost", + "id", + ) + @test PSRDatabaseSQLite.read_vector_relations(db, "Plant", "Cost", "id") == + [["Cost 1"], ["Cost 1", "Cost 2"], String[]] + PSRDatabaseSQLite.set_vector_relation!(db, "Plant", "Cost", "Plant 1", ["Cost 2"], "id") + @test PSRDatabaseSQLite.read_vector_relations(db, "Plant", "Cost", "id") == + [["Cost 2"], ["Cost 1", "Cost 2"], String[]] + @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.read_vector_relations( + db, + "Plant", + "Resource", + "id", + ) + @test PSRDatabaseSQLite.read_vector_relation(db, "Plant", "Cost", "Plant 1", "id") == + ["Cost 2"] + @test PSRDatabaseSQLite.read_vector_relation(db, "Plant", "Cost", "Plant 2", "id") == + ["Cost 1", "Cost 2"] + + PSRDatabaseSQLite.close!(db) + return rm(db_path) +end + +function test_read_time_series_files() + path_schema = joinpath(@__DIR__, "test_read.sql") + db_path = joinpath(@__DIR__, "test_read.sqlite") + db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) + PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case") + PSRDatabaseSQLite.create_element!(db, "Plant"; label = "Plant 1") + + PSRDatabaseSQLite.set_time_series_file!( + db, + "Plant"; + wind_speed = "some_file.txt", + wind_direction = "some_file2", + ) + @test PSRDatabaseSQLite.read_time_series_file(db, "Plant", "wind_speed") == + "some_file.txt" + @test PSRDatabaseSQLite.read_time_series_file(db, "Plant", "wind_direction") == + "some_file2" + @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.read_time_series_file( + db, + "Plant", + "spill", + ) + PSRDatabaseSQLite.set_time_series_file!(db, "Plant"; wind_speed = "some_file3.txt") + @test PSRDatabaseSQLite.read_time_series_file(db, "Plant", "wind_speed") == + "some_file3.txt" + @test PSRDatabaseSQLite.read_time_series_file(db, "Plant", "wind_direction") == + "some_file2" + PSRDatabaseSQLite.close!(db) + return rm(db_path) +end function test_read_timeseries() path_schema = joinpath(@__DIR__, "test_read_time_series.sql") From 5f4c05b8c96b4599d8cc61878b025645f5d4b581 Mon Sep 17 00:00:00 2001 From: pedroripper Date: Wed, 26 Jun 2024 19:27:49 -0300 Subject: [PATCH 05/34] Uncomment more tests --- test/runtests.jl | 102 +++++++++++++++++++++++------------------------ 1 
file changed, 51 insertions(+), 51 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 6348e4ae..61838460 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -6,58 +6,58 @@ import DataFrames using Test const PSRI = PSRClassesInterface -# @testset "File Loop" begin -# @time include("loop_file.jl") -# end +@testset "File Loop" begin + @time include("loop_file.jl") +end @testset "PSRClassesInterface" begin - # @testset "PMD Parser" begin - # @time include("pmd_parser.jl") - # end - # @testset "Read json parameters" begin - # @time include("read_json_parameters.jl") - # end - # @testset "Read json durations" begin - # @time include("duration.jl") - # end - # @testset "OpenBinary file format" begin - # @testset "Read and write with monthly data" begin - # @time include("OpenBinary/read_and_write_blocks.jl") - # end - # @testset "Read and write with hourly data" begin - # @time include("OpenBinary/read_and_write_hourly.jl") - # end - # @testset "Read hourly data from psrclasses c++" begin - # @time include("OpenBinary/read_hourly.jl") - # end - # @testset "Read data with Nonpositive Indices" begin - # @time include("OpenBinary/nonpositive_indices.jl") - # end - # @testset "Write file partially" begin - # @time include("OpenBinary/incomplete_file.jl") - # end - # end - # @testset "ReaderMapper" begin - # @time include("reader_mapper.jl") - # end - # @testset "TS Utils" begin - # @time include("time_series_utils.jl") - # end - # @testset "Modification API" begin - # @time include("modification_api.jl") - # @time include("custom_study.jl") - # end - # @testset "Model Template" begin - # @time include("model_template.jl") - # end - # @testset "Relations" begin - # @time include("relations.jl") - # end - # @testset "Graf Files" begin - # @time include("graf_files.jl") - # end - # @testset "Utils" begin - # @time include("utils.jl") - # end + @testset "PMD Parser" begin + @time include("pmd_parser.jl") + end + @testset "Read json parameters" begin + @time include("read_json_parameters.jl") + end + @testset "Read json durations" begin + @time include("duration.jl") + end + @testset "OpenBinary file format" begin + @testset "Read and write with monthly data" begin + @time include("OpenBinary/read_and_write_blocks.jl") + end + @testset "Read and write with hourly data" begin + @time include("OpenBinary/read_and_write_hourly.jl") + end + @testset "Read hourly data from psrclasses c++" begin + @time include("OpenBinary/read_hourly.jl") + end + @testset "Read data with Nonpositive Indices" begin + @time include("OpenBinary/nonpositive_indices.jl") + end + @testset "Write file partially" begin + @time include("OpenBinary/incomplete_file.jl") + end + end + @testset "ReaderMapper" begin + @time include("reader_mapper.jl") + end + @testset "TS Utils" begin + @time include("time_series_utils.jl") + end + @testset "Modification API" begin + @time include("modification_api.jl") + @time include("custom_study.jl") + end + @testset "Model Template" begin + @time include("model_template.jl") + end + @testset "Relations" begin + @time include("relations.jl") + end + @testset "Graf Files" begin + @time include("graf_files.jl") + end + @testset "Utils" begin + @time include("utils.jl") + end @testset "PSRDatabaseSQLite" begin include("PSRDatabaseSQLite/runtests.jl") end From ac4ce804aa808fc193cda40317ba3c2e3eb6c548 Mon Sep 17 00:00:00 2001 From: pedroripper Date: Thu, 27 Jun 2024 17:10:57 -0300 Subject: [PATCH 06/34] Add more tests --- src/PSRDatabaseSQLite/read.jl | 14 +- 
test/PSRDatabaseSQLite/test_read/test_read.jl | 129 +++++++++++++++++- 2 files changed, 138 insertions(+), 5 deletions(-) diff --git a/src/PSRDatabaseSQLite/read.jl b/src/PSRDatabaseSQLite/read.jl index 3826cad4..753fbb6b 100644 --- a/src/PSRDatabaseSQLite/read.jl +++ b/src/PSRDatabaseSQLite/read.jl @@ -215,11 +215,17 @@ function _read_time_series_df( if read_exact_date query *= "DATE($dim_name) = DATE('$(dim_value)')" else - # Query the nearest date before the provided date - closest_date_query = "SELECT DISTINCT $dim_name FROM $(attribute.table_where_is_located) WHERE DATE($dim_name) <= DATE('$(dim_value)') ORDER BY DATE($dim_name) DESC LIMIT 1" - closest_date = DBInterface.execute(db.sqlite_db, closest_date_query) |> DataFrame + # First checks if the date or dimension value is within the range of the data. + # Then it queries the closest date before the provided date. # If there is no date query the data with date 0 (which will probably return no data.) - date_to_equal_in_query = if isempty(closest_date) + end_date_query = "SELECT MAX(DATE($dim_name)) FROM $(attribute.table_where_is_located)" + end_date = DBInterface.execute(db.sqlite_db, end_date_query) |> DataFrame + # Query the nearest date before the provided date + closest_date_query_earlier = "SELECT DISTINCT $dim_name FROM $(attribute.table_where_is_located) WHERE DATE($dim_name) <= DATE('$(dim_value)') ORDER BY DATE($dim_name) DESC LIMIT 1" + closest_date = DBInterface.execute(db.sqlite_db, closest_date_query_earlier) |> DataFrame + date_to_equal_in_query = if dim_value > DateTime(end_date[!, 1][1]) + DateTime(0) + elseif isempty(closest_date) DateTime(0) else closest_date[!, 1][1] diff --git a/test/PSRDatabaseSQLite/test_read/test_read.jl b/test/PSRDatabaseSQLite/test_read/test_read.jl index adf7a8d9..a27ab996 100644 --- a/test/PSRDatabaseSQLite/test_read/test_read.jl +++ b/test/PSRDatabaseSQLite/test_read/test_read.jl @@ -272,7 +272,7 @@ function test_read_time_series_files() return rm(db_path) end -function test_read_timeseries() +function test_read_timeseries_single() path_schema = joinpath(@__DIR__, "test_read_time_series.sql") db_path = joinpath(@__DIR__, "test_read_time_series.sqlite") db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) @@ -316,6 +316,47 @@ function test_read_timeseries() ) end + # some errors + + df_empty = PSRDatabaseSQLite.read_time_series_df( + db, + "Resource", + "some_vector1", + "Resource 1"; + date_time = DateTime(1998), + ) + @test isempty(df_empty) + + df_empty = PSRDatabaseSQLite.read_time_series_df( + db, + "Resource", + "some_vector1", + "Resource 1"; + date_time = DateTime(2030), + ) + @test isempty(df_empty) + + df_empty = PSRDatabaseSQLite.read_time_series_df( + db, + "Resource", + "some_vector5", + "Resource 1"; + date_time = DateTime(2030), + block = 20, + ) + @test isempty(df_empty) + + df_wrong_date = PSRDatabaseSQLite.read_time_series_df( + db, + "Resource", + "some_vector5", + "Resource 1"; + date_time = DateTime(2003), + ) + @test df_wrong_date.date_time[1] == string(DateTime(2001)) + + # return single dataframe + for i in 1:3 df_timeseries_group1 = DataFrame(; date_time = [DateTime(2000), DateTime(2001)], @@ -566,6 +607,92 @@ function test_read_timeseries() return nothing end +function test_read_timeseries_multiple() + path_schema = joinpath(@__DIR__, "test_read_time_series.sql") + db_path = joinpath(@__DIR__, "test_read_time_series.sqlite") + db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) + + 
PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case", value1 = 1.0)
+
+    for i in 1:3
+        df_timeseries_group1 = DataFrame(;
+            date_time = [DateTime(2000), DateTime(2001)],
+            some_vector1 = [1.0, 2.0] .* i,
+            some_vector2 = [2.0, 3.0] .* i,
+        )
+        df_timeseries_group2 = DataFrame(;
+            date_time = [DateTime(2000), DateTime(2000), DateTime(2001), DateTime(2001)],
+            block = [1, 2, 1, 2],
+            some_vector3 = [1.0, missing, 3.0, 4.0] .* i,
+        )
+        df_timeseries_group3 = DataFrame(;
+            date_time = [
+                DateTime(2000),
+                DateTime(2000),
+                DateTime(2000),
+                DateTime(2000),
+                DateTime(2001),
+                DateTime(2001),
+                DateTime(2001),
+                DateTime(2009),
+            ],
+            block = [1, 1, 1, 1, 2, 2, 2, 2],
+            segment = [1, 2, 3, 4, 1, 2, 3, 4],
+            some_vector5 = [1.0, 2.0, 3.0, 4.0, 1, 2, 3, 4] .* i,
+            some_vector6 = [1.0, 2.0, 3.0, 4.0, 1, 2, 3, 4] .* i,
+        )
+        PSRDatabaseSQLite.create_element!(
+            db,
+            "Resource";
+            label = "Resource $i",
+            group1 = df_timeseries_group1,
+            group2 = df_timeseries_group2,
+            group3 = df_timeseries_group3,
+        )
+    end
+
+    # read_time_series_dfs returns one DataFrame per element of the collection
+
+    dates_df1 = [DateTime(2000), DateTime(2001)]
+    some_vector1 = [[1.0, 2.0] .* i for i in 1:3]
+    some_vector2 = [[2.0, 3.0] .* i for i in 1:3]
+
+    for i in eachindex(dates_df1)
+        dfs = PSRDatabaseSQLite.read_time_series_dfs(
+            db,
+            "Resource",
+            "some_vector1";
+            date_time = dates_df1[i],
+        )
+
+        for j in 1:3
+            df = dfs[j]
+            @test df.date_time == string.([dates_df1[i]])
+            @test df.some_vector1 == [some_vector1[j][i]]
+        end
+
+        dfs = PSRDatabaseSQLite.read_time_series_dfs(
+            db,
+            "Resource",
+            "some_vector2";
+            date_time = dates_df1[i],
+        )
+
+        for j in 1:3
+            df = dfs[j]
+            @test df.date_time == string.([dates_df1[i]])
+            @test df.some_vector2 == [some_vector2[j][i]]
+        end
+    end
+
+    PSRDatabaseSQLite.close!(db)
+    GC.gc()
+    GC.gc()
+    rm(db_path)
+    @test true
+    return nothing
+end
+
 function runtests()
     Base.GC.gc()
     Base.GC.gc()

From 1f8be1b5089e7c8bcc7025e039d7a959e45bdbe4 Mon Sep 17 00:00:00 2001
From: guilhermebodin
Date: Fri, 28 Jun 2024 01:24:51 -0300
Subject: [PATCH 07/34] update _set_default_pragmas

---
 src/PSRDatabaseSQLite/database_sqlite.jl | 21 +++++++++++++++++++--
 src/PSRDatabaseSQLite/validate.jl        | 16 ----------------
 2 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/src/PSRDatabaseSQLite/database_sqlite.jl b/src/PSRDatabaseSQLite/database_sqlite.jl
index c1993988..243e283a 100644
--- a/src/PSRDatabaseSQLite/database_sqlite.jl
+++ b/src/PSRDatabaseSQLite/database_sqlite.jl
@@ -3,8 +3,25 @@ mutable struct DatabaseSQLite
     collections_map::OrderedDict{String, Collection}
 end
 
-function _set_default_pragmas!(sqlite_db::SQLite.DB)
-    DBInterface.execute(sqlite_db, "PRAGMA busy_timeout = 5000;")
+function _set_default_pragmas!(db::SQLite.DB)
+    _set_foreign_keys_on!(db)
+    _set_busy_timeout!(db, 5000)
+    return nothing
+end
+
+function _set_foreign_keys_on!(db::SQLite.DB)
+    # https://www.sqlite.org/foreignkeys.html#fk_enable
+    # Foreign keys are enabled per connection; they are not something
+    # that can be stored in the database itself like user_version.
+    # This is needed to ensure that foreign keys are enforced, so that
+    # behaviours like cascade delete and cascade update actually take effect.
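+    # In practice this means the pragma below has to be re-executed for every
+    # new connection; while it is off, SQLite ignores the ON DELETE CASCADE /
+    # ON UPDATE CASCADE actions declared in the schema. A connection can be
+    # inspected with `PRAGMA foreign_keys;`, which returns 1 once it is on.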
+ DBInterface.execute(db, "PRAGMA foreign_keys = ON;") + return nothing +end + +function _set_busy_timeout!(db::SQLite.DB, timeout::Int) + # https://www.sqlite.org/pragma.html#pragma_busy_timeout + DBInterface.execute(db, "PRAGMA busy_timeout = $timeout;") return nothing end diff --git a/src/PSRDatabaseSQLite/validate.jl b/src/PSRDatabaseSQLite/validate.jl index 75728c12..2412708f 100644 --- a/src/PSRDatabaseSQLite/validate.jl +++ b/src/PSRDatabaseSQLite/validate.jl @@ -140,7 +140,6 @@ function _validate_database(db::SQLite.DB) psr_database_sqlite_error("Database does not have a \"Configuration\" table.") end _validate_database_pragmas(db) - _set_default_pragmas!(db) num_errors = 0 for table in tables if table == "sqlite_sequence" @@ -477,21 +476,6 @@ function _validate_time_series_dimensions( end end -function _set_default_pragmas!(db::SQLite.DB) - _set_foreign_keys_on!(db) - return nothing -end - -function _set_foreign_keys_on!(db::SQLite.DB) - # https://www.sqlite.org/foreignkeys.html#fk_enable - # Foreign keys are enabled per connection, they are not something - # that can be stored in the database itself like user_version. - # This is needed to ensure that the foreign keys are enabled - # behaviours like cascade delete and update are enabled. - DBInterface.execute(db, "PRAGMA foreign_keys = ON;") - return nothing -end - function _validate_database_pragmas(db::SQLite.DB) _validate_user_version(db) return nothing From 3d198564ce198822bb0eb2fdd094ede0b1b1b38f Mon Sep 17 00:00:00 2001 From: guilhermebodin Date: Mon, 1 Jul 2024 12:48:44 -0300 Subject: [PATCH 08/34] Update timeseries methods --- src/PSRDatabaseSQLite/read.jl | 28 +++++----- src/PSRDatabaseSQLite/time_controller.jl | 68 ++++++++++++++++++++++++ src/PSRDatabaseSQLite/validate.jl | 4 +- 3 files changed, 84 insertions(+), 16 deletions(-) create mode 100644 src/PSRDatabaseSQLite/time_controller.jl diff --git a/src/PSRDatabaseSQLite/read.jl b/src/PSRDatabaseSQLite/read.jl index 753fbb6b..1dba7ee5 100644 --- a/src/PSRDatabaseSQLite/read.jl +++ b/src/PSRDatabaseSQLite/read.jl @@ -41,7 +41,7 @@ function read_scalar_parameters( attribute = _get_attribute(db, collection_id, attribute_id) table = _table_where_is_located(attribute) - query = "SELECT $attribute_id FROM $table ORDER BY rowid" + query = "SELECT $attribute_id FROM $table ORDER BY id" df = DBInterface.execute(db.sqlite_db, query) |> DataFrame results = df[!, 1] results = _treat_query_result(results, attribute, default) @@ -264,7 +264,7 @@ function read_scalar_relations( names_in_collection_to = read_scalar_parameters(db, collection_to, "label") num_elements = length(names_in_collection_to) replace_dict = Dict{Any, String}(zip(collect(1:num_elements), names_in_collection_to)) - push!(replace_dict, _opensql_default_value_for_type(Int) => "") + push!(replace_dict, _psrdatabasesqlite_null_value(Int) => "") return replace(map_of_elements, replace_dict...) 
end @@ -301,7 +301,7 @@ function _get_scalar_relation_map( ) attribute = _get_attribute(db, collection_from, attribute_on_collection_from) - query = "SELECT $(attribute.id) FROM $(attribute.table_where_is_located) ORDER BY rowid" + query = "SELECT $(attribute.id) FROM $(attribute.table_where_is_located)" df = DBInterface.execute(db.sqlite_db, query) |> DataFrame results = df[!, 1] num_results = length(results) @@ -309,7 +309,7 @@ function _get_scalar_relation_map( ids_in_collection_to = read_scalar_parameters(db, collection_to, "id") for i in 1:num_results if ismissing(results[i]) - map_of_indexes[i] = _opensql_default_value_for_type(Int) + map_of_indexes[i] = _psrdatabasesqlite_null_value(Int) else map_of_indexes[i] = findfirst(isequal(results[i]), ids_in_collection_to) end @@ -333,7 +333,7 @@ function read_vector_relations( names_in_collection_to = read_scalar_parameters(db, collection_to, "label") num_elements = length(names_in_collection_to) replace_dict = Dict{Any, String}(zip(collect(1:num_elements), names_in_collection_to)) - push!(replace_dict, _opensql_default_value_for_type(Int) => "") + push!(replace_dict, _psrdatabasesqlite_null_value(Int) => "") map_with_labels = Vector{Vector{String}}(undef, length(map_of_vector_with_indexes)) @@ -400,7 +400,7 @@ function _get_vector_relation_map( if isnothing(index_of_id_collection_to) push!( map_of_vector_with_indexes[index_of_id], - _opensql_default_value_for_type(Int), + _psrdatabasesqlite_null_value(Int), ) else push!(map_of_vector_with_indexes[index_of_id], index_of_id_collection_to) @@ -424,7 +424,7 @@ function read_time_series_file( attribute = _get_attribute(db, collection_id, attribute_id) table = attribute.table_where_is_located - query = "SELECT $(attribute.id) FROM $table ORDER BY rowid" + query = "SELECT $(attribute.id) FROM $table" df = DBInterface.execute(db.sqlite_db, query) |> DataFrame result = df[!, 1] if isempty(result) @@ -447,7 +447,7 @@ function _treat_query_result( ) type_of_attribute = _type(attribute) default = if isnothing(default) - _opensql_default_value_for_type(type_of_attribute) + _psrdatabasesqlite_null_value(type_of_attribute) else default end @@ -461,7 +461,7 @@ function _treat_query_result( ) where {T <: Union{Int64, Float64}} type_of_attribute = _type(attribute) default = if isnothing(default) - _opensql_default_value_for_type(type_of_attribute) + _psrdatabasesqlite_null_value(type_of_attribute) else if isa(default, type_of_attribute) default @@ -486,7 +486,7 @@ function _treat_query_result( ) type_of_attribute = _type(attribute) default = if isnothing(default) - _opensql_default_value_for_type(type_of_attribute) + _psrdatabasesqlite_null_value(type_of_attribute) else if isa(default, type_of_attribute) default @@ -514,10 +514,10 @@ _treat_query_result( ::Union{Nothing, Any}, ) where {T <: Union{Int64, Float64}} = results -_opensql_default_value_for_type(::Type{Float64}) = NaN -_opensql_default_value_for_type(::Type{Int64}) = typemin(Int64) -_opensql_default_value_for_type(::Type{String}) = "" -_opensql_default_value_for_type(::Type{DateTime}) = typemin(DateTime) +_psrdatabasesqlite_null_value(::Type{Float64}) = NaN +_psrdatabasesqlite_null_value(::Type{Int64}) = typemin(Int64) +_psrdatabasesqlite_null_value(::Type{String}) = "" +_psrdatabasesqlite_null_value(::Type{DateTime}) = typemin(DateTime) function _is_null_in_db(value::Float64) return isnan(value) diff --git a/src/PSRDatabaseSQLite/time_controller.jl b/src/PSRDatabaseSQLite/time_controller.jl new file mode 100644 index 00000000..b0cc441c --- 
/dev/null
+++ b/src/PSRDatabaseSQLite/time_controller.jl
@@ -0,0 +1,78 @@
+abstract type TimeSeriesRequestStatus end
+
+const CollectionAttributeElement = Tuple{String, String, Int}
+
+struct TimeSeriesDidNotChange <: TimeSeriesRequestStatus end
+struct TimeSeriesChanged <: TimeSeriesRequestStatus end
+
+# TODOs
+# We need to write a query function that returns the requested data for all ids.
+# If an id does not exist it will simply return missing. The query must return
+# the closest previous date for each id.
+
+mutable struct TimeSeriesElementCache
+    # The last date requested by the user
+    last_date_requested::DateTime
+    # The next available date after the last date requested
+    next_date_possible::DateTime
+end
+
+Base.@kwdef mutable struct TimeSeriesCache{T, N}
+    # Tell which dimensions were mapped in a given vector
+    # This is probably wrong
+    dimensions_mapped
+    data::Array{T, N} = fill(_psrdatabasesqlite_null_value(T), zeros(Int, N)...)
+end
+
+"""
+    TimeController
+
+TimeController in PSRDatabaseSQLite is a cache that allows PSRDatabaseSQLite to
+store information about the last timeseries query. This is useful to avoid
+re-querying the database when the same query is made multiple times.
+TimeController is a private behaviour and it only exists when querying all
+labels from a TimeSeries element.
+"""
+Base.@kwdef mutable struct TimeController
+    # The tuple stores the cache for a given collection id, attribute id and id of the element in a database
+    element_cache::Dict{CollectionAttributeElement, TimeSeriesElementCache} = Dict{CollectionAttributeElement, TimeSeriesElementCache}()
+end
+
+function closest_previous_date(
+    db::DatabaseSQLite,
+    attribute::Attribute,
+    date_time::DateTime
+)::DateTime
+    # Note the spaces around the table name: they keep the interpolated SQL valid.
+    closest_previous_date_query = string(
+        "SELECT DISTINCT date_time FROM ",
+        attribute.table_where_is_located,
+        " WHERE DATE(date_time) <= DATE('", date_time, "') ORDER BY DATE(date_time) DESC LIMIT 1")
+    result = DBInterface.execute(db.sqlite_db, closest_previous_date_query) |> DataFrame
+    # If the table holds no date at or before date_time, return the null sentinel.
+    if isempty(result)
+        return _psrdatabasesqlite_null_value(DateTime)
+    end
+    return DateTime(result[!, 1][1])
+end
+
+function closest_next_date(
+    db::DatabaseSQLite,
+    attribute::Attribute,
+    date_time::DateTime
+)::DateTime
+    closest_date_query_later = "SELECT DISTINCT date_time FROM $(attribute.table_where_is_located) WHERE DATE(date_time) > DATE('$(date_time)') ORDER BY DATE(date_time) ASC LIMIT 1"
+    result = DBInterface.execute(db.sqlite_db, closest_date_query_later) |> DataFrame
+    if isempty(result)
+        return _psrdatabasesqlite_null_value(DateTime)
+    end
+    return DateTime(result[!, 1][1])
+end
+
+function read_mapped_time_series(
+    db::DatabaseSQLite,
+    collection_id::String,
+    attribute_id::String,
+)
+    # TODO: not implemented yet; see the notes at the top of this file.
+end
\ No newline at end of file
diff --git a/src/PSRDatabaseSQLite/validate.jl b/src/PSRDatabaseSQLite/validate.jl
index 75728c12..ac2ac566 100644
--- a/src/PSRDatabaseSQLite/validate.jl
+++ b/src/PSRDatabaseSQLite/validate.jl
@@ -318,8 +318,8 @@ function _throw_if_not_scalar_attribute(
 )
     _throw_if_collection_or_attribute_do_not_exist(db, collection, attribute)
 
-    if _is_vector_parameter(db, collection, attribute) ||
-       _is_vector_relation(db, collection, attribute)
+    if !_is_scalar_parameter(db, collection, attribute) &&
+       !_is_scalar_relation(db, collection, attribute)
         psr_database_sqlite_error(
             "Attribute \"$attribute\" is not a scalar attribute.
You must input a vector for this attribute.", ) From 9002524bd3c6a8a473ef34fdfc1795dfa6c03197 Mon Sep 17 00:00:00 2001 From: pedroripper Date: Mon, 1 Jul 2024 14:12:36 -0300 Subject: [PATCH 09/34] Fix --- src/PSRDatabaseSQLite/read.jl | 2 +- test/PSRDatabaseSQLite/test_read/test_read.jl | 36 +++++++++++++++++-- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/src/PSRDatabaseSQLite/read.jl b/src/PSRDatabaseSQLite/read.jl index 753fbb6b..f3996620 100644 --- a/src/PSRDatabaseSQLite/read.jl +++ b/src/PSRDatabaseSQLite/read.jl @@ -221,7 +221,7 @@ function _read_time_series_df( end_date_query = "SELECT MAX(DATE($dim_name)) FROM $(attribute.table_where_is_located)" end_date = DBInterface.execute(db.sqlite_db, end_date_query) |> DataFrame # Query the nearest date before the provided date - closest_date_query_earlier = "SELECT DISTINCT $dim_name FROM $(attribute.table_where_is_located) WHERE DATE($dim_name) <= DATE('$(dim_value)') ORDER BY DATE($dim_name) DESC LIMIT 1" + closest_date_query_earlier = "SELECT DISTINCT $dim_name FROM $(attribute.table_where_is_located) WHERE $(attribute.id) IS NOT NULL AND DATE($dim_name) <= DATE('$(dim_value)') ORDER BY DATE($dim_name) DESC LIMIT 1" closest_date = DBInterface.execute(db.sqlite_db, closest_date_query_earlier) |> DataFrame date_to_equal_in_query = if dim_value > DateTime(end_date[!, 1][1]) DateTime(0) diff --git a/test/PSRDatabaseSQLite/test_read/test_read.jl b/test/PSRDatabaseSQLite/test_read/test_read.jl index a27ab996..c68c30a9 100644 --- a/test/PSRDatabaseSQLite/test_read/test_read.jl +++ b/test/PSRDatabaseSQLite/test_read/test_read.jl @@ -434,7 +434,7 @@ function test_read_timeseries_single() row.date_time, block = row.block, ) - @test ismissing(df.some_vector4[1]) + @test isempty(df.some_vector4) # two-element query @@ -602,7 +602,7 @@ function test_read_timeseries_single() PSRDatabaseSQLite.close!(db) GC.gc() GC.gc() - rm(db_path) + # rm(db_path) @test true return nothing end @@ -693,6 +693,38 @@ function test_read_timeseries_multiple() return nothing end +function test_read_wrong_date() + path_schema = joinpath(@__DIR__, "test_read_time_series.sql") + db_path = joinpath(@__DIR__, "test_read_time_series.sqlite") + db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) + + PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case", value1 = 1.0) + + df = DataFrame(; + date_time = [DateTime(2000), DateTime(2001), DateTime(2002)], + some_vector1 = [1.0, 2.0, missing], + some_vector2 = [2.0, 3.0, 4.0], + ) + + PSRDatabaseSQLite.create_element!( + db, + "Resource"; + label = "Resource 1", + group1 = df, + ) + + df = PSRDatabaseSQLite.read_time_series_df( + db, + "Resource", + "some_vector1", + "Resource 1"; + date_time = DateTime(2002), + ) + + @test df.date_time == string.([DateTime(2001)]) + @test df.some_vector1 == [2.0] +end + function runtests() Base.GC.gc() Base.GC.gc() From 69d466d11649993acc7f842fcd38c92f21afab6a Mon Sep 17 00:00:00 2001 From: pedroripper Date: Mon, 1 Jul 2024 15:50:34 -0300 Subject: [PATCH 10/34] Add script --- script copy.jl | 128 ++++++++++++++++++++++++++++++++++++++++++++ time_controller.sql | 27 ++++++++++ 2 files changed, 155 insertions(+) create mode 100644 script copy.jl create mode 100644 time_controller.sql diff --git a/script copy.jl b/script copy.jl new file mode 100644 index 00000000..5f4fe6c3 --- /dev/null +++ b/script copy.jl @@ -0,0 +1,128 @@ +using PSRClassesInterface.PSRDatabaseSQLite +using SQLite +using DataFrames +using Dates +using Test + 
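+# Scratch benchmark script: it seeds a database with 500 "Resource" elements,
+# each carrying an 81-entry time series (DateTime(0) plus the years 1900 to
+# 1979), and then reads every (resource, date) pair back to stress the
+# time-series read path.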
+abstract type TimeSeriesRequestStatus end
+
+const CollectionAttributeElement = Tuple{String, String, Int}
+
+struct TimeSeriesDidNotChange <: TimeSeriesRequestStatus end
+struct TimeSeriesChanged <: TimeSeriesRequestStatus end
+
+mutable struct TimeSeriesElementCache
+    # The last date requested by the user
+    last_date_requested::DateTime
+    # The next available date after the last date requested
+    next_date_possible::DateTime
+end
+
+# mutable struct TimeSeriesCache{T, N}
+#     # Tell which dimensions were mapped in a given vector
+#     # This is probably wrong
+#     dimensions_mapped
+#     data::Array{T, N} = fill(_psrdatabasesqlite_null_value(T), zeros(Int, N)...)
+# end
+
+# db_path = joinpath(@__DIR__, "test_create_time_series.sqlite")
+# GC.gc()
+# GC.gc()
+# if isfile(db_path)
+#     rm(db_path)
+# end
+
+function test_create_time_series()
+    path_schema = raw"C:\Users\pedroripper\Documents\Github\PSRClassesInterface.jl\time_controller.sql"
+    db_path = joinpath(@__DIR__, "test_create_time_series.sqlite")
+    GC.gc()
+    GC.gc()
+    if isfile(db_path)
+        rm(db_path)
+    end
+
+    db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true)
+    PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case", value1 = 1.0)
+    # Wrap all inserts in a single transaction so the 500 elements are committed once.
+    PSRDatabaseSQLite.SQLite.transaction(db.sqlite_db) do
+        for i in 1:500
+            df_timeseries_group1 = DataFrame(
+                date_time = vcat([DateTime(0)], [DateTime(y) for y in 1900:1979]),
+                some_vector1 = vcat([missing], [j for j in 1:80] .* i),
+                some_vector2 = vcat([1.0], [missing for j in 1:10], [j for j in 1:10] .* i, [missing for j in 1:60]),
+                some_vector3 = vcat([1.0], [missing for j in 1:80]),
+                some_vector4 = vcat([missing], [missing for j in 1:80]),
+            )
+            PSRDatabaseSQLite.create_element!(
+                db,
+                "Resource";
+                label = "Resource $i",
+                group1 = df_timeseries_group1,
+            )
+            println(i)
+        end
+    end
+
+    PSRDatabaseSQLite.close!(db)
+end
+
+function test_read_time_series()
+    db_path = joinpath(@__DIR__, "test_create_time_series.sqlite")
+
+    db = PSRDatabaseSQLite.load_db(db_path)
+
+    # Data seeded per resource i by test_create_time_series:
+    #   some_vector1 = vcat([missing], (1:80) .* i)
+    #   some_vector2 = vcat([1.0], 10 missings, (1:10) .* i, 60 missings)
+    #   some_vector3 = vcat([1.0], 80 missings)
+    #   some_vector4 = 81 missings
+
+    # all the agents for each time step
+    # try to read each date several times
+
+    for date_time in [DateTime(y) for y in 1900:1979]
+        for i in 1:500
+            results = PSRDatabaseSQLite.read_time_series_df(
+                db,
+                "Resource",
+                "some_vector1",
+                "Resource $i";
+                date_time = date_time
+            )
+            @assert results.date_time == string.([date_time])
+
+            results = PSRDatabaseSQLite.read_time_series_df(
+                db,
+                "Resource",
+                "some_vector2",
+                "Resource $i";
+                date_time = date_time
+            )
+            @assert results.date_time == string.([date_time])
+
+            # read the same attribute again (a repeated query, e.g. to exercise caching)
+            results = PSRDatabaseSQLite.read_time_series_df(
+                db,
+                "Resource",
+                "some_vector2",
+                "Resource $i";
+                date_time = date_time
+            )
+            @assert results.date_time == string.([date_time])
+            println(date_time)
+        end
+    end
+
+    PSRDatabaseSQLite.close!(db)
+    # rm(db_path)
+    # @test true
+end
+
+test_create_time_series()
+# test_read_time_series()
\ No newline at end of file
diff --git a/time_controller.sql b/time_controller.sql
new file mode 100644
index 00000000..271147ea
--- /dev/null
+++ b/time_controller.sql
@@ -0,0 +1,27 @@
+PRAGMA user_version = 1;
+PRAGMA foreign_keys = ON;
+
+CREATE TABLE Configuration (
+    id INTEGER PRIMARY KEY
AUTOINCREMENT, + label TEXT UNIQUE NOT NULL, + value1 REAL NOT NULL DEFAULT 100, + enum1 TEXT NOT NULL DEFAULT 'A' CHECK(enum1 IN ('A', 'B', 'C')) +) STRICT; + + +CREATE TABLE Resource ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + label TEXT UNIQUE NOT NULL, + type TEXT NOT NULL DEFAULT "D" +) STRICT; + +CREATE TABLE Resource_timeseries_group1 ( + id INTEGER, + date_time TEXT NOT NULL, + some_vector1 REAL, + some_vector2 REAL, + some_vector3 REAL, + some_vector4 REAL, + FOREIGN KEY(id) REFERENCES Resource(id) ON DELETE CASCADE ON UPDATE CASCADE, + PRIMARY KEY (id, date_time) +) STRICT; \ No newline at end of file From fe57b520a4114201634da1fbf0c7c494e10d6c7a Mon Sep 17 00:00:00 2001 From: guilhermebodin Date: Mon, 1 Jul 2024 19:22:29 -0300 Subject: [PATCH 11/34] first try of time controller --- .gitignore | 1 + profiling/Project.toml | 3 + profiling/create_profile.jl | 13 ++ profiling/open_profile.jl | 8 + script copy.jl | 128 -------------- script.jl | 11 -- script_time_controller.jl | 76 ++++++++ src/PSRDatabaseSQLite/PSRDatabaseSQLite.jl | 1 + src/PSRDatabaseSQLite/database_sqlite.jl | 20 ++- src/PSRDatabaseSQLite/read.jl | 37 ++-- src/PSRDatabaseSQLite/time_controller.jl | 196 ++++++++++++++++----- src/PSRDatabaseSQLite/utils.jl | 2 +- time_controller.sql | 1 - 13 files changed, 297 insertions(+), 200 deletions(-) create mode 100644 profiling/Project.toml create mode 100644 profiling/create_profile.jl create mode 100644 profiling/open_profile.jl delete mode 100644 script copy.jl create mode 100644 script_time_controller.jl diff --git a/.gitignore b/.gitignore index 54d595e3..177a3129 100644 --- a/.gitignore +++ b/.gitignore @@ -26,4 +26,5 @@ Manifest.toml *.out *.ok debug_psrclasses +*.gz *.sqlite \ No newline at end of file diff --git a/profiling/Project.toml b/profiling/Project.toml new file mode 100644 index 00000000..31ced8d1 --- /dev/null +++ b/profiling/Project.toml @@ -0,0 +1,3 @@ +[deps] +PProf = "e4faabce-9ead-11e9-39d9-4379958e3056" +Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79" diff --git a/profiling/create_profile.jl b/profiling/create_profile.jl new file mode 100644 index 00000000..2142a68e --- /dev/null +++ b/profiling/create_profile.jl @@ -0,0 +1,13 @@ +# You should run the script from the profiling directory + +using Profile +using PProf +import Pkg +root_path = dirname(@__DIR__) +Pkg.activate(root_path) +using PSRClassesInterface + + +include("../script_time_controller.jl") +@profile include("../script_time_controller.jl") +pprof() diff --git a/profiling/open_profile.jl b/profiling/open_profile.jl new file mode 100644 index 00000000..5fcff148 --- /dev/null +++ b/profiling/open_profile.jl @@ -0,0 +1,8 @@ +# You should run the script from the profiling directory + +using Profile +using PProf + +file_name = "profile.pb.gz" + +PProf.refresh(file=file_name, webport = 57998) diff --git a/script copy.jl b/script copy.jl deleted file mode 100644 index 5f4fe6c3..00000000 --- a/script copy.jl +++ /dev/null @@ -1,128 +0,0 @@ -using PSRClassesInterface.PSRDatabaseSQLite -using SQLite -using DataFrames -using Dates -using Test - -abstract type TimeSeriesRequestStatus end - - -const CollectionAttributeElement = Tuple{String, String, Int} - -struct TimeSeriesDidNotChange <: TimeSeriesRequestStatus end -struct TimeSeriesChanged <: TimeSeriesRequestStatus end - -mutable struct TimeSeriesElementCache - # The last date requested by the user - last_date_requested::DateTime - # The next available date after the last date requested - next_date_possible::DateTime -end - -# mutable struct 
TimeSeriesCache{T, N} -# # Tell which dimensions were mapped in a given vector -# # This is probably wrong -# dimensions_mapped -# data::Array{T, N} = fill(_psrdatabasesqlite_null_value(T), zeros(Int, N)...) -# end - - -# db_path = joinpath(@__DIR__, "test_create_time_series.sqlite") -# GC.gc() -# GC.gc() -# if isfile(db_path) -# rm(db_path) -# end - -function test_create_time_series() - path_schema = raw"C:\Users\pedroripper\Documents\Github\PSRClassesInterface.jl\time_controller.sql" - db_path = joinpath(@__DIR__, "test_create_time_series.sqlite") - db_path = joinpath(@__DIR__, "test_create_time_series.sqlite") - GC.gc() - GC.gc() - if isfile(db_path) - rm(db_path) - end - - db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) - PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case", value1 = 1.0) - PSRDatabaseSQLite.SQLite.transaction(db.sqlite_db) do - for i in 1:500 - df_timeseries_group1 = DataFrame( - date_time = vcat([DateTime(0)],[DateTime(i) for i in 1900:1979]), - some_vector1 = vcat([missing],[j for j in 1:80] .* i), - some_vector2 = vcat([1.0],[missing for j in 1:10], [j for j in 1:10] .* i, [missing for j in 1:60]), - some_vector3 = vcat([1.0], [missing for j in 1:80]), - some_vector4 = vcat([missing], [missing for j in 1:80]), - ) - PSRDatabaseSQLite.create_element!( - db, - "Resource"; - label = "Resource $i", - group1 = df_timeseries_group1, - ) - println(i) - end - end - - PSRDatabaseSQLite.close!(db) -end - -function test_read_time_series() - - db_path = joinpath(@__DIR__, "test_create_time_series.sqlite") - - db = PSRDatabaseSQLite.load_db(db_path) - - some_vector1 = vcat([missing],[j for j in 1:80] .* i) - some_vector2 = vcat([1.0],[missing for j in 1:10], [j for j in 1:10] .* i, [missing for j in 1:60]) - some_vector3 = vcat([1.0], [missing for j in 1:80]) - some_vector4 = vcat([missing], [missing for j in 1:80]) - - # todos os agentes p cada tempo - - - # tenta ler 10 vezes cada data - - - for date_time in [DateTime(i) for i in 1900:1979] - for i in 1:500 - results = PSRDatabaseSQLite.read_time_series_df( - db, - "Resource", - "some_vector1", - "Resource $i"; - date_time = date_time - ) - @assert results.date_time[1] == string.([date_time]) - - results = PSRDatabaseSQLite.read_time_series_df( - db, - "Resource", - "some_vector2", - "Resource $i"; - date_time = date_time - ) - @assert results.date_time[1] == string.([date_time]) - - results = PSRDatabaseSQLite.read_time_series_df( - db, - "Resource", - "some_vector2", - "Resource $i"; - date_time = date_time - ) - @assert results.date_time[1] == string.([date_time]) - println(date_time) - end - - end - - - PSRDatabaseSQLite.close!(db) - # rm(db_path) - # @test true -end - -test_create_time_series() -# test_read_time_series() \ No newline at end of file diff --git a/script.jl b/script.jl index 890e86cb..f10ca803 100644 --- a/script.jl +++ b/script.jl @@ -70,17 +70,6 @@ function test_create_time_series() ) @show results - @show labels = PSRDatabaseSQLite.read_scalar_parameters(db, "Resource", "label") - - results = PSRDatabaseSQLite.read_time_series_dfs( - db, - "Resource", - "some_vector5"; - date_time = DateTime(2010) - ) - @show results - - PSRDatabaseSQLite.close!(db) rm(db_path) @test true diff --git a/script_time_controller.jl b/script_time_controller.jl new file mode 100644 index 00000000..79d5ad71 --- /dev/null +++ b/script_time_controller.jl @@ -0,0 +1,76 @@ +using PSRClassesInterface.PSRDatabaseSQLite +using SQLite +using DataFrames +using Dates +using Test 
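+# Editor's note: the create step below wraps all 500 create_element! calls in a
+# single SQLite transaction. Without it, SQLite by default commits (and fsyncs)
+# one implicit transaction per INSERT, which makes bulk loads dramatically
+# slower. The bare pattern, as a sketch (`populate!` is a placeholder, not
+# library API):
+#
+#     SQLite.transaction(db.sqlite_db) do
+#         populate!(db)  # many create_element! calls
+#     end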
+
+function test_create_time_series()
+    path_schema = joinpath(@__DIR__, "time_controller.sql")
+    db_path = joinpath(@__DIR__, "test_create_time_series.sqlite")
+    GC.gc()
+    GC.gc()
+    if isfile(db_path)
+        rm(db_path)
+    end
+
+    db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true)
+    PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case", value1 = 1.0)
+    PSRDatabaseSQLite.SQLite.transaction(db.sqlite_db) do
+        for i in 1:500
+            df_timeseries_group1 = DataFrame(
+                date_time = vcat([DateTime(0)], [DateTime(i) for i in 1900:1979]),
+                some_vector1 = vcat([missing], [j for j in 1:80] .* i),
+                some_vector2 = vcat([1.0], [missing for j in 1:10], [j for j in 1:10] .* i, [missing for j in 1:60]),
+                some_vector3 = vcat([1.0], [missing for j in 1:80]),
+                some_vector4 = vcat([missing], [missing for j in 1:80]),
+            )
+            PSRDatabaseSQLite.create_element!(
+                db,
+                "Resource";
+                label = "Resource $i",
+                group1 = df_timeseries_group1,
+            )
+        end
+    end
+
+    PSRDatabaseSQLite.close!(db)
+end
+
+function test_read_time_series()
+    db_path = joinpath(@__DIR__, "test_create_time_series.sqlite")
+
+    db = PSRDatabaseSQLite.load_db(db_path; read_only = true)
+
+    for date_time in [DateTime(i) for i in 1900:1901]
+        @show date_time
+        for i in 1:50
+            PSRDatabaseSQLite.read_mapped_timeseries(
+                db,
+                "Resource",
+                "some_vector1",
+                date_time = date_time
+            )
+
+            PSRDatabaseSQLite.read_mapped_timeseries(
+                db,
+                "Resource",
+                "some_vector2",
+                date_time = date_time
+            )
+
+            PSRDatabaseSQLite.read_mapped_timeseries(
+                db,
+                "Resource",
+                "some_vector3",
+                date_time = date_time
+            )
+        end
+    end
+
+    PSRDatabaseSQLite.close!(db)
+    rm(db_path)
+end
+
+test_create_time_series()
+test_read_time_series()
\ No newline at end of file
diff --git a/src/PSRDatabaseSQLite/PSRDatabaseSQLite.jl b/src/PSRDatabaseSQLite/PSRDatabaseSQLite.jl
index 284f1b8a..7bf589bf 100644
--- a/src/PSRDatabaseSQLite/PSRDatabaseSQLite.jl
+++ b/src/PSRDatabaseSQLite/PSRDatabaseSQLite.jl
@@ -20,6 +20,7 @@ include("exceptions.jl")
 include("utils.jl")
 include("attribute.jl")
 include("collection.jl")
+include("time_controller.jl")
 include("database_sqlite.jl")
 include("create.jl")
 include("read.jl")
diff --git a/src/PSRDatabaseSQLite/database_sqlite.jl b/src/PSRDatabaseSQLite/database_sqlite.jl
index 243e283a..9587b74d 100644
--- a/src/PSRDatabaseSQLite/database_sqlite.jl
+++ b/src/PSRDatabaseSQLite/database_sqlite.jl
@@ -1,8 +1,17 @@
-mutable struct DatabaseSQLite
+Base.@kwdef mutable struct DatabaseSQLite
    sqlite_db::SQLite.DB
    collections_map::OrderedDict{String, Collection}
+    read_only::Bool = false
+    # TimeController is a cache that allows PSRDatabaseSQLite to store
+    # information about the last time series query. This is useful for avoiding
+    # re-querying the database when the same query is made multiple times.
+    # The TimeController is private behaviour and it is only used when the
+    # database is opened in read-only mode.
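+    # A minimal usage sketch (editor's illustration; `db_path` is a placeholder):
+    #
+    #     db = PSRDatabaseSQLite.load_db(db_path; read_only = true)
+    #     PSRDatabaseSQLite.read_mapped_timeseries(
+    #         db, "Resource", "some_vector1"; date_time = DateTime(1900),
+    #     )  # the first call fills the cache; nearby dates are answered from it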
+ _time_controller::TimeController = TimeController() end +_is_read_only(db::DatabaseSQLite) = db.read_only + function _set_default_pragmas!(db::SQLite.DB) _set_foreign_keys_on!(db) _set_busy_timeout!(db, 5000) @@ -42,7 +51,7 @@ function DatabaseSQLite_from_schema( rethrow(e) end - db = DatabaseSQLite( + db = DatabaseSQLite(; sqlite_db, collections_map, ) @@ -76,7 +85,7 @@ function DatabaseSQLite_from_migrations( rethrow(e) end - db = DatabaseSQLite( + db = DatabaseSQLite(; sqlite_db, collections_map, ) @@ -89,7 +98,7 @@ function DatabaseSQLite( read_only::Bool = false, ) sqlite_db = - read_only ? SQLite.DB("file:" * database_path * "?mode=ro&immutable=1") : + # read_only ? SQLite.DB("file:" * database_path * "?mode=ro&immutable=1") : SQLite.DB(database_path) _set_default_pragmas!(sqlite_db) @@ -102,9 +111,10 @@ function DatabaseSQLite( rethrow(e) end - db = DatabaseSQLite( + db = DatabaseSQLite(; sqlite_db, collections_map, + read_only ) return db end diff --git a/src/PSRDatabaseSQLite/read.jl b/src/PSRDatabaseSQLite/read.jl index 3737e62c..100b3162 100644 --- a/src/PSRDatabaseSQLite/read.jl +++ b/src/PSRDatabaseSQLite/read.jl @@ -195,6 +195,27 @@ function read_time_series_df( ) end +function end_date_query(db::DatabaseSQLite, attribute::Attribute) + # First checks if the date or dimension value is within the range of the data. + # Then it queries the closest date before the provided date. + # If there is no date query the data with date 0 (which will probably return no data.) + end_date_query = "SELECT MAX(DATE(date_time)) FROM $(attribute.table_where_is_located)" + end_date = DBInterface.execute(db.sqlite_db, end_date_query) |> DataFrame + if isempty(end_date) + return DateTime(0) + end + return DateTime(end_date[!, 1][1]) +end + +function closest_date_query(db::DatabaseSQLite, attribute::Attribute, dim_value::DateTime) + closest_date_query_earlier = "SELECT DISTINCT date_time FROM $(attribute.table_where_is_located) WHERE $(attribute.id) IS NOT NULL AND DATE(date_time) <= DATE('$(dim_value)') ORDER BY DATE(date_time) DESC LIMIT 1" + closest_date = DBInterface.execute(db.sqlite_db, closest_date_query_earlier) |> DataFrame + if isempty(closest_date) + return DateTime(0) + end + return DateTime(closest_date[!, 1][1]) +end + function _read_time_series_df( db::DatabaseSQLite, collection_id::String, @@ -215,20 +236,12 @@ function _read_time_series_df( if read_exact_date query *= "DATE($dim_name) = DATE('$(dim_value)')" else - # First checks if the date or dimension value is within the range of the data. - # Then it queries the closest date before the provided date. - # If there is no date query the data with date 0 (which will probably return no data.) 
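# (Editor's worked example of the fallback these deleted comments describe, now
# implemented by end_date_query / closest_date_query above.) Suppose an
# attribute only has rows at DateTime(2000) and DateTime(2002):
#   - a request for DateTime(2001) resolves to the closest previous date with
#     data, DATE('2000-01-01'), so the value stored for 2000 is returned;
#   - a request for DateTime(2003) is past the maximum date in the table, so the
#     lookup date becomes DateTime(0) and the query matches no rows.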
- end_date_query = "SELECT MAX(DATE($dim_name)) FROM $(attribute.table_where_is_located)" - end_date = DBInterface.execute(db.sqlite_db, end_date_query) |> DataFrame - # Query the nearest date before the provided date - closest_date_query_earlier = "SELECT DISTINCT $dim_name FROM $(attribute.table_where_is_located) WHERE $(attribute.id) IS NOT NULL AND DATE($dim_name) <= DATE('$(dim_value)') ORDER BY DATE($dim_name) DESC LIMIT 1" - closest_date = DBInterface.execute(db.sqlite_db, closest_date_query_earlier) |> DataFrame - date_to_equal_in_query = if dim_value > DateTime(end_date[!, 1][1]) - DateTime(0) - elseif isempty(closest_date) + end_date = end_date_query(db, attribute) + closest_date = closest_date_query(db, attribute, dim_value) + date_to_equal_in_query = if dim_value > end_date DateTime(0) else - closest_date[!, 1][1] + closest_date end # query the closest date and make it equal to the provided date. query *= "DATE($dim_name) = DATE('$(date_to_equal_in_query)')" diff --git a/src/PSRDatabaseSQLite/time_controller.jl b/src/PSRDatabaseSQLite/time_controller.jl index b0cc441c..e6208db8 100644 --- a/src/PSRDatabaseSQLite/time_controller.jl +++ b/src/PSRDatabaseSQLite/time_controller.jl @@ -1,68 +1,180 @@ abstract type TimeSeriesRequestStatus end -const CollectionAttributeElement = Tuple{String, String, Int} - struct TimeSeriesDidNotChange <: TimeSeriesRequestStatus end struct TimeSeriesChanged <: TimeSeriesRequestStatus end -# TODOs -# We need to write a query function that will return a certain data for all ids -# If an id does not exist it will simply return missing. The query must return -# the closest previous date for each id. +const CollectionAttribute = Tuple{String, String} -mutable struct TimeSeriesElementCache - # The last date requested by the user +mutable struct TimeControllerCache{T} + data::Vector{T} + # Control of dates requested per element in a given pair collection attribute + closest_previous_date_with_data::Vector{DateTime} last_date_requested::DateTime - # The next available date after the last date requested - next_date_possible::DateTime + closest_next_date_with_data::Vector{DateTime} + + # Private caches with the closest previous and next dates + # _closest_previous_date_with_data = maximum(closest_previous_date_with_data) + # _closest_next_date_with_data = minimum(closest_next_date_with_data) + _closest_previous_date_with_data::DateTime + _closest_next_date_with_data::DateTime + + # Cache of collection_ids + _collection_ids::Vector{Int} end -mutable struct TimeSeriesCache{T, N} - # Tell which dimensions were mapped in a given vector - # This is probably wrong - dimensions_mapped - data::Array{T, N} = fill(_psrdatabasesqlite_null_value(T), zeros(Int, N)...) +Base.@kwdef mutable struct TimeController + cache::Dict{CollectionAttribute, TimeControllerCache} = Dict{CollectionAttribute, TimeControllerCache}() end -""" - TimeController +function _collection_attribute(collection_id::String, attribute_id::String)::CollectionAttribute + return (collection_id, attribute_id) +end -TimeController in PSRDatabaseSQLite is a cache that allows PSRDatabaseSQLite to -store information about the last timeseries query. This is useful for avoiding to -re-query the database when the same query is made multiple times. TimeController -is a private behaviour and it only exists when querying all labels from a TimeSeries -element. 
-""" -Base.@kwdef mutable struct TimeController - # The tuple stores the cache for a given collection id, attribute id and id of the element in a database - element_cache::Dict{CollectionAttributeElement, TimeSeriesElementCache} = Dict{CollectionAttributeElement, TimeSeriesElementCache}() +function _closes_previous_date_with_data( + db, + attribute::Attribute, + id::Int, + date_time::DateTime +) + # TODO this query could probably be optimized + # It is reading many things that are not necessary + # And filtering and sorting in the end + query = """ + SELECT date_time + FROM $(attribute.table_where_is_located) + WHERE $(attribute.id) IS NOT NULL AND DATE(date_time) < DATE('$date_time') AND id = '$id' + ORDER BY date_time DESC + LIMIT 1 + """ + result = DBInterface.execute(db.sqlite_db, query) + # See how to get the query without the need to convert into DataFrame + # If it is empty what should we return? + return result +end + +function _closes_next_date_with_data( + db, + attribute::Attribute, + id::Int, + date_time::DateTime +) + # TODO this query could probably be optimized + # It is reading many things that are not necessary + # And filtering and sorting in the end + query = """ + SELECT date_time + FROM $(attribute.table_where_is_located) + WHERE $(attribute.id) IS NOT NULL AND DATE(date_time) > DATE('$date_time') AND id = '$id' + ORDER BY date_time ASC + LIMIT 1 + """ + result = DBInterface.execute(db.sqlite_db, query) + # See how to get the query without the need to convert into DataFrame + # If it is empty what should we return? + return result +end + +function _update_global_closest_dates_with_data!( + cache::TimeControllerCache +) + cache._closest_previous_date_with_data = maximum(closest_previous_date_with_data) + cache._closest_next_date_with_data = minimum(closest_next_date_with_data) end -function closest_previous_date( - db::DatabaseSQLite, +function _start_time_controller_cache( + db, attribute::Attribute, date_time::DateTime -)::DateTime - closest_previous_date_query = string( - "SELECT DISTINCT date_time FROM", - attribute.table_where_is_located, - "WHERE DATE(date_time) <= DATE('", date_time, "') ORDER BY DATE(date_time) DESC LIMIT 1") - result = DBInterface.execute(db.sqlite_db, closest_previous_date_query) +) + ids = read_scalar_parameters(db, attribute.parent_collection, "id") + closest_previous_date_with_data = Vector{DateTime}(undef, length(ids)) + closest_next_date_with_data = Vector{DateTime}(undef, length(ids)) + for (i, id) in enumerate(ids) + closest_previous_date_with_data[i] = _closes_previous_date_with_data(db, attribute, id, date_time) + closest_next_date_with_data[i] = _closes_next_date_with_data(db, attribute, id, date_time) + _collection_ids[i] = id + end + _closest_previous_date_with_data = maximum(closest_previous_date_with_data) + _closest_next_date_with_data = minimum(closest_next_date_with_data) + # Query the data for the first time + for (i, id) in enumerate(ids) + data = _request_time_series_data_for_time_controller_cache(db, attribute, id, closest_previous_date_with_data[i]) + cache.data[i] = data + end + + return TimeControllerCache( + data, + closest_previous_date_with_data, + date_time, + closest_next_date_with_data, + _closest_previous_date_with_data, + _closest_next_date_with_data, + _collection_ids, + ) end -function closest_next_date( - db::DatabaseSQLite, +function _request_time_series_data_for_time_controller_cache( + db, attribute::Attribute, + id::Int, date_time::DateTime -)::DateTime - closest_date_query_later = "SELECT DISTINCT 
date_time FROM $(attribute.table_where_is_located) WHERE DATE(date_time) > DATE('$(date_time)') ORDER BY DATE(date_time) ASC LIMIT 1" +) + query = """ + SELECT $(attribute.id) + FROM $(attribute.table_where_is_located) + WHERE id = $id AND date_time = $date_time + """ + result = DBInterface.execute(db.sqlite_db, query) + # See how to get the query without the need to convert into DataFrame + # If it is empty what should we return? + return result end -function read_mapped_time_series( - db::DatabaseSQLite, - collection_id::String, - attribute_id::String, +function _update_time_controller_cache!( + cache::TimeControllerCache, + db, + date_time::DateTime ) + cache.last_date_requested = date_time + for (i, id) in enumerate(cache._collection_ids) + # If date is whitin the range we do not need to update anything + if cache.closest_previous_date_with_data[i] < date_time < cache.closest_previous_date_with_data[i] + continue + end + cache.closest_previous_date_with_data[i] = _closes_previous_date_with_data(db, attribute, id, date_time) + cache.closest_next_date_with_data[i] = _closes_next_date_with_data(db, attribute, id, date_time) + cache.data[i] = _request_time_series_data_for_time_controller_cache(db, attribute, id, closest_previous_date_with_data[i]) + end + _update_global_closest_dates_with_data!(cache) + return nothing +end +function read_mapped_timeseries( + db, + collection_id::String, + attribute_id::String; + date_time::DateTime +) + _throw_if_attribute_is_not_time_series( + db, + collection_id, + attribute_id, + :read, + ) + @assert _is_read_only(db) "Time series mapping only works in read only databases" + collection_attribute = _collection_attribute(collection_id, attribute_id) + attribute = _get_attribute(db, collection_id, attribute_id) + if !haskey(db._time_controller.cache, collection_attribute) + db._time_controller.cache[collection_attribute] = _start_time_controller_cache(db, attribute, date_time) + end + cache = db._time_controller.cache[collection_attribute] + # If we don`t need to update anything we just return the data + if cache._closest_previous_date_with_data < date_time < cache._closest_next_date_with_data + cache.last_date_requested = date_time + return cache.data + end + # If we need to update the cache we update the dates and the data + _update_time_controller_cache!(cache, db, date_time) + return cache.data end \ No newline at end of file diff --git a/src/PSRDatabaseSQLite/utils.jl b/src/PSRDatabaseSQLite/utils.jl index 5aeffe47..dcc3e0d8 100644 --- a/src/PSRDatabaseSQLite/utils.jl +++ b/src/PSRDatabaseSQLite/utils.jl @@ -75,7 +75,7 @@ end function load_db(database_path::String; read_only::Bool = false) db = try DatabaseSQLite( - database_path; + database_path, read_only = read_only, ) catch e diff --git a/time_controller.sql b/time_controller.sql index 271147ea..4ec4c837 100644 --- a/time_controller.sql +++ b/time_controller.sql @@ -8,7 +8,6 @@ CREATE TABLE Configuration ( enum1 TEXT NOT NULL DEFAULT 'A' CHECK(enum1 IN ('A', 'B', 'C')) ) STRICT; - CREATE TABLE Resource ( id INTEGER PRIMARY KEY AUTOINCREMENT, label TEXT UNIQUE NOT NULL, From 6dca9f3773df8b4eb3c71c3c81db8034ef80ac78 Mon Sep 17 00:00:00 2001 From: guilhermebodin Date: Mon, 1 Jul 2024 23:25:07 -0300 Subject: [PATCH 12/34] minimally working --- script_time_controller.jl | 32 ++++-- src/PSRDatabaseSQLite/time_controller.jl | 130 +++++++++++++---------- 2 files changed, 100 insertions(+), 62 deletions(-) diff --git a/script_time_controller.jl b/script_time_controller.jl index 79d5ad71..50a2c420 100644 
--- a/script_time_controller.jl +++ b/script_time_controller.jl @@ -41,36 +41,56 @@ function test_read_time_series() db = PSRDatabaseSQLite.load_db(db_path; read_only = true) - for date_time in [DateTime(i) for i in 1900:1901] + times = zeros(4) + + + for (j, date_time) in enumerate([DateTime(i) for i in 1900:1901]) @show date_time for i in 1:50 - PSRDatabaseSQLite.read_mapped_timeseries( + t1 = @timed PSRDatabaseSQLite.read_mapped_timeseries( db, "Resource", "some_vector1", + Float64, date_time = date_time ) - PSRDatabaseSQLite.read_mapped_timeseries( + t2 = @timed PSRDatabaseSQLite.read_mapped_timeseries( db, "Resource", "some_vector2", + Float64, date_time = date_time ) - PSRDatabaseSQLite.read_mapped_timeseries( + t3 = @timed PSRDatabaseSQLite.read_mapped_timeseries( db, "Resource", "some_vector3", + Float64, date_time = date_time ) + + t4 = @timed PSRDatabaseSQLite.read_mapped_timeseries( + db, + "Resource", + "some_vector4", + Float64, + date_time = date_time + ) + + times .+= [t1.time, t2.time, t3.time, t4.time] end end + @show times + PSRDatabaseSQLite.close!(db) rm(db_path) end -test_create_time_series() -test_read_time_series() \ No newline at end of file +@testset "Time Controller" begin + test_create_time_series() + test_read_time_series() +end \ No newline at end of file diff --git a/src/PSRDatabaseSQLite/time_controller.jl b/src/PSRDatabaseSQLite/time_controller.jl index e6208db8..6cae9c17 100644 --- a/src/PSRDatabaseSQLite/time_controller.jl +++ b/src/PSRDatabaseSQLite/time_controller.jl @@ -5,6 +5,8 @@ struct TimeSeriesChanged <: TimeSeriesRequestStatus end const CollectionAttribute = Tuple{String, String} +const DATETIME_FORMAT = "yyyy-mm-dd HH:MM:SS" + mutable struct TimeControllerCache{T} data::Vector{T} # Control of dates requested per element in a given pair collection attribute @@ -15,8 +17,8 @@ mutable struct TimeControllerCache{T} # Private caches with the closest previous and next dates # _closest_previous_date_with_data = maximum(closest_previous_date_with_data) # _closest_next_date_with_data = minimum(closest_next_date_with_data) - _closest_previous_date_with_data::DateTime - _closest_next_date_with_data::DateTime + _closest_global_previous_date_with_data::DateTime + _closest_global_next_date_with_data::DateTime # Cache of collection_ids _collection_ids::Vector{Int} @@ -30,86 +32,97 @@ function _collection_attribute(collection_id::String, attribute_id::String)::Col return (collection_id, attribute_id) end -function _closes_previous_date_with_data( +function _closest_previous_date_with_data( db, attribute::Attribute, id::Int, date_time::DateTime -) +)::DateTime # TODO this query could probably be optimized # It is reading many things that are not necessary # And filtering and sorting in the end query = """ - SELECT date_time + SELECT MAX(DATETIME(date_time)) FROM $(attribute.table_where_is_located) - WHERE $(attribute.id) IS NOT NULL AND DATE(date_time) < DATE('$date_time') AND id = '$id' - ORDER BY date_time DESC - LIMIT 1 + WHERE $(attribute.id) IS NOT NULL AND DATETIME(date_time) <= DATETIME('$date_time') AND id = '$id' """ result = DBInterface.execute(db.sqlite_db, query) - # See how to get the query without the need to convert into DataFrame - # If it is empty what should we return? 
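# (Editor's note) The rewrite below answers the question in the deleted comment
# above: an aggregate such as SELECT MAX(...) always returns exactly one row, so
# the `for row in result` loop runs once, and a `missing` aggregate (no matching
# dates) is mapped to the sentinels typemin(DateTime) / typemax(DateTime), which
# the cache treats as "no data on this side".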
- return result + for row in result + answer = row[1] + if ismissing(answer) + return typemin(DateTime) + end + return DateTime(answer, DATETIME_FORMAT) + end end -function _closes_next_date_with_data( +function _closest_next_date_with_data( db, attribute::Attribute, id::Int, date_time::DateTime -) +)::DateTime # TODO this query could probably be optimized # It is reading many things that are not necessary # And filtering and sorting in the end query = """ - SELECT date_time + SELECT MIN(DATETIME(date_time)) FROM $(attribute.table_where_is_located) - WHERE $(attribute.id) IS NOT NULL AND DATE(date_time) > DATE('$date_time') AND id = '$id' - ORDER BY date_time ASC - LIMIT 1 + WHERE $(attribute.id) IS NOT NULL AND DATETIME(date_time) > DATETIME('$date_time') AND id = '$id' """ result = DBInterface.execute(db.sqlite_db, query) - # See how to get the query without the need to convert into DataFrame - # If it is empty what should we return? - return result + for row in result + answer = row[1] + if ismissing(answer) + return typemax(DateTime) + end + return DateTime(answer, DATETIME_FORMAT) + end end function _update_global_closest_dates_with_data!( cache::TimeControllerCache ) - cache._closest_previous_date_with_data = maximum(closest_previous_date_with_data) - cache._closest_next_date_with_data = minimum(closest_next_date_with_data) + cache._closest_global_previous_date_with_data = maximum(cache.closest_previous_date_with_data) + cache._closest_global_next_date_with_data = minimum(cache.closest_next_date_with_data) +end + +function _no_need_to_query_any_id( + cache::TimeControllerCache, + date_time::DateTime +)::Bool + return cache._closest_global_previous_date_with_data <= date_time < cache._closest_global_next_date_with_data end function _start_time_controller_cache( db, attribute::Attribute, - date_time::DateTime -) - ids = read_scalar_parameters(db, attribute.parent_collection, "id") - closest_previous_date_with_data = Vector{DateTime}(undef, length(ids)) - closest_next_date_with_data = Vector{DateTime}(undef, length(ids)) - for (i, id) in enumerate(ids) - closest_previous_date_with_data[i] = _closes_previous_date_with_data(db, attribute, id, date_time) - closest_next_date_with_data[i] = _closes_next_date_with_data(db, attribute, id, date_time) - _collection_ids[i] = id + date_time::DateTime, + ::Type{T} +) where T + _collection_ids = read_scalar_parameters(db, attribute.parent_collection, "id") + data = Vector{T}(undef, length(_collection_ids)) + closest_previous_date_with_data = Vector{DateTime}(undef, length(_collection_ids)) + closest_next_date_with_data = Vector{DateTime}(undef, length(_collection_ids)) + for (i, id) in enumerate(_collection_ids) + closest_previous_date_with_data[i] = _closest_previous_date_with_data(db, attribute, id, date_time) + closest_next_date_with_data[i] = _closest_next_date_with_data(db, attribute, id, date_time) end - _closest_previous_date_with_data = maximum(closest_previous_date_with_data) - _closest_next_date_with_data = minimum(closest_next_date_with_data) + _closest_global_previous_date_with_data = maximum(closest_previous_date_with_data) + _closest_global_next_date_with_data = minimum(closest_next_date_with_data) # Query the data for the first time - for (i, id) in enumerate(ids) - data = _request_time_series_data_for_time_controller_cache(db, attribute, id, closest_previous_date_with_data[i]) - cache.data[i] = data + for (i, id) in enumerate(_collection_ids) + data[i] = _request_time_series_data_for_time_controller_cache(db, attribute, id, 
closest_previous_date_with_data[i], T)
     end
 
-    return TimeControllerCache(
+    return TimeControllerCache{T}(
         data,
         closest_previous_date_with_data,
         date_time,
         closest_next_date_with_data,
-        _closest_previous_date_with_data,
-        _closest_next_date_with_data,
+        _closest_global_previous_date_with_data,
+        _closest_global_next_date_with_data,
         _collection_ids,
     )
 end
 
@@ -118,33 +131,37 @@ function _request_time_series_data_for_time_controller_cache(
     db,
     attribute::Attribute,
     id::Int,
-    date_time::DateTime
-)
+    date_time::DateTime,
+    ::Type{T}
+) where T
     query = """
         SELECT $(attribute.id)
         FROM $(attribute.table_where_is_located)
-        WHERE id = $id AND date_time = $date_time
+        WHERE id = $id AND DATETIME(date_time) = DATETIME('$date_time')
     """
     result = DBInterface.execute(db.sqlite_db, query)
-    # See how to get the query without the need to convert into DataFrame
-    # If it is empty what should we return?
-    return result
+    for row in result
+        return T(row[1])
+    end
+    return _psrdatabasesqlite_null_value(T)
 end
 
 function _update_time_controller_cache!(
     cache::TimeControllerCache,
     db,
-    date_time::DateTime
-)
+    attribute::Attribute,
+    date_time::DateTime,
+    ::Type{T}
+) where T
     cache.last_date_requested = date_time
     for (i, id) in enumerate(cache._collection_ids)
         # If the date is within the range we do not need to update anything
-        if cache.closest_previous_date_with_data[i] < date_time < cache.closest_previous_date_with_data[i]
+        if cache.closest_previous_date_with_data[i] <= date_time < cache.closest_next_date_with_data[i]
             continue
         end
-        cache.closest_previous_date_with_data[i] = _closes_previous_date_with_data(db, attribute, id, date_time)
-        cache.closest_next_date_with_data[i] = _closes_next_date_with_data(db, attribute, id, date_time)
-        cache.data[i] = _request_time_series_data_for_time_controller_cache(db, attribute, id, closest_previous_date_with_data[i])
+        cache.closest_previous_date_with_data[i] = _closest_previous_date_with_data(db, attribute, id, date_time)
+        cache.closest_next_date_with_data[i] = _closest_next_date_with_data(db, attribute, id, date_time)
+        cache.data[i] = _request_time_series_data_for_time_controller_cache(db, attribute, id, cache.closest_previous_date_with_data[i], T)
     end
     _update_global_closest_dates_with_data!(cache)
     return nothing
@@ -153,9 +170,10 @@ end
 function read_mapped_timeseries(
     db,
     collection_id::String,
-    attribute_id::String;
+    attribute_id::String,
+    type::Type{T};
     date_time::DateTime
-)
+) where T
     _throw_if_attribute_is_not_time_series(
         db,
         collection_id,
         attribute_id,
         :read,
     )
     @assert _is_read_only(db) "Time series mapping only works in read only databases"
     collection_attribute = _collection_attribute(collection_id, attribute_id)
     attribute = _get_attribute(db, collection_id, attribute_id)
     if !haskey(db._time_controller.cache, collection_attribute)
-        db._time_controller.cache[collection_attribute] = _start_time_controller_cache(db, attribute, date_time)
+        db._time_controller.cache[collection_attribute] = _start_time_controller_cache(db, attribute, date_time, type)
     end
     cache = db._time_controller.cache[collection_attribute]
     # If we don`t need to update anything we just return the data
-    if cache._closest_previous_date_with_data < date_time < cache._closest_next_date_with_data
+    if _no_need_to_query_any_id(cache, date_time)
         cache.last_date_requested = date_time
         return cache.data
     end
     # If we need to update the cache we update the dates and the data
-    _update_time_controller_cache!(cache, db, date_time)
+    _update_time_controller_cache!(cache, db, attribute, date_time, T)
     return cache.data
 end
\ No newline at end of
file From cfe96638e96043b1d6e2a9c54c5907f45a9cdb4b Mon Sep 17 00:00:00 2001 From: pedroripper Date: Tue, 2 Jul 2024 14:42:43 -0300 Subject: [PATCH 13/34] Add tests for timecontroller --- .../test_time_controller.jl | 220 ++++++++++++++++++ .../test_time_controller.sql | 26 +++ 2 files changed, 246 insertions(+) create mode 100644 test/PSRDatabaseSQLite/test_time_controller/test_time_controller.jl create mode 100644 test/PSRDatabaseSQLite/test_time_controller/test_time_controller.sql diff --git a/test/PSRDatabaseSQLite/test_time_controller/test_time_controller.jl b/test/PSRDatabaseSQLite/test_time_controller/test_time_controller.jl new file mode 100644 index 00000000..bf4f4c29 --- /dev/null +++ b/test/PSRDatabaseSQLite/test_time_controller/test_time_controller.jl @@ -0,0 +1,220 @@ +module TestTimeController + +using PSRClassesInterface.PSRDatabaseSQLite +using SQLite +using Dates +using DataFrames +using Test + +function test_time_controller_read() + path_schema = joinpath(@__DIR__, "test_time_controller.sql") + db_path = joinpath(@__DIR__, "test_time_controller.sqlite") + GC.gc() + GC.gc() + if isfile(db_path) + rm(db_path) + end + + db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) + PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case", value1 = 1.0) + PSRDatabaseSQLite.SQLite.transaction(db.sqlite_db) do + for i in 1:500 + df_timeseries_group1 = DataFrame(; + date_time = vcat([DateTime(0)], [DateTime(i) for i in 1900:1979]), + some_vector1 = vcat([missing], [j for j in 1:80] .* i), + some_vector2 = vcat([1.0], [missing for j in 1:10], [j for j in 1:10] .* i, [missing for j in 1:60]), + some_vector3 = vcat([1.0], [missing for j in 1:80]), + some_vector4 = vcat([missing], [missing for j in 1:80]), + ) + PSRDatabaseSQLite.create_element!( + db, + "Resource"; + label = "Resource $i", + group1 = df_timeseries_group1, + ) + end + end + + PSRDatabaseSQLite.close!(db) + db = PSRDatabaseSQLite.load_db(db_path; read_only = true) + + for (j, date_time) in enumerate([DateTime(i) for i in 1900:1979]) + some_vector1_check = vcat(Float64[j * k for k in 1:500]) + + some_vector2_check = if j <= 10 + vcat([1.0 for k in 1:500]) + elseif j <= 20 && j > 10 + l_idx = indexin(j, 11:20)[1] + vcat(Float64[l_idx * k for k in 1:500]) + else + vcat([10.0 * k for k in 1:500]) + end + some_vector3_check = vcat([1.0 for k in 1:500]) + some_vector4_check = vcat([missing for k in 1:500]) + + cached_data_new = PSRDatabaseSQLite.read_mapped_timeseries( + db, + "Resource", + "some_vector1", + Float64; + date_time = date_time, + ) + for k in 1:500 + @test cached_data_new[k] == some_vector1_check[k] + end + + cached_data_new = PSRDatabaseSQLite.read_mapped_timeseries( + db, + "Resource", + "some_vector2", + Float64; + date_time = date_time, + ) + for k in 1:500 + @test cached_data_new[k] == some_vector2_check[k] + end + + cached_data_new = PSRDatabaseSQLite.read_mapped_timeseries( + db, + "Resource", + "some_vector3", + Float64; + date_time = date_time, + ) + + for k in 1:500 + @test cached_data_new[k] == some_vector3_check[k] + end + + cached_data_new = PSRDatabaseSQLite.read_mapped_timeseries( + db, + "Resource", + "some_vector4", + Float64; + date_time = date_time, + ) + + for k in 1:500 + @test isnan(cached_data_new[k]) + end + end + + PSRDatabaseSQLite.close!(db) + return rm(db_path) +end + +function test_time_controller_missing() + path_schema = joinpath(@__DIR__, "test_time_controller.sql") + db_path = joinpath(@__DIR__, "test_time_controller_missing.sqlite") + 
GC.gc() + GC.gc() + if isfile(db_path) + rm(db_path) + end + + db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) + PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case", value1 = 1.0) + PSRDatabaseSQLite.SQLite.transaction(db.sqlite_db) do + df_timeseries_group1 = DataFrame(; + date_time = vcat([DateTime(0)], [DateTime(2000), DateTime(2001), DateTime(2002)]), + some_vector1 = vcat([missing], [1.0, 2.0, 3.0]), + ) + PSRDatabaseSQLite.create_element!( + db, + "Resource"; + label = "Resource 1", + group1 = df_timeseries_group1, + ) + + df_timeseries_group1 = DataFrame(; + date_time = vcat([DateTime(0)], [DateTime(2000), DateTime(2001), DateTime(2002)]), + some_vector1 = vcat([missing], [1.0, missing, 3.0]), + ) + PSRDatabaseSQLite.create_element!( + db, + "Resource"; + label = "Resource 2", + group1 = df_timeseries_group1, + ) + + df_timeseries_group1 = DataFrame(; + date_time = vcat([DateTime(0)], [DateTime(2000), DateTime(2002)]), + some_vector1 = vcat([missing], [1.0, 3.0]), + ) + PSRDatabaseSQLite.create_element!( + db, + "Resource"; + label = "Resource 3", + group1 = df_timeseries_group1, + ) + + df_timeseries_group1 = DataFrame(; + date_time = vcat([DateTime(0)], [DateTime(2000), DateTime(2001), DateTime(2002)]), + some_vector1 = [missing for i in 1:4], + ) + return PSRDatabaseSQLite.create_element!( + db, + "Resource"; + label = "Resource 4", + group1 = df_timeseries_group1, + ) + end + + PSRDatabaseSQLite.close!(db) + db = PSRDatabaseSQLite.load_db(db_path; read_only = true) + + cached_data_new = PSRDatabaseSQLite.read_mapped_timeseries( + db, + "Resource", + "some_vector1", + Float64; + date_time = DateTime(2000), + ) + @test cached_data_new[1] == 1.0 + @test cached_data_new[2] == 1.0 + @test cached_data_new[3] == 1.0 + @test isnan(cached_data_new[4]) + + cached_data_new = PSRDatabaseSQLite.read_mapped_timeseries( + db, + "Resource", + "some_vector1", + Float64; + date_time = DateTime(2001), + ) + @test cached_data_new[1] == 2.0 + @test cached_data_new[2] == 1.0 + @test cached_data_new[3] == 1.0 + @test isnan(cached_data_new[4]) + + cached_data_new = PSRDatabaseSQLite.read_mapped_timeseries( + db, + "Resource", + "some_vector1", + Float64; + date_time = DateTime(2002), + ) + @test cached_data_new[1] == 3.0 + @test cached_data_new[2] == 3.0 + @test cached_data_new[3] == 3.0 + @test isnan(cached_data_new[4]) + + PSRDatabaseSQLite.close!(db) + return rm(db_path) +end + +function runtests() + Base.GC.gc() + Base.GC.gc() + for name in names(@__MODULE__; all = true) + if startswith("$name", "test_") + @testset "$(name)" begin + getfield(@__MODULE__, name)() + end + end + end +end + +TestTimeController.runtests() + +end diff --git a/test/PSRDatabaseSQLite/test_time_controller/test_time_controller.sql b/test/PSRDatabaseSQLite/test_time_controller/test_time_controller.sql new file mode 100644 index 00000000..4ec4c837 --- /dev/null +++ b/test/PSRDatabaseSQLite/test_time_controller/test_time_controller.sql @@ -0,0 +1,26 @@ +PRAGMA user_version = 1; +PRAGMA foreign_keys = ON; + +CREATE TABLE Configuration ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + label TEXT UNIQUE NOT NULL, + value1 REAL NOT NULL DEFAULT 100, + enum1 TEXT NOT NULL DEFAULT 'A' CHECK(enum1 IN ('A', 'B', 'C')) +) STRICT; + +CREATE TABLE Resource ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + label TEXT UNIQUE NOT NULL, + type TEXT NOT NULL DEFAULT "D" +) STRICT; + +CREATE TABLE Resource_timeseries_group1 ( + id INTEGER, + date_time TEXT NOT NULL, + some_vector1 REAL, + 
some_vector2 REAL, + some_vector3 REAL, + some_vector4 REAL, + FOREIGN KEY(id) REFERENCES Resource(id) ON DELETE CASCADE ON UPDATE CASCADE, + PRIMARY KEY (id, date_time) +) STRICT; \ No newline at end of file From f67946575b682dfaf82aef1b28643f129722af34 Mon Sep 17 00:00:00 2001 From: guilhermebodin Date: Tue, 2 Jul 2024 16:23:50 -0300 Subject: [PATCH 14/34] update --- script_time_controller.jl | 61 +++++---- src/PSRDatabaseSQLite/time_controller.jl | 153 +++++++++-------------- 2 files changed, 87 insertions(+), 127 deletions(-) diff --git a/script_time_controller.jl b/script_time_controller.jl index 50a2c420..da6612e2 100644 --- a/script_time_controller.jl +++ b/script_time_controller.jl @@ -46,41 +46,40 @@ function test_read_time_series() for (j, date_time) in enumerate([DateTime(i) for i in 1900:1901]) @show date_time - for i in 1:50 - t1 = @timed PSRDatabaseSQLite.read_mapped_timeseries( - db, - "Resource", - "some_vector1", - Float64, - date_time = date_time - ) + t1 = @timed PSRDatabaseSQLite.read_mapped_timeseries( + db, + "Resource", + "some_vector1", + Float64, + date_time = date_time + ) + # @show t1.value - t2 = @timed PSRDatabaseSQLite.read_mapped_timeseries( - db, - "Resource", - "some_vector2", - Float64, - date_time = date_time - ) + t2 = @timed PSRDatabaseSQLite.read_mapped_timeseries( + db, + "Resource", + "some_vector2", + Float64, + date_time = date_time + ) - t3 = @timed PSRDatabaseSQLite.read_mapped_timeseries( - db, - "Resource", - "some_vector3", - Float64, - date_time = date_time - ) + t3 = @timed PSRDatabaseSQLite.read_mapped_timeseries( + db, + "Resource", + "some_vector3", + Float64, + date_time = date_time + ) - t4 = @timed PSRDatabaseSQLite.read_mapped_timeseries( - db, - "Resource", - "some_vector4", - Float64, - date_time = date_time - ) + t4 = @timed PSRDatabaseSQLite.read_mapped_timeseries( + db, + "Resource", + "some_vector4", + Float64, + date_time = date_time + ) - times .+= [t1.time, t2.time, t3.time, t4.time] - end + times .+= [t1.time, t2.time, t3.time, t4.time] end @show times diff --git a/src/PSRDatabaseSQLite/time_controller.jl b/src/PSRDatabaseSQLite/time_controller.jl index 6cae9c17..4da24acf 100644 --- a/src/PSRDatabaseSQLite/time_controller.jl +++ b/src/PSRDatabaseSQLite/time_controller.jl @@ -5,8 +5,6 @@ struct TimeSeriesChanged <: TimeSeriesRequestStatus end const CollectionAttribute = Tuple{String, String} -const DATETIME_FORMAT = "yyyy-mm-dd HH:MM:SS" - mutable struct TimeControllerCache{T} data::Vector{T} # Control of dates requested per element in a given pair collection attribute @@ -22,6 +20,9 @@ mutable struct TimeControllerCache{T} # Cache of collection_ids _collection_ids::Vector{Int} + + # Cache prepared statement for querying the data + _prepared_statement::SQLite.Stmt end Base.@kwdef mutable struct TimeController @@ -32,59 +33,51 @@ function _collection_attribute(collection_id::String, attribute_id::String)::Col return (collection_id, attribute_id) end -function _closest_previous_date_with_data( +function _update_time_controller_cache!( + cache::TimeControllerCache, db, attribute::Attribute, - id::Int, date_time::DateTime -)::DateTime - # TODO this query could probably be optimized - # It is reading many things that are not necessary - # And filtering and sorting in the end +) + cache.last_date_requested = date_time query = """ - SELECT MAX(DATETIME(date_time)) - FROM $(attribute.table_where_is_located) - WHERE $(attribute.id) IS NOT NULL AND DATETIME(date_time) <= DATETIME('$date_time') AND id = '$id' + SELECT + id, + MAX(CASE 
WHEN DATETIME(date_time) <= DATETIME('$date_time') THEN date_time ELSE NULL END) AS closest_previous_date_with_data, + MIN(CASE WHEN DATETIME(date_time) > DATETIME('$date_time') THEN date_time ELSE NULL END) AS closest_next_date_with_data, + $(attribute.id) + FROM $(attribute.table_where_is_located) + WHERE $(attribute.id) IS NOT NULL + GROUP BY id + ORDER BY id """ result = DBInterface.execute(db.sqlite_db, query) - for row in result - answer = row[1] - if ismissing(answer) - return typemin(DateTime) + # @show result + for (i, row) in enumerate(result) + id = row[1] + @assert id == cache._collection_ids[i] "The id in the database is different from the one in the cache" + closest_previous_date_with_data = row[2] + closest_next_date_with_data = row[3] + data = row[4] + if ismissing(closest_previous_date_with_data) + cache.closest_previous_date_with_data[i] = typemin(DateTime) + else + cache.closest_previous_date_with_data[i] = DateTime(closest_previous_date_with_data) end - return DateTime(answer, DATETIME_FORMAT) - end -end - -function _closest_next_date_with_data( - db, - attribute::Attribute, - id::Int, - date_time::DateTime -)::DateTime - # TODO this query could probably be optimized - # It is reading many things that are not necessary - # And filtering and sorting in the end - query = """ - SELECT MIN(DATETIME(date_time)) - FROM $(attribute.table_where_is_located) - WHERE $(attribute.id) IS NOT NULL AND DATETIME(date_time) > DATETIME('$date_time') AND id = '$id' - """ - result = DBInterface.execute(db.sqlite_db, query) - for row in result - answer = row[1] - if ismissing(answer) - return typemax(DateTime) + if ismissing(closest_next_date_with_data) + cache.closest_next_date_with_data[i] = typemax(DateTime) + else + cache.closest_next_date_with_data[i] = DateTime(closest_next_date_with_data) + end + if ismissing(data) + cache.data[i] = _psrdatabasesqlite_null_value(eltype(cache.data)) + else + cache.data[i] = data end - return DateTime(answer, DATETIME_FORMAT) end -end - -function _update_global_closest_dates_with_data!( - cache::TimeControllerCache -) cache._closest_global_previous_date_with_data = maximum(cache.closest_previous_date_with_data) cache._closest_global_next_date_with_data = minimum(cache.closest_next_date_with_data) + return cache end function _no_need_to_query_any_id( @@ -101,22 +94,25 @@ function _start_time_controller_cache( ::Type{T} ) where T _collection_ids = read_scalar_parameters(db, attribute.parent_collection, "id") - data = Vector{T}(undef, length(_collection_ids)) - closest_previous_date_with_data = Vector{DateTime}(undef, length(_collection_ids)) - closest_next_date_with_data = Vector{DateTime}(undef, length(_collection_ids)) - for (i, id) in enumerate(_collection_ids) - closest_previous_date_with_data[i] = _closest_previous_date_with_data(db, attribute, id, date_time) - closest_next_date_with_data[i] = _closest_next_date_with_data(db, attribute, id, date_time) - end + data = fill(_psrdatabasesqlite_null_value(T), length(_collection_ids)) + closest_previous_date_with_data = fill(typemin(DateTime), length(_collection_ids)) + closest_next_date_with_data = fill(typemax(DateTime), length(_collection_ids)) _closest_global_previous_date_with_data = maximum(closest_previous_date_with_data) _closest_global_next_date_with_data = minimum(closest_next_date_with_data) - - # Query the data for the first time - for (i, id) in enumerate(_collection_ids) - data[i] = _request_time_series_data_for_time_controller_cache(db, attribute, id, closest_previous_date_with_data[i], T) 
- end - return TimeControllerCache{T}( + _prepared_statement = SQLite.Stmt(db.sqlite_db, """ + SELECT + id, + MAX(CASE WHEN DATETIME(date_time) <= DATETIME(':date_time') THEN date_time ELSE NULL END) AS closest_previous_date_with_data, + MIN(CASE WHEN DATETIME(date_time) > DATETIME(':date_time') THEN date_time ELSE NULL END) AS closest_next_date_with_data, + $(attribute.id) + FROM $(attribute.table_where_is_located) + WHERE $(attribute.id) IS NOT NULL + GROUP BY id + ORDER BY id + """) + + cache = TimeControllerCache{T}( data, closest_previous_date_with_data, date_time, @@ -124,47 +120,12 @@ function _start_time_controller_cache( _closest_global_previous_date_with_data, _closest_global_next_date_with_data, _collection_ids, + _prepared_statement ) -end -function _request_time_series_data_for_time_controller_cache( - db, - attribute::Attribute, - id::Int, - date_time::DateTime, - ::Type{T} -) where T - query = """ - SELECT $(attribute.id) - FROM $(attribute.table_where_is_located) - WHERE id = $id AND DATETIME(date_time) = DATETIME('$date_time') - """ - result = DBInterface.execute(db.sqlite_db, query) - for row in result - return T(row[1]) - end - return _psrdatabasesqlite_null_value(T) -end + _update_time_controller_cache!(cache, db, attribute, date_time) -function _update_time_controller_cache!( - cache::TimeControllerCache, - db, - attribute::Attribute, - date_time::DateTime, - ::Type{T} -) where T - cache.last_date_requested = date_time - for (i, id) in enumerate(cache._collection_ids) - # If date is whitin the range we do not need to update anything - if cache.closest_previous_date_with_data[i] <= date_time < cache.closest_previous_date_with_data[i] - continue - end - cache.closest_previous_date_with_data[i] = _closest_previous_date_with_data(db, attribute, id, date_time) - cache.closest_next_date_with_data[i] = _closest_next_date_with_data(db, attribute, id, date_time) - cache.data[i] = _request_time_series_data_for_time_controller_cache(db, attribute, id, cache.closest_previous_date_with_data[i], T) - end - _update_global_closest_dates_with_data!(cache) - return nothing + return cache end function read_mapped_timeseries( @@ -193,6 +154,6 @@ function read_mapped_timeseries( return cache.data end # If we need to update the cache we update the dates and the data - _update_time_controller_cache!(cache, db, attribute, date_time, T) + _update_time_controller_cache!(cache, db, attribute, date_time) return cache.data end \ No newline at end of file From 6e1dbb3d40003b03e86fe68152251286b166e631 Mon Sep 17 00:00:00 2001 From: guilhermebodin Date: Tue, 2 Jul 2024 17:31:24 -0300 Subject: [PATCH 15/34] update --- src/PSRDatabaseSQLite/time_controller.jl | 84 +++++++++++-------- .../test_time_controller.jl | 14 ++-- 2 files changed, 58 insertions(+), 40 deletions(-) diff --git a/src/PSRDatabaseSQLite/time_controller.jl b/src/PSRDatabaseSQLite/time_controller.jl index 4da24acf..0c3c925e 100644 --- a/src/PSRDatabaseSQLite/time_controller.jl +++ b/src/PSRDatabaseSQLite/time_controller.jl @@ -5,6 +5,13 @@ struct TimeSeriesChanged <: TimeSeriesRequestStatus end const CollectionAttribute = Tuple{String, String} +# Some comments +# TODO we can further optimize the time controller with a few strategies +# 1 - We can try to ask for the data in the same query that we ask for the dates. I just don`t know how to write the good query for that +# 2 - We can use prepared statements for the queries +# 3 - Avoid querying the data for every id in the attribute. 
Currently we fill the cache of dates before making the query and use it to inform which date each id should query. This is quite inneficient +# The best way of optimizing it would be to solve 1 and 2. + mutable struct TimeControllerCache{T} data::Vector{T} # Control of dates requested per element in a given pair collection attribute @@ -20,9 +27,6 @@ mutable struct TimeControllerCache{T} # Cache of collection_ids _collection_ids::Vector{Int} - - # Cache prepared statement for querying the data - _prepared_statement::SQLite.Stmt end Base.@kwdef mutable struct TimeController @@ -39,26 +43,58 @@ function _update_time_controller_cache!( attribute::Attribute, date_time::DateTime ) + + _update_time_controller_cache_dates!(cache, db, attribute, date_time) + + for (i, id) in enumerate(cache._collection_ids) + cache.data[i] = _request_time_series_data_for_time_controller_cache(db, attribute, id, cache.closest_previous_date_with_data[i], eltype(cache.data)) + end + + return nothing +end + +function _request_time_series_data_for_time_controller_cache( + db, + attribute::Attribute, + id::Int, + date_time::DateTime, + ::Type{T} +) where T + query = """ + SELECT $(attribute.id) + FROM $(attribute.table_where_is_located) + WHERE id = $id AND DATETIME(date_time) = DATETIME('$date_time') + """ + result = DBInterface.execute(db.sqlite_db, query) + for row in result + return T(row[1]) + end + return _psrdatabasesqlite_null_value(T) +end + +function _update_time_controller_cache_dates!( + cache::TimeControllerCache, + db, + attribute::Attribute, + date_time::DateTime +) cache.last_date_requested = date_time query = """ - SELECT - id, - MAX(CASE WHEN DATETIME(date_time) <= DATETIME('$date_time') THEN date_time ELSE NULL END) AS closest_previous_date_with_data, - MIN(CASE WHEN DATETIME(date_time) > DATETIME('$date_time') THEN date_time ELSE NULL END) AS closest_next_date_with_data, - $(attribute.id) - FROM $(attribute.table_where_is_located) - WHERE $(attribute.id) IS NOT NULL - GROUP BY id - ORDER BY id + SELECT + id, + MAX(CASE WHEN DATETIME(date_time) <= DATETIME('$date_time') THEN date_time ELSE NULL END) AS closest_previous_date_with_data, + MIN(CASE WHEN DATETIME(date_time) > DATETIME('$date_time') THEN date_time ELSE NULL END) AS closest_next_date_with_data + FROM $(attribute.table_where_is_located) + WHERE $(attribute.id) IS NOT NULL + GROUP BY id + ORDER BY id """ result = DBInterface.execute(db.sqlite_db, query) - # @show result for (i, row) in enumerate(result) id = row[1] @assert id == cache._collection_ids[i] "The id in the database is different from the one in the cache" closest_previous_date_with_data = row[2] closest_next_date_with_data = row[3] - data = row[4] if ismissing(closest_previous_date_with_data) cache.closest_previous_date_with_data[i] = typemin(DateTime) else @@ -69,11 +105,6 @@ function _update_time_controller_cache!( else cache.closest_next_date_with_data[i] = DateTime(closest_next_date_with_data) end - if ismissing(data) - cache.data[i] = _psrdatabasesqlite_null_value(eltype(cache.data)) - else - cache.data[i] = data - end end cache._closest_global_previous_date_with_data = maximum(cache.closest_previous_date_with_data) cache._closest_global_next_date_with_data = minimum(cache.closest_next_date_with_data) @@ -100,18 +131,6 @@ function _start_time_controller_cache( _closest_global_previous_date_with_data = maximum(closest_previous_date_with_data) _closest_global_next_date_with_data = minimum(closest_next_date_with_data) - _prepared_statement = SQLite.Stmt(db.sqlite_db, """ - SELECT 
- id, - MAX(CASE WHEN DATETIME(date_time) <= DATETIME(':date_time') THEN date_time ELSE NULL END) AS closest_previous_date_with_data, - MIN(CASE WHEN DATETIME(date_time) > DATETIME(':date_time') THEN date_time ELSE NULL END) AS closest_next_date_with_data, - $(attribute.id) - FROM $(attribute.table_where_is_located) - WHERE $(attribute.id) IS NOT NULL - GROUP BY id - ORDER BY id - """) - cache = TimeControllerCache{T}( data, closest_previous_date_with_data, @@ -119,8 +138,7 @@ function _start_time_controller_cache( closest_next_date_with_data, _closest_global_previous_date_with_data, _closest_global_next_date_with_data, - _collection_ids, - _prepared_statement + _collection_ids ) _update_time_controller_cache!(cache, db, attribute, date_time) diff --git a/test/PSRDatabaseSQLite/test_time_controller/test_time_controller.jl b/test/PSRDatabaseSQLite/test_time_controller/test_time_controller.jl index bf4f4c29..4c2ce7fe 100644 --- a/test/PSRDatabaseSQLite/test_time_controller/test_time_controller.jl +++ b/test/PSRDatabaseSQLite/test_time_controller/test_time_controller.jl @@ -117,7 +117,7 @@ function test_time_controller_missing() PSRDatabaseSQLite.SQLite.transaction(db.sqlite_db) do df_timeseries_group1 = DataFrame(; date_time = vcat([DateTime(0)], [DateTime(2000), DateTime(2001), DateTime(2002)]), - some_vector1 = vcat([missing], [1.0, 2.0, 3.0]), + some_vector1 = vcat([missing], [3.0, 2.0, 1.0]), ) PSRDatabaseSQLite.create_element!( db, @@ -128,7 +128,7 @@ function test_time_controller_missing() df_timeseries_group1 = DataFrame(; date_time = vcat([DateTime(0)], [DateTime(2000), DateTime(2001), DateTime(2002)]), - some_vector1 = vcat([missing], [1.0, missing, 3.0]), + some_vector1 = vcat([missing], [3.0, missing, 1.0]), ) PSRDatabaseSQLite.create_element!( db, @@ -170,8 +170,8 @@ function test_time_controller_missing() Float64; date_time = DateTime(2000), ) - @test cached_data_new[1] == 1.0 - @test cached_data_new[2] == 1.0 + @test cached_data_new[1] == 3.0 + @test cached_data_new[2] == 3.0 @test cached_data_new[3] == 1.0 @test isnan(cached_data_new[4]) @@ -183,7 +183,7 @@ function test_time_controller_missing() date_time = DateTime(2001), ) @test cached_data_new[1] == 2.0 - @test cached_data_new[2] == 1.0 + @test cached_data_new[2] == 3.0 @test cached_data_new[3] == 1.0 @test isnan(cached_data_new[4]) @@ -194,8 +194,8 @@ function test_time_controller_missing() Float64; date_time = DateTime(2002), ) - @test cached_data_new[1] == 3.0 - @test cached_data_new[2] == 3.0 + @test cached_data_new[1] == 1.0 + @test cached_data_new[2] == 1.0 @test cached_data_new[3] == 3.0 @test isnan(cached_data_new[4]) From e867c8e2803833c82e71b2578df7958ef0622917 Mon Sep 17 00:00:00 2001 From: pedroripper Date: Tue, 2 Jul 2024 18:26:53 -0300 Subject: [PATCH 16/34] Update time controller tests --- .../test_time_controller.jl | 343 +++++++++++------- .../test_time_controller.sql | 2 + 2 files changed, 221 insertions(+), 124 deletions(-) diff --git a/test/PSRDatabaseSQLite/test_time_controller/test_time_controller.jl b/test/PSRDatabaseSQLite/test_time_controller/test_time_controller.jl index 4c2ce7fe..d27496ed 100644 --- a/test/PSRDatabaseSQLite/test_time_controller/test_time_controller.jl +++ b/test/PSRDatabaseSQLite/test_time_controller/test_time_controller.jl @@ -6,9 +6,21 @@ using Dates using DataFrames using Test +function _test_cache(cached_data, answer) + @test length(cached_data) == length(answer) + for i in eachindex(cached_data) + if isnan(answer[i]) + @test isnan(cached_data[i]) + else + @test 
cached_data[i] == answer[i] + end + end +end + +# For each date, test the returned value with the expected value function test_time_controller_read() path_schema = joinpath(@__DIR__, "test_time_controller.sql") - db_path = joinpath(@__DIR__, "test_time_controller.sqlite") + db_path = joinpath(@__DIR__, "test_time_controller_read.sqlite") GC.gc() GC.gc() if isfile(db_path) @@ -17,187 +29,270 @@ function test_time_controller_read() db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case", value1 = 1.0) - PSRDatabaseSQLite.SQLite.transaction(db.sqlite_db) do - for i in 1:500 - df_timeseries_group1 = DataFrame(; - date_time = vcat([DateTime(0)], [DateTime(i) for i in 1900:1979]), - some_vector1 = vcat([missing], [j for j in 1:80] .* i), - some_vector2 = vcat([1.0], [missing for j in 1:10], [j for j in 1:10] .* i, [missing for j in 1:60]), - some_vector3 = vcat([1.0], [missing for j in 1:80]), - some_vector4 = vcat([missing], [missing for j in 1:80]), - ) - PSRDatabaseSQLite.create_element!( - db, - "Resource"; - label = "Resource $i", - group1 = df_timeseries_group1, - ) - end - end + + df = DataFrame(; + date_time = [DateTime(2000), DateTime(2001), DateTime(2002)], + some_vector1 = [missing, 1.0, 2.0], + some_vector2 = [1.0, 2.0, 3.0], + some_vector3 = [3.0, 2.0, 1.0], + some_vector4 = [1.0, missing, 5.0], + some_vector5 = [missing, missing, missing], + some_vector6 = [6.0, missing, missing], + ) + PSRDatabaseSQLite.create_element!( + db, + "Resource"; + label = "Resource 1", + group1 = df, + ) PSRDatabaseSQLite.close!(db) db = PSRDatabaseSQLite.load_db(db_path; read_only = true) - for (j, date_time) in enumerate([DateTime(i) for i in 1900:1979]) - some_vector1_check = vcat(Float64[j * k for k in 1:500]) - - some_vector2_check = if j <= 10 - vcat([1.0 for k in 1:500]) - elseif j <= 20 && j > 10 - l_idx = indexin(j, 11:20)[1] - vcat(Float64[l_idx * k for k in 1:500]) - else - vcat([10.0 * k for k in 1:500]) - end - some_vector3_check = vcat([1.0 for k in 1:500]) - some_vector4_check = vcat([missing for k in 1:500]) + some_vector1_answer = [[NaN], [1.0], [2.0]] + some_vector2_answer = [[1.0], [2.0], [3.0]] + some_vector3_answer = [[3.0], [2.0], [1.0]] + some_vector4_answer = [[1.0], [1.0], [5.0]] + some_vector5_answer = [[NaN], [NaN], [NaN]] + some_vector6_answer = [[6.0], [6.0], [6.0]] - cached_data_new = PSRDatabaseSQLite.read_mapped_timeseries( + # test for dates in correct sequence + for d_i in eachindex(df.date_time) + cached_1 = PSRDatabaseSQLite.read_mapped_timeseries( db, "Resource", "some_vector1", Float64; - date_time = date_time, + date_time = DateTime(df.date_time[d_i]), ) - for k in 1:500 - @test cached_data_new[k] == some_vector1_check[k] - end + _test_cache(cached_1, some_vector1_answer[d_i]) - cached_data_new = PSRDatabaseSQLite.read_mapped_timeseries( + cached_2 = PSRDatabaseSQLite.read_mapped_timeseries( db, "Resource", "some_vector2", Float64; - date_time = date_time, + date_time = DateTime(df.date_time[d_i]), ) - for k in 1:500 - @test cached_data_new[k] == some_vector2_check[k] - end + _test_cache(cached_2, some_vector2_answer[d_i]) - cached_data_new = PSRDatabaseSQLite.read_mapped_timeseries( + cached_3 = PSRDatabaseSQLite.read_mapped_timeseries( db, "Resource", "some_vector3", Float64; - date_time = date_time, + date_time = DateTime(df.date_time[d_i]), ) + _test_cache(cached_3, some_vector3_answer[d_i]) - for k in 1:500 - @test cached_data_new[k] == 
some_vector3_check[k] - end - - cached_data_new = PSRDatabaseSQLite.read_mapped_timeseries( + cached_4 = PSRDatabaseSQLite.read_mapped_timeseries( db, "Resource", "some_vector4", Float64; - date_time = date_time, + date_time = DateTime(df.date_time[d_i]), ) + _test_cache(cached_4, some_vector4_answer[d_i]) - for k in 1:500 - @test isnan(cached_data_new[k]) - end + cached_5 = PSRDatabaseSQLite.read_mapped_timeseries( + db, + "Resource", + "some_vector5", + Float64; + date_time = DateTime(df.date_time[d_i]), + ) + _test_cache(cached_5, some_vector5_answer[d_i]) + + cached_6 = PSRDatabaseSQLite.read_mapped_timeseries( + db, + "Resource", + "some_vector6", + Float64; + date_time = DateTime(df.date_time[d_i]), + ) + _test_cache(cached_6, some_vector6_answer[d_i]) end - PSRDatabaseSQLite.close!(db) - return rm(db_path) -end + # test for dates in reverse sequence + for d_i in reverse(eachindex(df.date_time)) + cached_1 = PSRDatabaseSQLite.read_mapped_timeseries( + db, + "Resource", + "some_vector1", + Float64; + date_time = DateTime(df.date_time[d_i]), + ) + _test_cache(cached_1, some_vector1_answer[d_i]) -function test_time_controller_missing() - path_schema = joinpath(@__DIR__, "test_time_controller.sql") - db_path = joinpath(@__DIR__, "test_time_controller_missing.sqlite") - GC.gc() - GC.gc() - if isfile(db_path) - rm(db_path) - end + cached_2 = PSRDatabaseSQLite.read_mapped_timeseries( + db, + "Resource", + "some_vector2", + Float64; + date_time = DateTime(df.date_time[d_i]), + ) + _test_cache(cached_2, some_vector2_answer[d_i]) - db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) - PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case", value1 = 1.0) - PSRDatabaseSQLite.SQLite.transaction(db.sqlite_db) do - df_timeseries_group1 = DataFrame(; - date_time = vcat([DateTime(0)], [DateTime(2000), DateTime(2001), DateTime(2002)]), - some_vector1 = vcat([missing], [3.0, 2.0, 1.0]), + cached_3 = PSRDatabaseSQLite.read_mapped_timeseries( + db, + "Resource", + "some_vector3", + Float64; + date_time = DateTime(df.date_time[d_i]), ) - PSRDatabaseSQLite.create_element!( + _test_cache(cached_3, some_vector3_answer[d_i]) + + cached_4 = PSRDatabaseSQLite.read_mapped_timeseries( db, - "Resource"; - label = "Resource 1", - group1 = df_timeseries_group1, + "Resource", + "some_vector4", + Float64; + date_time = DateTime(df.date_time[d_i]), ) + _test_cache(cached_4, some_vector4_answer[d_i]) - df_timeseries_group1 = DataFrame(; - date_time = vcat([DateTime(0)], [DateTime(2000), DateTime(2001), DateTime(2002)]), - some_vector1 = vcat([missing], [3.0, missing, 1.0]), + cached_5 = PSRDatabaseSQLite.read_mapped_timeseries( + db, + "Resource", + "some_vector5", + Float64; + date_time = DateTime(df.date_time[d_i]), ) - PSRDatabaseSQLite.create_element!( + _test_cache(cached_5, some_vector5_answer[d_i]) + + cached_6 = PSRDatabaseSQLite.read_mapped_timeseries( db, - "Resource"; - label = "Resource 2", - group1 = df_timeseries_group1, + "Resource", + "some_vector6", + Float64; + date_time = DateTime(df.date_time[d_i]), ) + _test_cache(cached_6, some_vector6_answer[d_i]) + end - df_timeseries_group1 = DataFrame(; - date_time = vcat([DateTime(0)], [DateTime(2000), DateTime(2002)]), - some_vector1 = vcat([missing], [1.0, 3.0]), + # test for dates in random sequence + for d_i in [2, 1, 3] + cached_1 = PSRDatabaseSQLite.read_mapped_timeseries( + db, + "Resource", + "some_vector1", + Float64; + date_time = DateTime(df.date_time[d_i]), ) - PSRDatabaseSQLite.create_element!( 
+ _test_cache(cached_1, some_vector1_answer[d_i]) + + cached_2 = PSRDatabaseSQLite.read_mapped_timeseries( db, - "Resource"; - label = "Resource 3", - group1 = df_timeseries_group1, + "Resource", + "some_vector2", + Float64; + date_time = DateTime(df.date_time[d_i]), ) + _test_cache(cached_2, some_vector2_answer[d_i]) - df_timeseries_group1 = DataFrame(; - date_time = vcat([DateTime(0)], [DateTime(2000), DateTime(2001), DateTime(2002)]), - some_vector1 = [missing for i in 1:4], + cached_3 = PSRDatabaseSQLite.read_mapped_timeseries( + db, + "Resource", + "some_vector3", + Float64; + date_time = DateTime(df.date_time[d_i]), ) - return PSRDatabaseSQLite.create_element!( + _test_cache(cached_3, some_vector3_answer[d_i]) + + cached_4 = PSRDatabaseSQLite.read_mapped_timeseries( db, - "Resource"; - label = "Resource 4", - group1 = df_timeseries_group1, + "Resource", + "some_vector4", + Float64; + date_time = DateTime(df.date_time[d_i]), + ) + _test_cache(cached_4, some_vector4_answer[d_i]) + + cached_5 = PSRDatabaseSQLite.read_mapped_timeseries( + db, + "Resource", + "some_vector5", + Float64; + date_time = DateTime(df.date_time[d_i]), ) + _test_cache(cached_5, some_vector5_answer[d_i]) + + cached_6 = PSRDatabaseSQLite.read_mapped_timeseries( + db, + "Resource", + "some_vector6", + Float64; + date_time = DateTime(df.date_time[d_i]), + ) + _test_cache(cached_6, some_vector6_answer[d_i]) end PSRDatabaseSQLite.close!(db) - db = PSRDatabaseSQLite.load_db(db_path; read_only = true) + return rm(db_path) +end - cached_data_new = PSRDatabaseSQLite.read_mapped_timeseries( - db, - "Resource", - "some_vector1", - Float64; - date_time = DateTime(2000), - ) - @test cached_data_new[1] == 3.0 - @test cached_data_new[2] == 3.0 - @test cached_data_new[3] == 1.0 - @test isnan(cached_data_new[4]) +function test_time_controller_read_more_agents() + path_schema = joinpath(@__DIR__, "test_time_controller.sql") + db_path = joinpath(@__DIR__, "test_time_controller_read_multiple.sqlite") + GC.gc() + GC.gc() + if isfile(db_path) + rm(db_path) + end - cached_data_new = PSRDatabaseSQLite.read_mapped_timeseries( + db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) + PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case", value1 = 1.0) + + df = DataFrame(; + date_time = [DateTime(2000), DateTime(2001), DateTime(2002)], + some_vector1 = [missing, 1.0, 2.0], + some_vector2 = [1.0, missing, 5.0], + ) + PSRDatabaseSQLite.create_element!( db, - "Resource", - "some_vector1", - Float64; - date_time = DateTime(2001), + "Resource"; + label = "Resource 1", + group1 = df, ) - @test cached_data_new[1] == 2.0 - @test cached_data_new[2] == 3.0 - @test cached_data_new[3] == 1.0 - @test isnan(cached_data_new[4]) - cached_data_new = PSRDatabaseSQLite.read_mapped_timeseries( + df2 = DataFrame(; + date_time = [DateTime(2000), DateTime(2001), DateTime(2002)], + some_vector1 = [missing, 10.0, 20.0], + some_vector2 = [10.0, missing, 50.0], + ) + PSRDatabaseSQLite.create_element!( db, - "Resource", - "some_vector1", - Float64; - date_time = DateTime(2002), + "Resource"; + label = "Resource 2", + group1 = df2, ) - @test cached_data_new[1] == 1.0 - @test cached_data_new[2] == 1.0 - @test cached_data_new[3] == 3.0 - @test isnan(cached_data_new[4]) + + PSRDatabaseSQLite.close!(db) + db = PSRDatabaseSQLite.load_db(db_path; read_only = true) + + some_vector1_answer = [[NaN, NaN], [1.0, 10.0], [2.0, 20.0]] + some_vector2_answer = [[1.0, 10.0], [1.0, 10.0], [5.0, 50.0]] + + # test for dates in correct 
sequence + for d_i in eachindex(df.date_time) + cached_1 = PSRDatabaseSQLite.read_mapped_timeseries( + db, + "Resource", + "some_vector1", + Float64; + date_time = DateTime(df.date_time[d_i]), + ) + _test_cache(cached_1, some_vector1_answer[d_i]) + + cached_2 = PSRDatabaseSQLite.read_mapped_timeseries( + db, + "Resource", + "some_vector2", + Float64; + date_time = DateTime(df.date_time[d_i]), + ) + _test_cache(cached_2, some_vector2_answer[d_i]) + end PSRDatabaseSQLite.close!(db) return rm(db_path) diff --git a/test/PSRDatabaseSQLite/test_time_controller/test_time_controller.sql b/test/PSRDatabaseSQLite/test_time_controller/test_time_controller.sql index 4ec4c837..29fa916e 100644 --- a/test/PSRDatabaseSQLite/test_time_controller/test_time_controller.sql +++ b/test/PSRDatabaseSQLite/test_time_controller/test_time_controller.sql @@ -21,6 +21,8 @@ CREATE TABLE Resource_timeseries_group1 ( some_vector2 REAL, some_vector3 REAL, some_vector4 REAL, + some_vector5 REAL, + some_vector6 REAL, FOREIGN KEY(id) REFERENCES Resource(id) ON DELETE CASCADE ON UPDATE CASCADE, PRIMARY KEY (id, date_time) ) STRICT; \ No newline at end of file From c178f910eafab274c24c857babcc450dd58f47c2 Mon Sep 17 00:00:00 2001 From: guilhermebodin Date: Wed, 3 Jul 2024 12:30:57 -0300 Subject: [PATCH 17/34] add function to count elements in a table --- src/PSRDatabaseSQLite/read.jl | 12 ++++++++++++ src/PSRDatabaseSQLite/time_controller.jl | 3 +++ 2 files changed, 15 insertions(+) diff --git a/src/PSRDatabaseSQLite/read.jl b/src/PSRDatabaseSQLite/read.jl index 100b3162..74465ae5 100644 --- a/src/PSRDatabaseSQLite/read.jl +++ b/src/PSRDatabaseSQLite/read.jl @@ -6,6 +6,18 @@ const READ_METHODS_BY_CLASS_OF_ATTRIBUTE = Dict( TimeSeriesFile => "read_time_series_file", ) +function number_of_elements(db::DatabaseSQLite, collection_id::String)::Int + query = "SELECT COUNT(*) FROM $collection_id" + result = DBInterface.execute(db.sqlite_db, query) + for row in result + return row[1] + end +end + +function _collection_has_any_data(db::DatabaseSQLite, collection_id::String)::Bool + return number_of_elements(db, collection_id) > 0 +end + function _get_id( db::DatabaseSQLite, collection_id::String, diff --git a/src/PSRDatabaseSQLite/time_controller.jl b/src/PSRDatabaseSQLite/time_controller.jl index 0c3c925e..3d620051 100644 --- a/src/PSRDatabaseSQLite/time_controller.jl +++ b/src/PSRDatabaseSQLite/time_controller.jl @@ -160,6 +160,9 @@ function read_mapped_timeseries( :read, ) @assert _is_read_only(db) "Time series mapping only works in read only databases" + if !(_collection_has_any_data(db, collection_id)) + return Vector{T}(undef, 0) + end collection_attribute = _collection_attribute(collection_id, attribute_id) attribute = _get_attribute(db, collection_id, attribute_id) if !haskey(db._time_controller.cache, collection_attribute) From b4813691c66c4051a822e66da6e3862880430f71 Mon Sep 17 00:00:00 2001 From: pedroripper Date: Wed, 3 Jul 2024 14:39:32 -0300 Subject: [PATCH 18/34] Tests for empty cache --- .../test_time_controller.jl | 116 ++++++++++++++++++ 1 file changed, 116 insertions(+) diff --git a/test/PSRDatabaseSQLite/test_time_controller/test_time_controller.jl b/test/PSRDatabaseSQLite/test_time_controller/test_time_controller.jl index d27496ed..d47da1db 100644 --- a/test/PSRDatabaseSQLite/test_time_controller/test_time_controller.jl +++ b/test/PSRDatabaseSQLite/test_time_controller/test_time_controller.jl @@ -298,6 +298,122 @@ function test_time_controller_read_more_agents() return rm(db_path) end +function 
test_time_controller_empty() + path_schema = joinpath(@__DIR__, "test_time_controller.sql") + db_path = joinpath(@__DIR__, "test_time_controller_read_empty.sqlite") + GC.gc() + GC.gc() + if isfile(db_path) + rm(db_path) + end + + db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) + PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case", value1 = 1.0) + + PSRDatabaseSQLite.close!(db) + db = PSRDatabaseSQLite.load_db(db_path; read_only = true) + + empty_cache = PSRDatabaseSQLite.read_mapped_timeseries( + db, + "Resource", + "some_vector1", + Float64; + date_time = DateTime(2000), + ) + _test_cache(empty_cache, []) + + PSRDatabaseSQLite.close!(db) + return rm(db_path) +end + +function test_time_controller_filled_then_empty() + path_schema = joinpath(@__DIR__, "test_time_controller.sql") + db_path = joinpath(@__DIR__, "test_time_controller_read_filled_then_empty.sqlite") + GC.gc() + GC.gc() + if isfile(db_path) + rm(db_path) + end + + db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) + PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case", value1 = 1.0) + + df = DataFrame(; + date_time = [DateTime(2000), DateTime(2001), DateTime(2002)], + some_vector1 = [missing, 1.0, 2.0], + some_vector2 = [1.0, missing, 5.0], + ) + PSRDatabaseSQLite.create_element!( + db, + "Resource"; + label = "Resource 1", + group1 = df, + ) + + df2 = DataFrame(; + date_time = [DateTime(2000), DateTime(2001), DateTime(2002)], + some_vector1 = [missing, 10.0, 20.0], + some_vector2 = [10.0, missing, 50.0], + ) + PSRDatabaseSQLite.create_element!( + db, + "Resource"; + label = "Resource 2", + group1 = df2, + ) + + PSRDatabaseSQLite.close!(db) + db = PSRDatabaseSQLite.load_db(db_path; read_only = true) + + some_vector1_answer = [[NaN, NaN], [1.0, 10.0], [2.0, 20.0]] + some_vector2_answer = [[1.0, 10.0], [1.0, 10.0], [5.0, 50.0]] + + # test for dates in correct sequence + for d_i in eachindex(df.date_time) + cached_1 = PSRDatabaseSQLite.read_mapped_timeseries( + db, + "Resource", + "some_vector1", + Float64; + date_time = DateTime(df.date_time[d_i]), + ) + _test_cache(cached_1, some_vector1_answer[d_i]) + + cached_2 = PSRDatabaseSQLite.read_mapped_timeseries( + db, + "Resource", + "some_vector2", + Float64; + date_time = DateTime(df.date_time[d_i]), + ) + _test_cache(cached_2, some_vector2_answer[d_i]) + end + + PSRDatabaseSQLite.close!(db) + + db = PSRDatabaseSQLite.load_db(db_path; read_only = false) + + PSRDatabaseSQLite.delete_element!(db, "Resource", "Resource 1") + PSRDatabaseSQLite.delete_element!(db, "Resource", "Resource 2") + + PSRDatabaseSQLite.close!(db) + + db = PSRDatabaseSQLite.load_db(db_path; read_only = true) + + empty_cache = PSRDatabaseSQLite.read_mapped_timeseries( + db, + "Resource", + "some_vector1", + Float64; + date_time = DateTime(2000), + ) + _test_cache(empty_cache, []) + + PSRDatabaseSQLite.close!(db) + + return rm(db_path) +end + function runtests() Base.GC.gc() Base.GC.gc() From bd6ffe55ff684ad36db6782c89233490e77c47cb Mon Sep 17 00:00:00 2001 From: pedroripper Date: Wed, 3 Jul 2024 18:30:43 -0300 Subject: [PATCH 19/34] Add docs --- docs/make.jl | 1 + docs/src/psrdatabasesqlite/time_controller.md | 193 ++++++++++++++++++ 2 files changed, 194 insertions(+) create mode 100644 docs/src/psrdatabasesqlite/time_controller.md diff --git a/docs/make.jl b/docs/make.jl index 93577b7c..f7a5a0e3 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -23,6 +23,7 @@ makedocs(; "PSRDatabaseSQLite Overview" 
=> String[ "psrdatabasesqlite/introduction.md", "psrdatabasesqlite/rules.md", + "psrdatabasesqlite/time_controller.md", ], "OpenStudy and OpenBinary Examples" => String[ "examples/reading_parameters.md", diff --git a/docs/src/psrdatabasesqlite/time_controller.md b/docs/src/psrdatabasesqlite/time_controller.md new file mode 100644 index 00000000..e33a9e48 --- /dev/null +++ b/docs/src/psrdatabasesqlite/time_controller.md @@ -0,0 +1,193 @@ +# Time Series + +It is possible to store time series data in the database. For that, there is a specific table format that must be followed. Consider the following example: + +```sql +CREATE TABLE Resource ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + label TEXT UNIQUE NOT NULL +) STRICT; + +CREATE TABLE Resource_timeseries_group1 ( + id INTEGER, + date_time TEXT NOT NULL, + some_vector1 REAL, + some_vector2 REAL, + FOREIGN KEY(id) REFERENCES Resource(id) ON DELETE CASCADE ON UPDATE CASCADE, + PRIMARY KEY (id, date_time) +) STRICT; +``` + +It is mandatory for a time series to be indexed by a `date_time` column with the following format: `YYYY-MM-DD HH:MM:SS`. You can use the `Dates.jl` package for handling this format. + +```julia +using Dates +date = DateTime(2024, 3, 1) # 2024-03-01T00:00:00 (March 1st, 2024) +``` + +Notice that in this example, there are two value columns `some_vector1` and `some_vector2`. You can have as many value columns as you want. You can also separate the time series data into different tables, by creating a table `Resource_timeseries_group2` for example. + +It is also possible to add more dimensions to your time series, such as `block` and `scenario`. + +```sql +CREATE TABLE Resource_timeseries_group2 ( + id INTEGER, + date_time TEXT NOT NULL, + block INTEGER NOT NULL, + some_vector3 REAL, + some_vector4 REAL, + FOREIGN KEY(id) REFERENCES Resource(id) ON DELETE CASCADE ON UPDATE CASCADE, + PRIMARY KEY (id, date_time, block) +) STRICT; +``` + +## Rules + +Time series in `PSRDatabaseSQLite` are very flexible. You can have missing values, and you can have sparse data. + +If you are querying for a time series entry that has a missing value, it first checks if there is a data with a `date_time` earlier than the queried `date_time`. If there is, it returns the value of the previous data. If there is no data earlier than the queried `date_time`, it returns a specified value according to the type of data you are querying. + +- For `Float64`, it returns `NaN`. +- For `Int64`, it returns `typemin(Int)`. +- For `String`, it returns `""` (empty String). +- For `DateTime`, it returns `typemin(DateTime)`. + +For example, if you have the following data: + +| **Date** | **some_vector1(Float64)** | **some_vector2(Float64)** | +|:--------:|:-----------:|:-----------:| +| 2020 | 1.0 | missing | +| 2021 | missing | 1.0 | +| 2022 | 3.0 | missing | + +1. If you query for `some_vector1` at `2020`, it returns `1.0`. +2. If you query for `some_vector2` at `2020`, it returns `NaN`. +3. If you query for `some_vector1` at `2021`, it returns `1.0`. +4. If you query for `some_vector2` at `2021`, it returns `1.0`. +5. If you query for `some_vector1` at `2022`, it returns `3.0`. +6. If you query for `some_vector2` at `2022`, it returns `1.0`. + + +## Inserting data + +When creating a new element that has a time series, you can pass this information via a `DataFrame`. Consider the collection `Resource` with the two time series tables `Resource_timeseries_group1` and `Resource_timeseries_group2`. 
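+
+The example below assumes `path_schema` and `db_path` are already defined. A minimal sketch of that setup, with hypothetical file names, would be:
+
+```julia
+# Hypothetical paths for illustration only; point these at your own schema file and output database.
+path_schema = joinpath(@__DIR__, "my_schema.sql")
+db_path = joinpath(@__DIR__, "my_study.sqlite")
+```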
+ +```julia +using DataFrames +using Dates +using PSRClassesInterface +PSRDatabaseSQLite = PSRClassesInterface.PSRDatabaseSQLite + +db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) + +PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case", value1 = 1.0) + +df_group1 = DataFrame(; + date_time = [DateTime(2000), DateTime(2001), DateTime(2002)], + some_vector1 = [missing, 1.0, 2.0], + some_vector2 = [1.0, missing, 5.0], + ) + +df_group2 = DataFrame(; + date_time = [ + DateTime(2000), + DateTime(2000), + DateTime(2000), + DateTime(2000), + DateTime(2001), + DateTime(2001), + DateTime(2001), + DateTime(2009), + ], + block = [1, 1, 1, 1, 2, 2, 2, 2], + some_vector3 = [1.0, 2.0, 3.0, 4.0, 1, 2, 3, 4], + some_vector4 = [1.0, 2.0, 3.0, 4.0, 1, 2, 3, 4], + ) + + +PSRDatabaseSQLite.create_element!( + db, + "Resource"; + label = "Resource 1", + group1 = df_group1, + group2 = df_group2, +) +``` + +## Reading data + +You can read the information from the time series in different ways. +First, you can read the information as a `DataFrame`. This dataframe can be filtered according to the desired dimension values. It can be also specific for an element or for all elements. In the last case, an array of dataframes is returned. + +### Filtering by element and `date_time` dimension +```julia +df = PSRDatabaseSQLite.read_time_series_df( + db, + "Resource", + "some_vector1", + "Resource 1"; + date_time = DateTime(2001), + ) +``` + +### No filtering by `date_time` +```julia +df = PSRDatabaseSQLite.read_time_series_df( + db, + "Resource", + "some_vector1", + "Resource 1" + ) +``` + +### Filtering by `block` and `date_time` for an element +```julia +df = PSRDatabaseSQLite.read_time_series_df( + db, + "Resource", + "some_vector3", + "Resource 1"; + date_time = DateTime(2000), + block = 1, + ) +``` + +### No filter and returing all elements +```julia +dfs = PSRDatabaseSQLite.read_time_series_dfs( + db, + "Resource", + "some_vector1" + ) +``` + + +## Reading data via a `TimeController` + +Reading time series data from the database can lead to performance issues when the time series is too large. To avoid this, you can use the `TimeController` to cache the previous and next non-missing values, according to the dimensions you are indexing the data. + +The `TimeController` is initialized automatically. You just need to use a different function when reading the data and always pass the dimensions values that you want. + +Also, the returned data for this type of function is a vector containing the values for all elements that contain the time series, for the queried dimensions. + +For example, consider the following table for `some_vector1`: + +| **Date** | **Resource 1** | **Resource 2** | +|:--------:|:-----------:|:-----------:| +| 2020 | 1.0 | missing | +| 2021 | missing | 1.0 | +| 2022 | 3.0 | missing | + +If you query the following: + +```julia +values = PSRDatabaseSQLite.read_mapped_timeseries( + db, + "Resource", + "some_vector1", + Float64; + date_time = DateTime(2020) +) +``` + +It will return `[1.0, NaN]`. 
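+
+Querying the same attribute again at a later date reuses the cache. A short sketch, following the table above:
+
+```julia
+values = PSRDatabaseSQLite.read_mapped_timeseries(
+    db,
+    "Resource",
+    "some_vector1",
+    Float64;
+    date_time = DateTime(2021),
+)
+```
+
+At `2021` the first resource has no value, so its previous value from `2020` is used and the call returns `[1.0, 1.0]`.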
\ No newline at end of file From e072cd799615f391810dcb9bfac6e5e7537f72e6 Mon Sep 17 00:00:00 2001 From: pedroripper Date: Thu, 4 Jul 2024 15:18:05 -0300 Subject: [PATCH 20/34] Change table names --- docs/src/psrdatabasesqlite/rules.md | 4 +- docs/src/psrdatabasesqlite/time_controller.md | 18 +- profiling/create_profile.jl | 1 - profiling/open_profile.jl | 2 +- src/PSRDatabaseSQLite/collection.jl | 46 +- src/PSRDatabaseSQLite/create.jl | 20 +- src/PSRDatabaseSQLite/database_sqlite.jl | 23 +- src/PSRDatabaseSQLite/read.jl | 164 +++---- src/PSRDatabaseSQLite/time_controller.jl | 55 +-- src/PSRDatabaseSQLite/update.jl | 2 +- src/PSRDatabaseSQLite/utils.jl | 2 +- src/PSRDatabaseSQLite/validate.jl | 29 +- .../test_create/test_create.jl | 16 +- .../test_create_parameters_and_vectors.sql | 2 +- .../test_create/test_create_time_series.sql | 8 +- .../test_psri_study_interface/toy_schema.sql | 2 +- test/PSRDatabaseSQLite/test_read/test_read.jl | 453 ------------------ .../PSRDatabaseSQLite/test_read/test_read.sql | 2 +- .../test_read_time_series.sql | 8 +- .../test_time_controller.sql | 2 +- .../test_time_series.jl} | 297 ++++++++---- .../test_create_time_series_files.sql | 2 +- .../test_update/test_update_time_series.sql | 4 +- .../test_valid_database.sql | 2 +- 24 files changed, 376 insertions(+), 788 deletions(-) rename test/PSRDatabaseSQLite/{test_read => test_time_series}/test_read_time_series.sql (89%) rename test/PSRDatabaseSQLite/{test_time_controller => test_time_series}/test_time_controller.sql (94%) rename test/PSRDatabaseSQLite/{test_time_controller/test_time_controller.jl => test_time_series/test_time_series.jl} (53%) diff --git a/docs/src/psrdatabasesqlite/rules.md b/docs/src/psrdatabasesqlite/rules.md index 3c241b4e..80086873 100644 --- a/docs/src/psrdatabasesqlite/rules.md +++ b/docs/src/psrdatabasesqlite/rules.md @@ -135,7 +135,7 @@ CREATE TABLE HydroPlant_vector_GaugingStation( ### Time Series - All Time Series for the elements from a Collection should be stored in a Table -- The Table name should be the same as the name of the Collection followed by `_timeseriesfiles`, as presented below +- The Table name should be the same as the name of the Collection followed by `_time_series_files`, as presented below

 COLLECTION_time_series_files

@@ -145,7 +145,7 @@ CREATE TABLE HydroPlant_vector_GaugingStation( Example: ```sql -CREATE TABLE Plant_timeseriesfiles ( +CREATE TABLE Plant_time_series_files ( generation TEXT, cost TEXT ) STRICT; diff --git a/docs/src/psrdatabasesqlite/time_controller.md b/docs/src/psrdatabasesqlite/time_controller.md index e33a9e48..f7a33bb7 100644 --- a/docs/src/psrdatabasesqlite/time_controller.md +++ b/docs/src/psrdatabasesqlite/time_controller.md @@ -8,7 +8,7 @@ CREATE TABLE Resource ( label TEXT UNIQUE NOT NULL ) STRICT; -CREATE TABLE Resource_timeseries_group1 ( +CREATE TABLE Resource_time_series_group1 ( id INTEGER, date_time TEXT NOT NULL, some_vector1 REAL, @@ -25,12 +25,12 @@ using Dates date = DateTime(2024, 3, 1) # 2024-03-01T00:00:00 (March 1st, 2024) ``` -Notice that in this example, there are two value columns `some_vector1` and `some_vector2`. You can have as many value columns as you want. You can also separate the time series data into different tables, by creating a table `Resource_timeseries_group2` for example. +Notice that in this example, there are two value columns `some_vector1` and `some_vector2`. You can have as many value columns as you want. You can also separate the time series data into different tables, by creating a table `Resource_time_series_group2` for example. It is also possible to add more dimensions to your time series, such as `block` and `scenario`. ```sql -CREATE TABLE Resource_timeseries_group2 ( +CREATE TABLE Resource_time_series_group2 ( id INTEGER, date_time TEXT NOT NULL, block INTEGER NOT NULL, @@ -70,7 +70,7 @@ For example, if you have the following data: ## Inserting data -When creating a new element that has a time series, you can pass this information via a `DataFrame`. Consider the collection `Resource` with the two time series tables `Resource_timeseries_group1` and `Resource_timeseries_group2`. +When creating a new element that has a time series, you can pass this information via a `DataFrame`. Consider the collection `Resource` with the two time series tables `Resource_time_series_group1` and `Resource_time_series_group2`. ```julia using DataFrames @@ -121,7 +121,7 @@ First, you can read the information as a `DataFrame`. 
This dataframe can be filt ### Filtering by element and `date_time` dimension ```julia -df = PSRDatabaseSQLite.read_time_series_df( +df = PSRDatabaseSQLite.read_time_series_table( db, "Resource", "some_vector1", @@ -132,7 +132,7 @@ df = PSRDatabaseSQLite.read_time_series_df( ### No filtering by `date_time` ```julia -df = PSRDatabaseSQLite.read_time_series_df( +df = PSRDatabaseSQLite.read_time_series_table( db, "Resource", "some_vector1", @@ -142,7 +142,7 @@ df = PSRDatabaseSQLite.read_time_series_df( ### Filtering by `block` and `date_time` for an element ```julia -df = PSRDatabaseSQLite.read_time_series_df( +df = PSRDatabaseSQLite.read_time_series_table( db, "Resource", "some_vector3", @@ -154,7 +154,7 @@ df = PSRDatabaseSQLite.read_time_series_df( ### No filter and returing all elements ```julia -dfs = PSRDatabaseSQLite.read_time_series_dfs( +dfs = PSRDatabaseSQLite.read_time_series_tables( db, "Resource", "some_vector1" @@ -181,7 +181,7 @@ For example, consider the following table for `some_vector1`: If you query the following: ```julia -values = PSRDatabaseSQLite.read_mapped_timeseries( +values = PSRDatabaseSQLite.read_time_series_row( db, "Resource", "some_vector1", diff --git a/profiling/create_profile.jl b/profiling/create_profile.jl index 2142a68e..782c6406 100644 --- a/profiling/create_profile.jl +++ b/profiling/create_profile.jl @@ -7,7 +7,6 @@ root_path = dirname(@__DIR__) Pkg.activate(root_path) using PSRClassesInterface - include("../script_time_controller.jl") @profile include("../script_time_controller.jl") pprof() diff --git a/profiling/open_profile.jl b/profiling/open_profile.jl index 5fcff148..7199bfd7 100644 --- a/profiling/open_profile.jl +++ b/profiling/open_profile.jl @@ -5,4 +5,4 @@ using PProf file_name = "profile.pb.gz" -PProf.refresh(file=file_name, webport = 57998) +PProf.refresh(; file = file_name, webport = 57998) diff --git a/src/PSRDatabaseSQLite/collection.jl b/src/PSRDatabaseSQLite/collection.jl index fbea77d3..5f092f6d 100644 --- a/src/PSRDatabaseSQLite/collection.jl +++ b/src/PSRDatabaseSQLite/collection.jl @@ -240,14 +240,14 @@ function _create_collection_vector_relations(db::SQLite.DB, collection_id::Strin return vector_relations end -function _get_timeseries_dimension_names(df_table_infos::DataFrame) +function _get_time_series_dimension_names(df_table_infos::DataFrame) dimension_names = Vector{String}(undef, 0) - for timeseries_attribute in eachrow(df_table_infos) - if timeseries_attribute.name == "id" + for time_series_attribute in eachrow(df_table_infos) + if time_series_attribute.name == "id" continue end - if timeseries_attribute.pk != 0 - push!(dimension_names, timeseries_attribute.name) + if time_series_attribute.pk != 0 + push!(dimension_names, time_series_attribute.name) end end return dimension_names @@ -258,39 +258,39 @@ function _create_collection_time_series(db::SQLite.DB, collection_id::String) time_series = OrderedDict{String, TimeSeries}() parent_collection = collection_id for table_name in time_series_tables - group_id = _id_of_timeseries_group(table_name) + group_id = _id_of_time_series_group(table_name) table_where_is_located = table_name df_table_infos = table_info(db, table_name) - dimension_names = _get_timeseries_dimension_names(df_table_infos) - for timeseries_attribute in eachrow(df_table_infos) - id = timeseries_attribute.name + dimension_names = _get_time_series_dimension_names(df_table_infos) + for time_series_attribute in eachrow(df_table_infos) + id = time_series_attribute.name if id == "id" || id == "date_time" # These are 
obligatory for every vector table # and have no point in being stored in the database definition. - if timeseries_attribute.pk == 0 + if time_series_attribute.pk == 0 psr_database_sqlite_error( - "Invalid table \"$(table_name)\" of timeseries attributes of collection \"$(collection_id)\". " * - "The column \"$(timeseries_attribute.name)\" is not a primary key but it should.", + "Invalid table \"$(table_name)\" of time_series attributes of collection \"$(collection_id)\". " * + "The column \"$(time_series_attribute.name)\" is not a primary key but it should.", ) end continue end # There is no point in storing the other primary keys of these tables - if timeseries_attribute.pk != 0 - if _sql_type_to_julia_type(id, timeseries_attribute.type) != Int64 + if time_series_attribute.pk != 0 + if _sql_type_to_julia_type(id, time_series_attribute.type) != Int64 psr_database_sqlite_error( - "Invalid table \"$(table_name)\" of timeseries attributes of collection \"$(collection_id)\". " * - "The column \"$(timeseries_attribute.name)\" is not an integer primary key but it should.", + "Invalid table \"$(table_name)\" of time_series attributes of collection \"$(collection_id)\". " * + "The column \"$(time_series_attribute.name)\" is not an integer primary key but it should.", ) end continue end - type = _sql_type_to_julia_type(id, timeseries_attribute.type) - default_value = _get_default_value(type, timeseries_attribute.dflt_value) - not_null = Bool(timeseries_attribute.notnull) + type = _sql_type_to_julia_type(id, time_series_attribute.type) + default_value = _get_default_value(type, time_series_attribute.dflt_value) + not_null = Bool(time_series_attribute.notnull) if haskey(time_series, id) psr_database_sqlite_error( - "Duplicated timeseries attribute \"$id\" in collection \"$collection_id\"", + "Duplicated time_series attribute \"$id\" in collection \"$collection_id\"", ) end time_series[id] = TimeSeries( @@ -404,8 +404,8 @@ function _id_of_vector_group(table_name::String) return string(matches.captures[1]) end -function _id_of_timeseries_group(table_name::String) - matches = match(r"_timeseries_(.*)", table_name) +function _id_of_time_series_group(table_name::String) + matches = match(r"_time_series_(.*)", table_name) return string(matches.captures[1]) end @@ -425,7 +425,7 @@ function _get_collection_time_series_tables( end function _get_collection_time_series_files_tables(::SQLite.DB, collection_id::String) - return string(collection_id, "_timeseriesfiles") + return string(collection_id, "_time_series_files") end function _validate_actions_on_foreign_key( diff --git a/src/PSRDatabaseSQLite/create.jl b/src/PSRDatabaseSQLite/create.jl index 7672f434..6345c09e 100644 --- a/src/PSRDatabaseSQLite/create.jl +++ b/src/PSRDatabaseSQLite/create.jl @@ -117,20 +117,20 @@ function _create_time_series!( db::DatabaseSQLite, collection_id::String, id::Integer, - dict_timeseries_attributes, + dict_time_series_attributes, ) - for (group, df) in dict_timeseries_attributes - timeseries_group_table_name = _timeseries_group_table_name(collection_id, string(group)) + for (group, df) in dict_time_series_attributes + time_series_group_table_name = _time_series_group_table_name(collection_id, string(group)) ids = fill(id, nrow(df)) DataFrames.insertcols!(df, 1, :id => ids) # Convert datetime column to string df[!, :date_time] = string.(df[!, :date_time]) # Add missing columns - missing_names_in_df = setdiff(_attributes_in_timeseries_group(db, collection_id, string(group)), string.(names(df))) + missing_names_in_df = 
setdiff(_attributes_in_time_series_group(db, collection_id, string(group)), string.(names(df))) for missing_attribute in missing_names_in_df df[!, Symbol(missing_attribute)] = fill(missing, nrow(df)) end - _insert_vectors_from_df(db, df, timeseries_group_table_name) + _insert_vectors_from_df(db, df, time_series_group_table_name) end end @@ -142,7 +142,7 @@ function _create_element!( _throw_if_collection_does_not_exist(db, collection_id) dict_scalar_attributes = Dict{Symbol, Any}() dict_vector_attributes = Dict{Symbol, Any}() - dict_timeseries_attributes = Dict{Symbol, Any}() + dict_time_series_attributes = Dict{Symbol, Any}() # Validate that the arguments will be valid for (key, value) in kwargs @@ -155,8 +155,8 @@ function _create_element!( end dict_vector_attributes[key] = value elseif isa(value, DataFrame) - _throw_if_not_timeseries_group(db, collection_id, string(key)) - dict_timeseries_attributes[key] = value + _throw_if_not_time_series_group(db, collection_id, string(key)) + dict_time_series_attributes[key] = value else _throw_if_is_time_series_file(db, collection_id, string(key)) _throw_if_not_scalar_attribute(db, collection_id, string(key)) @@ -182,13 +182,13 @@ function _create_element!( _create_vectors!(db, collection_id, id, dict_vector_attributes) end - if !isempty(dict_timeseries_attributes) + if !isempty(dict_time_series_attributes) id = get( dict_scalar_attributes, :id, _get_id(db, collection_id, dict_scalar_attributes[:label]), ) - _create_time_series!(db, collection_id, id, dict_timeseries_attributes) + _create_time_series!(db, collection_id, id, dict_time_series_attributes) end return nothing diff --git a/src/PSRDatabaseSQLite/database_sqlite.jl b/src/PSRDatabaseSQLite/database_sqlite.jl index 9587b74d..03da7185 100644 --- a/src/PSRDatabaseSQLite/database_sqlite.jl +++ b/src/PSRDatabaseSQLite/database_sqlite.jl @@ -3,7 +3,7 @@ Base.@kwdef mutable struct DatabaseSQLite collections_map::OrderedDict{String, Collection} read_only::Bool = false # TimeController is a cache that allows PSRDatabaseSQLite to - # store information about the last timeseries query. This is useful for avoiding to + # store information about the last time_series query. This is useful for avoiding to # re-query the database when the same query is made multiple times. # The TimeController is a private behaviour and whenever it is used # it changes the database mode to read-only. @@ -98,8 +98,7 @@ function DatabaseSQLite( read_only::Bool = false, ) sqlite_db = - # read_only ? SQLite.DB("file:" * database_path * "?mode=ro&immutable=1") : - SQLite.DB(database_path) + read_only ? 
SQLite.DB("file:" * database_path * "?mode=ro&immutable=1") : SQLite.DB(database_path) _set_default_pragmas!(sqlite_db) @@ -114,7 +113,7 @@ function DatabaseSQLite( db = DatabaseSQLite(; sqlite_db, collections_map, - read_only + read_only, ) return db end @@ -164,7 +163,7 @@ function _is_time_series( return haskey(collection.time_series, attribute_id) end -function _is_timeseries_group( +function _is_time_series_group( db::DatabaseSQLite, collection_id::String, group_id::String, @@ -298,27 +297,27 @@ function _map_of_groups_to_vector_attributes( return map_of_groups_to_vector_attributes end -function _attributes_in_timeseries_group( +function _attributes_in_time_series_group( db::DatabaseSQLite, collection_id::String, group_id::String, ) collection = _get_collection(db, collection_id) - attributes_in_timeseries_group = Vector{String}(undef, 0) + attributes_in_time_series_group = Vector{String}(undef, 0) for (_, attribute) in collection.time_series if attribute.group_id == group_id - push!(attributes_in_timeseries_group, attribute.id) + push!(attributes_in_time_series_group, attribute.id) end end - return attributes_in_timeseries_group + return attributes_in_time_series_group end function _vectors_group_table_name(collection_id::String, group::String) return string(collection_id, "_vector_", group) end -function _timeseries_group_table_name(collection_id::String, group::String) - return string(collection_id, "_timeseries_", group) +function _time_series_group_table_name(collection_id::String, group::String) + return string(collection_id, "_time_series_", group) end function _is_collection_id(name::String) @@ -331,7 +330,7 @@ function _is_collection_vector_table_name(name::String, collection_id::String) end function _is_collection_time_series_table_name(name::String, collection_id::String) - return startswith(name, "$(collection_id)_timeseries_") + return startswith(name, "$(collection_id)_time_series_") && !endswith(name, "_time_series_files") end _get_collection_ids(db::DatabaseSQLite) = collect(keys(db.collections_map)) diff --git a/src/PSRDatabaseSQLite/read.jl b/src/PSRDatabaseSQLite/read.jl index 74465ae5..6473a3c9 100644 --- a/src/PSRDatabaseSQLite/read.jl +++ b/src/PSRDatabaseSQLite/read.jl @@ -156,59 +156,8 @@ function _query_vector( return results end -function read_time_series_dfs( - db::DatabaseSQLite, - collection_id::String, - attribute_id::String; - read_exact_date::Bool = false, - dimensions..., -) - _throw_if_attribute_is_not_time_series( - db, - collection_id, - attribute_id, - :read, - ) - attribute = _get_attribute(db, collection_id, attribute_id) - ids_in_table = read_scalar_parameters(db, collection_id, "id") - - results = DataFrame[] - for id in ids_in_table - push!(results, _read_time_series_df(db, collection_id, attribute, id; read_exact_date, dimensions...)) - end - - return results -end - -function read_time_series_df( - db::DatabaseSQLite, - collection_id::String, - attribute_id::String, - label::String; - read_exact_date::Bool = false, - dimensions..., -) - _throw_if_attribute_is_not_time_series( - db, - collection_id, - attribute_id, - :read, - ) - attribute = _get_attribute(db, collection_id, attribute_id) - id = _get_id(db, collection_id, label) - - return _read_time_series_df( - db, - collection_id, - attribute, - id; - read_exact_date, - dimensions..., - ) -end - function end_date_query(db::DatabaseSQLite, attribute::Attribute) - # First checks if the date or dimension value is within the range of the data. 
+ # First checks if the date or dimension value is within the range of the data. # Then it queries the closest date before the provided date. # If there is no date query the data with date 0 (which will probably return no data.) end_date_query = "SELECT MAX(DATE(date_time)) FROM $(attribute.table_where_is_located)" @@ -228,49 +177,6 @@ function closest_date_query(db::DatabaseSQLite, attribute::Attribute, dim_value: return DateTime(closest_date[!, 1][1]) end -function _read_time_series_df( - db::DatabaseSQLite, - collection_id::String, - attribute::Attribute, - id::Int; - read_exact_date::Bool = false, - dimensions..., -) - _validate_time_series_dimensions(collection_id, attribute, dimensions) - - query = string("SELECT ", join(attribute.dimension_names, ",", ", "), ", ", attribute.id) - query *= " FROM $(attribute.table_where_is_located) WHERE id = '$id'" - if !isempty(dimensions) - query *= " AND " - i = 0 - for (dim_name, dim_value) in dimensions - if dim_name == :date_time - if read_exact_date - query *= "DATE($dim_name) = DATE('$(dim_value)')" - else - end_date = end_date_query(db, attribute) - closest_date = closest_date_query(db, attribute, dim_value) - date_to_equal_in_query = if dim_value > end_date - DateTime(0) - else - closest_date - end - # query the closest date and make it equal to the provided date. - query *= "DATE($dim_name) = DATE('$(date_to_equal_in_query)')" - end - else - query *= "$(dim_name) = '$dim_value'" - end - i += 1 - if i < length(dimensions) - query *= " AND " - end - end - end - - return DBInterface.execute(db.sqlite_db, query) |> DataFrame -end - """ TODO """ @@ -465,6 +371,74 @@ function read_time_series_file( end end +function read_time_series_row( + db, + collection_id::String, + attribute_id::String; + date_time::DateTime, +) + _throw_if_attribute_is_not_time_series( + db, + collection_id, + attribute_id, + :read, + ) + @assert _is_read_only(db) "Time series mapping only works in read only databases" + + collection_attribute = _collection_attribute(collection_id, attribute_id) + attribute = _get_attribute(db, collection_id, attribute_id) + + T = attribute.type + + if !(_collection_has_any_data(db, collection_id)) + return Vector{T}(undef, 0) + end + if !haskey(db._time_controller.cache, collection_attribute) + db._time_controller.cache[collection_attribute] = _start_time_controller_cache(db, attribute, date_time, T) + end + cache = db._time_controller.cache[collection_attribute] + # If we don`t need to update anything we just return the data + if _no_need_to_query_any_id(cache, date_time) + cache.last_date_requested = date_time + return cache.data + end + # If we need to update the cache we update the dates and the data + _update_time_controller_cache!(cache, db, attribute, date_time) + return cache.data +end + +function _read_time_series_table( + db::DatabaseSQLite, + attribute::Attribute, + id::Int, +) + query = string("SELECT ", join(attribute.dimension_names, ",", ", "), ", ", attribute.id) + query *= " FROM $(attribute.table_where_is_located) WHERE id = '$id'" + return DBInterface.execute(db.sqlite_db, query) |> DataFrame +end + +function read_time_series_table( + db::DatabaseSQLite, + collection_id::String, + attribute_id::String, + label::String, +) + _throw_if_attribute_is_not_time_series( + db, + collection_id, + attribute_id, + :read, + ) + attribute = _get_attribute(db, collection_id, attribute_id) + id = _get_id(db, collection_id, label) + + return _read_time_series_table( + db, + attribute, + id, + ) +end + function _treat_query_result( 
query_results::Vector{Missing}, attribute::Attribute, diff --git a/src/PSRDatabaseSQLite/time_controller.jl b/src/PSRDatabaseSQLite/time_controller.jl index 3d620051..ef5acd6a 100644 --- a/src/PSRDatabaseSQLite/time_controller.jl +++ b/src/PSRDatabaseSQLite/time_controller.jl @@ -41,13 +41,13 @@ function _update_time_controller_cache!( cache::TimeControllerCache, db, attribute::Attribute, - date_time::DateTime + date_time::DateTime, ) - _update_time_controller_cache_dates!(cache, db, attribute, date_time) for (i, id) in enumerate(cache._collection_ids) - cache.data[i] = _request_time_series_data_for_time_controller_cache(db, attribute, id, cache.closest_previous_date_with_data[i], eltype(cache.data)) + cache.data[i] = + _request_time_series_data_for_time_controller_cache(db, attribute, id, cache.closest_previous_date_with_data[i]) end return nothing @@ -58,14 +58,16 @@ function _request_time_series_data_for_time_controller_cache( attribute::Attribute, id::Int, date_time::DateTime, - ::Type{T} -) where T +) query = """ SELECT $(attribute.id) FROM $(attribute.table_where_is_located) WHERE id = $id AND DATETIME(date_time) = DATETIME('$date_time') """ result = DBInterface.execute(db.sqlite_db, query) + + T = attribute.type + for row in result return T(row[1]) end @@ -76,7 +78,7 @@ function _update_time_controller_cache_dates!( cache::TimeControllerCache, db, attribute::Attribute, - date_time::DateTime + date_time::DateTime, ) cache.last_date_requested = date_time query = """ @@ -113,7 +115,7 @@ end function _no_need_to_query_any_id( cache::TimeControllerCache, - date_time::DateTime + date_time::DateTime, )::Bool return cache._closest_global_previous_date_with_data <= date_time < cache._closest_global_next_date_with_data end @@ -122,8 +124,8 @@ function _start_time_controller_cache( db, attribute::Attribute, date_time::DateTime, - ::Type{T} -) where T + ::Type{T}, +) where {T} _collection_ids = read_scalar_parameters(db, attribute.parent_collection, "id") data = fill(_psrdatabasesqlite_null_value(T), length(_collection_ids)) closest_previous_date_with_data = fill(typemin(DateTime), length(_collection_ids)) @@ -138,43 +140,10 @@ function _start_time_controller_cache( closest_next_date_with_data, _closest_global_previous_date_with_data, _closest_global_next_date_with_data, - _collection_ids + _collection_ids, ) _update_time_controller_cache!(cache, db, attribute, date_time) return cache end - -function read_mapped_timeseries( - db, - collection_id::String, - attribute_id::String, - type::Type{T}; - date_time::DateTime -) where T - _throw_if_attribute_is_not_time_series( - db, - collection_id, - attribute_id, - :read, - ) - @assert _is_read_only(db) "Time series mapping only works in read only databases" - if !(_collection_has_any_data(db, collection_id)) - return Vector{T}(undef, 0) - end - collection_attribute = _collection_attribute(collection_id, attribute_id) - attribute = _get_attribute(db, collection_id, attribute_id) - if !haskey(db._time_controller.cache, collection_attribute) - db._time_controller.cache[collection_attribute] = _start_time_controller_cache(db, attribute, date_time, type) - end - cache = db._time_controller.cache[collection_attribute] - # If we don`t need to update anything we just return the data - if _no_need_to_query_any_id(cache, date_time) - cache.last_date_requested = date_time - return cache.data - end - # If we need to update the cache we update the dates and the data - _update_time_controller_cache!(cache, db, attribute, date_time) - return cache.data -end \ No 
newline at end of file diff --git a/src/PSRDatabaseSQLite/update.jl b/src/PSRDatabaseSQLite/update.jl index e71740fa..5e8ba618 100644 --- a/src/PSRDatabaseSQLite/update.jl +++ b/src/PSRDatabaseSQLite/update.jl @@ -272,7 +272,7 @@ function set_time_series_file!( kwargs..., ) _throw_if_collection_does_not_exist(db, collection_id) - table_name = collection_id * "_timeseriesfiles" + table_name = collection_id * "_time_series_files" dict_time_series = Dict() for (key, value) in kwargs if !isa(value, AbstractString) diff --git a/src/PSRDatabaseSQLite/utils.jl b/src/PSRDatabaseSQLite/utils.jl index dcc3e0d8..5aeffe47 100644 --- a/src/PSRDatabaseSQLite/utils.jl +++ b/src/PSRDatabaseSQLite/utils.jl @@ -75,7 +75,7 @@ end function load_db(database_path::String; read_only::Bool = false) db = try DatabaseSQLite( - database_path, + database_path; read_only = read_only, ) catch e diff --git a/src/PSRDatabaseSQLite/validate.jl b/src/PSRDatabaseSQLite/validate.jl index ac2ac566..07cafc12 100644 --- a/src/PSRDatabaseSQLite/validate.jl +++ b/src/PSRDatabaseSQLite/validate.jl @@ -1,6 +1,6 @@ # just for reference this are the main regexes # the functions not commented implement combinations of them -# with other reserved words such as vector, relation and timeseries. +# with other reserved words such as vector, relation and time_series. # _regex_table_name() = Regex("(?:[A-Z][a-z]*)+") # _regex_column_name() = Regex("[a-z][a-z0-9]*(?:_{1}[a-z0-9]+)*") @@ -21,13 +21,12 @@ _is_valid_table_vector_name(table::String) = _is_valid_time_series_name(table::String) = !isnothing( match( - r"^(?:[A-Z][a-z]*)+_timeseries_[a-z][a-z0-9]*(?:_{1}[a-z0-9]+)*$", + r"^(?:[A-Z][a-z]*)+_time_series_[a-z][a-z0-9]*(?:_{1}[a-z0-9]+)*$", table, ), ) -_is_valid_table_timeseriesfiles_name(table::String) = - !isnothing(match(r"^(?:[A-Z][a-z]*)+_timeseriesfiles", table)) +_is_valid_table_time_series_files_name(table::String) = !isnothing(match(r"^(?:[A-Z][a-z]*)+_time_series_files", table)) _is_valid_time_series_attribute_value(value::String) = !isnothing( @@ -75,23 +74,23 @@ function _validate_table(db::SQLite.DB, table::String) return num_errors end -function _validate_timeseries_table(db::SQLite.DB, table::String) +function _validate_time_series_table(db::SQLite.DB, table::String) attributes = column_names(db, table) num_errors = 0 if !("id" in attributes) - @error("Table $table is a timeseries table and does not have an \"id\" column.") + @error("Table $table is a time_series table and does not have an \"id\" column.") num_errors += 1 end if !("date_time" in attributes) @error( - "Table $table is a timeseries table and does not have an \"date_time\" column.", + "Table $table is a time_series table and does not have an \"date_time\" column.", ) num_errors += 1 end return num_errors end -function _validate_timeseriesfiles_table(db::SQLite.DB, table::String) +function _validate_time_series_files_table(db::SQLite.DB, table::String) attributes = column_names(db, table) num_errors = 0 if ("id" in attributes) @@ -147,10 +146,10 @@ function _validate_database(db::SQLite.DB) end if _is_valid_table_name(table) num_errors += _validate_table(db, table) - elseif _is_valid_table_timeseriesfiles_name(table) - num_errors += _validate_timeseriesfiles_table(db, table) + elseif _is_valid_table_time_series_files_name(table) + num_errors += _validate_time_series_files_table(db, table) elseif _is_valid_time_series_name(table) - num_errors += _validate_timeseries_table(db, table) + num_errors += _validate_time_series_table(db, table) elseif 
_is_valid_table_vector_name(table) num_errors += _validate_vector_table(db, table) else @@ -159,8 +158,8 @@ function _validate_database(db::SQLite.DB) Valid table name formats are: - Collections: NameOfCollection - Vector attributes: NameOfCollection_vector_group_id - - Time series: NameOfCollection_timeseries_group_id - - Time series files: NameOfCollection_timeseriesfiles + - Time series: NameOfCollection_time_series_group_id + - Time series files: NameOfCollection_time_series_files """) num_errors += 1 end @@ -345,12 +344,12 @@ function _throw_if_not_vector_attribute( return nothing end -function _throw_if_not_timeseries_group( +function _throw_if_not_time_series_group( db::DatabaseSQLite, collection::String, group::String, ) - if !_is_timeseries_group(db, collection, group) + if !_is_time_series_group(db, collection, group) psr_database_sqlite_error( "Group \"$group\" is not a time series group. ", ) diff --git a/test/PSRDatabaseSQLite/test_create/test_create.jl b/test/PSRDatabaseSQLite/test_create/test_create.jl index 77651c22..ee5b7f10 100644 --- a/test/PSRDatabaseSQLite/test_create/test_create.jl +++ b/test/PSRDatabaseSQLite/test_create/test_create.jl @@ -274,17 +274,17 @@ function test_create_time_series() PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case", value1 = 1.0) for i in 1:3 - df_timeseries_group1 = DataFrame(; + df_time_series_group1 = DataFrame(; date_time = [DateTime(2000), DateTime(2001)], some_vector1 = [1.0, 2.0] .* i, some_vector2 = [2.0, 3.0] .* i, ) - df_timeseries_group2 = DataFrame(; + df_time_series_group2 = DataFrame(; date_time = [DateTime(2000), DateTime(2000), DateTime(2001), DateTime(2001)], block = [1, 2, 1, 2], some_vector3 = [1.0, missing, 3.0, 4.0] .* i, ) - df_timeseries_group3 = DataFrame(; + df_time_series_group3 = DataFrame(; date_time = [ DateTime(2000), DateTime(2000), @@ -304,13 +304,13 @@ function test_create_time_series() db, "Resource"; label = "Resource $i", - group1 = df_timeseries_group1, - group2 = df_timeseries_group2, - group3 = df_timeseries_group3, + group1 = df_time_series_group1, + group2 = df_time_series_group2, + group3 = df_time_series_group3, ) end - df_timeseries_group5 = DataFrame(; + df_time_series_group5 = DataFrame(; date_time = [DateTime(2000), DateTime(2001)], some_vector1 = [1.0, 2.0], some_vector2 = [2.0, 3.0], @@ -320,7 +320,7 @@ function test_create_time_series() db, "Resource"; label = "Resource 4", - group5 = df_timeseries_group5, + group5 = df_time_series_group5, ) PSRDatabaseSQLite.close!(db) diff --git a/test/PSRDatabaseSQLite/test_create/test_create_parameters_and_vectors.sql b/test/PSRDatabaseSQLite/test_create/test_create_parameters_and_vectors.sql index a80781b3..a0106f2c 100644 --- a/test/PSRDatabaseSQLite/test_create/test_create_parameters_and_vectors.sql +++ b/test/PSRDatabaseSQLite/test_create/test_create_parameters_and_vectors.sql @@ -81,7 +81,7 @@ CREATE TABLE Process_vector_outputs ( PRIMARY KEY (id, vector_index) ) STRICT; -CREATE TABLE Plant_timeseriesfiles ( +CREATE TABLE Plant_time_series_files ( generation TEXT, prices TEXT ) STRICT; \ No newline at end of file diff --git a/test/PSRDatabaseSQLite/test_create/test_create_time_series.sql b/test/PSRDatabaseSQLite/test_create/test_create_time_series.sql index 6fd20e5c..67aeb17d 100644 --- a/test/PSRDatabaseSQLite/test_create/test_create_time_series.sql +++ b/test/PSRDatabaseSQLite/test_create/test_create_time_series.sql @@ -15,7 +15,7 @@ CREATE TABLE Resource ( type TEXT NOT NULL DEFAULT "D" ) STRICT; -CREATE TABLE 
Resource_timeseries_group1 ( +CREATE TABLE Resource_time_series_group1 ( id INTEGER, date_time TEXT NOT NULL, some_vector1 REAL, @@ -24,7 +24,7 @@ CREATE TABLE Resource_timeseries_group1 ( PRIMARY KEY (id, date_time) ) STRICT; -CREATE TABLE Resource_timeseries_group2 ( +CREATE TABLE Resource_time_series_group2 ( id INTEGER, date_time TEXT NOT NULL, block INTEGER NOT NULL, @@ -34,7 +34,7 @@ CREATE TABLE Resource_timeseries_group2 ( PRIMARY KEY (id, date_time, block) ) STRICT; -CREATE TABLE Resource_timeseries_group3 ( +CREATE TABLE Resource_time_series_group3 ( id INTEGER, date_time TEXT NOT NULL, block INTEGER NOT NULL, @@ -45,7 +45,7 @@ CREATE TABLE Resource_timeseries_group3 ( PRIMARY KEY (id, date_time, block, segment) ) STRICT; -CREATE TABLE Resource_timeseries_group4 ( +CREATE TABLE Resource_time_series_group4 ( id INTEGER, date_time TEXT NOT NULL, block INTEGER NOT NULL, diff --git a/test/PSRDatabaseSQLite/test_psri_study_interface/toy_schema.sql b/test/PSRDatabaseSQLite/test_psri_study_interface/toy_schema.sql index eaa9be29..a391c41b 100644 --- a/test/PSRDatabaseSQLite/test_psri_study_interface/toy_schema.sql +++ b/test/PSRDatabaseSQLite/test_psri_study_interface/toy_schema.sql @@ -59,7 +59,7 @@ CREATE TABLE Plant_vector_cost_relation ( PRIMARY KEY (id, vector_index) ) STRICT; -CREATE TABLE Plant_timeseriesfiles ( +CREATE TABLE Plant_time_series_files ( generation TEXT, cost TEXT ) STRICT; \ No newline at end of file diff --git a/test/PSRDatabaseSQLite/test_read/test_read.jl b/test/PSRDatabaseSQLite/test_read/test_read.jl index c68c30a9..58a9bb22 100644 --- a/test/PSRDatabaseSQLite/test_read/test_read.jl +++ b/test/PSRDatabaseSQLite/test_read/test_read.jl @@ -272,459 +272,6 @@ function test_read_time_series_files() return rm(db_path) end -function test_read_timeseries_single() - path_schema = joinpath(@__DIR__, "test_read_time_series.sql") - db_path = joinpath(@__DIR__, "test_read_time_series.sqlite") - db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) - - PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case", value1 = 1.0) - - for i in 1:3 - df_timeseries_group1 = DataFrame(; - date_time = [DateTime(2000), DateTime(2001)], - some_vector1 = [1.0, 2.0] .* i, - some_vector2 = [2.0, 3.0] .* i, - ) - df_timeseries_group2 = DataFrame(; - date_time = [DateTime(2000), DateTime(2000), DateTime(2001), DateTime(2001)], - block = [1, 2, 1, 2], - some_vector3 = [1.0, missing, 3.0, 4.0] .* i, - ) - df_timeseries_group3 = DataFrame(; - date_time = [ - DateTime(2000), - DateTime(2000), - DateTime(2000), - DateTime(2000), - DateTime(2001), - DateTime(2001), - DateTime(2001), - DateTime(2009), - ], - block = [1, 1, 1, 1, 2, 2, 2, 2], - segment = [1, 2, 3, 4, 1, 2, 3, 4], - some_vector5 = [1.0, 2.0, 3.0, 4.0, 1, 2, 3, 4] .* i, - some_vector6 = [1.0, 2.0, 3.0, 4.0, 1, 2, 3, 4] .* i, - ) - PSRDatabaseSQLite.create_element!( - db, - "Resource"; - label = "Resource $i", - group1 = df_timeseries_group1, - group2 = df_timeseries_group2, - group3 = df_timeseries_group3, - ) - end - - # some errors - - df_empty = PSRDatabaseSQLite.read_time_series_df( - db, - "Resource", - "some_vector1", - "Resource 1"; - date_time = DateTime(1998), - ) - @test isempty(df_empty) - - df_empty = PSRDatabaseSQLite.read_time_series_df( - db, - "Resource", - "some_vector1", - "Resource 1"; - date_time = DateTime(2030), - ) - @test isempty(df_empty) - - df_empty = PSRDatabaseSQLite.read_time_series_df( - db, - "Resource", - "some_vector5", - "Resource 1"; - date_time = 
DateTime(2030), - block = 20, - ) - @test isempty(df_empty) - - df_wrong_date = PSRDatabaseSQLite.read_time_series_df( - db, - "Resource", - "some_vector5", - "Resource 1"; - date_time = DateTime(2003), - ) - @test df_wrong_date.date_time[1] == string(DateTime(2001)) - - # return single dataframe - - for i in 1:3 - df_timeseries_group1 = DataFrame(; - date_time = [DateTime(2000), DateTime(2001)], - some_vector1 = [1.0, 2.0] .* i, - some_vector2 = [2.0, 3.0] .* i, - ) - df_timeseries_group2 = DataFrame(; - date_time = [DateTime(2000), DateTime(2000), DateTime(2001), DateTime(2001)], - block = [1, 2, 1, 2], - some_vector3 = [1.0, missing, 3.0, 4.0] .* i, - ) - df_timeseries_group3 = DataFrame(; - date_time = [ - DateTime(2000), - DateTime(2000), - DateTime(2000), - DateTime(2000), - DateTime(2001), - DateTime(2001), - DateTime(2001), - DateTime(2009), - ], - block = [1, 1, 1, 1, 2, 2, 2, 2], - segment = [1, 2, 3, 4, 1, 2, 3, 4], - some_vector5 = [1.0, 2.0, 3.0, 4.0, 1, 2, 3, 4] .* i, - some_vector6 = [1.0, 2.0, 3.0, 4.0, 1, 2, 3, 4] .* i, - ) - - for row in eachrow(df_timeseries_group1) - df = PSRDatabaseSQLite.read_time_series_df( - db, - "Resource", - "some_vector1", - "Resource $i"; - row.date_time, - ) - @test df.date_time == string.([row.date_time]) - @test df.some_vector1 == [row.some_vector1] - - df = PSRDatabaseSQLite.read_time_series_df( - db, - "Resource", - "some_vector2", - "Resource $i"; - row.date_time, - ) - @test df.date_time == string.([row.date_time]) - @test df.some_vector2 == [row.some_vector2] - end - - for row in eachrow(df_timeseries_group2) - - # single element query - - df = PSRDatabaseSQLite.read_time_series_df( - db, - "Resource", - "some_vector3", - "Resource $i"; - row.date_time, - block = row.block, - ) - if ismissing(row.some_vector3) - @test ismissing(df.some_vector3[1]) - else - @test df.some_vector3 == [row.some_vector3] - end - @test df.block == [row.block] - - df = PSRDatabaseSQLite.read_time_series_df( - db, - "Resource", - "some_vector4", - "Resource $i"; - row.date_time, - block = row.block, - ) - @test isempty(df.some_vector4) - - # two-element query - - df = PSRDatabaseSQLite.read_time_series_df( - db, - "Resource", - "some_vector3", - "Resource $i"; - row.date_time, - ) - df_to_compare = df_timeseries_group2[ - (df_timeseries_group2.date_time.==row.date_time), :] - @test size(df, 1) == size(df_to_compare, 1) - for df_i in 1:size(df, 1) - if ismissing(df_to_compare.some_vector3[df_i]) - @test ismissing(df.some_vector3[df_i]) - else - @test df.some_vector3[df_i] == df_to_compare.some_vector3[df_i] - end - @test df.block[df_i] == df_to_compare.block[df_i] - end - - # all elements query - - df = PSRDatabaseSQLite.read_time_series_df( - db, - "Resource", - "some_vector3", - "Resource $i"; - ) - for df_i in 1:size(df, 1) - if ismissing(df_timeseries_group2.some_vector3[df_i]) - @test ismissing(df.some_vector3[df_i]) - else - @test df.some_vector3[df_i] == df_timeseries_group2.some_vector3[df_i] - end - @test df.block[df_i] == df_timeseries_group2.block[df_i] - @test df.date_time[df_i] == string.(df_timeseries_group2.date_time[df_i]) - end - end - - for row in eachrow(df_timeseries_group3) - - # single element query - - df = PSRDatabaseSQLite.read_time_series_df( - db, - "Resource", - "some_vector5", - "Resource $i"; - row.date_time, - block = row.block, - segment = row.segment, - ) - @test df.date_time == string.([row.date_time]) - @test df.block == [row.block] - @test df.segment == [row.segment] - @test df.some_vector5 == [row.some_vector5] - - df = 
PSRDatabaseSQLite.read_time_series_df( - db, - "Resource", - "some_vector6", - "Resource $i"; - row.date_time, - block = row.block, - segment = row.segment, - ) - @test df.date_time == string.([row.date_time]) - @test df.block == [row.block] - @test df.segment == [row.segment] - @test df.some_vector6 == [row.some_vector6] - - # two-element query - - df = PSRDatabaseSQLite.read_time_series_df( - db, - "Resource", - "some_vector5", - "Resource $i"; - row.date_time, - block = row.block, - ) - df_to_compare = df_timeseries_group3[ - (df_timeseries_group3.date_time.==row.date_time).&(df_timeseries_group3.block.==row.block), :] - @test size(df, 1) == size(df_to_compare, 1) - for df_i in 1:size(df, 1) - @test df.some_vector5[df_i] == df_to_compare.some_vector5[df_i] - @test df.block[df_i] == df_to_compare.block[df_i] - end - - df = PSRDatabaseSQLite.read_time_series_df( - db, - "Resource", - "some_vector5", - "Resource $i"; - row.date_time, - segment = row.segment, - ) - - df_to_compare = df_timeseries_group3[ - (df_timeseries_group3.date_time.==row.date_time).&(df_timeseries_group3.segment.==row.segment), :] - @test size(df, 1) == size(df_to_compare, 1) - for df_i in 1:size(df, 1) - @test df.some_vector5[df_i] == df_to_compare.some_vector5[df_i] - @test df.block[df_i] == df_to_compare.block[df_i] - end - - df = PSRDatabaseSQLite.read_time_series_df( - db, - "Resource", - "some_vector6", - "Resource $i"; - row.date_time, - block = row.block, - segment = row.segment, - ) - - df_to_compare = df_timeseries_group3[ - (df_timeseries_group3.date_time.==row.date_time).&(df_timeseries_group3.block.==row.block).&(df_timeseries_group3.segment.==row.segment), - :] - @test size(df, 1) == size(df_to_compare, 1) - for df_i in 1:size(df, 1) - @test df.some_vector6[df_i] == df_to_compare.some_vector6[df_i] - @test df.block[df_i] == df_to_compare.block[df_i] - @test df.segment[df_i] == df_to_compare.segment[df_i] - @test df.date_time[df_i] == string.(df_to_compare.date_time[df_i]) - end - - # three-element query - - df = PSRDatabaseSQLite.read_time_series_df( - db, - "Resource", - "some_vector5", - "Resource $i"; - row.date_time, - ) - df_to_compare = df_timeseries_group3[ - (df_timeseries_group3.date_time.==row.date_time), :] - @test size(df, 1) == size(df_to_compare, 1) - for df_i in 1:size(df, 1) - @test df.some_vector5[df_i] == df_to_compare.some_vector5[df_i] - @test df.block[df_i] == df_to_compare.block[df_i] - @test df.segment[df_i] == df_to_compare.segment[df_i] - end - - # all elements query - - df = PSRDatabaseSQLite.read_time_series_df( - db, - "Resource", - "some_vector5", - "Resource $i"; - ) - for df_i in 1:size(df, 1) - @test df.some_vector5[df_i] == df_timeseries_group3.some_vector5[df_i] - @test df.block[df_i] == df_timeseries_group3.block[df_i] - @test df.segment[df_i] == df_timeseries_group3.segment[df_i] - @test df.date_time[df_i] == string.(df_timeseries_group3.date_time[df_i]) - end - end - end - - PSRDatabaseSQLite.close!(db) - GC.gc() - GC.gc() - # rm(db_path) - @test true - return nothing -end - -function test_read_timeseries_multiple() - path_schema = joinpath(@__DIR__, "test_read_time_series.sql") - db_path = joinpath(@__DIR__, "test_read_time_series.sqlite") - db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) - - PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case", value1 = 1.0) - - for i in 1:3 - df_timeseries_group1 = DataFrame(; - date_time = [DateTime(2000), DateTime(2001)], - some_vector1 = [1.0, 2.0] .* i, - some_vector2 
= [2.0, 3.0] .* i, - ) - df_timeseries_group2 = DataFrame(; - date_time = [DateTime(2000), DateTime(2000), DateTime(2001), DateTime(2001)], - block = [1, 2, 1, 2], - some_vector3 = [1.0, missing, 3.0, 4.0] .* i, - ) - df_timeseries_group3 = DataFrame(; - date_time = [ - DateTime(2000), - DateTime(2000), - DateTime(2000), - DateTime(2000), - DateTime(2001), - DateTime(2001), - DateTime(2001), - DateTime(2009), - ], - block = [1, 1, 1, 1, 2, 2, 2, 2], - segment = [1, 2, 3, 4, 1, 2, 3, 4], - some_vector5 = [1.0, 2.0, 3.0, 4.0, 1, 2, 3, 4] .* i, - some_vector6 = [1.0, 2.0, 3.0, 4.0, 1, 2, 3, 4] .* i, - ) - PSRDatabaseSQLite.create_element!( - db, - "Resource"; - label = "Resource $i", - group1 = df_timeseries_group1, - group2 = df_timeseries_group2, - group3 = df_timeseries_group3, - ) - end - - # return multiple DataFrames - - dates_df1 = [DateTime(2000), DateTime(2001)] - some_vector1 = [[1.0, 2.0, 3.0] .* i for i in 1:3] - some_vector2 = [[2.0, 3.0, 4.0] .* i for i in 1:3] - - for i in eachindex(dates_df1) - dfs = PSRDatabaseSQLite.read_time_series_dfs( - db, - "Resource", - "some_vector1"; - date_time = dates_df1[i], - ) - - for j in 1:3 - df = dfs[j] - @test df.date_time == string.([dates_df1[i]]) - @test df.some_vector1 == [some_vector1[j][i]] - end - - dfs = PSRDatabaseSQLite.read_time_series_dfs( - db, - "Resource", - "some_vector2"; - date_time = dates_df1[i], - ) - - for j in 1:3 - df = dfs[j] - @test df.date_time == string.([dates_df1[i]]) - @test df.some_vector2 == [some_vector2[j][i]] - end - end - - PSRDatabaseSQLite.close!(db) - GC.gc() - GC.gc() - rm(db_path) - @test true - return nothing -end - -function test_read_wrong_date() - path_schema = joinpath(@__DIR__, "test_read_time_series.sql") - db_path = joinpath(@__DIR__, "test_read_time_series.sqlite") - db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) - - PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case", value1 = 1.0) - - df = DataFrame(; - date_time = [DateTime(2000), DateTime(2001), DateTime(2002)], - some_vector1 = [1.0, 2.0, missing], - some_vector2 = [2.0, 3.0, 4.0], - ) - - PSRDatabaseSQLite.create_element!( - db, - "Resource"; - label = "Resource 1", - group1 = df, - ) - - df = PSRDatabaseSQLite.read_time_series_df( - db, - "Resource", - "some_vector1", - "Resource 1"; - date_time = DateTime(2002), - ) - - @test df.date_time == string.([DateTime(2001)]) - @test df.some_vector1 == [2.0] -end - function runtests() Base.GC.gc() Base.GC.gc() diff --git a/test/PSRDatabaseSQLite/test_read/test_read.sql b/test/PSRDatabaseSQLite/test_read/test_read.sql index 18e9cfc7..29ec562b 100644 --- a/test/PSRDatabaseSQLite/test_read/test_read.sql +++ b/test/PSRDatabaseSQLite/test_read/test_read.sql @@ -54,7 +54,7 @@ CREATE TABLE Plant_vector_cost_relation ( PRIMARY KEY (id, vector_index) ) STRICT; -CREATE TABLE Plant_timeseriesfiles ( +CREATE TABLE Plant_time_series_files ( wind_speed TEXT, wind_direction TEXT ) STRICT; \ No newline at end of file diff --git a/test/PSRDatabaseSQLite/test_read/test_read_time_series.sql b/test/PSRDatabaseSQLite/test_time_series/test_read_time_series.sql similarity index 89% rename from test/PSRDatabaseSQLite/test_read/test_read_time_series.sql rename to test/PSRDatabaseSQLite/test_time_series/test_read_time_series.sql index 6fd20e5c..67aeb17d 100644 --- a/test/PSRDatabaseSQLite/test_read/test_read_time_series.sql +++ b/test/PSRDatabaseSQLite/test_time_series/test_read_time_series.sql @@ -15,7 +15,7 @@ CREATE TABLE Resource ( type TEXT NOT NULL 
DEFAULT "D" ) STRICT; -CREATE TABLE Resource_timeseries_group1 ( +CREATE TABLE Resource_time_series_group1 ( id INTEGER, date_time TEXT NOT NULL, some_vector1 REAL, @@ -24,7 +24,7 @@ CREATE TABLE Resource_timeseries_group1 ( PRIMARY KEY (id, date_time) ) STRICT; -CREATE TABLE Resource_timeseries_group2 ( +CREATE TABLE Resource_time_series_group2 ( id INTEGER, date_time TEXT NOT NULL, block INTEGER NOT NULL, @@ -34,7 +34,7 @@ CREATE TABLE Resource_timeseries_group2 ( PRIMARY KEY (id, date_time, block) ) STRICT; -CREATE TABLE Resource_timeseries_group3 ( +CREATE TABLE Resource_time_series_group3 ( id INTEGER, date_time TEXT NOT NULL, block INTEGER NOT NULL, @@ -45,7 +45,7 @@ CREATE TABLE Resource_timeseries_group3 ( PRIMARY KEY (id, date_time, block, segment) ) STRICT; -CREATE TABLE Resource_timeseries_group4 ( +CREATE TABLE Resource_time_series_group4 ( id INTEGER, date_time TEXT NOT NULL, block INTEGER NOT NULL, diff --git a/test/PSRDatabaseSQLite/test_time_controller/test_time_controller.sql b/test/PSRDatabaseSQLite/test_time_series/test_time_controller.sql similarity index 94% rename from test/PSRDatabaseSQLite/test_time_controller/test_time_controller.sql rename to test/PSRDatabaseSQLite/test_time_series/test_time_controller.sql index 29fa916e..4104f865 100644 --- a/test/PSRDatabaseSQLite/test_time_controller/test_time_controller.sql +++ b/test/PSRDatabaseSQLite/test_time_series/test_time_controller.sql @@ -14,7 +14,7 @@ CREATE TABLE Resource ( type TEXT NOT NULL DEFAULT "D" ) STRICT; -CREATE TABLE Resource_timeseries_group1 ( +CREATE TABLE Resource_time_series_group1 ( id INTEGER, date_time TEXT NOT NULL, some_vector1 REAL, diff --git a/test/PSRDatabaseSQLite/test_time_controller/test_time_controller.jl b/test/PSRDatabaseSQLite/test_time_series/test_time_series.jl similarity index 53% rename from test/PSRDatabaseSQLite/test_time_controller/test_time_controller.jl rename to test/PSRDatabaseSQLite/test_time_series/test_time_series.jl index d47da1db..3cde20cf 100644 --- a/test/PSRDatabaseSQLite/test_time_controller/test_time_controller.jl +++ b/test/PSRDatabaseSQLite/test_time_series/test_time_series.jl @@ -6,7 +6,7 @@ using Dates using DataFrames using Test -function _test_cache(cached_data, answer) +function _test_row(cached_data, answer) @test length(cached_data) == length(answer) for i in eachindex(cached_data) if isnan(answer[i]) @@ -17,7 +17,132 @@ function _test_cache(cached_data, answer) end end -# For each date, test the returned value with the expected value +function _test_table(table, answer) + for (i, row) in enumerate(eachrow(table)) + for col in names(table) + if col == "date_time" + @test DateTime(row[col]) == answer[i, col] + continue + end + if ismissing(answer[i, col]) + @test ismissing(row[col]) + else + @test row[col] == answer[i, col] + end + end + end +end + +##################### +# Time Series Table # +##################### + +function test_read_time_series_single() + path_schema = joinpath(@__DIR__, "test_read_time_series.sql") + db_path = joinpath(@__DIR__, "test_read_time_series.sqlite") + db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) + + PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case", value1 = 1.0) + + for i in 1:3 + df_time_series_group1 = DataFrame(; + date_time = [DateTime(2000), DateTime(2001)], + some_vector1 = [1.0, 2.0] .* i, + some_vector2 = [2.0, 3.0] .* i, + ) + df_time_series_group2 = DataFrame(; + date_time = [DateTime(2000), DateTime(2000), DateTime(2001), DateTime(2001)], + block 
= [1, 2, 1, 2], + some_vector3 = [1.0, missing, 3.0, 4.0] .* i, + ) + df_time_series_group3 = DataFrame(; + date_time = [ + DateTime(2000), + DateTime(2000), + DateTime(2000), + DateTime(2000), + DateTime(2001), + DateTime(2001), + DateTime(2001), + DateTime(2009), + ], + block = [1, 1, 1, 1, 2, 2, 2, 2], + segment = [1, 2, 3, 4, 1, 2, 3, 4], + some_vector5 = [1.0, 2.0, 3.0, 4.0, 1, 2, 3, 4] .* i, + some_vector6 = [1.0, 2.0, 3.0, 4.0, 1, 2, 3, 4] .* i, + ) + PSRDatabaseSQLite.create_element!( + db, + "Resource"; + label = "Resource $i", + group1 = df_time_series_group1, + group2 = df_time_series_group2, + group3 = df_time_series_group3, + ) + end + + # return single dataframe + + for i in 1:3 + df_group1_answer = DataFrame(; + date_time = [DateTime(2000), DateTime(2001)], + some_vector1 = [1.0, 2.0] .* i, + some_vector2 = [2.0, 3.0] .* i, + ) + df_group2_answer = DataFrame(; + date_time = [DateTime(2000), DateTime(2000), DateTime(2001), DateTime(2001)], + block = [1, 2, 1, 2], + some_vector3 = [1.0, missing, 3.0, 4.0] .* i, + ) + df_group3_answer = DataFrame(; + date_time = [ + DateTime(2000), + DateTime(2000), + DateTime(2000), + DateTime(2000), + DateTime(2001), + DateTime(2001), + DateTime(2001), + DateTime(2009), + ], + block = [1, 1, 1, 1, 2, 2, 2, 2], + segment = [1, 2, 3, 4, 1, 2, 3, 4], + some_vector5 = [1.0, 2.0, 3.0, 4.0, 1, 2, 3, 4] .* i, + some_vector6 = [1.0, 2.0, 3.0, 4.0, 1, 2, 3, 4] .* i, + ) + + all_answers = [df_group1_answer, df_group2_answer, df_group3_answer] + + # iterating over the three groups + + for df_answer in all_answers + for col in names(df_answer) + if startswith(col, "some_vector") + df = PSRDatabaseSQLite.read_time_series_table( + db, + "Resource", + col, + "Resource $i", + ) + _test_table(df, df_answer) + end + end + end + end + + PSRDatabaseSQLite.close!(db) + GC.gc() + GC.gc() + rm(db_path) + @test true + return nothing +end + +# ################## +# # Time Controller# +# ################## + +# # For each date, test the returned value with the expected value function test_time_controller_read() path_schema = joinpath(@__DIR__, "test_time_controller.sql") db_path = joinpath(@__DIR__, "test_time_controller_read.sqlite") @@ -58,173 +183,155 @@ function test_time_controller_read() # test for dates in correct sequence for d_i in eachindex(df.date_time) - cached_1 = PSRDatabaseSQLite.read_mapped_timeseries( + cached_1 = PSRDatabaseSQLite.read_time_series_row( db, "Resource", - "some_vector1", - Float64; + "some_vector1"; date_time = DateTime(df.date_time[d_i]), ) - _test_cache(cached_1, some_vector1_answer[d_i]) + _test_row(cached_1, some_vector1_answer[d_i]) - cached_2 = PSRDatabaseSQLite.read_mapped_timeseries( + cached_2 = PSRDatabaseSQLite.read_time_series_row( db, "Resource", - "some_vector2", - Float64; + "some_vector2"; date_time = DateTime(df.date_time[d_i]), ) - _test_cache(cached_2, some_vector2_answer[d_i]) + _test_row(cached_2, some_vector2_answer[d_i]) - cached_3 = PSRDatabaseSQLite.read_mapped_timeseries( + cached_3 = PSRDatabaseSQLite.read_time_series_row( db, "Resource", - "some_vector3", - Float64; + "some_vector3"; date_time = DateTime(df.date_time[d_i]), ) - _test_cache(cached_3, some_vector3_answer[d_i]) + _test_row(cached_3, some_vector3_answer[d_i]) - cached_4 = PSRDatabaseSQLite.read_mapped_timeseries( + cached_4 = PSRDatabaseSQLite.read_time_series_row( db, "Resource", - "some_vector4", - Float64; + "some_vector4"; date_time = DateTime(df.date_time[d_i]), ) - _test_cache(cached_4, some_vector4_answer[d_i]) + _test_row(cached_4, 
some_vector4_answer[d_i]) - cached_5 = PSRDatabaseSQLite.read_mapped_timeseries( + cached_5 = PSRDatabaseSQLite.read_time_series_row( db, "Resource", - "some_vector5", - Float64; + "some_vector5"; date_time = DateTime(df.date_time[d_i]), ) - _test_cache(cached_5, some_vector5_answer[d_i]) + _test_row(cached_5, some_vector5_answer[d_i]) - cached_6 = PSRDatabaseSQLite.read_mapped_timeseries( + cached_6 = PSRDatabaseSQLite.read_time_series_row( db, "Resource", - "some_vector6", - Float64; + "some_vector6"; date_time = DateTime(df.date_time[d_i]), ) - _test_cache(cached_6, some_vector6_answer[d_i]) + _test_row(cached_6, some_vector6_answer[d_i]) end # test for dates in reverse sequence for d_i in reverse(eachindex(df.date_time)) - cached_1 = PSRDatabaseSQLite.read_mapped_timeseries( + cached_1 = PSRDatabaseSQLite.read_time_series_row( db, "Resource", - "some_vector1", - Float64; + "some_vector1"; date_time = DateTime(df.date_time[d_i]), ) - _test_cache(cached_1, some_vector1_answer[d_i]) + _test_row(cached_1, some_vector1_answer[d_i]) - cached_2 = PSRDatabaseSQLite.read_mapped_timeseries( + cached_2 = PSRDatabaseSQLite.read_time_series_row( db, "Resource", - "some_vector2", - Float64; + "some_vector2"; date_time = DateTime(df.date_time[d_i]), ) - _test_cache(cached_2, some_vector2_answer[d_i]) + _test_row(cached_2, some_vector2_answer[d_i]) - cached_3 = PSRDatabaseSQLite.read_mapped_timeseries( + cached_3 = PSRDatabaseSQLite.read_time_series_row( db, "Resource", - "some_vector3", - Float64; + "some_vector3"; date_time = DateTime(df.date_time[d_i]), ) - _test_cache(cached_3, some_vector3_answer[d_i]) + _test_row(cached_3, some_vector3_answer[d_i]) - cached_4 = PSRDatabaseSQLite.read_mapped_timeseries( + cached_4 = PSRDatabaseSQLite.read_time_series_row( db, "Resource", - "some_vector4", - Float64; + "some_vector4"; date_time = DateTime(df.date_time[d_i]), ) - _test_cache(cached_4, some_vector4_answer[d_i]) + _test_row(cached_4, some_vector4_answer[d_i]) - cached_5 = PSRDatabaseSQLite.read_mapped_timeseries( + cached_5 = PSRDatabaseSQLite.read_time_series_row( db, "Resource", - "some_vector5", - Float64; + "some_vector5"; date_time = DateTime(df.date_time[d_i]), ) - _test_cache(cached_5, some_vector5_answer[d_i]) + _test_row(cached_5, some_vector5_answer[d_i]) - cached_6 = PSRDatabaseSQLite.read_mapped_timeseries( + cached_6 = PSRDatabaseSQLite.read_time_series_row( db, "Resource", - "some_vector6", - Float64; + "some_vector6"; date_time = DateTime(df.date_time[d_i]), ) - _test_cache(cached_6, some_vector6_answer[d_i]) + _test_row(cached_6, some_vector6_answer[d_i]) end # test for dates in random sequence for d_i in [2, 1, 3] - cached_1 = PSRDatabaseSQLite.read_mapped_timeseries( + cached_1 = PSRDatabaseSQLite.read_time_series_row( db, "Resource", - "some_vector1", - Float64; + "some_vector1"; date_time = DateTime(df.date_time[d_i]), ) - _test_cache(cached_1, some_vector1_answer[d_i]) + _test_row(cached_1, some_vector1_answer[d_i]) - cached_2 = PSRDatabaseSQLite.read_mapped_timeseries( + cached_2 = PSRDatabaseSQLite.read_time_series_row( db, "Resource", - "some_vector2", - Float64; + "some_vector2"; date_time = DateTime(df.date_time[d_i]), ) - _test_cache(cached_2, some_vector2_answer[d_i]) + _test_row(cached_2, some_vector2_answer[d_i]) - cached_3 = PSRDatabaseSQLite.read_mapped_timeseries( + cached_3 = PSRDatabaseSQLite.read_time_series_row( db, "Resource", - "some_vector3", - Float64; + "some_vector3"; date_time = DateTime(df.date_time[d_i]), ) - _test_cache(cached_3, 
some_vector3_answer[d_i]) + _test_row(cached_3, some_vector3_answer[d_i]) - cached_4 = PSRDatabaseSQLite.read_mapped_timeseries( + cached_4 = PSRDatabaseSQLite.read_time_series_row( db, "Resource", - "some_vector4", - Float64; + "some_vector4"; date_time = DateTime(df.date_time[d_i]), ) - _test_cache(cached_4, some_vector4_answer[d_i]) + _test_row(cached_4, some_vector4_answer[d_i]) - cached_5 = PSRDatabaseSQLite.read_mapped_timeseries( + cached_5 = PSRDatabaseSQLite.read_time_series_row( db, "Resource", - "some_vector5", - Float64; + "some_vector5"; date_time = DateTime(df.date_time[d_i]), ) - _test_cache(cached_5, some_vector5_answer[d_i]) + _test_row(cached_5, some_vector5_answer[d_i]) - cached_6 = PSRDatabaseSQLite.read_mapped_timeseries( + cached_6 = PSRDatabaseSQLite.read_time_series_row( db, "Resource", - "some_vector6", - Float64; + "some_vector6"; date_time = DateTime(df.date_time[d_i]), ) - _test_cache(cached_6, some_vector6_answer[d_i]) + _test_row(cached_6, some_vector6_answer[d_i]) end PSRDatabaseSQLite.close!(db) @@ -275,23 +382,21 @@ function test_time_controller_read_more_agents() # test for dates in correct sequence for d_i in eachindex(df.date_time) - cached_1 = PSRDatabaseSQLite.read_mapped_timeseries( + cached_1 = PSRDatabaseSQLite.read_time_series_row( db, "Resource", - "some_vector1", - Float64; + "some_vector1"; date_time = DateTime(df.date_time[d_i]), ) - _test_cache(cached_1, some_vector1_answer[d_i]) + _test_row(cached_1, some_vector1_answer[d_i]) - cached_2 = PSRDatabaseSQLite.read_mapped_timeseries( + cached_2 = PSRDatabaseSQLite.read_time_series_row( db, "Resource", - "some_vector2", - Float64; + "some_vector2"; date_time = DateTime(df.date_time[d_i]), ) - _test_cache(cached_2, some_vector2_answer[d_i]) + _test_row(cached_2, some_vector2_answer[d_i]) end PSRDatabaseSQLite.close!(db) @@ -313,14 +418,13 @@ function test_time_controller_empty() PSRDatabaseSQLite.close!(db) db = PSRDatabaseSQLite.load_db(db_path; read_only = true) - empty_cache = PSRDatabaseSQLite.read_mapped_timeseries( + empty_cache = PSRDatabaseSQLite.read_time_series_row( db, "Resource", - "some_vector1", - Float64; + "some_vector1"; date_time = DateTime(2000), ) - _test_cache(empty_cache, []) + _test_row(empty_cache, []) PSRDatabaseSQLite.close!(db) return rm(db_path) @@ -370,23 +474,21 @@ function test_time_controller_filled_then_empty() # test for dates in correct sequence for d_i in eachindex(df.date_time) - cached_1 = PSRDatabaseSQLite.read_mapped_timeseries( + cached_1 = PSRDatabaseSQLite.read_time_series_row( db, "Resource", - "some_vector1", - Float64; + "some_vector1"; date_time = DateTime(df.date_time[d_i]), ) - _test_cache(cached_1, some_vector1_answer[d_i]) + _test_row(cached_1, some_vector1_answer[d_i]) - cached_2 = PSRDatabaseSQLite.read_mapped_timeseries( + cached_2 = PSRDatabaseSQLite.read_time_series_row( db, "Resource", - "some_vector2", - Float64; + "some_vector2"; date_time = DateTime(df.date_time[d_i]), ) - _test_cache(cached_2, some_vector2_answer[d_i]) + _test_row(cached_2, some_vector2_answer[d_i]) end PSRDatabaseSQLite.close!(db) @@ -400,14 +502,13 @@ function test_time_controller_filled_then_empty() db = PSRDatabaseSQLite.load_db(db_path; read_only = true) - empty_cache = PSRDatabaseSQLite.read_mapped_timeseries( + empty_cache = PSRDatabaseSQLite.read_time_series_row( db, "Resource", - "some_vector1", - Float64; + "some_vector1"; date_time = DateTime(2000), ) - _test_cache(empty_cache, []) + _test_row(empty_cache, []) PSRDatabaseSQLite.close!(db) diff --git 
a/test/PSRDatabaseSQLite/test_update/test_create_time_series_files.sql b/test/PSRDatabaseSQLite/test_update/test_create_time_series_files.sql index f9d0588b..daed5bd4 100644 --- a/test/PSRDatabaseSQLite/test_update/test_create_time_series_files.sql +++ b/test/PSRDatabaseSQLite/test_update/test_create_time_series_files.sql @@ -14,7 +14,7 @@ CREATE TABLE Resource ( type TEXT NOT NULL DEFAULT "D" CHECK(type IN ('D', 'E', 'F')) ) STRICT; -CREATE TABLE Resource_timeseriesfiles ( +CREATE TABLE Resource_time_series_files ( wind_speed TEXT, wind_direction TEXT ) STRICT; \ No newline at end of file diff --git a/test/PSRDatabaseSQLite/test_update/test_update_time_series.sql b/test/PSRDatabaseSQLite/test_update/test_update_time_series.sql index 991ff13c..d4eb9048 100644 --- a/test/PSRDatabaseSQLite/test_update/test_update_time_series.sql +++ b/test/PSRDatabaseSQLite/test_update/test_update_time_series.sql @@ -11,7 +11,7 @@ CREATE TABLE Plant ( label TEXT UNIQUE NOT NULL ); -CREATE TABLE Plant_timeseriesfiles ( +CREATE TABLE Plant_time_series_files ( generation TEXT ); @@ -20,7 +20,7 @@ CREATE TABLE Resource ( label TEXT UNIQUE NOT NULL ); -CREATE TABLE Resource_timeseriesfiles ( +CREATE TABLE Resource_time_series_files ( generation TEXT, other_generation TEXT ); \ No newline at end of file diff --git a/test/PSRDatabaseSQLite/test_valid_database_definitions/test_valid_database.sql b/test/PSRDatabaseSQLite/test_valid_database_definitions/test_valid_database.sql index 6ee4811c..24393db8 100644 --- a/test/PSRDatabaseSQLite/test_valid_database_definitions/test_valid_database.sql +++ b/test/PSRDatabaseSQLite/test_valid_database_definitions/test_valid_database.sql @@ -60,7 +60,7 @@ CREATE TABLE Plant_vector_cost_relation ( PRIMARY KEY (id, vector_index) ) STRICT; -CREATE TABLE Plant_timeseriesfiles ( +CREATE TABLE Plant_time_series_files ( generation TEXT, cost TEXT ) STRICT; \ No newline at end of file From 53f94be018061cdceaef0d726d091e581b451272 Mon Sep 17 00:00:00 2001 From: pedroripper Date: Thu, 4 Jul 2024 18:33:22 -0300 Subject: [PATCH 21/34] Add update and delete time series --- script.jl | 78 ------- script_time_controller.jl | 95 --------- src/PSRDatabaseSQLite/delete.jl | 29 +++ src/PSRDatabaseSQLite/update.jl | 51 +++++ .../test_time_series/test_time_series.jl | 198 ++++++++++++++++++ time_controller.sql | 26 --- 6 files changed, 278 insertions(+), 199 deletions(-) delete mode 100644 script.jl delete mode 100644 script_time_controller.jl delete mode 100644 time_controller.sql diff --git a/script.jl b/script.jl deleted file mode 100644 index f10ca803..00000000 --- a/script.jl +++ /dev/null @@ -1,78 +0,0 @@ -using PSRClassesInterface.PSRDatabaseSQLite -using SQLite -using DataFrames -using Dates -using Test - -db_path = joinpath(@__DIR__, "test_create_time_series.sqlite") -GC.gc() -GC.gc() -if isfile(db_path) - rm(db_path) -end - -function test_create_time_series() - path_schema = raw"C:\Users\guilhermebodin\Documents\Github\PSRClassesInterface.jl\test\PSRDatabaseSQLite\test_create\test_create_time_series.sql" - db_path = joinpath(@__DIR__, "test_create_time_series.sqlite") - db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) - PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case", value1 = 1.0) - for i in 1:3 - df_timeseries_group1 = DataFrame( - date_time = [DateTime(2000), DateTime(2001)], - some_vector1 = [1.0, 2.0] .* i, - some_vector2 = [2.0, 3.0] .* i - ) - df_timeseries_group2 = DataFrame( - date_time = [DateTime(2000), 
DateTime(2000), DateTime(2001), DateTime(2001)], - block = [1, 2, 1, 2], - some_vector3 = [1.0, missing, 3.0, 4.0] .* i, - ) - df_timeseries_group3 = DataFrame( - date_time = [DateTime(2000), DateTime(2000), DateTime(2000), DateTime(2000), DateTime(2001), DateTime(2001), DateTime(2001), DateTime(2009)], - block = [1, 1, 1, 1, 2, 2, 2, 2], - segment = [1, 2, 3, 4, 1, 2, 3, 4], - some_vector5 = [1.0, 2.0, 3.0, 4.0, 1, 2, 3, 4] .* i, - some_vector6 = [1.0, 2.0, 3.0, 4.0, 1, 2, 3, 4] .* i, - ) - PSRDatabaseSQLite.create_element!( - db, - "Resource"; - label = "Resource $i", - group1 = df_timeseries_group1, - group2 = df_timeseries_group2, - group3 = df_timeseries_group3 - ) - end - - results = PSRDatabaseSQLite.read_time_series_df( - db, - "Resource", - "some_vector1", - "Resource 1"; - date_time = DateTime(2000) - ) - @show results - - results = PSRDatabaseSQLite.read_time_series_df( - db, - "Resource", - "some_vector5", - "Resource 1"; - date_time = DateTime(2002) - ) - @show results - - results = PSRDatabaseSQLite.read_time_series_df( - db, - "Resource", - "some_vector5", - "Resource 1" - ) - @show results - - PSRDatabaseSQLite.close!(db) - rm(db_path) - @test true -end - -test_create_time_series() \ No newline at end of file diff --git a/script_time_controller.jl b/script_time_controller.jl deleted file mode 100644 index da6612e2..00000000 --- a/script_time_controller.jl +++ /dev/null @@ -1,95 +0,0 @@ -using PSRClassesInterface.PSRDatabaseSQLite -using SQLite -using DataFrames -using Dates -using Test - -function test_create_time_series() - path_schema = joinpath(@__DIR__, "time_controller.sql") - db_path = joinpath(@__DIR__, "test_create_time_series.sqlite") - GC.gc() - GC.gc() - if isfile(db_path) - rm(db_path) - end - - db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) - PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case", value1 = 1.0) - PSRDatabaseSQLite.SQLite.transaction(db.sqlite_db) do - for i in 1:500 - df_timeseries_group1 = DataFrame( - date_time = vcat([DateTime(0)],[DateTime(i) for i in 1900:1979]), - some_vector1 = vcat([missing],[j for j in 1:80] .* i), - some_vector2 = vcat([1.0],[missing for j in 1:10], [j for j in 1:10] .* i, [missing for j in 1:60]), - some_vector3 = vcat([1.0], [missing for j in 1:80]), - some_vector4 = vcat([missing], [missing for j in 1:80]), - ) - PSRDatabaseSQLite.create_element!( - db, - "Resource"; - label = "Resource $i", - group1 = df_timeseries_group1, - ) - end - end - - PSRDatabaseSQLite.close!(db) -end - -function test_read_time_series() - db_path = joinpath(@__DIR__, "test_create_time_series.sqlite") - - db = PSRDatabaseSQLite.load_db(db_path; read_only = true) - - times = zeros(4) - - - for (j, date_time) in enumerate([DateTime(i) for i in 1900:1901]) - @show date_time - t1 = @timed PSRDatabaseSQLite.read_mapped_timeseries( - db, - "Resource", - "some_vector1", - Float64, - date_time = date_time - ) - # @show t1.value - - t2 = @timed PSRDatabaseSQLite.read_mapped_timeseries( - db, - "Resource", - "some_vector2", - Float64, - date_time = date_time - ) - - t3 = @timed PSRDatabaseSQLite.read_mapped_timeseries( - db, - "Resource", - "some_vector3", - Float64, - date_time = date_time - ) - - t4 = @timed PSRDatabaseSQLite.read_mapped_timeseries( - db, - "Resource", - "some_vector4", - Float64, - date_time = date_time - ) - - times .+= [t1.time, t2.time, t3.time, t4.time] - end - - @show times - - - PSRDatabaseSQLite.close!(db) - rm(db_path) -end - -@testset "Time Controller" begin - 
test_create_time_series() - test_read_time_series() -end \ No newline at end of file diff --git a/src/PSRDatabaseSQLite/delete.jl b/src/PSRDatabaseSQLite/delete.jl index ef3a7539..46f59de0 100644 --- a/src/PSRDatabaseSQLite/delete.jl +++ b/src/PSRDatabaseSQLite/delete.jl @@ -24,3 +24,32 @@ function delete_element!( ) return nothing end + +function _delete_time_series!( + db::DatabaseSQLite, + collection_id::String, + group_id::String, + id::Integer, +) + time_series_table_name = "$(collection_id)_time_series_$(group_id)" + + DBInterface.execute( + db.sqlite_db, + "DELETE FROM $(time_series_table_name) WHERE id = '$id'", + ) + return nothing +end + +function delete_time_series!( + db::DatabaseSQLite, + collection_id::String, + group_id::String, + label::String, +) + _throw_if_collection_does_not_exist(db, collection_id) + id = _get_id(db, collection_id, label) + + _delete_time_series!(db, collection_id, group_id, id) + + return nothing +end diff --git a/src/PSRDatabaseSQLite/update.jl b/src/PSRDatabaseSQLite/update.jl index 5e8ba618..771b71cf 100644 --- a/src/PSRDatabaseSQLite/update.jl +++ b/src/PSRDatabaseSQLite/update.jl @@ -327,3 +327,54 @@ function set_time_series_file!( end return nothing end + +function _update_time_series!( + db::DatabaseSQLite, + attribute::Attribute, + id::Integer, + val, + dimensions, +) + query = "UPDATE $(attribute.table_where_is_located) SET $(attribute.id) = '$val'" + query *= " WHERE id = '$id' AND " + for (i, (key, value)) in enumerate(dimensions) + if key == "date_time" + query *= "$(key) = DATE('$(value)')" + else + query *= "$(key) = '$(value)'" + end + if i < length(dimensions) + query *= " AND " + end + end + DBInterface.execute(db.sqlite_db, query) + return nothing +end + +function update_time_series!( + db::DatabaseSQLite, + collection_id::String, + attribute_id::String, + label::String, + val; + dimensions..., +) + _throw_if_attribute_is_not_time_series( + db, + collection_id, + attribute_id, + :update, + ) + attribute = _get_attribute(db, collection_id, attribute_id) + _validate_time_series_dimensions(collection_id, attribute, dimensions) + + if length(dimensions) != length(attribute.dimension_names) + psr_database_sqlite_error( + "The number of dimensions in the time series does not match the number of dimensions in the attribute. 
" * + "The attribute has $(attribute.num_dimensions) dimensions: $(join(attribute.dimension_names, ", ")).", + ) + end + + id = _get_id(db, collection_id, label) + return _update_time_series!(db, attribute, id, val, dimensions) +end diff --git a/test/PSRDatabaseSQLite/test_time_series/test_time_series.jl b/test/PSRDatabaseSQLite/test_time_series/test_time_series.jl index 3cde20cf..80ac9ae7 100644 --- a/test/PSRDatabaseSQLite/test_time_series/test_time_series.jl +++ b/test/PSRDatabaseSQLite/test_time_series/test_time_series.jl @@ -515,6 +515,204 @@ function test_time_controller_filled_then_empty() return rm(db_path) end +function test_update_time_series() + path_schema = joinpath(@__DIR__, "test_read_time_series.sql") + db_path = joinpath(@__DIR__, "test_update_time_series.sqlite") + db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) + + PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case", value1 = 1.0) + + df_time_series_group1 = DataFrame(; + date_time = [DateTime(2000), DateTime(2001)], + some_vector1 = [1.0, 2.0], + some_vector2 = [2.0, 3.0], + ) + + df_time_series_group3 = DataFrame(; + date_time = [ + DateTime(2000), + DateTime(2000), + DateTime(2000), + DateTime(2000), + DateTime(2001), + DateTime(2001), + DateTime(2001), + DateTime(2009), + ], + block = [1, 1, 1, 1, 2, 2, 2, 2], + segment = [1, 2, 3, 4, 1, 2, 3, 4], + some_vector5 = [1.0, 2.0, 3.0, 4.0, 1, 2, 3, 4], + some_vector6 = [1.0, 2.0, 3.0, 4.0, 1, 2, 3, 4], + ) + PSRDatabaseSQLite.create_element!( + db, + "Resource"; + label = "Resource 1", + group1 = df_time_series_group1, + group3 = df_time_series_group3, + ) + + PSRDatabaseSQLite.update_time_series!( + db, + "Resource", + "some_vector1", + "Resource 1", + 10.0; + date_time = DateTime(2001), + ) + + PSRDatabaseSQLite.update_time_series!( + db, + "Resource", + "some_vector2", + "Resource 1", + 50.0; + date_time = DateTime(2001), + ) + + PSRDatabaseSQLite.update_time_series!( + db, + "Resource", + "some_vector5", + "Resource 1", + 10.0; + date_time = DateTime(2000), + block = 1, + segment = 2, + ) + + PSRDatabaseSQLite.update_time_series!( + db, + "Resource", + "some_vector5", + "Resource 1", + 3.0; + date_time = DateTime(2000), + block = 1, + segment = 1, + ) + + PSRDatabaseSQLite.update_time_series!( + db, + "Resource", + "some_vector6", + "Resource 1", + 33.0; + date_time = DateTime(2000), + block = 1, + segment = 3, + ) + + @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.update_time_series!( + db, + "Resource", + "some_vector6", + "Resource 1", + 10.0; + date_time = DateTime(2000), + segment = 2, + ) + + df_group1_answer = DataFrame(; + date_time = [DateTime(2000), DateTime(2001)], + some_vector1 = [1.0, 10.0], + some_vector2 = [2.0, 50.0], + ) + df_group3_answer = DataFrame(; + date_time = [ + DateTime(2000), + DateTime(2000), + DateTime(2000), + DateTime(2000), + DateTime(2001), + DateTime(2001), + DateTime(2001), + DateTime(2009), + ], + block = [1, 1, 1, 1, 2, 2, 2, 2], + segment = [1, 2, 3, 4, 1, 2, 3, 4], + some_vector5 = [3.0, 10.0, 3.0, 4.0, 1, 2, 3, 4], + some_vector6 = [1.0, 2.0, 33.0, 4.0, 1, 2, 3, 4], + ) + + all_answers = [df_group1_answer, df_group3_answer] + + # iterating over the three groups + + for df_answer in all_answers + for col in names(df_answer) + if startswith(col, "some_vector") + df = PSRDatabaseSQLite.read_time_series_table( + db, + "Resource", + col, + "Resource 1", + ) + _test_table(df, df_answer) + end + end + end + + PSRDatabaseSQLite.close!(db) + GC.gc() + 
GC.gc() + rm(db_path) + @test true + return nothing +end + +function test_delete_time_series() + path_schema = joinpath(@__DIR__, "test_read_time_series.sql") + db_path = joinpath(@__DIR__, "test_delete_time_series.sqlite") + db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) + + PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case", value1 = 1.0) + + df_time_series_group1 = DataFrame(; + date_time = [DateTime(2000), DateTime(2001)], + some_vector1 = [1.0, 2.0], + some_vector2 = [2.0, 3.0], + ) + PSRDatabaseSQLite.create_element!( + db, + "Resource"; + label = "Resource 1", + group1 = df_time_series_group1, + ) + + PSRDatabaseSQLite.delete_time_series!( + db, + "Resource", + "group1", + "Resource 1", + ) + + df = PSRDatabaseSQLite.read_time_series_table( + db, + "Resource", + "some_vector1", + "Resource 1", + ) + + @test isempty(df) + + df = PSRDatabaseSQLite.read_time_series_table( + db, + "Resource", + "some_vector2", + "Resource 1", + ) + + @test isempty(df) + + PSRDatabaseSQLite.close!(db) + GC.gc() + GC.gc() + rm(db_path) + @test true + return nothing +end + function runtests() Base.GC.gc() Base.GC.gc() diff --git a/time_controller.sql b/time_controller.sql deleted file mode 100644 index 4ec4c837..00000000 --- a/time_controller.sql +++ /dev/null @@ -1,26 +0,0 @@ -PRAGMA user_version = 1; -PRAGMA foreign_keys = ON; - -CREATE TABLE Configuration ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - label TEXT UNIQUE NOT NULL, - value1 REAL NOT NULL DEFAULT 100, - enum1 TEXT NOT NULL DEFAULT 'A' CHECK(enum1 IN ('A', 'B', 'C')) -) STRICT; - -CREATE TABLE Resource ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - label TEXT UNIQUE NOT NULL, - type TEXT NOT NULL DEFAULT "D" -) STRICT; - -CREATE TABLE Resource_timeseries_group1 ( - id INTEGER, - date_time TEXT NOT NULL, - some_vector1 REAL, - some_vector2 REAL, - some_vector3 REAL, - some_vector4 REAL, - FOREIGN KEY(id) REFERENCES Resource(id) ON DELETE CASCADE ON UPDATE CASCADE, - PRIMARY KEY (id, date_time) -) STRICT; \ No newline at end of file From 1a564ada3046a9b9e63c41a82cc0b4783a608949 Mon Sep 17 00:00:00 2001 From: guilhermebodin Date: Fri, 5 Jul 2024 16:39:29 -0300 Subject: [PATCH 22/34] update --- src/PSRDatabaseSQLite/database_sqlite.jl | 7 +++ test/PSRDatabaseSQLite/time_series.jl | 70 ------------------------ 2 files changed, 7 insertions(+), 70 deletions(-) delete mode 100644 test/PSRDatabaseSQLite/time_series.jl diff --git a/src/PSRDatabaseSQLite/database_sqlite.jl b/src/PSRDatabaseSQLite/database_sqlite.jl index 03da7185..731e34ab 100644 --- a/src/PSRDatabaseSQLite/database_sqlite.jl +++ b/src/PSRDatabaseSQLite/database_sqlite.jl @@ -1,5 +1,6 @@ Base.@kwdef mutable struct DatabaseSQLite sqlite_db::SQLite.DB + database_path::String = "" collections_map::OrderedDict{String, Collection} read_only::Bool = false # TimeController is a cache that allows PSRDatabaseSQLite to @@ -11,6 +12,9 @@ Base.@kwdef mutable struct DatabaseSQLite end _is_read_only(db::DatabaseSQLite) = db.read_only +function database_path(db::DatabaseSQLite) + return db.database_path +end function _set_default_pragmas!(db::SQLite.DB) _set_foreign_keys_on!(db) @@ -53,6 +57,7 @@ function DatabaseSQLite_from_schema( db = DatabaseSQLite(; sqlite_db, + database_path, collections_map, ) @@ -87,6 +92,7 @@ function DatabaseSQLite_from_migrations( db = DatabaseSQLite(; sqlite_db, + database_path, collections_map, ) @@ -112,6 +118,7 @@ function DatabaseSQLite( db = DatabaseSQLite(; sqlite_db, + database_path, collections_map, 
read_only, ) diff --git a/test/PSRDatabaseSQLite/time_series.jl b/test/PSRDatabaseSQLite/time_series.jl deleted file mode 100644 index 953d944c..00000000 --- a/test/PSRDatabaseSQLite/time_series.jl +++ /dev/null @@ -1,70 +0,0 @@ -function test_time_series() - case_path = joinpath(@__DIR__, "data", "case_2") - if isfile(joinpath(case_path, "simplecase.sqlite")) - rm(joinpath(case_path, "simplecase.sqlite")) - end - - db = PSRI.create_study( - PSRI.PSRDatabaseSQLiteInterface(), - joinpath(case_path, "simplecase.sqlite"), - joinpath(case_path, "simple_schema.sql"); - val1 = 1, - ) - - PSRI.create_element!( - db, - "Plant"; - label = "Plant 1", - ) - - PSRI.create_element!( - db, - "Plant"; - label = "Plant 2", - ) - - iow = PSRI.open( - PSRI.OpenBinary.Writer, - joinpath(case_path, "generation"); - blocks = 3, - scenarios = 2, - stages = 12, - agents = PSRI.get_parms(db, "Plant", "label"), - unit = "MW", - ) - - for t in 1:12, s in 1:2, b in 1:3 - PSRI.write_registry(iow, [(t + s + b) * 100.0, (t + s + b) * 300.0], t, s, b) - end - - PSRI.close(iow) - - iow = PSRI.open( - PSRI.OpenBinary.Writer, - joinpath(case_path, "cost"); - blocks = 3, - scenarios = 2, - stages = 12, - agents = PSRI.get_parms(db, "Plant", "label"), - unit = "USD", - ) - - for t in 1:12, s in 1:2, b in 1:3 - PSRI.write_registry(iow, [(t + s + b) * 500.0, (t + s + b) * 400.0], t, s, b) - end - - PSRI.close(iow) - - PSRI.link_series_to_file( - db, - "Plant"; - generation = "generation", - cost = "cost", - ) - - PSRI.PSRDatabaseSQLite.close(db) - - return rm(joinpath(case_path, "simplecase.sqlite")) -end - -test_time_series() From 039f9329f7651866219cad247e000fa415a64123 Mon Sep 17 00:00:00 2001 From: pedroripper Date: Mon, 8 Jul 2024 13:24:14 -0300 Subject: [PATCH 23/34] Update Docs --- docs/make.jl | 2 +- .../{time_controller.md => time_series.md} | 126 +++++++++++------- 2 files changed, 76 insertions(+), 52 deletions(-) rename docs/src/psrdatabasesqlite/{time_controller.md => time_series.md} (57%) diff --git a/docs/make.jl b/docs/make.jl index f7a5a0e3..7ce61e5f 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -23,7 +23,7 @@ makedocs(; "PSRDatabaseSQLite Overview" => String[ "psrdatabasesqlite/introduction.md", "psrdatabasesqlite/rules.md", - "psrdatabasesqlite/time_controller.md", + "psrdatabasesqlite/time_series.md", ], "OpenStudy and OpenBinary Examples" => String[ "examples/reading_parameters.md", diff --git a/docs/src/psrdatabasesqlite/time_controller.md b/docs/src/psrdatabasesqlite/time_series.md similarity index 57% rename from docs/src/psrdatabasesqlite/time_controller.md rename to docs/src/psrdatabasesqlite/time_series.md index f7a33bb7..50cb3909 100644 --- a/docs/src/psrdatabasesqlite/time_controller.md +++ b/docs/src/psrdatabasesqlite/time_series.md @@ -1,6 +1,8 @@ # Time Series -It is possible to store time series data in the database. For that, there is a specific table format that must be followed. Consider the following example: +It is possible to store time series data in your database. Time series in `PSRDatabaseSQLite` are very flexible. You can have missing values, and you can have sparse data. + +There is a specific table format that must be followed. Consider the following example: ```sql CREATE TABLE Resource ( @@ -43,9 +45,9 @@ CREATE TABLE Resource_time_series_group2 ( ## Rules -Time series in `PSRDatabaseSQLite` are very flexible. You can have missing values, and you can have sparse data. +Time series in `PSRDatabaseSQLite` are very flexible. 
You can have missing values, and you can have sparse data.
 
-If you are querying for a time series row entry that has a missing value, it first checks whether there is an entry with a `date_time` earlier than the queried `date_time`. If there is, it returns the value of that previous entry. If there is no entry earlier than the queried `date_time`, it returns a specified value according to the type of data you are querying.
 
 - For `Float64`, it returns `NaN`.
 - For `Int64`, it returns `typemin(Int)`.
@@ -116,61 +118,44 @@ PSRDatabaseSQLite.create_element!(
 
 ## Reading data
 
-You can read the information from the time series in different ways.
-First, you can read the information as a `DataFrame`. This dataframe can be filtered according to the desired dimension values. It can be also specific for an element or for all elements. In the last case, an array of dataframes is returned.
+You can read the information from the time series in two different ways.
+
+### Reading as a table
+First, you can read the whole time series table for a given value, as a `DataFrame`.
 
-### Filtering by element and `date_time` dimension
 ```julia
 df = PSRDatabaseSQLite.read_time_series_table(
-        db,
-        "Resource",
-        "some_vector1",
-        "Resource 1";
-        date_time = DateTime(2001),
-    )
+    db,
+    "Resource",
+    "some_vector1",
+    "Resource 1",
+)
 ```
 
-### No filtering by `date_time`
-```julia
-df = PSRDatabaseSQLite.read_time_series_table(
-        db,
-        "Resource",
-        "some_vector1",
-        "Resource 1"
-    )
-```
+### Reading a single row
 
-### Filtering by `block` and `date_time` for an element
-```julia
-df = PSRDatabaseSQLite.read_time_series_table(
-        db,
-        "Resource",
-        "some_vector3",
-        "Resource 1";
-        date_time = DateTime(2000),
-        block = 1,
-    )
-```
+It is also possible to read a single row of the time series in the form of an array. This is useful when you want to query a specific dimension entry.
+For this function, repeated reads are faster because the previous and next non-missing values are cached.
 
-### No filter and returing all elements
 ```julia
-dfs = PSRDatabaseSQLite.read_time_series_tables(
-        db,
-        "Resource",
-        "some_vector1"
-    )
+values = PSRDatabaseSQLite.read_time_series_row(
+    db,
+    "Resource",
+    "some_vector1",
+    Float64;
+    date_time = DateTime(2020)
+)
 ```
+When querying a row, all returned values should be non-missing. However, if there is a missing value, the function will return the previous non-missing value. And if even the previous value is missing, it will return a specified value according to the type of data you are querying.
 
-## Reading data via a `TimeController`
-Reading time series data from the database can lead to performance issues when the time series is too large. To avoid this, you can use the `TimeController` to cache the previous and next non-missing values, according to the dimensions you are indexing the data.
-
-The `TimeController` is initialized automatically. You just need to use a different function when reading the data and always pass the dimensions values that you want.
-
-Also, the returned data for this type of function is a vector containing the values for all elements that contain the time series, for the queried dimensions. 
+- For `Float64`, it returns `NaN`.
+- For `Int64`, it returns `typemin(Int)`.
+- For `String`, it returns `""` (empty String).
+- For `DateTime`, it returns `typemin(DateTime)`.
 
 For example, if you have the following data for the time series `some_vector1`:
 
 | **Date** | **Resource 1** | **Resource 2** |
 |:--------:|:-----------:|:-----------:|
 | 2020 | 1.0 | missing |
 | 2021 | missing | 1.0 |
 | 2022 | 3.0 | missing |
 
-If you query the following:
+1. If you query at `2020`, it returns `[1.0, NaN]`.
+2. If you query at `2021`, it returns `[1.0, 1.0]`.
+3. If you query at `2022`, it returns `[3.0, 1.0]`.
+
+
+## Updating data
+
+When updating one of the entries of a time series for a given element and attribute, you need to specify the exact dimension values of the row you want to update.
+
+
+For example, consider a time series that has `block` and `date_time` dimensions.
 
 ```julia
-values = PSRDatabaseSQLite.read_time_series_row(
+PSRDatabaseSQLite.update_time_series!(
     db,
     "Resource",
-    "some_vector1",
-    Float64;
-    date_time = DateTime(2020)
+    "some_vector3",
+    "Resource 1",
+    10.0; # new value
+    date_time = DateTime(2000),
+    block = 1
 )
 ```
+
+## Deleting data
+
+You can delete the whole time series of an element for a given time series group.
+Consider the following table:
+
+```sql
+CREATE TABLE Resource_time_series_group1 (
+    id INTEGER,
+    date_time TEXT NOT NULL,
+    some_vector1 REAL,
+    some_vector2 REAL,
+    FOREIGN KEY(id) REFERENCES Resource(id) ON DELETE CASCADE ON UPDATE CASCADE,
+    PRIMARY KEY (id, date_time)
+) STRICT;
+```
+
+This table represents a "group" that stores two time series `some_vector1` and `some_vector2`. You can delete all the data from this group by calling the following function:
+
+```julia
+PSRDatabaseSQLite.delete_time_series!(
+    db,
+    "Resource",
+    "group1",
+    "Resource 1",
+)
+```
+
-It will return `[1.0, NaN]`. \ No newline at end of file
+When trying to read a time series that has been deleted, the function will return an empty `DataFrame`. 
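Taken together, the calls documented above compose into a short create, read, update, and delete workflow. The sketch below is a minimal, illustrative example rather than part of the package or its test suite: it assumes a schema file containing the `Resource` collection and the `Resource_time_series_group1` table shown earlier, and the file names `example.sqlite` and `schema.sql` are placeholders.

```julia
using PSRClassesInterface.PSRDatabaseSQLite
using DataFrames
using Dates

# Create a database from an existing schema file (paths are placeholders).
db = PSRDatabaseSQLite.create_empty_db_from_schema("example.sqlite", "schema.sql"; force = true)
PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case", value1 = 1.0)

# Create an element together with the time series stored in its group1 table.
df = DataFrame(;
    date_time = [DateTime(2020), DateTime(2021)],
    some_vector1 = [1.0, 2.0],
    some_vector2 = [2.0, 3.0],
)
PSRDatabaseSQLite.create_element!(db, "Resource"; label = "Resource 1", group1 = df)

# Update one existing row, read the full table back, then drop the whole group.
PSRDatabaseSQLite.update_time_series!(
    db, "Resource", "some_vector1", "Resource 1", 10.0;
    date_time = DateTime(2021),
)
table = PSRDatabaseSQLite.read_time_series_table(db, "Resource", "some_vector1", "Resource 1")
PSRDatabaseSQLite.delete_time_series!(db, "Resource", "group1", "Resource 1")
PSRDatabaseSQLite.close!(db)
```

Note that `update_time_series!` receives every dimension of the group (here only `date_time`), while `delete_time_series!` is addressed by group name rather than by attribute.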
From 5422d6321cbd259ab2efe872d4d9a1186dd90a08 Mon Sep 17 00:00:00 2001 From: pedroripper Date: Mon, 8 Jul 2024 15:50:22 -0300 Subject: [PATCH 24/34] Update regex --- src/PSRDatabaseSQLite/validate.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/PSRDatabaseSQLite/validate.jl b/src/PSRDatabaseSQLite/validate.jl index 07cafc12..c397eff3 100644 --- a/src/PSRDatabaseSQLite/validate.jl +++ b/src/PSRDatabaseSQLite/validate.jl @@ -21,7 +21,7 @@ _is_valid_table_vector_name(table::String) = _is_valid_time_series_name(table::String) = !isnothing( match( - r"^(?:[A-Z][a-z]*)+_time_series_[a-z][a-z0-9]*(?:_{1}[a-z0-9]+)*$", + r"^(?:[A-Z][a-z]*)+_time_series_(?!files$)[a-z][a-z0-9]*(?:_{1}[a-z0-9]+)*$", table, ), ) From fd1767342944c10b4a4b04ebc8f257e61c9306f0 Mon Sep 17 00:00:00 2001 From: pedroripper Date: Tue, 9 Jul 2024 15:41:34 -0300 Subject: [PATCH 25/34] Error handling and tests --- src/PSRDatabaseSQLite/create.jl | 6 ++ src/PSRDatabaseSQLite/update.jl | 32 ++++++- src/PSRDatabaseSQLite/validate.jl | 41 +++++++++ .../test_time_series/test_time_series.jl | 87 +++++++++++++++++++ 4 files changed, 165 insertions(+), 1 deletion(-) diff --git a/src/PSRDatabaseSQLite/create.jl b/src/PSRDatabaseSQLite/create.jl index 6345c09e..4bce0c67 100644 --- a/src/PSRDatabaseSQLite/create.jl +++ b/src/PSRDatabaseSQLite/create.jl @@ -120,6 +120,11 @@ function _create_time_series!( dict_time_series_attributes, ) for (group, df) in dict_time_series_attributes + if isempty(df) # user passed an empty DataFrame + psr_database_sqlite_error( + "Cannot create an empty time series group \"$group\" in collection \"$collection_id\".", + ) + end time_series_group_table_name = _time_series_group_table_name(collection_id, string(group)) ids = fill(id, nrow(df)) DataFrames.insertcols!(df, 1, :id => ids) @@ -156,6 +161,7 @@ function _create_element!( dict_vector_attributes[key] = value elseif isa(value, DataFrame) _throw_if_not_time_series_group(db, collection_id, string(key)) + _throw_if_data_does_not_match_group(db, collection_id, string(key), value) dict_time_series_attributes[key] = value else _throw_if_is_time_series_file(db, collection_id, string(key)) diff --git a/src/PSRDatabaseSQLite/update.jl b/src/PSRDatabaseSQLite/update.jl index 771b71cf..ccb359f1 100644 --- a/src/PSRDatabaseSQLite/update.jl +++ b/src/PSRDatabaseSQLite/update.jl @@ -328,6 +328,30 @@ function set_time_series_file!( return nothing end +function _dimension_value_exists( + db::DatabaseSQLite, + attribute::Attribute, + id::Integer, + dimensions..., +) + query = "SELECT $(attribute.id) FROM $(attribute.table_where_is_located) WHERE id = $id AND " + for (i, (key, value)) in enumerate(dimensions) + if key == "date_time" + query *= "$(key) = DATE('$(value)')" + else + query *= "$(key) = '$(value)'" + end + if i < length(dimensions) + query *= " AND " + end + end + results = DBInterface.execute(db.sqlite_db, query) |> DataFrame + if isempty(results) + return false + end + return true +end + function _update_time_series!( db::DatabaseSQLite, attribute::Attribute, @@ -366,8 +390,15 @@ function update_time_series!( :update, ) attribute = _get_attribute(db, collection_id, attribute_id) + id = _get_id(db, collection_id, label) _validate_time_series_dimensions(collection_id, attribute, dimensions) + if !_dimension_value_exists(db, attribute, id, dimensions...) 
+ psr_database_sqlite_error( + "The chosen values for dimensions $(join(keys(dimensions), ", ")) do not exist in the time series for element $(label) in collection $(collection_id).", + ) + end + if length(dimensions) != length(attribute.dimension_names) psr_database_sqlite_error( "The number of dimensions in the time series does not match the number of dimensions in the attribute. " * @@ -375,6 +406,5 @@ function update_time_series!( ) end - id = _get_id(db, collection_id, label) return _update_time_series!(db, attribute, id, val, dimensions) end diff --git a/src/PSRDatabaseSQLite/validate.jl b/src/PSRDatabaseSQLite/validate.jl index c397eff3..644bfcd7 100644 --- a/src/PSRDatabaseSQLite/validate.jl +++ b/src/PSRDatabaseSQLite/validate.jl @@ -357,6 +357,47 @@ function _throw_if_not_time_series_group( return nothing end +function _throw_if_data_does_not_match_group( + db::DatabaseSQLite, + collection_id::String, + group::String, + df::DataFrame, +) + collection = _get_collection(db, collection_id) + dimensions_in_df = [] + attributes_in_df = [] + + for column in names(df) + if column in keys(collection.time_series) + # should be an attribute + push!(attributes_in_df, column) + else + # should be a dimension + push!(dimensions_in_df, column) + end + end + + # validate if the attributes belong to the same group and if the dimensions are valid for this group + for attribute_id in attributes_in_df + attribute = _get_attribute(db, collection_id, attribute_id) + if attribute.group_id != group + psr_database_sqlite_error( + "Attribute \"$attribute_id\" is not in the time series group \"$group\".", + ) + end + end + + for dimension in dimensions_in_df + if !(dimension in collection.time_series[attributes_in_df[1]].dimension_names) + psr_database_sqlite_error( + "The dimension \"$dimension\" is not defined in the time series group \"$group\".", + ) + end + end + + return nothing +end + function _throw_if_is_time_series_file( db::DatabaseSQLite, collection::String, diff --git a/test/PSRDatabaseSQLite/test_time_series/test_time_series.jl b/test/PSRDatabaseSQLite/test_time_series/test_time_series.jl index 80ac9ae7..39b83860 100644 --- a/test/PSRDatabaseSQLite/test_time_series/test_time_series.jl +++ b/test/PSRDatabaseSQLite/test_time_series/test_time_series.jl @@ -613,6 +613,17 @@ function test_update_time_series() segment = 2, ) + @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.update_time_series!( + db, + "Resource", + "some_vector5", + "Resource 1", + 3.0; + date_time = DateTime(1890), + block = 999, + segment = 2, + ) + df_group1_answer = DataFrame(; date_time = [DateTime(2000), DateTime(2001)], some_vector1 = [1.0, 10.0], @@ -713,6 +724,82 @@ function test_delete_time_series() return nothing end +function test_create_wrong_time_series() + path_schema = joinpath(@__DIR__, "test_read_time_series.sql") + db_path = joinpath(@__DIR__, "test_create_wrong_time_series.sqlite") + db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) + + PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case", value1 = 1.0) + + df_time_series_group1_wrong = DataFrame(; + date_time = [DateTime(2000), DateTime(2001)], + some_vector1 = [1.0, 2.0], + some_vector20 = [2.0, 3.0], + ) + + df_time_series_group1_wrong2 = DataFrame(; + date_time = [DateTime(2000), DateTime(2001)], + block = [1, 2], + some_vector1 = [1.0, 2.0], + some_vector2 = [2.0, 3.0], + ) + + df_time_series_group1_wrong3 = DataFrame(; + date_time = [DateTime(2000), DateTime(2001)], + something 
= [1, 2], + some_vector1 = [1.0, 2.0], + some_vector2 = [2.0, 3.0], + ) + + df_time_series_group1 = DataFrame(; + date_time = [DateTime(2000), DateTime(2001)], + some_vector1 = [1.0, 2.0], + some_vector2 = [2.0, 3.0], + ) + + @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.create_element!( + db, + "Resource"; + label = "Resource 1", + group1 = df_time_series_group1_wrong, + ) + + @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.create_element!( + db, + "Resource"; + label = "Resource 1", + group1 = df_time_series_group1_wrong2, + ) + + @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.create_element!( + db, + "Resource"; + label = "Resource 1", + group1 = df_time_series_group1_wrong3, + ) + + PSRDatabaseSQLite.create_element!( + db, + "Resource"; + label = "Resource 1", + group1 = df_time_series_group1, + ) + + @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.create_element!( + db, + "Resource"; + label = "Resource 2", + group1 = DataFrame(), + ) + + PSRDatabaseSQLite.close!(db) + GC.gc() + GC.gc() + rm(db_path) + @test true + return nothing +end + function runtests() Base.GC.gc() Base.GC.gc() From 719000f2ab30c2a704a36cf16a4049797cbd88f1 Mon Sep 17 00:00:00 2001 From: pedroripper Date: Tue, 9 Jul 2024 15:44:21 -0300 Subject: [PATCH 26/34] Fix --- src/PSRDatabaseSQLite/create.jl | 10 +++++----- .../test_time_series/test_time_series.jl | 6 ++++++ 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/PSRDatabaseSQLite/create.jl b/src/PSRDatabaseSQLite/create.jl index 4bce0c67..2c9b7210 100644 --- a/src/PSRDatabaseSQLite/create.jl +++ b/src/PSRDatabaseSQLite/create.jl @@ -120,11 +120,6 @@ function _create_time_series!( dict_time_series_attributes, ) for (group, df) in dict_time_series_attributes - if isempty(df) # user passed an empty DataFrame - psr_database_sqlite_error( - "Cannot create an empty time series group \"$group\" in collection \"$collection_id\".", - ) - end time_series_group_table_name = _time_series_group_table_name(collection_id, string(group)) ids = fill(id, nrow(df)) DataFrames.insertcols!(df, 1, :id => ids) @@ -162,6 +157,11 @@ function _create_element!( elseif isa(value, DataFrame) _throw_if_not_time_series_group(db, collection_id, string(key)) _throw_if_data_does_not_match_group(db, collection_id, string(key), value) + if isempty(value) + psr_database_sqlite_error( + "Cannot create the time series group \"$key\" with an empty DataFrame.", + ) + end dict_time_series_attributes[key] = value else _throw_if_is_time_series_file(db, collection_id, string(key)) diff --git a/test/PSRDatabaseSQLite/test_time_series/test_time_series.jl b/test/PSRDatabaseSQLite/test_time_series/test_time_series.jl index 39b83860..47f9ae80 100644 --- a/test/PSRDatabaseSQLite/test_time_series/test_time_series.jl +++ b/test/PSRDatabaseSQLite/test_time_series/test_time_series.jl @@ -792,6 +792,12 @@ function test_create_wrong_time_series() group1 = DataFrame(), ) + PSRDatabaseSQLite.create_element!( + db, + "Resource"; + label = "Resource 2", + ) + PSRDatabaseSQLite.close!(db) GC.gc() GC.gc() From e87a6b6ae81f2af1f215509410551a539968350b Mon Sep 17 00:00:00 2001 From: pedroripper Date: Fri, 12 Jul 2024 01:03:01 -0300 Subject: [PATCH 27/34] Fix time controller query --- src/PSRDatabaseSQLite/time_controller.jl | 5 +- .../test_time_series/test_time_series.jl | 65 +++++++++++++++++++ 2 files changed, 67 insertions(+), 3 deletions(-) diff --git a/src/PSRDatabaseSQLite/time_controller.jl 
b/src/PSRDatabaseSQLite/time_controller.jl index ef5acd6a..8ff0faea 100644 --- a/src/PSRDatabaseSQLite/time_controller.jl +++ b/src/PSRDatabaseSQLite/time_controller.jl @@ -84,10 +84,9 @@ function _update_time_controller_cache_dates!( query = """ SELECT id, - MAX(CASE WHEN DATETIME(date_time) <= DATETIME('$date_time') THEN date_time ELSE NULL END) AS closest_previous_date_with_data, - MIN(CASE WHEN DATETIME(date_time) > DATETIME('$date_time') THEN date_time ELSE NULL END) AS closest_next_date_with_data + MAX(CASE WHEN DATETIME(date_time) <= DATETIME('$date_time') AND $(attribute.id) IS NOT NULL THEN date_time ELSE NULL END) AS closest_previous_date_with_data, + MIN(CASE WHEN DATETIME(date_time) > DATETIME('$date_time') AND $(attribute.id) IS NOT NULL THEN date_time ELSE NULL END) AS closest_next_date_with_data FROM $(attribute.table_where_is_located) - WHERE $(attribute.id) IS NOT NULL GROUP BY id ORDER BY id """ diff --git a/test/PSRDatabaseSQLite/test_time_series/test_time_series.jl b/test/PSRDatabaseSQLite/test_time_series/test_time_series.jl index 47f9ae80..641a5b70 100644 --- a/test/PSRDatabaseSQLite/test_time_series/test_time_series.jl +++ b/test/PSRDatabaseSQLite/test_time_series/test_time_series.jl @@ -403,6 +403,71 @@ function test_time_controller_read_more_agents() return rm(db_path) end +function test_time_controller_read_more_agents_2() + path_schema = joinpath(@__DIR__, "test_time_controller.sql") + db_path = joinpath(@__DIR__, "test_time_controller_read_multiple_2.sqlite") + GC.gc() + GC.gc() + if isfile(db_path) + rm(db_path) + end + + db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) + PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case", value1 = 1.0) + + df = DataFrame(; + date_time = [DateTime(2000)], + some_vector1 = [missing], + some_vector2 = [1.0], + ) + PSRDatabaseSQLite.create_element!( + db, + "Resource"; + label = "Resource 1", + group1 = df, + ) + + df2 = DataFrame(; + date_time = [DateTime(2000)], + some_vector1 = [1.0], + some_vector2 = [10.0], + ) + PSRDatabaseSQLite.create_element!( + db, + "Resource"; + label = "Resource 2", + group1 = df2, + ) + + PSRDatabaseSQLite.close!(db) + db = PSRDatabaseSQLite.load_db(db_path; read_only = true) + + some_vector1_answer = [[NaN, 1.0]] + some_vector2_answer = [[1.0, 10.0]] + + # test for dates in correct sequence + for d_i in eachindex(df.date_time) + cached_1 = PSRDatabaseSQLite.read_time_series_row( + db, + "Resource", + "some_vector1"; + date_time = DateTime(df.date_time[d_i]), + ) + _test_row(cached_1, some_vector1_answer[d_i]) + + cached_2 = PSRDatabaseSQLite.read_time_series_row( + db, + "Resource", + "some_vector2"; + date_time = DateTime(df.date_time[d_i]), + ) + _test_row(cached_2, some_vector2_answer[d_i]) + end + + PSRDatabaseSQLite.close!(db) + return rm(db_path) +end + function test_time_controller_empty() path_schema = joinpath(@__DIR__, "test_time_controller.sql") db_path = joinpath(@__DIR__, "test_time_controller_read_empty.sqlite") From e8c741ef4619be017761e5fdf4835f4d4e453c33 Mon Sep 17 00:00:00 2001 From: pedroripper Date: Tue, 16 Jul 2024 16:40:28 -0300 Subject: [PATCH 28/34] Fix query --- src/PSRDatabaseSQLite/time_controller.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/PSRDatabaseSQLite/time_controller.jl b/src/PSRDatabaseSQLite/time_controller.jl index 8ff0faea..19dc37a8 100644 --- a/src/PSRDatabaseSQLite/time_controller.jl +++ b/src/PSRDatabaseSQLite/time_controller.jl @@ -84,8 +84,8 @@ function 
_update_time_controller_cache_dates!( query = """ SELECT id, - MAX(CASE WHEN DATETIME(date_time) <= DATETIME('$date_time') AND $(attribute.id) IS NOT NULL THEN date_time ELSE NULL END) AS closest_previous_date_with_data, - MIN(CASE WHEN DATETIME(date_time) > DATETIME('$date_time') AND $(attribute.id) IS NOT NULL THEN date_time ELSE NULL END) AS closest_next_date_with_data + MAX(CASE WHEN DATE(date_time) <= DATE('$date_time') AND $(attribute.id) IS NOT NULL THEN DATE(date_time) ELSE NULL END) AS closest_previous_date_with_data, + MIN(CASE WHEN DATE(date_time) > DATE('$date_time') AND $(attribute.id) IS NOT NULL THEN DATE(date_time) ELSE NULL END) AS closest_next_date_with_data FROM $(attribute.table_where_is_located) GROUP BY id ORDER BY id From 724f02d00853717ee3c0e30c68ab4f9f57300666 Mon Sep 17 00:00:00 2001 From: pedroripper Date: Wed, 17 Jul 2024 15:52:06 -0300 Subject: [PATCH 29/34] Update according to .dart --- src/PSRDatabaseSQLite/attribute.jl | 3 +- src/PSRDatabaseSQLite/collection.jl | 53 ++++++++++++++++++++--------- src/PSRDatabaseSQLite/read.jl | 2 +- src/PSRDatabaseSQLite/update.jl | 4 +-- src/PSRDatabaseSQLite/validate.jl | 6 ++-- 5 files changed, 44 insertions(+), 24 deletions(-) diff --git a/src/PSRDatabaseSQLite/attribute.jl b/src/PSRDatabaseSQLite/attribute.jl index 41e92127..a6a5dfce 100644 --- a/src/PSRDatabaseSQLite/attribute.jl +++ b/src/PSRDatabaseSQLite/attribute.jl @@ -107,8 +107,7 @@ mutable struct TimeSeries{T} <: VectorAttribute group_id::String parent_collection::String table_where_is_located::String - dimension_names::Vector{String} - num_dimensions::Int + dimensions::Dict{String, VectorParameter} end mutable struct TimeSeriesFile{T} <: ReferenceToFileAttribute diff --git a/src/PSRDatabaseSQLite/collection.jl b/src/PSRDatabaseSQLite/collection.jl index 5f092f6d..90dbfee4 100644 --- a/src/PSRDatabaseSQLite/collection.jl +++ b/src/PSRDatabaseSQLite/collection.jl @@ -240,19 +240,6 @@ function _create_collection_vector_relations(db::SQLite.DB, collection_id::Strin return vector_relations end -function _get_time_series_dimension_names(df_table_infos::DataFrame) - dimension_names = Vector{String}(undef, 0) - for time_series_attribute in eachrow(df_table_infos) - if time_series_attribute.name == "id" - continue - end - if time_series_attribute.pk != 0 - push!(dimension_names, time_series_attribute.name) - end - end - return dimension_names -end - function _create_collection_time_series(db::SQLite.DB, collection_id::String) time_series_tables = _get_collection_time_series_tables(db, collection_id) time_series = OrderedDict{String, TimeSeries}() @@ -261,8 +248,43 @@ function _create_collection_time_series(db::SQLite.DB, collection_id::String) group_id = _id_of_time_series_group(table_name) table_where_is_located = table_name df_table_infos = table_info(db, table_name) - dimension_names = _get_time_series_dimension_names(df_table_infos) + dimensions = Dict{String, VectorParameter}() for time_series_attribute in eachrow(df_table_infos) + if time_series_attribute.pk != 0 + # it is a dimension if it is not id + if time_series_attribute.name != "id" + id = time_series_attribute.name + type = _sql_type_to_julia_type(id, time_series_attribute.type) + default_value = _get_default_value(type, time_series_attribute.dflt_value) + not_null = true + if haskey(dimensions, id) + psr_database_sqlite_error( + "Duplicated time_series attribute \"$id\" in collection \"$collection_id\"", + ) + end + if id == "date_time" + dimensions[id] = VectorParameter( + id, + DateTime, + 
default_value, + not_null, + group_id, + parent_collection, + table_where_is_located, + ) + else + dimensions[id] = VectorParameter( + id, + type, + default_value, + not_null, + group_id, + parent_collection, + table_where_is_located, + ) + end + end + end id = time_series_attribute.name if id == "id" || id == "date_time" # These are obligatory for every vector table @@ -301,8 +323,7 @@ function _create_collection_time_series(db::SQLite.DB, collection_id::String) group_id, parent_collection, table_where_is_located, - dimension_names, - length(dimension_names), + dimensions, ) end end diff --git a/src/PSRDatabaseSQLite/read.jl b/src/PSRDatabaseSQLite/read.jl index 6473a3c9..a2784ed2 100644 --- a/src/PSRDatabaseSQLite/read.jl +++ b/src/PSRDatabaseSQLite/read.jl @@ -412,7 +412,7 @@ function _read_time_series_table( attribute::Attribute, id::Int, ) - query = string("SELECT ", join(attribute.dimension_names, ",", ", "), ", ", attribute.id) + query = string("SELECT ", join(keys(attribute.dimensions), ",", ", "), ", ", attribute.id) query *= " FROM $(attribute.table_where_is_located) WHERE id = '$id'" return DBInterface.execute(db.sqlite_db, query) |> DataFrame end diff --git a/src/PSRDatabaseSQLite/update.jl b/src/PSRDatabaseSQLite/update.jl index ccb359f1..f3d1c98a 100644 --- a/src/PSRDatabaseSQLite/update.jl +++ b/src/PSRDatabaseSQLite/update.jl @@ -399,10 +399,10 @@ function update_time_series!( ) end - if length(dimensions) != length(attribute.dimension_names) + if length(dimensions) != length(attribute.dimensions) psr_database_sqlite_error( "The number of dimensions in the time series does not match the number of dimensions in the attribute. " * - "The attribute has $(attribute.num_dimensions) dimensions: $(join(attribute.dimension_names, ", ")).", + "The attribute has $(length(attribute.dimensions)) dimensions: $(join(keys(attribute.dimensions), ", ")).", ) end diff --git a/src/PSRDatabaseSQLite/validate.jl b/src/PSRDatabaseSQLite/validate.jl index 644bfcd7..51976afa 100644 --- a/src/PSRDatabaseSQLite/validate.jl +++ b/src/PSRDatabaseSQLite/validate.jl @@ -388,7 +388,7 @@ function _throw_if_data_does_not_match_group( end for dimension in dimensions_in_df - if !(dimension in collection.time_series[attributes_in_df[1]].dimension_names) + if !(dimension in keys(collection.time_series[attributes_in_df[1]].dimensions)) psr_database_sqlite_error( "The dimension \"$dimension\" is not defined in the time series group \"$group\".", ) @@ -507,10 +507,10 @@ function _validate_time_series_dimensions( dimensions..., ) for dim_name in keys(dimensions...) - if !(string(dim_name) in attribute.dimension_names) + if !(string(dim_name) in keys(attribute.dimensions)) psr_database_sqlite_error( "The dimension \"$dim_name\" is not defined in the time series attribute \"$(attribute.id)\" of collection \"$collection_id\". 
" * - "The available dimensions are: $(attribute.dimension_names).", + "The available dimensions are: $(keys(attribute.dimensions)).", ) end end From 5ac65a8c8d06f85a36017d2d0268713e12557dac Mon Sep 17 00:00:00 2001 From: pedroripper Date: Wed, 17 Jul 2024 15:52:15 -0300 Subject: [PATCH 30/34] Update docs --- docs/src/psrdatabasesqlite/rules.md | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/docs/src/psrdatabasesqlite/rules.md b/docs/src/psrdatabasesqlite/rules.md index 80086873..91a5d939 100644 --- a/docs/src/psrdatabasesqlite/rules.md +++ b/docs/src/psrdatabasesqlite/rules.md @@ -132,9 +132,9 @@ CREATE TABLE HydroPlant_vector_GaugingStation( ``` -### Time Series +### Time Series Files -- All Time Series for the elements from a Collection should be stored in a Table +- All Time Series files for the elements from a Collection should be stored in a Table - The Table name should be the same as the name of the Collection followed by `_time_series_files`, as presented below

```
COLLECTION_time_series_files
```
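Before the SQL example below, a minimal sketch of how such a `_time_series_files` table is driven from the Julia side. `set_time_series_file!` and `read_time_series_file` are the methods this patch series wires up for `TimeSeriesFile` attributes; the schema path, the `generation` column, and the exact call signatures are illustrative assumptions here, not confirmed API.

```julia
using PSRClassesInterface.PSRDatabaseSQLite

# Assumed schema: a Plant collection plus a Plant_time_series_files table
# holding a `generation` column (compare the CREATE TABLE example below).
db = PSRDatabaseSQLite.create_empty_db_from_schema("case.sqlite", "schema.sql"; force = true)
PSRDatabaseSQLite.create_element!(db, "Plant"; label = "Plant 1")

# Store the path of the external file once for the whole collection...
PSRDatabaseSQLite.set_time_series_file!(db, "Plant"; generation = "generation.bin")

# ...and read the stored reference back later.
generation_file = PSRDatabaseSQLite.read_time_series_file(db, "Plant", "generation")

PSRDatabaseSQLite.close!(db)
```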

```
COLLECTION_time_series_GROUP_OF_ATTRIBUTES
```
+ +Notice that it is quite similar to the vector attributes, but without the `vector_index` column. +Instead, a mandatory column named `date_time` should be created to store the date of the time series data. + +Example: + +```sql +CREATE TABLE Resource_time_series_group1 ( + id INTEGER, + date_time TEXT NOT NULL, + some_vector1 REAL, + some_vector2 REAL, + FOREIGN KEY(id) REFERENCES Resource(id) ON DELETE CASCADE ON UPDATE CASCADE, + PRIMARY KEY (id, date_time) +) STRICT; +``` + +!!! tip +For more information on how to handle time series data, please refer to the [Time Series](./time_series.md) section. + ## Migrations Migrations are an important part of the `DatabaseSQLite` framework. They are used to update the database schema to a new version without the need to delete the database and create a new one from scratch. Migrations are defined by two separate `.sql` files that are stored in the `migrations` directory of the model. The first file is the `up` migration and it is used to update the database schema to a new version. The second file is the `down` migration and it is used to revert the changes made by the `up` migration. Migrations are stored in directories in the model and they have a specific naming convention. The name of the migration folder should be the number of the version (e.g. `/migrations/1/`). From fac94af7f4eab6451c437e23eeebe6041f180797 Mon Sep 17 00:00:00 2001 From: pedroripper Date: Thu, 18 Jul 2024 16:44:50 -0300 Subject: [PATCH 31/34] Revert "Update according to .dart" This reverts commit 724f02d00853717ee3c0e30c68ab4f9f57300666. --- src/PSRDatabaseSQLite/attribute.jl | 3 +- src/PSRDatabaseSQLite/collection.jl | 53 +++++++++-------------------- src/PSRDatabaseSQLite/read.jl | 2 +- src/PSRDatabaseSQLite/update.jl | 4 +-- src/PSRDatabaseSQLite/validate.jl | 6 ++-- 5 files changed, 24 insertions(+), 44 deletions(-) diff --git a/src/PSRDatabaseSQLite/attribute.jl b/src/PSRDatabaseSQLite/attribute.jl index a6a5dfce..41e92127 100644 --- a/src/PSRDatabaseSQLite/attribute.jl +++ b/src/PSRDatabaseSQLite/attribute.jl @@ -107,7 +107,8 @@ mutable struct TimeSeries{T} <: VectorAttribute group_id::String parent_collection::String table_where_is_located::String - dimensions::Dict{String, VectorParameter} + dimension_names::Vector{String} + num_dimensions::Int end mutable struct TimeSeriesFile{T} <: ReferenceToFileAttribute diff --git a/src/PSRDatabaseSQLite/collection.jl b/src/PSRDatabaseSQLite/collection.jl index 90dbfee4..5f092f6d 100644 --- a/src/PSRDatabaseSQLite/collection.jl +++ b/src/PSRDatabaseSQLite/collection.jl @@ -240,6 +240,19 @@ function _create_collection_vector_relations(db::SQLite.DB, collection_id::Strin return vector_relations end +function _get_time_series_dimension_names(df_table_infos::DataFrame) + dimension_names = Vector{String}(undef, 0) + for time_series_attribute in eachrow(df_table_infos) + if time_series_attribute.name == "id" + continue + end + if time_series_attribute.pk != 0 + push!(dimension_names, time_series_attribute.name) + end + end + return dimension_names +end + function _create_collection_time_series(db::SQLite.DB, collection_id::String) time_series_tables = _get_collection_time_series_tables(db, collection_id) time_series = OrderedDict{String, TimeSeries}() @@ -248,43 +261,8 @@ function _create_collection_time_series(db::SQLite.DB, collection_id::String) group_id = _id_of_time_series_group(table_name) table_where_is_located = table_name df_table_infos = table_info(db, table_name) - dimensions = Dict{String, VectorParameter}() + 
dimension_names = _get_time_series_dimension_names(df_table_infos) for time_series_attribute in eachrow(df_table_infos) - if time_series_attribute.pk != 0 - # it is a dimension if it is not id - if time_series_attribute.name != "id" - id = time_series_attribute.name - type = _sql_type_to_julia_type(id, time_series_attribute.type) - default_value = _get_default_value(type, time_series_attribute.dflt_value) - not_null = true - if haskey(dimensions, id) - psr_database_sqlite_error( - "Duplicated time_series attribute \"$id\" in collection \"$collection_id\"", - ) - end - if id == "date_time" - dimensions[id] = VectorParameter( - id, - DateTime, - default_value, - not_null, - group_id, - parent_collection, - table_where_is_located, - ) - else - dimensions[id] = VectorParameter( - id, - type, - default_value, - not_null, - group_id, - parent_collection, - table_where_is_located, - ) - end - end - end id = time_series_attribute.name if id == "id" || id == "date_time" # These are obligatory for every vector table @@ -323,7 +301,8 @@ function _create_collection_time_series(db::SQLite.DB, collection_id::String) group_id, parent_collection, table_where_is_located, - dimensions, + dimension_names, + length(dimension_names), ) end end diff --git a/src/PSRDatabaseSQLite/read.jl b/src/PSRDatabaseSQLite/read.jl index a2784ed2..6473a3c9 100644 --- a/src/PSRDatabaseSQLite/read.jl +++ b/src/PSRDatabaseSQLite/read.jl @@ -412,7 +412,7 @@ function _read_time_series_table( attribute::Attribute, id::Int, ) - query = string("SELECT ", join(keys(attribute.dimensions), ",", ", "), ", ", attribute.id) + query = string("SELECT ", join(attribute.dimension_names, ",", ", "), ", ", attribute.id) query *= " FROM $(attribute.table_where_is_located) WHERE id = '$id'" return DBInterface.execute(db.sqlite_db, query) |> DataFrame end diff --git a/src/PSRDatabaseSQLite/update.jl b/src/PSRDatabaseSQLite/update.jl index f3d1c98a..ccb359f1 100644 --- a/src/PSRDatabaseSQLite/update.jl +++ b/src/PSRDatabaseSQLite/update.jl @@ -399,10 +399,10 @@ function update_time_series!( ) end - if length(dimensions) != length(attribute.dimensions) + if length(dimensions) != length(attribute.dimension_names) psr_database_sqlite_error( "The number of dimensions in the time series does not match the number of dimensions in the attribute. " * - "The attribute has $(length(attribute.dimensions)) dimensions: $(join(keys(attribute.dimensions), ", ")).", + "The attribute has $(attribute.num_dimensions) dimensions: $(join(attribute.dimension_names, ", ")).", ) end diff --git a/src/PSRDatabaseSQLite/validate.jl b/src/PSRDatabaseSQLite/validate.jl index 51976afa..644bfcd7 100644 --- a/src/PSRDatabaseSQLite/validate.jl +++ b/src/PSRDatabaseSQLite/validate.jl @@ -388,7 +388,7 @@ function _throw_if_data_does_not_match_group( end for dimension in dimensions_in_df - if !(dimension in keys(collection.time_series[attributes_in_df[1]].dimensions)) + if !(dimension in collection.time_series[attributes_in_df[1]].dimension_names) psr_database_sqlite_error( "The dimension \"$dimension\" is not defined in the time series group \"$group\".", ) @@ -507,10 +507,10 @@ function _validate_time_series_dimensions( dimensions..., ) for dim_name in keys(dimensions...) - if !(string(dim_name) in keys(attribute.dimensions)) + if !(string(dim_name) in attribute.dimension_names) psr_database_sqlite_error( "The dimension \"$dim_name\" is not defined in the time series attribute \"$(attribute.id)\" of collection \"$collection_id\". 
" * - "The available dimensions are: $(keys(attribute.dimensions)).", + "The available dimensions are: $(attribute.dimension_names).", ) end end From 11a7d79d23f45a925e51bf2579a8a7e42ef863ec Mon Sep 17 00:00:00 2001 From: guilhermebodin Date: Fri, 26 Jul 2024 18:04:46 -0300 Subject: [PATCH 32/34] update name to update_time_series_row --- docs/src/psrdatabasesqlite/time_series.md | 2 +- src/PSRDatabaseSQLite/update.jl | 6 +- .../test_time_series/test_time_series.jl | 14 ++-- test/PSRDatabaseSQLite/time_series.jl | 70 +++++++++++++++++++ 4 files changed, 81 insertions(+), 11 deletions(-) create mode 100644 test/PSRDatabaseSQLite/time_series.jl diff --git a/docs/src/psrdatabasesqlite/time_series.md b/docs/src/psrdatabasesqlite/time_series.md index 50cb3909..cdbb100f 100644 --- a/docs/src/psrdatabasesqlite/time_series.md +++ b/docs/src/psrdatabasesqlite/time_series.md @@ -176,7 +176,7 @@ When updating one of the entries of a time series for a given element and attrib For example, consider a time series that has `block` and `data_time` dimensions. ```julia -PSRDatabaseSQLite.update_time_series!( +PSRDatabaseSQLite.update_time_series_row!( db, "Resource", "some_vector3", diff --git a/src/PSRDatabaseSQLite/update.jl b/src/PSRDatabaseSQLite/update.jl index ccb359f1..6e2067a5 100644 --- a/src/PSRDatabaseSQLite/update.jl +++ b/src/PSRDatabaseSQLite/update.jl @@ -352,7 +352,7 @@ function _dimension_value_exists( return true end -function _update_time_series!( +function _update_time_series_row!( db::DatabaseSQLite, attribute::Attribute, id::Integer, @@ -375,7 +375,7 @@ function _update_time_series!( return nothing end -function update_time_series!( +function update_time_series_row!( db::DatabaseSQLite, collection_id::String, attribute_id::String, @@ -406,5 +406,5 @@ function update_time_series!( ) end - return _update_time_series!(db, attribute, id, val, dimensions) + return _update_time_series_row!(db, attribute, id, val, dimensions) end diff --git a/test/PSRDatabaseSQLite/test_time_series/test_time_series.jl b/test/PSRDatabaseSQLite/test_time_series/test_time_series.jl index 641a5b70..0612bf79 100644 --- a/test/PSRDatabaseSQLite/test_time_series/test_time_series.jl +++ b/test/PSRDatabaseSQLite/test_time_series/test_time_series.jl @@ -617,7 +617,7 @@ function test_update_time_series() group3 = df_time_series_group3, ) - PSRDatabaseSQLite.update_time_series!( + PSRDatabaseSQLite.update_time_series_row!( db, "Resource", "some_vector1", @@ -626,7 +626,7 @@ function test_update_time_series() date_time = DateTime(2001), ) - PSRDatabaseSQLite.update_time_series!( + PSRDatabaseSQLite.update_time_series_row!( db, "Resource", "some_vector2", @@ -635,7 +635,7 @@ function test_update_time_series() date_time = DateTime(2001), ) - PSRDatabaseSQLite.update_time_series!( + PSRDatabaseSQLite.update_time_series_row!( db, "Resource", "some_vector5", @@ -646,7 +646,7 @@ function test_update_time_series() segment = 2, ) - PSRDatabaseSQLite.update_time_series!( + PSRDatabaseSQLite.update_time_series_row!( db, "Resource", "some_vector5", @@ -657,7 +657,7 @@ function test_update_time_series() segment = 1, ) - PSRDatabaseSQLite.update_time_series!( + PSRDatabaseSQLite.update_time_series_row!( db, "Resource", "some_vector6", @@ -668,7 +668,7 @@ function test_update_time_series() segment = 3, ) - @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.update_time_series!( + @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.update_time_series_row!( db, "Resource", "some_vector6", @@ -678,7 +678,7 @@ 
function test_update_time_series() segment = 2, ) - @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.update_time_series!( + @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.update_time_series_row!( db, "Resource", "some_vector5", diff --git a/test/PSRDatabaseSQLite/time_series.jl b/test/PSRDatabaseSQLite/time_series.jl new file mode 100644 index 00000000..953d944c --- /dev/null +++ b/test/PSRDatabaseSQLite/time_series.jl @@ -0,0 +1,70 @@ +function test_time_series() + case_path = joinpath(@__DIR__, "data", "case_2") + if isfile(joinpath(case_path, "simplecase.sqlite")) + rm(joinpath(case_path, "simplecase.sqlite")) + end + + db = PSRI.create_study( + PSRI.PSRDatabaseSQLiteInterface(), + joinpath(case_path, "simplecase.sqlite"), + joinpath(case_path, "simple_schema.sql"); + val1 = 1, + ) + + PSRI.create_element!( + db, + "Plant"; + label = "Plant 1", + ) + + PSRI.create_element!( + db, + "Plant"; + label = "Plant 2", + ) + + iow = PSRI.open( + PSRI.OpenBinary.Writer, + joinpath(case_path, "generation"); + blocks = 3, + scenarios = 2, + stages = 12, + agents = PSRI.get_parms(db, "Plant", "label"), + unit = "MW", + ) + + for t in 1:12, s in 1:2, b in 1:3 + PSRI.write_registry(iow, [(t + s + b) * 100.0, (t + s + b) * 300.0], t, s, b) + end + + PSRI.close(iow) + + iow = PSRI.open( + PSRI.OpenBinary.Writer, + joinpath(case_path, "cost"); + blocks = 3, + scenarios = 2, + stages = 12, + agents = PSRI.get_parms(db, "Plant", "label"), + unit = "USD", + ) + + for t in 1:12, s in 1:2, b in 1:3 + PSRI.write_registry(iow, [(t + s + b) * 500.0, (t + s + b) * 400.0], t, s, b) + end + + PSRI.close(iow) + + PSRI.link_series_to_file( + db, + "Plant"; + generation = "generation", + cost = "cost", + ) + + PSRI.PSRDatabaseSQLite.close(db) + + return rm(joinpath(case_path, "simplecase.sqlite")) +end + +test_time_series() From e204ec080d90bb07115fdcf66ee61ce526163915 Mon Sep 17 00:00:00 2001 From: guilhermebodin Date: Fri, 26 Jul 2024 23:16:22 -0300 Subject: [PATCH 33/34] add new add_time_series_row! method --- docs/src/psrdatabasesqlite/time_series.md | 37 ++++++ src/PSRDatabaseSQLite/create.jl | 54 +++++++++ src/PSRDatabaseSQLite/read.jl | 1 + src/PSRDatabaseSQLite/update.jl | 1 + .../test_time_series/test_time_series.jl | 114 ++++++++++++++++-- 5 files changed, 200 insertions(+), 7 deletions(-) diff --git a/docs/src/psrdatabasesqlite/time_series.md b/docs/src/psrdatabasesqlite/time_series.md index cdbb100f..9013898e 100644 --- a/docs/src/psrdatabasesqlite/time_series.md +++ b/docs/src/psrdatabasesqlite/time_series.md @@ -116,6 +116,43 @@ PSRDatabaseSQLite.create_element!( ) ``` +It is also possible to insert a single row of a time series. This is useful when you want to insert a specific dimension entry. This way of inserting time series is less efficient than inserting a whole `DataFrame`. 
+ +```julia +using DataFrames +using Dates +using PSRClassesInterface +PSRDatabaseSQLite = PSRClassesInterface.PSRDatabaseSQLite + +db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) + +PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case", value1 = 1.0) + +PSRDatabaseSQLite.create_element!( + db, + "Resource"; + label = "Resource 1" +) + +PSRDatabaseSQLite.add_time_series_row!( + db, + "Resource", + "some_vector1", + "Resource 1", + 10.0; # new value + date_time = DateTime(2000) +) + +PSRDatabaseSQLite.add_time_series_row!( + db, + "Resource", + "some_vector1", + "Resource 1", + 11.0; # new value + date_time = DateTime(2001) +) +``` + ## Reading data You can read the information from the time series in two different ways. diff --git a/src/PSRDatabaseSQLite/create.jl b/src/PSRDatabaseSQLite/create.jl index 2c9b7210..2ad7efcd 100644 --- a/src/PSRDatabaseSQLite/create.jl +++ b/src/PSRDatabaseSQLite/create.jl @@ -323,3 +323,57 @@ function _validate_attribute_types_on_creation!( ) return nothing end + +function _add_time_series_row!( + db::DatabaseSQLite, + attribute::Attribute, + id::Integer, + val, + dimensions, +) + # Adding a time series element column by column as it is implemented on this function + # is not the most efficient way to do it. In any case if the user wants to add a time + # series column by column, this function can only be implemented as an upsert statements + # for each column. This is because the user can add a value in a primary key that already + # exists in the time series. In that case the column should be updated instead of inserted. + dimensions_string = join(keys(dimensions), ", ") + values_string = "$id, " + for dim in dimensions + values_string *= "'$(dim[2])', " + end + values_string *= "'$val'" + query = """ + INSERT INTO $(attribute.table_where_is_located) (id, $dimensions_string, $(attribute.id)) + VALUES ($values_string) + ON CONFLICT(id, $dimensions_string) DO UPDATE SET $(attribute.id) = '$val' + """ + DBInterface.execute(db.sqlite_db, query) + return nothing +end + +function add_time_series_row!( + db::DatabaseSQLite, + collection_id::String, + attribute_id::String, + label::String, + val; + dimensions..., +) + if !_is_time_series(db, collection_id, attribute_id) + psr_database_sqlite_error( + "The attribute $attribute_id is not a time series.", + ) + end + attribute = _get_attribute(db, collection_id, attribute_id) + id = _get_id(db, collection_id, label) + _validate_time_series_dimensions(collection_id, attribute, dimensions) + + if length(dimensions) != length(attribute.dimension_names) + psr_database_sqlite_error( + "The number of dimensions in the time series does not match the number of dimensions in the attribute. 
" * + "The attribute has $(attribute.num_dimensions) dimensions: $(join(attribute.dimension_names, ", ")).", + ) + end + + return _add_time_series_row!(db, attribute, id, val, dimensions) +end \ No newline at end of file diff --git a/src/PSRDatabaseSQLite/read.jl b/src/PSRDatabaseSQLite/read.jl index 6473a3c9..116c5d65 100644 --- a/src/PSRDatabaseSQLite/read.jl +++ b/src/PSRDatabaseSQLite/read.jl @@ -3,6 +3,7 @@ const READ_METHODS_BY_CLASS_OF_ATTRIBUTE = Dict( ScalarRelation => "read_scalar_relations", VectorParameter => "read_vector_parameters", VectorRelation => "read_vector_relations", + TimeSeries => "read_time_series_row", TimeSeriesFile => "read_time_series_file", ) diff --git a/src/PSRDatabaseSQLite/update.jl b/src/PSRDatabaseSQLite/update.jl index 6e2067a5..cb5e450b 100644 --- a/src/PSRDatabaseSQLite/update.jl +++ b/src/PSRDatabaseSQLite/update.jl @@ -3,6 +3,7 @@ const UPDATE_METHODS_BY_CLASS_OF_ATTRIBUTE = Dict( ScalarRelation => "set_scalar_relation!", VectorParameter => "update_vector_parameter!", VectorRelation => "set_vector_relation!", + TimeSeries => "update_time_series_row!", TimeSeriesFile => "set_time_series_file!", ) diff --git a/test/PSRDatabaseSQLite/test_time_series/test_time_series.jl b/test/PSRDatabaseSQLite/test_time_series/test_time_series.jl index 0612bf79..970e0359 100644 --- a/test/PSRDatabaseSQLite/test_time_series/test_time_series.jl +++ b/test/PSRDatabaseSQLite/test_time_series/test_time_series.jl @@ -850,24 +850,124 @@ function test_create_wrong_time_series() group1 = df_time_series_group1, ) - @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.create_element!( + PSRDatabaseSQLite.close!(db) + GC.gc() + GC.gc() + rm(db_path) + @test true + return nothing +end + +function test_add_time_series_row() + path_schema = joinpath(@__DIR__, "test_read_time_series.sql") + db_path = joinpath(@__DIR__, "test_add_time_series_row.sqlite") + db = PSRDatabaseSQLite.create_empty_db_from_schema(db_path, path_schema; force = true) + + PSRDatabaseSQLite.create_element!(db, "Configuration"; label = "Toy Case", value1 = 1.0) + + PSRDatabaseSQLite.create_element!( db, "Resource"; - label = "Resource 2", - group1 = DataFrame(), + label = "Resource 1", ) - PSRDatabaseSQLite.create_element!( + PSRDatabaseSQLite.add_time_series_row!( db, - "Resource"; - label = "Resource 2", + "Resource", + "some_vector1", + "Resource 1", + 1.0; + date_time = DateTime(2000), ) + PSRDatabaseSQLite.add_time_series_row!( + db, + "Resource", + "some_vector2", + "Resource 1", + 2.0; + date_time = DateTime(2000), + ) + + PSRDatabaseSQLite.add_time_series_row!( + db, + "Resource", + "some_vector3", + "Resource 1", + 3.0; + date_time = DateTime(2001), + block = 1, + ) + + PSRDatabaseSQLite.add_time_series_row!( + db, + "Resource", + "some_vector4", + "Resource 1", + 4.0; + date_time = DateTime(2001), + block = 1, + ) + + # Attribute is not a time series + @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.add_time_series_row!( + db, + "Resource", + "label", + "Resource 1", + 4.0; + date_time = DateTime(2001), + ) + + # Wrong dimensions + @test_throws PSRDatabaseSQLite.DatabaseException PSRDatabaseSQLite.add_time_series_row!( + db, + "Resource", + "some_vector1", + "Resource 1", + 4.0; + date_time = DateTime(2001), + block = 1, + segment = 1, + ) + + df_some_vector1 = PSRDatabaseSQLite.read_time_series_table( + db, + "Resource", + "some_vector1", + "Resource 1", + ) + + df_some_vector2 = PSRDatabaseSQLite.read_time_series_table( + db, + "Resource", + "some_vector2", + "Resource 
1", + ) + + df_some_vector3 = PSRDatabaseSQLite.read_time_series_table( + db, + "Resource", + "some_vector3", + "Resource 1", + ) + + df_some_vector4 = PSRDatabaseSQLite.read_time_series_table( + db, + "Resource", + "some_vector4", + "Resource 1", + ) + + @test df_some_vector1[1, :some_vector1] == 1.0 + @test df_some_vector2[1, :some_vector2] == 2.0 + @test df_some_vector3[1, :some_vector3] == 3.0 + @test df_some_vector4[1, :some_vector4] == 4.0 + PSRDatabaseSQLite.close!(db) GC.gc() GC.gc() rm(db_path) - @test true return nothing end From b27311f5ee25570df64072671ec1138e775b5d0a Mon Sep 17 00:00:00 2001 From: pedroripper Date: Wed, 7 Aug 2024 16:40:41 -0300 Subject: [PATCH 34/34] Format --- src/PSRDatabaseSQLite/create.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/PSRDatabaseSQLite/create.jl b/src/PSRDatabaseSQLite/create.jl index 2ad7efcd..4ac58ac7 100644 --- a/src/PSRDatabaseSQLite/create.jl +++ b/src/PSRDatabaseSQLite/create.jl @@ -376,4 +376,4 @@ function add_time_series_row!( end return _add_time_series_row!(db, attribute, id, val, dimensions) -end \ No newline at end of file +end