diff --git a/NAMESPACE b/NAMESPACE index 4474ba1..818b965 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ export("%>%") export(BIAS) +export(DataBook) export(DataSheet) export(EDI) export(EDS) @@ -22,6 +23,7 @@ export(VE) export(count_calc) export(cp) export(d) +export(get_summary_calculation_names) export(hss) export(instat_comment) export(link) @@ -29,7 +31,9 @@ export(mNSE) export(mae) export(md) export(me) +export(missing_values_check) export(mse) +export(na_check) export(nrmse) export(p10) export(p20) @@ -78,6 +82,7 @@ export(summary_median_absolute_deviation) export(summary_median_circular) export(summary_min) export(summary_min_circular) +export(summary_mode) export(summary_n_distinct) export(summary_nth) export(summary_outlier_limit) diff --git a/R/data_book.R b/R/data_book.R index afc5e78..cd4bb5f 100644 --- a/R/data_book.R +++ b/R/data_book.R @@ -225,13 +225,11 @@ #' \item{\code{define_as_options_by_context(data_name, obyc_types = NULL, key_columns = NULL)}}{Define options by context for a specified dataset.} #' \item{\code{display_daily_table(data_name, climatic_element, date_col, year_col, station_col, Misscode, Tracecode, Zerocode, monstats = c("min", "mean", "median", "max", "IQR", "sum"))}}{Display a daily summary table for a specified climatic data element.} #' -#' # from instat_comment.R in R-Instat #' \item{\code{add_comment(new_comment)}}{Adds a new `instat_comment` object to the data sheet if the key is defined and valid.} #' \item{\code{delete_comment(comment_id)}}{Deletes a comment from the data sheet based on the comment ID.} #' \item{\code{get_comment_ids()}}{Retrieves all comment IDs currently stored in the data sheet.} #' \item{\code{get_comments_as_data_frame()}}{Converts all comments in the data sheet to a data frame format for easier inspection and analysis.} #' -#' # from link.R in R-Instat #' \item{\code{update_links_rename_data_frame(old_data_name, new_data_name)}}{This function updates all links that reference a data frame 
with a specified old name, renaming it to a new name.} #' \item{\code{update_links_rename_column(data_name, old_column_name, new_column_name)}}{This function updates all links referencing a column in a data frame with a specified old column name, renaming it to a new column name.} #' \item{\code{add_link(from_data_frame, to_data_frame, link_pairs, type, link_name)}}{This function adds a new link between two data frames with the specified link pairs and type. It will check if the link already exists or if the link columns are keys.} @@ -247,7 +245,6 @@ #' \item{\code{link_between_containing(from_data_frame, containing_columns, to_data_frame)}}{This function returns columns in `to_data_frame` corresponding to `containing_columns` in `from_data_frame` if a link exists between them.} #' \item{\code{view_link(link_name)}}{Displays the details of a specified link.} #' -#' # from calculations.R in R-Instat #' \item{\code{apply_calculation(calc)}}{Apply a Calculation to Data in the DataBook} #' \item{\code{save_calculation(end_data_frame, calc)}}{Save a Calculation to a Data Frame} #' \item{\code{apply_instat_calculation(calc, curr_data_list, previous_manipulations = list(), param_list = list())}}{Apply an Instat Calculation} @@ -256,7 +253,6 @@ #' \item{\code{get_link_columns_from_data_frames(first_data_frame_name, first_data_frame_columns, second_data_frame_name, second_data_frame_columns)}}{Get Link Columns Between Data Frames} #' \item{\code{save_calc_output(calc, curr_data_list, previous_manipulations)}}{Save the Output of a Calculation} #' -#' # from summary_functions.R in R-Instat #' \item{\code{append_summaries_to_data_object(out, data_name, columns_to_summarise, summaries, factors = c(), summary_name, calc, calc_name = "")}}{Append Summaries to a Data Object} #' \item{\code{calculate_summary(data_name, columns_to_summarise = NULL, summaries, factors = c(), store_results = TRUE, drop = TRUE, return_output = FALSE, summary_name = NA, result_names = NULL, 
percentage_type = "none", perc_total_columns = NULL, perc_total_factors = c(), perc_total_filter = NULL, perc_decimal = FALSE, perc_return_all = FALSE, include_counts_with_percentage = FALSE, silent = FALSE, additional_filter, original_level = FALSE, signif_fig = 2, sep = "_", ...)}}{Calculate Summaries for a Data Object} #' \item{\code{summary(data_name, columns_to_summarise, summaries, factors = c(), store_results = FALSE, drop = FALSE, return_output = FALSE, summary_name = NA, add_cols = c(), filter_names = c(), ...)}}{Perform and Return Summaries for a Data Object} @@ -265,7 +261,6 @@ #' @export DataBook <- R6::R6Class("DataBook", public = list( - #' @field initialize Initialise for Public List #' @description Initialize a new DataBook object. #' @param data_tables A list of data frames to be included in the DataBook. #' @param instat_obj_metadata Metadata for the instat object. @@ -302,7 +297,7 @@ DataBook <- R6::R6Class("DataBook", }, #' @description - #' Standardize country names in the specified data table. + #' Standardise country names in the specified data table. #' @param data_name The name of the data table. #' @param country_columns A vector of column names containing country data. standardise_country_names = function(data_name, country_columns = c()) { @@ -636,6 +631,8 @@ DataBook <- R6::R6Class("DataBook", #' @param include_column_selections A boolean to include column selections in the clone. #' @param include_calculations A boolean to include calculations in the clone. #' @param include_comments A boolean to include comments in the clone. + #' @param include_scalars A boolean to include scalars in the clone. + #' @param ... Additional arguments passed to other methods. clone_data_object = function(curr_data_object, include_objects = TRUE, include_metadata = TRUE, include_logs = TRUE, include_filters = TRUE, include_column_selections = TRUE, include_calculations = TRUE, include_comments = TRUE, include_scalars = TRUE, ...) 
{ curr_names <- names(curr_data_object) if("get_data_frame" %in% curr_names) new_data <- curr_data_object$get_data_frame(use_current_filter = FALSE) @@ -681,6 +678,7 @@ DataBook <- R6::R6Class("DataBook", #' @description #' Clones an instat calculation with manipulations and sub-calculations. #' @param curr_instat_calculation The current instat calculation to be cloned. + #' @param ... Additional arguments passed to other methods. clone_instat_calculation = function(curr_instat_calculation, ...) { new_manips <- lapply(curr_instat_calculation$manipulations, function(x) self$clone_instat_calculation(x)) new_subs <- lapply(curr_instat_calculation$sub_calculations, function(x) self$clone_instat_calculation(x)) @@ -760,7 +758,7 @@ DataBook <- R6::R6Class("DataBook", #' Retrieve data objects from the DataBook by name. #' @param data_name The name or index of the data object(s) to retrieve. #' @param as_list A boolean to return the data objects as a list (default: FALSE). - #' @param ... Additional arguments. + #' @param ... Additional arguments passed to other methods. get_data_objects = function(data_name, as_list = FALSE, ...) { if (missing(data_name)) { return(private$.data_sheets) @@ -783,7 +781,7 @@ DataBook <- R6::R6Class("DataBook", #' @param stack_data A boolean to stack data (default: FALSE). #' @param include_hidden_columns A boolean to include hidden columns (default: TRUE). #' @param use_current_filter A boolean to apply the current filter (default: TRUE). - #' @param ... Additional arguments. + #' @param ... Additional arguments passed to other methods. get_data_frame = function(data_name, convert_to_character = FALSE, stack_data = FALSE, include_hidden_columns = TRUE, use_current_filter = TRUE, ...) { if (!stack_data) { @@ -820,7 +818,7 @@ DataBook <- R6::R6Class("DataBook", #' @param convert_to_character A boolean indicating whether to convert data to character type (default: FALSE). #' @param property The specific property to retrieve. 
#' @param column The column for which metadata is to be retrieved. - #' @param ... Additional arguments. + #' @param ... Additional arguments passed to other methods. get_variables_metadata = function(data_name, data_type = "all", convert_to_character = FALSE, property, column, ...) { if (missing(data_name)) { @@ -898,6 +896,7 @@ DataBook <- R6::R6Class("DataBook", #' @description #' Retrieve metadata for a specific property. #' @param name The name of the metadata to retrieve. + #' @param ... Additional arguments passed to other methods. get_metadata = function(name, ...) { if (missing(name)) return(private$.metadata) if (!is.character(name)) stop("name must be a character") @@ -912,6 +911,7 @@ DataBook <- R6::R6Class("DataBook", #' @param exclude A vector of names to exclude. #' @param excluded_items A vector of excluded items. #' @param include_hidden A boolean indicating whether to include hidden items. + #' @param ... Additional arguments passed to other methods. get_data_names = function(as_list = FALSE, include, exclude, excluded_items, include_hidden = TRUE, ...) { ret <- names(private$.data_sheets) if(!include_hidden) { @@ -999,6 +999,7 @@ DataBook <- R6::R6Class("DataBook", #' @param data_name The name of the data table. #' @param as_list A boolean indicating whether to return results as a list. #' @param excluded_items A vector of excluded items. + #' @param ... Additional arguments passed to other methods. get_scalar_names = function(data_name, as_list = FALSE, excluded_items = c(), ...) { if (is.null(data_name) || identical(data_name, overall_label)) { out <- @@ -1180,6 +1181,7 @@ DataBook <- R6::R6Class("DataBook", #' @param data_name The name of the data frame. #' @param object_type_label The label for the object type. #' @param as_list A boolean indicating if the output should be a list. + #' @param ... Additional arguments passed to other methods. #' @return A vector of object names. 
get_object_names = function(data_name = NULL, object_type_label = NULL, @@ -1641,7 +1643,7 @@ DataBook <- R6::R6Class("DataBook", #' @param .cols Optional columns to rename. #' @param new_column_names_df Data frame with new column names. #' @param new_labels_df Data frame with new labels. - #' @param ... Additional parameters. + #' @param ... Additional arguments passed to other methods. rename_column_in_data = function(data_name, column_name = NULL, new_val = NULL, label = "", type = "single", .fn, .cols = everything(), new_column_names_df, new_labels_df, ...) { self$get_data_objects(data_name)$rename_column_in_data(column_name, new_val, label, type, .fn, .cols, new_column_names_df, new_labels_df, ...) self$update_links_rename_column(data_name = data_name, old_column_name = column_name, new_column_name = new_val) @@ -1929,7 +1931,6 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(data_name)$convert_column_to_type(col_names = col_names, to_type = to_type, factor_values = factor_values, set_digits = set_digits, set_decimals = set_decimals, keep_attr = keep_attr, ignore_labels = ignore_labels, keep.labels = keep.labels) }, - #' Append Property to Variables Metadata #' @description Appends a new property and its value to the metadata of specified columns in a data table. #' @param data_name The name of the data table. #' @param col_names A vector of column names to which the property should be appended. @@ -1940,7 +1941,6 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(data_name)$append_to_variables_metadata(col_names, property, new_val) }, - #' Append Property to Dataframe Metadata #' @description Appends a new property and its value to the metadata of a data table. #' @param data_name The name of the data table. #' @param property The name of the property to append. 
@@ -1950,7 +1950,6 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(data_name)$append_to_metadata(property, new_val) }, - #' Append Property to Metadata #' @description Appends a new property and its value to the metadata of the current object. #' @param property The name of the property to append. #' @param new_val The value of the property to append. Default is an empty string. @@ -1967,7 +1966,6 @@ DataBook <- R6::R6Class("DataBook", } }, - #' Add Metadata Field #' @description Adds a new metadata field and its value to the specified data table or all data tables. #' @param data_name The name of the data table. Use overall_label to apply to all data tables. #' @param property The name of the property to append. @@ -1982,7 +1980,6 @@ DataBook <- R6::R6Class("DataBook", } }, - #' Reorder Dataframes #' @description Reorders the dataframes in the object according to the specified order. #' @param data_frames_order A vector specifying the new order of dataframes. #' @return None @@ -1993,7 +1990,6 @@ DataBook <- R6::R6Class("DataBook", self$data_objects_changed <- TRUE }, - #' Copy Columns #' @description Copies specified columns from a data table to another location or clipboard. #' @param data_name The name of the data table. #' @param col_names A vector of column names to copy. @@ -2008,7 +2004,6 @@ DataBook <- R6::R6Class("DataBook", } }, - #' Drop Unused Factor Levels #' @description Drops unused levels from a factor column in the specified data table. #' @param data_name The name of the data table. #' @param col_name The name of the column. @@ -2017,7 +2012,6 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(data_name)$drop_unused_factor_levels(col_name = col_name) }, - #' Set Factor Levels #' @description Sets new levels for a factor column in the specified data table. #' @param data_name The name of the data table. #' @param col_name The name of the column. 
@@ -2029,7 +2023,6 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(data_name)$set_factor_levels(col_name = col_name, new_labels = new_labels, new_levels = new_levels, set_new_labels = set_new_labels) }, - #' Edit Factor Level #' @description Edits a level in a factor column in the specified data table. #' @param data_name The name of the data table. #' @param col_name The name of the column. @@ -2040,7 +2033,6 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(data_name)$edit_factor_level(col_name = col_name, old_level = old_level, new_level = new_level) }, - #' Set Factor Reference Level #' @description Sets the reference level for a factor column in the specified data table. #' @param data_name The name of the data table. #' @param col_name The name of the column. @@ -2050,7 +2042,6 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(data_name)$set_factor_reference_level(col_name = col_name, new_ref_level = new_ref_level) }, - #' Get Column Count #' @description Returns the number of columns in the specified data table. #' @param data_name The name of the data table. #' @param use_column_selection Boolean flag to use column selection. Default is FALSE. @@ -2059,7 +2050,6 @@ DataBook <- R6::R6Class("DataBook", return(self$get_data_objects(data_name)$get_column_count(use_column_selection)) }, - #' Reorder Factor Levels #' @description Reorders the levels of a factor column in the specified data table. #' @param data_name The name of the data table. #' @param col_name The name of the column. @@ -2069,7 +2059,6 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(data_name)$reorder_factor_levels(col_name = col_name, new_level_names = new_level_names) }, - #' Get Data Type #' @description Returns the data type of the specified column in the given data table. #' @param data_name The name of the data table. #' @param col_name The name of the column. 
@@ -2078,7 +2067,6 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(data_name)$get_data_type(col_name = col_name) }, - #' Copy Data Frame #' @description Copies a data frame to a new name or clipboard. #' @param data_name The name of the data table. #' @param new_name The new name for the copied data frame. @@ -2103,7 +2091,6 @@ DataBook <- R6::R6Class("DataBook", } }, - #' Copy Column Metadata to Clipboard #' @description Copies the metadata of specified columns to the clipboard. #' @param data_name The name of the data table. #' @param property_names A vector of property names to copy. Default is all properties. @@ -2116,7 +2103,6 @@ DataBook <- R6::R6Class("DataBook", } }, - #' Copy Data Frame Metadata to Clipboard #' @description Copies the metadata of the specified data table to the clipboard. #' @param data_name The name of the data table. #' @param property_names A vector of property names to copy. Default is all properties. @@ -2129,7 +2115,6 @@ DataBook <- R6::R6Class("DataBook", } }, - #' Copy to Clipboard #' @description Copies the specified content to the clipboard. #' @param content The content to copy to the clipboard. #' @return None @@ -2137,7 +2122,6 @@ DataBook <- R6::R6Class("DataBook", clipr::write_clip(content = content) }, - #' Set Hidden Columns #' @description Sets the specified columns as hidden in the given data table. #' @param data_name The name of the data table. #' @param col_names A vector of column names to set as hidden. @@ -2146,7 +2130,6 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(data_name)$set_hidden_columns(col_names = col_names) }, - #' Unhide All Columns #' @description Unhides all columns in the specified data table or all data tables if data_name is missing. #' @param data_name The name of the data table. If missing, applies to all data tables. 
#' @return None @@ -2158,7 +2141,6 @@ DataBook <- R6::R6Class("DataBook", } }, - #' Set Hidden Data Frames #' @description Sets the specified data tables as hidden. #' @param data_names A vector of data table names to set as hidden. #' @return None @@ -2168,7 +2150,6 @@ DataBook <- R6::R6Class("DataBook", invisible(sapply(unhide_data_names, function(x) self$append_to_dataframe_metadata(data_name = x, property = is_hidden_label, new_val = FALSE))) }, - #' Get Hidden Data Frames #' @description Returns a list of hidden data tables. #' @return A vector of hidden data table names. get_hidden_data_frames = function() { @@ -2178,7 +2159,6 @@ DataBook <- R6::R6Class("DataBook", return(hidden_data_names) }, - #' Set Row Names #' @description Sets the row names for the specified data table. #' @param data_name The name of the data table. #' @param row_names A vector of row names to set. @@ -2187,7 +2167,6 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(data_name)$set_row_names(row_names = row_names) }, - #' Get Row Names #' @description Returns the row names of the specified data table. #' @param data_name The name of the data table. #' @return A vector of row names. @@ -2195,7 +2174,6 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(data_name)$get_row_names() }, - #' Set Protected Columns #' @description Sets the specified columns as protected in the given data table. #' @param data_name The name of the data table. #' @param col_names A vector of column names to set as protected. @@ -2204,7 +2182,6 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(data_name)$set_protected_columns(col_names = col_names) }, - #' Get Metadata Fields #' @description Returns the metadata fields of the specified data table. #' @param data_name The name of the data table. #' @param include_overall Boolean flag to include overall metadata fields. Default is TRUE. 
@@ -2241,7 +2218,6 @@ DataBook <- R6::R6Class("DataBook", } }, - #' Freeze Columns #' @description Freezes the specified columns in the given data table. #' @param data_name The name of the data table. #' @param column A vector of column names to freeze. @@ -2250,7 +2226,6 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(data_name)$freeze_columns(column = column) }, - #' Unfreeze Columns #' @description Unfreezes all columns in the specified data table. #' @param data_name The name of the data table. #' @return None @@ -2258,7 +2233,6 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(data_name)$unfreeze_columns() }, - #' Is Variables Metadata #' @description Checks if the specified property is metadata for the given columns in the data table. #' @param data_name The name of the data table. #' @param property The name of the property to check. @@ -2269,7 +2243,6 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(data_name)$is_variables_metadata(property, column, return_vector) }, - #' Data Frame Exists #' @description Checks if the specified data table exists. #' @param data_name The name of the data table. #' @return A boolean value indicating if the data table exists. @@ -2277,7 +2250,6 @@ DataBook <- R6::R6Class("DataBook", return(data_name %in% names(private$.data_sheets)) }, - #' Add Key #' @description Adds a key to the specified columns in the given data table. #' @param data_name The name of the data table. #' @param col_names A vector of column names to add as keys. @@ -2290,7 +2262,6 @@ DataBook <- R6::R6Class("DataBook", invisible(sapply(self$get_data_objects(), function(x) if(!x$is_metadata(is_linkable)) x$append_to_metadata(is_linkable, FALSE))) }, - #' Is Key #' @description Checks if the specified columns are keys in the given data table. #' @param data_name The name of the data table. #' @param col_names A vector of column names to check. 
@@ -2299,7 +2270,6 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(data_name)$is_key(col_names) }, - #' Has Key #' @description Checks if the specified data table has a key. #' @param data_name The name of the data table. #' @return A boolean value indicating if the data table has a key. @@ -2307,7 +2277,6 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(data_name)$has_key() }, - #' Get Keys #' @description Returns the keys of the specified data table. #' @param data_name The name of the data table. #' @param key_name The name of the key. Default is all keys. @@ -2316,7 +2285,6 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(data_name)$get_keys(key_name) }, - #' Add New Comment #' @description Adds a new comment to the specified row and column in the given data table. #' @param data_name The name of the data table. #' @param row The name of the row. @@ -2357,7 +2325,6 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(".comment")$set_data(new_data = comment_df) }, - #' Get Comments #' @description Returns the comments for the specified data table and comment ID. #' @param data_name The name of the data table. #' @param comment_id The ID of the comment. @@ -2366,9 +2333,9 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(data_name)$get_comments(comment_id) }, - #' Get Links #' @description Returns the links for the specified link name or all links. #' @param link_name The name of the link. Default is all links. + #' @param ... Additional arguments passed to other methods. #' @return A list of links. get_links = function(link_name, ...) { if(!missing(link_name)) { @@ -2379,7 +2346,6 @@ DataBook <- R6::R6Class("DataBook", } }, - #' Set Structure Columns #' @description Sets the structure columns for the specified data table. #' @param data_name The name of the data table. #' @param struc_type_1 A vector of column names for the first structure type. 
@@ -2390,7 +2356,6 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(data_name)$set_structure_columns(struc_type_1, struc_type_2, struc_type_3) }, - #' Add Dependent Columns #' @description Adds dependent columns to the specified columns in the given data table. #' @param data_name The name of the data table. #' @param columns A vector of column names to add dependents to. @@ -2400,7 +2365,6 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(data_name)$add_dependent_columns(columns, dependent_cols) }, - #' Set Column Colours #' @description Sets the colours for the specified columns in the given data table. #' @param data_name The name of the data table. #' @param columns A vector of column names to set colours for. @@ -2410,7 +2374,6 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(data_name)$set_column_colours(columns, colours) }, - #' Has Colours #' @description Checks if the specified columns have colours in the given data table. #' @param data_name The name of the data table. #' @param columns A vector of column names to check. @@ -2419,7 +2382,6 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(data_name)$has_colours(columns) }, - #' Remove Column Colours #' @description Removes the colours from all columns in the specified data table. #' @param data_name The name of the data table. #' @return None @@ -2427,7 +2389,6 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(data_name)$remove_column_colours() }, - #' Set Column Colours by Metadata #' @description Sets the colours for the specified columns based on metadata in the given data table. #' @param data_name The name of the data table. #' @param columns A vector of column names to set colours for. @@ -2437,7 +2398,6 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(data_name)$set_column_colours_by_metadata(data_name, columns, property) }, - #' Graph One Variable #' @description Creates a graph for a single variable in the specified data table. 
#' @param data_name The name of the data table. #' @param columns A vector of column names to graph. @@ -2448,13 +2408,12 @@ DataBook <- R6::R6Class("DataBook", #' @param free_scale_axis Boolean flag to allow free scaling of axes. Default is FALSE. #' @param ncol The number of columns in the output. Default is NULL. #' @param coord_flip Boolean flag to flip coordinates. Default is FALSE. - #' @param ... Additional arguments passed to the graph function. + #' @param ... Additional arguments passed to other methods. #' @return None graph_one_variable = function(data_name, columns, numeric = "geom_boxplot", categorical = "geom_bar", character = "geom_bar", output = "facets", free_scale_axis = FALSE, ncol = NULL, coord_flip = FALSE, ...) { self$get_data_objects(data_name)$graph_one_variable(columns = columns, numeric = numeric, categorical = categorical, output = output, free_scale_axis = free_scale_axis, ncol = ncol, coord_flip = coord_flip, ... = ...) }, - #' Make Date YearMonthDay #' @description Creates a date column from year, month, and day columns in the specified data table. #' @param data_name The name of the data table. #' @param year The name of the year column. @@ -2470,7 +2429,6 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(data_name)$make_date_yearmonthday(year = year, month = month, day = day, f_year = f_year, f_month = f_month, f_day = f_day, year_format = year_format, month_format = month_format) }, - #' Make Date YearDoY #' @description Creates a date column from year and day of year columns in the specified data table. #' @param data_name The name of the data table. #' @param year The name of the year column. @@ -2482,7 +2440,6 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(data_name)$make_date_yeardoy(year = year, doy = doy, base = base, doy_typical_length = doy_typical_length) }, - #' Set Contrasts of Factor #' @description Sets the contrasts for a factor column in the specified data table. 
#' @param data_name The name of the data table. #' @param col_name The name of the column. @@ -2493,7 +2450,6 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(data_name)$set_contrasts_of_factor(col_name = col_name, new_contrasts = new_contrasts, defined_contr_matrix = defined_contr_matrix) }, - #' Create Factor Data Frame #' @description Creates a new data frame for a factor column in the specified data table. #' @param data_name The name of the data table. #' @param factor The name of the factor column. @@ -2538,7 +2494,6 @@ DataBook <- R6::R6Class("DataBook", self$add_link(from_data_frame = data_name, to_data_frame = factor_data_frame_name, link_pairs = factor, type = keyed_link_label) }, - #' Split Date #' @description Splits a date column into multiple date components in the specified data table. #' @param data_name The name of the data table. #' @param col_name The name of the date column. @@ -2903,6 +2858,7 @@ DataBook <- R6::R6Class("DataBook", #' Get variable names from a NetCDF file. #' @param file The path to the NetCDF file. #' @param as_list A boolean indicating whether to return results as a list. + #' @param ... Additional arguments passed to other methods. #' @return A list or vector of variable names from the NetCDF file. get_nc_variable_names = function(file = "", as_list = FALSE, ...) { if(file == "") { @@ -4132,6 +4088,7 @@ DataBook <- R6::R6Class("DataBook", #' @param rows_changed The rows that have been modified. #' @param comments_list A list of comments for changes made. #' @param add_flags Logical indicating whether to add flags. + #' @param ... Additional arguments passed to other methods. save_data_entry_data = function(data_name, new_data, rows_changed, comments_list = list(), add_flags = FALSE, ...) 
{ if (!missing(comments_list)) { @@ -4318,7 +4275,6 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(data_name)$display_daily_table(data_name = data_name, climatic_element = climatic_element, date_col = date_col, year_col =year_col, station_col = station_col, Misscode = Misscode, Tracecode = Tracecode, Zerocode = Zerocode, monstats = monstats) }, - #' Add a Comment to Data Sheet #' @description Adds a new `instat_comment` object to the data sheet if the key is defined and valid. #' @param new_comment An `instat_comment` object to be added to the data sheet. #' @details This function first checks if a key is defined and valid for the data sheet. @@ -4329,7 +4285,6 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(data_name)$add_comment(new_comment) }, - #' Delete a Comment from Data Sheet #' @description Deletes a comment from the data sheet based on the comment ID. #' @param comment_id A character string representing the ID of the comment to be deleted. #' @details If the specified comment ID does not exist in the data sheet, an error is thrown. @@ -4338,14 +4293,12 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(data_name)$delete_comment(comment_id) }, - #' Get All Comment IDs #' @description Retrieves all comment IDs currently stored in the data sheet. #' @return A character vector containing the IDs of all comments in the data sheet. get_comment_ids = function() { return(self$get_data_objects(data_name)$get_comment_ids()) }, - #' Get Comments as Data Frame #' @description Converts all comments in the data sheet to a data frame format for easier inspection and analysis. #' @details This function collects various fields from each comment and returns them in a data frame. #' The number of replies and attributes for each comment is also included. 
@@ -4371,8 +4324,6 @@ DataBook <- R6::R6Class("DataBook", self$get_data_objects(data_name)$set_options_by_context_types(obyc_types = obyc_types, key_columns = key_columns) }, - #' Update links to rename data frame - #' #' @description This function updates all links that reference a data frame with a specified old name, #' renaming it to a new name. #' @@ -4398,8 +4349,6 @@ DataBook <- R6::R6Class("DataBook", } }, - #' Add a new link between data frames - #' #' @description This function adds a new link between two data frames with the specified link pairs and type. #' It will check if the link already exists or if the link columns are keys. #' @@ -4480,8 +4429,6 @@ DataBook <- R6::R6Class("DataBook", } }, - #' Get link names - #' #' @description Retrieves the names of all links involving a specified data frame, with options to include or exclude specific types. #' #' @param data_name The name of the data frame @@ -4508,8 +4455,6 @@ DataBook <- R6::R6Class("DataBook", else return(out) }, - #' Check if a link exists from a data frame - #' #' @description Verifies if a link exists from a specific data frame with given link pairs. #' #' @param curr_data_frame The name of the originating data frame @@ -4529,8 +4474,6 @@ DataBook <- R6::R6Class("DataBook", return(FALSE) }, - #' Check if a link exists between two data frames - #' #' @description This function checks if there is an ordered or unordered link between two specified data frames. #' #' @param from_data_frame The name of the originating data frame @@ -4548,8 +4491,6 @@ DataBook <- R6::R6Class("DataBook", } }, - #' Get the link definition between two data frames - #' #' @description Retrieves the link definition between two specified data frames. 
#' #' @param from_data_frame The name of the originating data frame @@ -4573,8 +4514,6 @@ DataBook <- R6::R6Class("DataBook", return(NULL) }, - #' Check if a Link Exists from One Data Frame to Another with Specified Columns - #' #' @description This function checks if a link exists from `first_data_frame` to `second_data_frame` #' using the specified `link_pairs` columns. #' @@ -4597,8 +4536,6 @@ DataBook <- R6::R6Class("DataBook", return(FALSE) }, - #' Retrieve Names of Linked Data Frames - #' #' @description This function returns the names of data frames linked to `from_data_frame`. #' Optionally, includes `from_data_frame` itself in the output if `include_self` is TRUE. #' Filters results by `link_cols`, if provided. @@ -4627,8 +4564,6 @@ DataBook <- R6::R6Class("DataBook", return(unique(out)) }, - #' Get the Linked Data Frame and Matching Columns for a Link - #' #' @description This function returns a list of the target data frame and matched columns. #' #' @param from_data_frame Name of the source data frame. @@ -4651,8 +4586,6 @@ DataBook <- R6::R6Class("DataBook", return(list()) }, - #' Recursively Search for Linked Data Frame Definitions - #' #' @description This function attempts to find a linked data frame that matches `link_pairs`. #' Recursively explores links between multiple data frames. #' @@ -4696,8 +4629,6 @@ DataBook <- R6::R6Class("DataBook", } }, - #' Retrieve Equivalent Columns in Linked Data Frames - #' #' @description This function returns columns in `to_data_name` equivalent to `columns` in `from_data_name`. #' Recursively searches links between multiple data frames. #' @@ -4738,8 +4669,6 @@ DataBook <- R6::R6Class("DataBook", } }, - #' Retrieve Columns in a Link Containing Specified Columns - #' #' @description This function returns columns in `to_data_frame` corresponding to `containing_columns` in `from_data_frame` #' if a link exists between them. 
#' @@ -4776,7 +4705,6 @@ DataBook <- R6::R6Class("DataBook", return(c()) }, - #' View a specific link by name #' @description Displays the details of a specified link. #' @param link_name The name of the link to view view_link = function(link_name) { @@ -4791,7 +4719,6 @@ DataBook <- R6::R6Class("DataBook", } }, - #' Apply a Calculation to Data in the DataBook #' @description This method applies a given calculation to the data stored in the `DataBook` object. #' It supports various calculation types (e.g., "summary") and includes options for storing #' and returning results. @@ -4815,8 +4742,6 @@ DataBook <- R6::R6Class("DataBook", } }, - #' Save a Calculation to a Data Frame - #' #' @description This method saves a calculation to a specific data frame within the `DataBook` object. #' The calculation is stored in the designated data frame's calculation registry for #' future reference and reuse. @@ -4844,9 +4769,7 @@ DataBook <- R6::R6Class("DataBook", }, - #' Apply an Instat Calculation - #' - #' This method performs a calculation or series of calculations (including sub-calculations) on data + #' @description This method performs a calculation or series of calculations (including sub-calculations) on data #' within the `DataBook` object. It supports recursive calls for managing dependencies between #' manipulations and sub-calculations. #' This method is called recursively, and it would not be called by a user, another function would always handle the output and display @@ -5278,7 +5201,6 @@ DataBook <- R6::R6Class("DataBook", }, - #' Run an Instat Calculation and Display Results #' @description This method runs a specified calculation using `apply_instat_calculation` and #' displays the results if required. It serves as the primary interface for #' triggering calculations within the `DataBook`. 
@@ -5294,9 +5216,7 @@ DataBook <- R6::R6Class("DataBook", if(display) return(out$data) }, - #' Get Corresponding Link Columns - #' - #' This function identifies corresponding link columns between two data frames + #' @description This function identifies corresponding link columns between two data frames #' within the `DataBook` object. It checks for existing links and maps column #' names between the two data frames based on their relationship. #' @@ -5345,8 +5265,6 @@ DataBook <- R6::R6Class("DataBook", return(by) }, - #' Get Link Columns Between Data Frames - #' #' @description This function finds a link between two data frames within the `DataBook` object and #' returns the corresponding columns to use for linking. It ensures the link is valid #' by checking that the columns exist in both data frames. @@ -5396,8 +5314,6 @@ DataBook <- R6::R6Class("DataBook", return(by) }, - #' Save the Output of a Calculation - #' #' @description This method saves the output of a calculation to the appropriate data frame #' within the `DataBook` object. It manages links and metadata associated with #' the calculation. @@ -5575,8 +5491,6 @@ DataBook <- R6::R6Class("DataBook", self$save_calculation(to_data_name, calc) }, - #' Append Summaries to a Data Object - #' #' @description This method appends the results of a summary calculation to a data object #' in the `DataBook`. If a corresponding summary data object exists, the method #' merges the new summary into it. Otherwise, it creates a new summary data object. @@ -5648,28 +5562,34 @@ DataBook <- R6::R6Class("DataBook", self$append_to_variables_metadata(summary_name, calc_out_columns, dependencies_label, dependencies_cols) }, - #' Calculate Summaries for a Data Object - #' - #' @description This method performs summary calculations on specified columns of a data object, optionally grouped by factors, - #' and stores the results in the `DataBook`. 
- #' - #' @param data_name A string specifying the name of the data object to summarise. - #' @param columns_to_summarise A character vector of columns to summarise. If `NULL`, the first column is used for counts. - #' @param summaries A character vector specifying the summary functions to apply (e.g., `"mean"`, `"sum"`). - #' @param factors A character vector of grouping factors. Default is `c()`. - #' @param store_results Logical. If `TRUE`, the results are stored in the `DataBook`. Default is `TRUE`. - #' @param drop Logical. Whether to drop unused factor levels. Default is `TRUE`. - #' @param return_output Logical. If `TRUE`, returns the summary results. Default is `FALSE`. - #' @param summary_name A string specifying the name of the summary data object. Default is `NA`. - #' @param ... Additional arguments passed to the summary functions. - #' - #' @return If `return_output = TRUE`, a data frame containing the summary results; otherwise, `NULL`. - #' - #' @details - #' - Supports percentage calculations through the `percentage_type` parameter (e.g., `"none"`, `"factors"`, `"columns"`). - #' - Handles weighted summaries and additional filters if specified. - #' - Groups data by factors before applying the summary functions. - calculate_summary = function(data_name, columns_to_summarise = NULL, summaries, factors = c(), store_results = TRUE, drop = TRUE, return_output = FALSE, summary_name = NA, result_names = NULL, percentage_type = "none", perc_total_columns = NULL, perc_total_factors = c(), perc_total_filter = NULL, perc_decimal = FALSE, perc_return_all = FALSE, include_counts_with_percentage = FALSE, silent = FALSE, additional_filter, original_level = FALSE, signif_fig = 2, sep = "_", ...) { + #' @description Computes summary statistics for a dataset based on specified columns, summaries, and grouping factors. + #' Supports flexible percentage calculations, handling of missing values, and result storage. 
+ #' + #' @param data_name A character string representing the name of the dataset to summarize. + #' @param columns_to_summarise Optional. A character vector of column names to summarize. Defaults to `NULL`. + #' @param summaries A vector of summary functions to apply to the data. + #' @param factors A character vector of factor column names for grouping. Defaults to an empty vector. + #' @param store_results Logical. If `TRUE`, stores intermediate results. Defaults to `TRUE`. + #' @param drop Logical. If `TRUE`, drops unused factor levels. Defaults to `TRUE`. + #' @param return_output Logical. If `TRUE`, returns the summary output. Defaults to `FALSE`. + #' @param summary_name A character string for naming the summary. Defaults to `NA`. + #' @param result_names Optional. A character vector for naming summary results. Defaults to `NULL`. + #' @param percentage_type Character. Type of percentages to calculate ("none", "factors", "columns", "filter"). Defaults to `"none"`. + #' @param perc_total_columns Optional. Columns to use for total percentage calculations. Defaults to `NULL`. + #' @param perc_total_factors A character vector of factors to use for total percentage calculations. Defaults to an empty vector. + #' @param perc_total_filter Optional. A filter condition for percentage calculations. Defaults to `NULL`. + #' @param perc_decimal Logical. If `TRUE`, displays percentages in decimal format. Defaults to `FALSE`. + #' @param perc_return_all Logical. If `TRUE`, returns all percentage-related columns. Defaults to `FALSE`. + #' @param include_counts_with_percentage Logical. If `TRUE`, includes counts alongside percentages. Defaults to `FALSE`. + #' @param silent Logical. If `TRUE`, suppresses warnings. Defaults to `FALSE`. + #' @param additional_filter Optional. Additional filtering conditions for the calculation. + #' @param original_level Logical. If `TRUE`, uses the original level for calculations. Defaults to `FALSE`. + #' @param signif_fig Numeric. 
Number of significant figures for rounding numeric values. Defaults to `2`. + #' @param sep Character. Separator used in result names. Defaults to `"_"`. + #' @param ... Additional arguments passed to other methods. + #' @return A data frame containing the calculated summary statistics. + #' @export + calculate_summary = function(data_name, columns_to_summarise = NULL, summaries, factors = c(), store_results = TRUE, drop = TRUE, return_output = FALSE, summary_name = NA, result_names = NULL, percentage_type = "none", perc_total_columns = NULL, perc_total_factors = c(), perc_total_filter = NULL, perc_decimal = FALSE, perc_return_all = FALSE, include_counts_with_percentage = FALSE, silent = FALSE, additional_filter, original_level = FALSE, signif_fig = 2, sep = "_", ...) { if(original_level) type <- "calculation" else type <- "summary" include_columns_to_summarise <- TRUE @@ -5828,28 +5748,22 @@ DataBook <- R6::R6Class("DataBook", } }, - #' Perform and Return Summaries for a Data Object + #' @description Computes summary statistics for specified columns in a dataset, optionally grouped by factors. + #' Handles multiple summaries, data types, and error conditions gracefully. #' - #' @description This method performs summary calculations for specified columns, grouped by optional factors, - #' and returns the results as a data frame. Unlike `calculate_summary`, this method does not - #' store the results unless explicitly requested. - #' - #' @param data_name A string specifying the name of the data object to summarise. - #' @param columns_to_summarise A character vector of columns to summarise. - #' @param summaries A character vector specifying the summary functions to apply. - #' @param factors A character vector of grouping factors. Default is `c()`. - #' @param store_results Logical. If `TRUE`, stores the results in the `DataBook`. Default is `FALSE`. - #' @param drop Logical. Whether to drop unused factor levels. Default is `FALSE`. 
- #' @param return_output Logical. If `TRUE`, returns the summary results. Default is `FALSE`. - #' @param summary_name Optional. A string specifying the name of the summary data object. - #' @param ... Additional arguments passed to the summary functions. - #' - #' @return A data frame containing the summary results. - #' - #' @details - #' - Summaries are grouped by the specified `factors`, if provided. - #' - Supports handling of missing values and custom result formatting. - #' - Can perform multiple summary functions on multiple columns in a single call. + #' @param data_name A character string representing the name of the dataset to summarize. + #' @param columns_to_summarise A character vector of column names to summarize. + #' @param summaries A vector of summary function names to apply to the columns. + #' @param factors A character vector of factor column names for grouping. Defaults to an empty vector. + #' @param store_results Logical. If `TRUE`, stores the summary results. Defaults to `FALSE`. + #' @param drop Logical. If `TRUE`, drops unused factor levels. Defaults to `FALSE`. + #' @param return_output Logical. If `TRUE`, returns the summary output. Defaults to `FALSE`. + #' @param summary_name Optional. A character string to name the summary. Defaults to `NA`. + #' @param add_cols Optional. Additional columns to include in the output. Defaults to an empty vector. + #' @param filter_names A character vector of filter names to apply during the calculation. Defaults to an empty vector. + #' @param ... Additional arguments passed to other methods or functions. + #' @return A data frame or list containing the computed summary statistics. If no grouping factors are provided, the result is a table with row names corresponding to the summary functions. 
+ #' @export summary = function(data_name, columns_to_summarise, summaries, factors = c(), store_results = FALSE, drop = FALSE, return_output = FALSE, summary_name = NA, add_cols = c(), filter_names = c(), ...) { calculated_from = list() calculated_from[[1]] <- list(data_name = data_name, columns = columns_to_summarise) @@ -5931,28 +5845,41 @@ DataBook <- R6::R6Class("DataBook", } return(results) }, - - #' Generate a Summary Table - #' - #' @description This method generates a summary table for a data object, grouped by specified factors, - #' and optionally includes margins and percentages. - #' - #' @param data_name A string specifying the name of the data object to summarise. - #' @param columns_to_summarise A character vector of columns to summarise. - #' @param summaries A character vector specifying the summary functions to apply. - #' @param factors A character vector of grouping factors. Default is `c()`. - #' @param store_table Logical. If `TRUE`, stores the summary table in the `DataBook`. Default is `FALSE`. - #' @param include_margins Logical. If `TRUE`, includes margins (e.g., totals) in the table. Default is `FALSE`. - #' @param return_output Logical. If `TRUE`, returns the summary table. Default is `FALSE`. - #' @param percentage_type A string specifying the type of percentage calculation (`"none"`, `"factors"`, `"columns"`, `"filter"`). Default is `"none"`. - #' @param ... Additional arguments passed to the summary functions. - #' - #' @return A data frame containing the summary table. + + #' @description Creates a summary table for a dataset based on specified columns, summaries, and factors. + #' Provides options for margins, percentages, and various customization settings. #' - #' @details - #' - The table includes summaries for the specified columns and factors. - #' - Supports margins and percentage calculations based on grouping levels or column totals. 
- #' - Automatically handles missing values and can format results with significant figures. + #' @param data_name A character string representing the name of the dataset to summarize. + #' @param columns_to_summarise Optional. A character vector of column names to summarize. Defaults to `NULL`. + #' @param summaries A vector of summary functions to apply to the data. + #' @param factors A character vector of factor column names for grouping. Defaults to an empty vector. + #' @param store_table Logical. If `TRUE`, stores the resulting table in the data book. Defaults to `FALSE`. + #' @param store_results Logical. If `TRUE`, stores intermediate results. Defaults to `FALSE`. + #' @param drop Logical. If `TRUE`, drops unused factor levels. Defaults to `TRUE`. + #' @param na.rm Logical. If `TRUE`, removes missing values. Defaults to `FALSE`. + #' @param summary_name A character string for naming the summary. Defaults to `NA`. + #' @param include_margins Logical. If `TRUE`, includes margin summaries. Defaults to `FALSE`. + #' @param margins Character. Type of margins to include ("outer", "summary"). Defaults to `"outer"`. + #' @param return_output Logical. If `TRUE`, returns the summary output. Defaults to `FALSE`. + #' @param treat_columns_as_factor Logical. If `TRUE`, treats columns to summarize as factors. Defaults to `FALSE`. + #' @param page_by Optional. A character vector for paginating results. Defaults to `NULL`. + #' @param signif_fig Numeric. Number of significant figures for rounding numeric values. Defaults to `2`. + #' @param na_display Character. String to represent missing values in the output. Defaults to an empty string. + #' @param na_level_display Character. String to represent missing factor levels in the output. Must be non-empty. + #' @param weights Optional. A numeric vector of weights for weighted summaries. Defaults to `NULL`. + #' @param caption Optional. A character string for table captions. Defaults to `NULL`. 
+ #' @param result_names Optional. A character vector for naming summary results. Defaults to `NULL`. + #' @param percentage_type Character. Type of percentages to calculate ("none", "factors", "columns", "filter"). Defaults to `"none"`. + #' @param perc_total_columns Optional. Columns to use for total percentage calculations. Defaults to `NULL`. + #' @param perc_total_factors A character vector of factors to use for total percentage calculations. Defaults to an empty vector. + #' @param perc_total_filter Optional. A filter condition for percentage calculations. Defaults to `NULL`. + #' @param perc_decimal Logical. If `TRUE`, displays percentages in decimal format. Defaults to `FALSE`. + #' @param include_counts_with_percentage Logical. If `TRUE`, includes counts alongside percentages. Defaults to `FALSE`. + #' @param margin_name Character. Name for margin rows/columns in the output. Defaults to `"(All)"`. + #' @param additional_filter Optional. An additional filter for data summarization. + #' @param ... Additional arguments passed to other methods. + #' @return A `tibble` containing the summarized data table. + #' @export summary_table = function(data_name, columns_to_summarise = NULL, summaries, factors = c(), store_table = FALSE, store_results = FALSE, drop = TRUE, na.rm = FALSE, summary_name = NA, include_margins = FALSE, margins = "outer", return_output = FALSE, treat_columns_as_factor = FALSE, page_by = NULL, signif_fig = 2, na_display = "", na_level_display = "NA", weights = NULL, caption = NULL, result_names = NULL, percentage_type = "none", perc_total_columns = NULL, perc_total_factors = c(), perc_total_filter = NULL, perc_decimal = FALSE, include_counts_with_percentage = FALSE, margin_name = "(All)", additional_filter, ...)
{ # TODO: write in errors if (na_level_display == "") stop("na_level_display must be a non empty string") @@ -6125,7 +6052,6 @@ DataBook <- R6::R6Class("DataBook", return(tibble::as_tibble(shaped_cell_values)) }, - #' Import SST #' @description Imports SST data and adds keys and links to the specified data tables. #' @param dataset The SST dataset. #' @param data_from The source of the data. Default is 5. diff --git a/R/data_sheet.R b/R/data_sheet.R index b4372cc..cb76f05 100644 --- a/R/data_sheet.R +++ b/R/data_sheet.R @@ -180,16 +180,13 @@ #' \item{\code{has_labels(col_names)}}{Checks if the specified columns have labels.} #' \item{\code{display_daily_table(data_name, climatic_element, date_col = date_col, year_col = year_col, station_col = station_col, Misscode, Tracecode, Zerocode, monstats = c("min", "mean", "median", "max", "IQR", "sum"))}}{Display a daily summary table for a specified climatic data element.} #' -#' # related to instat_comments.R file in R-Instat #' \item{\code{add_comment(new_comment)}}{Adds a new `instat_comment` object to the data sheet if the key is defined and valid.} #' \item{\code{delete_comment(comment_id)}}{Deletes a comment from the data sheet based on the comment ID.} #' \item{\code{get_comment_ids()}}{Retrieves all comment IDs currently stored in the data sheet.} #' \item{\code{get_comments_as_data_frame()}}{Converts all comments in the data sheet to a data frame format for easier inspection and analysis.} #' -#' # related to calculation.R file in R-Instat #' \item{\code{save_calculation(calc)}}{Save a Calculation to the DataSheet.} #' -#' # related to summary_functions.R file in R-Instat #' \item{\code{merge_data(new_data, by = NULL, type = "left", match = "all")}{Merge New Data with Existing Data}} #' \item{\code{calculate_summary(calc, ...)}{Calculate Summaries for Specified Columns}} #' } diff --git a/R/instat_comment.R b/R/instat_comment.R index 2b1804f..afbfa7e 100644 --- a/R/instat_comment.R +++ b/R/instat_comment.R @@ -2,7 
+2,21 @@ #' #' @description #' The `instat_comment` R6 class represents a comment in a data sheet, with various properties including identifiers, key-value pairs, comment details, timestamps, and status flags for resolution and activity. -#' +#' +#' @field id A numeric/character string representing the unique identifier for the comment. This must be unique within a data frame. +#' @field key_values A character vector storing key-value pairs associated with the comment. This identifies the row the comment is on. +#' @field column If the comment is on a cell, this is the name of the column of the cell. +#' @field value If the comment is on a cell, this is the value in the cell at the time the comment was created. +#' @field type The type of comment (`"critical"`, `"warning"`, `"message"`, or `""`). +#' @field comment A character string for the comment text or message. +#' @field label A character variable. A label or grouping for the comment e.g. if comments are produced by an operation they may all have the same label. This then allows similar comments to be identified e.g. for editing/deleting. +#' @field calculation A character variable. If the comment was created through a calculation e.g. filtering the data frame, this shows how the calculation was done on the data frame. +#' @field time_stamp The date and time (`POSIXct`, `POSIXt`) the comment was created, defaulting to the current system time if empty. +#' @field replies A list of replies to the comment. A reply could be a comment itself. +#' @field resolved Logical value indicating if the comment is marked as resolved (`TRUE` or `FALSE`). +#' @field active Logical value indicating if the comment is marked as active (`TRUE` or `FALSE`). +#' @field attributes A named list of additional information about the comment.
+#' #' @section Methods: #' \describe{ #' \item{\code{data_clone(...)}}{Creates a deep clone of the current `instat_comment` object, including all of its fields and nested `instat_comment` replies.} @@ -74,6 +88,7 @@ instat_comment <- R6::R6Class("instat_comment", #' @title Clone `instat_comment` Object #' @description Creates a deep clone of the current `instat_comment` object, including all of its fields and nested `instat_comment` replies. + #' @param ... Additional parameters to read in #' @details The `data_clone` method duplicates the current `instat_comment` object, ensuring any `instat_comment` instances within the `replies` field are recursively cloned. Non-`instat_comment` replies are directly copied without cloning. #' @return A new `instat_comment` object with the same field values as the original, including a cloned list of `replies`. #' diff --git a/R/link.R b/R/link.R index 9f0b74a..e0feda6 100644 --- a/R/link.R +++ b/R/link.R @@ -3,6 +3,11 @@ #' @description #' The `link` R6 class represents a relationship between two data frames, defined by link attributes and the columns used to link them. #' +#' @field from_data_frame A character string representing the name of the first data frame in the link. +#' @field to_data_frame A character string representing the name of the second data frame in the link. +#' @field type A character string representing the type of link, e.g., "keyed". +#' @field link_columns A list where each element defines a way to link the data frames, with each element as a named character vector. +#' #' @section Methods: #' \describe{ #' \item{\code{data_clone(...)}}{Creates a deep clone of the current `link` object, including all its fields.} @@ -36,8 +41,11 @@ link <- R6::R6Class("link", type = "", link_columns = list(), - #' @title Clone `link` Object - #' @description Creates a deep clone of the current `link` object, including all its fields. + #' Clone `link` Object. 
+ #' @description + #' Creates a deep clone of the current `link` object, including all its fields. + #' @param ... Additional parameters to read in + #' #' @return A new `link` object with the same field values as the original. data_clone = function(...) { ret <- link$new( @@ -49,8 +57,10 @@ link <- R6::R6Class("link", return(ret) }, - #' @title Rename Data Frame in Link - #' @description Renames the specified data frame in the link. + #' Rename a Data Frame in the Link. + #' @description + #' Renames the specified data frame in the link. + #' #' @param old_data_name The current name of the data frame to be renamed. #' @param new_data_name The new name for the data frame. rename_data_frame_in_link = function(old_data_name, new_data_name) { @@ -58,8 +68,10 @@ link <- R6::R6Class("link", if (self$to_data_frame == old_data_name) self$to_data_frame <- new_data_name }, - #' @title Rename Column in Link - #' @description Renames a column involved in the link between data frames. + #' Rename a Column in the Link. + #' @description + #' Renames a column involved in the link between data frames. + #' #' @param data_name The name of the data frame where the column is located. #' @param old_column_name The current name of the column to be renamed. #' @param new_column_name The new name for the column. @@ -75,7 +87,5 @@ link <- R6::R6Class("link", } } } - ), - private = list(), - active = list() + ) ) diff --git a/R/summary_functions.R b/R/summary_functions.R index 6022dd9..2084b13 100644 --- a/R/summary_functions.R +++ b/R/summary_functions.R @@ -1,3 +1,110 @@ +#' Get Summary Calculation Names +#' +#' Generates a set of unique names for summary calculations, based on provided summaries, columns, and filters. +#' +#' @param calc A calculation object (unused in the current implementation). +#' @param summaries A vector of summary function names. +#' @param columns_to_summarise A vector of column names to summarize. 
+#' @param calc_filters A list of filter objects applied to the calculations. +#' @return A character vector of unique summary calculation names. +#' @export +get_summary_calculation_names <- function(calc, summaries, columns_to_summarise, calc_filters) { + filter_description <- "" + i = 1 + for(filt in calc_filters) { + if(!filt$parameters[["is_no_filter"]]) { + if(i == 1) filter_description <- filt$name + else filter_description <- paste(filter_description, filt$name, sep = ".") + } + i = i + 1 + } + if(filter_description == "") { + out <- apply(expand.grid(paste0(substring(summaries, 9),"."), columns_to_summarise), 1, paste, collapse="") + } + else out <- apply(expand.grid(paste0(substring(summaries, 9),"."), paste0(columns_to_summarise, "_"), filter_description), 1, paste, collapse="") + out <- make.names(out) + return(out) +} + + +#' Check for Missing Values +#' +#' A placeholder function that always returns `FALSE`. +#' +#' @param x A vector to check for missing values. +#' @return Logical. Always returns `FALSE`. +#' @export +missing_values_check <- function(x) { + return(FALSE) +} + +#' Calculate Mode +#' +#' Determines the mode (most frequent value) of a vector. +#' +#' @param x A vector of data. +#' @param ... Additional arguments (unused). +#' @return The mode of the vector. Returns `NA` if the input is `NULL`. +#' @export +summary_mode <- function(x,...) { + ux <- unique(x) + out <- ux[which.max(tabulate(match(x, ux)))] + if(is.factor(x)) out <- as.character(out) + if(is.null(out)) return(NA) + else return(out) +} + +#' Check Missing Values Based on Conditions +#' +#' Evaluates a vector against specified conditions for missing values. +#' +#' @param x A vector to check for missing values. +#' @param na_type A character vector specifying the types of checks to perform. Options include: +#' \itemize{ +#' \item `"n"`: Total number of missing values (`<= na_max_n`). +#' \item `"prop"`: Proportion of missing values (`<= na_max_prop` in percentage). 
+#' \item `"n_non_miss"`: Minimum number of non-missing values (`>= na_min_n`). +#' \item `"FUN"`: A custom function to evaluate missing values. +#' \item `"con"`: Maximum consecutive missing values (`<= na_consecutive_n`). +#' } +#' @param na_consecutive_n Optional. Maximum allowed consecutive missing values. +#' @param na_max_n Optional. Maximum allowed missing values. +#' @param na_max_prop Optional. Maximum allowed proportion of missing values (in percentage). +#' @param na_min_n Optional. Minimum required non-missing values. +#' @param na_FUN Optional. A custom function to evaluate missing values. +#' @param ... Additional arguments passed to the custom function `na_FUN`. +#' @return Logical. Returns `TRUE` if all specified checks pass, otherwise `FALSE`. +#' @export +na_check <- function(x, na_type = c(), na_consecutive_n = NULL, na_max_n = NULL, na_max_prop = NULL, na_min_n = NULL, na_FUN = NULL, ...) { + res <- c() + for (i in seq_along(na_type)) { + type <- na_type[i] + if (type %in% c("n","'n'")) { + res[i] <- summary_count_missing(x) <= na_max_n + } + else if (type %in% c("prop","'prop'")) { + res[i] <- (summary_count_missing(x) / summary_count(x)) <= na_max_prop / 100 + } + else if (type %in% c("n_non_miss","'n_non_miss'")) { + res[i] <- summary_count_non_missing(x) >= na_min_n + } + else if (type %in% c("FUN","'FUN'")) { + res[i] <- na_FUN(x, ...) + } + else if (type %in% c("con","'con'")) { + is_na_rle <- rle(is.na(x)) + res[i] <- max(is_na_rle$lengths[is_na_rle$values]) <= na_consecutive_n + } + else { + stop("Invalid na_type specified for missing values check.") + } + if (!res[i]) { + return(FALSE) + } + } + return(all(res)) +} + #' Calculate the Mean of Circular Data #' #' Computes the mean of circular data using `circular::mean.circular`. 
@@ -14,8 +121,6 @@ summary_mean_circular <- function (x, na.rm = FALSE, control.circular = list(), else return(circular::mean.circular(x, na.rm = na.rm, trim = trim, control.circular = control.circular)[[1]]) } - - #' Calculate the Median of Circular Data #' #' Computes the median of circular data using `circular::median.circular`. diff --git a/man/DataBook.Rd b/man/DataBook.Rd index 06ac0c6..7cbd01e 100644 --- a/man/DataBook.Rd +++ b/man/DataBook.Rd @@ -24,21 +24,12 @@ An R6 class to manage a collection of data tables along with their metadata and \note{ This method delegates the actual saving of the calculation to the respective data frame's \code{save_calculation} method, ensuring modularity and separation of concerns. -Apply an Instat Calculation - -This method performs a calculation or series of calculations (including sub-calculations) on data -within the \code{DataBook} object. It supports recursive calls for managing dependencies between -manipulations and sub-calculations. -This method is called recursively, and it would not be called by a user, another function would always handle the output and display -results to the user (usually only the $data part of the list) If the two data frames are not directly linked, the function assumes the columns in the first data frame map directly to columns with the same names in the second data frame. -Get Link Columns Between Data Frames This function ensures that the linking columns are valid by verifying their existence in both data frames. 
-Save the Output of a Calculation } \section{Methods}{ @@ -238,12 +229,12 @@ Save the Output of a Calculation \item{\code{wrap_or_unwrap_data(data_name, col_name, column_data, width, wrap = TRUE)}}{Wraps or unwraps the specified column data in the given data table to the specified width.} \item{\code{anova_tables2(data_name, x_col_names, y_col_name, signif.stars = FALSE, sign_level = FALSE, means = FALSE)}}{Generate ANOVA tables for specified columns in a dataset.} \item{\code{define_as_options_by_context(data_name, obyc_types = NULL, key_columns = NULL)}}{Define options by context for a specified dataset.} -\item{\code{display_daily_table(data_name, climatic_element, date_col, year_col, station_col, Misscode, Tracecode, Zerocode, monstats = c("min", "mean", "median", "max", "IQR", "sum"))}}{Display a daily summary table for a specified climatic data element.}from instat_comment.R in R-Instat +\item{\code{display_daily_table(data_name, climatic_element, date_col, year_col, station_col, Misscode, Tracecode, Zerocode, monstats = c("min", "mean", "median", "max", "IQR", "sum"))}}{Display a daily summary table for a specified climatic data element.} \item{\code{add_comment(new_comment)}}{Adds a new \code{instat_comment} object to the data sheet if the key is defined and valid.} \item{\code{delete_comment(comment_id)}}{Deletes a comment from the data sheet based on the comment ID.} \item{\code{get_comment_ids()}}{Retrieves all comment IDs currently stored in the data sheet.} -\item{\code{get_comments_as_data_frame()}}{Converts all comments in the data sheet to a data frame format for easier inspection and analysis.}from link.R in R-Instat +\item{\code{get_comments_as_data_frame()}}{Converts all comments in the data sheet to a data frame format for easier inspection and analysis.} \item{\code{update_links_rename_data_frame(old_data_name, new_data_name)}}{This function updates all links that reference a data frame with a specified old name, renaming it to a new name.} 
\item{\code{update_links_rename_column(data_name, old_column_name, new_column_name)}}{This function updates all links referencing a column in a data frame with a specified old column name, renaming it to a new column name.} @@ -258,7 +249,7 @@ Save the Output of a Calculation \item{\code{get_possible_linked_to_definition(from_data_frame, link_pairs)}}{This function attempts to find a linked data frame that matches \code{link_pairs}. Recursively explores links between multiple data frames.} \item{\code{get_equivalent_columns(from_data_name, columns, to_data_name)}}{This function returns columns in \code{to_data_name} equivalent to \code{columns} in \code{from_data_name}. Recursively searches links between multiple data frames.} \item{\code{link_between_containing(from_data_frame, containing_columns, to_data_frame)}}{This function returns columns in \code{to_data_frame} corresponding to \code{containing_columns} in \code{from_data_frame} if a link exists between them.} -\item{\code{view_link(link_name)}}{Displays the details of a specified link.}from calculations.R in R-Instat +\item{\code{view_link(link_name)}}{Displays the details of a specified link.} \item{\code{apply_calculation(calc)}{Apply a Calculation to Data in the DataBook}} \item{\code{save_calculation(end_data_frame, calc)}{Save a Calculation to a Data Frame}} @@ -266,7 +257,7 @@ Save the Output of a Calculation \item{\code{run_instat_calculation(calc, display = TRUE, param_list = list())}{Run an Instat Calculation and Display Results}} \item{\code{get_corresponding_link_columns(first_data_frame_name, first_data_frame_columns, second_data_frame_name)}{Get Corresponding Link Columns}} \item{\code{get_link_columns_from_data_frames(first_data_frame_name, first_data_frame_columns, second_data_frame_name, second_data_frame_columns)}{Get Link Columns Between Data Frames}} -\item{\code{save_calc_output(calc, curr_data_list, previous_manipulations)}{Save the Output of a Calculation}}from summary_functions.R in 
R-Instat +\item{\code{save_calc_output(calc, curr_data_list, previous_manipulations)}{Save the Output of a Calculation}} \item{\code{append_summaries_to_data_object(out, data_name, columns_to_summarise, summaries, factors = c(), summary_name, calc, calc_name = "")}{Append Summaries to a Data Object}} \item{\code{calculate_summary(data_name, columns_to_summarise = NULL, summaries, factors = c(), store_results = TRUE, drop = TRUE, return_output = FALSE, summary_name = NA, result_names = NULL, percentage_type = "none", perc_total_columns = NULL, perc_total_factors = c(), perc_total_filter = NULL, perc_decimal = FALSE, perc_return_all = FALSE, include_counts_with_percentage = FALSE, silent = FALSE, additional_filter, original_level = FALSE, signif_fig = 2, sep = "_", ...)}{Calculate Summaries for a Data Object}} @@ -279,18 +270,9 @@ Save the Output of a Calculation \seealso{ \code{\link{DataSheet$save_calculation}} } -\section{Public fields}{ -\if{html}{\out{