From 3fe7ce1e1065d25baa6f44d4dcb35cd64c31c7b3 Mon Sep 17 00:00:00 2001 From: Sebastian Fischer Date: Mon, 22 Jan 2024 19:51:40 +0100 Subject: [PATCH] feat(calculate_hash): allow to customize hash input --- NAMESPACE | 4 ++++ R/calculate_hash.R | 42 ++++++++++++++++++++++++++++++++---------- man/calculate_hash.Rd | 5 ++++- man/hash_input.Rd | 15 +++++++++++++++ 4 files changed, 55 insertions(+), 11 deletions(-) create mode 100644 man/hash_input.Rd diff --git a/NAMESPACE b/NAMESPACE index b7f35c90..4dcbe096 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,6 +10,9 @@ S3method(discard,default) S3method(distinct_values,default) S3method(distinct_values,factor) S3method(distinct_values,logical) +S3method(hash_input,"function") +S3method(hash_input,data.table) +S3method(hash_input,default) S3method(insert_named,"NULL") S3method(insert_named,data.frame) S3method(insert_named,data.table) @@ -82,6 +85,7 @@ export(formulate) export(get_private) export(get_seed) export(has_element) +export(hash_input) export(ids) export(imap) export(imap_chr) diff --git a/R/calculate_hash.R b/R/calculate_hash.R index 1810a982..aae0c735 100644 --- a/R/calculate_hash.R +++ b/R/calculate_hash.R @@ -2,8 +2,11 @@ #' #' @description #' Calls [digest::digest()] to calculate the hash for all objects provided. +#' The hash is calculated using the `xxhash64` algorithm. +#' By specifying methods for the [`hash_input`] generic, you can control which information of an object +#' is used to calculate the hash. #' -#' The following operations are performed to make hashing more robust: +#' Methods exist for: #' * If an object is a [function()], the formals and the body are hashed separately. #' This ensures that the bytecode or parent environment are not be included #' in the hash. @@ -21,13 +24,32 @@ #' @examples #' calculate_hash(iris, 1, "a") calculate_hash = function(...) 
{ - digest(lapply(list(...), function(x) { - if (is.function(x)) { - list(formals(x), as.character(body(x))) - } else if (is.data.table(x)) { - as.list(x) - } else { - x - } - }), algo = "xxhash64") + digest(lapply(list(...), hash_input), algo = "xxhash64") } + +#' Hash Input +#' +#' Returns the information of an object to be used to calculate its hash. +#' @param x (any)\cr +#' Object for which to calculate the hash. +#' @export +hash_input = function(x) { + UseMethod("hash_input") +} + +#' @export +hash_input.function = function(x) { + list(formals(x), as.character(body(x))) +} + +#' @export +#' @method hash_input data.table +hash_input.data.table = function(x) { + as.list(x) +} + +#' @export +hash_input.default = function(x) { + x +} + diff --git a/man/calculate_hash.Rd b/man/calculate_hash.Rd index 9a564e8d..b6a35087 100644 --- a/man/calculate_hash.Rd +++ b/man/calculate_hash.Rd @@ -15,8 +15,11 @@ Objects to hash.} } \description{ Calls \code{\link[digest:digest]{digest::digest()}} to calculate the hash for all objects provided. +The hash is calculated using the \code{xxhash64} algorithm. +By specifying methods for the \code{\link{hash_input}} generic, you can control which information of an object +is used to calculate the hash. -The following operations are performed to make hashing more robust: +Methods exist for: \itemize{ \item If an object is a \code{\link[=function]{function()}}, the formals and the body are hashed separately. 
This ensures that the bytecode or parent environment are not be included diff --git a/man/hash_input.Rd b/man/hash_input.Rd new file mode 100644 index 00000000..96640411 --- /dev/null +++ b/man/hash_input.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/calculate_hash.R +\name{hash_input} +\alias{hash_input} +\title{Hash Input} +\usage{ +hash_input(x) +} +\arguments{ +\item{x}{(any)\cr +Object for which to calculate the hash.} +} +\description{ +Returns the information of an object to be used to calculate its hash. +}