-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
13 changed files
with
207 additions
and
111 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,9 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(add_special_chars) | ||
export(add_whitespace) | ||
export(change_case) | ||
export(make_missing) | ||
export(make_string_messy) | ||
export(messy) | ||
export(messy_colnames) | ||
importFrom(rlang,.data) | ||
importFrom(stats,runif) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
#' Add special characters to strings
#'
#' Randomly inserts special characters into the values of character and
#' factor columns of a data frame. Columns of any other type are returned
#' unchanged. Factor columns that are processed come back as character
#' columns, because the internal `special_chars()` helper converts factors
#' to character before modifying them.
#'
#' @param data input dataframe
#' @param cols set of columns to apply transformation to. If `NULL`
#' will apply to all columns. Default `NULL`.
#' @param messiness Single number between 0 and 1 controlling how much is
#' changed; it is passed on to `special_chars()`, which uses it as the
#' chance of inserting a special character before each character of a
#' value. Default 0.1.
#' @importFrom rlang .data
#' @return a dataframe the same size as the input data.
#' @export
#' @examples
#' add_special_chars(mtcars)
add_special_chars <- function(data,
                              cols = NULL,
                              messiness = 0.1) {
  # Check length and missingness first so that NA or vector input gets the
  # documented error rather than a failure inside the `if` condition itself.
  if (length(messiness) != 1 || is.na(messiness) ||
    messiness < 0 || messiness > 1) {
    stop("'messiness' must be between 0 and 1")
  }

  # No explicit selection: touch every character / factor column.
  if (is.null(cols)) {
    output <- data |>
      dplyr::mutate(
        dplyr::across(
          dplyr::where(~ is.character(.x) || is.factor(.x)),
          ~ special_chars(.x, messiness = messiness)
        )
      )
    return(output)
  }

  # Explicit selection: every requested name must exist in `data`.
  if (!all(cols %in% colnames(data))) {
    stop("All elements of 'cols' must be a column name in 'data'")
  }

  # Requested columns that are not character / factor are silently skipped.
  data |>
    dplyr::mutate(
      dplyr::across(
        dplyr::all_of(cols) &
          dplyr::where(~ is.character(.x) || is.factor(.x)),
        ~ special_chars(.x, messiness = messiness)
      )
    )
}
|
||
#' Function to make a character string messy
#'
#' Randomly inserts special characters into each string. Every character
#' of every string is considered in turn, and with probability `messiness`
#' one randomly chosen special character is inserted immediately before it.
#' Missing values are returned as `NA` and empty strings are returned
#' unchanged.
#' @param x Character vector (factors are converted to character first)
#' @param messiness Chance, between 0 and 1, of inserting a special
#' character before any given character. Default 0.1.
#' @return Messy character vector the same length as `x`
#' @noRd
special_chars <- function(x, messiness = 0.1) {
  # if factor, convert to character
  if (is.factor(x)) {
    x <- as.character(x)
  }

  # characters to insert
  random_chars <- c(
    "!", "@", "#", "$", "%", "^", "&",
    "*", "(", ")", "_", "+", "-", "."
  )

  special_chars_string <- function(s) {
    # Convert to vector of single characters
    chars <- strsplit(s, NULL)[[1]]

    # Visit characters in order, drawing one uniform number per character
    # (and one sample only on a hit) so seeded results stay reproducible.
    pieces <- vapply(chars, function(char) {
      if (stats::runif(1) < messiness) {
        paste0(sample(random_chars, 1), char)
      } else {
        char
      }
    }, character(1), USE.NAMES = FALSE)

    # Reassemble the string
    paste(pieces, collapse = "")
  }

  # Start from an all-NA result so missing values stay missing instead of
  # being pasted into the literal string "NA".
  x_messy <- rep(NA_character_, length(x))
  names(x_messy) <- names(x)
  present <- !is.na(x)

  # vapply() guarantees a character vector, including for empty input.
  x_messy[present] <- vapply(
    x[present], special_chars_string, character(1),
    USE.NAMES = FALSE
  )
  x_messy
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
#' Resample
#'
#' Picks elements of `x` by position: positions are drawn with
#' `sample.int()` over `length(x)` and then used to index `x`, so `x` is
#' always treated as the set of values to choose from.
#' @param x a vector of one or more elements from which to choose.
#' @param ... further arguments passed on to `sample.int()`, for example
#' `size`, `replace` or `prob`.
#' @return a vector of elements drawn from `x`, in the order sampled.
#' @noRd
resample <- function(x, ...) {
  picked <- sample.int(length(x), ...)
  x[picked]
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Oops, something went wrong.