From 530c5ea14a89c9d31b2cc302edb50430328dd5a9 Mon Sep 17 00:00:00 2001 From: Daniel Sjoberg Date: Wed, 5 Jun 2024 17:14:11 -0700 Subject: [PATCH] updated `str_extract()` and added `str_extract_all()` --- R/standalone-stringr.R | 33 +++++++----------------- tests/testthat/test-standalone-stringr.R | 21 ++++++++++++++- 2 files changed, 29 insertions(+), 25 deletions(-) diff --git a/R/standalone-stringr.R b/R/standalone-stringr.R index ea5cbdb..1857948 100644 --- a/R/standalone-stringr.R +++ b/R/standalone-stringr.R @@ -25,25 +25,6 @@ str_squish <- function(string, fixed = FALSE) { return(string) } - -str_extract <- function(string, pattern, fixed = FALSE) { - result <- sapply(string, function(x) { - # Adjust the pattern if fixed is TRUE - if (fixed) { - # Escape special characters to treat them as literal in regex - pattern <- gsub("([][{}()+*^$.|\\\\?])", "\\\\\\1", pattern) - } - match_pos <- regexpr(pattern = pattern, text = x, perl = TRUE) - if (match_pos > 0) { - regmatches(x, match_pos) - } else { - NA_character_ - } - }, USE.NAMES = FALSE) - - return(result) -} - str_remove <- function (string, pattern, fixed = FALSE) { sub (x = string, pattern = pattern, replacement = "", fixed = fixed) } @@ -53,11 +34,15 @@ str_remove_all <- function(string, pattern, fixed = FALSE) { } str_extract <- function(string, pattern, fixed = FALSE) { - ifelse( - str_detect(string, pattern, fixed = fixed), - regmatches(x = string, m = regexpr(pattern = pattern, text = string, fixed = fixed)), - NA_character_ - ) + res <- rep(NA_character_, length.out = length(string)) + res[str_detect(string, pattern, fixed = fixed)] <- + regmatches(x = string, m = regexpr(pattern = pattern, text = string, fixed = fixed)) + + res +} + +str_extract_all <- function(string, pattern, fixed = FALSE) { + regmatches(x = string, m = gregexpr(pattern = pattern, text = string, fixed = fixed)) } str_detect <- function(string, pattern, fixed = FALSE) { diff --git a/tests/testthat/test-standalone-stringr.R 
b/tests/testthat/test-standalone-stringr.R index 3de5490..8a6400c 100644 --- a/tests/testthat/test-standalone-stringr.R +++ b/tests/testthat/test-standalone-stringr.R @@ -59,7 +59,7 @@ test_that("str_remove_all() works", { }) test_that("str_extract() works", { - shopping_list <- c("apples x43", "bag of flour", "bag of sugar", "milk x2") + shopping_list <- c("apples x43", "bag of flour", "bag of sugar, bag of sugar", "milk x2") s <- str_extract(shopping_list, "[a-z]+") expect_identical(s, c("apples", "bag", "bag", "milk")) @@ -81,6 +81,25 @@ test_that("str_extract() works", { expect_identical(s, stringr::str_extract(shopping_list, "[a-z]+")) }) +test_that("str_extract_all() works", { + shopping_list <- c("apples x43", "bag of flour", "bag of sugar, bag of sugar", "milk x2") + + s <- str_extract_all(shopping_list, "[a-z]+") + expect_identical(s, stringr::str_extract_all(shopping_list, "[a-z]+")) + + s <- str_extract_all(shopping_list, "([a-z]+) of ([a-z]+)") + expect_identical(s, stringr::str_extract_all(shopping_list, "([a-z]+) of ([a-z]+)")) + + s_notfixed <- str_extract_all(shopping_list, "\\d") + expect_identical(s_notfixed, stringr::str_extract_all(shopping_list, "\\d")) + + s_fixed <- str_extract_all(shopping_list, "\\d", fixed = TRUE) + expect_identical(s_fixed, stringr::str_extract_all(shopping_list, stringr::fixed("\\d"))) + + s <- str_extract_all(shopping_list, "[a-z]+") + expect_identical(s, stringr::str_extract_all(shopping_list, "[a-z]+")) +}) + test_that("str_detect() works", { fruits <- c("apple", "banana", "pear", "pineapple")