From c2e581a159521b42d406a2393b8251ce8d7ecace Mon Sep 17 00:00:00 2001 From: Matthew Powers Date: Sat, 10 Feb 2024 10:12:46 -0500 Subject: [PATCH] remove regexp_extract_all --- quinn/__init__.py | 2 -- quinn/functions.py | 14 -------------- tests/test_functions.py | 14 -------------- 3 files changed, 30 deletions(-) diff --git a/quinn/__init__.py b/quinn/__init__.py index 727d5517..3f6cbd24 100644 --- a/quinn/__init__.py +++ b/quinn/__init__.py @@ -24,7 +24,6 @@ exists, forall, multi_equals, - regexp_extract_all, remove_all_whitespace, remove_non_word_characters, single_space, @@ -75,7 +74,6 @@ "week_start_date", "week_end_date", "approx_equal", - "regexp_extract_all", "business_days_between", "uuid5", "with_columns_renamed", diff --git a/quinn/functions.py b/quinn/functions.py index e658a0bd..d09a0818 100644 --- a/quinn/functions.py +++ b/quinn/functions.py @@ -230,20 +230,6 @@ def array_choice(col: Column, seed: int | None = None) -> Column: return col[index] -@F.udf(returnType=ArrayType(StringType())) -def regexp_extract_all(s: Column, regexp: Column) -> Column: - """Function uses the Python `re` library to extract regular expressions from a string (`s`) using a regex pattern (`regexp`). - - It returns a list of all matches, or `None` if `s` is `None`. - - :param s: input string (`Column`) - :type s: str - :param regexp: string `re` pattern - :rtype: Column - """ - return None if s is None else re.findall(regexp, s) - - def business_days_between( start_date: Column, end_date: Column, # noqa: ARG001 ) -> Column: diff --git a/tests/test_functions.py b/tests/test_functions.py index 4a9aa3b5..aaafda6c 100644 --- a/tests/test_functions.py +++ b/tests/test_functions.py @@ -329,20 +329,6 @@ def it_works_with_integer_values(): # chispa.assert_column_equality(actual_df, "random_letter", "expected") -def test_regexp_extract_all(): - df = quinn.create_df( - spark, - [("200 - 300 PA.", ["200", "300"]), ("400 PA.", ["400"]), (None, None)], - [ - ("str", StringType(), True), - ("expected", ArrayType(StringType(), True), True), - ], - ) - actual_df = df.withColumn( - "all_numbers", quinn.regexp_extract_all(F.col("str"), F.lit(r"(\d+)")) - ) - chispa.assert_column_equality(actual_df, "all_numbers", "expected") - def test_business_days_between(): df = quinn.create_df(