Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More sc fixes #193

Merged
merged 2 commits into from
Feb 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions quinn/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,19 @@
exists,
forall,
multi_equals,
regexp_extract_all,
remove_all_whitespace,
remove_non_word_characters,
single_space,
uuid5,
week_end_date,
week_start_date,
is_falsy,
is_truthy,
is_false,
is_true,
is_null_or_blank,
is_not_in,
null_between,
)
from quinn.schema_helpers import print_schema_as_code
from quinn.split_columns import split_col
Expand Down Expand Up @@ -68,7 +74,6 @@
"week_start_date",
"week_end_date",
"approx_equal",
"regexp_extract_all",
"business_days_between",
"uuid5",
"with_columns_renamed",
Expand All @@ -78,4 +83,11 @@
"sort_columns",
"append_if_schema_identical",
"flatten_dataframe",
"is_falsy",
"is_truthy",
"is_false",
"is_true",
"is_null_or_blank",
"is_not_in",
"null_between",
]
105 changes: 0 additions & 105 deletions quinn/extensions/column_ext.py

This file was deleted.

104 changes: 90 additions & 14 deletions quinn/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import uuid
from typing import Any

from pyspark.sql.functions import lit, trim, when
import pyspark.sql.functions as F # noqa: N812
from pyspark.sql.types import (
ArrayType,
Expand Down Expand Up @@ -229,20 +230,6 @@ def array_choice(col: Column, seed: int | None = None) -> Column:
return col[index]


@F.udf(returnType=ArrayType(StringType()))
def regexp_extract_all(s: Column, regexp: Column) -> Column:
    """Collect every match of the pattern ``regexp`` found in the string ``s``.

    The search is delegated to ``re.findall`` from the Python standard library.
    A ``None`` input string short-circuits to ``None`` instead of being searched.

    :param s: input string (`Column`)
    :type s: str
    :param regexp: string `re` pattern
    :returns: list of all matches, or ``None`` when ``s`` is ``None``
    :rtype: Column
    """
    # Guard clause: there is nothing to search in a missing value.
    if s is None:
        return None
    return re.findall(regexp, s)


def business_days_between(
start_date: Column, end_date: Column, # noqa: ARG001
) -> Column:
Expand Down Expand Up @@ -302,3 +289,92 @@ def uuid5(
variant_part,
F.substring(hashed, 21, 12),
)

def is_falsy(col: Column) -> Column:
    """Flag each value of ``col`` that is NULL or equal to ``False`` (**falsy**).

    The check is applied value by value: the ``isNull`` test is OR-ed with an
    equality comparison against a literal ``False``, and the combined condition
    is wrapped in a ``when``/``otherwise`` expression that yields ``True`` where
    the condition holds and ``False`` everywhere else.

    :param col: Column object to inspect
    :returns: Column object holding the boolean flag
    :rtype: Column
    """
    null_or_false = col.isNull() | (col == lit(False))
    return when(null_or_false, True).otherwise(False)


def is_truthy(col: Column) -> Column:
    """Return the logical negation of ``is_falsy`` applied to ``col``.

    :param Column col: The ``Column`` to evaluate.
    :returns: A ``Column`` that inverts the result of ``is_falsy(col)``.
    :rtype: Column
    """
    falsy = is_falsy(col)
    return ~falsy


def is_false(col: Column) -> Column:
    """Compare ``col`` for equality with a literal ``False``.

    :param col: Column to compare
    :return: Column holding the result of ``col == False``
    :rtype: Column
    """
    false_literal = lit(False)
    return col == false_literal


def is_true(col: Column) -> Column:
    """Compare ``col`` for equality with a literal ``True``.

    :param col: Column object to compare
    :returns: Column object holding the result of ``col == True``
    :rtype: Column
    """
    true_literal = lit(True)
    return col == true_literal


def is_null_or_blank(col: Column) -> Column:
    """Check whether ``col`` is ``null`` or is an empty string after ``trim``.

    :param col: The :class:`Column` to check.

    :returns: A `Column` that is the OR of ``col.isNull()`` and the comparison
    of ``trim(col)`` with the empty string.
    :rtype: Column
    """
    missing = col.isNull()
    blank = trim(col) == ""
    return missing | blank


def is_not_in(col: Column, _list: list[Any]) -> Column:
    """Negate a membership test of ``col`` against a list of values.

    :param col: Column object whose values are tested
    :param _list: values passed to ``Column.isin``
    :returns: the inverted result of ``col.isin(_list)``
    :rtype: Column
    """
    membership = col.isin(_list)
    return ~membership


def null_between(col: Column, lower: Column, upper: Column) -> Column:
    """Test whether ``col`` lies between ``lower`` and ``upper``, tolerating NULL bounds.

    The branches are evaluated in this order:

    1. both ``lower`` and ``upper`` are NULL -> ``False``
    2. ``col`` is NULL -> ``False``
    3. ``lower`` is NULL, ``upper`` is not, and ``col <= upper`` -> ``True``
    4. ``upper`` is NULL, ``lower`` is not, and ``col >= lower`` -> ``True``
    5. anything not matched above -> ``col.between(lower, upper)``

    :param col: Column object being tested
    :param lower: Column holding the lower bound
    :param upper: Column holding the upper bound
    :returns: Column with the outcome of the branch that applies
    :rtype: Column
    """
    both_bounds_missing = lower.isNull() & upper.isNull()
    only_upper_known = lower.isNull() & upper.isNotNull() & (col <= upper)
    only_lower_known = lower.isNotNull() & upper.isNull() & (col >= lower)

    # Assemble the nested CASE expression from the innermost branch outward so
    # the resulting expression keeps the same nesting as a hand-written pyramid.
    closed_range = when(only_lower_known, True).otherwise(col.between(lower, upper))
    open_below = when(only_upper_known, True).otherwise(closed_range)
    value_present = when(col.isNull(), False).otherwise(open_below)
    return when(both_bounds_missing, False).otherwise(value_present)
118 changes: 0 additions & 118 deletions tests/extensions/test_column_ext.py

This file was deleted.

Loading
Loading