Skip to content

Commit

Permalink
parametrize test_string_functions
Browse files Browse the repository at this point in the history
  • Loading branch information
Michael-J-Ward committed Jul 29, 2024
1 parent 2009741 commit e17ba64
Showing 1 changed file with 42 additions and 78 deletions.
120 changes: 42 additions & 78 deletions python/datafusion/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -567,87 +567,51 @@ def test_array_function_obj_tests(stmt, py_expr):
assert a == b


@pytest.mark.parametrize(
    ("function", "expected_result"),
    [
        # H = 72; W = 87; ! = 33
        (f.ascii(column("a")), pa.array([72, 87, 33], type=pa.int32())),
        (f.bit_length(column("a")), pa.array([40, 40, 8], type=pa.int32())),
        (f.btrim(literal(" World ")), pa.array(["World", "World", "World"])),
        (f.character_length(column("a")), pa.array([5, 5, 1], type=pa.int32())),
        (f.chr(literal(68)), pa.array(["D", "D", "D"])),
        (
            f.concat_ws("-", column("a"), literal("test")),
            pa.array(["Hello-test", "World-test", "!-test"]),
        ),
        (f.concat(column("a"), literal("?")), pa.array(["Hello?", "World?", "!?"])),
        (f.initcap(column("c")), pa.array(["Hello ", " World ", " !"])),
        (f.left(column("a"), literal(3)), pa.array(["Hel", "Wor", "!"])),
        (f.length(column("c")), pa.array([6, 7, 2], type=pa.int32())),
        (f.lower(column("a")), pa.array(["hello", "world", "!"])),
        # Left-padded to a total width of 7, so every expected string is
        # exactly 7 characters long.
        (
            f.lpad(column("a"), literal(7)),
            pa.array(["  Hello", "  World", "      !"]),
        ),
        (f.ltrim(column("c")), pa.array(["hello ", "world ", "!"])),
        (
            f.md5(column("a")),
            pa.array(
                [
                    "8b1a9953c4611296a827abf8c47804d7",
                    "f5a7924e621e84c9280a9a27e1bcb7f6",
                    "9033e0e305f247c0c3c80d0c7848c8b3",
                ]
            ),
        ),
        (f.octet_length(column("a")), pa.array([5, 5, 1], type=pa.int32())),
        (
            f.repeat(column("a"), literal(2)),
            pa.array(["HelloHello", "WorldWorld", "!!"]),
        ),
        (
            f.replace(column("a"), literal("l"), literal("?")),
            pa.array(["He??o", "Wor?d", "!"]),
        ),
        (f.reverse(column("a")), pa.array(["olleH", "dlroW", "!"])),
        (f.right(column("a"), literal(4)), pa.array(["ello", "orld", "!"])),
        # Right-padded to a total width of 8, so every expected string is
        # exactly 8 characters long.
        (
            f.rpad(column("a"), literal(8)),
            pa.array(["Hello   ", "World   ", "!       "]),
        ),
        (f.rtrim(column("c")), pa.array(["hello", " world", " !"])),
        (
            f.split_part(column("a"), literal("l"), literal(1)),
            pa.array(["He", "Wor", "!"]),
        ),
        (
            f.starts_with(column("a"), literal("Wor")),
            pa.array([False, True, False]),
        ),
        (f.strpos(column("a"), literal("o")), pa.array([5, 2, 0], type=pa.int32())),
        (f.substr(column("a"), literal(3)), pa.array(["llo", "rld", ""])),
        (
            f.translate(column("a"), literal("or"), literal("ld")),
            pa.array(["Helll", "Wldld", "!"]),
        ),
        (f.trim(column("c")), pa.array(["hello", "world", "!"])),
        (f.upper(column("c")), pa.array(["HELLO ", " WORLD ", " !"])),
        (f.ends_with(column("a"), literal("llo")), pa.array([True, False, False])),
        (
            f.overlay(column("a"), literal("--"), literal(2)),
            pa.array(["H--lo", "W--ld", "--"]),
        ),
        (
            f.regexp_like(column("a"), literal("(ell|orl)")),
            pa.array([True, True, False]),
        ),
        (
            f.regexp_match(column("a"), literal("(ell|orl)")),
            pa.array([["ell"], ["orl"], None]),
        ),
        (
            f.regexp_replace(column("a"), literal("(ell|orl)"), literal("-")),
            pa.array(["H-o", "W-d", "!"]),
        ),
    ],
)
def test_string_functions(df, function, expected_result):
    """Check one string-function expression against its expected column.

    Each parametrized case selects a single expression from ``df`` and
    compares the only resulting column with ``expected_result``. Running one
    expression per case means a failure points at the specific function
    instead of at a position in a 33-column projection.

    NOTE(review): the expected values imply that column ``a`` holds
    ``["Hello", "World", "!"]`` and column ``c`` holds
    ``["hello ", " world ", " !"]``; the ``df`` fixture is defined outside
    this block, so confirm against it.

    Args:
        df: DataFrame under test, supplied from outside this block.
        function: The expression to project.
        expected_result: Arrow array the projected column must equal.
    """
    df = df.select(function)
    result = df.collect()
    # The assertions below expect the whole result in a single batch.
    assert len(result) == 1
    result = result[0]
    # Only one expression was selected, so column 0 is the only column.
    assert result.column(0) == expected_result


def test_hash_functions(df):
Expand Down

0 comments on commit e17ba64

Please sign in to comment.