Skip to content

Commit

Permalink
Support Utf8View to numeric coercion (#14377) (#14455)
Browse files Browse the repository at this point in the history
* Test for string / numeric coercion

* fix tests

* Update tests

* Add tests to stringview

* add numeric coercion
  • Loading branch information
alamb authored Feb 3, 2025
1 parent 8f10fdf commit 755b26a
Show file tree
Hide file tree
Showing 7 changed files with 105 additions and 0 deletions.
2 changes: 2 additions & 0 deletions datafusion/expr-common/src/type_coercion/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -684,8 +684,10 @@ fn string_numeric_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<D
match (lhs_type, rhs_type) {
(Utf8, _) if rhs_type.is_numeric() => Some(Utf8),
(LargeUtf8, _) if rhs_type.is_numeric() => Some(LargeUtf8),
(Utf8View, _) if rhs_type.is_numeric() => Some(Utf8View),
(_, Utf8) if lhs_type.is_numeric() => Some(Utf8),
(_, LargeUtf8) if lhs_type.is_numeric() => Some(LargeUtf8),
(_, Utf8View) if lhs_type.is_numeric() => Some(Utf8View),
_ => None,
}
}
Expand Down
12 changes: 12 additions & 0 deletions datafusion/sqllogictest/test_files/string/dictionary_utf8.slt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,15 @@ statement ok
create table test_substr as
select arrow_cast(col1, 'Dictionary(Int32, Utf8)') as c1 from test_substr_base;

# Build test_datetime from test_datetime_base, casting each of its three
# string columns to Dictionary(Int32, Utf8):
#   ts <- column1 (timestamp strings), d <- column2 (date strings),
#   t  <- column3 (time strings)
# NOTE(review): presumably read by the date/time coercion query in
# string_query.slt.part -- confirm that part file is included here.
statement ok
create table test_datetime as
select
arrow_cast(column1, 'Dictionary(Int32, Utf8)') as ts,
arrow_cast(column2, 'Dictionary(Int32, Utf8)') as d,
arrow_cast(column3, 'Dictionary(Int32, Utf8)') as t
from test_datetime_base;


statement ok
drop table test_source

Expand All @@ -56,3 +65,6 @@ drop table test_basic_operator;

statement ok
drop table test_substr_base;

statement ok
drop table test_datetime_base;
11 changes: 11 additions & 0 deletions datafusion/sqllogictest/test_files/string/init_data.slt.part
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,14 @@ statement ok
create table test_substr_base (
col1 VARCHAR
) as values ('foo'), ('hello🌏世界'), ('💩'), ('ThisIsAVeryLongASCIIString'), (''), (NULL);


# --------------------------------------
# Set up a test table of date/time strings to test coercion
# --------------------------------------
statement ok
create table test_datetime_base as values
('2024-08-09T12:13:14', '2024-08-09', '12:13:14'),
('2024-08-09T12:13:15', '2024-09-09', '12:14:14'),
(NULL, NULL, NULL)
;
12 changes: 12 additions & 0 deletions datafusion/sqllogictest/test_files/string/large_string.slt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,15 @@ statement ok
create table test_substr as
select arrow_cast(col1, 'LargeUtf8') as c1 from test_substr_base;

# Build test_datetime from test_datetime_base, casting each of its three
# string columns to LargeUtf8:
#   ts <- column1 (timestamp strings), d <- column2 (date strings),
#   t  <- column3 (time strings)
# NOTE(review): presumably read by the date/time coercion query in
# string_query.slt.part -- confirm that part file is included here.
statement ok
create table test_datetime as
select
arrow_cast(column1, 'LargeUtf8') as ts,
arrow_cast(column2, 'LargeUtf8') as d,
arrow_cast(column3, 'LargeUtf8') as t
from test_datetime_base;


# select
query TTTT
SELECT ascii_1, ascii_2, unicode_1, unicode_2 FROM test_basic_operator
Expand Down Expand Up @@ -64,3 +73,6 @@ drop table test_basic_operator;

statement ok
drop table test_substr_base;

statement ok
drop table test_datetime_base;
10 changes: 10 additions & 0 deletions datafusion/sqllogictest/test_files/string/string.slt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,13 @@ statement ok
create table test_substr as
select arrow_cast(col1, 'Utf8') as c1 from test_substr_base;

# Build test_datetime from test_datetime_base, casting each of its three
# string columns to Utf8:
#   ts <- column1 (timestamp strings), d <- column2 (date strings),
#   t  <- column3 (time strings)
# NOTE(review): presumably read by the date/time coercion query in
# string_query.slt.part -- confirm that part file is included here.
statement ok
create table test_datetime as
select
arrow_cast(column1, 'Utf8') as ts,
arrow_cast(column2, 'Utf8') as d,
arrow_cast(column3, 'Utf8') as t
from test_datetime_base;


#
Expand Down Expand Up @@ -186,3 +193,6 @@ drop table test_basic_operator;

statement ok
drop table test_substr;

statement ok
drop table test_datetime;
47 changes: 47 additions & 0 deletions datafusion/sqllogictest/test_files/string/string_query.slt.part
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@
# with standard values, but different types in string columns
# (String, StringView, etc.)

# --------------------------------------
# Show the input data
# --------------------------------------

# select
query TTTT
SELECT ascii_1, ascii_2, unicode_1, unicode_2 FROM test_basic_operator
Expand All @@ -35,6 +39,49 @@ _ \_ (empty) (empty)
NULL % NULL NULL
NULL R NULL 🔥

# --------------------------------------
# Test type coercion (string column compared to an integer literal)
# These queries should run without error
# --------------------------------------

query BB
select ascii_1 = 1 as col1, 1 = ascii_1 as col2 from test_basic_operator;
----
false false
false false
false false
false false
false false
false false
false false
false false
false false
NULL NULL
NULL NULL

query BB
select ascii_1 <> 1 as col1, 1 <> ascii_1 as col2 from test_basic_operator;
----
true true
true true
true true
true true
true true
true true
true true
true true
true true
NULL NULL
NULL NULL

# Coercion to date/time: each string column of test_datetime is compared
# against a typed literal (timestamp, date, time).
# Every literal equals the value in the first row, so each comparison is
# observed returning true (row 1), false (row 2) and NULL (row 3). This
# ensures a coercion that never matches cannot pass unnoticed.
query BBB
select ts = '2024-08-09T12:13:14'::timestamp, d = '2024-08-09'::date, t = '12:13:14'::time from test_datetime;
----
true true true
false false false
NULL NULL NULL

# --------------------------------------
# column comparison as filters
# --------------------------------------
Expand Down
11 changes: 11 additions & 0 deletions datafusion/sqllogictest/test_files/string/string_view.slt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,14 @@ statement ok
create table test_substr as
select arrow_cast(col1, 'Utf8View') as c1 from test_substr_base;

# Build test_datetime from test_datetime_base, casting each of its three
# string columns to Utf8View:
#   ts <- column1 (timestamp strings), d <- column2 (date strings),
#   t  <- column3 (time strings)
# NOTE(review): presumably read by the date/time coercion query in
# string_query.slt.part -- confirm that part file is included here.
statement ok
create table test_datetime as
select
arrow_cast(column1, 'Utf8View') as ts,
arrow_cast(column2, 'Utf8View') as d,
arrow_cast(column3, 'Utf8View') as t
from test_datetime_base;

statement ok
drop table test_source

Expand All @@ -51,6 +59,9 @@ drop table test_basic_operator;
statement ok
drop table test_substr_base;

statement ok
drop table test_datetime_base;


# --------------------------------------
# String_view specific tests
Expand Down

0 comments on commit 755b26a

Please sign in to comment.