Skip to content

Commit

Permalink
refactor: remove ibis.NA (ibis-project#9344)
Browse files Browse the repository at this point in the history
- Closes ibis-project#9311 

This PR removes ibis.NA, replacing its appearances with ibis.null().

---------

Co-authored-by: Jim Crist-Harif <[email protected]>
  • Loading branch information
ncclementi and jcrist authored Jun 11, 2024
1 parent ad27f9f commit 83db19d
Show file tree
Hide file tree
Showing 26 changed files with 116 additions and 121 deletions.

Large diffs are not rendered by default.

4 changes: 0 additions & 4 deletions docs/_quarto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -298,10 +298,6 @@ quartodoc:
- name: param
dynamic: true
signature_name: full
- name: NA
# Ideally exposed under `ibis` but that doesn't seem to work??
package: ibis.expr.api
signature_name: full
- name: "null"
dynamic: true
signature_name: full
Expand Down
2 changes: 1 addition & 1 deletion docs/posts/campaign-finance/index.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ def get_election_type(pgi: StringValue) -> StringValue:
"E": "recount",
}
first_letter = pgi[0]
return first_letter.substitute(election_types, else_=ibis.NA)
return first_letter.substitute(election_types, else_=ibis.null())
cleaned = cleaned.mutate(election_type=get_election_type(_.TRANSACTION_PGI)).drop(
Expand Down
2 changes: 1 addition & 1 deletion docs/tutorials/ibis-for-pandas-users.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,7 @@ represented by `NaN`. This can be confusing when working with numeric data,
since `NaN` is also a valid floating point value (along with `+/-inf`).

In Ibis, we try to be more precise: All data types are nullable, and we use
`ibis.NA` to represent `NULL` values, and all datatypes have a `.isnull()` method.
`ibis.null()` to represent `NULL` values, and all datatypes have a `.isnull()` method.
For floating point values, we use different values for `NaN` and `+/-inf`, and there
are the additional methods `.isnan()` and `.isinf()`.

Expand Down
4 changes: 2 additions & 2 deletions docs/tutorials/ibis-for-sql-users.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -522,10 +522,10 @@ ibis.to_sql(expr)

### Using `NULL` in expressions

To use `NULL` in an expression, either use the special `ibis.NA` value:
To use `NULL` in an expression, use `ibis.null()`:

```{python}
pos_two = (t.two > 0).ifelse(t.two, ibis.NA)
pos_two = (t.two > 0).ifelse(t.two, ibis.null())
expr = t.mutate(two_positive=pos_two)
ibis.to_sql(expr)
```
Expand Down
20 changes: 19 additions & 1 deletion ibis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

__version__ = "9.0.0"

import warnings
from typing import Any

from ibis import examples, util
from ibis.backends import BaseBackend
from ibis.common.exceptions import IbisError
Expand Down Expand Up @@ -36,7 +39,7 @@ def __dir__() -> list[str]:
return sorted(out)


def __getattr__(name: str) -> BaseBackend:
def load_backend(name: str) -> BaseBackend:
"""Load backends in a lazy way with `ibis.<backend-name>`.
This also registers the backend options.
Expand All @@ -52,6 +55,7 @@ def __getattr__(name: str) -> BaseBackend:
attribute is "cached", so this function is only called the first time.
"""

entry_points = {ep for ep in util.backend_entry_points() if ep.name == name}

if not entry_points:
Expand Down Expand Up @@ -123,3 +127,17 @@ def connect(*args, **kwargs):
setattr(proxy, name, getattr(backend, name))

return proxy


def __getattr__(name: str) -> Any:
    """Resolve module attributes that are not defined eagerly.

    ``NA`` is served as a deprecated alias: a ``DeprecationWarning`` is
    emitted and the result of ``null()`` is returned. Any other name is
    delegated to ``load_backend``.
    """
    # Anything that is not the deprecated alias goes to the backend loader.
    if name != "NA":
        return load_backend(name)

    warnings.warn(
        "The 'ibis.NA' constant is deprecated as of v9.1 and will be removed in a future "
        "version. Use 'ibis.null()' instead.",
        DeprecationWarning,
        # Report the warning against the frame that called this hook.
        stacklevel=2,
    )
    return null()  # noqa: F405
10 changes: 5 additions & 5 deletions ibis/backends/clickhouse/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,8 @@ def test_isnull_notnull(con, expr, expected):
("expr", "expected"),
[
(ibis.coalesce(5, None, 4), 5),
(ibis.coalesce(ibis.NA, 4, ibis.NA), 4),
(ibis.coalesce(ibis.NA, ibis.NA, 3.14), 3.14),
(ibis.coalesce(ibis.null(), 4, ibis.null()), 4),
(ibis.coalesce(ibis.null(), ibis.null(), 3.14), 3.14),
],
)
def test_coalesce(con, expr, expected):
Expand All @@ -127,7 +127,7 @@ def test_coalesce(con, expr, expected):
@pytest.mark.parametrize(
("expr", "expected"),
[
(ibis.NA.fill_null(5), 5),
(ibis.null().fill_null(5), 5),
(L(5).fill_null(10), 5),
(L(5).nullif(5), None),
(L(10).nullif(5), 10),
Expand All @@ -150,7 +150,7 @@ def test_fill_null_nullif(con, expr, expected):
(L(datetime(2015, 9, 1, hour=14, minute=48, second=5)), "DateTime"),
(L(date(2015, 9, 1)), "Date"),
param(
ibis.NA,
ibis.null(),
"Null",
marks=pytest.mark.xfail(
raises=AssertionError,
Expand Down Expand Up @@ -418,7 +418,7 @@ def test_numeric_builtins_work(alltypes, df):
def test_null_column(alltypes):
t = alltypes
nrows = t.count().execute()
expr = t.mutate(na_column=ibis.NA).na_column
expr = t.mutate(na_column=ibis.null()).na_column
result = expr.execute()
expected = pd.Series([None] * nrows, name="na_column")
tm.assert_series_equal(result, expected)
Expand Down
10 changes: 5 additions & 5 deletions ibis/backends/dask/tests/test_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def sort_kind():
return "mergesort"


default = pytest.mark.parametrize("default", [ibis.NA, ibis.literal("a")])
default = pytest.mark.parametrize("default", [ibis.null(), ibis.literal("a")])
row_offset = pytest.mark.parametrize("row_offset", list(map(ibis.literal, [-1, 1, 0])))
range_offset = pytest.mark.parametrize(
"range_offset",
Expand Down Expand Up @@ -48,7 +48,7 @@ def test_lead(con, t, df, row_offset, default, row_window):
expr = t.dup_strings.lead(row_offset, default=default).over(row_window)
result = expr.execute()
expected = df.dup_strings.shift(con.execute(-row_offset)).compute()
if default is not ibis.NA:
if default is not ibis.null():
expected = expected.fillna(con.execute(default))
tm.assert_series_equal(result, expected, check_names=False)

Expand All @@ -59,7 +59,7 @@ def test_lag(con, t, df, row_offset, default, row_window):
expr = t.dup_strings.lag(row_offset, default=default).over(row_window)
result = expr.execute()
expected = df.dup_strings.shift(con.execute(row_offset)).compute()
if default is not ibis.NA:
if default is not ibis.null():
expected = expected.fillna(con.execute(default))
tm.assert_series_equal(result, expected, check_names=False)

Expand All @@ -78,7 +78,7 @@ def test_lead_delta(con, t, pandas_df, range_offset, default, range_window):
.reindex(pandas_df.plain_datetimes_naive)
.reset_index(drop=True)
)
if default is not ibis.NA:
if default is not ibis.null():
expected = expected.fillna(con.execute(default))
tm.assert_series_equal(result, expected, check_names=False)

Expand All @@ -98,7 +98,7 @@ def test_lag_delta(t, con, pandas_df, range_offset, default, range_window):
.reindex(pandas_df.plain_datetimes_naive)
.reset_index(drop=True)
)
if default is not ibis.NA:
if default is not ibis.null():
expected = expected.fillna(con.execute(default))
tm.assert_series_equal(result, expected, check_names=False)

Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/impala/tests/test_case_exprs.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,6 @@ def test_identical_to(mockcon, snapshot):


def test_identical_to_special_case(snapshot):
expr = ibis.NA.cast("int64").identical_to(ibis.NA.cast("int64")).name("tmp")
expr = ibis.null().cast("int64").identical_to(ibis.null().cast("int64")).name("tmp")
result = ibis.to_sql(expr, dialect="impala")
snapshot.assert_match(result, "out.sql")
8 changes: 4 additions & 4 deletions ibis/backends/impala/tests/test_exprs.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ def test_decimal_timestamp_builtins(con):
dc * 2,
dc**2,
dc.cast("double"),
api.ifelse(table.l_discount > 0, dc * table.l_discount, api.NA),
api.ifelse(table.l_discount > 0, dc * table.l_discount, api.null()),
dc.fill_null(0),
ts < (ibis.now() + ibis.interval(months=3)),
ts < (ibis.timestamp("2005-01-01") + ibis.interval(months=3)),
Expand Down Expand Up @@ -632,10 +632,10 @@ def test_unions_with_ctes(con, alltypes):
@pytest.mark.parametrize(
("left", "right", "expected"),
[
(ibis.NA.cast("int64"), ibis.NA.cast("int64"), True),
(ibis.null().cast("int64"), ibis.null().cast("int64"), True),
(L(1), L(1), True),
(ibis.NA.cast("int64"), L(1), False),
(L(1), ibis.NA.cast("int64"), False),
(ibis.null().cast("int64"), L(1), False),
(L(1), ibis.null().cast("int64"), False),
(L(0), L(1), False),
(L(1), L(0), False),
],
Expand Down
10 changes: 5 additions & 5 deletions ibis/backends/pandas/tests/test_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def sort_kind():
return "mergesort"


default = pytest.mark.parametrize("default", [ibis.NA, ibis.literal("a")])
default = pytest.mark.parametrize("default", [ibis.null(), ibis.literal("a")])
row_offset = pytest.mark.parametrize("row_offset", list(map(ibis.literal, [-1, 1, 0])))
range_offset = pytest.mark.parametrize(
"range_offset",
Expand Down Expand Up @@ -49,7 +49,7 @@ def test_lead(t, df, row_offset, default, row_window):
expr = t.dup_strings.lead(row_offset, default=default).over(row_window)
result = expr.execute()
expected = df.dup_strings.shift(con.execute(-row_offset))
if default is not ibis.NA:
if default is not ibis.null():
expected = expected.fillna(con.execute(default))
tm.assert_series_equal(result, expected.rename("tmp"))

Expand All @@ -61,7 +61,7 @@ def test_lag(t, df, row_offset, default, row_window):
expr = t.dup_strings.lag(row_offset, default=default).over(row_window)
result = expr.execute()
expected = df.dup_strings.shift(con.execute(row_offset))
if default is not ibis.NA:
if default is not ibis.null():
expected = expected.fillna(con.execute(default))
tm.assert_series_equal(result, expected.rename("tmp"))

Expand All @@ -80,7 +80,7 @@ def test_lead_delta(t, df, range_offset, default, range_window):
.reindex(df.plain_datetimes_naive)
.reset_index(drop=True)
)
if default is not ibis.NA:
if default is not ibis.null():
expected = expected.fillna(con.execute(default))
tm.assert_series_equal(result, expected.rename("tmp"))

Expand All @@ -100,7 +100,7 @@ def test_lag_delta(t, df, range_offset, default, range_window):
.reindex(df.plain_datetimes_naive)
.reset_index(drop=True)
)
if default is not ibis.NA:
if default is not ibis.null():
expected = expected.fillna(con.execute(default))
tm.assert_series_equal(result, expected.rename("tmp"))

Expand Down
22 changes: 11 additions & 11 deletions ibis/backends/postgres/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def test_strftime(con, pattern):
[
param(L("foo_bar"), "text", id="text"),
param(L(5), "integer", id="integer"),
param(ibis.NA, "null", id="null"),
param(ibis.null(), "null", id="null"),
# TODO(phillipc): should this really be double?
param(L(1.2345), "numeric", id="numeric"),
param(
Expand Down Expand Up @@ -335,7 +335,7 @@ def test_regexp_extract(con, expr, expected):
@pytest.mark.parametrize(
("expr", "expected"),
[
param(ibis.NA.fill_null(5), 5, id="filled"),
param(ibis.null().fill_null(5), 5, id="filled"),
param(L(5).fill_null(10), 5, id="not_filled"),
param(L(5).nullif(5), None, id="nullif_null"),
param(L(10).nullif(5), 10, id="nullif_not_null"),
Expand All @@ -349,8 +349,8 @@ def test_fill_null_nullif(con, expr, expected):
("expr", "expected"),
[
param(ibis.coalesce(5, None, 4), 5, id="first"),
param(ibis.coalesce(ibis.NA, 4, ibis.NA), 4, id="second"),
param(ibis.coalesce(ibis.NA, ibis.NA, 3.14), 3.14, id="third"),
param(ibis.coalesce(ibis.null(), 4, ibis.null()), 4, id="second"),
param(ibis.coalesce(ibis.null(), ibis.null(), 3.14), 3.14, id="third"),
],
)
def test_coalesce(con, expr, expected):
Expand All @@ -360,12 +360,12 @@ def test_coalesce(con, expr, expected):
@pytest.mark.parametrize(
("expr", "expected"),
[
param(ibis.coalesce(ibis.NA, ibis.NA), None, id="all_null"),
param(ibis.coalesce(ibis.null(), ibis.null()), None, id="all_null"),
param(
ibis.coalesce(
ibis.NA.cast("int8"),
ibis.NA.cast("int8"),
ibis.NA.cast("int8"),
ibis.null().cast("int8"),
ibis.null().cast("int8"),
ibis.null().cast("int8"),
),
None,
id="all_nulls_with_all_cast",
Expand All @@ -377,7 +377,7 @@ def test_coalesce_all_na(con, expr, expected):


def test_coalesce_all_na_double(con):
expr = ibis.coalesce(ibis.NA, ibis.NA, ibis.NA.cast("double"))
expr = ibis.coalesce(ibis.null(), ibis.null(), ibis.null().cast("double"))
assert np.isnan(con.execute(expr))


Expand Down Expand Up @@ -815,14 +815,14 @@ def test_first_last_value(alltypes, df, func, expected_index):
def test_null_column(alltypes):
t = alltypes
nrows = t.count().execute()
expr = t.mutate(na_column=ibis.NA).na_column
expr = t.mutate(na_column=ibis.null()).na_column
result = expr.execute()
tm.assert_series_equal(result, pd.Series([None] * nrows, name="na_column"))


def test_null_column_union(alltypes, df):
t = alltypes
s = alltypes[["double_col"]].mutate(string_col=ibis.NA.cast("string"))
s = alltypes[["double_col"]].mutate(string_col=ibis.null().cast("string"))
expr = t[["double_col", "string_col"]].union(s)
result = expr.execute()
nrows = t.count().execute()
Expand Down
18 changes: 9 additions & 9 deletions ibis/backends/risingwave/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def test_regexp(con, expr, expected):
@pytest.mark.parametrize(
("expr", "expected"),
[
param(ibis.NA.fill_null(5), 5, id="filled"),
param(ibis.null().fill_null(5), 5, id="filled"),
param(L(5).fill_null(10), 5, id="not_filled"),
param(L(5).nullif(5), None, id="nullif_null"),
param(L(10).nullif(5), 10, id="nullif_not_null"),
Expand All @@ -180,8 +180,8 @@ def test_fill_null_nullif(con, expr, expected):
("expr", "expected"),
[
param(ibis.coalesce(5, None, 4), 5, id="first"),
param(ibis.coalesce(ibis.NA, 4, ibis.NA), 4, id="second"),
param(ibis.coalesce(ibis.NA, ibis.NA, 3.14), 3.14, id="third"),
param(ibis.coalesce(ibis.null(), 4, ibis.null()), 4, id="second"),
param(ibis.coalesce(ibis.null(), ibis.null(), 3.14), 3.14, id="third"),
],
)
def test_coalesce(con, expr, expected):
Expand All @@ -191,12 +191,12 @@ def test_coalesce(con, expr, expected):
@pytest.mark.parametrize(
("expr", "expected"),
[
param(ibis.coalesce(ibis.NA, ibis.NA), None, id="all_null"),
param(ibis.coalesce(ibis.null(), ibis.null()), None, id="all_null"),
param(
ibis.coalesce(
ibis.NA.cast("int8"),
ibis.NA.cast("int8"),
ibis.NA.cast("int8"),
ibis.null().cast("int8"),
ibis.null().cast("int8"),
ibis.null().cast("int8"),
),
None,
id="all_nulls_with_all_cast",
Expand All @@ -208,7 +208,7 @@ def test_coalesce_all_na(con, expr, expected):


def test_coalesce_all_na_double(con):
expr = ibis.coalesce(ibis.NA, ibis.NA, ibis.NA.cast("double"))
expr = ibis.coalesce(ibis.null(), ibis.null(), ibis.null().cast("double"))
assert np.isnan(con.execute(expr))


Expand Down Expand Up @@ -595,7 +595,7 @@ def test_first_last_value(alltypes, df, func, expected_index):
def test_null_column(alltypes):
t = alltypes
nrows = t.count().execute()
expr = t.mutate(na_column=ibis.NA).na_column
expr = t.mutate(na_column=ibis.null()).na_column
result = expr.execute()
tm.assert_series_equal(result, pd.Series([None] * nrows, name="na_column"))

Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/sqlite/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def test_builtin_agg_udf(con):
def total(x) -> float:
"""Totally total."""

expr = total(con.tables.functional_alltypes.limit(2).select(n=ibis.NA).n)
expr = total(con.tables.functional_alltypes.limit(2).select(n=ibis.null()).n)
result = con.execute(expr)
assert result == 0.0

Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/tests/sql/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def test_coalesce(functional_alltypes, snapshot):
d = functional_alltypes.double_col
f = functional_alltypes.float_col

expr = ibis.coalesce((d > 30).ifelse(d, ibis.NA), ibis.NA, f).name("tmp")
expr = ibis.coalesce((d > 30).ifelse(d, ibis.null()), ibis.null(), f).name("tmp")
snapshot.assert_match(to_sql(expr.name("tmp")), "out.sql")


Expand Down
Loading

0 comments on commit 83db19d

Please sign in to comment.