refactor: remove ibis.NA (ibis-project#9344)

- Closes ibis-project#9311. This PR removes ibis.NA, replacing its appearances with ibis.null(). --------- Co-authored-by: Jim Crist-Harif <[email protected]>
jcrist · Jun 11, 2024 · 83db19d · 83db19d
1 parent ad27f9f
commit 83db19d
Show file tree

Hide file tree

Showing 26 changed files with 116 additions and 121 deletions.
diff --git a/docs/_freeze/posts/campaign-finance/index/execute-results/html.json b/docs/_freeze/posts/campaign-finance/index/execute-results/html.json
diff --git a/docs/_quarto.yml b/docs/_quarto.yml
@@ -298,10 +298,6 @@ quartodoc:
             - name: param
               dynamic: true
               signature_name: full
-            - name: NA
-              # Ideally exposed under `ibis` but that doesn't seem to work??
-              package: ibis.expr.api
-              signature_name: full
             - name: "null"
               dynamic: true
               signature_name: full

diff --git a/docs/posts/campaign-finance/index.qmd b/docs/posts/campaign-finance/index.qmd
@@ -245,7 +245,7 @@ def get_election_type(pgi: StringValue) -> StringValue:
         "E": "recount",
     }
     first_letter = pgi[0]
-    return first_letter.substitute(election_types, else_=ibis.NA)
+    return first_letter.substitute(election_types, else_=ibis.null())
 
 
 cleaned = cleaned.mutate(election_type=get_election_type(_.TRANSACTION_PGI)).drop(

diff --git a/docs/tutorials/ibis-for-pandas-users.qmd b/docs/tutorials/ibis-for-pandas-users.qmd
@@ -507,7 +507,7 @@ represented by `NaN`. This can be confusing when working with numeric data,
 since `NaN` is also a valid floating point value (along with `+/-inf`).
 
 In Ibis, we try to be more precise: All data types are nullable, and we use
-`ibis.NA` to represent `NULL` values, and all datatypes have a `.isnull()` method.
+`ibis.null()` to represent `NULL` values, and all datatypes have a `.isnull()` method.
 For floating point values, we use different values for `NaN` and `+/-inf`, and there
 are the additional methods `.isnan()` and `.isinf()`.
 

diff --git a/docs/tutorials/ibis-for-sql-users.qmd b/docs/tutorials/ibis-for-sql-users.qmd
@@ -522,10 +522,10 @@ ibis.to_sql(expr)
 
 ### Using `NULL` in expressions
 
-To use `NULL` in an expression, either use the special `ibis.NA` value:
+To use `NULL` in an expression, use the `ibis.null()` function:
 
 ```{python}
-pos_two = (t.two > 0).ifelse(t.two, ibis.NA)
+pos_two = (t.two > 0).ifelse(t.two, ibis.null())
 expr = t.mutate(two_positive=pos_two)
 ibis.to_sql(expr)
 ```

diff --git a/ibis/__init__.py b/ibis/__init__.py
@@ -4,6 +4,9 @@
 
 __version__ = "9.0.0"
 
+import warnings
+from typing import Any
+
 from ibis import examples, util
 from ibis.backends import BaseBackend
 from ibis.common.exceptions import IbisError
@@ -36,7 +39,7 @@ def __dir__() -> list[str]:
     return sorted(out)
 
 
-def __getattr__(name: str) -> BaseBackend:
+def load_backend(name: str) -> BaseBackend:
     """Load backends in a lazy way with `ibis.<backend-name>`.
 
     This also registers the backend options.
@@ -52,6 +55,7 @@ def __getattr__(name: str) -> BaseBackend:
     attribute is "cached", so this function is only called the first time.
 
     """
+
     entry_points = {ep for ep in util.backend_entry_points() if ep.name == name}
 
     if not entry_points:
@@ -123,3 +127,17 @@ def connect(*args, **kwargs):
         setattr(proxy, name, getattr(backend, name))
 
     return proxy
+
+
+def __getattr__(name: str) -> Any:
+    if name == "NA":
+        warnings.warn(
+            "The 'ibis.NA' constant is deprecated as of v9.1 and will be removed in a future "
+            "version. Use 'ibis.null()' instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+
+        return null()  # noqa: F405
+    else:
+        return load_backend(name)
diff --git a/ibis/backends/clickhouse/tests/test_functions.py b/ibis/backends/clickhouse/tests/test_functions.py
@@ -116,8 +116,8 @@ def test_isnull_notnull(con, expr, expected):
     ("expr", "expected"),
     [
         (ibis.coalesce(5, None, 4), 5),
-        (ibis.coalesce(ibis.NA, 4, ibis.NA), 4),
-        (ibis.coalesce(ibis.NA, ibis.NA, 3.14), 3.14),
+        (ibis.coalesce(ibis.null(), 4, ibis.null()), 4),
+        (ibis.coalesce(ibis.null(), ibis.null(), 3.14), 3.14),
     ],
 )
 def test_coalesce(con, expr, expected):
@@ -127,7 +127,7 @@ def test_coalesce(con, expr, expected):
 @pytest.mark.parametrize(
     ("expr", "expected"),
     [
-        (ibis.NA.fill_null(5), 5),
+        (ibis.null().fill_null(5), 5),
         (L(5).fill_null(10), 5),
         (L(5).nullif(5), None),
         (L(10).nullif(5), 10),
@@ -150,7 +150,7 @@ def test_fill_null_nullif(con, expr, expected):
         (L(datetime(2015, 9, 1, hour=14, minute=48, second=5)), "DateTime"),
         (L(date(2015, 9, 1)), "Date"),
         param(
-            ibis.NA,
+            ibis.null(),
             "Null",
             marks=pytest.mark.xfail(
                 raises=AssertionError,
@@ -418,7 +418,7 @@ def test_numeric_builtins_work(alltypes, df):
 def test_null_column(alltypes):
     t = alltypes
     nrows = t.count().execute()
-    expr = t.mutate(na_column=ibis.NA).na_column
+    expr = t.mutate(na_column=ibis.null()).na_column
     result = expr.execute()
     expected = pd.Series([None] * nrows, name="na_column")
     tm.assert_series_equal(result, expected)

diff --git a/ibis/backends/dask/tests/test_window.py b/ibis/backends/dask/tests/test_window.py
@@ -20,7 +20,7 @@ def sort_kind():
     return "mergesort"
 
 
-default = pytest.mark.parametrize("default", [ibis.NA, ibis.literal("a")])
+default = pytest.mark.parametrize("default", [ibis.null(), ibis.literal("a")])
 row_offset = pytest.mark.parametrize("row_offset", list(map(ibis.literal, [-1, 1, 0])))
 range_offset = pytest.mark.parametrize(
     "range_offset",
@@ -48,7 +48,7 @@ def test_lead(con, t, df, row_offset, default, row_window):
     expr = t.dup_strings.lead(row_offset, default=default).over(row_window)
     result = expr.execute()
     expected = df.dup_strings.shift(con.execute(-row_offset)).compute()
-    if default is not ibis.NA:
+    if default is not ibis.null():
         expected = expected.fillna(con.execute(default))
     tm.assert_series_equal(result, expected, check_names=False)
 
@@ -59,7 +59,7 @@ def test_lag(con, t, df, row_offset, default, row_window):
     expr = t.dup_strings.lag(row_offset, default=default).over(row_window)
     result = expr.execute()
     expected = df.dup_strings.shift(con.execute(row_offset)).compute()
-    if default is not ibis.NA:
+    if default is not ibis.null():
         expected = expected.fillna(con.execute(default))
     tm.assert_series_equal(result, expected, check_names=False)
 
@@ -78,7 +78,7 @@ def test_lead_delta(con, t, pandas_df, range_offset, default, range_window):
         .reindex(pandas_df.plain_datetimes_naive)
         .reset_index(drop=True)
     )
-    if default is not ibis.NA:
+    if default is not ibis.null():
         expected = expected.fillna(con.execute(default))
     tm.assert_series_equal(result, expected, check_names=False)
 
@@ -98,7 +98,7 @@ def test_lag_delta(t, con, pandas_df, range_offset, default, range_window):
         .reindex(pandas_df.plain_datetimes_naive)
         .reset_index(drop=True)
     )
-    if default is not ibis.NA:
+    if default is not ibis.null():
         expected = expected.fillna(con.execute(default))
     tm.assert_series_equal(result, expected, check_names=False)
 

diff --git a/ibis/backends/impala/tests/test_case_exprs.py b/ibis/backends/impala/tests/test_case_exprs.py
@@ -100,6 +100,6 @@ def test_identical_to(mockcon, snapshot):
 
 
 def test_identical_to_special_case(snapshot):
-    expr = ibis.NA.cast("int64").identical_to(ibis.NA.cast("int64")).name("tmp")
+    expr = ibis.null().cast("int64").identical_to(ibis.null().cast("int64")).name("tmp")
     result = ibis.to_sql(expr, dialect="impala")
     snapshot.assert_match(result, "out.sql")
diff --git a/ibis/backends/impala/tests/test_exprs.py b/ibis/backends/impala/tests/test_exprs.py
@@ -384,7 +384,7 @@ def test_decimal_timestamp_builtins(con):
         dc * 2,
         dc**2,
         dc.cast("double"),
-        api.ifelse(table.l_discount > 0, dc * table.l_discount, api.NA),
+        api.ifelse(table.l_discount > 0, dc * table.l_discount, api.null()),
         dc.fill_null(0),
         ts < (ibis.now() + ibis.interval(months=3)),
         ts < (ibis.timestamp("2005-01-01") + ibis.interval(months=3)),
@@ -632,10 +632,10 @@ def test_unions_with_ctes(con, alltypes):
 @pytest.mark.parametrize(
     ("left", "right", "expected"),
     [
-        (ibis.NA.cast("int64"), ibis.NA.cast("int64"), True),
+        (ibis.null().cast("int64"), ibis.null().cast("int64"), True),
         (L(1), L(1), True),
-        (ibis.NA.cast("int64"), L(1), False),
-        (L(1), ibis.NA.cast("int64"), False),
+        (ibis.null().cast("int64"), L(1), False),
+        (L(1), ibis.null().cast("int64"), False),
         (L(0), L(1), False),
         (L(1), L(0), False),
     ],

diff --git a/ibis/backends/pandas/tests/test_window.py b/ibis/backends/pandas/tests/test_window.py
@@ -20,7 +20,7 @@ def sort_kind():
     return "mergesort"
 
 
-default = pytest.mark.parametrize("default", [ibis.NA, ibis.literal("a")])
+default = pytest.mark.parametrize("default", [ibis.null(), ibis.literal("a")])
 row_offset = pytest.mark.parametrize("row_offset", list(map(ibis.literal, [-1, 1, 0])))
 range_offset = pytest.mark.parametrize(
     "range_offset",
@@ -49,7 +49,7 @@ def test_lead(t, df, row_offset, default, row_window):
     expr = t.dup_strings.lead(row_offset, default=default).over(row_window)
     result = expr.execute()
     expected = df.dup_strings.shift(con.execute(-row_offset))
-    if default is not ibis.NA:
+    if default is not ibis.null():
         expected = expected.fillna(con.execute(default))
     tm.assert_series_equal(result, expected.rename("tmp"))
 
@@ -61,7 +61,7 @@ def test_lag(t, df, row_offset, default, row_window):
     expr = t.dup_strings.lag(row_offset, default=default).over(row_window)
     result = expr.execute()
     expected = df.dup_strings.shift(con.execute(row_offset))
-    if default is not ibis.NA:
+    if default is not ibis.null():
         expected = expected.fillna(con.execute(default))
     tm.assert_series_equal(result, expected.rename("tmp"))
 
@@ -80,7 +80,7 @@ def test_lead_delta(t, df, range_offset, default, range_window):
         .reindex(df.plain_datetimes_naive)
         .reset_index(drop=True)
     )
-    if default is not ibis.NA:
+    if default is not ibis.null():
         expected = expected.fillna(con.execute(default))
     tm.assert_series_equal(result, expected.rename("tmp"))
 
@@ -100,7 +100,7 @@ def test_lag_delta(t, df, range_offset, default, range_window):
         .reindex(df.plain_datetimes_naive)
         .reset_index(drop=True)
     )
-    if default is not ibis.NA:
+    if default is not ibis.null():
         expected = expected.fillna(con.execute(default))
     tm.assert_series_equal(result, expected.rename("tmp"))
 

diff --git a/ibis/backends/postgres/tests/test_functions.py b/ibis/backends/postgres/tests/test_functions.py
@@ -150,7 +150,7 @@ def test_strftime(con, pattern):
     [
         param(L("foo_bar"), "text", id="text"),
         param(L(5), "integer", id="integer"),
-        param(ibis.NA, "null", id="null"),
+        param(ibis.null(), "null", id="null"),
         # TODO(phillipc): should this really be double?
         param(L(1.2345), "numeric", id="numeric"),
         param(
@@ -335,7 +335,7 @@ def test_regexp_extract(con, expr, expected):
 @pytest.mark.parametrize(
     ("expr", "expected"),
     [
-        param(ibis.NA.fill_null(5), 5, id="filled"),
+        param(ibis.null().fill_null(5), 5, id="filled"),
         param(L(5).fill_null(10), 5, id="not_filled"),
         param(L(5).nullif(5), None, id="nullif_null"),
         param(L(10).nullif(5), 10, id="nullif_not_null"),
@@ -349,8 +349,8 @@ def test_fill_null_nullif(con, expr, expected):
     ("expr", "expected"),
     [
         param(ibis.coalesce(5, None, 4), 5, id="first"),
-        param(ibis.coalesce(ibis.NA, 4, ibis.NA), 4, id="second"),
-        param(ibis.coalesce(ibis.NA, ibis.NA, 3.14), 3.14, id="third"),
+        param(ibis.coalesce(ibis.null(), 4, ibis.null()), 4, id="second"),
+        param(ibis.coalesce(ibis.null(), ibis.null(), 3.14), 3.14, id="third"),
     ],
 )
 def test_coalesce(con, expr, expected):
@@ -360,12 +360,12 @@ def test_coalesce(con, expr, expected):
 @pytest.mark.parametrize(
     ("expr", "expected"),
     [
-        param(ibis.coalesce(ibis.NA, ibis.NA), None, id="all_null"),
+        param(ibis.coalesce(ibis.null(), ibis.null()), None, id="all_null"),
         param(
             ibis.coalesce(
-                ibis.NA.cast("int8"),
-                ibis.NA.cast("int8"),
-                ibis.NA.cast("int8"),
+                ibis.null().cast("int8"),
+                ibis.null().cast("int8"),
+                ibis.null().cast("int8"),
             ),
             None,
             id="all_nulls_with_all_cast",
@@ -377,7 +377,7 @@ def test_coalesce_all_na(con, expr, expected):
 
 
 def test_coalesce_all_na_double(con):
-    expr = ibis.coalesce(ibis.NA, ibis.NA, ibis.NA.cast("double"))
+    expr = ibis.coalesce(ibis.null(), ibis.null(), ibis.null().cast("double"))
     assert np.isnan(con.execute(expr))
 
 
@@ -815,14 +815,14 @@ def test_first_last_value(alltypes, df, func, expected_index):
 def test_null_column(alltypes):
     t = alltypes
     nrows = t.count().execute()
-    expr = t.mutate(na_column=ibis.NA).na_column
+    expr = t.mutate(na_column=ibis.null()).na_column
     result = expr.execute()
     tm.assert_series_equal(result, pd.Series([None] * nrows, name="na_column"))
 
 
 def test_null_column_union(alltypes, df):
     t = alltypes
-    s = alltypes[["double_col"]].mutate(string_col=ibis.NA.cast("string"))
+    s = alltypes[["double_col"]].mutate(string_col=ibis.null().cast("string"))
     expr = t[["double_col", "string_col"]].union(s)
     result = expr.execute()
     nrows = t.count().execute()

diff --git a/ibis/backends/risingwave/tests/test_functions.py b/ibis/backends/risingwave/tests/test_functions.py
@@ -166,7 +166,7 @@ def test_regexp(con, expr, expected):
 @pytest.mark.parametrize(
     ("expr", "expected"),
     [
-        param(ibis.NA.fill_null(5), 5, id="filled"),
+        param(ibis.null().fill_null(5), 5, id="filled"),
         param(L(5).fill_null(10), 5, id="not_filled"),
         param(L(5).nullif(5), None, id="nullif_null"),
         param(L(10).nullif(5), 10, id="nullif_not_null"),
@@ -180,8 +180,8 @@ def test_fill_null_nullif(con, expr, expected):
     ("expr", "expected"),
     [
         param(ibis.coalesce(5, None, 4), 5, id="first"),
-        param(ibis.coalesce(ibis.NA, 4, ibis.NA), 4, id="second"),
-        param(ibis.coalesce(ibis.NA, ibis.NA, 3.14), 3.14, id="third"),
+        param(ibis.coalesce(ibis.null(), 4, ibis.null()), 4, id="second"),
+        param(ibis.coalesce(ibis.null(), ibis.null(), 3.14), 3.14, id="third"),
     ],
 )
 def test_coalesce(con, expr, expected):
@@ -191,12 +191,12 @@ def test_coalesce(con, expr, expected):
 @pytest.mark.parametrize(
     ("expr", "expected"),
     [
-        param(ibis.coalesce(ibis.NA, ibis.NA), None, id="all_null"),
+        param(ibis.coalesce(ibis.null(), ibis.null()), None, id="all_null"),
         param(
             ibis.coalesce(
-                ibis.NA.cast("int8"),
-                ibis.NA.cast("int8"),
-                ibis.NA.cast("int8"),
+                ibis.null().cast("int8"),
+                ibis.null().cast("int8"),
+                ibis.null().cast("int8"),
             ),
             None,
             id="all_nulls_with_all_cast",
@@ -208,7 +208,7 @@ def test_coalesce_all_na(con, expr, expected):
 
 
 def test_coalesce_all_na_double(con):
-    expr = ibis.coalesce(ibis.NA, ibis.NA, ibis.NA.cast("double"))
+    expr = ibis.coalesce(ibis.null(), ibis.null(), ibis.null().cast("double"))
     assert np.isnan(con.execute(expr))
 
 
@@ -595,7 +595,7 @@ def test_first_last_value(alltypes, df, func, expected_index):
 def test_null_column(alltypes):
     t = alltypes
     nrows = t.count().execute()
-    expr = t.mutate(na_column=ibis.NA).na_column
+    expr = t.mutate(na_column=ibis.null()).na_column
     result = expr.execute()
     tm.assert_series_equal(result, pd.Series([None] * nrows, name="na_column"))
 

diff --git a/ibis/backends/sqlite/tests/test_client.py b/ibis/backends/sqlite/tests/test_client.py
@@ -47,7 +47,7 @@ def test_builtin_agg_udf(con):
     def total(x) -> float:
         """Totally total."""
 
-    expr = total(con.tables.functional_alltypes.limit(2).select(n=ibis.NA).n)
+    expr = total(con.tables.functional_alltypes.limit(2).select(n=ibis.null()).n)
     result = con.execute(expr)
     assert result == 0.0
 

diff --git a/ibis/backends/tests/sql/test_sql.py b/ibis/backends/tests/sql/test_sql.py
@@ -121,7 +121,7 @@ def test_coalesce(functional_alltypes, snapshot):
     d = functional_alltypes.double_col
     f = functional_alltypes.float_col
 
-    expr = ibis.coalesce((d > 30).ifelse(d, ibis.NA), ibis.NA, f).name("tmp")
+    expr = ibis.coalesce((d > 30).ifelse(d, ibis.null()), ibis.null(), f).name("tmp")
     snapshot.assert_match(to_sql(expr.name("tmp")), "out.sql")