Pin dask to 2024.1.1 (#1301)
* Bump dask min version to 2023.6.0

* Remove dask compat code

* Linting

* Bump pyarrow and uvicorn deps to unblock environment solve

* Undo unintentional pytest mindep change

* Pin to dask 2024.1.1

* Tighten sklearn xfail in test_model.py

* Drop tpot from 3.12 CI deps

* Explicitly add xgboost to testing deps
charlesbluca authored Mar 1, 2024
1 parent e9db6af commit 6706433
Showing 21 changed files with 47 additions and 105 deletions.
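Every packaging file below replaces a floor pin (`dask>=2022.3.0`) with an exact pin on `dask==2024.1.1` (and `distributed==2024.1.1`). As a rough illustrative sketch of what the pin means at runtime (not part of this commit; it only reuses the `packaging` helpers that `dask_sql/_compat.py` already imports), an environment can be sanity-checked like this:

```python
# Hypothetical sanity check: confirm the installed dask/distributed versions
# match the exact pin introduced by this commit before importing dask_sql.
from importlib.metadata import version

from packaging.version import parse as parseVersion

PINNED = parseVersion("2024.1.1")

for pkg in ("dask", "distributed"):
    installed = parseVersion(version(pkg))
    assert installed == PINNED, f"{pkg} {installed} does not match the {PINNED} pin"
```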
4 changes: 2 additions & 2 deletions continuous_integration/docker/conda.txt
@@ -1,5 +1,5 @@
python>=3.9
-dask>=2022.3.0
+dask==2024.1.1
pandas>=1.4.0
jpype1>=1.0.2
openjdk>=8
@@ -12,7 +12,7 @@ sphinx>=3.2.1
tzlocal>=2.1
fastapi>=0.92.0
httpx>=0.24.1
-uvicorn>=0.13.4
+uvicorn>=0.14
pyarrow>=14.0.1
prompt_toolkit>=3.0.8
pygments>=2.7.1
4 changes: 2 additions & 2 deletions continuous_integration/docker/main.dockerfile
@@ -16,11 +16,11 @@ RUN mamba install -y \
# build requirements
"maturin>=1.3,<1.4" \
# core dependencies
"dask>=2022.3.0" \
"dask==2024.1.1" \
"pandas>=1.4.0" \
"fastapi>=0.92.0" \
"httpx>=0.24.1" \
"uvicorn>=0.13.4" \
"uvicorn>=0.14" \
"tzlocal>=2.1" \
"prompt_toolkit>=3.0.8" \
"pygments>=2.7.1" \
5 changes: 3 additions & 2 deletions continuous_integration/environment-3.10.yaml
@@ -3,7 +3,7 @@ channels:
- conda-forge
dependencies:
- c-compiler
-- dask>=2022.3.0
+- dask==2024.1.1
- fastapi>=0.92.0
- fugue>=0.7.3
- httpx>=0.24.1
@@ -26,12 +26,13 @@ dependencies:
- pytest-xdist
- pytest
- python=3.10
+- py-xgboost>=1.7.0
- scikit-learn>=1.0.0
- sphinx
- sqlalchemy
- tpot>=0.12.0
# FIXME: https://github.com/fugue-project/fugue/issues/526
- triad<0.9.2
- tzlocal>=2.1
-- uvicorn>=0.13.4
+- uvicorn>=0.14
- zlib
5 changes: 3 additions & 2 deletions continuous_integration/environment-3.11.yaml
@@ -3,7 +3,7 @@ channels:
- conda-forge
dependencies:
- c-compiler
-- dask>=2022.3.0
+- dask==2024.1.1
- fastapi>=0.92.0
- fugue>=0.7.3
- httpx>=0.24.1
@@ -26,12 +26,13 @@ dependencies:
- pytest-xdist
- pytest
- python=3.11
+- py-xgboost>=1.7.0
- scikit-learn>=1.0.0
- sphinx
- sqlalchemy
- tpot>=0.12.0
# FIXME: https://github.com/fugue-project/fugue/issues/526
- triad<0.9.2
- tzlocal>=2.1
-- uvicorn>=0.13.4
+- uvicorn>=0.14
- zlib
8 changes: 5 additions & 3 deletions continuous_integration/environment-3.12.yaml
@@ -3,7 +3,7 @@ channels:
- conda-forge
dependencies:
- c-compiler
-- dask>=2022.3.0
+- dask==2024.1.1
- fastapi>=0.92.0
- fugue>=0.7.3
- httpx>=0.24.1
@@ -27,12 +27,14 @@ dependencies:
- pytest-xdist
- pytest
- python=3.12
+- py-xgboost>=1.7.0
- scikit-learn>=1.0.0
- sphinx
- sqlalchemy
-- tpot>=0.12.0
+# TODO: add once tpot supports python 3.12
+# - tpot>=0.12.0
# FIXME: https://github.com/fugue-project/fugue/issues/526
- triad<0.9.2
- tzlocal>=2.1
-- uvicorn>=0.13.4
+- uvicorn>=0.14
- zlib
5 changes: 3 additions & 2 deletions continuous_integration/environment-3.9.yaml
@@ -3,7 +3,7 @@ channels:
- conda-forge
dependencies:
- c-compiler
-- dask=2022.3.0
+- dask=2024.1.1
- fastapi=0.92.0
- fugue=0.7.3
- httpx=0.24.1
@@ -26,6 +26,7 @@ dependencies:
- pytest-xdist
- pytest
- python=3.9
+- py-xgboost=1.7.0
- scikit-learn=1.0.0
- sphinx
# TODO: remove this constraint when we require pandas>2
@@ -34,5 +35,5 @@ dependencies:
# FIXME: https://github.com/fugue-project/fugue/issues/526
- triad<0.9.2
- tzlocal=2.1
-- uvicorn=0.13.4
+- uvicorn=0.14
- zlib
5 changes: 3 additions & 2 deletions continuous_integration/gpuci/environment-3.10.yaml
@@ -9,7 +9,7 @@ channels:
dependencies:
- c-compiler
- zlib
-- dask>=2022.3.0
+- dask==2024.1.1
- fastapi>=0.92.0
- fugue>=0.7.3
- httpx>=0.24.1
@@ -32,14 +32,15 @@ dependencies:
- pytest-xdist
- pytest
- python=3.10
+- py-xgboost>=1.7.0
- scikit-learn>=1.0.0
- sphinx
- sqlalchemy
- tpot>=0.12.0
# FIXME: https://github.com/fugue-project/fugue/issues/526
- triad<0.9.2
- tzlocal>=2.1
-- uvicorn>=0.13.4
+- uvicorn>=0.14
# GPU-specific requirements
- cudatoolkit=11.8
- cudf=24.04
5 changes: 3 additions & 2 deletions continuous_integration/gpuci/environment-3.9.yaml
@@ -9,7 +9,7 @@ channels:
dependencies:
- c-compiler
- zlib
-- dask>=2022.3.0
+- dask==2024.1.1
- fastapi>=0.92.0
- fugue>=0.7.3
- httpx>=0.24.1
@@ -32,14 +32,15 @@ dependencies:
- pytest-xdist
- pytest
- python=3.9
+- py-xgboost>=1.7.0
- scikit-learn>=1.0.0
- sphinx
- sqlalchemy
- tpot>=0.12.0
# FIXME: https://github.com/fugue-project/fugue/issues/526
- triad<0.9.2
- tzlocal>=2.1
-- uvicorn>=0.13.4
+- uvicorn>=0.14
# GPU-specific requirements
- cudatoolkit=11.8
- cudf=24.04
4 changes: 2 additions & 2 deletions continuous_integration/recipe/meta.yaml
@@ -32,11 +32,11 @@ requirements:
- xz # [linux64]
run:
- python
-- dask >=2022.3.0
+- dask ==2024.1.1
- pandas >=1.4.0
- fastapi >=0.92.0
- httpx >=0.24.1
-- uvicorn >=0.13.4
+- uvicorn >=0.14
- tzlocal >=2.1
- prompt-toolkit >=3.0.8
- pygments >=2.7.1
11 changes: 0 additions & 11 deletions dask_sql/_compat.py
@@ -1,23 +1,12 @@
-import dask
import pandas as pd
import prompt_toolkit
from packaging.version import parse as parseVersion

_pandas_version = parseVersion(pd.__version__)
_prompt_toolkit_version = parseVersion(prompt_toolkit.__version__)
-_dask_version = parseVersion(dask.__version__)

INDEXER_WINDOW_STEP_IMPLEMENTED = _pandas_version >= parseVersion("1.5.0")
PANDAS_GT_200 = _pandas_version >= parseVersion("2.0.0")

# TODO: remove if prompt-toolkit min version gets bumped
PIPE_INPUT_CONTEXT_MANAGER = _prompt_toolkit_version >= parseVersion("3.0.29")
-
-# TODO: remove when dask min version gets bumped
-BROADCAST_JOIN_SUPPORT_WORKING = _dask_version > parseVersion("2023.1.0")
-
-# Parquet predicate-support version checks
-PQ_NOT_IN_SUPPORT = parseVersion(dask.__version__) > parseVersion("2023.5.1")
-PQ_IS_SUPPORT = parseVersion(dask.__version__) >= parseVersion("2023.3.1")
-
-DASK_CUDF_TODATETIME_SUPPORT = _dask_version >= parseVersion("2023.5.1")
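For context, the constants deleted above follow the usual version-gating pattern, and with dask pinned to a single release every one of them collapses to a constant. The sketch below is illustrative only; it restates the removed code rather than anything added by this commit:

```python
# Sketch of the gating pattern being removed. Under dask==2024.1.1 every one
# of these comparisons is unconditionally True, so keeping the flags (and the
# branches that consult them) no longer buys anything.
import dask
from packaging.version import parse as parseVersion

_dask_version = parseVersion(dask.__version__)

BROADCAST_JOIN_SUPPORT_WORKING = _dask_version > parseVersion("2023.1.0")  # True
PQ_NOT_IN_SUPPORT = _dask_version > parseVersion("2023.5.1")               # True
PQ_IS_SUPPORT = _dask_version >= parseVersion("2023.3.1")                  # True
DASK_CUDF_TODATETIME_SUPPORT = _dask_version >= parseVersion("2023.5.1")   # True
```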
9 changes: 0 additions & 9 deletions dask_sql/physical/rel/logical/join.py
@@ -9,7 +9,6 @@
from dask.base import tokenize
from dask.highlevelgraph import HighLevelGraph

-from dask_sql._compat import BROADCAST_JOIN_SUPPORT_WORKING
from dask_sql.datacontainer import ColumnContainer, DataContainer
from dask_sql.physical.rel.base import BaseRelPlugin
from dask_sql.physical.rel.logical.filter import filter_or_scalar
@@ -259,14 +258,6 @@ def _join_on_columns(
added_columns = list(lhs_columns_to_add.keys())

broadcast = dask_config.get("sql.join.broadcast")
-if not BROADCAST_JOIN_SUPPORT_WORKING and (
-isinstance(broadcast, float) or broadcast
-):
-warnings.warn(
-"Broadcast Joins may not work as expected with dask<2023.1.1"
-"For more information refer to https://github.com/dask/dask/issues/9851"
-" and https://github.com/dask/dask/issues/9870"
-)
if join_type == "leftanti" and not is_cudf_type(df_lhs_with_tmp):
df = df_lhs_with_tmp.merge(
df_rhs_with_tmp,
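With the warning removed, broadcast joins are controlled solely by the `sql.join.broadcast` config key read a few lines above. A minimal usage sketch, assuming two small made-up in-memory tables (this is not code from the repository):

```python
# Hedged sketch: enable dask's broadcast-join path for a dask-sql query via
# the "sql.join.broadcast" config key read in _join_on_columns above.
import dask
import pandas as pd

from dask_sql import Context

c = Context()
c.create_table("a", pd.DataFrame({"id": [1, 2, 3], "x": [10, 20, 30]}))
c.create_table("b", pd.DataFrame({"id": [1, 2, 3], "y": ["u", "v", "w"]}))

with dask.config.set({"sql.join.broadcast": True}):
    result = c.sql("SELECT a.id, x, y FROM a JOIN b ON a.id = b.id").compute()
print(result)
```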
15 changes: 5 additions & 10 deletions dask_sql/physical/rex/core/call.py
@@ -16,7 +16,6 @@
from dask.highlevelgraph import HighLevelGraph
from dask.utils import random_state_data

-from dask_sql._compat import DASK_CUDF_TODATETIME_SUPPORT
from dask_sql._datafusion_lib import SqlTypeName
from dask_sql.datacontainer import DataContainer
from dask_sql.mappings import (
@@ -964,15 +963,11 @@ def date_part(self, what, df: SeriesOrScalar):
elif what in {"YEAR", "YEARS"}:
return df.year
elif what == "DATE":
-if isinstance(df, pd.Timestamp):
-return df.date()
-else:
-if is_cudf_type(df) and not DASK_CUDF_TODATETIME_SUPPORT:
-raise RuntimeError(
-"Dask-cuDF to_datetime support requires Dask version >= 2023.5.1"
-)
-else:
-return dd.to_datetime(df.strftime("%Y-%m-%d"))
+return (
+df.date()
+if isinstance(df, pd.Timestamp)
+else dd.to_datetime(df.strftime("%Y-%m-%d"))
+)
else:
raise NotImplementedError(f"Extraction of {what} is not (yet) implemented.")

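The rewritten `DATE` branch keeps the previous behaviour in one expression: a scalar `pd.Timestamp` goes through `.date()`, while series are truncated by formatting to `%Y-%m-%d` and re-parsing. A rough standalone illustration of that path follows (the sample data is invented; this is not the dask-sql API itself):

```python
# Truncate a datetime series to whole dates the same way the simplified
# expression does: strftime to "%Y-%m-%d", then parse back with to_datetime.
import dask.dataframe as dd
import pandas as pd

s = dd.from_pandas(
    pd.Series(pd.to_datetime(["2024-01-01 12:34:56", "2024-03-01 08:00:00"])),
    npartitions=1,
)

dates = dd.to_datetime(s.dt.strftime("%Y-%m-%d"))
print(dates.compute())

# The scalar path is simply:
print(pd.Timestamp("2024-01-01 12:34:56").date())
```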
6 changes: 0 additions & 6 deletions dask_sql/physical/utils/filter.py
@@ -11,8 +11,6 @@
from dask.layers import DataFrameIOLayer
from dask.utils import M, apply, is_arraylike

-from dask_sql._compat import PQ_IS_SUPPORT, PQ_NOT_IN_SUPPORT

logger = logging.getLogger(__name__)


@@ -501,8 +499,6 @@ def _get_blockwise_input(input_index, indices: list, dsk: RegenerableGraph):


def _inv(symbol: str):
if symbol == "in" and not PQ_NOT_IN_SUPPORT:
raise ValueError("This version of dask does not support 'not in'")
return {
">": "<",
"<": ">",
@@ -568,8 +564,6 @@ def _blockwise_isin_dnf(op, indices: list, dsk: RegenerableGraph) -> DNF:

def _blockwise_isna_dnf(op, indices: list, dsk: RegenerableGraph) -> DNF:
# Return DNF expression pattern for `isna`
-if not PQ_IS_SUPPORT:
-raise ValueError("This version of dask does not support 'is' predicates.")
left = _get_blockwise_input(0, indices, dsk)
return DNF((left, "is", None))

4 changes: 2 additions & 2 deletions docs/environment.yml
@@ -6,14 +6,14 @@ dependencies:
- sphinx>=4.0.0
- sphinx-tabs
- dask-sphinx-theme>=2.0.3
-- dask>=2022.3.0
+- dask==2024.1.1
- pandas>=1.4.0
- fugue>=0.7.3
# FIXME: https://github.com/fugue-project/fugue/issues/526
- triad<0.9.2
- fastapi>=0.92.0
- httpx>=0.24.1
-- uvicorn>=0.13.4
+- uvicorn>=0.14
- tzlocal>=2.1
- prompt_toolkit>=3.0.8
- pygments>=2.7.1
4 changes: 2 additions & 2 deletions docs/requirements-docs.txt
@@ -1,14 +1,14 @@
sphinx>=4.0.0
sphinx-tabs
dask-sphinx-theme>=3.0.0
-dask>=2022.3.0
+dask==2024.1.1
pandas>=1.4.0
fugue>=0.7.3
# FIXME: https://github.com/fugue-project/fugue/issues/526
triad<0.9.2
fastapi>=0.92.0
httpx>=0.24.1
-uvicorn>=0.13.4
+uvicorn>=0.14
tzlocal>=2.1
prompt_toolkit>=3.0.8
pygments>=2.7.1
6 changes: 3 additions & 3 deletions pyproject.toml
@@ -27,12 +27,12 @@ classifiers = [
readme = "README.md"
requires-python = ">=3.9"
dependencies = [
"dask[dataframe]>=2022.3.0",
"distributed>=2022.3.0",
"dask[dataframe]==2024.1.1",
"distributed==2024.1.1",
"pandas>=1.4.0",
"fastapi>=0.92.0",
"httpx>=0.24.1",
"uvicorn>=0.13.4",
"uvicorn>=0.14",
"tzlocal>=2.1",
"prompt_toolkit>=3.0.8",
"pygments>=2.7.1",
9 changes: 0 additions & 9 deletions tests/integration/test_filter.py
@@ -5,7 +5,6 @@
from dask.utils_test import hlg_layer
from packaging.version import parse as parseVersion

-from dask_sql._compat import PQ_IS_SUPPORT, PQ_NOT_IN_SUPPORT
from tests.utils import assert_eq

DASK_GT_2022_4_2 = parseVersion(dask.__version__) >= parseVersion("2022.4.2")
@@ -182,10 +181,6 @@ def test_filter_year(c):
"SELECT * FROM parquet_ddf WHERE b NOT IN (1, 3, 5, 6)",
lambda x: x[~x["b"].isin([1, 3, 5, 6])],
[[("b", "not in", (1, 3, 5, 6))]],
-marks=pytest.mark.skipif(
-not PQ_NOT_IN_SUPPORT,
-reason="Requires https://github.com/dask/dask/pull/10320",
-),
),
(
"SELECT a FROM parquet_ddf WHERE (b > 5 AND b < 10) OR a = 1",
@@ -317,10 +312,6 @@ def test_filter_decimal(c, gpu):
c.drop_table("df")


-@pytest.mark.skipif(
-not PQ_IS_SUPPORT,
-reason="Requires https://github.com/dask/dask/pull/10320",
-)
def test_predicate_pushdown_isna(tmpdir):
from dask_sql.context import Context

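Both skip markers removed above guarded predicate pushdown on parquet (NOT IN and IS NULL filters) that the pinned dask release supports unconditionally. A self-contained sketch of the behaviour they used to gate, with an invented file path, table name, and data:

```python
# Hedged example: with dask==2024.1.1, dask-sql can push NOT IN / IS NULL
# predicates down into the parquet read without any version check.
import dask.dataframe as dd
import pandas as pd

from dask_sql import Context

pdf = pd.DataFrame({"a": range(6), "b": [1, 3, 5, 6, None, 2]})
dd.from_pandas(pdf, npartitions=2).to_parquet("example_parquet")

c = Context()
c.create_table("parquet_ddf", dd.read_parquet("example_parquet"))

not_in = c.sql("SELECT * FROM parquet_ddf WHERE b NOT IN (1, 3, 5, 6)")
is_null = c.sql("SELECT * FROM parquet_ddf WHERE b IS NULL")
print(not_in.compute())
print(is_null.compute())
```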
5 changes: 0 additions & 5 deletions tests/integration/test_join.py
@@ -5,7 +5,6 @@
from dask.utils_test import hlg_layer

from dask_sql import Context
-from dask_sql._compat import BROADCAST_JOIN_SUPPORT_WORKING
from dask_sql.datacontainer import Statistics
from tests.utils import assert_eq

@@ -524,10 +523,6 @@ def test_join_reorder(c):
assert_eq(result_df, expected_df, check_index=False)


-@pytest.mark.xfail(
-not BROADCAST_JOIN_SUPPORT_WORKING,
-reason="Broadcast Joins do not work as expected with dask<2023.1.1",
-)
@pytest.mark.parametrize("gpu", [False, pytest.param(True, marks=pytest.mark.gpu)])
def test_broadcast_join(c, client, gpu):
df1 = dd.from_pandas(