From bf2f8f29328dacc51ff69e069a09bebf6eb0bd0c Mon Sep 17 00:00:00 2001 From: Sarah Yurick <53962159+sarahyurick@users.noreply.github.com> Date: Wed, 9 Oct 2024 11:17:56 -0700 Subject: [PATCH] Remove `DASK_DATAFRAME__QUERY_PLANNING` environment variable (#270) * remove DASK_DATAFRAME__QUERY_PLANNING False Signed-off-by: Sarah Yurick * isort Signed-off-by: Sarah Yurick --------- Signed-off-by: Sarah Yurick --- examples/translation_example.py | 3 --- nemo_curator/classifiers/aegis.py | 1 - nemo_curator/classifiers/fineweb_edu.py | 2 -- nemo_curator/scripts/semdedup/clustering.py | 1 - tests/test_semdedup.py | 2 -- .../distributed_data_classification.ipynb | 4 +--- tutorials/single_node_tutorial/single_gpu_tutorial.ipynb | 1 - 7 files changed, 1 insertion(+), 13 deletions(-) diff --git a/examples/translation_example.py b/examples/translation_example.py index 13777085b..d9d3be734 100644 --- a/examples/translation_example.py +++ b/examples/translation_example.py @@ -1,7 +1,4 @@ import os - -os.environ["DASK_DATAFRAME__QUERY_PLANNING"] = "False" -import argparse import re import time from dataclasses import dataclass diff --git a/nemo_curator/classifiers/aegis.py b/nemo_curator/classifiers/aegis.py index 703ac56df..1306903fa 100644 --- a/nemo_curator/classifiers/aegis.py +++ b/nemo_curator/classifiers/aegis.py @@ -14,7 +14,6 @@ import os os.environ["RAPIDS_NO_INITIALIZE"] = "1" -os.environ["DASK_DATAFRAME__QUERY_PLANNING"] = "False" from dataclasses import dataclass from functools import lru_cache from typing import List, Optional, Union diff --git a/nemo_curator/classifiers/fineweb_edu.py b/nemo_curator/classifiers/fineweb_edu.py index 9722b2823..9547fc8fe 100644 --- a/nemo_curator/classifiers/fineweb_edu.py +++ b/nemo_curator/classifiers/fineweb_edu.py @@ -14,7 +14,6 @@ import os os.environ["RAPIDS_NO_INITIALIZE"] = "1" -os.environ["DASK_DATAFRAME__QUERY_PLANNING"] = "False" import torch from crossfit import op from crossfit.backend.torch.hf.model import HFModel @@ -23,7 +22,6 @@ from nemo_curator.classifiers.base import ( DistributedDataClassifier, _get_suggest_memory_for_classifier, - _run_classifier_helper, ) from nemo_curator.datasets import DocumentDataset diff --git a/nemo_curator/scripts/semdedup/clustering.py b/nemo_curator/scripts/semdedup/clustering.py index 82b83c54b..c4ff21f93 100644 --- a/nemo_curator/scripts/semdedup/clustering.py +++ b/nemo_curator/scripts/semdedup/clustering.py @@ -16,7 +16,6 @@ import os from datetime import datetime -os.environ["DASK_DATAFRAME__QUERY_PLANNING"] = "False" import dask_cudf from nemo_curator.datasets import DocumentDataset diff --git a/tests/test_semdedup.py b/tests/test_semdedup.py index 3b4216e01..65d109e4c 100644 --- a/tests/test_semdedup.py +++ b/tests/test_semdedup.py @@ -14,8 +14,6 @@ import os import pytest - -os.environ["DASK_DATAFRAME__QUERY_PLANNING"] = "False" from dask.dataframe.utils import assert_eq from distributed import Client diff --git a/tutorials/distributed_data_classification/distributed_data_classification.ipynb b/tutorials/distributed_data_classification/distributed_data_classification.ipynb index c5bd5f32d..4b855ba89 100644 --- a/tutorials/distributed_data_classification/distributed_data_classification.ipynb +++ b/tutorials/distributed_data_classification/distributed_data_classification.ipynb @@ -20,8 +20,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "env: PYTHONWARNINGS=ignore\n", - "env: DASK_DATAFRAME__QUERY_PLANNING=False\n" + "env: PYTHONWARNINGS=ignore\n" ] } ], @@ -29,7 +28,6 @@ "# Silence Warnings (HuggingFace internal warnings)\n", "\n", "%env PYTHONWARNINGS=ignore\n", - "%env DASK_DATAFRAME__QUERY_PLANNING=False\n", "import warnings\n", "warnings.filterwarnings(\"ignore\")" ] diff --git a/tutorials/single_node_tutorial/single_gpu_tutorial.ipynb b/tutorials/single_node_tutorial/single_gpu_tutorial.ipynb index 6aa39605a..de585e08d 100644 --- a/tutorials/single_node_tutorial/single_gpu_tutorial.ipynb +++ b/tutorials/single_node_tutorial/single_gpu_tutorial.ipynb @@ -110,7 +110,6 @@ }, "outputs": [], "source": [ - "%env DASK_DATAFRAME__QUERY_PLANNING False\n", "%env CUDA_VISIBLE_DEVICES 0" ] },