From 363a66b1c9357f5e5b048fdde9a50ddc2605abcd Mon Sep 17 00:00:00 2001 From: Sarah Yurick <53962159+sarahyurick@users.noreply.github.com> Date: Fri, 15 Nov 2024 15:35:21 -0800 Subject: [PATCH] Fix broken NeMo dependencies (#372) * add packaging Signed-off-by: Sarah Yurick * move to requires Signed-off-by: Sarah Yurick * move to github ci file Signed-off-by: Sarah Yurick * add pin Signed-off-by: Sarah Yurick * add torch Signed-off-by: Sarah Yurick * add suggestion from mamba readme Signed-off-by: Sarah Yurick * try github install Signed-off-by: Sarah Yurick * add comma Signed-off-by: Sarah Yurick * another attempt Signed-off-by: Sarah Yurick * remove nemo toolkit Signed-off-by: Sarah Yurick * add datasets Signed-off-by: Sarah Yurick * try removing cython Signed-off-by: Sarah Yurick * remove cython Signed-off-by: Sarah Yurick * sentencepiece Signed-off-by: Sarah Yurick * run black Signed-off-by: Sarah Yurick * apply ryan's suggestion Signed-off-by: Sarah Yurick --------- Signed-off-by: Sarah Yurick --- .github/workflows/test.yml | 3 +-- Dockerfile | 2 +- README.md | 2 -- docs/user-guide/image/gettingstarted.rst | 2 -- nemo_curator/filters/code.py | 8 ++++---- pyproject.toml | 3 ++- tutorials/image-curation/image-curation.ipynb | 2 +- ...Synthetic Data Generation - Hello World Examples.ipynb | 4 ++-- 8 files changed, 11 insertions(+), 15 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index baa968f47..1d8cc9258 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -37,9 +37,8 @@ jobs: # Installing wheel beforehand due to fasttext issue: # https://github.com/facebookresearch/fastText/issues/512#issuecomment-1837367666 - # Explicitly install cython: https://github.com/VKCOM/YouTokenToMe/issues/94 run: | - pip install wheel cython + pip install wheel pip install --no-cache-dir . pip install pytest - name: Run tests diff --git a/Dockerfile b/Dockerfile index 16ddd54af..51fe7be43 100644 --- a/Dockerfile +++ b/Dockerfile @@ -38,7 +38,7 @@ RUN conda create -y --name curator -c conda-forge -c nvidia \ libcusparse \ libcusolver && \ source activate curator && \ - pip install --upgrade cython pytest pip + pip install --upgrade pytest pip RUN \ --mount=type=bind,source=/opt/NeMo-Curator/nemo_curator/__init__.py,target=/opt/NeMo-Curator/nemo_curator/__init__.py,from=curator-update \ diff --git a/README.md b/README.md index 5cba9d103..4513c7af6 100644 --- a/README.md +++ b/README.md @@ -83,14 +83,12 @@ You can get NeMo-Curator in 3 ways. #### PyPi ```bash -pip install cython pip install --extra-index-url https://pypi.nvidia.com nemo-curator[all] ``` #### Source ```bash git clone https://github.com/NVIDIA/NeMo-Curator.git -pip install cython pip install --extra-index-url https://pypi.nvidia.com "./NeMo-Curator[all]" ``` diff --git a/docs/user-guide/image/gettingstarted.rst b/docs/user-guide/image/gettingstarted.rst index dae4240d1..2ccacb25e 100644 --- a/docs/user-guide/image/gettingstarted.rst +++ b/docs/user-guide/image/gettingstarted.rst @@ -33,7 +33,6 @@ NeMo Curator's PyPi page can be found `here =0.0.6", "dask-mpi>=2021.11.0", "dask[complete]>=2021.7.1", + "datasets", "distributed>=2021.7.1", "fasttext==0.9.2", "ftfy==6.1.1", @@ -54,7 +55,6 @@ dependencies = [ "lxml_html_clean", "mecab-python3", "mwparserfromhell==0.6.5", - "nemo_toolkit[nlp]>=1.23.0", "numpy<2", "openai", "peft", @@ -62,6 +62,7 @@ dependencies = [ "presidio-anonymizer==2.2.351", "pycld2", "resiliparse", + "sentencepiece", "spacy>=3.6.0, <3.8.0", "unidic-lite==1.0.8", "usaddress==0.5.10", diff --git a/tutorials/image-curation/image-curation.ipynb b/tutorials/image-curation/image-curation.ipynb index 947fbfef1..1ac3c1029 100644 --- a/tutorials/image-curation/image-curation.ipynb +++ b/tutorials/image-curation/image-curation.ipynb @@ -49,7 +49,7 @@ }, "outputs": [], "source": [ - "!pip install cython ipywidgets aiofiles\n", + "!pip install ipywidgets aiofiles\n", "# Install from source by default\n", "!pip install --extra-index-url https://pypi.nvidia.com ../../[image]\n", "%env DASK_DATAFRAME__QUERY_PLANNING False" diff --git a/tutorials/synthetic-data-hello-world/Synthetic Data Generation - Hello World Examples.ipynb b/tutorials/synthetic-data-hello-world/Synthetic Data Generation - Hello World Examples.ipynb index bbe0ed8c8..1bc14d14c 100644 --- a/tutorials/synthetic-data-hello-world/Synthetic Data Generation - Hello World Examples.ipynb +++ b/tutorials/synthetic-data-hello-world/Synthetic Data Generation - Hello World Examples.ipynb @@ -58,11 +58,11 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "!pip install -qU wheel cython" + "!pip install -qU wheel" ] }, {