From 74b39edb1fc724f458a199a8e94fa2e233d78745 Mon Sep 17 00:00:00 2001 From: yonishelach Date: Wed, 20 Nov 2024 19:03:11 +0200 Subject: [PATCH 1/3] Fix build image --- Dockerfile | 18 ++++++++-------- notebook.ipynb | 2 +- project_setup.py | 56 ++++++++++++++++++++++++++++++------------------ 3 files changed, 45 insertions(+), 31 deletions(-) diff --git a/Dockerfile b/Dockerfile index 5465e6c..07f7586 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,20 +1,20 @@ -FROM mlrun/mlrun-gpu +FROM mlrun/mlrun-gpu:1.7.0 # Update apt-get to install ffmpeg (support audio file formats): RUN apt-get update -y RUN apt-get install ffmpeg -y # Install demo requirements: -RUN pip install -U mlrun -RUN pip install -U git+https://github.com/huggingface/transformers.git -RUN pip install tqdm mpi4py -RUN pip install -U torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 -RUN pip install pyannote.audio faster-whisper bitsandbytes accelerate datasets peft optimum -RUN pip install auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/ -RUN pip install langchain openai + +RUN pip install transformers==4.44.1 +RUN pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu118 +RUN pip install bitsandbytes==0.41.1 accelerate==0.24.1 datasets==2.14.6 peft==0.5.0 optimum==1.13.2 +RUN pip install auto-gptq==0.4.2 --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/ +RUN pip install langchain==0.0.327 openai==0.28.1 RUN pip install git+https://github.com/suno-ai/bark.git -RUN pip install streamlit st-annotated-text spacy librosa presidio-anonymizer presidio-analyzer nltk flair +RUN pip install streamlit==1.28.0 st-annotated-text==4.0.1 spacy==3.7.2 librosa==0.10.1 presidio-anonymizer==2.2.34 presidio-analyzer==2.2.34 nltk==3.8.1 flair==0.13.0 RUN python -m spacy download en_core_web_lg +RUN pip install -U SQLAlchemy # Align onnxruntime to use gpu: RUN pip uninstall -y 
onnxruntime-gpu diff --git a/notebook.ipynb b/notebook.ipynb index 5c3112f..278471e 100644 --- a/notebook.ipynb +++ b/notebook.ipynb @@ -187,8 +187,8 @@ " name=\"call-center-demo\",\n", " user_project=True,\n", " parameters={\n", + " \"build_image\": True,\n", " \"source\": \"git://github.com/mlrun/demo-call-center.git#main\",\n", - " \"default_image\": \"yonishelach/call-center-transformers\",\n", " \"gpus\": 1 if run_with_gpu else 0 ,\n", " },\n", ")" diff --git a/project_setup.py b/project_setup.py index 176848f..7454eb0 100644 --- a/project_setup.py +++ b/project_setup.py @@ -37,21 +37,25 @@ def setup( # Unpack parameters: source = project.get_param(key="source") - default_image = project.get_param(key="default_image") + default_image = project.get_param(key="default_image", default=None) + build_image = project.get_param(key="build_image", default=False) gpus = project.get_param(key="gpus", default=0) node_name = project.get_param(key="node_name", default=None) + node_selector = project.get_param(key="node_selector", default={"alpha.eksctl.io/nodegroup-name": "added-t4"}) # Set the project git source: if source: print(f"Project Source: {source}") project.set_source(source=source, pull_at_runtime=True) - # Set or build the default image: - if default_image is None: + # Set default image: + if default_image: + project.set_default_image(default_image) + + # Build the image: + if build_image: print("Building default image for the demo:") _build_image(project=project) - else: - project.set_default_image(default_image) # Set the secrets: _set_secrets( @@ -65,8 +69,8 @@ def setup( mlrun.get_run_db().get_hub_catalog(source_name="default", force_refresh=True) # Set the functions: - _set_calls_generation_functions(project=project, gpus=gpus, node_name=node_name) - _set_calls_analysis_functions(project=project, gpus=gpus, node_name=node_name) + _set_calls_generation_functions(project=project, gpus=gpus, node_name=node_name, node_selector=node_selector) + 
_set_calls_analysis_functions(project=project, gpus=gpus, node_name=node_name, node_selector=node_selector) # Set the workflows: _set_workflows(project=project) @@ -84,21 +88,19 @@ def _build_image(project: mlrun.projects.MlrunProject): base_image="mlrun/mlrun-gpu", commands=[ # Update apt-get to install ffmpeg (support audio file formats): - "apt-get update -y", - "apt-get install ffmpeg -y", + "apt-get update -y && apt-get install ffmpeg -y", # Install demo requirements: - "pip install tqdm mpi4py", - "pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118", - "pip install pyannote.audio faster-whisper bitsandbytes transformers accelerate datasets peft optimum", - "pip install auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/", - "pip install langchain openai", + "pip install transformers==4.44.1", + "pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu118", + "pip install bitsandbytes==0.41.1 accelerate==0.24.1 datasets==2.14.6 peft==0.5.0 optimum==1.13.2", + "pip install auto-gptq==0.4.2 --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/", + "pip install langchain==0.0.327 openai==0.28.1", "pip install git+https://github.com/suno-ai/bark.git", # suno-bark - "pip install streamlit st-annotated-text spacy librosa presidio-anonymizer presidio-analyzer nltk flair", + "pip install streamlit==1.28.0 st-annotated-text==4.0.1 spacy==3.7.2 librosa==0.10.1 presidio-anonymizer==2.2.34 presidio-analyzer==2.2.34 nltk==3.8.1 flair==0.13.0", + "python -m spacy download en_core_web_lg", "pip install -U SQLAlchemy", - "pip uninstall -y onnxruntime-gpu", - "pip uninstall -y onnxruntime", + "pip uninstall -y onnxruntime-gpu onnxruntime", "pip install onnxruntime-gpu", - "python -m spacy download en_core_web_lg", ], set_as_default=True, ) @@ -129,6 +131,7 @@ def _set_function( node_name: str = None, with_repo: bool = None, image: 
str = None, + node_selector: dict = None, ): # Set the given function: if with_repo is None: @@ -139,7 +142,7 @@ def _set_function( # Configure GPUs according to the given kind: if gpus >= 1: - mlrun_function.with_node_selection(node_selector={"alpha.eksctl.io/nodegroup-name": "added-t4"}) + mlrun_function.with_node_selection(node_selector=node_selector) if kind == "mpijob": # 1 GPU for each rank: mlrun_function.with_limits(gpus=1) @@ -157,7 +160,8 @@ def _set_function( def _set_calls_generation_functions( project: mlrun.projects.MlrunProject, gpus: int, - node_name: str = None + node_name: str = None, + node_selector: dict = None, ): # Client and agent data generator _set_function( @@ -166,6 +170,7 @@ def _set_calls_generation_functions( name="structured-data-generator", kind="job", node_name=node_name, + node_selector=node_selector, ) # Conversation generator: @@ -175,6 +180,7 @@ def _set_calls_generation_functions( name="conversations-generator", kind="job", node_name=node_name, + node_selector=node_selector, ) # Text to audio generator: @@ -184,13 +190,15 @@ def _set_calls_generation_functions( name="text-to-audio-generator", kind="job", # TODO: MPI once MLRun supports it out of the box gpus=gpus, + node_selector=node_selector, ) def _set_calls_analysis_functions( project: mlrun.projects.MlrunProject, gpus: int, - node_name: str = None + node_name: str = None, + node_selector: dict = None, ): # DB management: _set_function( @@ -199,6 +207,7 @@ def _set_calls_analysis_functions( name="db-management", kind="job", node_name=node_name, + node_selector=node_selector, ) # Speech diarization: @@ -208,6 +217,7 @@ def _set_calls_analysis_functions( name="silero-vad", kind="job", node_name=node_name, + node_selector=node_selector, ) # Transcription: @@ -218,6 +228,7 @@ def _set_calls_analysis_functions( kind="mpijob" if gpus > 1 else "job", gpus=gpus, node_name=node_name, + node_selector=node_selector, ) # PII recognition: @@ -228,6 +239,7 @@ def 
_set_calls_analysis_functions( kind="job", node_name=node_name, image="guyliguazio/call-center-11.8:1.4.1.6", + node_selector=node_selector, ) # Question answering: @@ -238,6 +250,7 @@ def _set_calls_analysis_functions( kind="job", gpus=gpus, node_name=node_name, + node_selector=node_selector, ) # Postprocessing: @@ -248,6 +261,7 @@ def _set_calls_analysis_functions( with_repo=False, kind="job", node_name=node_name, + node_selector=node_selector, ) From c05843d596075ff9ecb3be8c36895b5ebfcdbfa0 Mon Sep 17 00:00:00 2001 From: yonishelach Date: Thu, 21 Nov 2024 10:12:22 +0200 Subject: [PATCH 2/3] remove node_selector parameter on non-GPU functions --- project_setup.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/project_setup.py b/project_setup.py index 7454eb0..f1670be 100644 --- a/project_setup.py +++ b/project_setup.py @@ -170,7 +170,6 @@ def _set_calls_generation_functions( name="structured-data-generator", kind="job", node_name=node_name, - node_selector=node_selector, ) # Conversation generator: @@ -180,7 +179,6 @@ def _set_calls_generation_functions( name="conversations-generator", kind="job", node_name=node_name, - node_selector=node_selector, ) # Text to audio generator: @@ -207,7 +205,6 @@ def _set_calls_analysis_functions( name="db-management", kind="job", node_name=node_name, - node_selector=node_selector, ) # Speech diarization: @@ -217,7 +214,6 @@ def _set_calls_analysis_functions( name="silero-vad", kind="job", node_name=node_name, - node_selector=node_selector, ) # Transcription: @@ -239,7 +235,6 @@ def _set_calls_analysis_functions( kind="job", node_name=node_name, image="guyliguazio/call-center-11.8:1.4.1.6", - node_selector=node_selector, ) # Question answering: @@ -261,7 +256,6 @@ def _set_calls_analysis_functions( with_repo=False, kind="job", node_name=node_name, - node_selector=node_selector, ) From 85763c5265feaccb4f7a29ce7b198e3b0e5b1ad3 Mon Sep 17 00:00:00 2001 From: yonishelach Date: Thu, 21 Nov 2024 10:51:33 +0200 Subject: [PATCH 
3/3] remove pii image --- project_setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/project_setup.py b/project_setup.py index f1670be..0605734 100644 --- a/project_setup.py +++ b/project_setup.py @@ -234,7 +234,6 @@ def _set_calls_analysis_functions( name="pii-recognition", kind="job", node_name=node_name, - image="guyliguazio/call-center-11.8:1.4.1.6", ) # Question answering: