From 74b39edb1fc724f458a199a8e94fa2e233d78745 Mon Sep 17 00:00:00 2001
From: yonishelach <yonatanshelach@gmail.com>
Date: Wed, 20 Nov 2024 19:03:11 +0200
Subject: [PATCH 1/3] Fix build image

---
 Dockerfile       | 18 ++++++++--------
 notebook.ipynb   |  2 +-
 project_setup.py | 56 ++++++++++++++++++++++++++++++------------------
 3 files changed, 45 insertions(+), 31 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 5465e6c..07f7586 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,20 +1,20 @@
-FROM mlrun/mlrun-gpu
+FROM mlrun/mlrun-gpu:1.7.0
 
 # Update apt-get to install ffmpeg (support audio file formats):
 RUN apt-get update -y
 RUN apt-get install ffmpeg -y
 
 # Install demo requirements:
-RUN pip install -U mlrun
-RUN pip install -U git+https://github.com/huggingface/transformers.git
-RUN pip install tqdm mpi4py
-RUN pip install -U torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
-RUN pip install pyannote.audio faster-whisper bitsandbytes accelerate datasets peft optimum
-RUN pip install auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/
-RUN pip install langchain openai
+
+RUN pip install transformers==4.44.1
+RUN pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu118
+RUN pip install bitsandbytes==0.41.1 accelerate==0.24.1 datasets==2.14.6 peft==0.5.0 optimum==1.13.2
+RUN pip install auto-gptq==0.4.2 --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/
+RUN pip install langchain==0.0.327 openai==0.28.1
 RUN pip install git+https://github.com/suno-ai/bark.git
-RUN pip install streamlit st-annotated-text spacy librosa presidio-anonymizer presidio-analyzer nltk flair
+RUN pip install streamlit==1.28.0 st-annotated-text==4.0.1 spacy==3.7.2 librosa==0.10.1 presidio-anonymizer==2.2.34 presidio-analyzer==2.2.34 nltk==3.8.1 flair==0.13.0
 RUN python -m spacy download en_core_web_lg
+RUN pip install -U SQLAlchemy
 
 # Align onnxruntime to use gpu:
 RUN pip uninstall -y onnxruntime-gpu
diff --git a/notebook.ipynb b/notebook.ipynb
index 5c3112f..278471e 100644
--- a/notebook.ipynb
+++ b/notebook.ipynb
@@ -187,8 +187,8 @@
     "    name=\"call-center-demo\",\n",
     "    user_project=True,\n",
     "    parameters={\n",
+    "        \"build_image\": True,\n",
     "        \"source\": \"git://github.com/mlrun/demo-call-center.git#main\",\n",
-    "        \"default_image\": \"yonishelach/call-center-transformers\",\n",
     "        \"gpus\": 1 if run_with_gpu else 0 ,\n",
     "    },\n",
     ")"
diff --git a/project_setup.py b/project_setup.py
index 176848f..7454eb0 100644
--- a/project_setup.py
+++ b/project_setup.py
@@ -37,21 +37,25 @@ def setup(
 
     # Unpack parameters:
     source = project.get_param(key="source")
-    default_image = project.get_param(key="default_image")
+    default_image = project.get_param(key="default_image", default=None)
+    build_image = project.get_param(key="build_image", default=False)
     gpus = project.get_param(key="gpus", default=0)
     node_name = project.get_param(key="node_name", default=None)
+    node_selector = project.get_param(key="node_selector", default={"alpha.eksctl.io/nodegroup-name": "added-t4"})
 
     # Set the project git source:
     if source:
         print(f"Project Source: {source}")
         project.set_source(source=source, pull_at_runtime=True)
 
-    # Set or build the default image:
-    if default_image is None:
+    # Set default image:
+    if default_image:
+        project.set_default_image(default_image)
+
+    # Build the image:
+    if build_image:
         print("Building default image for the demo:")
         _build_image(project=project)
-    else:
-        project.set_default_image(default_image)
 
     # Set the secrets:
     _set_secrets(
@@ -65,8 +69,8 @@ def setup(
     mlrun.get_run_db().get_hub_catalog(source_name="default", force_refresh=True)
 
     # Set the functions:
-    _set_calls_generation_functions(project=project, gpus=gpus, node_name=node_name)
-    _set_calls_analysis_functions(project=project, gpus=gpus, node_name=node_name)
+    _set_calls_generation_functions(project=project, gpus=gpus, node_name=node_name, node_selector=node_selector)
+    _set_calls_analysis_functions(project=project, gpus=gpus, node_name=node_name, node_selector=node_selector)
 
     # Set the workflows:
     _set_workflows(project=project)
@@ -84,21 +88,19 @@ def _build_image(project: mlrun.projects.MlrunProject):
         base_image="mlrun/mlrun-gpu",
         commands=[
             # Update apt-get to install ffmpeg (support audio file formats):
-            "apt-get update -y",
-            "apt-get install ffmpeg -y",
+            "apt-get update -y && apt-get install ffmpeg -y",
             # Install demo requirements:
-            "pip install tqdm mpi4py",
-            "pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118",
-            "pip install pyannote.audio faster-whisper bitsandbytes transformers accelerate datasets peft optimum",
-            "pip install auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/",
-            "pip install langchain openai",
+            "pip install transformers==4.44.1",
+            "pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu118",
+            "pip install bitsandbytes==0.41.1 accelerate==0.24.1 datasets==2.14.6 peft==0.5.0 optimum==1.13.2",
+            "pip install auto-gptq==0.4.2 --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/",
+            "pip install langchain==0.0.327 openai==0.28.1",
             "pip install git+https://github.com/suno-ai/bark.git",  # suno-bark
-            "pip install streamlit st-annotated-text spacy librosa presidio-anonymizer presidio-analyzer nltk flair",
+            "pip install streamlit==1.28.0 st-annotated-text==4.0.1 spacy==3.7.2 librosa==0.10.1 presidio-anonymizer==2.2.34 presidio-analyzer==2.2.34 nltk==3.8.1 flair==0.13.0",
+            "python -m spacy download en_core_web_lg",
             "pip install -U SQLAlchemy",
-            "pip uninstall -y onnxruntime-gpu",
-            "pip uninstall -y onnxruntime",
+            "pip uninstall -y onnxruntime-gpu onnxruntime",
             "pip install onnxruntime-gpu",
-            "python -m spacy download en_core_web_lg",
         ],
         set_as_default=True,
     )
@@ -129,6 +131,7 @@ def _set_function(
         node_name: str = None,
         with_repo: bool = None,
         image: str = None,
+        node_selector: dict = None,
 ):
     # Set the given function:
     if with_repo is None:
@@ -139,7 +142,7 @@ def _set_function(
 
     # Configure GPUs according to the given kind:
     if gpus >= 1:
-        mlrun_function.with_node_selection(node_selector={"alpha.eksctl.io/nodegroup-name": "added-t4"})
+        mlrun_function.with_node_selection(node_selector=node_selector)
         if kind == "mpijob":
             # 1 GPU for each rank:
             mlrun_function.with_limits(gpus=1)
@@ -157,7 +160,8 @@ def _set_function(
 def _set_calls_generation_functions(
     project: mlrun.projects.MlrunProject,
     gpus: int,
-    node_name: str = None
+    node_name: str = None,
+    node_selector: dict = None,
 ):
     # Client and agent data generator
     _set_function(
@@ -166,6 +170,7 @@ def _set_calls_generation_functions(
         name="structured-data-generator",
         kind="job",
         node_name=node_name,
+        node_selector=node_selector,
     )
 
     # Conversation generator:
@@ -175,6 +180,7 @@ def _set_calls_generation_functions(
         name="conversations-generator",
         kind="job",
         node_name=node_name,
+        node_selector=node_selector,
     )
 
     # Text to audio generator:
@@ -184,13 +190,15 @@ def _set_calls_generation_functions(
         name="text-to-audio-generator",
         kind="job",  # TODO: MPI once MLRun supports it out of the box
         gpus=gpus,
+        node_selector=node_selector,
     )
 
 
 def _set_calls_analysis_functions(
     project: mlrun.projects.MlrunProject,
     gpus: int,
-    node_name: str = None
+    node_name: str = None,
+    node_selector: dict = None,
 ):
     # DB management:
     _set_function(
@@ -199,6 +207,7 @@ def _set_calls_analysis_functions(
         name="db-management",
         kind="job",
         node_name=node_name,
+        node_selector=node_selector,
     )
 
     # Speech diarization:
@@ -208,6 +217,7 @@ def _set_calls_analysis_functions(
         name="silero-vad",
         kind="job",
         node_name=node_name,
+        node_selector=node_selector,
     )
 
     # Transcription:
@@ -218,6 +228,7 @@ def _set_calls_analysis_functions(
         kind="mpijob" if gpus > 1 else "job",
         gpus=gpus,
         node_name=node_name,
+        node_selector=node_selector,
     )
 
     # PII recognition:
@@ -228,6 +239,7 @@ def _set_calls_analysis_functions(
         kind="job",
         node_name=node_name,
         image="guyliguazio/call-center-11.8:1.4.1.6",
+        node_selector=node_selector,
     )
 
     # Question answering:
@@ -238,6 +250,7 @@ def _set_calls_analysis_functions(
         kind="job",
         gpus=gpus,
         node_name=node_name,
+        node_selector=node_selector,
     )
 
     # Postprocessing:
@@ -248,6 +261,7 @@ def _set_calls_analysis_functions(
         with_repo=False,
         kind="job",
         node_name=node_name,
+        node_selector=node_selector,
     )
 
 

From c05843d596075ff9ecb3be8c36895b5ebfcdbfa0 Mon Sep 17 00:00:00 2001
From: yonishelach <yonatanshelach@gmail.com>
Date: Thu, 21 Nov 2024 10:12:22 +0200
Subject: [PATCH 2/3] remove node_selector parameter from non-GPU functions

---
 project_setup.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/project_setup.py b/project_setup.py
index 7454eb0..f1670be 100644
--- a/project_setup.py
+++ b/project_setup.py
@@ -170,7 +170,6 @@ def _set_calls_generation_functions(
         name="structured-data-generator",
         kind="job",
         node_name=node_name,
-        node_selector=node_selector,
     )
 
     # Conversation generator:
@@ -180,7 +179,6 @@ def _set_calls_generation_functions(
         name="conversations-generator",
         kind="job",
         node_name=node_name,
-        node_selector=node_selector,
     )
 
     # Text to audio generator:
@@ -207,7 +205,6 @@ def _set_calls_analysis_functions(
         name="db-management",
         kind="job",
         node_name=node_name,
-        node_selector=node_selector,
     )
 
     # Speech diarization:
@@ -217,7 +214,6 @@ def _set_calls_analysis_functions(
         name="silero-vad",
         kind="job",
         node_name=node_name,
-        node_selector=node_selector,
     )
 
     # Transcription:
@@ -239,7 +235,6 @@ def _set_calls_analysis_functions(
         kind="job",
         node_name=node_name,
         image="guyliguazio/call-center-11.8:1.4.1.6",
-        node_selector=node_selector,
     )
 
     # Question answering:
@@ -261,7 +256,6 @@ def _set_calls_analysis_functions(
         with_repo=False,
         kind="job",
         node_name=node_name,
-        node_selector=node_selector,
     )
 
 

From 85763c5265feaccb4f7a29ce7b198e3b0e5b1ad3 Mon Sep 17 00:00:00 2001
From: yonishelach <yonatanshelach@gmail.com>
Date: Thu, 21 Nov 2024 10:51:33 +0200
Subject: [PATCH 3/3] remove pii image

---
 project_setup.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/project_setup.py b/project_setup.py
index f1670be..0605734 100644
--- a/project_setup.py
+++ b/project_setup.py
@@ -234,7 +234,6 @@ def _set_calls_analysis_functions(
         name="pii-recognition",
         kind="job",
         node_name=node_name,
-        image="guyliguazio/call-center-11.8:1.4.1.6",
     )
 
     # Question answering: