Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix build image #12

Merged
merged 3 commits into from
Nov 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
18 changes: 9 additions & 9 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
FROM mlrun/mlrun-gpu
FROM mlrun/mlrun-gpu:1.7.0

# Update apt-get to install ffmpeg (support audio file formats):
RUN apt-get update -y
RUN apt-get install ffmpeg -y

# Install demo requirements:
RUN pip install -U mlrun
RUN pip install -U git+https://github.com/huggingface/transformers.git
RUN pip install tqdm mpi4py
RUN pip install -U torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
RUN pip install pyannote.audio faster-whisper bitsandbytes accelerate datasets peft optimum
RUN pip install auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/
RUN pip install langchain openai

RUN pip install transformers==4.44.1
RUN pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu118
RUN pip install bitsandbytes==0.41.1 accelerate==0.24.1 datasets==2.14.6 peft==0.5.0 optimum==1.13.2
RUN pip install auto-gptq==0.4.2 --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/
RUN pip install langchain==0.0.327 openai==0.28.1
RUN pip install git+https://github.com/suno-ai/bark.git
RUN pip install streamlit st-annotated-text spacy librosa presidio-anonymizer presidio-analyzer nltk flair
RUN pip install streamlit==1.28.0 st-annotated-text==4.0.1 spacy==3.7.2 librosa==0.10.1 presidio-anonymizer==2.2.34 presidio-analyzer==2.2.34 nltk==3.8.1 flair==0.13.0
RUN python -m spacy download en_core_web_lg
RUN pip install -U SQLAlchemy

# Align onnxruntime to use gpu:
RUN pip uninstall -y onnxruntime-gpu
Expand Down
2 changes: 1 addition & 1 deletion notebook.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,8 @@
" name=\"call-center-demo\",\n",
" user_project=True,\n",
" parameters={\n",
" \"build_image\": True,\n",
" \"source\": \"git://github.com/mlrun/demo-call-center.git#main\",\n",
" \"default_image\": \"yonishelach/call-center-transformers\",\n",
" \"gpus\": 1 if run_with_gpu else 0 ,\n",
" },\n",
")"
Expand Down
51 changes: 29 additions & 22 deletions project_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,21 +37,25 @@ def setup(

# Unpack parameters:
source = project.get_param(key="source")
default_image = project.get_param(key="default_image")
default_image = project.get_param(key="default_image", default=None)
build_image = project.get_param(key="build_image", default=False)
gpus = project.get_param(key="gpus", default=0)
node_name = project.get_param(key="node_name", default=None)
node_selector = project.get_param(key="node_selector", default={"alpha.eksctl.io/nodegroup-name": "added-t4"})

# Set the project git source:
if source:
print(f"Project Source: {source}")
project.set_source(source=source, pull_at_runtime=True)

# Set or build the default image:
if default_image is None:
# Set default image:
if default_image:
project.set_default_image(default_image)

# Build the image:
if build_image:
print("Building default image for the demo:")
_build_image(project=project)
else:
project.set_default_image(default_image)

# Set the secrets:
_set_secrets(
Expand All @@ -65,8 +69,8 @@ def setup(
mlrun.get_run_db().get_hub_catalog(source_name="default", force_refresh=True)

# Set the functions:
_set_calls_generation_functions(project=project, gpus=gpus, node_name=node_name)
_set_calls_analysis_functions(project=project, gpus=gpus, node_name=node_name)
_set_calls_generation_functions(project=project, gpus=gpus, node_name=node_name, node_selector=node_selector)
_set_calls_analysis_functions(project=project, gpus=gpus, node_name=node_name, node_selector=node_selector)

# Set the workflows:
_set_workflows(project=project)
Expand All @@ -84,21 +88,19 @@ def _build_image(project: mlrun.projects.MlrunProject):
base_image="mlrun/mlrun-gpu",
commands=[
# Update apt-get to install ffmpeg (support audio file formats):
"apt-get update -y",
"apt-get install ffmpeg -y",
"apt-get update -y && apt-get install ffmpeg -y",
# Install demo requirements:
"pip install tqdm mpi4py",
"pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118",
"pip install pyannote.audio faster-whisper bitsandbytes transformers accelerate datasets peft optimum",
"pip install auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/",
"pip install langchain openai",
"pip install transformers==4.44.1",
"pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu118",
"pip install bitsandbytes==0.41.1 accelerate==0.24.1 datasets==2.14.6 peft==0.5.0 optimum==1.13.2",
"pip install auto-gptq==0.4.2 --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/",
"pip install langchain==0.0.327 openai==0.28.1",
"pip install git+https://github.com/suno-ai/bark.git", # suno-bark
"pip install streamlit st-annotated-text spacy librosa presidio-anonymizer presidio-analyzer nltk flair",
"pip install streamlit==1.28.0 st-annotated-text==4.0.1 spacy==3.7.2 librosa==0.10.1 presidio-anonymizer==2.2.34 presidio-analyzer==2.2.34 nltk==3.8.1 flair==0.13.0",
"python -m spacy download en_core_web_lg",
"pip install -U SQLAlchemy",
"pip uninstall -y onnxruntime-gpu",
"pip uninstall -y onnxruntime",
"pip uninstall -y onnxruntime-gpu onnxruntime",
"pip install onnxruntime-gpu",
"python -m spacy download en_core_web_lg",
],
set_as_default=True,
)
Expand Down Expand Up @@ -129,6 +131,7 @@ def _set_function(
node_name: str = None,
with_repo: bool = None,
image: str = None,
node_selector: dict = None,
):
# Set the given function:
if with_repo is None:
Expand All @@ -139,7 +142,7 @@ def _set_function(

# Configure GPUs according to the given kind:
if gpus >= 1:
mlrun_function.with_node_selection(node_selector={"alpha.eksctl.io/nodegroup-name": "added-t4"})
mlrun_function.with_node_selection(node_selector=node_selector)
if kind == "mpijob":
# 1 GPU for each rank:
mlrun_function.with_limits(gpus=1)
Expand All @@ -157,7 +160,8 @@ def _set_function(
def _set_calls_generation_functions(
project: mlrun.projects.MlrunProject,
gpus: int,
node_name: str = None
node_name: str = None,
node_selector: dict = None,
):
# Client and agent data generator
_set_function(
Expand All @@ -184,13 +188,15 @@ def _set_calls_generation_functions(
name="text-to-audio-generator",
kind="job", # TODO: MPI once MLRun supports it out of the box
gpus=gpus,
node_selector=node_selector,
)


def _set_calls_analysis_functions(
project: mlrun.projects.MlrunProject,
gpus: int,
node_name: str = None
node_name: str = None,
node_selector: dict = None,
):
# DB management:
_set_function(
Expand Down Expand Up @@ -218,6 +224,7 @@ def _set_calls_analysis_functions(
kind="mpijob" if gpus > 1 else "job",
gpus=gpus,
node_name=node_name,
node_selector=node_selector,
)

# PII recognition:
Expand All @@ -227,7 +234,6 @@ def _set_calls_analysis_functions(
name="pii-recognition",
kind="job",
node_name=node_name,
image="guyliguazio/call-center-11.8:1.4.1.6",
)

# Question answering:
Expand All @@ -238,6 +244,7 @@ def _set_calls_analysis_functions(
kind="job",
gpus=gpus,
node_name=node_name,
node_selector=node_selector,
)

# Postprocessing:
Expand Down