Skip to content

Commit

Permalink
Merge pull request #290 from Wordcab/285-try-whispers2t-as-potential-backend
Browse files Browse the repository at this point in the history

TensorRT-LLM backend based on WhisperS2T
  • Loading branch information
info-wordcab authored Apr 1, 2024
2 parents fa83363 + 35ee65c commit 419278c
Show file tree
Hide file tree
Showing 92 changed files with 54,837 additions and 501 deletions.
2 changes: 1 addition & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
__pycache__
.coverage
.darglint
.env_dev
.flake8
.gitignore
.nox
Expand All @@ -17,5 +18,4 @@ noxfile.py
test.py
tests
whisper_model
whisper_model_he
wordcab_transcribe/**/__pycache__
39 changes: 30 additions & 9 deletions .env
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
# The name of the project, used for API documentation.
PROJECT_NAME="Wordcab Transcribe"
# The version of the project, used for API documentation.
VERSION="0.5.1"
VERSION="0.5.3"
# The description of the project, used for API documentation.
DESCRIPTION="💬 ASR FastAPI server using faster-whisper and Auto-Tuning Spectral Clustering for diarization."
# This API prefix is used for all endpoints in the API outside of the status and cortex endpoints.
Expand All @@ -24,35 +24,56 @@ DEBUG=True
#
# Cloud models:
# The available models are: tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, or large-v2
# You can try different model sizes, but you should see a trade-off between performance and speed.
# You can try different model sizes, but you should see a trade-off between performance and speed. Note that the
# "distil" whisper models do not support languages other than English.
#
# Local models:
# You can also link a local folder path to use a custom model. If you do so, you should also mount the folder in the
# docker run command as a volume.
# e.g. WHISPER_MODEL="/app/models/custom"
# docker cmd: -v /path/to/custom/model:/app/models/custom
WHISPER_MODEL="large-v2"
WHISPER_MODEL="large-v3"
# You can specify one of two engines, "faster-whisper" or "tensorrt-llm". At the moment, "faster-whisper" is more
# stable, adjustable, and accurate, while "tensorrt-llm" is faster but less accurate and adjustable.
WHISPER_ENGINE="tensorrt-llm"
# The align model is used for aligning timestamps under the "tensorrt-llm" engine. The available options are:
# "tiny", "small", "base", or "medium".
ALIGN_MODEL="tiny"
# The compute_type parameter is used to control the precision of the model. You can choose between:
# "int8", "int8_float16", "int8_bfloat16", "int16", "float_16", "bfloat16", "float32".
# "int8", "int8_float16", "int8_bfloat16", "int16", "float_16", "bfloat16", "float32" if you're
# whisper engine is set to "fast-whisper". If you're using "tensorrt-llm", keep it at "float16".
# The default value is "float16".
COMPUTE_TYPE="float16"
# The extra_languages parameter is used to control the languages that need an extra model to be loaded.
# You can specify multiple languages separated by a comma. The available languages are: `he` (Hebrew).
# You can specify multiple languages separated by a comma.
EXTRA_LANGUAGES=
# This is used to control the parallelism of the tokenizers, but should be set to False for now.
TOKENIZERS_PARALLELISM=False
#
# --------------------------------------------------- DIARIZATION ---------------------------------------------------- #
#
# The diarization_backend parameter is used to control the diarization model used. The available options are:
# "longform_diarizer" or "default_diarizer". It's suggested to use "default_diarizer" for better stability.
# The "longform_diarizer" is still being developed.
DIARIZATION_BACKEND="default_diarizer"
# In a MSDD (Multiscale Diarization Decoder) model, the diarization model is trained on multiple window lengths.
# The window_lengths are specified in seconds, and separated by a comma. If not specified, the default value will
# be "1.5, 1.25, 1.0, 0.75, 0.5".
WINDOW_LENGTHS="1.5,1.25,1.0,0.75,0.5"
WINDOW_LENGTHS="2.0,1.5,1.0,0.75,0.5"
# The shift_lengths are specified in seconds, and separated by a comma. If not specified, the default value will
# be "0.75, 0.625, 0.5, 0.375, 0.25".
SHIFT_LENGTHS="0.75,0.625,0.5,0.375,0.25"
SHIFT_LENGTHS="1.0,0.75,0.625,0.5,0.25"
# The multiscale_weights are float values separated by a comma. If not specified, the default value will be
# "1.0, 1.0, 1.0, 1.0, 1.0".
MULTISCALE_WEIGHTS="1.0,1.0,1.0,1.0,1.0"
#
# --------------------------------------------------- POST-PROCESSING------------------------------------------------- #
#
# This parameter is used to control the punctuation-based alignment. If set to True, the predicted punctuation
# will be used to adjust speaker diarization. The default value is True, but note this comes with a performance
# tradeoff.
ENABLE_PUNCTUATION_BASED_ALIGNMENT=False
#
# ---------------------------------------------- ASR TYPE CONFIGURATION ---------------------------------------------- #
#
# The asr_type parameter is used to control the type of ASR used. The available options are: `async` or `live`.
Expand Down Expand Up @@ -82,7 +103,7 @@ USERNAME="admin"
PASSWORD="admin"
# This openssl_key parameter is used to control the key used to encrypt the access tokens.
# You should absolutely change this value before deploying the API in production.
OPENSSL_KEY="0123456789abcdefghijklmnopqrstuvwyz" # <--- CHANGE ABSOLUTELY THIS VALUE
OPENSSL_KEY="0123456789abcdefghijklmnopqrstuvwyz" # <--- CHANGE THIS VALUE
# This openssl_algorithm parameter is used to control the algorithm used to encrypt the access tokens.
# You should in most case not change this value.
OPENSSL_ALGORITHM="HS256"
Expand All @@ -107,7 +128,7 @@ SVIX_APP_ID=
AWS_ACCESS_KEY_ID=
AWS_SECRET_ACCESS_KEY=
AWS_STORAGE_BUCKET_NAME=
AWS_S3_REGION_NAME=
AWS_REGION_NAME=
#
# -------------------------------------------------- REMOTE SERVERS -------------------------------------------------- #
# The remote servers configuration is used to control the number of servers used to process the requests if you don't
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ __pycache__/
.coverage
.coverage.*
.DS_Store
.env_dev
.nox/
.pytest_cache/
.python-version
Expand Down
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ repos:
rev: "v4.3.0"
hooks:
- id: check-added-large-files
args: [--maxkb=2000]
- id: check-toml
- id: check-yaml

Expand Down
70 changes: 62 additions & 8 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,17 +1,71 @@
FROM nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04 AS runtime

ENV NVIDIA_DRIVER_CAPABILITIES ${NVIDIA_DRIVER_CAPABILITIES:-compute,utility}

ENV PYTHONUNBUFFERED=1

ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y \
git \
curl \
ffmpeg \

RUN apt update && apt install -y \
libsndfile1 \
software-properties-common \
python3-pip
ffmpeg \
build-essential \
ca-certificates \
ccache \
cmake \
gnupg2 \
wget \
git \
curl \
gdb \
openmpi-bin \
libopenmpi-dev \
libffi-dev \
libssl-dev \
python3-pip \
libbz2-dev \
python3-dev \
liblzma-dev \
&& rm -rf /var/lib/apt/lists/*

ENV PYTHONUNBUFFERED=1
RUN python3 -m pip install pip --upgrade \
&& python3 -m pip install hatch
RUN apt update && apt install -y \
libtiff-tools=4.3.0-6ubuntu0.8 \
libtiff5=4.3.0-6ubuntu0.8 \
libgnutls30=3.7.3-4ubuntu1.4 \
openssl=3.0.2-0ubuntu1.15 \
libpam-modules=1.4.0-11ubuntu2.4 \
libpam-modules-bin=1.4.0-11ubuntu2.4 \
libpam-runtime=1.4.0-11ubuntu2.4 \
libpam0g=1.4.0-11ubuntu2.4 \
login=1:4.8.1-2ubuntu2.2 \
passwd=1:4.8.1-2ubuntu2.2 \
uidmap=1:4.8.1-2ubuntu2.2 \
binutils=2.38-4ubuntu2.6

RUN cd /tmp && \
wget https://www.python.org/ftp/python/3.10.12/Python-3.10.12.tgz && \
tar -xvf Python-3.10.12.tgz && \
cd Python-3.10.12 && \
./configure --enable-optimizations --with-ssl && \
make && make install && \
cd .. && rm -r Python-3.10.12 && \
ln -s /usr/local/bin/python3 /usr/local/bin/python && \
ln -s /usr/local/bin/pip3 /usr/local/bin/pip

RUN export CUDNN_PATH=$(python -c 'import os; import nvidia.cublas.lib; import nvidia.cudnn.lib; print(os.path.dirname(nvidia.cublas.lib.__file__) + ":" + os.path.dirname(nvidia.cudnn.lib.__file__))') && \
echo 'export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:'${CUDNN_PATH} >> ~/.bashrc

ENV MPI4PY_VERSION="3.1.5"
ENV RELEASE_URL="https://github.com/mpi4py/mpi4py/archive/refs/tags/${MPI4PY_VERSION}.tar.gz"

RUN curl -L ${RELEASE_URL} | tar -zx -C /tmp \
&& sed -i 's/>= 40\\.9\\.0/>= 40.9.0, < 69/g' /tmp/mpi4py-${MPI4PY_VERSION}/pyproject.toml \
&& pip install /tmp/mpi4py-${MPI4PY_VERSION} \
&& rm -rf /tmp/mpi4py*

RUN python -m pip install pip --upgrade \
&& python -m pip install hatch

WORKDIR /app
COPY . .
Expand Down
Loading

0 comments on commit 419278c

Please sign in to comment.