diff --git a/README.md b/README.md
index 09009fe..10f860f 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,9 @@
# LinTO-STT
-LinTO-STT is the transcription service within the [LinTO stack](https://github.com/linto-ai/linto-platform-stack),
-which can currently work with Speech-To-Text (STT) models.
+LinTO-STT is an API for Automatic Speech Recognition (ASR).
+
+LinTO-STT can either be used as a standalone transcription service or deployed within a micro-services infrastructure using a message broker connector.
+
The following families of STT models are currently supported (please refer to respective documentation for more details):
* [Kaldi models](kaldi/README.md)
* [Whisper models](whisper/README.md)
diff --git a/kaldi/Dockerfile b/kaldi/Dockerfile
index f062951..a28632e 100644
--- a/kaldi/Dockerfile
+++ b/kaldi/Dockerfile
@@ -1,5 +1,5 @@
FROM python:3.9
-LABEL maintainer="irebai@linagora.com, rbaraglia@linagora.com"
+LABEL maintainer="contact@linto.ai, jlouradour@linagora.com, dgaynullin@linagora.com"
ARG KALDI_MKL
diff --git a/kaldi/README.md b/kaldi/README.md
index 0e3a31a..7ebfa85 100644
--- a/kaldi/README.md
+++ b/kaldi/README.md
@@ -1,7 +1,6 @@
# LinTO-STT-Kaldi
-LinTO-STT-Kaldi is the transcription service within the [LinTO stack](https://github.com/linto-ai/linto-platform-stack)
-based on Speech-To-Text (STT) models trained with [Kaldi](https://github.com/kaldi-asr/kaldi).
+LinTO-STT-Kaldi is an API for Automatic Speech Recognition (ASR) based on models trained with [Kaldi](https://github.com/kaldi-asr/kaldi).
LinTO-STT-Kaldi can either be used as a standalone transcription service or deployed within a micro-services infrastructure using a message broker connector.
diff --git a/kaldi/requirements.txt b/kaldi/requirements.txt
index 867a095..5eec3f4 100644
--- a/kaldi/requirements.txt
+++ b/kaldi/requirements.txt
@@ -2,7 +2,7 @@ celery[redis,auth,msgpack]>=4.4.7
numpy>=1.18.5
flask>=1.1.2
flask-cors>=3.0.10
-flask-swagger-ui>=3.36.0
+flask-swagger-ui==3.36.0
flask-sock
gevent
gunicorn
diff --git a/wait-for-it.sh b/wait-for-it.sh
index 92cbdbb..f6f20d1 100755
--- a/wait-for-it.sh
+++ b/wait-for-it.sh
@@ -67,6 +67,8 @@ wait_for_wrapper()
return $WAITFORIT_RESULT
}
+echo "NOCOMMIT wait-for-it $*"
+
# process arguments
while [[ $# -gt 0 ]]
do
@@ -173,7 +175,7 @@ fi
if [[ $WAITFORIT_CLI != "" ]]; then
if [[ $WAITFORIT_RESULT -ne 0 && $WAITFORIT_STRICT -eq 1 ]]; then
- echoerr "$WAITFORIT_cmdname: strict mode, refusing to execute subprocess"
+ echoerr "$WAITFORIT_cmdname: strict mode, refusing to execute subprocess: ${WAITFORIT_CLI[*]}"
exit $WAITFORIT_RESULT
fi
exec "${WAITFORIT_CLI[@]}"
diff --git a/whisper/.envdefault b/whisper/.envdefault
index 88c27ea..75919c0 100644
--- a/whisper/.envdefault
+++ b/whisper/.envdefault
@@ -13,13 +13,18 @@ BROKER_PASS=
# STT MODELING PARAMETERS
############################################
-# The model can be a path to a model, or a model name ("tiny", "base", "small", "medium", "large-v1", "large-v2" or "large-v3")
-MODEL=medium
+# The model can be a path to a model (e.g. "/root/.cache/whisper/large-v3.pt", "/root/.cache/huggingface/hub/models--openai--whisper-large-v3"),
+# or a model size ("tiny", "base", "small", "medium", "large-v1", "large-v2" or "large-v3")
+# or a HuggingFace model name (e.g. "distil-whisper/distil-large-v2")
+MODEL=large-v3
# The language can be in different formats: "en", "en-US", "English", ...
# If not set or set to "*", the language will be detected automatically.
LANGUAGE=*
+# Prompt to use for the model. This can be used to provide context to the model, to encourage the transcription of disfluencies, or to favour a particular behaviour regarding punctuation and capitalization.
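+# Example (purely illustrative): a prompt containing hesitations and casual punctuation can bias the output towards that style, e.g.
+# PROMPT="Hmm, let me think... okay, so, here is what I wanted to say."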
+PROMPT=
+
# An alignment wav2vec model can be used to get word timestamps.
# It can be a path to a model, a language code (fr, en, ...), or "wav2vec" to automatically chose a model for the language
# This option is experimental (and not implemented with ctranslate2).
@@ -30,7 +35,9 @@ LANGUAGE=*
############################################
# Device to use. It can be "cuda" to force/check GPU, "cpu" to force computation on CPU, or a specific GPU ("cuda:0", "cuda:1", ...)
-# DEVICE=cuda:0
+# DEVICE=cuda
+# CUDA_DEVICE_ORDER=PCI_BUS_ID
+# CUDA_VISIBLE_DEVICES=0
# Number of threads per worker when running on CPU
OMP_NUM_THREADS=4
diff --git a/whisper/Dockerfile.ctranslate2 b/whisper/Dockerfile.ctranslate2
index 52fbc44..ed19116 100644
--- a/whisper/Dockerfile.ctranslate2
+++ b/whisper/Dockerfile.ctranslate2
@@ -1,5 +1,5 @@
FROM ghcr.io/opennmt/ctranslate2:latest-ubuntu20.04-cuda11.2
-LABEL maintainer="jlouradour@linagora.com"
+LABEL maintainer="contact@linto.ai, jlouradour@linagora.com, dgaynullin@linagora.com"
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ffmpeg git
diff --git a/whisper/Dockerfile.ctranslate2.cpu b/whisper/Dockerfile.ctranslate2.cpu
index c8d6972..df5eac7 100644
--- a/whisper/Dockerfile.ctranslate2.cpu
+++ b/whisper/Dockerfile.ctranslate2.cpu
@@ -1,5 +1,5 @@
FROM python:3.9
-LABEL maintainer="jlouradour@linagora.com"
+LABEL maintainer="contact@linto.ai, jlouradour@linagora.com, dgaynullin@linagora.com"
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ffmpeg git
diff --git a/whisper/Dockerfile.torch b/whisper/Dockerfile.torch
index 2f3a0d0..06b22f3 100644
--- a/whisper/Dockerfile.torch
+++ b/whisper/Dockerfile.torch
@@ -1,5 +1,5 @@
FROM python:3.9
-LABEL maintainer="jlouradour@linagora.com"
+LABEL maintainer="contact@linto.ai, jlouradour@linagora.com, dgaynullin@linagora.com"
RUN apt-get update && apt-get install -y --no-install-recommends ffmpeg
diff --git a/whisper/Dockerfile.torch.cpu b/whisper/Dockerfile.torch.cpu
index e9198d5..17a3fb8 100644
--- a/whisper/Dockerfile.torch.cpu
+++ b/whisper/Dockerfile.torch.cpu
@@ -1,5 +1,5 @@
FROM python:3.9
-LABEL maintainer="jlouradour@linagora.com"
+LABEL maintainer="contact@linto.ai, jlouradour@linagora.com, dgaynullin@linagora.com"
RUN apt-get update && apt-get install -y --no-install-recommends ffmpeg
diff --git a/whisper/README.md b/whisper/README.md
index 20a3c7d..41dc46a 100644
--- a/whisper/README.md
+++ b/whisper/README.md
@@ -1,17 +1,71 @@
# LinTO-STT-Whisper
-LinTO-STT-Whisper is the transcription service within the [LinTO stack](https://github.com/linto-ai/linto-platform-stack)
-based on Speech-To-Text (STT) [Whisper models](https://openai.com/research/whisper).
+LinTO-STT-Whisper is an API for Automatic Speech Recognition (ASR) based on [Whisper models](https://openai.com/research/whisper).
LinTO-STT-Whisper can either be used as a standalone transcription service or deployed within a micro-services infrastructure using a message broker connector.
## Pre-requisites
+### Requirements
+
+The transcription service requires [docker](https://www.docker.com/products/docker-desktop/) up and running.
+
+To enable GPU capabilities, you also need to install
+[nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html).
+
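+As a quick sanity check (this command is only an illustration), the following should print the GPU status from inside a container once the toolkit is correctly installed:
+
+```bash
+docker run --rm --gpus all ubuntu nvidia-smi
+```
+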
### Hardware
+
To run the transcription models you'll need:
-* At least 8Go of disk space to build the docker image.
+* At least 8GB of disk space to build the docker image,
+ plus several GB of disk space for the model itself (which can take up to 5GB depending on the model size).
* Up to 7GB of RAM depending on the model used.
-* One CPU per worker. Inference time scales on CPU performances.
+* One CPU per worker. Inference time scales with CPU performance.
+
+On GPU, approximate peak VRAM usage is indicated in the following table
+for some model sizes, depending on the backend
+(note that the lowest precision supported by the GPU card is automatically chosen when loading the model).
+
+The backend is either [ct2/faster_whisper](whisper/Dockerfile.ctranslate2) (precision int8, float16 or float32) or [torch/whisper_timestamped](whisper/Dockerfile.torch) (precision float32).
+
+| Model size | ct2 int8 | ct2 float16 | ct2 float32 | torch float32 |
+|---|---|---|---|---|
+| tiny | 1.5G | 1.5G | | |
+| distil-whisper/distil-large-v2 | 2.2G | 3.2G | 4.8G | 4.4G |
+| large (large-v3, ...) | 2.8G | 4.8G | 8.2G | 10.4G |
+
### Model(s)
@@ -23,8 +77,8 @@ and can occupy several GB of disk space.
LinTO-STT-Whisper has also the option to work with a wav2vec model to perform word alignment.
The wav2vec model can be specified either
-* (TorchAudio) with a string corresponding to a `torchaudio` pipeline (e.g. "WAV2VEC2_ASR_BASE_960H") or
-* (HuggingFace's Transformers) with a string corresponding to a HuggingFace repository of a wav2vec model (e.g. "jonatasgrosman/wav2vec2-large-xlsr-53-english"), or
+* (TorchAudio) with a string corresponding to a `torchaudio` pipeline (e.g. `WAV2VEC2_ASR_BASE_960H`) or
+* (HuggingFace's Transformers) with a string corresponding to a HuggingFace repository of a wav2vec model (e.g. `jonatasgrosman/wav2vec2-large-xlsr-53-english`), or
* (SpeechBrain) with a path corresponding to a folder with a SpeechBrain model
Default wav2vec models are provided for French (fr), English (en), Spanish (es), German (de), Dutch (nl), Japanese (ja), Chinese (zh).
@@ -32,8 +86,6 @@ Default wav2vec models are provided for French (fr), English (en), Spanish (es),
But we advise not to use a companion wav2vec alignment model.
This is neither needed nor tested anymore.
-### Docker
-The transcription service requires docker up and running.
### (micro-service) Service broker and shared folder
The STT only entry point in task mode are tasks posted on a message broker. Supported message broker are RabbitMQ, Redis, Amazon SQS.
@@ -63,14 +115,16 @@ cp whisper/.envdefault whisper/.env
| PARAMETER | DESCRIPTION | EXEMPLE |
|---|---|---|
| SERVICE_MODE | STT serving mode see [Serving mode](#serving-mode) | `http` \| `task` |
-| MODEL | Path to a Whisper model, type of Whisper model used, or HuggingFace identifier of a Whisper model. | \ \| `large-v3` \| `distil-whisper/distil-large-v2` \| ... |
+| MODEL | Path to a Whisper model, type of Whisper model used, or HuggingFace identifier of a Whisper model. | `large-v3` \| `distil-whisper/distil-large-v2` \| \<ASR_PATH\> \| ... |
| LANGUAGE | (Optional) Language to recognize | `*` \| `fr` \| `fr-FR` \| `French` \| `en` \| `en-US` \| `English` \| ... |
| PROMPT | (Optional) Prompt to use for the Whisper model | `some free text to encourage a certain transcription style (disfluencies, no punctuation, ...)` |
-| ALIGNMENT_MODEL | (Optional) Path to the wav2vec model for word alignment, or name of HuggingFace repository or torchaudio pipeline | \ \| `WAV2VEC2_ASR_BASE_960H` \| `jonatasgrosman/wav2vec2-large-xlsr-53-english` \| ... |
-| CONCURRENCY | Maximum number of parallel requests | `3` |
+| ALIGNMENT_MODEL | (Optional and deprecated) Path to the wav2vec model for word alignment, or name of HuggingFace repository or torchaudio pipeline | `WAV2VEC2_ASR_BASE_960H` \| `jonatasgrosman/wav2vec2-large-xlsr-53-english` \| \<WAV2VEC_PATH\> \| ... |
+| DEVICE | (Optional) Device to use for the model | `cpu` \| `cuda` \| ... |
+| CUDA_VISIBLE_DEVICES | (Optional) GPU device index to use, if several are available. We also recommend setting `CUDA_DEVICE_ORDER=PCI_BUS_ID` on multi-GPU machines | `0` \| `1` \| `2` \| ... |
+| CONCURRENCY | Maximum number of parallel requests | `2` |
| SERVICE_NAME | (For the task mode) queue's name for task processing | `my-stt` |
| SERVICE_BROKER | (For the task mode) URL of the message broker | `redis://my-broker:6379` |
-| BROKER_PASS | (For the task mode only) broker password | `my-password` |
+| BROKER_PASS | (For the task mode only) broker password | `my-password` \| (empty) |
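+
+For illustration, a minimal `whisper/.env` for HTTP serving on GPU could look as follows (the values are examples, not recommendations):
+
+```bash
+SERVICE_MODE=http
+MODEL=large-v3
+LANGUAGE=fr
+DEVICE=cuda
+CONCURRENCY=2
+```
+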
#### MODEL environment variable
@@ -79,7 +133,7 @@ The model will be (downloaded if required and) loaded in memory when calling the
When using a Whisper model from Hugging Face (transformers) along with ctranslate2 (faster_whisper),
it will also download torch library to make the conversion from torch to ctranslate2.
-If you want to preload the model (and later specify a path `ASR_PATH` as `MODEL`),
+If you want to preload the model (and later specify a path `<ASR_PATH>` as `MODEL`),
you may want to download one of OpenAI Whisper models:
* Multi-lingual Whisper models can be downloaded with the following links:
* [tiny](https://openaipublic.azureedge.net/main/whisper/models/65147644a518d12f04e32d6f3b26facc3f8dd46e5390956a9424a650c0ce22b9/tiny.pt)
@@ -144,26 +198,28 @@ The SERVICE_MODE value in the .env should be set to ```http```.
```bash
docker run --rm \
-p HOST_SERVING_PORT:80 \
--v ASR_PATH:/opt/model.pt \
--env-file whisper/.env \
linto-stt-whisper:latest
```
This will run a container providing an [HTTP API](#http-api) binded on the host HOST_SERVING_PORT port.
-You may also want to mount your cache folder CACHE_PATH (e.g. "~/.cache") ```-v CACHE_PATH:/root/.cache```
-in order to avoid downloading models each time.
-
-Also if you want to specifiy a custom alignment model already downloaded in a folder WAV2VEC_PATH,
-you can add option ```-v WAV2VEC_PATH:/opt/wav2vec``` and environment variable ```ALIGNMENT_MODEL=/opt/wav2vec```.
+You may also want to add specific options (a combined example is shown after this list):
+* To enable GPU capabilities, add ```--gpus all```.
+ Note that you can use the environment variable `DEVICE=cuda` to make sure the GPU is used (and maybe set `CUDA_VISIBLE_DEVICES` if several GPU cards are available).
+* To mount a local cache folder `<CACHE_PATH>` (e.g. "`$HOME/.cache`") and avoid downloading models each time,
+ use ```-v <CACHE_PATH>:/root/.cache```.
+ If you use the `MODEL=/opt/model.pt` environment variable, you may want to mount the model file (or folder) with the option ```-v <ASR_PATH>:/opt/model.pt```.
+* If you want to specify a custom alignment model already downloaded in a folder `<WAV2VEC_PATH>`,
+ you can add the option ```-v <WAV2VEC_PATH>:/opt/wav2vec``` and the environment variable ```ALIGNMENT_MODEL=/opt/wav2vec```.
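+
+For example, a complete command combining these options could look like the following (the port and paths are placeholders to adapt):
+
+```bash
+docker run --rm \
+  -p 8080:80 \
+  --gpus all \
+  -v $HOME/.cache:/root/.cache \
+  --env-file whisper/.env \
+  linto-stt-whisper:latest
+```
+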
**Parameters:**
| Variables | Description | Example |
|:-|:-|:-|
-| HOST_SERVING_PORT | Host serving port | 8080 |
-| ASR_PATH | Path to the Whisper model on the host machine mounted to /opt/model.pt | /my/path/to/models/medium.pt |
-| CACHE_PATH | (Optional) Path to a folder to download wav2vec alignment models when relevant | /home/username/.cache |
-| WAV2VEC_PATH | (Optional) Path to a folder to a custom wav2vec alignment model | /my/path/to/models/wav2vec |
+| `HOST_SERVING_PORT` | Host serving port | 8080 |
+| `<CACHE_PATH>` | (Optional) Path to a folder to download wav2vec alignment models when relevant | /home/username/.cache |
+| `<ASR_PATH>` | Path to the Whisper model on the host machine, mounted to /opt/model.pt | /my/path/to/models/medium.pt |
+| `<WAV2VEC_PATH>` | (Optional) Path to a folder with a custom wav2vec alignment model | /my/path/to/models/wav2vec |
### Micro-service within LinTO-Platform stack
The TASK serving mode connect a celery worker to a message broker.
@@ -174,25 +230,27 @@ You need a message broker up and running at MY_SERVICE_BROKER.
```bash
docker run --rm \
--v ASR_PATH:/opt/model.pt \
-v SHARED_AUDIO_FOLDER:/opt/audio \
--env-file whisper/.env \
linto-stt-whisper:latest
```
-You may also want to mount your cache folder CACHE_PATH (e.g. "~/.cache") ```-v CACHE_PATH:/root/.cache```
-in order to avoid downloading models each time.
-
-Also if you want to specifiy a custom alignment model already downloaded in a folder WAV2VEC_PATH,
-you can add option ```-v WAV2VEC_PATH:/opt/wav2vec``` and environment variable ```ALIGNMENT_MODEL=/opt/wav2vec```.
+You may also want to add specific options (a combined example is shown after this list):
+* To enable GPU capabilities, add ```--gpus all```.
+ Note that you can use the environment variable `DEVICE=cuda` to make sure the GPU is used (and maybe set `CUDA_VISIBLE_DEVICES` if several GPU cards are available).
+* To mount a local cache folder `<CACHE_PATH>` (e.g. "`$HOME/.cache`") and avoid downloading models each time,
+ use ```-v <CACHE_PATH>:/root/.cache```.
+ If you use the `MODEL=/opt/model.pt` environment variable, you may want to mount the model file (or folder) with the option ```-v <ASR_PATH>:/opt/model.pt```.
+* If you want to specify a custom alignment model already downloaded in a folder `<WAV2VEC_PATH>`,
+ you can add the option ```-v <WAV2VEC_PATH>:/opt/wav2vec``` and the environment variable ```ALIGNMENT_MODEL=/opt/wav2vec```.
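+
+For example, a complete task-mode command combining these options could look like the following (the paths are placeholders to adapt):
+
+```bash
+docker run --rm \
+  --gpus all \
+  -v $HOME/.cache:/root/.cache \
+  -v /my/path/to/shared/audio:/opt/audio \
+  --env-file whisper/.env \
+  linto-stt-whisper:latest
+```
+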
**Parameters:**
| Variables | Description | Example |
|:-|:-|:-|
-| SHARED_AUDIO_FOLDER | Shared audio folder mounted to /opt/audio | /my/path/to/models/vosk-model |
-| ASR_PATH | Path to the Whisper model on the host machine mounted to /opt/model.pt | /my/path/to/models/medium.pt |
-| CACHE_PATH | (Optional) Path to a folder to download wav2vec alignment models when relevant | /home/username/.cache |
-| WAV2VEC_PATH | (Optional) Path to a folder to a custom wav2vec alignment model | /my/path/to/models/wav2vec |
+| `<SHARED_AUDIO_FOLDER>` | Shared audio folder mounted to /opt/audio | /my/path/to/shared/audio |
+| `<CACHE_PATH>` | (Optional) Path to a folder to download wav2vec alignment models when relevant | /home/username/.cache |
+| `<ASR_PATH>` | Path to the Whisper model on the host machine, mounted to /opt/model.pt | /my/path/to/models/medium.pt |
+| `<WAV2VEC_PATH>` | (Optional) Path to a folder with a custom wav2vec alignment model | /my/path/to/models/wav2vec |
## Usages
@@ -274,9 +332,10 @@ This project is developped under the AGPLv3 License (see LICENSE).
## Acknowlegment.
-* [Faster Whisper](https://github.com/SYSTRAN/faster-whisper)
-* [OpenAI Whisper](https://github.com/openai/whisper)
* [Ctranslate2](https://github.com/OpenNMT/CTranslate2)
+ * [Faster-Whisper](https://github.com/SYSTRAN/faster-whisper)
+* [OpenAI Whisper](https://github.com/openai/whisper)
+ * [Whisper-Timestamped](https://github.com/linto-ai/whisper-timestamped)
+* [HuggingFace Transformers](https://github.com/huggingface/transformers)
* [SpeechBrain](https://github.com/speechbrain/speechbrain)
* [TorchAudio](https://github.com/pytorch/audio)
-* [HuggingFace Transformers](https://github.com/huggingface/transformers)
\ No newline at end of file
diff --git a/whisper/RELEASE.md b/whisper/RELEASE.md
index 2967139..4d46f19 100644
--- a/whisper/RELEASE.md
+++ b/whisper/RELEASE.md
@@ -1,3 +1,7 @@
+# 1.0.1
+- Support of model.safetensors
+- ct2/faster_whisper: information about the precision used by the model is now added to the logs
+
# 1.0.0
- First build of linto-stt-whisper
- Based on 4.0.5 of linto-stt https://github.com/linto-ai/linto-stt/blob/a54b7b7ac2bc491a1795bb6dfb318a39c8b76d63/RELEASE.md
diff --git a/whisper/requirements.ctranslate2.txt b/whisper/requirements.ctranslate2.txt
index 2ddc118..530dcff 100644
--- a/whisper/requirements.ctranslate2.txt
+++ b/whisper/requirements.ctranslate2.txt
@@ -2,7 +2,7 @@ celery[redis,auth,msgpack]>=4.4.7
flask>=1.1.2
flask-cors>=3.0.10
flask-sock
-flask-swagger-ui>=3.36.0
+flask-swagger-ui==3.36.0
gevent
gunicorn
lockfile
diff --git a/whisper/requirements.torch.txt b/whisper/requirements.torch.txt
index 75e747c..3976414 100644
--- a/whisper/requirements.torch.txt
+++ b/whisper/requirements.torch.txt
@@ -2,7 +2,7 @@ celery[redis,auth,msgpack]>=4.4.7
flask>=1.1.2
flask-cors>=3.0.10
flask-sock
-flask-swagger-ui>=3.36.0
+flask-swagger-ui==3.36.0
gevent
gunicorn
lockfile
@@ -13,7 +13,6 @@ speechbrain
transformers
wavio>=0.0.4
websockets
-# openai-whisper
-git+https://github.com/linto-ai/whisper-timestamped.git
+whisper-timestamped
onnxruntime
torchaudio
\ No newline at end of file
diff --git a/whisper/stt/processing/load_model.py b/whisper/stt/processing/load_model.py
index b87a414..c3f1e88 100644
--- a/whisper/stt/processing/load_model.py
+++ b/whisper/stt/processing/load_model.py
@@ -65,20 +65,41 @@ def load_whisper_model(model_type_or_file, device="cpu", download_root=None):
)
logger.info(f"CTranslate2 model in {output_dir}")
if not os.path.isdir(output_dir):
- import huggingface_hub
+ from transformers.utils import cached_file
+ import json
+ kwargs = dict(cache_dir=download_root, use_auth_token=None, revision=None)
delete_hf_path = False
if not os.path.isdir(model_type_or_file):
- hf_path = huggingface_hub.hf_hub_download(
- repo_id=model_type_or_file, filename="pytorch_model.bin"
- )
+ model_path = None
+ hf_path = None
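+ # Look for the first available checkpoint file among known filenames;
+ # "*.index.json" files describe sharded checkpoints and list the shard files under "weight_map"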
+ for candidate in ["pytorch_model.bin", "model.safetensors", "whisper.ckpt", "pytorch_model.bin.index.json", "model.safetensors.index.json"]:
+ try:
+ hf_path = model_path = cached_file(model_type_or_file, candidate, **kwargs)
+ except OSError:
+ continue
+ if candidate.endswith("index.json"):
+ index_file = model_path
+ mapping = json.load(open(index_file))
+ assert "weight_map" in mapping
+ assert isinstance(mapping["weight_map"], dict)
+ model_path = list(set(mapping["weight_map"].values()))
+ folder = os.path.dirname(index_file)
+ model_path = [os.path.join(folder, p) for p in model_path]
+ break
+ if model_path is None:
+ raise RuntimeError(f"Could not find model {model_type_or_file} on HuggingFace nor in local folders.")
hf_path = os.path.dirname(os.path.dirname(os.path.dirname(hf_path)))
-
delete_hf_path = not os.path.exists(hf_path)
else:
- assert os.path.isfile(
- os.path.join(model_type_or_file, "pytorch_model.bin")
- ), f"Could not find pytorch_model.bin in {model_type_or_file}"
+ hf_path = None
+ for candidate in ["pytorch_model.bin", "model.safetensors", "whisper.ckpt", "pytorch_model.bin.index.json", "model.safetensors.index.json"]:
+ model_path = os.path.join(model_type_or_file, candidate)
+ if os.path.exists(model_path):
+ hf_path = model_path
+ break
+ if hf_path is None:
+ raise RuntimeError(f"Could not find any model checkpoint (pytorch_model.bin, model.safetensors, ...) in {model_type_or_file}")
check_torch_installed()
@@ -135,6 +156,7 @@ def load_whisper_model(model_type_or_file, device="cpu", download_root=None):
# num_workers=1,
# download_root=os.path.join(download_root, f"huggingface/hub/models--guillaumekln--faster-whisper-{model_type_or_file}"),
)
+ logger.info(f"Whisper model loaded with compute_type={compute_type}. (t={time.time() - start}s)")
break
except ValueError as err:
logger.info(