diff --git a/README.md b/README.md
index bd8e1ed..b2427ae 100644
--- a/README.md
+++ b/README.md
@@ -225,6 +225,21 @@ with open("youtube_video_output.json", "w", encoding="utf-8") as f:
     json.dump(r_json, f, indent=4, ensure_ascii=False)
 ```
 
+## Running Local Models
+
+To run the API with local models, either mount a volume containing the models into
+the container or include the models in the image. Then update the `.env` file to
+point to the local model path, as shown below:
+
+```
+WHISPER_MODEL="/app/models/custom"
+```
+
+Note that if you're using the `tensorrt_llm` Whisper engine and the models are not already in the
+container, they are downloaded to `/app/src/wordcab_transcribe/whisper_models` by default.
+If you save or mount models to this directory, check the supported model names in the `.env` file
+so your self-hosted model does not conflict with the default model names.
+
 ## 🚀 Contributing
 
 ### Getting started
diff --git a/pyproject.toml b/pyproject.toml
index 3cb475b..a96b793 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -27,11 +27,11 @@ classifiers = [
     "Programming Language :: Python :: Implementation :: PyPy",
 ]
 dependencies = [
+    "tensorrt_llm==0.9.0.dev2024032600",
     "Cython==3.0.10",
     "youtokentome @ git+https://github.com/gburlet/YouTokenToMe.git@dependencies",
     "deepmultilingualpunctuation==1.0.1",
     "nemo_toolkit[asr]==1.23.0",
-    "tensorrt_llm==0.9.0.dev2024032600",
     "aiohttp==3.9.3",
     "aiofiles==23.2.1",
     "boto3",
diff --git a/src/wordcab_transcribe/services/longform_diarization/diarize_service.py b/src/wordcab_transcribe/services/longform_diarization/diarize_service.py
index 1a28bd6..5df80ae 100644
--- a/src/wordcab_transcribe/services/longform_diarization/diarize_service.py
+++ b/src/wordcab_transcribe/services/longform_diarization/diarize_service.py
@@ -124,7 +124,7 @@ def __call__(
             str(processed_audio_filepath),
             num_speakers=oracle_num_speakers,
             out_dir=temp_dir,
-            num_workers=1,
+            num_workers=0,
         )
         segments = self.convert_annotation_to_segments(annotation)
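
Below is a minimal sketch of the volume-mount setup described in the README section above. The image name (`wordcab-transcribe`), tag, and port are assumptions, not values confirmed by this diff; adjust them to match your build and deployment.

```bash
# Sketch only: image name/tag and port are assumptions, adjust to your setup.
# Mount a local ./models directory so that WHISPER_MODEL="/app/models/custom"
# resolves inside the container, and mount the edited .env file alongside it.
docker run -d --gpus all \
  -v "$(pwd)/models:/app/models" \
  -v "$(pwd)/.env:/app/.env" \
  -p 5001:5001 \
  wordcab-transcribe:latest
```

If you're using the `tensorrt_llm` Whisper engine, the same approach works for its default download directory: adding `-v "$(pwd)/whisper_models:/app/src/wordcab_transcribe/whisper_models"` would persist or pre-seed those models across container restarts.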