diff --git a/README.md b/README.md
index bd8e1ed..b2427ae 100644
--- a/README.md
+++ b/README.md
@@ -225,6 +225,21 @@ with open("youtube_video_output.json", "w", encoding="utf-8") as f:
     json.dump(r_json, f, indent=4, ensure_ascii=False)
 ```
 
+## Running Local Models
+
+To run the API with local models, either mount a volume containing the models into
+the container or include the models in the image. Then update the `.env` file to
+point to the local model path, as shown below:
+
+```
+WHISPER_MODEL="/app/models/custom"
+```
+
+Note that if you're using the `tensorrt_llm` Whisper engine and the models are not already in the
+container, they are downloaded to `/app/src/wordcab_transcribe/whisper_models` by default.
+If you save or mount models to this directory, check the supported model names in the `.env` file
+so your self-hosted model does not conflict with the default model names.
+
 ## 🚀 Contributing
 
 ### Getting started
diff --git a/pyproject.toml b/pyproject.toml
index 3cb475b..a96b793 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -27,11 +27,11 @@ classifiers = [
     "Programming Language :: Python :: Implementation :: PyPy",
 ]
 dependencies = [
+    "tensorrt_llm==0.9.0.dev2024032600",
     "Cython==3.0.10",
     "youtokentome @ git+https://github.com/gburlet/YouTokenToMe.git@dependencies",
     "deepmultilingualpunctuation==1.0.1",
     "nemo_toolkit[asr]==1.23.0",
-    "tensorrt_llm==0.9.0.dev2024032600",
     "aiohttp==3.9.3",
     "aiofiles==23.2.1",
     "boto3",
diff --git a/src/wordcab_transcribe/services/longform_diarization/diarize_service.py b/src/wordcab_transcribe/services/longform_diarization/diarize_service.py
index 1a28bd6..5df80ae 100644
--- a/src/wordcab_transcribe/services/longform_diarization/diarize_service.py
+++ b/src/wordcab_transcribe/services/longform_diarization/diarize_service.py
@@ -124,7 +124,7 @@ def __call__(
             str(processed_audio_filepath),
             num_speakers=oracle_num_speakers,
             out_dir=temp_dir,
-            num_workers=1,
+            num_workers=0,
         )
         segments = self.convert_annotation_to_segments(annotation)
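
Below is a minimal sketch of the volume-mount setup described in the README section above. The image name (`wordcab-transcribe`), tag, and port are assumptions, not values confirmed by this diff; adjust them to match your build and deployment.

```bash
# Sketch only: image name/tag and port are assumptions, adjust to your setup.
# Mount a local ./models directory so that WHISPER_MODEL="/app/models/custom"
# resolves inside the container, and mount the edited .env file alongside it.
docker run -d --gpus all \
  -v "$(pwd)/models:/app/models" \
  -v "$(pwd)/.env:/app/.env" \
  -p 5001:5001 \
  wordcab-transcribe:latest
```

If you're using the `tensorrt_llm` Whisper engine, the same approach works for its default download directory: adding `-v "$(pwd)/whisper_models:/app/src/wordcab_transcribe/whisper_models"` would persist or pre-seed those models across container restarts.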