diff --git a/.env b/.env
index 34cb2bc..aca1213 100644
--- a/.env
+++ b/.env
@@ -30,8 +30,13 @@ DEBUG=True
 # Local models:
 # You can also link a local folder path to use a custom model. If you do so, you should also mount the folder in the
 # docker run command as a volume, or include the model directory in your Dockerfile to bake it into the image.
-# e.g. WHISPER_MODEL="/app/models/custom"
-# docker cmd: -v /path/to/custom/model:/app/models/custom
+# Note that for the default tensorrt-llm whisper engine, the simplest way to get a converted model is to use
+# hatch to start the server locally once. Specify the WHISPER_MODEL and ALIGN_MODEL here, then run
+# "hatch run runtime:launch" in your terminal. This will download and convert these models.
+# You'll then find the converted models in cloned_wordcab_transcribe_repo/src/wordcab_transcribe/whisper_models
+# Then in your Dockerfile, copy the converted models to the /app/src/wordcab_transcribe/whisper_models folder.
+# Example for WHISPER_MODEL: COPY cloned_wordcab_transcribe_repo/src/wordcab_transcribe/whisper_models/large-v3 /app/src/wordcab_transcribe/whisper_models/large-v3
+# Example for ALIGN_MODEL: COPY cloned_wordcab_transcribe_repo/src/wordcab_transcribe/whisper_models/tiny /app/src/wordcab_transcribe/whisper_models/tiny
 WHISPER_MODEL="large-v3"
 # You can specify one of two engines, "faster-whisper" or "tensorrt-llm". At the moment, "faster-whisper" is more
 # stable, adjustable, and accurate, while "tensorrt-llm" is faster but less accurate and adjustable.
diff --git a/README.md b/README.md
index 8482953..6e7d2db 100644
--- a/README.md
+++ b/README.md
@@ -227,18 +227,18 @@ with open("youtube_video_output.json", "w", encoding="utf-8") as f:

 ## Running Local Models

-To run the API with local models, you need to mount a volume to the container or
-include the models in the image. You then need to modify the `.env` file to point to the local model,
-as shown below:
+You can link a local folder path to use a custom model. If you do so, you should mount the folder in the
+docker run command as a volume, or include the model directory in your Dockerfile to bake it into the image.

-```
-WHISPER_MODEL="/app/models/custom"
-```
+**Note** that for the default `tensorrt-llm` whisper engine, the simplest way to get a converted model is to use
+`hatch` to start the server locally once. Specify the `WHISPER_MODEL` and `ALIGN_MODEL` in `.env`, then run
+`hatch run runtime:launch` in your terminal. This will download and convert these models.
+
+You'll then find the converted models in `cloned_wordcab_transcribe_repo/src/wordcab_transcribe/whisper_models`.
+Then in your Dockerfile, copy the converted models to the `/app/src/wordcab_transcribe/whisper_models` directory.

-Note that if you're using the `tensorrt_llm` whisper engine, and these are not located in the
-container, the default directory these models will be saved to is `/app/src/wordcab_transcribe/whisper_models`.
-If you're saving/mounting models to this directory, be sure to see the supported models in the `.env` file,
-so your self-hosted model does not conflict with the default model names.
+Example Dockerfile line for `WHISPER_MODEL`: `COPY cloned_wordcab_transcribe_repo/src/wordcab_transcribe/whisper_models/large-v3 /app/src/wordcab_transcribe/whisper_models/large-v3`
+Example Dockerfile line for `ALIGN_MODEL`: `COPY cloned_wordcab_transcribe_repo/src/wordcab_transcribe/whisper_models/tiny /app/src/wordcab_transcribe/whisper_models/tiny`

 ## 🚀 Contributing

diff --git a/pyproject.toml b/pyproject.toml
index c1c2759..a96b793 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -111,7 +111,7 @@ features = [
 ]

 [tool.hatch.envs.runtime.scripts]
-launch = "uvicorn --host='::' --port=5001 src.wordcab_transcribe.main:app"
+launch = "uvicorn --host=0.0.0.0 --port=5001 src.wordcab_transcribe.main:app"

 [tool.hatch.envs.quality]
 features = [
diff --git a/src/wordcab_transcribe/config.py b/src/wordcab_transcribe/config.py
index 365796c..6f30e32 100644
--- a/src/wordcab_transcribe/config.py
+++ b/src/wordcab_transcribe/config.py
@@ -93,22 +93,26 @@ def project_name_must_not_be_none(cls, value: str):  # noqa: B902, N805
     @field_validator("whisper_model")
     def whisper_model_compatibility_check(cls, value: str):  # noqa: B902, N805
         """Check that the whisper engine is compatible."""
-        if value.lower() not in [
-            "tiny",
-            "tiny.en",
-            "base",
-            "base.en",
-            "small",
-            "small.en",
-            "medium",
-            "medium.en",
-            "large",
-            "large-v1",
-            "large-v2",
-            "large-v3",
-            "distil-large-v2",
-            "distil-large-v3",
-        ]:
+        if (
+            value.lower()
+            not in [
+                "tiny",
+                "tiny.en",
+                "base",
+                "base.en",
+                "small",
+                "small.en",
+                "medium",
+                "medium.en",
+                "large",
+                "large-v1",
+                "large-v2",
+                "large-v3",
+                "distil-large-v2",
+                "distil-large-v3",
+            ]
+            and "/" not in value
+        ):
             raise ValueError(
                 "The whisper models must be one of `tiny`, `tiny.en`, `base`,"
                 " `base.en`, `small`, `small.en`, `medium`, `medium.en`, `large`,"
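The `config.py` hunk above loosens the `whisper_model` validator: any value containing a `/` is now treated as a local model path and bypasses the allow-list. Below is a minimal standalone sketch of the resulting behavior; the real check runs inside pydantic's `@field_validator`, and the `/app/models/custom` path and `huge-v9` name are illustrative only.

```
# Standalone sketch of the updated whisper_model check from config.py above.
# The real validator runs via pydantic's @field_validator; this only mirrors
# the membership test so the behavior change is easy to see.

SUPPORTED_WHISPER_MODELS = [
    "tiny", "tiny.en", "base", "base.en", "small", "small.en",
    "medium", "medium.en", "large", "large-v1", "large-v2", "large-v3",
    "distil-large-v2", "distil-large-v3",
]


def whisper_model_is_valid(value: str) -> bool:
    """Accept a known model name, or any value containing '/' (a local path)."""
    return value.lower() in SUPPORTED_WHISPER_MODELS or "/" in value


assert whisper_model_is_valid("large-v3")            # built-in name, as before
assert whisper_model_is_valid("/app/models/custom")  # local folder path, now accepted
assert not whisper_model_is_valid("huge-v9")         # unknown name, still rejected
```

One side effect worth noting: a local model only passes if the value contains a slash, so a bare relative folder name would still be rejected.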
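For the volume-mount route mentioned in the README change, the flags would look roughly like the sketch below. The image name `wordcab-transcribe` and the host path are placeholders; the container path and port come from the diff (the default model directory and the `launch` script's `--port=5001`).

```
docker run -d --gpus all -p 5001:5001 \
  -v /path/to/converted/models:/app/src/wordcab_transcribe/whisper_models \
  wordcab-transcribe
```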