diff --git a/.env b/.env index 25bbff0..34cb2bc 100644 --- a/.env +++ b/.env @@ -23,13 +23,13 @@ DEBUG=True # The whisper_model parameter is used to control the model used for ASR. # # Cloud models: -# The available models are: tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, or large-v2 -# You can try different model size, but you should see a trade-off between performance and speed. Note that the -# "distil" whisper models do not support languages other than English. +# The available models are: tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, +# large-v3, distil-large-v2, and distil-large-v3. Note that the distil models only support "en" as a source_lang. +# You can try different model sizes, but you should see a trade-off between performance and speed. # # Local models: # You can also link a local folder path to use a custom model. If you do so, you should also mount the folder in the -# docker run command as a volume. +# docker run command as a volume, or include the model directory in your Dockerfile to bake it into the image. # e.g. WHISPER_MODEL="/app/models/custom" # docker cmd: -v /path/to/custom/model:/app/models/custom WHISPER_MODEL="large-v3" @@ -53,9 +53,9 @@ TOKENIZERS_PARALLELISM=False # --------------------------------------------------- DIARIZATION ---------------------------------------------------- # # # The diarization_backend parameter is used to control the diarization model used. The available options are: -# "longform_diarizer" or "default_diarizer". It's suggested to use "default_diarizer" for better stability. -# The "longform_diarizer" is still being developed. -DIARIZATION_BACKEND="default_diarizer" +# "longform-diarizer" or "default-diarizer". It's suggested to use "default-diarizer" for better stability. +# The "longform-diarizer" is still being developed. 
+DIARIZATION_BACKEND="default-diarizer" # In a MSDD (Multiscale Diarization Decoder) model, the diarization model is trained on multiple window lengths. # The window_lengths are specified in seconds, and separated by a comma. If not specified, the default value will # be "1.5, 1.25, 1.0, 0.75, 0.5". diff --git a/pyproject.toml b/pyproject.toml index a96b793..c1c2759 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -111,7 +111,7 @@ features = [ ] [tool.hatch.envs.runtime.scripts] -launch = "uvicorn --host=0.0.0.0 --port=5001 src.wordcab_transcribe.main:app" +launch = "uvicorn --host='::' --port=5001 src.wordcab_transcribe.main:app" [tool.hatch.envs.quality] features = [ diff --git a/src/wordcab_transcribe/config.py b/src/wordcab_transcribe/config.py index c78bc6f..365796c 100644 --- a/src/wordcab_transcribe/config.py +++ b/src/wordcab_transcribe/config.py @@ -142,7 +142,7 @@ def align_model_compatibility_check(cls, value: str): # noqa: B902, N805 @field_validator("diarization_backend") def diarization_backend_compatibility_check(cls, value: str): # noqa: B902, N805 """Check that the diarization engine is compatible.""" - if value.lower() not in ["default_diarizer", "longform_diarizer"]: + if value.lower() not in ["default-diarizer", "longform-diarizer"]: raise ValueError( "The diarization backend must be one of `default_diarizer` or" " `longform_diarizer`." 
@@ -323,7 +323,7 @@ def __post_init__(self): extra_languages=extra_languages, extra_languages_model_paths=extra_languages_model_paths, # Diarization - diarization_backend=getenv("DIARIZATION_BACKEND", "longform_diarizer"), + diarization_backend=getenv("DIARIZATION_BACKEND", "longform-diarizer"), window_lengths=window_lengths, shift_lengths=shift_lengths, multiscale_weights=multiscale_weights, diff --git a/src/wordcab_transcribe/services/asr_service.py b/src/wordcab_transcribe/services/asr_service.py index 92ec76d..19bf25e 100644 --- a/src/wordcab_transcribe/services/asr_service.py +++ b/src/wordcab_transcribe/services/asr_service.py @@ -335,7 +335,7 @@ def create_transcription_local_service(self) -> None: def create_diarization_local_service(self) -> None: """Create a local diarization service.""" - if settings.diarization_backend == "longform_diarizer": + if settings.diarization_backend == "longform-diarizer": self.local_services.diarization = LongFormDiarizeService( device=self.device, ) @@ -653,7 +653,7 @@ async def process_diarization(self, task: ASRTask, debug_mode: bool) -> None: """ try: if isinstance(task.diarization.execution, LocalExecution): - if settings.diarization_backend == "longform_diarizer": + if settings.diarization_backend == "longform-diarizer": out = await time_and_tell_async( lambda: self.local_services.diarization( waveform=task.audio, @@ -1101,7 +1101,7 @@ def __init__( """Initialize the ASRDiarizationOnly class.""" super().__init__() - if settings.diarization_backend == "longform_diarizer": + if settings.diarization_backend == "longform-diarizer": self.diarization_service = LongFormDiarizeService( device=self.device, )