From ddc68e69db1f97187c2766a5dca4277311c5f11d Mon Sep 17 00:00:00 2001 From: anish-work Date: Thu, 12 Sep 2024 14:41:55 +0530 Subject: [PATCH 1/9] fetch ghana translation target languages from api --- daras_ai_v2/asr.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/daras_ai_v2/asr.py b/daras_ai_v2/asr.py index 72a64a9c8..7d42387ab 100644 --- a/daras_ai_v2/asr.py +++ b/daras_ai_v2/asr.py @@ -327,7 +327,7 @@ def translation_language_selector( if model == TranslationModels.google: languages = google_translate_target_languages() elif model == TranslationModels.ghana_nlp: - languages = GHANA_NLP_SUPPORTED + languages = ghana_nlp_translate_target_languages() else: raise ValueError("Unsupported translation model: " + str(model)) @@ -385,6 +385,21 @@ def google_translate_language_selector( ) +@redis_cache_decorator(ex=settings.REDIS_MODELS_CACHE_EXPIRY) +def ghana_nlp_translate_target_languages(): + """ + Get list of supported languages for Ghana NLP Translation. + :return: Dictionary of language codes and display names. + Reference: https://translation.ghananlp.org/api-details#api=ghananlp-translation-webservice-api + """ + r = requests.get( + "https://translation-api.ghananlp.org/v1/languages", + headers={"Ocp-Apim-Subscription-Key": str(settings.GHANA_NLP_SUBKEY)}, + ) + raise_for_status(r) + return r.json().get("languages", {}) + + @redis_cache_decorator(ex=settings.REDIS_MODELS_CACHE_EXPIRY) def google_translate_target_languages() -> dict[str, str]: """ From 34a5fbcb38514d012d43eee21366fa8c3864e291 Mon Sep 17 00:00:00 2001 From: anish-work Date: Tue, 17 Sep 2024 14:18:38 +0530 Subject: [PATCH 2/9] use a variable for GHANA_API_AUTH_HEADERS & add null safety --- daras_ai_v2/asr.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/daras_ai_v2/asr.py b/daras_ai_v2/asr.py index 7d42387ab..f62180b3d 100644 --- a/daras_ai_v2/asr.py +++ b/daras_ai_v2/asr.py @@ -394,10 +394,10 @@ def ghana_nlp_translate_target_languages(): """ r = requests.get( "https://translation-api.ghananlp.org/v1/languages", - headers={"Ocp-Apim-Subscription-Key": str(settings.GHANA_NLP_SUBKEY)}, + headers=GHANA_API_AUTH_HEADERS, ) raise_for_status(r) - return r.json().get("languages", {}) + return r.json().get("languages", {}) or {} @redis_cache_decorator(ex=settings.REDIS_MODELS_CACHE_EXPIRY) @@ -549,7 +549,7 @@ def _call_ghana_nlp_chunked( def _call_ghana_nlp_raw(text: str, source_language: str, target_language: str) -> str: r = requests.post( "https://translation-api.ghananlp.org/v1/translate", - headers={"Ocp-Apim-Subscription-Key": str(settings.GHANA_NLP_SUBKEY)}, + headers=GHANA_API_AUTH_HEADERS, json={"in": text, "lang": source_language + "-" + target_language}, ) raise_for_status(r) From e1285fbafdf501a83733aa325f47ad2e00d0aef3 Mon Sep 17 00:00:00 2001 From: anish-work Date: Wed, 18 Sep 2024 03:34:22 +0530 Subject: [PATCH 3/9] use langs from ghana api --- daras_ai_v2/asr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/daras_ai_v2/asr.py b/daras_ai_v2/asr.py index f62180b3d..e0ebc00e1 100644 --- a/daras_ai_v2/asr.py +++ b/daras_ai_v2/asr.py @@ -520,10 +520,10 @@ def run_ghana_nlp_translate( source_language and target_language ), "Both Source & Target language is required for Ghana NLP" source_language = normalised_lang_in_collection( - source_language, GHANA_NLP_SUPPORTED + source_language, ghana_nlp_translate_target_languages() ) target_language = normalised_lang_in_collection( - target_language, GHANA_NLP_SUPPORTED + target_language, ghana_nlp_translate_target_languages() ) if source_language == target_language: return texts From 023f5a15c6ba30f3358fece19c73c78e787f5785 Mon Sep 17 00:00:00 2001 From: anish-work Date: Wed, 18 Sep 2024 04:09:08 +0530 Subject: [PATCH 4/9] prefer translation_source before translating & hide auto detect for ghana_nlp --- daras_ai_v2/asr.py | 3 ++- recipes/asr_page.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/daras_ai_v2/asr.py b/daras_ai_v2/asr.py index e0ebc00e1..3abf64088 100644 --- a/daras_ai_v2/asr.py +++ b/daras_ai_v2/asr.py @@ -309,7 +309,7 @@ class TranslationModels(TranslationModel, Enum): supports_auto_detect=True, ) ghana_nlp = TranslationModel( - label="Ghana NLP Translate", + label="Ghana NLP Translate", supports_auto_detect=False ) @@ -332,6 +332,7 @@ def translation_language_selector( raise ValueError("Unsupported translation model: " + str(model)) options = list(languages.keys()) + print(options, ">>>") return gui.selectbox( label=label, key=key, diff --git a/recipes/asr_page.py b/recipes/asr_page.py index f68be1a08..0ea6f9b07 100644 --- a/recipes/asr_page.py +++ b/recipes/asr_page.py @@ -153,7 +153,7 @@ def render_settings(self): model=translation_model, label=f"###### {field_title_desc(self.RequestModel, 'translation_source')}", key="translation_source", - allow_none=True, + allow_none=translation_model.supports_auto_detect, ) gui.caption( "This is usually inferred from the spoken `language`, but in case that is set to Auto detect, you can specify one explicitly.", @@ -211,7 +211,7 @@ def run(self, state: dict): asr_output, target_language=request.translation_target, source_language=forced_asr_languages.get( - selected_model, request.language or request.translation_source + selected_model, request.translation_source or request.language ), glossary_url=request.glossary_document, model=request.translation_model, From 69fd4d6ff80ca8cb6171480ca807a3bef2521f5c Mon Sep 17 00:00:00 2001 From: anish-work Date: Wed, 18 Sep 2024 15:12:00 +0530 Subject: [PATCH 5/9] update chirp supported languages --- daras_ai_v2/asr.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/daras_ai_v2/asr.py b/daras_ai_v2/asr.py index 3abf64088..39a77b147 100644 --- a/daras_ai_v2/asr.py +++ b/daras_ai_v2/asr.py @@ -59,15 +59,16 @@ # https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages CHIRP_SUPPORTED = { - "af-ZA", "sq-AL", "am-ET", "ar-EG", "hy-AM", "as-IN", "ast-ES", "az-AZ", "eu-ES", "be-BY", "bs-BA", "bg-BG", - "my-MM", "ca-ES", "ceb-PH", "ckb-IQ", "yue-Hant-HK", "zh-TW", "hr-HR", "cs-CZ", "da-DK", "nl-NL", - "en-AU", "en-IN", "en-GB", "en-US", "et-EE", "fil-PH", "fi-FI", "fr-CA", "fr-FR", "gl-ES", "ka-GE", "de-DE", - "el-GR", "gu-IN", "ha-NG", "iw-IL", "hi-IN", "hu-HU", "is-IS", "id-ID", "it-IT", "ja-JP", "jv-ID", "kea-CV", - "kam-KE", "kn-IN", "kk-KZ", "km-KH", "ko-KR", "ky-KG", "lo-LA", "lv-LV", "ln-CD", "lt-LT", "luo-KE", "lb-LU", - "mk-MK", "ms-MY", "ml-IN", "mt-MT", "mi-NZ", "mr-IN", "mn-MN", "ne-NP", "no-NO", "ny-MW", "oc-FR", "ps-AF", "fa-IR", - "pl-PL", "pt-BR", "pa-Guru-IN", "ro-RO", "ru-RU", "nso-ZA", "sr-RS", "sn-ZW", "sd-IN", "si-LK", "sk-SK", "sl-SI", - "so-SO", "es-ES", "es-US", "su-ID", "sw", "sv-SE", "tg-TJ", "ta-IN", "te-IN", "th-TH", "tr-TR", "uk-UA", "ur-PK", - "uz-UZ", "vi-VN", "cy-GB", "wo-SN", "yo-NG", "zu-ZA" + 'fa-IR', 'sr-RS', 'es-US', 'ur-PK', 'yo-NG', 'te-IN', 'sn-ZW', 'es-ES', 'jv-ID', 'no-NO', 'cmn-Hans-CN', 'ha-NG', + 'es-419', 'wo-SN', 'rup-BG', 'ceb-PH', 'ms-MY', 'umb-AO', 'ny-MW', 'sw-KE', 'et-EE', 'ga-IE', 'kn-IN', 'sd-IN', + 'en-GB', 'ml-IN', 'fil-PH', 'my-MM', 'uk-UA', 'lt-LT', 'en-US', 'ff-SN', 'su-ID', 'ru-RU', 'xh-ZA', 'en-IN', + 'it-IT', 'ky-KG', 'en-AU', 'id-ID', 'ja-JP', 'fr-CA', 'nl-NL', 'fi-FI', 'zu-ZA', 'ar-EG', 'bs-BA', 'gl-ES', 'si-LK', + 'pa-Guru-IN', 'ast-ES', 'tr-TR', 'mt-MT', 'hy-AM', 'da-DK', 'vi-VN', 'kam-KE', 'hu-HU', 'cs-CZ', 'sl-SI', 'ko-KR', + 'km-KH', 'kk-KZ', 'nso-ZA', 'mk-MK', 'de-DE', 'mr-IN', 'th-TH', 'as-IN', 'kea-CV', 'bg-BG', 'sk-SK', 'bn-BD', + 'el-GR', 'cy-GB', 'ro-RO', 'ckb-IQ', 'ca-ES', 'sq-AL', 'af-ZA', 'ig-NG', 'cmn-Hant-TW', 'mi-NZ', 'gu-IN', 'tg-TJ', + 'oc-FR', 'so-SO', 'be-BY', 'fr-FR', 'luo-KE', 'sv-SE', 'is-IS', 'bn-IN', 'lg-UG', 'uz-UZ', 'iw-IL', 'ps-AF', + 'ta-IN', 'sw', 'mn-MN', 'ka-GE', 'az-AZ', 'pt-BR', 'hi-IN', 'lo-LA', 'am-ET', 'eu-ES', 'yue-Hant-HK', 'pl-PL', + 'om-ET', 'hr-HR', 'lv-LV', 'or-IN', 'ln-CD', 'ne-NP', 'lb-LU' } # fmt: skip WHISPER_SUPPORTED = { @@ -332,7 +333,6 @@ def translation_language_selector( raise ValueError("Unsupported translation model: " + str(model)) options = list(languages.keys()) - print(options, ">>>") return gui.selectbox( label=label, key=key, From 14189de710e7ea23bd0ad3c2c56dbb0abe99efad Mon Sep 17 00:00:00 2001 From: anish-work Date: Thu, 19 Sep 2024 00:03:03 +0530 Subject: [PATCH 6/9] remove redundant default value --- daras_ai_v2/asr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/daras_ai_v2/asr.py b/daras_ai_v2/asr.py index 39a77b147..97baf67fc 100644 --- a/daras_ai_v2/asr.py +++ b/daras_ai_v2/asr.py @@ -398,7 +398,7 @@ def ghana_nlp_translate_target_languages(): headers=GHANA_API_AUTH_HEADERS, ) raise_for_status(r) - return r.json().get("languages", {}) or {} + return r.json().get("languages") or {} @redis_cache_decorator(ex=settings.REDIS_MODELS_CACHE_EXPIRY) From 5330ea85b8feb244b438aa0b8d3fb94dfc2c58a8 Mon Sep 17 00:00:00 2001 From: anish-work Date: Thu, 19 Sep 2024 18:46:38 +0530 Subject: [PATCH 7/9] add null check for auto detect --- recipes/asr_page.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/recipes/asr_page.py b/recipes/asr_page.py index 0ea6f9b07..57e55c5b4 100644 --- a/recipes/asr_page.py +++ b/recipes/asr_page.py @@ -153,7 +153,11 @@ def render_settings(self): model=translation_model, label=f"###### {field_title_desc(self.RequestModel, 'translation_source')}", key="translation_source", - allow_none=translation_model.supports_auto_detect, + allow_none=( + translation_model.supports_auto_detect + if translation_model + else True + ), ) gui.caption( "This is usually inferred from the spoken `language`, but in case that is set to Auto detect, you can specify one explicitly.", From cc1ab4695f9461e22722e4efda8c45a547274570 Mon Sep 17 00:00:00 2001 From: anish-work Date: Thu, 19 Sep 2024 19:12:13 +0530 Subject: [PATCH 8/9] skip loading ghana langs if env var not found --- daras_ai_v2/asr.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/daras_ai_v2/asr.py b/daras_ai_v2/asr.py index 97baf67fc..1bbeb8a41 100644 --- a/daras_ai_v2/asr.py +++ b/daras_ai_v2/asr.py @@ -321,7 +321,9 @@ def translation_language_selector( key: str, **kwargs, ) -> str | None: - if not model: + if not model or ( + model == TranslationModels.ghana_nlp and not settings.GHANA_NLP_SUBKEY + ): gui.session_state[key] = None return From 205bef974a2a6ce0fec5692d7e8015a69e128d7f Mon Sep 17 00:00:00 2001 From: anish-work Date: Thu, 19 Sep 2024 19:48:11 +0530 Subject: [PATCH 9/9] pass empty list if ghana env not found --- daras_ai_v2/asr.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/daras_ai_v2/asr.py b/daras_ai_v2/asr.py index 1bbeb8a41..db594e780 100644 --- a/daras_ai_v2/asr.py +++ b/daras_ai_v2/asr.py @@ -321,16 +321,17 @@ def translation_language_selector( key: str, **kwargs, ) -> str | None: - if not model or ( - model == TranslationModels.ghana_nlp and not settings.GHANA_NLP_SUBKEY - ): + if not model: gui.session_state[key] = None return if model == TranslationModels.google: languages = google_translate_target_languages() elif model == TranslationModels.ghana_nlp: - languages = ghana_nlp_translate_target_languages() + if not settings.GHANA_NLP_SUBKEY: + languages = {} + else: + languages = ghana_nlp_translate_target_languages() else: raise ValueError("Unsupported translation model: " + str(model))