Skip to content

Commit

Permalink
Merge pull request #462 from GooeyAI/ghana_mt_lang_api
Browse files: browse the repository at this point in the history
fetch ghana translation target languages from api
  • Loading branch information
anish-work authored Sep 19, 2024
2 parents 86bfe50 + 205bef9 commit 9865e5b
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 16 deletions.
47 changes: 33 additions & 14 deletions daras_ai_v2/asr.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,15 +59,16 @@

# https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages
CHIRP_SUPPORTED = {
"af-ZA", "sq-AL", "am-ET", "ar-EG", "hy-AM", "as-IN", "ast-ES", "az-AZ", "eu-ES", "be-BY", "bs-BA", "bg-BG",
"my-MM", "ca-ES", "ceb-PH", "ckb-IQ", "yue-Hant-HK", "zh-TW", "hr-HR", "cs-CZ", "da-DK", "nl-NL",
"en-AU", "en-IN", "en-GB", "en-US", "et-EE", "fil-PH", "fi-FI", "fr-CA", "fr-FR", "gl-ES", "ka-GE", "de-DE",
"el-GR", "gu-IN", "ha-NG", "iw-IL", "hi-IN", "hu-HU", "is-IS", "id-ID", "it-IT", "ja-JP", "jv-ID", "kea-CV",
"kam-KE", "kn-IN", "kk-KZ", "km-KH", "ko-KR", "ky-KG", "lo-LA", "lv-LV", "ln-CD", "lt-LT", "luo-KE", "lb-LU",
"mk-MK", "ms-MY", "ml-IN", "mt-MT", "mi-NZ", "mr-IN", "mn-MN", "ne-NP", "no-NO", "ny-MW", "oc-FR", "ps-AF", "fa-IR",
"pl-PL", "pt-BR", "pa-Guru-IN", "ro-RO", "ru-RU", "nso-ZA", "sr-RS", "sn-ZW", "sd-IN", "si-LK", "sk-SK", "sl-SI",
"so-SO", "es-ES", "es-US", "su-ID", "sw", "sv-SE", "tg-TJ", "ta-IN", "te-IN", "th-TH", "tr-TR", "uk-UA", "ur-PK",
"uz-UZ", "vi-VN", "cy-GB", "wo-SN", "yo-NG", "zu-ZA"
'fa-IR', 'sr-RS', 'es-US', 'ur-PK', 'yo-NG', 'te-IN', 'sn-ZW', 'es-ES', 'jv-ID', 'no-NO', 'cmn-Hans-CN', 'ha-NG',
'es-419', 'wo-SN', 'rup-BG', 'ceb-PH', 'ms-MY', 'umb-AO', 'ny-MW', 'sw-KE', 'et-EE', 'ga-IE', 'kn-IN', 'sd-IN',
'en-GB', 'ml-IN', 'fil-PH', 'my-MM', 'uk-UA', 'lt-LT', 'en-US', 'ff-SN', 'su-ID', 'ru-RU', 'xh-ZA', 'en-IN',
'it-IT', 'ky-KG', 'en-AU', 'id-ID', 'ja-JP', 'fr-CA', 'nl-NL', 'fi-FI', 'zu-ZA', 'ar-EG', 'bs-BA', 'gl-ES', 'si-LK',
'pa-Guru-IN', 'ast-ES', 'tr-TR', 'mt-MT', 'hy-AM', 'da-DK', 'vi-VN', 'kam-KE', 'hu-HU', 'cs-CZ', 'sl-SI', 'ko-KR',
'km-KH', 'kk-KZ', 'nso-ZA', 'mk-MK', 'de-DE', 'mr-IN', 'th-TH', 'as-IN', 'kea-CV', 'bg-BG', 'sk-SK', 'bn-BD',
'el-GR', 'cy-GB', 'ro-RO', 'ckb-IQ', 'ca-ES', 'sq-AL', 'af-ZA', 'ig-NG', 'cmn-Hant-TW', 'mi-NZ', 'gu-IN', 'tg-TJ',
'oc-FR', 'so-SO', 'be-BY', 'fr-FR', 'luo-KE', 'sv-SE', 'is-IS', 'bn-IN', 'lg-UG', 'uz-UZ', 'iw-IL', 'ps-AF',
'ta-IN', 'sw', 'mn-MN', 'ka-GE', 'az-AZ', 'pt-BR', 'hi-IN', 'lo-LA', 'am-ET', 'eu-ES', 'yue-Hant-HK', 'pl-PL',
'om-ET', 'hr-HR', 'lv-LV', 'or-IN', 'ln-CD', 'ne-NP', 'lb-LU'
} # fmt: skip

WHISPER_SUPPORTED = {
Expand Down Expand Up @@ -309,7 +310,7 @@ class TranslationModels(TranslationModel, Enum):
supports_auto_detect=True,
)
ghana_nlp = TranslationModel(
label="Ghana NLP Translate",
label="Ghana NLP Translate", supports_auto_detect=False
)


Expand All @@ -327,7 +328,10 @@ def translation_language_selector(
if model == TranslationModels.google:
languages = google_translate_target_languages()
elif model == TranslationModels.ghana_nlp:
languages = GHANA_NLP_SUPPORTED
if not settings.GHANA_NLP_SUBKEY:
languages = {}
else:
languages = ghana_nlp_translate_target_languages()
else:
raise ValueError("Unsupported translation model: " + str(model))

Expand Down Expand Up @@ -385,6 +389,21 @@ def google_translate_language_selector(
)


@redis_cache_decorator(ex=settings.REDIS_MODELS_CACHE_EXPIRY)
def ghana_nlp_translate_target_languages():
    """
    Fetch the languages supported by the Ghana NLP translation API.

    Sends a GET request, carrying ``GHANA_API_AUTH_HEADERS``, to the
    service's ``/v1/languages`` endpoint and passes the response to
    ``raise_for_status`` before the JSON body is read. The function is wrapped
    by ``redis_cache_decorator`` with ``settings.REDIS_MODELS_CACHE_EXPIRY``
    as its ``ex`` argument.

    :return: The value stored under the ``"languages"`` key of the JSON
        response (presumably a dictionary of language codes and display
        names -- confirm against the API reference below), or an empty dict
        when that key is missing or its value is falsy.
    Reference: https://translation.ghananlp.org/api-details#api=ghananlp-translation-webservice-api
    """
    languages_endpoint = "https://translation-api.ghananlp.org/v1/languages"
    response = requests.get(
        languages_endpoint,
        headers=GHANA_API_AUTH_HEADERS,
    )
    raise_for_status(response)

    payload = response.json()
    languages = payload.get("languages")
    if not languages:
        # Missing key, None, or an empty container all collapse to an empty dict.
        return {}
    return languages


@redis_cache_decorator(ex=settings.REDIS_MODELS_CACHE_EXPIRY)
def google_translate_target_languages() -> dict[str, str]:
"""
Expand Down Expand Up @@ -505,10 +524,10 @@ def run_ghana_nlp_translate(
source_language and target_language
), "Both Source & Target language is required for Ghana NLP"
source_language = normalised_lang_in_collection(
source_language, GHANA_NLP_SUPPORTED
source_language, ghana_nlp_translate_target_languages()
)
target_language = normalised_lang_in_collection(
target_language, GHANA_NLP_SUPPORTED
target_language, ghana_nlp_translate_target_languages()
)
if source_language == target_language:
return texts
Expand All @@ -534,7 +553,7 @@ def _call_ghana_nlp_chunked(
def _call_ghana_nlp_raw(text: str, source_language: str, target_language: str) -> str:
r = requests.post(
"https://translation-api.ghananlp.org/v1/translate",
headers={"Ocp-Apim-Subscription-Key": str(settings.GHANA_NLP_SUBKEY)},
headers=GHANA_API_AUTH_HEADERS,
json={"in": text, "lang": source_language + "-" + target_language},
)
raise_for_status(r)
Expand Down
8 changes: 6 additions & 2 deletions recipes/asr_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,11 @@ def render_settings(self):
model=translation_model,
label=f"###### {field_title_desc(self.RequestModel, 'translation_source')}",
key="translation_source",
allow_none=True,
allow_none=(
translation_model.supports_auto_detect
if translation_model
else True
),
)
gui.caption(
"This is usually inferred from the spoken `language`, but in case that is set to Auto detect, you can specify one explicitly.",
Expand Down Expand Up @@ -211,7 +215,7 @@ def run(self, state: dict):
asr_output,
target_language=request.translation_target,
source_language=forced_asr_languages.get(
selected_model, request.language or request.translation_source
selected_model, request.translation_source or request.language
),
glossary_url=request.glossary_document,
model=request.translation_model,
Expand Down

0 comments on commit 9865e5b

Please sign in to comment.