From e0f323ea256868cdf7d3db46083e43983ea17942 Mon Sep 17 00:00:00 2001 From: pajowu Date: Fri, 7 Jul 2023 16:11:55 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Deduplicate=20model=20names?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- server/app/models.yml | 10 +++++----- server/scripts/generate_models_list.py | 10 ++++++++++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/server/app/models.yml b/server/app/models.yml index 632bbfa0..36edbf43 100644 --- a/server/app/models.yml +++ b/server/app/models.yml @@ -15,7 +15,7 @@ German: size: 1.9G type: transcription compressed: true -- name: big +- name: big-2 url: https://alphacephei.com/vosk/models/vosk-model-de-tuda-0.6-900k.zip description: Latest big wideband model from Tuda-DE project @@ -108,7 +108,7 @@ Russian Other: size: 1.5G type: transcription compressed: true -- name: big +- name: big-2 url: https://alphacephei.com/vosk/models/vosk-model-ru-0.10.zip description: Big narrowband Russian model for servers size: 2.5G @@ -235,7 +235,7 @@ Arabic: size: 318M type: transcription compressed: true -- name: big +- name: big-2 url: https://alphacephei.com/vosk/models/vosk-model-ar-0.22-linto-1.1.0.zip description: Big model from LINTO project @@ -249,7 +249,7 @@ Farsi: size: 47M type: transcription compressed: true -- name: small +- name: small-2 url: https://alphacephei.com/vosk/models/vosk-model-small-fa-0.5.zip description: Bigger small model for desktop application (Persian) size: 60M @@ -270,7 +270,7 @@ Ukrainian: size: 73M type: transcription compressed: true -- name: small +- name: small-2 url: https://alphacephei.com/vosk/models/vosk-model-small-uk-v3-small.zip description: Small model from Speech Recognition for Ukrainian diff --git a/server/scripts/generate_models_list.py b/server/scripts/generate_models_list.py index 1868482a..ccf43e92 100644 --- a/server/scripts/generate_models_list.py +++ b/server/scripts/generate_models_list.py @@ -104,9 +104,19 @@ def print_table_from_dict_list(dict_list, columns=None): print_table_from_dict_list(models, columns=["lang", "name", "url"]) by_language = defaultdict(list) +names_by_language = defaultdict(set) for model in models: lang = model["lang"] del model["lang"] + + i = 2 + name = model["name"] + while name in names_by_language[lang]: + name = model["name"] + "-" + str(i) + i += 1 + model["name"] = name + names_by_language[lang].add(name) + by_language[lang] += [model] with open(Path(__file__).parent.parent / "app" / "models.yml", "w") as outfile: