diff --git a/python_gpt_po/po_translator.py b/python_gpt_po/po_translator.py
index e090c96..2d4e312 100644
--- a/python_gpt_po/po_translator.py
+++ b/python_gpt_po/po_translator.py
@@ -10,6 +10,7 @@ from dataclasses import dataclass
 
 
 import polib
+import pycountry
 from dotenv import load_dotenv
 from openai import OpenAI
 from pkg_resources import DistributionNotFound, get_distribution
@@ -58,18 +59,74 @@ def disable_fuzzy_translations(po_file_path):
     @staticmethod
     def get_file_language(po_file_path, po_file, languages, folder_language):
-        """Determines the language for a .po file."""
-        # Attempt to get language from the file metadata first
+        """Determine the target language of a .po file.
+
+        The ``Language`` metadata header is tried first; when it matches nothing
+        and ``folder_language`` is truthy, each component of ``po_file_path`` is
+        tried in order.
+
+        Returns the matching entry of ``languages``, or ``None`` if there is none.
+        """
         file_lang = po_file.metadata.get('Language', '')
+        matched_lang = POFileHandler._match_language(file_lang, languages)
 
-        # If the file's language is not valid, infer it from the folder structure
-        if not file_lang or file_lang not in languages:
-            if folder_language:
-                inferred_lang = next((part for part in po_file_path.split(os.sep) if part in languages), None)
-                if inferred_lang:
-                    logging.info("Inferred language for .po file: %s as %s", po_file_path, inferred_lang)
-                    return inferred_lang
-            return None
-        return file_lang
+        if matched_lang:
+            return matched_lang
+
+        if folder_language:
+            for part in po_file_path.split(os.sep):
+                matched_lang = POFileHandler._match_language(part, languages)
+                if matched_lang:
+                    logging.info("Inferred language for .po file: %s as %s", po_file_path, matched_lang)
+                    return matched_lang
+
+        return None
+
+    @staticmethod
+    def _match_language(candidate, languages):
+        """Return the entry of ``languages`` that ``candidate`` denotes, or ``None``.
+
+        An exact match wins, so region-qualified codes such as ``pt_BR`` that are
+        listed verbatim in ``languages`` keep working; otherwise ``candidate`` is
+        normalized to an ISO 639-1 code and looked up again.
+        """
+        if not candidate:
+            return None
+        if candidate in languages:
+            return candidate
+        normalized = POFileHandler.normalize_language_code(candidate)
+        return normalized if normalized and normalized in languages else None
+
+    @staticmethod
+    def normalize_language_code(lang):
+        """Convert a language name or two-letter code to its ISO 639-1 code.
+
+        Returns ``None`` when ``lang`` is empty or no language with a two-letter
+        code matches.
+        """
+        if not lang:
+            return None
+
+        # Try direct lookup
+        if len(lang) == 2:
+            code = getattr(pycountry.languages.get(alpha_2=lang.lower()), 'alpha_2', None)
+            if code:
+                return code
+
+        # Try by name
+        code = getattr(pycountry.languages.get(name=lang.title()), 'alpha_2', None)
+        if code:
+            return code
+
+        # Try by ISO 639-3 inverted name (e.g. "Greek, Modern (1453-)"). Many of
+        # these languages have no two-letter code, so alpha_2 may be missing.
+        wanted = lang.lower()
+        for language in pycountry.languages:
+            if getattr(language, 'inverted_name', '').lower() == wanted:
+                code = getattr(language, 'alpha_2', None)
+                if code:
+                    return code
+
+        return None
 
     @staticmethod
     def log_translation_status(po_file_path, original_texts, translations):
diff --git a/requirements.txt b/requirements.txt
index 59408a2..8c9fa1c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,7 @@
 polib==1.2.0
-openai==v1.42.0
+openai==1.58.1
 python-dotenv==1.0.0
 pytest==8.2.2
 tenacity==9.0.0
-setuptools-scm==8.1.0
\ No newline at end of file
+setuptools-scm==8.1.0
+pycountry==24.6.1
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 6956bb3..551997a 100644
--- a/setup.py
+++ b/setup.py
@@ -24,7 +24,8 @@
     include_package_data=True,
     install_requires=[
         'polib==1.2.0',
-        'openai==v1.42.0',
+        'openai==1.58.1',
         'python-dotenv==1.0.0',
         'tenacity==9.0.0',
+        'pycountry==24.6.1',
     ],