From 076022d1d77424d99a01ea09cf7ad13c55fadc08 Mon Sep 17 00:00:00 2001
From: bram
Date: Mon, 23 Dec 2024 16:29:35 +0100
Subject: [PATCH 1/2] Added data class

Replace the hand-written TranslationConfig.__init__ with a dataclass and
build the configuration in main() with keyword arguments.

---
 .gitignore                     |  3 ++-
 python_gpt_po/po_translator.py | 21 ++++++++++++++-------
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/.gitignore b/.gitignore
index 05ab914..2c4675e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,4 +4,5 @@ dist/
 *.egg-info/
 __pycache__
 build/
-db.sqlite3
\ No newline at end of file
+db.sqlite3
+.venv
\ No newline at end of file
diff --git a/python_gpt_po/po_translator.py b/python_gpt_po/po_translator.py
index bdaaaed..e090c96 100644
--- a/python_gpt_po/po_translator.py
+++ b/python_gpt_po/po_translator.py
@@ -7,6 +7,7 @@
 import json
 import logging
 import os
+from dataclasses import dataclass
 
 import polib
 from dotenv import load_dotenv
@@ -99,14 +100,14 @@ def update_po_entry(po_file, original_text, translated_text):
         logging.warning("Original text '%s' not found in the .po file.", original_text)
 
 
+@dataclass
 class TranslationConfig:
     """ Class to hold configuration parameters for the translation service. """
-    def __init__(self, client, model, bulk_mode=False, fuzzy=False, folder_language=False):  # pylint: disable=R0913
-        self.client = client
-        self.model = model
-        self.bulk_mode = bulk_mode
-        self.fuzzy = fuzzy
-        self.folder_language = folder_language
+    client: object
+    model: str
+    bulk_mode: bool = False
+    fuzzy: bool = False
+    folder_language: bool = False
 
 
 class TranslationService:
@@ -473,8 +474,14 @@ def main():
     else:
         detail_langs = [None] * len(lang_codes)  # If no detailed language is provided, default to None
 
     # Create a configuration object
-    config = TranslationConfig(client, args.model, args.bulk, args.fuzzy, args.folder_language)
+    config = TranslationConfig(
+        client=client,
+        model=args.model,
+        bulk_mode=args.bulk,
+        fuzzy=args.fuzzy,
+        folder_language=args.folder_language
+    )
 
     # Initialize the translation service with the configuration object
     translation_service = TranslationService(config, args.bulksize)

From 3f94342f8b75dad88e7ce9f8f64b109ea7dc4ef2 Mon Sep 17 00:00:00 2001
From: bram
Date: Mon, 23 Dec 2024 16:45:18 +0100
Subject: [PATCH 2/2] Added language translation

Resolve .po languages through pycountry so that language names and
ISO 639-1 codes found in the file metadata or in folder names map onto
the requested languages. Exact matches are still accepted as-is, so
requested codes that are not plain ISO 639-1 keep working.

Add pycountry to requirements.txt and install_requires, and bump openai
to 1.58.1.

---
 python_gpt_po/po_translator.py | 56 ++++++++++++++++++++++++++++------
 requirements.txt               |  5 ++--
 setup.py                       |  3 ++-
 3 files changed, 51 insertions(+), 13 deletions(-)

diff --git a/python_gpt_po/po_translator.py b/python_gpt_po/po_translator.py
index e090c96..2d4e312 100644
--- a/python_gpt_po/po_translator.py
+++ b/python_gpt_po/po_translator.py
@@ -10,6 +10,7 @@
 from dataclasses import dataclass
 
 import polib
+import pycountry
 from dotenv import load_dotenv
 from openai import OpenAI
 from pkg_resources import DistributionNotFound, get_distribution
@@ -58,18 +59,53 @@ def disable_fuzzy_translations(po_file_path):
     @staticmethod
     def get_file_language(po_file_path, po_file, languages, folder_language):
         """Determines the language for a .po file."""
-        # Attempt to get language from the file metadata first
         file_lang = po_file.metadata.get('Language', '')
+        # An exact match wins so that requested codes which are not plain ISO 639-1
+        # (e.g. 'pt_BR') keep working; normalization is only the fallback.
+        if file_lang and file_lang in languages:
+            return file_lang
+        normalized_lang = POFileHandler.normalize_language_code(file_lang)
+        if normalized_lang and normalized_lang in languages:
+            return normalized_lang
 
-        # If the file's language is not valid, infer it from the folder structure
-        if not file_lang or file_lang not in languages:
-            if folder_language:
-                inferred_lang = next((part for part in po_file_path.split(os.sep) if part in languages), None)
-                if inferred_lang:
-                    logging.info("Inferred language for .po file: %s as %s", po_file_path, inferred_lang)
-                    return inferred_lang
-            return None
-        return file_lang
+        # Otherwise infer the language from the folder structure, if allowed
+        if folder_language:
+            for part in po_file_path.split(os.sep):
+                norm_part = part if part in languages else POFileHandler.normalize_language_code(part)
+                if norm_part and norm_part in languages:
+                    logging.info("Inferred language for .po file: %s as %s", po_file_path, norm_part)
+                    return norm_part
+
+        return None
+
+    @staticmethod
+    def normalize_language_code(lang):
+        """
+        Convert a language name or code to its ISO 639-1 (two-letter) code.
+
+        Returns None for empty input or when no language with a two-letter code matches.
+        """
+        if not lang:
+            return None
+
+        # Try direct lookup of a two-letter code
+        if len(lang) == 2:
+            match = pycountry.languages.get(alpha_2=lang.lower())
+            if match is not None:
+                return match.alpha_2
+
+        # Try by name; not every language has a two-letter code, hence getattr
+        code = getattr(pycountry.languages.get(name=lang.title()), 'alpha_2', None)
+        if code:
+            return code
+
+        # Try by inverted name
+        wanted = lang.lower()
+        for language in pycountry.languages:
+            if getattr(language, 'inverted_name', '').lower() == wanted:
+                return getattr(language, 'alpha_2', None)
+
+        return None
 
     @staticmethod
     def log_translation_status(po_file_path, original_texts, translations):
diff --git a/requirements.txt b/requirements.txt
index 59408a2..8c9fa1c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,7 @@
 polib==1.2.0
-openai==v1.42.0
+openai==1.58.1
 python-dotenv==1.0.0
 pytest==8.2.2
 tenacity==9.0.0
-setuptools-scm==8.1.0
\ No newline at end of file
+setuptools-scm==8.1.0
+pycountry==24.6.1
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 6956bb3..551997a 100644
--- a/setup.py
+++ b/setup.py
@@ -24,7 +24,8 @@
     include_package_data=True,
     install_requires=[
         'polib==1.2.0',
-        'openai==v1.42.0',
+        'openai==1.58.1',
+        'pycountry==24.6.1',
         'python-dotenv==1.0.0',
         'tenacity==9.0.0',
     ],