Skip to content

Commit

Permalink
Merge pull request #5 from pescheckit/feature_added-data-class
Browse files Browse the repository at this point in the history
fix: Enhanced language code handling with pycountry lib
  • Loading branch information
pescheck-bram authored Dec 23, 2024
2 parents ea6872d + 3f94342 commit 52bab8b
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 22 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ dist/
*.egg-info/
__pycache__
build/
db.sqlite3
db.sqlite3
.venv
69 changes: 51 additions & 18 deletions python_gpt_po/po_translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
import json
import logging
import os
from dataclasses import dataclass

import polib
import pycountry
from dotenv import load_dotenv
from openai import OpenAI
from pkg_resources import DistributionNotFound, get_distribution
Expand Down Expand Up @@ -57,18 +59,43 @@ def disable_fuzzy_translations(po_file_path):
@staticmethod
def get_file_language(po_file_path, po_file, languages, folder_language):
"""Determines the language for a .po file."""
# NOTE(review): this span is a flattened diff hunk (indentation stripped, no +/-
# markers). Two variants of the body appear interleaved below; the hunk's line
# counts suggest which side each line belongs to, but confirm against the repo.
# Attempt to get language from the file metadata first
# Read the 'Language' metadata entry; '' is the fallback when the key is absent.
file_lang = po_file.metadata.get('Language', '')
# Pass the metadata value through POFileHandler.normalize_language_code so that
# the membership test against `languages` further down uses its result.
normalized_lang = POFileHandler.normalize_language_code(file_lang)

# If the file's language is not valid, infer it from the folder structure
# NOTE(review): from here through `return file_lang` the raw metadata value and the
# raw path components are compared against `languages` without normalization --
# presumably the pre-change (removed) variant; TODO confirm.
if not file_lang or file_lang not in languages:
if folder_language:
# First path component (split on os.sep) that is itself listed in `languages`.
inferred_lang = next((part for part in po_file_path.split(os.sep) if part in languages), None)
if inferred_lang:
logging.info("Inferred language for .po file: %s as %s", po_file_path, inferred_lang)
return inferred_lang
return None
return file_lang
# NOTE(review): from here on the normalized values are compared against
# `languages` -- presumably the post-change (added) variant; TODO confirm.
if normalized_lang in languages:
return normalized_lang

# Folder-based inference only runs when `folder_language` is truthy.
if folder_language:
for part in po_file_path.split(os.sep):
# Each path component is normalized before the membership test.
norm_part = POFileHandler.normalize_language_code(part)
if norm_part in languages:
logging.info("Inferred language for .po file: %s as %s", po_file_path, norm_part)
return norm_part

# Neither the metadata nor (when enabled) the path yielded a listed language.
return None

@staticmethod
def normalize_language_code(lang):
    """Convert a language name or code to its ISO 639-1 (two-letter) code.

    Lookups are attempted in this order, returning the first hit:

    1. ``lang`` as a two-letter code (case-insensitive).
    2. ``lang`` as a language name, title-cased (e.g. ``"french"``).
    3. ``lang`` as a pycountry ``inverted_name``, compared case-insensitively.

    Args:
        lang: Language name or code. ``None``, non-string values and
            empty/whitespace-only strings are accepted and yield ``None``.

    Returns:
        The two-letter code as a string, or ``None`` when ``lang`` cannot be
        resolved or the matching language has no two-letter code.
    """
    # Guard first: .po metadata and path components may be missing or empty,
    # and len()/str methods below would raise on None.
    if not isinstance(lang, str):
        return None
    lang = lang.strip()
    if not lang:
        return None

    # Try direct lookup of a two-letter code.
    if len(lang) == 2:
        match = pycountry.languages.get(alpha_2=lang.lower())
        # getattr with a default covers both "no match" (None) and records
        # that lack the attribute.
        code = getattr(match, 'alpha_2', None)
        if code:
            return code

    # Try by name.
    match = pycountry.languages.get(name=lang.title())
    code = getattr(match, 'alpha_2', None)
    if code:
        return code

    # Try by inverted name (pycountry's `inverted_name` field).
    wanted = lang.lower()
    for language in pycountry.languages:
        inverted = getattr(language, 'inverted_name', None)
        if inverted and inverted.lower() == wanted:
            # Many languages only have a three-letter code; reading alpha_2
            # directly would raise AttributeError for them.
            code = getattr(language, 'alpha_2', None)
            if code:
                return code

    return None

@staticmethod
def log_translation_status(po_file_path, original_texts, translations):
Expand Down Expand Up @@ -99,14 +126,14 @@ def update_po_entry(po_file, original_text, translated_text):
logging.warning("Original text '%s' not found in the .po file.", original_text)


@dataclass
class TranslationConfig:
""" Class to hold configuration parameters for the translation service. """
# NOTE(review): this span is a flattened diff hunk. The hand-written __init__ below
# and the annotated fields after it declare the same five attributes with the same
# defaults; presumably __init__ is the pre-change side and the @dataclass fields
# are the post-change side -- TODO confirm against the repository.
def __init__(self, client, model, bulk_mode=False, fuzzy=False, folder_language=False): # pylint: disable=R0913
self.client = client
self.model = model
self.bulk_mode = bulk_mode
self.fuzzy = fuzzy
self.folder_language = folder_language
# Required fields (no default): must be supplied when constructing the config.
client: object
model: str
# Optional flags, all defaulting to False.
bulk_mode: bool = False
fuzzy: bool = False
folder_language: bool = False


class TranslationService:
Expand Down Expand Up @@ -473,8 +500,14 @@ def main():
else:
detail_langs = [None] * len(lang_codes) # If no detailed language is provided, default to None

# Create a configuration object
config = TranslationConfig(client, args.model, args.bulk, args.fuzzy, args.folder_language)
# And in main():
config = TranslationConfig(
client=client,
model=args.model,
bulk_mode=args.bulk, # Changed bulk to bulk_mode
fuzzy=args.fuzzy,
folder_language=args.folder_language
)

# Initialize the translation service with the configuration object
translation_service = TranslationService(config, args.bulksize)
Expand Down
5 changes: 3 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
polib==1.2.0
openai==v1.42.0
openai==1.58.1
python-dotenv==1.0.0
pytest==8.2.2
tenacity==9.0.0
setuptools-scm==8.1.0
setuptools-scm==8.1.0
pycountry==24.6.1
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
include_package_data=True,
install_requires=[
'polib==1.2.0',
'openai==v1.42.0',
'openai==1.58.1',
'python-dotenv==1.0.0',
'tenacity==9.0.0',
],
Expand Down

0 comments on commit 52bab8b

Please sign in to comment.