Skip to content

Commit

Permalink
fixup! fixup! feat: mark invalid entries as fuzzy
Browse files Browse the repository at this point in the history
  • Loading branch information
OmarIthawi committed Nov 4, 2023
1 parent 5d46ef5 commit 4876753
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 158 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/validate-translation-files.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ jobs:
:warning: There are errors in the translation files:
```
${{ steps.validate_translation_files.outputs.VALIDATION_ERRORS }}
${{ steps.validate_translation_files.outputs.VALIDATION_ERRORS || 'No errors were reported.' }}
```
This comment has been posted by the `validate-translation-files.yml` GitHub Actions workflow.
5 changes: 0 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,6 @@ test: ## Run scripts tests
validate_translation_files: ## Run basic validation to ensure translation files are valid
python scripts/validate_translation_files.py


validate_translation_files_mark_fuzzy: ## Run basic validation and mark invalid entries as fuzzy
python scripts/validate_translation_files.py --mark-fuzzy


sync_translations: ## Syncs from the old projects to the new openedx-translations project
python scripts/sync_translations.py $(SYNC_ARGS)

Expand Down
48 changes: 0 additions & 48 deletions scripts/tests/test_validate_translation_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

import os.path
import re
import shutil

from ..validate_translation_files import (
get_translation_files,
Expand Down Expand Up @@ -47,7 +46,6 @@ def test_main_on_invalid_files(capsys):
assert 'en/LC_MESSAGES/django.po' not in out, 'Source file should not be validated'

assert re.match(r'INVALID: .*hi/LC_MESSAGES/django.po', err)
assert 'Errors:' in err
assert '\'msgstr\' is not a valid Python brace format string, unlike \'msgid\'' in err
assert 'FAILURE: Some translations are invalid.' in err

Expand All @@ -65,51 +63,5 @@ def test_main_on_valid_files(capsys):
assert 'VALID:' in out, 'Valid files should be printed in stdout'
assert 'ar/LC_MESSAGES/django.po' in out, 'Arabic translation file is valid'
assert 'SUCCESS: All translation files are valid.' in out
assert 'Errors:' not in out, 'There should be no errors'
assert not err.strip(), 'The stderr should be empty'
assert exit_code == 0, 'Should succeed due in validating the ar/LC_MESSAGES/django.po file'


def test_main_on_valid_files_with_mark_fuzzy(capsys, tmp_path):
"""
Test the `main` function with the --mark-fuzzy option.
"""
original_mock_translations_dir = os.path.join(SCRIPT_DIR, 'mock_translations_dir')
mock_translations_dir = tmp_path / 'translations'
shutil.copytree(original_mock_translations_dir, mock_translations_dir)

exit_code = validate_translation_files(translations_dir=mock_translations_dir, mark_fuzzy=True)
out, err = capsys.readouterr()

assert 'VALID:' in out, 'Valid files should be printed in stdout'
assert re.match(r'FIXED: .*hi/LC_MESSAGES/django.po', err), 'Should print the FIXED files in stderr'
assert 'ar/LC_MESSAGES/django.po' in out, 'Arabic translation file is valid regardless'
assert 'NOTICE: Some translations were fixed.' in err, 'Should print NOTICE in stderr'
assert 'SUCCESS: All translation files are now valid.' in err, 'Should print SUCCESS in stderr'
assert 'Previous errors:' in err, 'Original errors should be printed'
assert 'New output:' in err, 'New output should be printed after the fix'
assert exit_code == 0, (
'Should succeed even though the in/LC_MESSAGES/django.po is invalid, because it was marked as fuzzy'
)


def test_main_on_valid_files_with_mark_fuzzy_unfixable_files(capsys, tmp_path):
"""
Test the `main` function with the --mark-fuzzy option but the file is broken in an unknown way.
"""
original_mock_translations_dir = os.path.join(SCRIPT_DIR, 'mock_translations_dir')
mock_translations_dir = tmp_path / 'translations'
shutil.copytree(original_mock_translations_dir, mock_translations_dir)
hi_language_file = mock_translations_dir / 'demo-xblock/conf/locale/hi/LC_MESSAGES/django.po'

with open(hi_language_file, 'a') as f:
f.write('<<<<Some random string to break the file>>>>')

exit_code = validate_translation_files(translations_dir=mock_translations_dir, mark_fuzzy=True)
out, err = capsys.readouterr()

assert 'VALID:' in out, 'Valid files should be printed in stdout regardless'
assert re.match(r'INVALID: .*hi/LC_MESSAGES/django.po', err), 'Should print the Error files in stderr'
assert 'Previous errors:' in err, 'Original errors should be printed'
assert 'New errors:' in err, 'New errors should be printed after the fix'
assert exit_code == 1, 'Should fail due to unfixable hi/LC_MESSAGES/django.po file'
117 changes: 13 additions & 104 deletions scripts/validate_translation_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,69 +42,13 @@ def validate_translation_file(po_file):
stderr = completed_process.stderr.decode(encoding='utf-8', errors='replace')

return {
'po_file': po_file,
'valid': completed_process.returncode == 0,
'output': f'{stdout}\n{stderr}'.strip(),
'stdout': stdout,
'stderr': stderr,
'output': f'{stdout}\n{stderr}',
}


def get_invalid_msgstr_lines(validation_stderr):
"""
Parse the error output of `msgfmt` and return line numbers of invalid msgstr lines.
"""
invalid_msgstr_lines = []
for line in validation_stderr.splitlines():
if '.po:' in line:
_file_name, line_number, _rest = line.split(':', 2)
invalid_msgstr_lines.append(int(line_number))
return invalid_msgstr_lines


def get_invalid_msgid_lines(po_file, invalid_msgstr_lines):
"""
Get the line numbers of invalid msgid lines by their msgstr line numbers.
"""
with open(po_file, mode='r', encoding='utf-8') as f:
pofile_lines = f.readlines()

invalid_msgid_lines = []
last_msgid_line = None
for line_number, line_text in enumerate(pofile_lines, start=1):
if line_text.startswith('msgid'):
last_msgid_line = line_number

if line_text.startswith('msgstr') and line_number in invalid_msgstr_lines:
invalid_msgid_lines.append(last_msgid_line)

return invalid_msgid_lines


def mark_invalid_entries_as_fuzzy(validation_result):
"""
Mark invalid entries as fuzzy.
"""
# Get line numbers of invalid msgstr lines
invalid_msgstr_lines = get_invalid_msgstr_lines(validation_result['stderr'])
invalid_msgid_lines = get_invalid_msgid_lines(
validation_result['po_file'],
invalid_msgstr_lines,
)

with open(validation_result['po_file'], mode='r', encoding='utf-8') as f:
pofile_lines = f.readlines()

with open(validation_result['po_file'], mode='w', encoding='utf-8') as f:
for line_number, line_text in enumerate(pofile_lines, start=1):
if line_number in invalid_msgid_lines:
f.write('#, fuzzy\n')
f.write(line_text)


def validate_translation_files(
translations_dir='translations',
mark_fuzzy=False,
):
"""
Run GNU gettext `msgfmt` and print errors to stderr.
Expand All @@ -115,79 +59,44 @@ def validate_translation_files(
return 1 for invalid translations.
"""
translations_valid = True
translations_fixed = False

stderr_lines = []
invalid_lines = []

po_files = get_translation_files(translations_dir)
for po_file in po_files:
first_attempt = validate_translation_file(po_file)
result = validate_translation_file(po_file)

if first_attempt['valid']:
if result['valid']:
print('VALID: ' + po_file)
print(first_attempt['output'], '\n' * 2)
print(result['output'], '\n' * 2)
else:
if mark_fuzzy:
mark_invalid_entries_as_fuzzy(first_attempt)

second_attempt = validate_translation_file(po_file)

if second_attempt['valid']:
translations_fixed = True
stderr_lines.append('FIXED: ' + po_file)
stderr_lines.append('This file was invalid, but it was fixed by`make validate_translation_files`.')
stderr_lines.append('Previous errors:')
stderr_lines.append(first_attempt['output'] + '\n')
stderr_lines.append('New output:')
stderr_lines.append(second_attempt['output'] + '\n' * 2)
else:
stderr_lines.append('INVALID: ' + po_file)
stderr_lines.append('Attempted to fix the file, but it is still invalid.')

if mark_fuzzy:
stderr_lines.append('Previous errors:')
else:
stderr_lines.append('Errors:')

stderr_lines.append(first_attempt['output'] + '\n')

if mark_fuzzy:
stderr_lines.append('New errors:')
stderr_lines.append(second_attempt['output'] + '\n' * 2)
translations_valid = False
invalid_lines.append('INVALID: ' + po_file)
invalid_lines.append(result['output'] + '\n' * 2)
translations_valid = False

# Print validation errors in the bottom for easy reading
print('\n'.join(stderr_lines), file=sys.stderr)
print('\n'.join(invalid_lines), file=sys.stderr)

if translations_valid:
print('-----------------------------------------')
print('SUCCESS: All translation files are valid.')
print('-----------------------------------------')
exit_code = 0

if translations_fixed:
print('---------------------------------------------', file=sys.stderr)
print('NOTICE: Some translations were fixed.', file=sys.stderr)
print('SUCCESS: All translation files are now valid.', file=sys.stderr)
print('---------------------------------------------', file=sys.stderr)
else:
print('-----------------------------------------')
print('SUCCESS: All translation files are valid.')
print('-----------------------------------------')
else:
exit_code = 1
print('---------------------------------------', file=sys.stderr)
print('FAILURE: Some translations are invalid.', file=sys.stderr)
print('---------------------------------------', file=sys.stderr)
exit_code = 1

return exit_code


def main(): # pragma: no cover
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('--mark-fuzzy', dest='mark_fuzzy', action='store_true', default=False)
parser.add_argument('--dir', action='store', type=str, default='translations')
args = parser.parse_args()
sys.exit(validate_translation_files(
translations_dir=args.dir,
mark_fuzzy=args.mark_fuzzy,
))


Expand Down

0 comments on commit 4876753

Please sign in to comment.