From 4876753abe8663edfd08e509d74589321650bfe9 Mon Sep 17 00:00:00 2001 From: Omar Al-Ithawi Date: Sat, 4 Nov 2023 17:57:49 +0300 Subject: [PATCH] fixup! fixup! feat: mark invalid entries as fuzzy --- .../workflows/validate-translation-files.yml | 2 +- Makefile | 5 - .../tests/test_validate_translation_files.py | 48 ------- scripts/validate_translation_files.py | 117 ++---------------- 4 files changed, 14 insertions(+), 158 deletions(-) diff --git a/.github/workflows/validate-translation-files.yml b/.github/workflows/validate-translation-files.yml index 79eaa78ab42..c78a62764e3 100644 --- a/.github/workflows/validate-translation-files.yml +++ b/.github/workflows/validate-translation-files.yml @@ -50,7 +50,7 @@ jobs: :warning: There are errors in the translation files: ``` - ${{ steps.validate_translation_files.outputs.VALIDATION_ERRORS }} + ${{ steps.validate_translation_files.outputs.VALIDATION_ERRORS || 'No errors were reported.' }} ``` This comment has been posted by the `validate-translation-files.yml` GitHub Actions workflow. diff --git a/Makefile b/Makefile index 4373b93c99d..ebd1aa73ec9 100644 --- a/Makefile +++ b/Makefile @@ -40,11 +40,6 @@ test: ## Run scripts tests validate_translation_files: ## Run basic validation to ensure translation files are valid python scripts/validate_translation_files.py - -validate_translation_files_mark_fuzzy: ## Run basic validation and mark invalid entries as fuzzy - python scripts/validate_translation_files.py --mark-fuzzy - - sync_translations: ## Syncs from the old projects to the new openedx-translations project python scripts/sync_translations.py $(SYNC_ARGS) diff --git a/scripts/tests/test_validate_translation_files.py b/scripts/tests/test_validate_translation_files.py index d425fedb1f2..233f6ab9e9e 100644 --- a/scripts/tests/test_validate_translation_files.py +++ b/scripts/tests/test_validate_translation_files.py @@ -4,7 +4,6 @@ import os.path import re -import shutil from ..validate_translation_files import ( get_translation_files, @@ -47,7 +46,6 @@ def test_main_on_invalid_files(capsys): assert 'en/LC_MESSAGES/django.po' not in out, 'Source file should not be validated' assert re.match(r'INVALID: .*hi/LC_MESSAGES/django.po', err) - assert 'Errors:' in err assert '\'msgstr\' is not a valid Python brace format string, unlike \'msgid\'' in err assert 'FAILURE: Some translations are invalid.' in err @@ -65,51 +63,5 @@ def test_main_on_valid_files(capsys): assert 'VALID:' in out, 'Valid files should be printed in stdout' assert 'ar/LC_MESSAGES/django.po' in out, 'Arabic translation file is valid' assert 'SUCCESS: All translation files are valid.' in out - assert 'Errors:' not in out, 'There should be no errors' assert not err.strip(), 'The stderr should be empty' assert exit_code == 0, 'Should succeed due in validating the ar/LC_MESSAGES/django.po file' - - -def test_main_on_valid_files_with_mark_fuzzy(capsys, tmp_path): - """ - Test the `main` function with the --mark-fuzzy option. - """ - original_mock_translations_dir = os.path.join(SCRIPT_DIR, 'mock_translations_dir') - mock_translations_dir = tmp_path / 'translations' - shutil.copytree(original_mock_translations_dir, mock_translations_dir) - - exit_code = validate_translation_files(translations_dir=mock_translations_dir, mark_fuzzy=True) - out, err = capsys.readouterr() - - assert 'VALID:' in out, 'Valid files should be printed in stdout' - assert re.match(r'FIXED: .*hi/LC_MESSAGES/django.po', err), 'Should print the FIXED files in stderr' - assert 'ar/LC_MESSAGES/django.po' in out, 'Arabic translation file is valid regardless' - assert 'NOTICE: Some translations were fixed.' in err, 'Should print NOTICE in stderr' - assert 'SUCCESS: All translation files are now valid.' in err, 'Should print SUCCESS in stderr' - assert 'Previous errors:' in err, 'Original errors should be printed' - assert 'New output:' in err, 'New output should be printed after the fix' - assert exit_code == 0, ( - 'Should succeed even though the in/LC_MESSAGES/django.po is invalid, because it was marked as fuzzy' - ) - - -def test_main_on_valid_files_with_mark_fuzzy_unfixable_files(capsys, tmp_path): - """ - Test the `main` function with the --mark-fuzzy option but the file is broken in an unknown way. - """ - original_mock_translations_dir = os.path.join(SCRIPT_DIR, 'mock_translations_dir') - mock_translations_dir = tmp_path / 'translations' - shutil.copytree(original_mock_translations_dir, mock_translations_dir) - hi_language_file = mock_translations_dir / 'demo-xblock/conf/locale/hi/LC_MESSAGES/django.po' - - with open(hi_language_file, 'a') as f: - f.write('<<<>>>') - - exit_code = validate_translation_files(translations_dir=mock_translations_dir, mark_fuzzy=True) - out, err = capsys.readouterr() - - assert 'VALID:' in out, 'Valid files should be printed in stdout regardless' - assert re.match(r'INVALID: .*hi/LC_MESSAGES/django.po', err), 'Should print the Error files in stderr' - assert 'Previous errors:' in err, 'Original errors should be printed' - assert 'New errors:' in err, 'New errors should be printed after the fix' - assert exit_code == 1, 'Should fail due to unfixable hi/LC_MESSAGES/django.po file' diff --git a/scripts/validate_translation_files.py b/scripts/validate_translation_files.py index 7082b2738ee..0e51482d8a0 100644 --- a/scripts/validate_translation_files.py +++ b/scripts/validate_translation_files.py @@ -42,69 +42,13 @@ def validate_translation_file(po_file): stderr = completed_process.stderr.decode(encoding='utf-8', errors='replace') return { - 'po_file': po_file, 'valid': completed_process.returncode == 0, - 'output': f'{stdout}\n{stderr}'.strip(), - 'stdout': stdout, - 'stderr': stderr, + 'output': f'{stdout}\n{stderr}', } -def get_invalid_msgstr_lines(validation_stderr): - """ - Parse the error output of `msgfmt` and return line numbers of invalid msgstr lines. - """ - invalid_msgstr_lines = [] - for line in validation_stderr.splitlines(): - if '.po:' in line: - _file_name, line_number, _rest = line.split(':', 2) - invalid_msgstr_lines.append(int(line_number)) - return invalid_msgstr_lines - - -def get_invalid_msgid_lines(po_file, invalid_msgstr_lines): - """ - Get the line numbers of invalid msgid lines by their msgstr line numbers. - """ - with open(po_file, mode='r', encoding='utf-8') as f: - pofile_lines = f.readlines() - - invalid_msgid_lines = [] - last_msgid_line = None - for line_number, line_text in enumerate(pofile_lines, start=1): - if line_text.startswith('msgid'): - last_msgid_line = line_number - - if line_text.startswith('msgstr') and line_number in invalid_msgstr_lines: - invalid_msgid_lines.append(last_msgid_line) - - return invalid_msgid_lines - - -def mark_invalid_entries_as_fuzzy(validation_result): - """ - Mark invalid entries as fuzzy. - """ - # Get line numbers of invalid msgstr lines - invalid_msgstr_lines = get_invalid_msgstr_lines(validation_result['stderr']) - invalid_msgid_lines = get_invalid_msgid_lines( - validation_result['po_file'], - invalid_msgstr_lines, - ) - - with open(validation_result['po_file'], mode='r', encoding='utf-8') as f: - pofile_lines = f.readlines() - - with open(validation_result['po_file'], mode='w', encoding='utf-8') as f: - for line_number, line_text in enumerate(pofile_lines, start=1): - if line_number in invalid_msgid_lines: - f.write('#, fuzzy\n') - f.write(line_text) - - def validate_translation_files( translations_dir='translations', - mark_fuzzy=False, ): """ Run GNU gettext `msgfmt` and print errors to stderr. @@ -115,79 +59,44 @@ def validate_translation_files( return 1 for invalid translations. """ translations_valid = True - translations_fixed = False - stderr_lines = [] + invalid_lines = [] po_files = get_translation_files(translations_dir) for po_file in po_files: - first_attempt = validate_translation_file(po_file) + result = validate_translation_file(po_file) - if first_attempt['valid']: + if result['valid']: print('VALID: ' + po_file) - print(first_attempt['output'], '\n' * 2) + print(result['output'], '\n' * 2) else: - if mark_fuzzy: - mark_invalid_entries_as_fuzzy(first_attempt) - - second_attempt = validate_translation_file(po_file) - - if second_attempt['valid']: - translations_fixed = True - stderr_lines.append('FIXED: ' + po_file) - stderr_lines.append('This file was invalid, but it was fixed by`make validate_translation_files`.') - stderr_lines.append('Previous errors:') - stderr_lines.append(first_attempt['output'] + '\n') - stderr_lines.append('New output:') - stderr_lines.append(second_attempt['output'] + '\n' * 2) - else: - stderr_lines.append('INVALID: ' + po_file) - stderr_lines.append('Attempted to fix the file, but it is still invalid.') - - if mark_fuzzy: - stderr_lines.append('Previous errors:') - else: - stderr_lines.append('Errors:') - - stderr_lines.append(first_attempt['output'] + '\n') - - if mark_fuzzy: - stderr_lines.append('New errors:') - stderr_lines.append(second_attempt['output'] + '\n' * 2) - translations_valid = False + invalid_lines.append('INVALID: ' + po_file) + invalid_lines.append(result['output'] + '\n' * 2) + translations_valid = False # Print validation errors in the bottom for easy reading - print('\n'.join(stderr_lines), file=sys.stderr) + print('\n'.join(invalid_lines), file=sys.stderr) if translations_valid: + print('-----------------------------------------') + print('SUCCESS: All translation files are valid.') + print('-----------------------------------------') exit_code = 0 - - if translations_fixed: - print('---------------------------------------------', file=sys.stderr) - print('NOTICE: Some translations were fixed.', file=sys.stderr) - print('SUCCESS: All translation files are now valid.', file=sys.stderr) - print('---------------------------------------------', file=sys.stderr) - else: - print('-----------------------------------------') - print('SUCCESS: All translation files are valid.') - print('-----------------------------------------') else: - exit_code = 1 print('---------------------------------------', file=sys.stderr) print('FAILURE: Some translations are invalid.', file=sys.stderr) print('---------------------------------------', file=sys.stderr) + exit_code = 1 return exit_code def main(): # pragma: no cover parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument('--mark-fuzzy', dest='mark_fuzzy', action='store_true', default=False) parser.add_argument('--dir', action='store', type=str, default='translations') args = parser.parse_args() sys.exit(validate_translation_files( translations_dir=args.dir, - mark_fuzzy=args.mark_fuzzy, ))