From 39975dbb7750984945f933b8266a4cbd277ef04e Mon Sep 17 00:00:00 2001 From: Omar Al-Ithawi Date: Sun, 19 Nov 2023 17:04:30 +0300 Subject: [PATCH] fix: avoid overriding manual edits for newer entries --- scripts/sync_translations.py | 70 ++++++-- scripts/tests/test_sync_translations.py | 220 +++++++++++++++++++++++- 2 files changed, 276 insertions(+), 14 deletions(-) diff --git a/scripts/sync_translations.py b/scripts/sync_translations.py index f8c29a4e4fd..f427d812c98 100644 --- a/scripts/sync_translations.py +++ b/scripts/sync_translations.py @@ -30,6 +30,19 @@ ORGANIZATION_SLUG = 'open-edx' +def parse_tx_date(date_str): + """ + Parse a date string coming from Transifex into a datetime object. + """ + if date_str: + if date_str.endswith('Z'): + date_str = date_str.replace('Z', '+00:00') + + return datetime.fromisoformat(date_str) + + return None + + class Command: workflow_file_path = '.github/workflows/sync-translations.yml' @@ -118,25 +131,56 @@ def sync_translations(self, language_code, old_resource, new_resource): Sync specific language translations into the new Transifex resource. """ print(' syncing', language_code, '...') - old_translations = { + translations_from_old_project = { self.get_translation_id(translation): translation for translation in self.get_translations(language_code=language_code, resource=old_resource) } - for new_translation in self.get_translations(language_code=language_code, resource=new_resource): - translation_id = self.get_translation_id(new_translation) - if old_translation := old_translations.get(translation_id): - updates = {} - for attr in ['reviewed', 'proofread', 'strings']: - if old_attr_value := getattr(old_translation, attr, None): - if old_attr_value != getattr(new_translation, attr, None): - updates[attr] = old_attr_value + for current_translation in self.get_translations(language_code=language_code, resource=new_resource): + translation_id = self.get_translation_id(current_translation) + if translation_from_old_project := translations_from_old_project.get(translation_id): + self.sync_translation_entry( + translation_from_old_project=translation_from_old_project, + current_translation=current_translation, + ) - if updates: - print(translation_id, updates) + def sync_translation_entry(self, translation_from_old_project, current_translation): + """ + Sync a single translation entry from the old project to the new one. - if not self.is_dry_run(): - new_translation.save(**updates) + Return: + str: status code + - updated: if the entry was updated + - skipped: if the entry was skipped + - updated-dry-run: if the entry was updated in dry-run mode + """ + translation_id = self.get_translation_id(current_translation) + + updates = {} + for attr in ['reviewed', 'proofread', 'strings']: + if old_attr_value := getattr(translation_from_old_project, attr, None): + if old_attr_value != getattr(current_translation, attr, None): + updates[attr] = old_attr_value + + # Avoid overwriting more recent translations in the open-edx/openedx-translations project + newer_translation_found = False + old_project_translation_time = parse_tx_date(translation_from_old_project.datetime_translated) + new_project_translation_time = parse_tx_date(current_translation.datetime_translated) + + if old_project_translation_time and new_project_translation_time: + newer_translation_found = new_project_translation_time > old_project_translation_time + + if updates: + if newer_translation_found: + print(translation_id, updates, '[Skipped: new project translation is more recent]') + return 'skipped' + else: + print(translation_id, updates, '[Dry run]' if self.is_dry_run() else '') + if self.is_dry_run(): + return 'updated-dry-run' + else: + current_translation.save(**updates) + return 'updated' def sync_tags(self, old_resource, new_resource): """ diff --git a/scripts/tests/test_sync_translations.py b/scripts/tests/test_sync_translations.py index 3dfd7ccf3d9..199c0e3fd2c 100644 --- a/scripts/tests/test_sync_translations.py +++ b/scripts/tests/test_sync_translations.py @@ -14,7 +14,11 @@ from transifex.api.jsonapi.auth import BearerAuthentication from . import response_data -from ..sync_translations import Command, ORGANIZATION_SLUG +from ..sync_translations import ( + Command, + ORGANIZATION_SLUG, + parse_tx_date, +) HOST = transifex_api.HOST @@ -34,6 +38,70 @@ def sync_command(**kwargs): return result +@dataclass +class ResourceStringMock: + """ + String entry in Transifex. + + Mock class for the transifex.api.ResourceString class. + """ + key: str + context: str = '' + + +@dataclass +class ResourceTranslationMock: + """ + Translation for an entry in Transifex. + + Mock class for the transifex.api.ResourceTranslation class. + """ + resource_string: ResourceStringMock + strings: dict + reviewed: bool + proofread: bool + datetime_translated: str + + _updates: dict = None # Last updates applied via `save()` + + def save(self, **updates): + """ + Mock ResourceTranslation.save() method. + """ + self._updates = updates + + @property + def updates(self): + """ + Return the last updates applied via `save()`. + """ + return self._updates + + @classmethod + def factory( + cls, + key='key', + context='', + translation: Union[str, None] = 'dummy translation', + **kwargs + ): + mock_kwargs = dict( + resource_string=ResourceStringMock( + key=key, + context=context + ), + strings={ + key: translation, + }, + reviewed=False, + proofread=False, + datetime_translated='2021-01-01T00:00:00Z', + ) + + mock_kwargs.update(kwargs) + return cls(**mock_kwargs) + + @responses.activate def test_get_transifex_organization_projects(): """ @@ -96,3 +164,153 @@ def test_get_translations(): items = list(data) assert len(items) == 1 assert items[0].id == response_data.RESPONSE_GET_LANGUAGE['data'][0]['id'] + + +def test_translations_entry_update_empty_translation(): + """ + Test updating an entry from old project where `current_translation` is empty. + """ + command = sync_command(dry_run=False) + + translation_from_old_project = ResourceTranslationMock.factory( + key='test_key', + translation='old translation', + reviewed=True, + datetime_translated='2023-01-01T00:00:00Z', + ) + + # Current translation is empty + current_translation = ResourceTranslationMock.factory( + key='test_key', + translation=None, + datetime_translated=None, + ) + + status = command.sync_translation_entry( + translation_from_old_project, current_translation + ) + + assert status == 'updated' + assert current_translation.updates == { + 'strings': { + 'test_key': 'old translation' + }, + 'reviewed': True, + } + + +@pytest.mark.parametrize( + 'old_project_date, current_translation_date, new_translation_str', + [ + # As long as the current translation is _not_ more recent, it should be updated + (None, '2023-01-01T00:00:00Z', None), + (None, '2023-01-01T00:00:00Z', 'some translation'), + ('2023-01-01T00:00:00Z', '2023-01-01T00:00:00Z', 'some translation'), # Same date + ('2023-01-01T00:00:00Z', '2021-01-01T00:00:00Z', 'some translation'), # New project has newer date + ('2023-01-01T00:00:00Z', None, 'some translation'), + ('2023-01-01T00:00:00Z', None, None), + ] +) +def test_translations_entry_update_translation(old_project_date, current_translation_date, new_translation_str): + """ + Test updating an entry from old project where `current_translation` is has outdated translation. + """ + command = sync_command(dry_run=False) + + translation_from_old_project = ResourceTranslationMock.factory( + key='test_key', + translation='old translation', + reviewed=True, + datetime_translated=old_project_date, + ) + + current_translation = ResourceTranslationMock.factory( + key='test_key', + translation=new_translation_str, + datetime_translated=current_translation_date, + ) + + status = command.sync_translation_entry( + translation_from_old_project, current_translation + ) + + assert status == 'updated' + assert current_translation.updates == { + 'strings': { + 'test_key': 'old translation' + }, + 'reviewed': True, + } + + +def test_translations_entry_more_recent_translation(): + """ + Verify that the more recent translations in the open-edx/openedx-translations project are not overridden. + """ + command = sync_command(dry_run=False) + + translation_from_old_project = ResourceTranslationMock.factory( + key='test_key', + translation='one translation', + reviewed=True, + datetime_translated='2019-01-01T00:00:00Z', + ) + + # Current translation is empty + current_translation = ResourceTranslationMock.factory( + key='test_key', + translation='more recent translation', + datetime_translated='2023-01-01T00:00:00Z', + ) + + status = command.sync_translation_entry( + translation_from_old_project, current_translation + ) + + assert status == 'skipped' + assert not current_translation.updates, 'save() should not be called' + + +def test_translations_entry_dry_run(): + """ + Verify that --dry-run option skips the save() call. + """ + command = sync_command(dry_run=True) + + translation_from_old_project = ResourceTranslationMock.factory( + key='test_key', + translation='old translation', + reviewed=True, + datetime_translated='2023-01-01T00:00:00Z', + ) + + current_translation = ResourceTranslationMock.factory( + key='test_key', + translation=None, + datetime_translated=None, + ) + + status = command.sync_translation_entry( + translation_from_old_project, current_translation + ) + + assert status == 'updated-dry-run' + assert not current_translation.updates, 'save() should not be called in --dry-run mode' + + +@pytest.mark.parametrize( + "date_str, parse_result, test_message", + [ + (None, None, 'None cannot be parsed'), + ('2023-01-01T00:00:00Z', datetime(2023, 1, 1, 0, 0, tzinfo=timezone.utc), + 'Z suffix is replaced with the explict "+00:00" timezone'), + ('2023-01-01T00:00:00', datetime(2023, 1, 1, 0, 0), + 'If there is no Z suffix, no timezone is added'), + ] +) +def test_parse_tx_date(date_str, parse_result, test_message): + """ + Tests for parse_tx_date() helper function. + """ + assert parse_tx_date(date_str) == parse_result, test_message +