From 4de48d889d009c5c5b08ef991b5283db64a5e2c3 Mon Sep 17 00:00:00 2001 From: Omar Al-Ithawi Date: Mon, 20 Nov 2023 15:59:15 +0300 Subject: [PATCH] fix: avoid overriding manual edits for newer entries (#2177) * feat: add edx-platform to the sync, removing a duplicate entry * fix: typo in github action matrix name * feat: use argparse * fix: avoid overriding manual edits for newer entries --- .github/workflows/sync-translations.yml | 54 +++++- scripts/sync_translations.py | 126 ++++++++++--- scripts/tests/test_sync_translations.py | 240 +++++++++++++++++++++++- 3 files changed, 388 insertions(+), 32 deletions(-) diff --git a/.github/workflows/sync-translations.yml b/.github/workflows/sync-translations.yml index 928574124a4..7c6fdd3ced7 100644 --- a/.github/workflows/sync-translations.yml +++ b/.github/workflows/sync-translations.yml @@ -44,10 +44,6 @@ jobs: old_slug: course_discovery old_project_slug: edx-platform - - new_slug: course-discovery - old_slug: course_discovery - old_project_slug: edx-platform - - new_slug: course-discovery-js old_slug: course_discovery-js old_project_slug: edx-platform @@ -116,6 +112,56 @@ jobs: old_slug: paragon old_project_slug: edx-platform + # Start: edx-platform repo resources + # The edx-platform repo resources has been consolidated into a two resources + # - https://github.com/openedx/edx-platform/blob/master/docs/decisions/0018-standarize-django-po-files.rst + + - new_slug: edx-platform + old_slug: django-partial + old_project_slug: edx-platform + + - new_slug: edx-platform + old_slug: django-studio + old_project_slug: edx-platform + + - new_slug: edx-platform + old_slug: edx_proctoring_proctortrack + old_project_slug: edx-platform + + - new_slug: edx-platform + old_slug: mako + old_project_slug: edx-platform + + - new_slug: edx-platform + old_slug: mako-studio + old_project_slug: edx-platform + + - new_slug: edx-platform + old_slug: wiki + old_project_slug: edx-platform + + - new_slug: edx-platform-js + old_slug: underscore + old_project_slug: edx-platform + + - new_slug: edx-platform-js + old_slug: djangojs-studio + old_project_slug: edx-platform + + - new_slug: edx-platform-js + old_slug: underscore-studio + old_project_slug: edx-platform + + - new_slug: edx-platform-js + old_slug: djangojs-account-settings-view + old_project_slug: edx-platform + + - new_slug: edx-platform-js + old_slug: djangojs-partial + old_project_slug: edx-platform + + # End: edx-platform repo resources + steps: - uses: actions/checkout@v3 - name: setup python diff --git a/scripts/sync_translations.py b/scripts/sync_translations.py index bebf3d8c002..1bf06bc3166 100644 --- a/scripts/sync_translations.py +++ b/scripts/sync_translations.py @@ -1,6 +1,25 @@ +""" +Sync translations from the deprecated Transifex projects into the new openedx-translations project. + + - Old projects links: + * edX Platform Core: https://app.transifex.com/open-edx/edx-platform/ + * XBlocks: https://app.transifex.com/open-edx/xblocks/ + + - New project link: + * https://app.transifex.com/open-edx/openedx-translations/ + + +Variable names meaning: + + - current_translation: translation in the new "open-edx/openedx-translations" project + - translation_from_old_project: translation in the old "open-edx/edx-platform" or "open-edx/xblocks" projects + +""" + +import argparse import configparser +from datetime import datetime import os -import sys from os.path import expanduser import yaml @@ -11,12 +30,26 @@ ORGANIZATION_SLUG = 'open-edx' +def parse_tx_date(date_str): + """ + Parse a date string coming from Transifex into a datetime object. + """ + if date_str: + if date_str.endswith('Z'): + date_str = date_str.replace('Z', '+00:00') + + return datetime.fromisoformat(date_str) + + return None + + class Command: workflow_file_path = '.github/workflows/sync-translations.yml' - def __init__(self, argv, tx_api, environ): - self.argv = argv + def __init__(self, tx_api, dry_run, simulate_github_workflow, environ): + self.dry_run = dry_run + self.simulate_github_workflow = simulate_github_workflow self.tx_api = tx_api self.environ = environ @@ -24,13 +57,13 @@ def is_dry_run(self): """ Check if the script is running in dry-run mode. """ - return '--dry-run' in self.argv + return self.dry_run def is_simulated_github_actions(self): """ Check if the script is running in simulated GitHub Actions mode. """ - return '--simulate-github-workflow' in self.argv + return self.simulate_github_workflow def get_resource_url(self, resource, project_slug): return f'https://www.transifex.com/{ORGANIZATION_SLUG}/{project_slug}/{resource.slug}' @@ -98,25 +131,61 @@ def sync_translations(self, language_code, old_resource, new_resource): Sync specific language translations into the new Transifex resource. """ print(' syncing', language_code, '...') - old_translations = { + translations_from_old_project = { self.get_translation_id(translation): translation for translation in self.get_translations(language_code=language_code, resource=old_resource) } - for new_translation in self.get_translations(language_code=language_code, resource=new_resource): - translation_id = self.get_translation_id(new_translation) - if old_translation := old_translations.get(translation_id): - updates = {} - for attr in ['reviewed', 'proofread', 'strings']: - if old_attr_value := getattr(old_translation, attr, None): - if old_attr_value != getattr(new_translation, attr, None): - updates[attr] = old_attr_value + for current_translation in self.get_translations(language_code=language_code, resource=new_resource): + translation_id = self.get_translation_id(current_translation) + if translation_from_old_project := translations_from_old_project.get(translation_id): + self.sync_translation_entry( + translation_from_old_project=translation_from_old_project, + current_translation=current_translation, + ) - if updates: - print(translation_id, updates) + def sync_translation_entry(self, translation_from_old_project, current_translation): + """ + Sync a single translation entry from the old project to the new one. - if not self.is_dry_run(): - new_translation.save(**updates) + Return: + str: status code + - updated: if the entry was updated + - skipped: if the entry was skipped + - updated-dry-run: if the entry was updated in dry-run mode + """ + translation_id = self.get_translation_id(current_translation) + + updates = {} + for attr in ['reviewed', 'proofread', 'strings']: + if old_attr_value := getattr(translation_from_old_project, attr, None): + if old_attr_value != getattr(current_translation, attr, None): + updates[attr] = old_attr_value + + # Avoid overwriting more recent translations in the open-edx/openedx-translations project + newer_translation_found = False + old_project_translation_time = parse_tx_date(translation_from_old_project.datetime_translated) + current_translation_time = parse_tx_date(current_translation.datetime_translated) + + if old_project_translation_time and current_translation_time: + newer_translation_found = current_translation_time > old_project_translation_time + + if updates: + if newer_translation_found: + print(translation_id, updates, + ( + f'[Skipped: current translation "{current_translation_time}" ' + f'is more recent than "{old_project_translation_time}"]' + ) + ) + return 'skipped' + else: + print(translation_id, updates, '[Dry run]' if self.is_dry_run() else '') + if self.is_dry_run(): + return 'updated-dry-run' + else: + current_translation.save(**updates) + return 'updated' def sync_tags(self, old_resource, new_resource): """ @@ -181,7 +250,7 @@ def run_from_workflow_yaml_file(self, workflow_configs): """ Run the script from a GitHub Actions migrate-from-transifex-old-project.yml workflow file. """ - pairs_list = workflow_configs['jobs']['migrate-translations']['strategy']['matrix']['batch'] + pairs_list = workflow_configs['jobs']['migrate-translations']['strategy']['matrix']['resource'] print('Verifying existence of resource pairs...') for pair in pairs_list: @@ -213,6 +282,21 @@ def run(self): ) -if __name__ == '__main__': - command = Command(sys.argv, environ=os.environ, tx_api=transifex_api) +def main(): # pragma: no cover + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument('--simulate-github-workflow', action='store_true', + dest='simulate_github_workflow') + parser.add_argument('--dry-run', action='store_true', dest='dry_run') + argparse_args = parser.parse_args() + + command = Command( + tx_api=transifex_api, + environ=os.environ, + dry_run=argparse_args.dry_run, + simulate_github_workflow=argparse_args.simulate_github_workflow, + ) command.run() + + +if __name__ == '__main__': + main() # pragma: no cover diff --git a/scripts/tests/test_sync_translations.py b/scripts/tests/test_sync_translations.py index 5f752c4c943..199c0e3fd2c 100644 --- a/scripts/tests/test_sync_translations.py +++ b/scripts/tests/test_sync_translations.py @@ -1,31 +1,107 @@ """ Tests for sync_translations.py """ +from dataclasses import dataclass +from datetime import datetime, timezone import types +from typing import Union +import pytest import responses + from transifex.api import transifex_api, Project from transifex.api.jsonapi import Resource from transifex.api.jsonapi.auth import BearerAuthentication from . import response_data -from ..sync_translations import Command, ORGANIZATION_SLUG +from ..sync_translations import ( + Command, + ORGANIZATION_SLUG, + parse_tx_date, +) HOST = transifex_api.HOST -def sync_command(): - result = Command( - argv=[], - tx_api=transifex_api, - environ={ +def sync_command(**kwargs): + command_args = { + 'tx_api': transifex_api, + 'dry_run': True, + 'simulate_github_workflow': False, + 'environ': { 'TX_API_TOKEN': 'dummy-token' } - ) + } + command_args.update(kwargs) + result = Command(**command_args) result.tx_api.make_auth_headers = BearerAuthentication('dummy-token') return result +@dataclass +class ResourceStringMock: + """ + String entry in Transifex. + + Mock class for the transifex.api.ResourceString class. + """ + key: str + context: str = '' + + +@dataclass +class ResourceTranslationMock: + """ + Translation for an entry in Transifex. + + Mock class for the transifex.api.ResourceTranslation class. + """ + resource_string: ResourceStringMock + strings: dict + reviewed: bool + proofread: bool + datetime_translated: str + + _updates: dict = None # Last updates applied via `save()` + + def save(self, **updates): + """ + Mock ResourceTranslation.save() method. + """ + self._updates = updates + + @property + def updates(self): + """ + Return the last updates applied via `save()`. + """ + return self._updates + + @classmethod + def factory( + cls, + key='key', + context='', + translation: Union[str, None] = 'dummy translation', + **kwargs + ): + mock_kwargs = dict( + resource_string=ResourceStringMock( + key=key, + context=context + ), + strings={ + key: translation, + }, + reviewed=False, + proofread=False, + datetime_translated='2021-01-01T00:00:00Z', + ) + + mock_kwargs.update(kwargs) + return cls(**mock_kwargs) + + @responses.activate def test_get_transifex_organization_projects(): """ @@ -88,3 +164,153 @@ def test_get_translations(): items = list(data) assert len(items) == 1 assert items[0].id == response_data.RESPONSE_GET_LANGUAGE['data'][0]['id'] + + +def test_translations_entry_update_empty_translation(): + """ + Test updating an entry from old project where `current_translation` is empty. + """ + command = sync_command(dry_run=False) + + translation_from_old_project = ResourceTranslationMock.factory( + key='test_key', + translation='old translation', + reviewed=True, + datetime_translated='2023-01-01T00:00:00Z', + ) + + # Current translation is empty + current_translation = ResourceTranslationMock.factory( + key='test_key', + translation=None, + datetime_translated=None, + ) + + status = command.sync_translation_entry( + translation_from_old_project, current_translation + ) + + assert status == 'updated' + assert current_translation.updates == { + 'strings': { + 'test_key': 'old translation' + }, + 'reviewed': True, + } + + +@pytest.mark.parametrize( + 'old_project_date, current_translation_date, new_translation_str', + [ + # As long as the current translation is _not_ more recent, it should be updated + (None, '2023-01-01T00:00:00Z', None), + (None, '2023-01-01T00:00:00Z', 'some translation'), + ('2023-01-01T00:00:00Z', '2023-01-01T00:00:00Z', 'some translation'), # Same date + ('2023-01-01T00:00:00Z', '2021-01-01T00:00:00Z', 'some translation'), # New project has newer date + ('2023-01-01T00:00:00Z', None, 'some translation'), + ('2023-01-01T00:00:00Z', None, None), + ] +) +def test_translations_entry_update_translation(old_project_date, current_translation_date, new_translation_str): + """ + Test updating an entry from old project where `current_translation` is has outdated translation. + """ + command = sync_command(dry_run=False) + + translation_from_old_project = ResourceTranslationMock.factory( + key='test_key', + translation='old translation', + reviewed=True, + datetime_translated=old_project_date, + ) + + current_translation = ResourceTranslationMock.factory( + key='test_key', + translation=new_translation_str, + datetime_translated=current_translation_date, + ) + + status = command.sync_translation_entry( + translation_from_old_project, current_translation + ) + + assert status == 'updated' + assert current_translation.updates == { + 'strings': { + 'test_key': 'old translation' + }, + 'reviewed': True, + } + + +def test_translations_entry_more_recent_translation(): + """ + Verify that the more recent translations in the open-edx/openedx-translations project are not overridden. + """ + command = sync_command(dry_run=False) + + translation_from_old_project = ResourceTranslationMock.factory( + key='test_key', + translation='one translation', + reviewed=True, + datetime_translated='2019-01-01T00:00:00Z', + ) + + # Current translation is empty + current_translation = ResourceTranslationMock.factory( + key='test_key', + translation='more recent translation', + datetime_translated='2023-01-01T00:00:00Z', + ) + + status = command.sync_translation_entry( + translation_from_old_project, current_translation + ) + + assert status == 'skipped' + assert not current_translation.updates, 'save() should not be called' + + +def test_translations_entry_dry_run(): + """ + Verify that --dry-run option skips the save() call. + """ + command = sync_command(dry_run=True) + + translation_from_old_project = ResourceTranslationMock.factory( + key='test_key', + translation='old translation', + reviewed=True, + datetime_translated='2023-01-01T00:00:00Z', + ) + + current_translation = ResourceTranslationMock.factory( + key='test_key', + translation=None, + datetime_translated=None, + ) + + status = command.sync_translation_entry( + translation_from_old_project, current_translation + ) + + assert status == 'updated-dry-run' + assert not current_translation.updates, 'save() should not be called in --dry-run mode' + + +@pytest.mark.parametrize( + "date_str, parse_result, test_message", + [ + (None, None, 'None cannot be parsed'), + ('2023-01-01T00:00:00Z', datetime(2023, 1, 1, 0, 0, tzinfo=timezone.utc), + 'Z suffix is replaced with the explict "+00:00" timezone'), + ('2023-01-01T00:00:00', datetime(2023, 1, 1, 0, 0), + 'If there is no Z suffix, no timezone is added'), + ] +) +def test_parse_tx_date(date_str, parse_result, test_message): + """ + Tests for parse_tx_date() helper function. + """ + assert parse_tx_date(date_str) == parse_result, test_message +