From 3e3ef4fb82f75f42b6481c81aa4cd86ac3686862 Mon Sep 17 00:00:00 2001 From: Omar Al-Ithawi Date: Sun, 19 Nov 2023 16:56:42 +0300 Subject: [PATCH 1/4] feat: add edx-platform to the sync, removing a duplicate entry --- .github/workflows/sync-translations.yml | 54 +++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 4 deletions(-) diff --git a/.github/workflows/sync-translations.yml b/.github/workflows/sync-translations.yml index 928574124a4..7c6fdd3ced7 100644 --- a/.github/workflows/sync-translations.yml +++ b/.github/workflows/sync-translations.yml @@ -44,10 +44,6 @@ jobs: old_slug: course_discovery old_project_slug: edx-platform - - new_slug: course-discovery - old_slug: course_discovery - old_project_slug: edx-platform - - new_slug: course-discovery-js old_slug: course_discovery-js old_project_slug: edx-platform @@ -116,6 +112,56 @@ jobs: old_slug: paragon old_project_slug: edx-platform + # Start: edx-platform repo resources + # The edx-platform repo resources has been consolidated into a two resources + # - https://github.com/openedx/edx-platform/blob/master/docs/decisions/0018-standarize-django-po-files.rst + + - new_slug: edx-platform + old_slug: django-partial + old_project_slug: edx-platform + + - new_slug: edx-platform + old_slug: django-studio + old_project_slug: edx-platform + + - new_slug: edx-platform + old_slug: edx_proctoring_proctortrack + old_project_slug: edx-platform + + - new_slug: edx-platform + old_slug: mako + old_project_slug: edx-platform + + - new_slug: edx-platform + old_slug: mako-studio + old_project_slug: edx-platform + + - new_slug: edx-platform + old_slug: wiki + old_project_slug: edx-platform + + - new_slug: edx-platform-js + old_slug: underscore + old_project_slug: edx-platform + + - new_slug: edx-platform-js + old_slug: djangojs-studio + old_project_slug: edx-platform + + - new_slug: edx-platform-js + old_slug: underscore-studio + old_project_slug: edx-platform + + - new_slug: edx-platform-js + old_slug: djangojs-account-settings-view + old_project_slug: edx-platform + + - new_slug: edx-platform-js + old_slug: djangojs-partial + old_project_slug: edx-platform + + # End: edx-platform repo resources + steps: - uses: actions/checkout@v3 - name: setup python From 7805789a22f9dbd8142aff0dba7149b0c505146a Mon Sep 17 00:00:00 2001 From: Omar Al-Ithawi Date: Sun, 19 Nov 2023 16:57:55 +0300 Subject: [PATCH 2/4] fix: typo in github action matrix name --- scripts/sync_translations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/sync_translations.py b/scripts/sync_translations.py index bebf3d8c002..333a6f930c5 100644 --- a/scripts/sync_translations.py +++ b/scripts/sync_translations.py @@ -181,7 +181,7 @@ def run_from_workflow_yaml_file(self, workflow_configs): """ Run the script from a GitHub Actions migrate-from-transifex-old-project.yml workflow file. """ - pairs_list = workflow_configs['jobs']['migrate-translations']['strategy']['matrix']['batch'] + pairs_list = workflow_configs['jobs']['migrate-translations']['strategy']['matrix']['resource'] print('Verifying existence of resource pairs...') for pair in pairs_list: From 24673d29b745425962caffede280f8f2a3c80751 Mon Sep 17 00:00:00 2001 From: Omar Al-Ithawi Date: Sun, 19 Nov 2023 16:59:59 +0300 Subject: [PATCH 3/4] feat: use argparse --- scripts/sync_translations.py | 49 +++++++++++++++++++++---- scripts/tests/test_sync_translations.py | 20 +++++++--- 2 files changed, 56 insertions(+), 13 deletions(-) diff --git a/scripts/sync_translations.py b/scripts/sync_translations.py index 333a6f930c5..f8c29a4e4fd 100644 --- a/scripts/sync_translations.py +++ b/scripts/sync_translations.py @@ -1,6 +1,25 @@ +""" +Sync translations from the deprecated Transifex projects into the new openedx-translations project. + + - Old projects links: + * edX Platform Core: https://app.transifex.com/open-edx/edx-platform/ + * XBlocks: https://app.transifex.com/open-edx/xblocks/ + + - New project link: + * https://app.transifex.com/open-edx/openedx-translations/ + + +Variable names meaning: + + - current_translation: translation in the new "open-edx/openedx-translations" project + - translation_from_old_project: translation in the old "open-edx/edx-platform" or "open-edx/xblocks" projects + +""" + +import argparse import configparser +from datetime import datetime import os -import sys from os.path import expanduser import yaml @@ -15,8 +34,9 @@ class Command: workflow_file_path = '.github/workflows/sync-translations.yml' - def __init__(self, argv, tx_api, environ): - self.argv = argv + def __init__(self, tx_api, dry_run, simulate_github_workflow, environ): + self.dry_run = dry_run + self.simulate_github_workflow = simulate_github_workflow self.tx_api = tx_api self.environ = environ @@ -24,13 +44,13 @@ def is_dry_run(self): """ Check if the script is running in dry-run mode. """ - return '--dry-run' in self.argv + return self.dry_run def is_simulated_github_actions(self): """ Check if the script is running in simulated GitHub Actions mode. """ - return '--simulate-github-workflow' in self.argv + return self.simulate_github_workflow def get_resource_url(self, resource, project_slug): return f'https://www.transifex.com/{ORGANIZATION_SLUG}/{project_slug}/{resource.slug}' @@ -213,6 +233,21 @@ def run(self): ) -if __name__ == '__main__': - command = Command(sys.argv, environ=os.environ, tx_api=transifex_api) +def main(): # pragma: no cover + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument('--simulate-github-workflow', action='store_true', + dest='simulate_github_workflow') + parser.add_argument('--dry-run', action='store_true', dest='dry_run') + argparse_args = parser.parse_args() + + command = Command( + tx_api=transifex_api, + environ=os.environ, + dry_run=argparse_args.dry_run, + simulate_github_workflow=argparse_args.simulate_github_workflow, + ) command.run() + + +if __name__ == '__main__': + main() # pragma: no cover diff --git a/scripts/tests/test_sync_translations.py b/scripts/tests/test_sync_translations.py index 5f752c4c943..3dfd7ccf3d9 100644 --- a/scripts/tests/test_sync_translations.py +++ b/scripts/tests/test_sync_translations.py @@ -1,9 +1,14 @@ """ Tests for sync_translations.py """ +from dataclasses import dataclass +from datetime import datetime, timezone import types +from typing import Union +import pytest import responses + from transifex.api import transifex_api, Project from transifex.api.jsonapi import Resource from transifex.api.jsonapi.auth import BearerAuthentication @@ -14,14 +19,17 @@ HOST = transifex_api.HOST -def sync_command(): - result = Command( - argv=[], - tx_api=transifex_api, - environ={ +def sync_command(**kwargs): + command_args = { + 'tx_api': transifex_api, + 'dry_run': True, + 'simulate_github_workflow': False, + 'environ': { 'TX_API_TOKEN': 'dummy-token' } - ) + } + command_args.update(kwargs) + result = Command(**command_args) result.tx_api.make_auth_headers = BearerAuthentication('dummy-token') return result From bd896e1cc1f147b683fd81e5f251d76ed042aaa8 Mon Sep 17 00:00:00 2001 From: Omar Al-Ithawi Date: Sun, 19 Nov 2023 17:04:30 +0300 Subject: [PATCH 4/4] fix: avoid overriding manual edits for newer entries --- scripts/sync_translations.py | 75 ++++++-- scripts/tests/test_sync_translations.py | 220 +++++++++++++++++++++++- 2 files changed, 281 insertions(+), 14 deletions(-) diff --git a/scripts/sync_translations.py b/scripts/sync_translations.py index f8c29a4e4fd..1bf06bc3166 100644 --- a/scripts/sync_translations.py +++ b/scripts/sync_translations.py @@ -30,6 +30,19 @@ ORGANIZATION_SLUG = 'open-edx' +def parse_tx_date(date_str): + """ + Parse a date string coming from Transifex into a datetime object. + """ + if date_str: + if date_str.endswith('Z'): + date_str = date_str.replace('Z', '+00:00') + + return datetime.fromisoformat(date_str) + + return None + + class Command: workflow_file_path = '.github/workflows/sync-translations.yml' @@ -118,25 +131,61 @@ def sync_translations(self, language_code, old_resource, new_resource): Sync specific language translations into the new Transifex resource. """ print(' syncing', language_code, '...') - old_translations = { + translations_from_old_project = { self.get_translation_id(translation): translation for translation in self.get_translations(language_code=language_code, resource=old_resource) } - for new_translation in self.get_translations(language_code=language_code, resource=new_resource): - translation_id = self.get_translation_id(new_translation) - if old_translation := old_translations.get(translation_id): - updates = {} - for attr in ['reviewed', 'proofread', 'strings']: - if old_attr_value := getattr(old_translation, attr, None): - if old_attr_value != getattr(new_translation, attr, None): - updates[attr] = old_attr_value + for current_translation in self.get_translations(language_code=language_code, resource=new_resource): + translation_id = self.get_translation_id(current_translation) + if translation_from_old_project := translations_from_old_project.get(translation_id): + self.sync_translation_entry( + translation_from_old_project=translation_from_old_project, + current_translation=current_translation, + ) - if updates: - print(translation_id, updates) + def sync_translation_entry(self, translation_from_old_project, current_translation): + """ + Sync a single translation entry from the old project to the new one. - if not self.is_dry_run(): - new_translation.save(**updates) + Return: + str: status code + - updated: if the entry was updated + - skipped: if the entry was skipped + - updated-dry-run: if the entry was updated in dry-run mode + """ + translation_id = self.get_translation_id(current_translation) + + updates = {} + for attr in ['reviewed', 'proofread', 'strings']: + if old_attr_value := getattr(translation_from_old_project, attr, None): + if old_attr_value != getattr(current_translation, attr, None): + updates[attr] = old_attr_value + + # Avoid overwriting more recent translations in the open-edx/openedx-translations project + newer_translation_found = False + old_project_translation_time = parse_tx_date(translation_from_old_project.datetime_translated) + current_translation_time = parse_tx_date(current_translation.datetime_translated) + + if old_project_translation_time and current_translation_time: + newer_translation_found = current_translation_time > old_project_translation_time + + if updates: + if newer_translation_found: + print(translation_id, updates, + ( + f'[Skipped: current translation "{current_translation_time}" ' + f'is more recent than "{old_project_translation_time}"]' + ) + ) + return 'skipped' + else: + print(translation_id, updates, '[Dry run]' if self.is_dry_run() else '') + if self.is_dry_run(): + return 'updated-dry-run' + else: + current_translation.save(**updates) + return 'updated' def sync_tags(self, old_resource, new_resource): """ diff --git a/scripts/tests/test_sync_translations.py b/scripts/tests/test_sync_translations.py index 3dfd7ccf3d9..199c0e3fd2c 100644 --- a/scripts/tests/test_sync_translations.py +++ b/scripts/tests/test_sync_translations.py @@ -14,7 +14,11 @@ from transifex.api.jsonapi.auth import BearerAuthentication from . import response_data -from ..sync_translations import Command, ORGANIZATION_SLUG +from ..sync_translations import ( + Command, + ORGANIZATION_SLUG, + parse_tx_date, +) HOST = transifex_api.HOST @@ -34,6 +38,70 @@ def sync_command(**kwargs): return result +@dataclass +class ResourceStringMock: + """ + String entry in Transifex. + + Mock class for the transifex.api.ResourceString class. + """ + key: str + context: str = '' + + +@dataclass +class ResourceTranslationMock: + """ + Translation for an entry in Transifex. + + Mock class for the transifex.api.ResourceTranslation class. + """ + resource_string: ResourceStringMock + strings: dict + reviewed: bool + proofread: bool + datetime_translated: str + + _updates: dict = None # Last updates applied via `save()` + + def save(self, **updates): + """ + Mock ResourceTranslation.save() method. + """ + self._updates = updates + + @property + def updates(self): + """ + Return the last updates applied via `save()`. + """ + return self._updates + + @classmethod + def factory( + cls, + key='key', + context='', + translation: Union[str, None] = 'dummy translation', + **kwargs + ): + mock_kwargs = dict( + resource_string=ResourceStringMock( + key=key, + context=context + ), + strings={ + key: translation, + }, + reviewed=False, + proofread=False, + datetime_translated='2021-01-01T00:00:00Z', + ) + + mock_kwargs.update(kwargs) + return cls(**mock_kwargs) + + @responses.activate def test_get_transifex_organization_projects(): """ @@ -96,3 +164,153 @@ def test_get_translations(): items = list(data) assert len(items) == 1 assert items[0].id == response_data.RESPONSE_GET_LANGUAGE['data'][0]['id'] + + +def test_translations_entry_update_empty_translation(): + """ + Test updating an entry from old project where `current_translation` is empty. + """ + command = sync_command(dry_run=False) + + translation_from_old_project = ResourceTranslationMock.factory( + key='test_key', + translation='old translation', + reviewed=True, + datetime_translated='2023-01-01T00:00:00Z', + ) + + # Current translation is empty + current_translation = ResourceTranslationMock.factory( + key='test_key', + translation=None, + datetime_translated=None, + ) + + status = command.sync_translation_entry( + translation_from_old_project, current_translation + ) + + assert status == 'updated' + assert current_translation.updates == { + 'strings': { + 'test_key': 'old translation' + }, + 'reviewed': True, + } + + +@pytest.mark.parametrize( + 'old_project_date, current_translation_date, new_translation_str', + [ + # As long as the current translation is _not_ more recent, it should be updated + (None, '2023-01-01T00:00:00Z', None), + (None, '2023-01-01T00:00:00Z', 'some translation'), + ('2023-01-01T00:00:00Z', '2023-01-01T00:00:00Z', 'some translation'), # Same date + ('2023-01-01T00:00:00Z', '2021-01-01T00:00:00Z', 'some translation'), # New project has newer date + ('2023-01-01T00:00:00Z', None, 'some translation'), + ('2023-01-01T00:00:00Z', None, None), + ] +) +def test_translations_entry_update_translation(old_project_date, current_translation_date, new_translation_str): + """ + Test updating an entry from old project where `current_translation` is has outdated translation. + """ + command = sync_command(dry_run=False) + + translation_from_old_project = ResourceTranslationMock.factory( + key='test_key', + translation='old translation', + reviewed=True, + datetime_translated=old_project_date, + ) + + current_translation = ResourceTranslationMock.factory( + key='test_key', + translation=new_translation_str, + datetime_translated=current_translation_date, + ) + + status = command.sync_translation_entry( + translation_from_old_project, current_translation + ) + + assert status == 'updated' + assert current_translation.updates == { + 'strings': { + 'test_key': 'old translation' + }, + 'reviewed': True, + } + + +def test_translations_entry_more_recent_translation(): + """ + Verify that the more recent translations in the open-edx/openedx-translations project are not overridden. + """ + command = sync_command(dry_run=False) + + translation_from_old_project = ResourceTranslationMock.factory( + key='test_key', + translation='one translation', + reviewed=True, + datetime_translated='2019-01-01T00:00:00Z', + ) + + # Current translation is empty + current_translation = ResourceTranslationMock.factory( + key='test_key', + translation='more recent translation', + datetime_translated='2023-01-01T00:00:00Z', + ) + + status = command.sync_translation_entry( + translation_from_old_project, current_translation + ) + + assert status == 'skipped' + assert not current_translation.updates, 'save() should not be called' + + +def test_translations_entry_dry_run(): + """ + Verify that --dry-run option skips the save() call. + """ + command = sync_command(dry_run=True) + + translation_from_old_project = ResourceTranslationMock.factory( + key='test_key', + translation='old translation', + reviewed=True, + datetime_translated='2023-01-01T00:00:00Z', + ) + + current_translation = ResourceTranslationMock.factory( + key='test_key', + translation=None, + datetime_translated=None, + ) + + status = command.sync_translation_entry( + translation_from_old_project, current_translation + ) + + assert status == 'updated-dry-run' + assert not current_translation.updates, 'save() should not be called in --dry-run mode' + + +@pytest.mark.parametrize( + "date_str, parse_result, test_message", + [ + (None, None, 'None cannot be parsed'), + ('2023-01-01T00:00:00Z', datetime(2023, 1, 1, 0, 0, tzinfo=timezone.utc), + 'Z suffix is replaced with the explict "+00:00" timezone'), + ('2023-01-01T00:00:00', datetime(2023, 1, 1, 0, 0), + 'If there is no Z suffix, no timezone is added'), + ] +) +def test_parse_tx_date(date_str, parse_result, test_message): + """ + Tests for parse_tx_date() helper function. + """ + assert parse_tx_date(date_str) == parse_result, test_message +