From e64f0cc401e77dd3dd5332f881613d1f17b9d905 Mon Sep 17 00:00:00 2001 From: Omar Al-Ithawi Date: Sun, 20 Aug 2023 15:37:19 +0300 Subject: [PATCH] feat: sync from old transifex project This is an on-demand GitHub Actions workflow which will sync translations and their status from the `open-edx/edx-platform` Transifex project (old) into `open-edx/openedx-translations` OEP-58 project (new). Refs: FC-0012 OEP-58 --- .github/workflows/sync-translations.yml | 134 +++++++++++++++ Makefile | 19 ++- requirements/pip.txt | 2 +- requirements/pip_tools.txt | 2 +- requirements/test.in | 2 +- requirements/test.txt | 12 +- requirements/transifex.txt | 4 +- requirements/translations.in | 1 + requirements/translations.txt | 8 +- scripts/sync_translations.py | 217 ++++++++++++++++++++++++ scripts/tests/response_data.py | 80 +++++++++ scripts/tests/test_sync_translations.py | 92 ++++++++++ 12 files changed, 562 insertions(+), 11 deletions(-) create mode 100644 .github/workflows/sync-translations.yml create mode 100644 scripts/sync_translations.py create mode 100644 scripts/tests/response_data.py create mode 100644 scripts/tests/test_sync_translations.py diff --git a/.github/workflows/sync-translations.yml b/.github/workflows/sync-translations.yml new file mode 100644 index 00000000000..9535341d0ba --- /dev/null +++ b/.github/workflows/sync-translations.yml @@ -0,0 +1,134 @@ +name: Migrate translations from the old Transifex project + +on: + workflow_dispatch: + + +jobs: + migrate-translations: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + resource: + - new_slug: edx-ora2 + old_slug: openassessment + old_project_slug: edx-platform + + - new_slug: edx-ora2-js + old_slug: openassessment-js + old_project_slug: edx-platform + + - new_slug: edx-proctoring + old_slug: edx-proctoring + old_project_slug: edx-platform + + - new_slug: studio-frontend + old_slug: studio-frontend + old_project_slug: edx-platform + + - new_slug: donexblock + old_slug: xblock-done + 
old_project_slug: xblocks + + - new_slug: xblock-drag-and-drop-v2 + old_slug: drag-and-drop-v2 + old_project_slug: xblocks + + - new_slug: xblock-free-text-response + old_slug: xblock-free-text-response + old_project_slug: xblocks + + - new_slug: course-discovery + old_slug: course_discovery + old_project_slug: edx-platform + + - new_slug: course-discovery + old_slug: course_discovery + old_project_slug: edx-platform + + - new_slug: course-discovery-js + old_slug: course_discovery-js + old_project_slug: edx-platform + + - new_slug: credentials-js + old_slug: credentials-js + old_project_slug: edx-platform + + - new_slug: credentials + old_slug: credentials + old_project_slug: edx-platform + + - new_slug: frontend-app-account + old_slug: frontend-app-account + old_project_slug: edx-platform + + - new_slug: frontend-app-authn + old_slug: frontend-app-authn + old_project_slug: edx-platform + + - new_slug: frontend-app-course-authoring + old_slug: frontend-app-course-authoring + old_project_slug: edx-platform + + - new_slug: frontend-app-discussions + old_slug: frontend-app-discussions + old_project_slug: edx-platform + + - new_slug: frontend-app-ecommerce + old_slug: frontend-app-ecommerce + old_project_slug: edx-platform + + - new_slug: frontend-app-gradebook + old_slug: frontend-app-gradebook + old_project_slug: edx-platform + + - new_slug: frontend-app-learner-dashboard + old_slug: frontend-app-learner-dashboard + old_project_slug: edx-platform + + - new_slug: frontend-app-learner-record + old_slug: frontend-app-learner-record + old_project_slug: edx-platform + + - new_slug: frontend-app-learning + old_slug: frontend-app-learning + old_project_slug: edx-platform + + - new_slug: frontend-app-profile + old_slug: frontend-app-profile + old_project_slug: edx-platform + + - new_slug: frontend-app-program-console + old_slug: frontend-app-program-manager + old_project_slug: edx-platform + + - new_slug: frontend-component-footer + old_slug: frontend-component-footer-edx + 
old_project_slug: edx-platform + + - new_slug: frontend-component-header + old_slug: frontend-component-header + old_project_slug: edx-platform + + - new_slug: paragon + old_slug: paragon + old_project_slug: edx-platform + + steps: + - uses: actions/checkout@v3 + - name: setup python + uses: actions/setup-python@v4 + with: + python-version: 3.8 + - name: Install Python dependencies + run: make sync_requirements + + - name: Sync + env: + # `TX_LANGUAGES` list of languages is set in the `Makefile` + TX_NEW_SLUG: ${{ matrix.resource.new_slug }} + TX_OLD_SLUG: ${{ matrix.resource.old_slug }} + TX_OLD_PROJECT_SLUG: ${{ matrix.resource.old_project_slug }} + TX_API_TOKEN: ${{ secrets.TRANSIFEX_API_TOKEN }} + run: make sync_translations diff --git a/Makefile b/Makefile index f5c0b5c01cf..83a2b4bbe0d 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,11 @@ -.PHONY: piptools upgrade fix_transifex_resource_names transifex_resources_requirements validate_translation_files +.PHONY: piptools upgrade fix_transifex_resource_names transifex_resources_requirements validate_translation_files \ +sync_translations sync_translations_github_workflow + + +# Default languages for the sync_translations.py file +# Eventually, this should be all the languages Open edX supports. We're starting with a subset for now. +export TX_LANGUAGES := ar,de,fr_CA + piptools: pip install -q -r requirements/pip_tools.txt @@ -37,3 +44,13 @@ validate_translation_files: ## Run basic validation to ensure files are compila @echo '-----------------------------------------' @echo 'Congratulations! Translation files are valid.' 
@echo '-----------------------------------------' + +sync_requirements: ## install translations.txt requirements + pip install -q -r requirements/translations.txt + +sync_translations: ## Syncs from the old projects to the new openedx-translations project + python scripts/sync_translations.py $(SYNC_ARGS) + +sync_translations_github_workflow: ## Run with parameters from .github/workflows/sync-translations.yml + make SYNC_ARGS="--simulate-github-workflow $(SYNC_ARGS)" sync_translations + diff --git a/requirements/pip.txt b/requirements/pip.txt index da0741c57b7..3e7d8f4a813 100644 --- a/requirements/pip.txt +++ b/requirements/pip.txt @@ -10,5 +10,5 @@ wheel==0.41.2 # The following packages are considered to be unsafe in a requirements file: pip==23.2.1 # via -r requirements/pip.in -setuptools==68.2.0 +setuptools==68.2.2 # via -r requirements/pip.in diff --git a/requirements/pip_tools.txt b/requirements/pip_tools.txt index 3a6a0e2404e..05d07313e09 100644 --- a/requirements/pip_tools.txt +++ b/requirements/pip_tools.txt @@ -23,7 +23,7 @@ tomli==2.0.1 # pyproject-hooks wheel==0.41.2 # via pip-tools -zipp==3.16.2 +zipp==3.17.0 # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: diff --git a/requirements/test.in b/requirements/test.in index 35350a6e598..4b5c70cc174 100644 --- a/requirements/test.in +++ b/requirements/test.in @@ -3,4 +3,4 @@ pytest pytest-cov - +responses diff --git a/requirements/test.txt b/requirements/test.txt index 10d0cb9ec89..833f501f9ca 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -32,7 +32,7 @@ gitdb==4.0.10 # via # -r requirements/transifex.txt # gitpython -gitpython==3.1.35 +gitpython==3.1.36 # via # -r requirements/transifex.txt # transifex-client @@ -68,6 +68,8 @@ pytz==2023.3.post1 # via # -r requirements/transifex.txt # transifex-python +pyyaml==6.0.1 + # via responses regex==2023.8.8 # via # -r requirements/transifex.txt @@ -75,14 +77,17 @@ regex==2023.8.8 
requests==2.31.0 # via # -r requirements/transifex.txt + # responses # transifex-client # transifex-python +responses==0.23.3 + # via -r requirements/test.in six==1.16.0 # via # -r requirements/transifex.txt # asttokens # transifex-client -smmap==5.0.0 +smmap==5.0.1 # via # -r requirements/transifex.txt # gitdb @@ -102,8 +107,11 @@ transifex-client==0.14.4 # via -r requirements/transifex.txt transifex-python==3.4.0 # via -r requirements/transifex.txt +types-pyyaml==6.0.12.11 + # via responses urllib3==1.26.16 # via # -r requirements/transifex.txt # requests + # responses # transifex-client diff --git a/requirements/transifex.txt b/requirements/transifex.txt index fc27438235b..ac0d79e029c 100644 --- a/requirements/transifex.txt +++ b/requirements/transifex.txt @@ -16,7 +16,7 @@ future==0.18.3 # via pyseeyou gitdb==4.0.10 # via gitpython -gitpython==3.1.35 +gitpython==3.1.36 # via transifex-client idna==3.4 # via requests @@ -39,7 +39,7 @@ six==1.16.0 # via # asttokens # transifex-client -smmap==5.0.0 +smmap==5.0.1 # via gitdb text-unidecode==1.3 # via python-slugify diff --git a/requirements/translations.in b/requirements/translations.in index 89d52cc7813..2e3d711479a 100644 --- a/requirements/translations.in +++ b/requirements/translations.in @@ -2,3 +2,4 @@ # i18n edx-i18n-tools +pyyaml diff --git a/requirements/translations.txt b/requirements/translations.txt index 52803c8c9b6..387620b5674 100644 --- a/requirements/translations.txt +++ b/requirements/translations.txt @@ -10,7 +10,7 @@ django==3.2.21 # via # -c https://raw.githubusercontent.com/edx/edx-lint/master/edx_lint/files/common_constraints.txt # edx-i18n-tools -edx-i18n-tools==1.1.0 +edx-i18n-tools==1.2.0 # via -r requirements/translations.in path==16.7.1 # via edx-i18n-tools @@ -19,8 +19,10 @@ polib==1.2.0 pytz==2023.3.post1 # via django pyyaml==6.0.1 - # via edx-i18n-tools + # via + # -r requirements/translations.in + # edx-i18n-tools sqlparse==0.4.4 # via django -typing-extensions==4.7.1 
+typing-extensions==4.8.0 # via asgiref diff --git a/scripts/sync_translations.py b/scripts/sync_translations.py new file mode 100644 index 00000000000..aedcfaabd6d --- /dev/null +++ b/scripts/sync_translations.py @@ -0,0 +1,217 @@ +import configparser +import os +import sys +from os.path import expanduser +import yaml + +from transifex.api import transifex_api +from transifex.api.jsonapi import exceptions + +NEW_PROJECT_SLUG = 'openedx-translations' +ORGANIZATION_SLUG = 'open-edx' + + +class Command: + + workflow_file_path = '.github/workflows/sync-translations.yml' + + def __init__(self, argv, tx_api, environ): + self.argv = argv + self.tx_api = tx_api + self.environ = environ + + def is_dry_run(self): + """ + Check if the script is running in dry-run mode. + """ + return '--dry-run' in self.argv + + def is_simulated_github_actions(self): + """ + Check if the script is running in simulated GitHub Actions mode. + """ + return '--simulate-github-workflow' in self.argv + + def get_resource_url(self, resource, project_slug): + return f'https://www.transifex.com/{ORGANIZATION_SLUG}/{project_slug}/{resource.slug}' + + def get_transifex_organization_projects(self): + """ + Authenticate against Transifex and return the projects of the open-edx organization. + """ + tx_api_token = self.environ.get('TX_API_TOKEN') + if not tx_api_token: + config = configparser.ConfigParser() + config.read(expanduser('~/.transifexrc')) + tx_api_token = config.get('https://www.transifex.com', 'password', fallback=None) # missing file/section reaches the error below + + if not tx_api_token: + raise Exception( + 'Error: No auth token found. ' + 'Set transifex API token via TX_API_TOKEN environment variable or via the ~/.transifexrc file.' + ) + + self.tx_api.setup(auth=tx_api_token) + return self.tx_api.Organization.get(slug=ORGANIZATION_SLUG).fetch('projects') + + def get_resources_pair(self, new_slug, old_slug, old_project_slug): + """ + Load the old and new Transifex resources pair. 
+ """ + projects = self.get_transifex_organization_projects() + new_project = projects.get(slug=NEW_PROJECT_SLUG) + + new_resource_id = f'o:{ORGANIZATION_SLUG}:p:{new_project.slug}:r:{new_slug}' + print(f'new resource id: {new_resource_id}') + try: + new_resource = self.tx_api.Resource.get(id=new_resource_id) + except exceptions.JsonApiException as error: + print(f'Error: New resource error: {new_resource_id}. Error: {error}') + raise + + old_resource_id = f'o:{ORGANIZATION_SLUG}:p:{old_project_slug}:r:{old_slug}' + print(f'old resource id: {old_resource_id}') + try: + old_resource = self.tx_api.Resource.get(id=old_resource_id) + except exceptions.JsonApiException as error: + print(f'Error: Old resource error: {old_resource_id}. Error: {error}') + raise + + return { + 'old_resource': old_resource, + 'new_resource': new_resource, + } + + def get_translations(self, language_code, resource): + """ + Get a list of translations for a given language and resource. + """ + language = self.tx_api.Language.get(code=language_code) + translations = self.tx_api.ResourceTranslation. \ + filter(resource=resource, language=language). \ + include('resource_string') + + return translations.all() + + def sync_translations(self, language_code, old_resource, new_resource): + """ + Sync specific language translations into the new Transifex resource. 
+ """ + print(' syncing', language_code, '...') + old_translations = { + self.get_translation_id(translation): translation + for translation in self.get_translations(language_code=language_code, resource=old_resource) + } + + for new_translation in self.get_translations(language_code=language_code, resource=new_resource): + translation_id = self.get_translation_id(new_translation) + if old_translation := old_translations.get(translation_id): + updates = {} + for attr in ['reviewed', 'proofread', 'strings']: + if old_attr_value := getattr(old_translation, attr, None): + if old_attr_value != getattr(new_translation, attr, None): + updates[attr] = old_attr_value + + if updates: + print(translation_id, updates) + + if not self.is_dry_run(): + new_translation.save(**updates) + + def sync_tags(self, old_resource, new_resource): + """ + Sync tags from the old Transifex resource into the new Transifex resource. This process is language independent. + """ + old_resource_str = self.tx_api.ResourceString.filter(resource=old_resource) + new_resource_str = self.tx_api.ResourceString.filter(resource=new_resource) + + old_quick_lookup = { + item['attributes']['string_hash']: item['attributes']['tags'] for item in old_resource_str.to_dict()['data'] + } + + for new_info in new_resource_str.all(): + old_tags = old_quick_lookup.get(new_info.string_hash) + new_tags = new_info.tags + + if old_tags is None: # in case of new changes are not synced yet + continue + if len(new_tags) == 0 and len(old_tags) == 0: # nothing to compare + continue + + if len(new_tags) != len(old_tags) or set(new_tags) != set(old_tags): + print(f' - found tag difference for {new_info.string_hash}. overwriting: {new_tags} with {old_tags}') + + if not self.is_dry_run(): + new_info.save(tags=old_tags) + + def get_translation_id(self, translation): + """ + Build a unique identifier for a translation entry. 
+ """ + return f'context:{translation.resource_string.context}:key:{translation.resource_string.key}' + + def get_languages(self): + """ + Get the list of language codes to sync, read from the comma-separated TX_LANGUAGES environment variable. + """ + return self.environ['TX_LANGUAGES'].split(',') + + def sync_pair_into_new_resource(self, new_slug, old_slug, old_project_slug): + """ + Sync translations (per language) and tags from one old resource into its new openedx-translations resource. + """ + languages = self.get_languages() + resource_pair = self.get_resources_pair(new_slug, old_slug, old_project_slug) + + print(f'Syncing {resource_pair["new_resource"].name} from {resource_pair["old_resource"].name}...') + print(f'Syncing: {languages}') + print(f' - from: {self.get_resource_url(resource_pair["old_resource"], old_project_slug)}') + print(f' - to: {self.get_resource_url(resource_pair["new_resource"], NEW_PROJECT_SLUG)}') + + for lang_code in languages: + self.sync_translations(language_code=lang_code, **resource_pair) + + print('Syncing tags...') + self.sync_tags(**resource_pair) + + print('-' * 80, '\n') + + def run_from_workflow_yaml_file(self, workflow_configs): + """ + Verify, then sync, every resource pair listed in the parsed GitHub Actions workflow file (see workflow_file_path). 
+ """ + pairs_list = workflow_configs['jobs']['migrate-translations']['strategy']['matrix']['resource'] # matrix key as named in the workflow file + + print('Verifying existence of resource pairs...') + for pair in pairs_list: + self.get_resources_pair( + new_slug=pair['new_slug'], + old_slug=pair['old_slug'], + old_project_slug=pair['old_project_slug'], + ) + print('\n', '-' * 80, '\n') + + for pair in pairs_list: + self.sync_pair_into_new_resource( + new_slug=pair['new_slug'], + old_slug=pair['old_slug'], + old_project_slug=pair['old_project_slug'], + ) + + def run(self): + if self.is_simulated_github_actions(): + with open(self.workflow_file_path) as workflow_file: + self.run_from_workflow_yaml_file( + workflow_configs=yaml.safe_load(workflow_file.read()), + ) + else: + self.sync_pair_into_new_resource( + new_slug=self.environ['TX_NEW_SLUG'], + old_slug=self.environ['TX_OLD_SLUG'], + old_project_slug=self.environ['TX_OLD_PROJECT_SLUG'], + ) + + +if __name__ == '__main__': + command = Command(sys.argv, environ=os.environ, tx_api=transifex_api) + command.run() diff --git a/scripts/tests/response_data.py b/scripts/tests/response_data.py new file mode 100644 index 00000000000..51b11fddc80 --- /dev/null +++ b/scripts/tests/response_data.py @@ -0,0 +1,80 @@ +""" +Holds dummy data for tests +""" +RESPONSE_GET_ORGANIZATION = { + 'data': [ + { + 'id': 'o:open-edx', + 'type': 'organizations', + 'attributes': { + 'name': 'Open edX', + 'slug': 'open-edx', + 'private': False + }, + 'relationships': { + 'projects': { + 'links': { + 'related': 'https://rest.api.transifex.com/projects?filter[organization]=o:open-edx' + } + }, + } + } + ], +} + +RESPONSE_GET_PROJECTS = { + "data": [ + { + "id": "o:open-edx:p:openedx-translations", + "type": "projects", + "attributes": { + "slug": "openedx-translations", + "name": "openedx-translations", + "type": "file", + }, + "relationships": { + "source_language": { + "links": { + "related": "https://rest.api.transifex.com/languages/l:en" + }, + "data": { + "type": "languages", + "id": 
"l:en" + } + }, + "languages": { + "links": { + "self": "https://rest.api.transifex.com/projects/o:open-edx:p:openedx-translations/relationships/languages", + "related": "https://rest.api.transifex.com/projects/o:open-edx:p:openedx-translations/languages" + } + }, + }, + } + ], +} + +RESPONSE_GET_LANGUAGE = { + "data": [ + { + "id": "l:ar", + "type": "languages", + "attributes": { + "code": "ar", + "name": "Arabic", + "rtl": True, + "plural_equation": "n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 && n%100<=99 ? 4 : 5", + "plural_rules": { + "zero": "n is 0", + "one": "n is 1", + "two": "n is 2", + "many": "n mod 100 in 11..99", + "few": "n mod 100 in 3..10", + "other": "everything else" + } + }, + "links": { + "self": "https://rest.api.transifex.com/languages/l:ar" + } + } + ] +} diff --git a/scripts/tests/test_sync_translations.py b/scripts/tests/test_sync_translations.py new file mode 100644 index 00000000000..3ec037093cc --- /dev/null +++ b/scripts/tests/test_sync_translations.py @@ -0,0 +1,92 @@ +""" +Tests for sync_translations.py +""" +import types + +import pytest +import responses +from transifex.api import transifex_api, Project +from transifex.api.jsonapi import Resource +from transifex.api.jsonapi.auth import BearerAuthentication + +from . import response_data +from ..sync_translations import Command, ORGANIZATION_SLUG + +HOST = transifex_api.HOST + + +@pytest.fixture +def sync_command(): + result = Command( + argv=[], + tx_api=transifex_api, + environ={ + 'TX_API_TOKEN': 'dummy-token' + } + ) + result.tx_api.make_auth_headers = BearerAuthentication('dummy-token') + return result + + +@responses.activate +def test_get_transifex_organization_projects(sync_command): + """ + Verify that the get_transifex_organization_projects() method returns the correct data. 
+ """ + sync_command = sync_command + + # Mocking responses + responses.add( + responses.GET, + HOST + f'/organizations?filter[slug]={ORGANIZATION_SLUG}', + json=response_data.RESPONSE_GET_ORGANIZATION, + status=200 + ) + responses.add( + responses.GET, + HOST + f'/projects?filter[organization]={response_data.RESPONSE_GET_ORGANIZATION["data"][0]["id"]}', + json=response_data.RESPONSE_GET_PROJECTS, + status=200 + ) + + # Remove the make_auth_headers to verify later that transifex setup is called + delattr(sync_command.tx_api, 'make_auth_headers') + + data = sync_command.get_transifex_organization_projects() + assert hasattr(sync_command.tx_api, 'make_auth_headers') + assert isinstance(sync_command.tx_api.make_auth_headers, BearerAuthentication) + assert len(data) == 1 + assert isinstance(data[0], Project) + assert data[0].id == response_data.RESPONSE_GET_PROJECTS['data'][0]['id'] + + +@responses.activate +def test_get_translations(sync_command): + """ + Verify that the get_translations() method returns the correct data. + """ + sync_command = sync_command + resource_id = f'{response_data.RESPONSE_GET_PROJECTS["data"][0]["id"]}:r:ar' + print(f'resource_id: {resource_id}') + # Mocking responses + responses.add( + responses.GET, + HOST + f'/languages?filter[code]=ar', + json=response_data.RESPONSE_GET_LANGUAGE, + status=200 + ) + responses.add( + responses.GET, + HOST + f'/resource_translations?filter[resource]={resource_id}&filter[language]=l:ar&include=resource_string', + json=response_data.RESPONSE_GET_LANGUAGE, + status=200 + ) + + data = sync_command.get_translations( + language_code='ar', + resource=Resource(id=resource_id) + ) + assert isinstance(data, types.GeneratorType) + items = list(data) + assert len(items) == 1 + assert items[0].id == response_data.RESPONSE_GET_LANGUAGE['data'][0]['id']