Skip to content

Commit

Permalink
fix: avoid overriding manual edits for newer entries
Browse files Browse the repository at this point in the history
  • Loading branch information
OmarIthawi committed Nov 19, 2023
1 parent 24673d2 commit bd896e1
Show file tree
Hide file tree
Showing 2 changed files with 281 additions and 14 deletions.
75 changes: 62 additions & 13 deletions scripts/sync_translations.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,19 @@
ORGANIZATION_SLUG = 'open-edx'


def parse_tx_date(date_str):
"""
Parse a date string coming from Transifex into a datetime object.
"""
if date_str:
if date_str.endswith('Z'):
date_str = date_str.replace('Z', '+00:00')

return datetime.fromisoformat(date_str)

return None


class Command:

workflow_file_path = '.github/workflows/sync-translations.yml'
Expand Down Expand Up @@ -118,25 +131,61 @@ def sync_translations(self, language_code, old_resource, new_resource):
Sync specific language translations into the new Transifex resource.
"""
print(' syncing', language_code, '...')
old_translations = {
translations_from_old_project = {
self.get_translation_id(translation): translation
for translation in self.get_translations(language_code=language_code, resource=old_resource)
}

for new_translation in self.get_translations(language_code=language_code, resource=new_resource):
translation_id = self.get_translation_id(new_translation)
if old_translation := old_translations.get(translation_id):
updates = {}
for attr in ['reviewed', 'proofread', 'strings']:
if old_attr_value := getattr(old_translation, attr, None):
if old_attr_value != getattr(new_translation, attr, None):
updates[attr] = old_attr_value
for current_translation in self.get_translations(language_code=language_code, resource=new_resource):
translation_id = self.get_translation_id(current_translation)
if translation_from_old_project := translations_from_old_project.get(translation_id):
self.sync_translation_entry(
translation_from_old_project=translation_from_old_project,
current_translation=current_translation,
)

if updates:
print(translation_id, updates)
def sync_translation_entry(self, translation_from_old_project, current_translation):
"""
Sync a single translation entry from the old project to the new one.
if not self.is_dry_run():
new_translation.save(**updates)
Return:
str: status code
- updated: if the entry was updated
- skipped: if the entry was skipped
- updated-dry-run: if the entry was updated in dry-run mode
"""
translation_id = self.get_translation_id(current_translation)

updates = {}
for attr in ['reviewed', 'proofread', 'strings']:
if old_attr_value := getattr(translation_from_old_project, attr, None):
if old_attr_value != getattr(current_translation, attr, None):
updates[attr] = old_attr_value

# Avoid overwriting more recent translations in the open-edx/openedx-translations project
newer_translation_found = False
old_project_translation_time = parse_tx_date(translation_from_old_project.datetime_translated)
current_translation_time = parse_tx_date(current_translation.datetime_translated)

if old_project_translation_time and current_translation_time:
newer_translation_found = current_translation_time > old_project_translation_time

if updates:
if newer_translation_found:
print(translation_id, updates,
(
f'[Skipped: current translation "{current_translation_time}" '
f'is more recent than "{old_project_translation_time}"]'
)
)
return 'skipped'
else:
print(translation_id, updates, '[Dry run]' if self.is_dry_run() else '')
if self.is_dry_run():
return 'updated-dry-run'
else:
current_translation.save(**updates)
return 'updated'

def sync_tags(self, old_resource, new_resource):
"""
Expand Down
220 changes: 219 additions & 1 deletion scripts/tests/test_sync_translations.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,11 @@
from transifex.api.jsonapi.auth import BearerAuthentication

from . import response_data
from ..sync_translations import Command, ORGANIZATION_SLUG
from ..sync_translations import (
Command,
ORGANIZATION_SLUG,
parse_tx_date,
)

HOST = transifex_api.HOST

Expand All @@ -34,6 +38,70 @@ def sync_command(**kwargs):
return result


@dataclass
class ResourceStringMock:
"""
String entry in Transifex.
Mock class for the transifex.api.ResourceString class.
"""
key: str
context: str = ''


@dataclass
class ResourceTranslationMock:
"""
Translation for an entry in Transifex.
Mock class for the transifex.api.ResourceTranslation class.
"""
resource_string: ResourceStringMock
strings: dict
reviewed: bool
proofread: bool
datetime_translated: str

_updates: dict = None # Last updates applied via `save()`

def save(self, **updates):
"""
Mock ResourceTranslation.save() method.
"""
self._updates = updates

@property
def updates(self):
"""
Return the last updates applied via `save()`.
"""
return self._updates

@classmethod
def factory(
cls,
key='key',
context='',
translation: Union[str, None] = 'dummy translation',
**kwargs
):
mock_kwargs = dict(
resource_string=ResourceStringMock(
key=key,
context=context
),
strings={
key: translation,
},
reviewed=False,
proofread=False,
datetime_translated='2021-01-01T00:00:00Z',
)

mock_kwargs.update(kwargs)
return cls(**mock_kwargs)


@responses.activate
def test_get_transifex_organization_projects():
"""
Expand Down Expand Up @@ -96,3 +164,153 @@ def test_get_translations():
items = list(data)
assert len(items) == 1
assert items[0].id == response_data.RESPONSE_GET_LANGUAGE['data'][0]['id']


def test_translations_entry_update_empty_translation():
"""
Test updating an entry from old project where `current_translation` is empty.
"""
command = sync_command(dry_run=False)

translation_from_old_project = ResourceTranslationMock.factory(
key='test_key',
translation='old translation',
reviewed=True,
datetime_translated='2023-01-01T00:00:00Z',
)

# Current translation is empty
current_translation = ResourceTranslationMock.factory(
key='test_key',
translation=None,
datetime_translated=None,
)

status = command.sync_translation_entry(
translation_from_old_project, current_translation
)

assert status == 'updated'
assert current_translation.updates == {
'strings': {
'test_key': 'old translation'
},
'reviewed': True,
}


@pytest.mark.parametrize(
'old_project_date, current_translation_date, new_translation_str',
[
# As long as the current translation is _not_ more recent, it should be updated
(None, '2023-01-01T00:00:00Z', None),
(None, '2023-01-01T00:00:00Z', 'some translation'),
('2023-01-01T00:00:00Z', '2023-01-01T00:00:00Z', 'some translation'), # Same date
('2023-01-01T00:00:00Z', '2021-01-01T00:00:00Z', 'some translation'), # New project has newer date
('2023-01-01T00:00:00Z', None, 'some translation'),
('2023-01-01T00:00:00Z', None, None),
]
)
def test_translations_entry_update_translation(old_project_date, current_translation_date, new_translation_str):
"""
Test updating an entry from old project where `current_translation` is has outdated translation.
"""
command = sync_command(dry_run=False)

translation_from_old_project = ResourceTranslationMock.factory(
key='test_key',
translation='old translation',
reviewed=True,
datetime_translated=old_project_date,
)

current_translation = ResourceTranslationMock.factory(
key='test_key',
translation=new_translation_str,
datetime_translated=current_translation_date,
)

status = command.sync_translation_entry(
translation_from_old_project, current_translation
)

assert status == 'updated'
assert current_translation.updates == {
'strings': {
'test_key': 'old translation'
},
'reviewed': True,
}


def test_translations_entry_more_recent_translation():
"""
Verify that the more recent translations in the open-edx/openedx-translations project are not overridden.
"""
command = sync_command(dry_run=False)

translation_from_old_project = ResourceTranslationMock.factory(
key='test_key',
translation='one translation',
reviewed=True,
datetime_translated='2019-01-01T00:00:00Z',
)

# Current translation is empty
current_translation = ResourceTranslationMock.factory(
key='test_key',
translation='more recent translation',
datetime_translated='2023-01-01T00:00:00Z',
)

status = command.sync_translation_entry(
translation_from_old_project, current_translation
)

assert status == 'skipped'
assert not current_translation.updates, 'save() should not be called'


def test_translations_entry_dry_run():
"""
Verify that --dry-run option skips the save() call.
"""
command = sync_command(dry_run=True)

translation_from_old_project = ResourceTranslationMock.factory(
key='test_key',
translation='old translation',
reviewed=True,
datetime_translated='2023-01-01T00:00:00Z',
)

current_translation = ResourceTranslationMock.factory(
key='test_key',
translation=None,
datetime_translated=None,
)

status = command.sync_translation_entry(
translation_from_old_project, current_translation
)

assert status == 'updated-dry-run'
assert not current_translation.updates, 'save() should not be called in --dry-run mode'


@pytest.mark.parametrize(
"date_str, parse_result, test_message",
[
(None, None, 'None cannot be parsed'),
('2023-01-01T00:00:00Z', datetime(2023, 1, 1, 0, 0, tzinfo=timezone.utc),
'Z suffix is replaced with the explict "+00:00" timezone'),
('2023-01-01T00:00:00', datetime(2023, 1, 1, 0, 0),
'If there is no Z suffix, no timezone is added'),
]
)
def test_parse_tx_date(date_str, parse_result, test_message):
"""
Tests for parse_tx_date() helper function.
"""
assert parse_tx_date(date_str) == parse_result, test_message

0 comments on commit bd896e1

Please sign in to comment.