forked from OpenRCT2/objects
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlanguage_clean_patch.py
executable file
·126 lines (99 loc) · 4.44 KB
/
language_clean_patch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#!/usr/bin/env python3
from languages import SUPPORTED_LANGUAGES
from unidiff import PatchSet
from unidiff.constants import LINE_TYPE_EMPTY
import argparse
import os
def dir_path(string):
    """
    Validate that `string` names an existing directory.

    Returns `string` unchanged when `os.path.isdir` accepts it; otherwise
    raises NotADirectoryError carrying `string` as its argument.
    """
    # Guard clause: reject anything that is not a directory up front.
    if not os.path.isdir(string):
        raise NotADirectoryError(string)
    return string
def get_arg_parser():
    """
    Build the command line argument parser for this script.

    Two options are declared, both required:
      -p / --patch     path to the patch file to clean up
      -l / --language  language being translated, restricted to SUPPORTED_LANGUAGES
    """
    arg_parser = argparse.ArgumentParser(
        description='Cleans up a patch file to apply to the objects repository.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    arg_parser.add_argument(
        '-p', '--patch',
        required=True,
        help='Path to the patch file to clean up.',
    )
    arg_parser.add_argument(
        '-l', '--language',
        required=True,
        choices=SUPPORTED_LANGUAGES,
        help='Language that is being translated, e.g. ja-JP',
    )
    return arg_parser
class PatchCleaner:
    """
    Cleans a given unified diff such that only lines matching the language parameter are included,
    as well as any preceding lines that may be giving way to them (e.g. by adding a trailing comma).

    The patch is parsed and cleaned when the object is constructed; converting the
    object to a string yields the string form of the cleaned patch.
    """

    def __init__(self, filename, language):
        """
        Parse the patch file and clean it immediately.

        filename -- path handed to PatchSet.from_filename
        language -- language code to keep (e.g. ja-JP); matched as a plain
                    substring of each changed line's value
        """
        self.patch = PatchSet.from_filename(filename)
        self.language = language
        self.clean_patch()

    def __str__(self):
        """ Returns the string form of the (cleaned) patch set. """
        return str(self.patch)

    def clean_patch(self):
        """ Cleans the patch, removing irrelevant files. """
        files_to_be_removed = []
        for i, file in enumerate(self.patch):
            self.clean_file(file)

            # A file with no additions and no removals left is irrelevant.
            if not file.added and not file.removed:
                files_to_be_removed.append(i)

        # Delete in reverse order so earlier deletions don't shift the
        # indices that are still pending.
        for i in reversed(files_to_be_removed):
            del self.patch[i]

    def clean_file(self, file):
        """ Cleans one particular file in the patch set, removing empty hunks. """
        hunks_to_be_removed = []
        for j, hunk in enumerate(file):
            self.clean_hunk(hunk)

            # Is this hunk still modifying anything?
            # If not, we'll drop it after iterating everything.
            if not hunk.added and not hunk.removed:
                hunks_to_be_removed.append(j)

        # Delete in reverse order so earlier deletions don't shift the
        # indices that are still pending.
        for j in reversed(hunks_to_be_removed):
            del file[j]

    def clean_hunk(self, hunk):
        """ Cleans one particular hunk in the patch set, removing irrelevant lines. """
        for k, line in enumerate(hunk):
            # Lines that don't modify anything are left alone.
            if not (line.is_added or line.is_removed):
                continue

            # We're definitely keeping lines that have something to do
            # with the target language.
            if self.language in line.value:
                continue

            # Keep changes that merely make room for the target language.
            # TODO: Check languages match -- good enough a heuristic for now.
            if self.is_accommodating_change(hunk, k):
                continue

            # Otherwise, exclude this change from the patch.
            # NOTE(review): presumably LINE_TYPE_EMPTY makes the line count as
            # neither added nor removed -- confirm against unidiff.
            line.line_type = LINE_TYPE_EMPTY

    def is_accommodating_change(self, hunk, k):
        """
        Determines whether the current line 'k' is part of a change that's
        accommodating the target language in some way. For example, the addition
        of a trailing comma in a seemingly unrelated line:

        -"en-GB": "Penguin Trains"
        +"en-GB": "Penguin Trains",
        +"ja-JP": "ペンギンボブスレー"

        Returns True for either line of such a removal/addition pair, False
        otherwise. Neighbours outside the hunk are treated as absent: lines at
        the edges of the hunk never raise IndexError, and the first line is
        never paired with the last one through negative indexing.
        """
        last = len(hunk) - 1
        current = hunk[k]

        # Is line k a removal followed by an addition of the same language,
        # in turn followed by the target language?
        if current.is_removed and k + 2 <= last:
            if hunk[k + 1].is_added and self.language in hunk[k + 2].value:
                return True

        # Indeed, is the previous line a removal and the next line an addition
        # of the target language? (k >= 1 so that k - 1 cannot wrap around.)
        if current.is_added and 1 <= k < last:
            if hunk[k - 1].is_removed and self.language in hunk[k + 1].value:
                return True

        return False
if __name__ == "__main__":
    # Parse the command line, clean the requested patch for the requested
    # language, and write the resulting patch text to standard output.
    args = get_arg_parser().parse_args()
    print(PatchCleaner(args.patch, args.language))