forked from C1710/blobmoji
-
Notifications
You must be signed in to change notification settings - Fork 0
/
convert_filenames.py
103 lines (92 loc) · 4.7 KB
/
convert_filenames.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import argparse
from os import PathLike
from typing import IO, AnyStr, Dict, List, Tuple
import re
import logging
import os, os.path
import pathlib
# Regexes from emoji_builder
# These first regexes are not used at the moment
hex_sequence = r"[a-fA-F0-9]{1,8}"
range_regex = r"(?P<range>(?P<range_start>{hex})\.\.(?P<range_end>{hex}))".format(hex = hex_sequence)
sequence_regex = r"(?P<sequence>({hex})(\s+({hex}))*)".format(hex = hex_sequence)
emoji_regex = r"(?P<codepoints>{}|{})".format(range_regex, sequence_regex)
emoji_kind_regex = r"(?P<kind>[A-Za-z_\-]+)"
data_regex = r"^{}\s*;\s*{}\s*(;(?P<name>.*)\s*)?(#.*)?$".format(emoji_regex, emoji_kind_regex)
emoji_sequence_space_regex = r"(([A-F0-9a-f]{1,8})(\s+([A-F0-9a-f]{1,8}))*)"
emoji_status_regex = r"(component|fully-qualified|minimally-qualified|unqualified)"
emoji_name_regex = r"(.*)?\s*E(\d+.\d+) (.+)"
emoji_test_regex = re.compile(r"^{}\s*;\s*{}\s*#\s*{}$".format(emoji_sequence_space_regex, emoji_status_regex, emoji_name_regex))
normalize_delimiter_regex = re.compile(r"[-_. ]")
normalize_remove_regex = re.compile(r"""[,*\\/:'"()]""")
def normalize_name(name: str) -> str:
"""Normalizes names to be space-separated and lowercase with less special characters"""
name_components = normalize_delimiter_regex.split(name)
simplified_name_components = (normalize_remove_regex.sub("", name_component) for name_component in name_components)
simplified_name = " ".join(simplified_name_components)
return simplified_name.lower()
def load_test_table(table: IO[AnyStr]) -> Dict[str, Tuple[int]]:
"""
Loads the content of emoji-test.txt.
The general format is "codepoint ; fully-qualified # <Emoji> E<version> name"
"""
mapping = dict()
for line in table:
line = line.strip()
if line.startswith("#") or not line:
# Comment
continue
entry = emoji_test_regex.match(line)
if entry:
sequence: str = entry.group(1)
sequence_int = (int(codepoint, 16) for codepoint in sequence.split() if codepoint)
sequence_int_no_fe0f = tuple((codepoint for codepoint in sequence_int if codepoint != 0xfe0f))
_status = entry.group(5)
name = normalize_name(entry.group(8))
mapping[name] = sequence_int_no_fe0f
else:
logging.warn("Unrecognized table line:\n {}".format(line))
return mapping
def make_mapping(int_mappings: List[Dict[str, Tuple[int]]]) -> Dict[str, str]:
"""
Merges normalized-name to int tuple-mappings to normalized-name to emoji_u<codepoint>-mapping
"""
mapping = dict()
for int_mapping in int_mappings:
for (name, sequence) in int_mapping.items():
sequence_str = "emoji_u{}".format("_".join((hex(codepoint)[2:] for codepoint in sequence)))
mapping[name] = sequence_str
return mapping
def replace_file_names(directory: PathLike, mapping: Dict[str, str], dry_run: bool = False):
"""Actually looks up the file names and replaces them"""
for file in os.scandir(directory):
if not file.is_file() or os.fsdecode(file.name).startswith("emoji_u"):
continue
else:
file_path = pathlib.Path(file.path)
basename = file_path.stem
normalized_basename = normalize_name(basename)
if normalized_basename not in mapping:
logging.warning("Unrecognized name: {}".format(basename))
else:
# Rename/Move
new_path = file_path.with_stem(mapping[normalized_basename])
if not dry_run:
logging.info("{} --> {}".format(file_path, new_path))
os.rename(file_path, new_path)
else:
print("{} --> {}".format(file_path, new_path))
def main(directory: PathLike, emoji_test_txt: PathLike, dry_run: bool = False):
with open(emoji_test_txt, encoding="utf-8") as emoji_test_file:
test_mapping = load_test_table(emoji_test_file)
mapping = make_mapping([test_mapping])
replace_file_names(directory=directory, mapping=mapping, dry_run=dry_run)
parser = argparse.ArgumentParser(
description="Replaces emoji files with rich names by their emoji_u<codepoint> format"
)
parser.add_argument("--directory", type=pathlib.PurePath, help="The directory in which to replace the file names", default="./svg")
parser.add_argument("--emoji_test", type=pathlib.PurePath, help="The emoji-test.txt file to use for the translation", default="./emoji_test.txt")
parser.add_argument("--dry-run", action="store_true", help="does not perform any actual renaming")
if __name__ == "__main__":
args = parser.parse_args()
main(args.directory, args.emoji_test, args.dry_run)