generate_tests.py
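
"""Generate Rust `test_token_w_position!` assertions from CPython's tokenizer.

Runs each input file through CPython's C tokenizer and prints one macro
invocation per token, ready to paste into a Rust tokenizer's test suite.
"""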
import token as ttype
from argparse import ArgumentParser
from pathlib import Path

# _generate_tokens_from_c_tokenizer is a private CPython helper that drives the
# real C tokenizer; unlike tokenize.tokenize, it takes source text directly
# rather than a readline callable.
from tokenize import _generate_tokens_from_c_tokenizer


def token_type_from_python_to_rust(typefield):
    """Map a CPython token type to the matching Rust `TType` variant name."""
    match typefield:
        case ttype.ENCODING:
            return "TType::Encoding"
        case ttype.STRING:
            return "TType::String"
        case ttype.NAME:
            return "TType::Name"
        case ttype.OP:
            return "TType::Op"
        case ttype.NEWLINE | ttype.NL:
            # Logical (NEWLINE) and non-logical (NL) newlines share one variant.
            return "TType::NL"
        case ttype.NUMBER:
            return "TType::Number"
        case ttype.INDENT:
            return "TType::Indent"
        case ttype.DEDENT:
            return "TType::Dedent"
        case ttype.ENDMARKER:
            return "TType::EndMarker"
        case ttype.COMMENT:
            return "TType::Comment"
        case _:
            # Assume anything unhandled is an operator.
            return "TType::Op"
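

# A minimal sanity check of the mapping (a sketch, not used by the generator;
# `ttype.LESSEQUAL` stands in for any token type without an explicit arm):
#
#     >>> token_type_from_python_to_rust(ttype.NAME)
#     'TType::Name'
#     >>> token_type_from_python_to_rust(ttype.LESSEQUAL)  # falls to the wildcard
#     'TType::Op'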


def process_file(element: Path):
    """Tokenize one Python file and print a `test_token_w_position!` line per token."""
    with element.open("r") as my_file:
        print(f"Processing: {element}")
        print("=" * 80)
        try:
            tokens = _generate_tokens_from_c_tokenizer(my_file.read())
            for idx, token in enumerate(tokens):
                type_str = token_type_from_python_to_rust(token.type)
                positions = f"({token.start[0]}, {token.start[1]}), ({token.end[0]}, {token.end[1]})"
                if token.string in ("\r\n", "\n", "\r") or token.type in (ttype.NEWLINE, ttype.NL):
                    # Newlines are asserted by position only; the expected text is empty.
                    print(f'test_token_w_position!(tokens[{idx}], {type_str}, {positions}, "" );')
                elif token.string == "async":
                    # Exact match: "async" is only a keyword in lowercase.
                    print(f'test_token_w_position!(tokens[{idx}], TType::Async, {positions}, "async" );')
                elif token.string == "await":
                    print(f'test_token_w_position!(tokens[{idx}], TType::Await, {positions}, "await" );')
                elif token.type in (ttype.INDENT, ttype.DEDENT):
                    # Reading tokenizer.c, the default column offset appears to be -1
                    # and is not updated when INDENT/DEDENT tokens are pushed, so pin
                    # both columns to 0.
                    positions = f"({token.start[0]}, 0), ({token.end[0]}, 0)"
                    print(f'test_token_w_position!(tokens[{idx}], {type_str}, {positions}, "" );')
                else:
                    # Note: token.string is interpolated verbatim, so string tokens
                    # containing double quotes need manual escaping in the output.
                    print(f'test_token_w_position!(tokens[{idx}], {type_str}, {positions}, "{token.string}" );')
        except Exception as exc:
            print(f"Failed to tokenize {element}: {exc}")
            raise
        print("Finished\n")


def walk_workingpath(work_path: Path):
    """Process every *.py file directly under work_path, or work_path itself if it is a file."""
    if work_path.is_dir():
        for element in work_path.glob("*.py"):
            if element.is_file():
                process_file(element)
    elif work_path.is_file():
        process_file(work_path)


def main():
    parser = ArgumentParser()
    parser.add_argument("work_path", type=Path,
                        help="File, or directory of Python files, to tokenize.")
    args = parser.parse_args()
    walk_workingpath(args.work_path)


if __name__ == "__main__":
    main()
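

# Example invocations (hypothetical paths; output goes to stdout, so redirect
# it wherever the Rust tests live):
#
#     python generate_tests.py tests/fixtures/
#     python generate_tests.py sample.py > snippets.txt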