gh-122449: Use constants instead of hard-coded strings in Tools/cases_generator #122448

Open · picnixz wants to merge 27 commits into main
Changes shown from 21 of 27 commits.

Commits:
327acfd  use global constants for token kinds (picnixz, Jul 30, 2024)
c7ce6df  analyzer.py: use lexer constants (picnixz, Jul 30, 2024)
194e0ed  parsing.py: use lexer constants (picnixz, Jul 30, 2024)
d069e1c  cwriter.py: use lexer constants (picnixz, Jul 30, 2024)
b4ae7c1  generators_common: use lexer constants (picnixz, Jul 30, 2024)
950e6dc  opcode_metadata_generator.py: use lexer constants (picnixz, Jul 30, 2024)
c854c09  plexer.py: use lexer constants (picnixz, Jul 30, 2024)
cfc058f  cosmetic change for imports (picnixz, Jul 30, 2024)
fc3f503  make mypy happy (picnixz, Jul 30, 2024)
6dd3c1f  Merge remote-tracking branch 'upstream/main' into use-enumeration-for… (picnixz, Aug 2, 2024)
f3e009b  mypy (picnixz, Aug 2, 2024)
2f26b0f  Merge remote-tracking branch 'upstream/main' into use-enumeration-for… (picnixz, Aug 13, 2024)
92dff08  revert `import lexer as lx` (picnixz, Aug 13, 2024)
14aa67a  revert cosmetic change (picnixz, Aug 13, 2024)
cf498dd  Merge remote-tracking branch 'upstream/main' into use-enumeration-for… (picnixz, Nov 15, 2024)
08bfa72  cleanup imports (picnixz, Nov 15, 2024)
c5d04c6  update usage (picnixz, Nov 15, 2024)
267bffe  fixup (picnixz, Nov 15, 2024)
dc2e8d6  use FQN (picnixz, Nov 15, 2024)
bc8d2e5  update usages (picnixz, Nov 15, 2024)
a576b2e  small optimization (picnixz, Nov 15, 2024)
5751920  cosmetic changes (picnixz, Nov 16, 2024)
07b66e0  I don't know how to use Python anymore! (picnixz, Nov 16, 2024)
1d16f2a  Merge remote-tracking branch 'upstream/main' into feat/cases/kind-122449 (picnixz, Jan 23, 2025)
72859d3  update constants (picnixz, Jan 23, 2025)
21cf890  Merge remote-tracking branch 'upstream/main' into feat/cases/kind-122449 (picnixz, Feb 23, 2025)
7978d1b  mypy fix (picnixz, Feb 23, 2025)
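
Across both files below, the change is mechanical: hard-coded token-kind strings such as "IDENTIFIER" or "SEMI" become constants from the lexer module (aliased as lx in analyzer.py), and annotation strings such as "pure" become lx.ANN_* constants. A minimal sketch of why this helps, assuming the constants are plain string aliases as the diff implies; the helper functions here are illustrative only, not code from the PR:

    # Illustrative sketch only; assumes lexer.py defines kinds as string
    # aliases, e.g. IDENTIFIER = "IDENTIFIER".
    IDENTIFIER = "IDENTIFIER"

    def uses_oparg_stringly(tkn) -> bool:
        # Before: a typo such as "IDENTIFER" never matches and fails silently.
        return tkn.kind == "IDENTIFIER" and tkn.text == "oparg"

    def uses_oparg_with_constant(tkn) -> bool:
        # After: a mistyped constant name raises NameError at import time,
        # and every use site can be found by grep or an IDE rename.
        return tkn.kind == IDENTIFIER and tkn.text == "oparg"
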
Tools/cases_generator/analyzer.py (50 additions, 44 deletions)
@@ -1,6 +1,7 @@
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 import itertools
 import lexer
+import lexer as lx # for constants
 import parser
 import re
 from typing import Optional
@@ -194,7 +195,7 @@ def why_not_viable(self) -> str | None:
             return None  # Adjusts next_instr, but only in tier 1 code
         if "INSTRUMENTED" in self.name:
             return "is instrumented"
-        if "replaced" in self.annotations:
+        if lx.ANN_REPLACED in self.annotations:
             return "is replaced"
         if self.name in ("INTERPRETER_EXIT", "JUMP_BACKWARD"):
             return "has tier 1 control flow"
@@ -211,7 +212,7 @@ def is_viable(self) -> bool:

     def is_super(self) -> bool:
         for tkn in self.body:
-            if tkn.kind == "IDENTIFIER" and tkn.text == "oparg1":
+            if tkn.kind == lx.IDENTIFIER and tkn.text == "oparg1":
                 return True
         return False

@@ -384,7 +385,7 @@ def find_assignment_target(node: parser.InstDef, idx: int) -> list[lexer.Token]:
     """Find the tokens that make up the left-hand side of an assignment"""
     offset = 0
     for tkn in reversed(node.block.tokens[: idx]):
-        if tkn.kind in {"SEMI", "LBRACE", "RBRACE"}:
+        if tkn.kind in {lx.SEMI, lx.LBRACE, lx.RBRACE}:
             return node.block.tokens[idx - offset : idx]
         offset += 1
     return []
@@ -395,17 +396,17 @@ def find_stores_outputs(node: parser.InstDef) -> list[lexer.Token]:
     outnames = { out.name for out in node.outputs }
     innames = { out.name for out in node.inputs }
     for idx, tkn in enumerate(node.block.tokens):
-        if tkn.kind == "AND":
+        if tkn.kind == lx.AND:
             name = node.block.tokens[idx+1]
             if name.text in outnames:
                 res.append(name)
-        if tkn.kind != "EQUALS":
+        if tkn.kind != lx.EQUALS:
             continue
         lhs = find_assignment_target(node, idx)
         assert lhs
-        while lhs and lhs[0].kind == "COMMENT":
+        while lhs and lhs[0].kind == lx.COMMENT:
             lhs = lhs[1:]
-        if len(lhs) != 1 or lhs[0].kind != "IDENTIFIER":
+        if len(lhs) != 1 or lhs[0].kind != lx.IDENTIFIER:
             continue
         name = lhs[0]
         if name.text in innames:
@@ -419,18 +420,18 @@ def analyze_deferred_refs(node: parser.InstDef) -> dict[lexer.Token, str | None]

     def in_frame_push(idx: int) -> bool:
         for tkn in reversed(node.block.tokens[: idx - 1]):
-            if tkn.kind in {"SEMI", "LBRACE", "RBRACE"}:
+            if tkn.kind in {lx.SEMI, lx.LBRACE, lx.RBRACE}:
                 return False
-            if tkn.kind == "IDENTIFIER" and tkn.text == "_PyFrame_PushUnchecked":
+            if tkn.kind == lx.IDENTIFIER and tkn.text == "_PyFrame_PushUnchecked":
                 return True
         return False

     refs: dict[lexer.Token, str | None] = {}
     for idx, tkn in enumerate(node.block.tokens):
-        if tkn.kind != "IDENTIFIER" or tkn.text != "PyStackRef_FromPyObjectNew":
+        if tkn.kind != lx.IDENTIFIER or tkn.text != "PyStackRef_FromPyObjectNew":
             continue

-        if idx == 0 or node.block.tokens[idx - 1].kind != "EQUALS":
+        if idx == 0 or node.block.tokens[idx - 1].kind != lx.EQUALS:
             if in_frame_push(idx):
                 # PyStackRef_FromPyObjectNew() is called in _PyFrame_PushUnchecked()
                 refs[tkn] = None
@@ -443,15 +444,15 @@ def in_frame_push(idx: int) -> bool:
                 "PyStackRef_FromPyObjectNew() must be assigned to an output", tkn
             )

-        if lhs[0].kind == "TIMES" or any(
-            t.kind == "ARROW" or t.kind == "LBRACKET" for t in lhs[1:]
+        if lhs[0].kind == lx.TIMES or any(
+            t.kind in {lx.ARROW, lx.LBRACKET} for t in lhs[1:]
         ):
             # Don't handle: *ptr = ..., ptr->field = ..., or ptr[field] = ...
             # Assume that they are visible to the GC.
             refs[tkn] = None
             continue

-        if len(lhs) != 1 or lhs[0].kind != "IDENTIFIER":
+        if len(lhs) != 1 or lhs[0].kind != lx.IDENTIFIER:
             raise analysis_error(
                 "PyStackRef_FromPyObjectNew() must be assigned to an output", tkn
             )
@@ -475,22 +476,24 @@ def in_frame_push(idx: int) -> bool:
 def variable_used(node: parser.InstDef, name: str) -> bool:
     """Determine whether a variable with a given name is used in a node."""
     return any(
-        token.kind == "IDENTIFIER" and token.text == name for token in node.block.tokens
+        token.kind == lx.IDENTIFIER and token.text == name
+        for token in node.block.tokens
     )


 def oparg_used(node: parser.InstDef) -> bool:
     """Determine whether `oparg` is used in a node."""
     return any(
-        token.kind == "IDENTIFIER" and token.text == "oparg" for token in node.tokens
+        token.kind == lx.IDENTIFIER and token.text == "oparg"
+        for token in node.tokens
     )


 def tier_variable(node: parser.InstDef) -> int | None:
     """Determine whether a tier variable is used in a node."""
     for token in node.tokens:
-        if token.kind == "ANNOTATION":
-            if token.text == "specializing":
+        if token.kind == lx.ANNOTATION:
+            if token.text == lx.ANN_SPECIALIZING:
                 return 1
             if re.fullmatch(r"tier\d", token.text):
                 return int(token.text[-1])
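
As a side note on the hunk above: tier_variable now compares the annotation against lx.ANN_SPECIALIZING, while tierN annotations still go through the regex. A standalone illustration of that regex branch (behavior inferred from the diff, not code from the PR):

    import re

    # r"tier\d" accepts exactly one digit, so "tier10" would not match.
    for text in ("tier1", "tier2", "tier10", "specializing"):
        m = re.fullmatch(r"tier\d", text)
        print(text, "->", int(text[-1]) if m else None)
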
@@ -637,11 +640,11 @@ def find_stmt_start(node: parser.InstDef, idx: int) -> lexer.Token:
     assert idx < len(node.block.tokens)
     while True:
         tkn = node.block.tokens[idx-1]
-        if tkn.kind in {"SEMI", "LBRACE", "RBRACE", "CMACRO"}:
+        if tkn.kind in {lx.SEMI, lx.LBRACE, lx.RBRACE, lx.CMACRO}:
             break
         idx -= 1
         assert idx > 0
-    while node.block.tokens[idx].kind == "COMMENT":
+    while node.block.tokens[idx].kind == lx.COMMENT:
         idx += 1
     return node.block.tokens[idx]

@@ -651,23 +654,23 @@ def find_stmt_end(node: parser.InstDef, idx: int) -> lexer.Token:
     while True:
         idx += 1
         tkn = node.block.tokens[idx]
-        if tkn.kind == "SEMI":
+        if tkn.kind == lx.SEMI:
             return node.block.tokens[idx+1]

 def check_escaping_calls(instr: parser.InstDef, escapes: dict[lexer.Token, tuple[lexer.Token, lexer.Token]]) -> None:
     calls = {escapes[t][0] for t in escapes}
     in_if = 0
     tkn_iter = iter(instr.block.tokens)
     for tkn in tkn_iter:
-        if tkn.kind == "IF":
+        if tkn.kind == lx.IF:
             next(tkn_iter)
             in_if = 1
-        if tkn.kind == "IDENTIFIER" and tkn.text in ("DEOPT_IF", "ERROR_IF"):
+        if tkn.kind == lx.IDENTIFIER and tkn.text in ("DEOPT_IF", "ERROR_IF"):
             next(tkn_iter)
             in_if = 1
-        elif tkn.kind == "LPAREN" and in_if:
+        elif tkn.kind == lx.LPAREN and in_if:
             in_if += 1
-        elif tkn.kind == "RPAREN":
+        elif tkn.kind == lx.RPAREN:
             if in_if:
                 in_if -= 1
         elif tkn in calls and in_if:
@@ -681,11 +684,11 @@ def find_escaping_api_calls(instr: parser.InstDef) -> dict[lexer.Token, tuple[le
             next_tkn = tokens[idx+1]
         except IndexError:
             break
-        if tkn.kind == "SWITCH":
+        if tkn.kind == lx.SWITCH:
             raise analysis_error(f"switch statements are not supported due to their complex flow control. Sorry.", tkn)
-        if next_tkn.kind != lexer.LPAREN:
+        if next_tkn.kind != lx.LPAREN:
             continue
-        if tkn.kind == lexer.IDENTIFIER:
+        if tkn.kind == lx.IDENTIFIER:
             if tkn.text.upper() == tkn.text:
                 # simple macro
                 continue
@@ -702,12 +705,12 @@ def find_escaping_api_calls(instr: parser.InstDef) -> dict[lexer.Token, tuple[le
                 continue
             if tkn.text in NON_ESCAPING_FUNCTIONS:
                 continue
-        elif tkn.kind == "RPAREN":
+        elif tkn.kind == lx.RPAREN:
             prev = tokens[idx-1]
             if prev.text.endswith("_t") or prev.text == "*" or prev.text == "int":
                 #cast
                 continue
-        elif tkn.kind != "RBRACKET":
+        elif tkn.kind != lx.RBRACKET:
             continue
         start = find_stmt_start(instr, idx)
         end = find_stmt_end(instr, idx)
@@ -729,18 +732,20 @@ def always_exits(op: parser.InstDef) -> bool:
     depth = 0
     tkn_iter = iter(op.tokens)
     for tkn in tkn_iter:
-        if tkn.kind == "LBRACE":
+        if tkn.kind == lx.LBRACE:
             depth += 1
-        elif tkn.kind == "RBRACE":
+        elif tkn.kind == lx.RBRACE:
             depth -= 1
         elif depth > 1:
             continue
-        elif tkn.kind == "GOTO" or tkn.kind == "RETURN":
+        elif tkn.kind in {lx.GOTO, lx.RETURN}:
             return True
-        elif tkn.kind == "KEYWORD":
+        elif tkn.kind == lx.KEYWORD:
+            # XXX: This appears to be unreachable since we never
+            # set tkn.kind to KEYWORD
             if tkn.text in EXITS:
                 return True
-        elif tkn.kind == "IDENTIFIER":
+        elif tkn.kind == lx.IDENTIFIER:
             if tkn.text in EXITS:
                 return True
             if tkn.text == "DEOPT_IF" or tkn.text == "ERROR_IF":
@@ -822,11 +827,12 @@ def compute_properties(op: parser.InstDef) -> Properties:
         uses_locals=(variable_used(op, "GETLOCAL") or variable_used(op, "SETLOCAL"))
         and not has_free,
         has_free=has_free,
-        pure="pure" in op.annotations,
+        pure=lx.ANN_PURE in op.annotations,
         tier=tier_variable(op),
         needs_prev=variable_used(op, "prev_instr"),
     )

+ANN_REPLICATED = re.compile(rf'^{re.escape(lx.ANN_REPLICATE)}\((\d+)\)$')

 def make_uop(
     name: str,
@@ -845,7 +851,7 @@ def make_uop(
         body=op.block.tokens,
         properties=compute_properties(op),
     )
-    if effect_depends_on_oparg_1(op) and "split" in op.annotations:
+    if effect_depends_on_oparg_1(op) and lx.ANN_SPLIT in op.annotations:
         result.properties.oparg_and_1 = True
         for bit in ("0", "1"):
             name_x = name + "_" + bit
@@ -869,8 +875,8 @@ def make_uop(
             rep.replicates = result
             uops[name_x] = rep
     for anno in op.annotations:
-        if anno.startswith("replicate"):
-            result.replicated = int(anno[10:-1])
+        if match := ANN_REPLICATED.match(anno):
+            result.replicated = int(match.group(1))
             break
     else:
         return result
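
The module-level ANN_REPLICATED pattern replaces the positional slice anno[10:-1], which silently assumed the annotation text is exactly replicate(N) (len("replicate(") == 10). A standalone check of the new behavior, assuming lx.ANN_REPLICATE == "replicate" as that slice offset implies:

    import re

    ANN_REPLICATE = "replicate"  # assumed value of lx.ANN_REPLICATE
    ANN_REPLICATED = re.compile(rf'^{re.escape(ANN_REPLICATE)}\((\d+)\)$')

    match = ANN_REPLICATED.match("replicate(4)")
    assert match is not None and int(match.group(1)) == 4
    # Near-misses that the old slicing could mis-parse are now rejected:
    assert ANN_REPLICATED.match("replicate()") is None
    assert ANN_REPLICATED.match("replicated(4)") is None
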
@@ -899,7 +905,7 @@ def make_uop(
 def add_op(op: parser.InstDef, uops: dict[str, Uop]) -> None:
     assert op.kind == "op"
     if op.name in uops:
-        if "override" not in op.annotations:
+        if lx.ANN_OVERRIDE not in op.annotations:
             raise override_error(
                 op.name, op.context, uops[op.name].context, op.tokens[0]
             )
@@ -1145,11 +1151,11 @@ def analyze_forest(forest: list[parser.AstNode]) -> Analysis:
     for uop in uops.values():
         tkn_iter = iter(uop.body)
         for tkn in tkn_iter:
-            if tkn.kind == "IDENTIFIER" and tkn.text == "GO_TO_INSTRUCTION":
-                if next(tkn_iter).kind != "LPAREN":
+            if tkn.kind == lx.IDENTIFIER and tkn.text == "GO_TO_INSTRUCTION":
+                if next(tkn_iter).kind != lx.LPAREN:
                     continue
                 target = next(tkn_iter)
-                if target.kind != "IDENTIFIER":
+                if target.kind != lx.IDENTIFIER:
                     continue
                 if target.text in instructions:
                     instructions[target.text].is_target = True
Tools/cases_generator/cwriter.py (3 additions, 3 deletions)
@@ -1,5 +1,5 @@
 import contextlib
-from lexer import Token
+from lexer import COMMENT, CMACRO, Token
 from typing import TextIO, Iterator

@@ -87,12 +87,12 @@ def emit_multiline_comment(self, tkn: Token) -> None:
         self.out.write(text)

     def emit_token(self, tkn: Token) -> None:
-        if tkn.kind == "COMMENT" and "\n" in tkn.text:
+        if tkn.kind == COMMENT and "\n" in tkn.text:
             return self.emit_multiline_comment(tkn)
         self.maybe_dedent(tkn.text)
         self.set_position(tkn)
         self.emit_text(tkn.text)
-        if tkn.kind == "CMACRO":
+        if tkn.kind == CMACRO:
             self.newline = True
             self.maybe_indent(tkn.text)

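One stylistic note on the two files: analyzer.py reads the constants through a module alias (lx.IDENTIFIER), while cwriter.py imports the two names it needs directly. Both spellings bind the same lexer attributes; a tiny sketch of the equivalence, assuming lexer.py is importable (as it is when run from Tools/cases_generator):

    import lexer as lx
    from lexer import COMMENT

    # One constant, two import spellings.
    assert COMMENT is lx.COMMENT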