From 327acfdfeaacafd324a4f488e9078870d90afab3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 30 Jul 2024 13:14:20 +0200 Subject: [PATCH 01/22] use global constants for token kinds --- Tools/cases_generator/lexer.py | 285 +++++++++++++++------------------ 1 file changed, 125 insertions(+), 160 deletions(-) diff --git a/Tools/cases_generator/lexer.py b/Tools/cases_generator/lexer.py index 13aee94f2b957c..01fbaa4aee9720 100644 --- a/Tools/cases_generator/lexer.py +++ b/Tools/cases_generator/lexer.py @@ -2,6 +2,8 @@ # Originally by Mark Shannon (mark@hotpy.org) # https://gist.github.com/markshannon/db7ab649440b5af765451bb77c7dba34 +__all__ = [] + import re from dataclasses import dataclass from collections.abc import Iterator @@ -13,78 +15,74 @@ def choice(*opts: str) -> str: # Regexes -# Longer operators must go before shorter ones. - -PLUSPLUS = r"\+\+" -MINUSMINUS = r"--" - -# -> -ARROW = r"->" -ELLIPSIS = r"\.\.\." - -# Assignment operators -TIMESEQUAL = r"\*=" -DIVEQUAL = r"/=" -MODEQUAL = r"%=" -PLUSEQUAL = r"\+=" -MINUSEQUAL = r"-=" -LSHIFTEQUAL = r"<<=" -RSHIFTEQUAL = r">>=" -ANDEQUAL = r"&=" -OREQUAL = r"\|=" -XOREQUAL = r"\^=" - -# Operators -PLUS = r"\+" -MINUS = r"-" -TIMES = r"\*" -DIVIDE = r"/" -MOD = r"%" -NOT = r"~" -XOR = r"\^" -LOR = r"\|\|" -LAND = r"&&" -LSHIFT = r"<<" -RSHIFT = r">>" -LE = r"<=" -GE = r">=" -EQ = r"==" -NE = r"!=" -LT = r"<" -GT = r">" -LNOT = r"!" -OR = r"\|" -AND = r"&" -EQUALS = r"=" - -# ? -CONDOP = r"\?" - -# Delimiters -LPAREN = r"\(" -RPAREN = r"\)" -LBRACKET = r"\[" -RBRACKET = r"\]" -LBRACE = r"\{" -RBRACE = r"\}" -COMMA = r"," -PERIOD = r"\." -SEMI = r";" -COLON = r":" -BACKSLASH = r"\\" - -operators = {op: pattern for op, pattern in globals().items() if op == op.upper()} -for op in operators: - globals()[op] = op -opmap = {pattern.replace("\\", "") or "\\": op for op, pattern in operators.items()} +# Mapping from operator names to their regular expressions. +operators = { + # Longer operators must go before shorter ones. + (PLUSPLUS := "PLUSPLUS"): r'\+\+', + (MINUSMINUS := "MINUSMINUS"): r"--", + # -> + (ARROW := "ARROW"): r"->", + (ELLIPSIS := "ELLIPSIS"): r"\.\.\.", + # Assignment operators + (TIMESEQUAL := "TIMESEQUAL"): r"\*=", + (DIVEQUAL := "DIVEQUAL"): r"/=", + (MODEQUAL := "MODEQUAL"): r"%=", + (PLUSEQUAL := "PLUSEQUAL"): r"\+=", + (MINUSEQUAL := "MINUSEQUAL"): r"-=", + (LSHIFTEQUAL := "LSHIFTEQUAL"): r"<<=", + (RSHIFTEQUAL := "RSHIFTEQUAL"): r">>=", + (ANDEQUAL := "ANDEQUAL"): r"&=", + (OREQUAL := "OREQUAL"): r"\|=", + (XOREQUAL := "XOREQUAL"): r"\^=", + # Operators + (PLUS := "PLUS"): r"\+", + (MINUS := "MINUS"): r"-", + (TIMES := "TIMES"): r"\*", + (DIVIDE := "DIVIDE"): r"/", + (MOD := "MOD"): r"%", + (NOT := "NOT"): r"~", + (XOR := "XOR"): r"\^", + (LOR := "LOR"): r"\|\|", + (LAND := "LAND"): r"&&", + (LSHIFT := "LSHIFT"): r"<<", + (RSHIFT := "RSHIFT"): r">>", + (LE := "LE"): r"<=", + (GE := "GE"): r">=", + (EQ := "EQ"): r"==", + (NE := "NE"): r"!=", + (LT := "LT"): r"<", + (GT := "GT"): r">", + (LNOT := "LNOT"): r"!", + (OR := "OR"): r"\|", + (AND := "AND"): r"&", + (EQUALS := "EQUALS"): r"=", + # ? 
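+    # (i.e. the C ternary conditional operator)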
+    (CONDOP := "CONDOP"): r"\?",
+    # Delimiters
+    (LPAREN := "LPAREN"): r"\(",
+    (RPAREN := "RPAREN"): r"\)",
+    (LBRACKET := "LBRACKET"): r"\[",
+    (RBRACKET := "RBRACKET"): r"\]",
+    (LBRACE := "LBRACE"): r"\{",
+    (RBRACE := "RBRACE"): r"\}",
+    (COMMA := "COMMA"): r",",
+    (PERIOD := "PERIOD"): r"\.",
+    (SEMI := "SEMI"): r";",
+    (COLON := "COLON"): r":",
+    (BACKSLASH := "BACKSLASH"): r"\\",
+}
+__all__.extend(operators.keys())
+opmap = {__pattern.replace("\\", "") or "\\": __opname
+         for __opname, __pattern in operators.items()}
 
 # Macros
 macro = r"# *(ifdef|ifndef|undef|define|error|endif|if|else|include|#)"
 CMACRO = "CMACRO"
+__all__.append(CMACRO)
 
 id_re = r"[a-zA-Z_][0-9a-zA-Z_]*"
 IDENTIFIER = "IDENTIFIER"
-
+__all__.append(IDENTIFIER)
 
 suffix = r"([uU]?[lL]?[lL]?)"
 octal = r"0[0-7]+" + suffix
@@ -99,6 +97,7 @@ def choice(*opts: str) -> str:
 
 number_re = choice(octal, hex, float, decimal)
 NUMBER = "NUMBER"
+__all__.append(NUMBER)
 
 simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])"""
 decimal_escape = r"""(\d+)"""
@@ -111,11 +110,16 @@ def choice(*opts: str) -> str:
 STRING = "STRING"
 char = r"\'.\'"  # TODO: escape sequence
 CHARACTER = "CHARACTER"
+__all__.extend([STRING, CHARACTER])
 
 comment_re = r"(//.*)|/\*([^*]|\*[^/])*\*/"
 COMMENT = "COMMENT"
+__all__.append(COMMENT)
 
 newline = r"\n"
+NEWLINE = "NEWLINE"
+__all__.append(NEWLINE)
+
 invalid = (
     r"\S"  # A single non-space character that's not caught by any of the other patterns
 )
@@ -134,103 +138,64 @@ def choice(*opts: str) -> str:
 )
 letter = re.compile(r"[a-zA-Z_]")
 
-
-kwds = []
-AUTO = "AUTO"
-kwds.append(AUTO)
-BREAK = "BREAK"
-kwds.append(BREAK)
-CASE = "CASE"
-kwds.append(CASE)
-CHAR = "CHAR"
-kwds.append(CHAR)
-CONST = "CONST"
-kwds.append(CONST)
-CONTINUE = "CONTINUE"
-kwds.append(CONTINUE)
-DEFAULT = "DEFAULT"
-kwds.append(DEFAULT)
-DO = "DO"
-kwds.append(DO)
-DOUBLE = "DOUBLE"
-kwds.append(DOUBLE)
-ELSE = "ELSE"
-kwds.append(ELSE)
-ENUM = "ENUM"
-kwds.append(ENUM)
-EXTERN = "EXTERN"
-kwds.append(EXTERN)
-FLOAT = "FLOAT"
-kwds.append(FLOAT)
-FOR = "FOR"
-kwds.append(FOR)
-GOTO = "GOTO"
-kwds.append(GOTO)
-IF = "IF"
-kwds.append(IF)
-INLINE = "INLINE"
-kwds.append(INLINE)
-INT = "INT"
-kwds.append(INT)
-LONG = "LONG"
-kwds.append(LONG)
-OFFSETOF = "OFFSETOF"
-kwds.append(OFFSETOF)
-RESTRICT = "RESTRICT"
-kwds.append(RESTRICT)
-RETURN = "RETURN"
-kwds.append(RETURN)
-SHORT = "SHORT"
-kwds.append(SHORT)
-SIGNED = "SIGNED"
-kwds.append(SIGNED)
-SIZEOF = "SIZEOF"
-kwds.append(SIZEOF)
-STATIC = "STATIC"
-kwds.append(STATIC)
-STRUCT = "STRUCT"
-kwds.append(STRUCT)
-SWITCH = "SWITCH"
-kwds.append(SWITCH)
-TYPEDEF = "TYPEDEF"
-kwds.append(TYPEDEF)
-UNION = "UNION"
-kwds.append(UNION)
-UNSIGNED = "UNSIGNED"
-kwds.append(UNSIGNED)
-VOID = "VOID"
-kwds.append(VOID)
-VOLATILE = "VOLATILE"
-kwds.append(VOLATILE)
-WHILE = "WHILE"
-kwds.append(WHILE)
-# An instruction in the DSL
-INST = "INST"
-kwds.append(INST)
-# A micro-op in the DSL
-OP = "OP"
-kwds.append(OP)
-# A macro in the DSL
-MACRO = "MACRO"
-kwds.append(MACRO)
-keywords = {name.lower(): name for name in kwds}
+# Mapping from keywords to their token kinds.
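+# Each key binds its token-kind constant (e.g. AUTO) at module level via an
+# assignment expression, exactly as for ``operators`` above.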
+keywords = {
+    'auto': (AUTO := "AUTO"),
+    'break': (BREAK := "BREAK"),
+    'case': (CASE := "CASE"),
+    'char': (CHAR := "CHAR"),
+    'const': (CONST := "CONST"),
+    'continue': (CONTINUE := "CONTINUE"),
+    'default': (DEFAULT := "DEFAULT"),
+    'do': (DO := "DO"),
+    'double': (DOUBLE := "DOUBLE"),
+    'else': (ELSE := "ELSE"),
+    'enum': (ENUM := "ENUM"),
+    'extern': (EXTERN := "EXTERN"),
+    'float': (FLOAT := "FLOAT"),
+    'for': (FOR := "FOR"),
+    'goto': (GOTO := "GOTO"),
+    'if': (IF := "IF"),
+    'inline': (INLINE := "INLINE"),
+    'int': (INT := "INT"),
+    'long': (LONG := "LONG"),
+    'offsetof': (OFFSETOF := "OFFSETOF"),
+    'restrict': (RESTRICT := "RESTRICT"),
+    'return': (RETURN := "RETURN"),
+    'short': (SHORT := "SHORT"),
+    'signed': (SIGNED := "SIGNED"),
+    'sizeof': (SIZEOF := "SIZEOF"),
+    'static': (STATIC := "STATIC"),
+    'struct': (STRUCT := "STRUCT"),
+    'switch': (SWITCH := "SWITCH"),
+    'typedef': (TYPEDEF := "TYPEDEF"),
+    'union': (UNION := "UNION"),
+    'unsigned': (UNSIGNED := "UNSIGNED"),
+    'void': (VOID := "VOID"),
+    'volatile': (VOLATILE := "VOLATILE"),
+    'while': (WHILE := "WHILE"),
+    # An instruction in the DSL.
+    'inst': (INST := "INST"),
+    # A micro-op in the DSL.
+    'op': (OP := "OP"),
+    # A macro in the DSL.
+    'macro': (MACRO := "MACRO"),
+}
+__all__.extend(keywords.values())
+KEYWORD = "KEYWORD"
+__all__.append(KEYWORD)
 
 ANNOTATION = "ANNOTATION"
+__all__.append(ANNOTATION)
 annotations = {
-    "specializing",
-    "override",
-    "register",
-    "replaced",
-    "pure",
-    "split",
-    "replicate",
-    "tier1",
-    "tier2",
+    ANN_SPECIALIZING := "specializing",
+    ANN_OVERRIDE := "override",
+    ANN_REGISTER := "register",
+    ANN_REPLACED := "replaced",
+    ANN_PURE := "pure",
+    ANN_SPLIT := "split",
+    ANN_REPLICATE := "replicate",
+    ANN_TIER_1 := "tier1",
+    ANN_TIER_2 := "tier2",
 }
 
-__all__ = []
-__all__.extend(kwds)
-
 
 def make_syntax_error(
     message: str,
@@ -307,7 +272,7 @@ def tokenize(src: str, line: int = 1, filename: str = "") -> Iterator[Token]:
         elif text == "\n":
             linestart = start
             line += 1
-            kind = "\n"
+            kind = NEWLINE
         elif text[0] == "'":
             kind = CHARACTER
         elif text[0] == "#":
@@ -333,7 +298,7 @@ def tokenize(src: str, line: int = 1, filename: str = "") -> Iterator[Token]:
             line += newlines
         else:
             begin = line, start - linestart
-            if kind != "\n":
+            if kind != NEWLINE:
                 yield Token(
                     filename, kind, text, begin, (line, start - linestart + len(text))
                 )
@@ -353,7 +318,7 @@ def to_text(tkns: list[Token], dedent: int = 0) -> str:
             col = 1 + dedent
         res.append(" " * (c - col))
         text = tkn.text
-        if dedent != 0 and tkn.kind == "COMMENT" and "\n" in text:
+        if dedent != 0 and tkn.kind == COMMENT and "\n" in text:
             if dedent < 0:
                 text = text.replace("\n", "\n" + " " * -dedent)
             # TODO: dedent > 0

From c7ce6df30a3dd70becb2b50811a3f16bf87d623b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com>
Date: Tue, 30 Jul 2024 13:15:09 +0200
Subject: [PATCH 02/22] analyzer.py: use lexer constants

---
 Tools/cases_generator/analyzer.py | 62 +++++++++++++++++--------------
 1 file changed, 34 insertions(+), 28 deletions(-)

diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py
index 675dc0b9acaf45..b8b71a41af70bb 100644
--- a/Tools/cases_generator/analyzer.py
+++ b/Tools/cases_generator/analyzer.py
@@ -1,9 +1,10 @@
-from dataclasses import dataclass
-import lexer
-import parser
 import re
+from dataclasses import dataclass
 from typing import Optional
+import lexer as lx
+import parser
+
 
 @dataclass
 class Properties:
@@ -157,7 +158,7 @@ class Uop:
     annotations: list[str]
     stack: StackEffect
     caches: list[CacheEntry]
-    body: list[lexer.Token]
+    
body: list[lx.Token] properties: Properties _size: int = -1 implicitly_created: bool = False @@ -182,7 +183,7 @@ def why_not_viable(self) -> str | None: return None # Adjusts next_instr, but only in tier 1 code if "INSTRUMENTED" in self.name: return "is instrumented" - if "replaced" in self.annotations: + if lx.ANN_REPLACED in self.annotations: return "is replaced" if self.name in ("INTERPRETER_EXIT", "JUMP_BACKWARD"): return "has tier 1 control flow" @@ -206,7 +207,7 @@ def is_viable(self) -> bool: def is_super(self) -> bool: for tkn in self.body: - if tkn.kind == "IDENTIFIER" and tkn.text == "oparg1": + if tkn.kind == lx.IDENTIFIER and tkn.text == "oparg1": return True return False @@ -287,17 +288,17 @@ class Analysis: min_instrumented: int -def analysis_error(message: str, tkn: lexer.Token) -> SyntaxError: +def analysis_error(message: str, tkn: lx.Token) -> SyntaxError: # To do -- support file and line output # Construct a SyntaxError instance from message and token - return lexer.make_syntax_error(message, tkn.filename, tkn.line, tkn.column, "") + return lx.make_syntax_error(message, tkn.filename, tkn.line, tkn.column, "") def override_error( name: str, context: parser.Context | None, prev_context: parser.Context | None, - token: lexer.Token, + token: lx.Token, ) -> SyntaxError: return analysis_error( f"Duplicate definition of '{name}' @ {context} " @@ -354,20 +355,22 @@ def analyze_caches(inputs: list[parser.InputEffect]) -> list[CacheEntry]: def variable_used(node: parser.InstDef, name: str) -> bool: """Determine whether a variable with a given name is used in a node.""" return any( - token.kind == "IDENTIFIER" and token.text == name for token in node.block.tokens + token.kind == lx.IDENTIFIER and token.text == name + for token in node.block.tokens ) def oparg_used(node: parser.InstDef) -> bool: """Determine whether `oparg` is used in a node.""" return any( - token.kind == "IDENTIFIER" and token.text == "oparg" for token in node.tokens + token.kind == lx.IDENTIFIER and token.text == "oparg" + for token in node.tokens ) def tier_variable(node: parser.InstDef) -> int | None: """Determine whether a tier variable is used in a node.""" for token in node.tokens: - if token.kind == "ANNOTATION": - if token.text == "specializing": + if token.kind == lx.ANNOTATION: + if token.text == lx.ANN_SPECIALIZING: return 1 if re.fullmatch(r"tier\d", token.text): return int(token.text[-1]) @@ -485,13 +488,13 @@ def makes_escaping_api_call(instr: parser.InstDef) -> bool: return True tkns = iter(instr.tokens) for tkn in tkns: - if tkn.kind != lexer.IDENTIFIER: + if tkn.kind != lx.IDENTIFIER: continue try: next_tkn = next(tkns) except StopIteration: return False - if next_tkn.kind != lexer.LPAREN: + if next_tkn.kind != lx.LPAREN: continue if tkn.text in ESCAPING_FUNCTIONS: return True @@ -528,18 +531,20 @@ def always_exits(op: parser.InstDef) -> bool: depth = 0 tkn_iter = iter(op.tokens) for tkn in tkn_iter: - if tkn.kind == "LBRACE": + if tkn.kind == lx.LBRACE: depth += 1 - elif tkn.kind == "RBRACE": + elif tkn.kind == lx.RBRACE: depth -= 1 elif depth > 1: continue - elif tkn.kind == "GOTO" or tkn.kind == "RETURN": + elif tkn.kind == lx.GOTO or tkn.kind == lx.RETURN: return True elif tkn.kind == "KEYWORD": + # XXX: This appears to be unreachable since we never + # set tkn.kind to "KEYWORD" if tkn.text in EXITS: return True - elif tkn.kind == "IDENTIFIER": + elif tkn.kind == lx.IDENTIFIER: if tkn.text in EXITS: return True if tkn.text == "DEOPT_IF" or tkn.text == "ERROR_IF": @@ -591,7 +596,7 @@ def 
compute_properties(op: parser.InstDef) -> Properties: exits_if = variable_used(op, "EXIT_IF") if deopts_if and exits_if: tkn = op.tokens[0] - raise lexer.make_syntax_error( + raise lx.make_syntax_error( "Op cannot contain both EXIT_IF and DEOPT_IF", tkn.filename, tkn.line, @@ -618,11 +623,12 @@ def compute_properties(op: parser.InstDef) -> Properties: uses_locals=(variable_used(op, "GETLOCAL") or variable_used(op, "SETLOCAL")) and not has_free, has_free=has_free, - pure="pure" in op.annotations, + pure=lx.ANN_PURE in op.annotations, tier=tier_variable(op), needs_prev=variable_used(op, "prev_instr"), ) +ANN_REPLICATED = re.compile(rf'^{re.escape(lx.ANN_REPLICATE)}\((\d+)\)$') def make_uop(name: str, op: parser.InstDef, inputs: list[parser.InputEffect], uops: dict[str, Uop]) -> Uop: result = Uop( @@ -634,7 +640,7 @@ def make_uop(name: str, op: parser.InstDef, inputs: list[parser.InputEffect], uo body=op.block.tokens, properties=compute_properties(op), ) - if effect_depends_on_oparg_1(op) and "split" in op.annotations: + if effect_depends_on_oparg_1(op) and lx.ANN_SPLIT in op.annotations: result.properties.oparg_and_1 = True for bit in ("0", "1"): name_x = name + "_" + bit @@ -654,8 +660,8 @@ def make_uop(name: str, op: parser.InstDef, inputs: list[parser.InputEffect], uo rep.replicates = result uops[name_x] = rep for anno in op.annotations: - if anno.startswith("replicate"): - result.replicated = int(anno[10:-1]) + if match := ANN_REPLICATED.match(anno): + result.replicated = int(match.group(1)) break else: return result @@ -682,7 +688,7 @@ def make_uop(name: str, op: parser.InstDef, inputs: list[parser.InputEffect], uo def add_op(op: parser.InstDef, uops: dict[str, Uop]) -> None: assert op.kind == "op" if op.name in uops: - if "override" not in op.annotations: + if lx.ANN_OVERRIDE not in op.annotations: raise override_error( op.name, op.context, uops[op.name].context, op.tokens[0] ) @@ -892,11 +898,11 @@ def analyze_forest(forest: list[parser.AstNode]) -> Analysis: for uop in uops.values(): tkn_iter = iter(uop.body) for tkn in tkn_iter: - if tkn.kind == "IDENTIFIER" and tkn.text == "GO_TO_INSTRUCTION": - if next(tkn_iter).kind != "LPAREN": + if tkn.kind == lx.IDENTIFIER and tkn.text == "GO_TO_INSTRUCTION": + if next(tkn_iter).kind != lx.LPAREN: continue target = next(tkn_iter) - if target.kind != "IDENTIFIER": + if target.kind != lx.IDENTIFIER: continue if target.text in instructions: instructions[target.text].is_target = True From 194e0ed46c2ed2dbabaaf3666617a0632d34e1e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 30 Jul 2024 13:15:21 +0200 Subject: [PATCH 03/22] parsing.py: use lexer constants --- Tools/cases_generator/parsing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Tools/cases_generator/parsing.py b/Tools/cases_generator/parsing.py index 8957838f7a90a1..4e1da2e1641769 100644 --- a/Tools/cases_generator/parsing.py +++ b/Tools/cases_generator/parsing.py @@ -181,11 +181,11 @@ def inst_header(self) -> InstHeader | None: # | annotation* op(NAME, (inputs -- outputs)) annotations = [] while anno := self.expect(lx.ANNOTATION): - if anno.text == "replicate": + if anno.text == lx.ANN_REPLICATE: self.require(lx.LPAREN) times = self.require(lx.NUMBER) self.require(lx.RPAREN) - annotations.append(f"replicate({times.text})") + annotations.append(f"{lx.ANN_REPLICATE}({times.text})") else: annotations.append(anno.text) tkn = self.expect(lx.INST) From d069e1c215b8a19e4dca50f42f8ec1ed811f7db7 
Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 30 Jul 2024 13:15:48 +0200 Subject: [PATCH 04/22] cwriter.py: use lexer constants --- Tools/cases_generator/cwriter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Tools/cases_generator/cwriter.py b/Tools/cases_generator/cwriter.py index 069f0177a74018..f1e5dc5d74a6ba 100644 --- a/Tools/cases_generator/cwriter.py +++ b/Tools/cases_generator/cwriter.py @@ -1,5 +1,5 @@ import contextlib -from lexer import Token +from lexer import COMMENT, Token from typing import TextIO, Iterator @@ -86,7 +86,7 @@ def emit_multiline_comment(self, tkn: Token) -> None: self.out.write(text) def emit_token(self, tkn: Token) -> None: - if tkn.kind == "COMMENT" and "\n" in tkn.text: + if tkn.kind == COMMENT and "\n" in tkn.text: return self.emit_multiline_comment(tkn) self.maybe_dedent(tkn.text) self.set_position(tkn) From b4ae7c10464c7d7c44e45ed71c9df02ac70451a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 30 Jul 2024 13:17:30 +0200 Subject: [PATCH 05/22] generators_common: use lexer constants --- Tools/cases_generator/generators_common.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index 587dc0d03eded5..618953b9e92cd5 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -9,7 +9,7 @@ ) from cwriter import CWriter from typing import Callable, Mapping, TextIO, Iterator, Tuple -from lexer import Token +from lexer import COMMA, IDENTIFIER, LPAREN, RPAREN, Token from stack import Stack @@ -51,9 +51,9 @@ def emit_to(out: CWriter, tkn_iter: Iterator[Token], end: str) -> None: for tkn in tkn_iter: if tkn.kind == end and parens == 0: return - if tkn.kind == "LPAREN": + if tkn.kind == LPAREN: parens += 1 - if tkn.kind == "RPAREN": + if tkn.kind == RPAREN: parens -= 1 out.emit(tkn) @@ -68,7 +68,7 @@ def replace_deopt( ) -> None: out.emit_at("DEOPT_IF", tkn) out.emit(next(tkn_iter)) - emit_to(out, tkn_iter, "RPAREN") + emit_to(out, tkn_iter, RPAREN) next(tkn_iter) # Semi colon out.emit(", ") assert inst is not None @@ -87,7 +87,7 @@ def replace_error( ) -> None: out.emit_at("if ", tkn) out.emit(next(tkn_iter)) - emit_to(out, tkn_iter, "COMMA") + emit_to(out, tkn_iter, COMMA) label = next(tkn_iter).text next(tkn_iter) # RPAREN next(tkn_iter) # Semi colon @@ -208,7 +208,7 @@ def emit_tokens( tkn_iter = iter(tkns) out.start_line() for tkn in tkn_iter: - if tkn.kind == "IDENTIFIER" and tkn.text in replacement_functions: + if tkn.kind == IDENTIFIER and tkn.text in replacement_functions: replacement_functions[tkn.text](out, tkn, tkn_iter, uop, stack, inst) else: out.emit(tkn) From 950e6dc8a2c17428bf828b92a5775f930fb6208e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 30 Jul 2024 13:18:17 +0200 Subject: [PATCH 06/22] opcode_metadata_generator.py: use lexer constants --- Tools/cases_generator/opcode_metadata_generator.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Tools/cases_generator/opcode_metadata_generator.py b/Tools/cases_generator/opcode_metadata_generator.py index 09b9d3d211eb24..9c49ccd5985537 100644 --- a/Tools/cases_generator/opcode_metadata_generator.py +++ b/Tools/cases_generator/opcode_metadata_generator.py @@ 
-19,6 +19,7 @@ cflags, ) from cwriter import CWriter +from lexer import ANN_REPLACED, ANN_SPECIALIZING from typing import TextIO from stack import get_stack_effect @@ -245,9 +246,9 @@ def generate_expansion_table(analysis: Analysis, out: CWriter) -> None: size = OPARG_KINDS["OPARG_SAVE_RETURN_OFFSET"] if isinstance(part, Uop): # Skip specializations - if "specializing" in part.annotations: + if ANN_SPECIALIZING in part.annotations: continue - if "replaced" in part.annotations: + if ANN_REPLACED in part.annotations: size = OPARG_KINDS["OPARG_REPLACED"] expansions.append((part.name, size, offset if size else 0)) offset += part.size @@ -282,9 +283,9 @@ def is_viable_expansion(inst: Instruction) -> bool: for part in inst.parts: if isinstance(part, Uop): # Skip specializing and replaced uops - if "specializing" in part.annotations: + if ANN_SPECIALIZING in part.annotations: continue - if "replaced" in part.annotations: + if ANN_REPLACED in part.annotations: continue if part.properties.tier == 1 or not part.is_viable(): return False From c854c0936b4c732b5397a925866e535aaf8eec38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 30 Jul 2024 13:18:35 +0200 Subject: [PATCH 07/22] plexer.py: use lexer constants --- Tools/cases_generator/plexer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/cases_generator/plexer.py b/Tools/cases_generator/plexer.py index cb6c5375866490..cda490f5f0cf68 100644 --- a/Tools/cases_generator/plexer.py +++ b/Tools/cases_generator/plexer.py @@ -34,7 +34,7 @@ def next(self, raw: bool = False) -> Token | None: while self.pos < len(self.tokens): tok = self.tokens[self.pos] self.pos += 1 - if raw or tok.kind != "COMMENT": + if raw or tok.kind != lx.COMMENT: return tok return None From cfc058ffb18b8541c0e51775b437557c25b2266a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 30 Jul 2024 13:26:59 +0200 Subject: [PATCH 08/22] cosmetic change for imports --- Tools/cases_generator/generators_common.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index 618953b9e92cd5..1889d633a2e0f7 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import TextIO +from typing import Callable, Mapping, TextIO, Iterator, Tuple from analyzer import ( Instruction, @@ -8,7 +8,6 @@ StackItem, ) from cwriter import CWriter -from typing import Callable, Mapping, TextIO, Iterator, Tuple from lexer import COMMA, IDENTIFIER, LPAREN, RPAREN, Token from stack import Stack From fc3f5039e2b0c20a1ecf52af332195a537678b46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 30 Jul 2024 13:40:50 +0200 Subject: [PATCH 09/22] make mypy happy --- Tools/cases_generator/lexer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/cases_generator/lexer.py b/Tools/cases_generator/lexer.py index 01fbaa4aee9720..eb6a1ab768063d 100644 --- a/Tools/cases_generator/lexer.py +++ b/Tools/cases_generator/lexer.py @@ -2,7 +2,7 @@ # Originally by Mark Shannon (mark@hotpy.org) # https://gist.github.com/markshannon/db7ab649440b5af765451bb77c7dba34 -__all__ = [] +__all__: list[str] = [] import re from dataclasses import dataclass From 
f3e009b34b891ed5f7f3321e8c5f6d7b618c31fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 2 Aug 2024 13:23:46 +0200 Subject: [PATCH 10/22] mypy --- Tools/cases_generator/analyzer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 1cb19d85bc87c9..34d051c4b1bccd 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -217,7 +217,7 @@ def is_super(self) -> bool: @dataclass class Instruction: - where: lexer.Token + where: lx.Token name: str parts: list[Part] _properties: Properties | None @@ -697,7 +697,7 @@ def add_op(op: parser.InstDef, uops: dict[str, Uop]) -> None: def add_instruction( - where: lexer.Token, name: str, parts: list[Part], + where: lx.Token, name: str, parts: list[Part], instructions: dict[str, Instruction] ) -> None: instructions[name] = Instruction(where, name, parts, None) From 92dff0850f4bad6e78e55ee31801c6b8018485bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 13 Aug 2024 17:28:47 +0200 Subject: [PATCH 11/22] revert `import lexer as lx` --- Tools/cases_generator/analyzer.py | 50 +++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 5c32296772289b..751c91354e4238 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -2,7 +2,7 @@ from dataclasses import dataclass from typing import Optional -import lexer as lx +import lexer import parser @@ -185,7 +185,7 @@ def why_not_viable(self) -> str | None: return None # Adjusts next_instr, but only in tier 1 code if "INSTRUMENTED" in self.name: return "is instrumented" - if lx.ANN_REPLACED in self.annotations: + if lexer.ANN_REPLACED in self.annotations: return "is replaced" if self.name in ("INTERPRETER_EXIT", "JUMP_BACKWARD"): return "has tier 1 control flow" @@ -209,7 +209,7 @@ def is_viable(self) -> bool: def is_super(self) -> bool: for tkn in self.body: - if tkn.kind == lx.IDENTIFIER and tkn.text == "oparg1": + if tkn.kind == lexer.IDENTIFIER and tkn.text == "oparg1": return True return False @@ -219,7 +219,7 @@ def is_super(self) -> bool: @dataclass class Instruction: - where: lx.Token + where: lexer.Token name: str parts: list[Part] _properties: Properties | None @@ -291,17 +291,17 @@ class Analysis: min_instrumented: int -def analysis_error(message: str, tkn: lx.Token) -> SyntaxError: +def analysis_error(message: str, tkn: lexer.Token) -> SyntaxError: # To do -- support file and line output # Construct a SyntaxError instance from message and token - return lx.make_syntax_error(message, tkn.filename, tkn.line, tkn.column, "") + return lexer.make_syntax_error(message, tkn.filename, tkn.line, tkn.column, "") def override_error( name: str, context: parser.Context | None, prev_context: parser.Context | None, - token: lx.Token, + token: lexer.Token, ) -> SyntaxError: return analysis_error( f"Duplicate definition of '{name}' @ {context} " @@ -418,7 +418,7 @@ def find_assignment_target(idx: int) -> list[lexer.Token]: def variable_used(node: parser.InstDef, name: str) -> bool: """Determine whether a variable with a given name is used in a node.""" return any( - token.kind == lx.IDENTIFIER and token.text == name + token.kind == lexer.IDENTIFIER and token.text == name for token in node.block.tokens ) @@ 
-426,7 +426,7 @@ def variable_used(node: parser.InstDef, name: str) -> bool: def oparg_used(node: parser.InstDef) -> bool: """Determine whether `oparg` is used in a node.""" return any( - token.kind == lx.IDENTIFIER and token.text == "oparg" + token.kind == lexer.IDENTIFIER and token.text == "oparg" for token in node.tokens ) @@ -434,8 +434,8 @@ def oparg_used(node: parser.InstDef) -> bool: def tier_variable(node: parser.InstDef) -> int | None: """Determine whether a tier variable is used in a node.""" for token in node.tokens: - if token.kind == lx.ANNOTATION: - if token.text == lx.ANN_SPECIALIZING: + if token.kind == lexer.ANNOTATION: + if token.text == lexer.ANN_SPECIALIZING: return 1 if re.fullmatch(r"tier\d", token.text): return int(token.text[-1]) @@ -555,13 +555,13 @@ def makes_escaping_api_call(instr: parser.InstDef) -> bool: return True tkns = iter(instr.tokens) for tkn in tkns: - if tkn.kind != lx.IDENTIFIER: + if tkn.kind != lexer.IDENTIFIER: continue try: next_tkn = next(tkns) except StopIteration: return False - if next_tkn.kind != lx.LPAREN: + if next_tkn.kind != lexer.LPAREN: continue if tkn.text in ESCAPING_FUNCTIONS: return True @@ -598,20 +598,20 @@ def always_exits(op: parser.InstDef) -> bool: depth = 0 tkn_iter = iter(op.tokens) for tkn in tkn_iter: - if tkn.kind == lx.LBRACE: + if tkn.kind == lexer.LBRACE: depth += 1 - elif tkn.kind == lx.RBRACE: + elif tkn.kind == lexer.RBRACE: depth -= 1 elif depth > 1: continue - elif tkn.kind == lx.GOTO or tkn.kind == lx.RETURN: + elif tkn.kind == lexer.GOTO or tkn.kind == lexer.RETURN: return True elif tkn.kind == "KEYWORD": # XXX: This appears to be unreachable since we never # set tkn.kind to "KEYWORD" if tkn.text in EXITS: return True - elif tkn.kind == lx.IDENTIFIER: + elif tkn.kind == lexer.IDENTIFIER: if tkn.text in EXITS: return True if tkn.text == "DEOPT_IF" or tkn.text == "ERROR_IF": @@ -666,7 +666,7 @@ def compute_properties(op: parser.InstDef) -> Properties: exits_if = variable_used(op, "EXIT_IF") if deopts_if and exits_if: tkn = op.tokens[0] - raise lx.make_syntax_error( + raise lexer.make_syntax_error( "Op cannot contain both EXIT_IF and DEOPT_IF", tkn.filename, tkn.line, @@ -693,12 +693,12 @@ def compute_properties(op: parser.InstDef) -> Properties: uses_locals=(variable_used(op, "GETLOCAL") or variable_used(op, "SETLOCAL")) and not has_free, has_free=has_free, - pure=lx.ANN_PURE in op.annotations, + pure=lexer.ANN_PURE in op.annotations, tier=tier_variable(op), needs_prev=variable_used(op, "prev_instr"), ) -ANN_REPLICATED = re.compile(rf'^{re.escape(lx.ANN_REPLICATE)}\((\d+)\)$') +ANN_REPLICATED = re.compile(rf'^{re.escape(lexer.ANN_REPLICATE)}\((\d+)\)$') def make_uop( name: str, @@ -716,7 +716,7 @@ def make_uop( body=op.block.tokens, properties=compute_properties(op), ) - if effect_depends_on_oparg_1(op) and lx.ANN_SPLIT in op.annotations: + if effect_depends_on_oparg_1(op) and lexer.ANN_SPLIT in op.annotations: result.properties.oparg_and_1 = True for bit in ("0", "1"): name_x = name + "_" + bit @@ -768,7 +768,7 @@ def make_uop( def add_op(op: parser.InstDef, uops: dict[str, Uop]) -> None: assert op.kind == "op" if op.name in uops: - if lx.ANN_OVERRIDE not in op.annotations: + if lexer.ANN_OVERRIDE not in op.annotations: raise override_error( op.name, op.context, uops[op.name].context, op.tokens[0] ) @@ -983,11 +983,11 @@ def analyze_forest(forest: list[parser.AstNode]) -> Analysis: for uop in uops.values(): tkn_iter = iter(uop.body) for tkn in tkn_iter: - if tkn.kind == lx.IDENTIFIER and tkn.text == 
"GO_TO_INSTRUCTION": - if next(tkn_iter).kind != lx.LPAREN: + if tkn.kind == lexer.IDENTIFIER and tkn.text == "GO_TO_INSTRUCTION": + if next(tkn_iter).kind != lexer.LPAREN: continue target = next(tkn_iter) - if target.kind != lx.IDENTIFIER: + if target.kind != lexer.IDENTIFIER: continue if target.text in instructions: instructions[target.text].is_target = True From 14aa67a45a35b136e19dd2b325aa631e3b7db727 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 13 Aug 2024 17:33:53 +0200 Subject: [PATCH 12/22] revert cosmetic change --- Tools/cases_generator/analyzer.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 751c91354e4238..d2dfa9c3340524 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -1,9 +1,8 @@ -import re from dataclasses import dataclass -from typing import Optional - import lexer import parser +import re +from typing import Optional @dataclass From 08bfa721f9ad87b40af79893f7f7039124873ab0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 15 Nov 2024 13:30:59 +0100 Subject: [PATCH 13/22] cleanup imports --- Tools/cases_generator/generators_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index 1bec11fe7455c9..f4756df036ed15 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Callable, Mapping, TextIO, Iterator +from typing import TextIO from analyzer import ( Instruction, From c5d04c6103a5adc0c635638fd8c75ead2de31965 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 15 Nov 2024 13:43:43 +0100 Subject: [PATCH 14/22] update usage --- Tools/cases_generator/analyzer.py | 95 +++++++++++----------- Tools/cases_generator/cwriter.py | 4 +- Tools/cases_generator/generators_common.py | 43 +++++----- 3 files changed, 72 insertions(+), 70 deletions(-) diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 02952826764203..c2c9b766790dc6 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -3,6 +3,7 @@ import lexer import parser import re +from lexer import * from typing import Optional @dataclass @@ -194,7 +195,7 @@ def why_not_viable(self) -> str | None: return None # Adjusts next_instr, but only in tier 1 code if "INSTRUMENTED" in self.name: return "is instrumented" - if lexer.ANN_REPLACED in self.annotations: + if ANN_REPLACED in self.annotations: return "is replaced" if self.name in ("INTERPRETER_EXIT", "JUMP_BACKWARD"): return "has tier 1 control flow" @@ -211,7 +212,7 @@ def is_viable(self) -> bool: def is_super(self) -> bool: for tkn in self.body: - if tkn.kind == lexer.IDENTIFIER and tkn.text == "oparg1": + if tkn.kind == IDENTIFIER and tkn.text == "oparg1": return True return False @@ -384,7 +385,7 @@ def find_assignment_target(node: parser.InstDef, idx: int) -> list[lexer.Token]: """Find the tokens that make up the left-hand side of an assignment""" offset = 0 for tkn in reversed(node.block.tokens[: idx]): - if tkn.kind in {"SEMI", "LBRACE", "RBRACE"}: + if tkn.kind in {SEMI, LBRACE, RBRACE}: return node.block.tokens[idx - offset : 
idx] offset += 1 return [] @@ -395,17 +396,17 @@ def find_stores_outputs(node: parser.InstDef) -> list[lexer.Token]: outnames = { out.name for out in node.outputs } innames = { out.name for out in node.inputs } for idx, tkn in enumerate(node.block.tokens): - if tkn.kind == "AND": + if tkn.kind == AND: name = node.block.tokens[idx+1] if name.text in outnames: res.append(name) - if tkn.kind != "EQUALS": + if tkn.kind != EQUALS: continue lhs = find_assignment_target(node, idx) assert lhs - while lhs and lhs[0].kind == "COMMENT": + while lhs and lhs[0].kind == COMMENT: lhs = lhs[1:] - if len(lhs) != 1 or lhs[0].kind != "IDENTIFIER": + if len(lhs) != 1 or lhs[0].kind != IDENTIFIER: continue name = lhs[0] if name.text in innames: @@ -419,18 +420,18 @@ def analyze_deferred_refs(node: parser.InstDef) -> dict[lexer.Token, str | None] def in_frame_push(idx: int) -> bool: for tkn in reversed(node.block.tokens[: idx - 1]): - if tkn.kind in {"SEMI", "LBRACE", "RBRACE"}: + if tkn.kind in {SEMI, LBRACE, RBRACE}: return False - if tkn.kind == "IDENTIFIER" and tkn.text == "_PyFrame_PushUnchecked": + if tkn.kind == IDENTIFIER and tkn.text == "_PyFrame_PushUnchecked": return True return False refs: dict[lexer.Token, str | None] = {} for idx, tkn in enumerate(node.block.tokens): - if tkn.kind != "IDENTIFIER" or tkn.text != "PyStackRef_FromPyObjectNew": + if tkn.kind != IDENTIFIER or tkn.text != "PyStackRef_FromPyObjectNew": continue - if idx == 0 or node.block.tokens[idx - 1].kind != "EQUALS": + if idx == 0 or node.block.tokens[idx - 1].kind != EQUALS: if in_frame_push(idx): # PyStackRef_FromPyObjectNew() is called in _PyFrame_PushUnchecked() refs[tkn] = None @@ -443,15 +444,15 @@ def in_frame_push(idx: int) -> bool: "PyStackRef_FromPyObjectNew() must be assigned to an output", tkn ) - if lhs[0].kind == "TIMES" or any( - t.kind == "ARROW" or t.kind == "LBRACKET" for t in lhs[1:] + if lhs[0].kind == TIMES or any( + t.kind == ARROW or t.kind == LBRACKET for t in lhs[1:] ): # Don't handle: *ptr = ..., ptr->field = ..., or ptr[field] = ... # Assume that they are visible to the GC. 
refs[tkn] = None continue - if len(lhs) != 1 or lhs[0].kind != "IDENTIFIER": + if len(lhs) != 1 or lhs[0].kind != IDENTIFIER: raise analysis_error( "PyStackRef_FromPyObjectNew() must be assigned to an output", tkn ) @@ -475,7 +476,7 @@ def in_frame_push(idx: int) -> bool: def variable_used(node: parser.InstDef, name: str) -> bool: """Determine whether a variable with a given name is used in a node.""" return any( - token.kind == lexer.IDENTIFIER and token.text == name + token.kind == IDENTIFIER and token.text == name for token in node.block.tokens ) @@ -483,7 +484,7 @@ def variable_used(node: parser.InstDef, name: str) -> bool: def oparg_used(node: parser.InstDef) -> bool: """Determine whether `oparg` is used in a node.""" return any( - token.kind == lexer.IDENTIFIER and token.text == "oparg" + token.kind == IDENTIFIER and token.text == "oparg" for token in node.tokens ) @@ -491,8 +492,8 @@ def oparg_used(node: parser.InstDef) -> bool: def tier_variable(node: parser.InstDef) -> int | None: """Determine whether a tier variable is used in a node.""" for token in node.tokens: - if token.kind == lexer.ANNOTATION: - if token.text == lexer.ANN_SPECIALIZING: + if token.kind == ANNOTATION: + if token.text == ANN_SPECIALIZING: return 1 if re.fullmatch(r"tier\d", token.text): return int(token.text[-1]) @@ -639,11 +640,11 @@ def find_stmt_start(node: parser.InstDef, idx: int) -> lexer.Token: assert idx < len(node.block.tokens) while True: tkn = node.block.tokens[idx-1] - if tkn.kind in {"SEMI", "LBRACE", "RBRACE", "CMACRO"}: + if tkn.kind in {SEMI, LBRACE, RBRACE, CMACRO}: break idx -= 1 assert idx > 0 - while node.block.tokens[idx].kind == "COMMENT": + while node.block.tokens[idx].kind == COMMENT: idx += 1 return node.block.tokens[idx] @@ -653,7 +654,7 @@ def find_stmt_end(node: parser.InstDef, idx: int) -> lexer.Token: while True: idx += 1 tkn = node.block.tokens[idx] - if tkn.kind == "SEMI": + if tkn.kind == SEMI: return node.block.tokens[idx+1] def check_escaping_calls(instr: parser.InstDef, escapes: dict[lexer.Token, tuple[lexer.Token, lexer.Token]]) -> None: @@ -661,15 +662,15 @@ def check_escaping_calls(instr: parser.InstDef, escapes: dict[lexer.Token, tuple in_if = 0 tkn_iter = iter(instr.block.tokens) for tkn in tkn_iter: - if tkn.kind == "IF": + if tkn.kind == IF: next(tkn_iter) in_if = 1 - if tkn.kind == "IDENTIFIER" and tkn.text in ("DEOPT_IF", "ERROR_IF"): + if tkn.kind == IDENTIFIER and tkn.text in ("DEOPT_IF", "ERROR_IF"): next(tkn_iter) in_if = 1 - elif tkn.kind == "LPAREN" and in_if: + elif tkn.kind == LPAREN and in_if: in_if += 1 - elif tkn.kind == "RPAREN": + elif tkn.kind == RPAREN: if in_if: in_if -= 1 elif tkn in calls and in_if: @@ -683,11 +684,11 @@ def find_escaping_api_calls(instr: parser.InstDef) -> dict[lexer.Token, tuple[le next_tkn = tokens[idx+1] except IndexError: break - if tkn.kind == "SWITCH": + if tkn.kind == SWITCH: raise analysis_error(f"switch statements are not supported due to their complex flow control. 
Sorry.", tkn) - if next_tkn.kind != lexer.LPAREN: + if next_tkn.kind != LPAREN: continue - if tkn.kind == lexer.IDENTIFIER: + if tkn.kind == IDENTIFIER: if tkn.text.upper() == tkn.text: # simple macro continue @@ -704,12 +705,12 @@ def find_escaping_api_calls(instr: parser.InstDef) -> dict[lexer.Token, tuple[le continue if tkn.text in NON_ESCAPING_FUNCTIONS: continue - elif tkn.kind == "RPAREN": + elif tkn.kind == RPAREN: prev = tokens[idx-1] if prev.text.endswith("_t") or prev.text == "*" or prev.text == "int": #cast continue - elif tkn.kind != "RBRACKET": + elif tkn.kind != RBRACKET: continue start = find_stmt_start(instr, idx) end = find_stmt_end(instr, idx) @@ -731,20 +732,20 @@ def always_exits(op: parser.InstDef) -> bool: depth = 0 tkn_iter = iter(op.tokens) for tkn in tkn_iter: - if tkn.kind == lexer.LBRACE: + if tkn.kind == LBRACE: depth += 1 - elif tkn.kind == lexer.RBRACE: + elif tkn.kind == RBRACE: depth -= 1 elif depth > 1: continue - elif tkn.kind == lexer.GOTO or tkn.kind == lexer.RETURN: + elif tkn.kind == GOTO or tkn.kind == RETURN: return True - elif tkn.kind == "KEYWORD": + elif tkn.kind == KEYWORD: # XXX: This appears to be unreachable since we never - # set tkn.kind to "KEYWORD" + # set tkn.kind to KEYWORD if tkn.text in EXITS: return True - elif tkn.kind == lexer.IDENTIFIER: + elif tkn.kind == IDENTIFIER: if tkn.text in EXITS: return True if tkn.text == "DEOPT_IF" or tkn.text == "ERROR_IF": @@ -826,12 +827,12 @@ def compute_properties(op: parser.InstDef) -> Properties: uses_locals=(variable_used(op, "GETLOCAL") or variable_used(op, "SETLOCAL")) and not has_free, has_free=has_free, - pure=lexer.ANN_PURE in op.annotations, + pure=ANN_PURE in op.annotations, tier=tier_variable(op), needs_prev=variable_used(op, "prev_instr"), ) -ANN_REPLICATED = re.compile(rf'^{re.escape(lexer.ANN_REPLICATE)}\((\d+)\)$') +ANN_REPLICATED = re.compile(rf'^{re.escape(ANN_REPLICATE)}\((\d+)\)$') def make_uop( name: str, @@ -850,7 +851,7 @@ def make_uop( body=op.block.tokens, properties=compute_properties(op), ) - if effect_depends_on_oparg_1(op) and lexer.ANN_SPLIT in op.annotations: + if effect_depends_on_oparg_1(op) and ANN_SPLIT in op.annotations: result.properties.oparg_and_1 = True for bit in ("0", "1"): name_x = name + "_" + bit @@ -902,9 +903,9 @@ def make_uop( def add_op(op: parser.InstDef, uops: dict[str, Uop]) -> None: - assert op.kind == "op" + assert op.kind == OP if op.name in uops: - if lexer.ANN_OVERRIDE not in op.annotations: + if ANN_OVERRIDE not in op.annotations: raise override_error( op.name, op.context, uops[op.name].context, op.tokens[0] ) @@ -923,7 +924,7 @@ def add_instruction( def desugar_inst( inst: parser.InstDef, instructions: dict[str, Instruction], uops: dict[str, Uop] ) -> None: - assert inst.kind == "inst" + assert inst.kind == INST name = inst.name op_inputs: list[parser.InputEffect] = [] parts: list[Part] = [] @@ -1123,10 +1124,10 @@ def analyze_forest(forest: list[parser.AstNode]) -> Analysis: for node in forest: match node: case parser.InstDef(name): - if node.kind == "inst": + if node.kind == INST: desugar_inst(node, instructions, uops) else: - assert node.kind == "op" + assert node.kind == OP add_op(node, uops) case parser.Macro(): pass @@ -1150,11 +1151,11 @@ def analyze_forest(forest: list[parser.AstNode]) -> Analysis: for uop in uops.values(): tkn_iter = iter(uop.body) for tkn in tkn_iter: - if tkn.kind == lexer.IDENTIFIER and tkn.text == "GO_TO_INSTRUCTION": - if next(tkn_iter).kind != lexer.LPAREN: + if tkn.kind == IDENTIFIER and tkn.text == 
"GO_TO_INSTRUCTION": + if next(tkn_iter).kind != LPAREN: continue target = next(tkn_iter) - if target.kind != lexer.IDENTIFIER: + if target.kind != IDENTIFIER: continue if target.text in instructions: instructions[target.text].is_target = True diff --git a/Tools/cases_generator/cwriter.py b/Tools/cases_generator/cwriter.py index ebc41df0e6c277..8c4a973261ee66 100644 --- a/Tools/cases_generator/cwriter.py +++ b/Tools/cases_generator/cwriter.py @@ -1,5 +1,5 @@ import contextlib -from lexer import COMMENT, Token +from lexer import COMMENT, CMACRO, Token from typing import TextIO, Iterator @@ -92,7 +92,7 @@ def emit_token(self, tkn: Token) -> None: self.maybe_dedent(tkn.text) self.set_position(tkn) self.emit_text(tkn.text) - if tkn.kind == "CMACRO": + if tkn.kind == CMACRO: self.newline = True self.maybe_indent(tkn.text) diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index f4756df036ed15..7d72cbffa84b2c 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -10,7 +10,8 @@ ) from cwriter import CWriter from typing import Callable, TextIO, Iterator, Iterable -from lexer import COMMA, IDENTIFIER, LPAREN, RPAREN, Token +from lexer import * +from lexer import Token from stack import Storage, StackError # Set this to true for voluminous output showing state of stack and locals @@ -118,7 +119,7 @@ def __init__(self, out: CWriter): "PyStackRef_CLOSE": self.stackref_close, "PyStackRef_CLOSE_SPECIALIZED": self.stackref_close, "PyStackRef_AsPyObjectSteal": self.stackref_steal, - "DISPATCH": self.dispatch, + DISPATCH: self.dispatch, "INSTRUCTION_SIZE": self.instruction_size, } self.out = out @@ -145,9 +146,9 @@ def deopt_if( self.out.emit_at("DEOPT_IF", tkn) lparen = next(tkn_iter) self.emit(lparen) - assert lparen.kind == "LPAREN" + assert lparen.kind == LPAREN first_tkn = tkn_iter.peek() - emit_to(self.out, tkn_iter, "RPAREN") + emit_to(self.out, tkn_iter, LPAREN) next(tkn_iter) # Semi colon self.out.emit(", ") assert inst is not None @@ -167,19 +168,19 @@ def error_if( inst: Instruction | None, ) -> bool: lparen = next(tkn_iter) - assert lparen.kind == "LPAREN" + assert lparen.kind == LPAREN first_tkn = tkn_iter.peek() unconditional = always_true(first_tkn) if unconditional: next(tkn_iter) comma = next(tkn_iter) - if comma.kind != "COMMA": + if comma.kind != COMMA: raise analysis_error(f"Expected comma, got '{comma.text}'", comma) self.out.start_line() else: self.out.emit_at("if ", tkn) self.emit(lparen) - emit_to(self.out, tkn_iter, "COMMA") + emit_to(self.out, tkn_iter, COMMA) self.out.emit(") ") label = next(tkn_iter).text next(tkn_iter) # RPAREN @@ -300,15 +301,15 @@ def stackref_close( ) -> bool: self.out.emit(tkn) tkn = next(tkn_iter) - assert tkn.kind == "LPAREN" + assert tkn.kind == LPAREN self.out.emit(tkn) name = next(tkn_iter) self.out.emit(name) - if name.kind == "IDENTIFIER": + if name.kind == IDENTIFIER: for var in storage.inputs: if var.name == name.text: var.defined = False - rparen = emit_to(self.out, tkn_iter, "RPAREN") + rparen = emit_to(self.out, tkn_iter, RPAREN) self.emit(rparen) return True @@ -394,20 +395,20 @@ def _emit_if( ) -> tuple[bool, Token, Storage]: """Returns (reachable?, closing '}', stack).""" tkn = next(tkn_iter) - assert tkn.kind == "LPAREN" + assert tkn.kind == LPAREN self.out.emit(tkn) - rparen = emit_to(self.out, tkn_iter, "RPAREN") + rparen = emit_to(self.out, tkn_iter, LPAREN) self.emit(rparen) if_storage = storage.copy() reachable, rbrace, if_storage = 
self._emit_block(tkn_iter, uop, if_storage, inst, True) try: maybe_else = tkn_iter.peek() - if maybe_else and maybe_else.kind == "ELSE": + if maybe_else and maybe_else.kind == ELSE: self._print_storage(storage) self.emit(rbrace) self.emit(next(tkn_iter)) maybe_if = tkn_iter.peek() - if maybe_if and maybe_if.kind == "IF": + if maybe_if and maybe_if.kind == IF: #Emit extra braces around the if to get scoping right self.emit(" {\n") self.emit(next(tkn_iter)) @@ -461,7 +462,7 @@ def _emit_block( try: reachable = True line : int = -1 - if tkn.kind != "LBRACE": + if tkn.kind != LBRACE: raise analysis_error(f"PEP 7: expected '{{', found: {tkn.text}", tkn) escaping_calls = uop.properties.escaping_calls if emit_first_brace: @@ -479,19 +480,19 @@ def _emit_block( _, reload = escaping_calls[tkn] elif tkn == reload: self.emit_reload(storage) - if tkn.kind == "LBRACE": + if tkn.kind == LBRACE: self.out.emit(tkn) braces += 1 - elif tkn.kind == "RBRACE": + elif tkn.kind == RBRACE: self._print_storage(storage) braces -= 1 if braces == 0: return reachable, tkn, storage self.out.emit(tkn) - elif tkn.kind == "GOTO": + elif tkn.kind == GOTO: reachable = False; self.out.emit(tkn) - elif tkn.kind == "IDENTIFIER": + elif tkn.kind == IDENTIFIER: if tkn.text in self._replacers: if not self._replacers[tkn.text](tkn, tkn_iter, uop, storage, inst): reachable = False @@ -502,11 +503,11 @@ def _emit_block( out.defined = True out.in_memory = False break - if tkn.text.startswith("DISPATCH"): + if tkn.text.startswith(DISPATCH): self._print_storage(storage) reachable = False self.out.emit(tkn) - elif tkn.kind == "IF": + elif tkn.kind == IF: self.out.emit(tkn) if_reachable, rbrace, storage = self._emit_if(tkn_iter, uop, storage, inst) if reachable: From 267bffec5305a809db1d1f5c36d6c047e87297d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 15 Nov 2024 13:47:15 +0100 Subject: [PATCH 15/22] fixup --- Tools/cases_generator/analyzer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index c2c9b766790dc6..49a36b5ec7ec14 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -903,7 +903,7 @@ def make_uop( def add_op(op: parser.InstDef, uops: dict[str, Uop]) -> None: - assert op.kind == OP + assert op.kind == "op" if op.name in uops: if ANN_OVERRIDE not in op.annotations: raise override_error( @@ -924,7 +924,7 @@ def add_instruction( def desugar_inst( inst: parser.InstDef, instructions: dict[str, Instruction], uops: dict[str, Uop] ) -> None: - assert inst.kind == INST + assert inst.kind == "inst" name = inst.name op_inputs: list[parser.InputEffect] = [] parts: list[Part] = [] @@ -1124,10 +1124,10 @@ def analyze_forest(forest: list[parser.AstNode]) -> Analysis: for node in forest: match node: case parser.InstDef(name): - if node.kind == INST: + if node.kind == "inst": desugar_inst(node, instructions, uops) else: - assert node.kind == OP + assert node.kind == "op" add_op(node, uops) case parser.Macro(): pass From dc2e8d6e86ab42d61cd26d9ec416b418cd6cebe9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 15 Nov 2024 14:04:29 +0100 Subject: [PATCH 16/22] use FQN --- Tools/cases_generator/analyzer.py | 88 +++++++++++----------- Tools/cases_generator/generators_common.py | 46 +++++------ 2 files changed, 67 insertions(+), 67 deletions(-) diff --git 
a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 49a36b5ec7ec14..18364f7bbc0772 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -1,9 +1,9 @@ -from dataclasses import dataclass, field +from dataclasses import dataclass import itertools import lexer +import lexer as lx # for constants import parser import re -from lexer import * from typing import Optional @dataclass @@ -195,7 +195,7 @@ def why_not_viable(self) -> str | None: return None # Adjusts next_instr, but only in tier 1 code if "INSTRUMENTED" in self.name: return "is instrumented" - if ANN_REPLACED in self.annotations: + if lx.ANN_REPLACED in self.annotations: return "is replaced" if self.name in ("INTERPRETER_EXIT", "JUMP_BACKWARD"): return "has tier 1 control flow" @@ -212,7 +212,7 @@ def is_viable(self) -> bool: def is_super(self) -> bool: for tkn in self.body: - if tkn.kind == IDENTIFIER and tkn.text == "oparg1": + if tkn.kind == lx.IDENTIFIER and tkn.text == "oparg1": return True return False @@ -385,7 +385,7 @@ def find_assignment_target(node: parser.InstDef, idx: int) -> list[lexer.Token]: """Find the tokens that make up the left-hand side of an assignment""" offset = 0 for tkn in reversed(node.block.tokens[: idx]): - if tkn.kind in {SEMI, LBRACE, RBRACE}: + if tkn.kind in {lx.SEMI, lx.LBRACE, lx.RBRACE}: return node.block.tokens[idx - offset : idx] offset += 1 return [] @@ -396,17 +396,17 @@ def find_stores_outputs(node: parser.InstDef) -> list[lexer.Token]: outnames = { out.name for out in node.outputs } innames = { out.name for out in node.inputs } for idx, tkn in enumerate(node.block.tokens): - if tkn.kind == AND: + if tkn.kind == lx.AND: name = node.block.tokens[idx+1] if name.text in outnames: res.append(name) - if tkn.kind != EQUALS: + if tkn.kind != lx.EQUALS: continue lhs = find_assignment_target(node, idx) assert lhs - while lhs and lhs[0].kind == COMMENT: + while lhs and lhs[0].kind == lx.COMMENT: lhs = lhs[1:] - if len(lhs) != 1 or lhs[0].kind != IDENTIFIER: + if len(lhs) != 1 or lhs[0].kind != lx.IDENTIFIER: continue name = lhs[0] if name.text in innames: @@ -420,18 +420,18 @@ def analyze_deferred_refs(node: parser.InstDef) -> dict[lexer.Token, str | None] def in_frame_push(idx: int) -> bool: for tkn in reversed(node.block.tokens[: idx - 1]): - if tkn.kind in {SEMI, LBRACE, RBRACE}: + if tkn.kind in {lx.SEMI, lx.LBRACE, lx.RBRACE}: return False - if tkn.kind == IDENTIFIER and tkn.text == "_PyFrame_PushUnchecked": + if tkn.kind == lx.IDENTIFIER and tkn.text == "_PyFrame_PushUnchecked": return True return False refs: dict[lexer.Token, str | None] = {} for idx, tkn in enumerate(node.block.tokens): - if tkn.kind != IDENTIFIER or tkn.text != "PyStackRef_FromPyObjectNew": + if tkn.kind != lx.IDENTIFIER or tkn.text != "PyStackRef_FromPyObjectNew": continue - if idx == 0 or node.block.tokens[idx - 1].kind != EQUALS: + if idx == 0 or node.block.tokens[idx - 1].kind != lx.EQUALS: if in_frame_push(idx): # PyStackRef_FromPyObjectNew() is called in _PyFrame_PushUnchecked() refs[tkn] = None @@ -444,15 +444,15 @@ def in_frame_push(idx: int) -> bool: "PyStackRef_FromPyObjectNew() must be assigned to an output", tkn ) - if lhs[0].kind == TIMES or any( - t.kind == ARROW or t.kind == LBRACKET for t in lhs[1:] + if lhs[0].kind == lx.TIMES or any( + t.kind == lx.ARROW or t.kind == lx.LBRACKET for t in lhs[1:] ): # Don't handle: *ptr = ..., ptr->field = ..., or ptr[field] = ... # Assume that they are visible to the GC. 
refs[tkn] = None continue - if len(lhs) != 1 or lhs[0].kind != IDENTIFIER: + if len(lhs) != 1 or lhs[0].kind != lx.IDENTIFIER: raise analysis_error( "PyStackRef_FromPyObjectNew() must be assigned to an output", tkn ) @@ -476,7 +476,7 @@ def in_frame_push(idx: int) -> bool: def variable_used(node: parser.InstDef, name: str) -> bool: """Determine whether a variable with a given name is used in a node.""" return any( - token.kind == IDENTIFIER and token.text == name + token.kind == lx.IDENTIFIER and token.text == name for token in node.block.tokens ) @@ -484,7 +484,7 @@ def variable_used(node: parser.InstDef, name: str) -> bool: def oparg_used(node: parser.InstDef) -> bool: """Determine whether `oparg` is used in a node.""" return any( - token.kind == IDENTIFIER and token.text == "oparg" + token.kind == lx.IDENTIFIER and token.text == "oparg" for token in node.tokens ) @@ -492,8 +492,8 @@ def oparg_used(node: parser.InstDef) -> bool: def tier_variable(node: parser.InstDef) -> int | None: """Determine whether a tier variable is used in a node.""" for token in node.tokens: - if token.kind == ANNOTATION: - if token.text == ANN_SPECIALIZING: + if token.kind == lx.ANNOTATION: + if token.text == lx.ANN_SPECIALIZING: return 1 if re.fullmatch(r"tier\d", token.text): return int(token.text[-1]) @@ -640,11 +640,11 @@ def find_stmt_start(node: parser.InstDef, idx: int) -> lexer.Token: assert idx < len(node.block.tokens) while True: tkn = node.block.tokens[idx-1] - if tkn.kind in {SEMI, LBRACE, RBRACE, CMACRO}: + if tkn.kind in {lx.SEMI, lx.LBRACE, lx.RBRACE, lx.CMACRO}: break idx -= 1 assert idx > 0 - while node.block.tokens[idx].kind == COMMENT: + while node.block.tokens[idx].kind == lx.COMMENT: idx += 1 return node.block.tokens[idx] @@ -654,7 +654,7 @@ def find_stmt_end(node: parser.InstDef, idx: int) -> lexer.Token: while True: idx += 1 tkn = node.block.tokens[idx] - if tkn.kind == SEMI: + if tkn.kind == lx.SEMI: return node.block.tokens[idx+1] def check_escaping_calls(instr: parser.InstDef, escapes: dict[lexer.Token, tuple[lexer.Token, lexer.Token]]) -> None: @@ -662,15 +662,15 @@ def check_escaping_calls(instr: parser.InstDef, escapes: dict[lexer.Token, tuple in_if = 0 tkn_iter = iter(instr.block.tokens) for tkn in tkn_iter: - if tkn.kind == IF: + if tkn.kind == lx.IF: next(tkn_iter) in_if = 1 - if tkn.kind == IDENTIFIER and tkn.text in ("DEOPT_IF", "ERROR_IF"): + if tkn.kind == lx.IDENTIFIER and tkn.text in ("DEOPT_IF", "ERROR_IF"): next(tkn_iter) in_if = 1 - elif tkn.kind == LPAREN and in_if: + elif tkn.kind == lx.LPAREN and in_if: in_if += 1 - elif tkn.kind == RPAREN: + elif tkn.kind == lx.RPAREN: if in_if: in_if -= 1 elif tkn in calls and in_if: @@ -684,11 +684,11 @@ def find_escaping_api_calls(instr: parser.InstDef) -> dict[lexer.Token, tuple[le next_tkn = tokens[idx+1] except IndexError: break - if tkn.kind == SWITCH: + if tkn.kind == lx.SWITCH: raise analysis_error(f"switch statements are not supported due to their complex flow control. 
Sorry.", tkn) - if next_tkn.kind != LPAREN: + if next_tkn.kind != lx.LPAREN: continue - if tkn.kind == IDENTIFIER: + if tkn.kind == lx.IDENTIFIER: if tkn.text.upper() == tkn.text: # simple macro continue @@ -705,12 +705,12 @@ def find_escaping_api_calls(instr: parser.InstDef) -> dict[lexer.Token, tuple[le continue if tkn.text in NON_ESCAPING_FUNCTIONS: continue - elif tkn.kind == RPAREN: + elif tkn.kind == lx.RPAREN: prev = tokens[idx-1] if prev.text.endswith("_t") or prev.text == "*" or prev.text == "int": #cast continue - elif tkn.kind != RBRACKET: + elif tkn.kind != lx.RBRACKET: continue start = find_stmt_start(instr, idx) end = find_stmt_end(instr, idx) @@ -732,20 +732,20 @@ def always_exits(op: parser.InstDef) -> bool: depth = 0 tkn_iter = iter(op.tokens) for tkn in tkn_iter: - if tkn.kind == LBRACE: + if tkn.kind == lx.LBRACE: depth += 1 - elif tkn.kind == RBRACE: + elif tkn.kind == lx.RBRACE: depth -= 1 elif depth > 1: continue - elif tkn.kind == GOTO or tkn.kind == RETURN: + elif tkn.kind == lx.GOTO or tkn.kind == lx.RETURN: return True - elif tkn.kind == KEYWORD: + elif tkn.kind == lx.KEYWORD: # XXX: This appears to be unreachable since we never # set tkn.kind to KEYWORD if tkn.text in EXITS: return True - elif tkn.kind == IDENTIFIER: + elif tkn.kind == lx.IDENTIFIER: if tkn.text in EXITS: return True if tkn.text == "DEOPT_IF" or tkn.text == "ERROR_IF": @@ -827,12 +827,12 @@ def compute_properties(op: parser.InstDef) -> Properties: uses_locals=(variable_used(op, "GETLOCAL") or variable_used(op, "SETLOCAL")) and not has_free, has_free=has_free, - pure=ANN_PURE in op.annotations, + pure=lx.ANN_PURE in op.annotations, tier=tier_variable(op), needs_prev=variable_used(op, "prev_instr"), ) -ANN_REPLICATED = re.compile(rf'^{re.escape(ANN_REPLICATE)}\((\d+)\)$') +ANN_REPLICATED = re.compile(rf'^{re.escape(lx.ANN_REPLICATE)}\((\d+)\)$') def make_uop( name: str, @@ -851,7 +851,7 @@ def make_uop( body=op.block.tokens, properties=compute_properties(op), ) - if effect_depends_on_oparg_1(op) and ANN_SPLIT in op.annotations: + if effect_depends_on_oparg_1(op) and lx.ANN_SPLIT in op.annotations: result.properties.oparg_and_1 = True for bit in ("0", "1"): name_x = name + "_" + bit @@ -905,7 +905,7 @@ def make_uop( def add_op(op: parser.InstDef, uops: dict[str, Uop]) -> None: assert op.kind == "op" if op.name in uops: - if ANN_OVERRIDE not in op.annotations: + if lx.ANN_OVERRIDE not in op.annotations: raise override_error( op.name, op.context, uops[op.name].context, op.tokens[0] ) @@ -1151,11 +1151,11 @@ def analyze_forest(forest: list[parser.AstNode]) -> Analysis: for uop in uops.values(): tkn_iter = iter(uop.body) for tkn in tkn_iter: - if tkn.kind == IDENTIFIER and tkn.text == "GO_TO_INSTRUCTION": - if next(tkn_iter).kind != LPAREN: + if tkn.kind == lx.IDENTIFIER and tkn.text == "GO_TO_INSTRUCTION": + if next(tkn_iter).kind != lx.LPAREN: continue target = next(tkn_iter) - if target.kind != IDENTIFIER: + if target.kind != lx.IDENTIFIER: continue if target.text in instructions: instructions[target.text].is_target = True diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index 7d72cbffa84b2c..af28ff70760062 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -1,6 +1,7 @@ from pathlib import Path from typing import TextIO +import lexer as lx # for constants from analyzer import ( Instruction, Uop, @@ -10,7 +11,6 @@ ) from cwriter import CWriter from typing import Callable, TextIO, Iterator, 
Iterable -from lexer import * from lexer import Token from stack import Storage, StackError @@ -82,9 +82,9 @@ def emit_to(out: CWriter, tkn_iter: TokenIterator, end: str) -> Token: for tkn in tkn_iter: if tkn.kind == end and parens == 0: return tkn - if tkn.kind == LPAREN: + if tkn.kind == lx.LPAREN: parens += 1 - if tkn.kind == RPAREN: + if tkn.kind == lx.RPAREN: parens -= 1 out.emit(tkn) raise analysis_error(f"Expecting {end}. Reached end of file", tkn) @@ -119,7 +119,7 @@ def __init__(self, out: CWriter): "PyStackRef_CLOSE": self.stackref_close, "PyStackRef_CLOSE_SPECIALIZED": self.stackref_close, "PyStackRef_AsPyObjectSteal": self.stackref_steal, - DISPATCH: self.dispatch, + "DISPATCH": self.dispatch, "INSTRUCTION_SIZE": self.instruction_size, } self.out = out @@ -146,9 +146,9 @@ def deopt_if( self.out.emit_at("DEOPT_IF", tkn) lparen = next(tkn_iter) self.emit(lparen) - assert lparen.kind == LPAREN + assert lparen.kind == lx.LPAREN first_tkn = tkn_iter.peek() - emit_to(self.out, tkn_iter, LPAREN) + emit_to(self.out, tkn_iter, lx.LPAREN) next(tkn_iter) # Semi colon self.out.emit(", ") assert inst is not None @@ -168,19 +168,19 @@ def error_if( inst: Instruction | None, ) -> bool: lparen = next(tkn_iter) - assert lparen.kind == LPAREN + assert lparen.kind == lx.LPAREN first_tkn = tkn_iter.peek() unconditional = always_true(first_tkn) if unconditional: next(tkn_iter) comma = next(tkn_iter) - if comma.kind != COMMA: + if comma.kind != lx.COMMA: raise analysis_error(f"Expected comma, got '{comma.text}'", comma) self.out.start_line() else: self.out.emit_at("if ", tkn) self.emit(lparen) - emit_to(self.out, tkn_iter, COMMA) + emit_to(self.out, tkn_iter, lx.COMMA) self.out.emit(") ") label = next(tkn_iter).text next(tkn_iter) # RPAREN @@ -301,15 +301,15 @@ def stackref_close( ) -> bool: self.out.emit(tkn) tkn = next(tkn_iter) - assert tkn.kind == LPAREN + assert tkn.kind == lx.LPAREN self.out.emit(tkn) name = next(tkn_iter) self.out.emit(name) - if name.kind == IDENTIFIER: + if name.kind == lx.IDENTIFIER: for var in storage.inputs: if var.name == name.text: var.defined = False - rparen = emit_to(self.out, tkn_iter, RPAREN) + rparen = emit_to(self.out, tkn_iter, lx.RPAREN) self.emit(rparen) return True @@ -395,20 +395,20 @@ def _emit_if( ) -> tuple[bool, Token, Storage]: """Returns (reachable?, closing '}', stack).""" tkn = next(tkn_iter) - assert tkn.kind == LPAREN + assert tkn.kind == lx.LPAREN self.out.emit(tkn) - rparen = emit_to(self.out, tkn_iter, LPAREN) + rparen = emit_to(self.out, tkn_iter, lx.LPAREN) self.emit(rparen) if_storage = storage.copy() reachable, rbrace, if_storage = self._emit_block(tkn_iter, uop, if_storage, inst, True) try: maybe_else = tkn_iter.peek() - if maybe_else and maybe_else.kind == ELSE: + if maybe_else and maybe_else.kind == lx.ELSE: self._print_storage(storage) self.emit(rbrace) self.emit(next(tkn_iter)) maybe_if = tkn_iter.peek() - if maybe_if and maybe_if.kind == IF: + if maybe_if and maybe_if.kind == lx.IF: #Emit extra braces around the if to get scoping right self.emit(" {\n") self.emit(next(tkn_iter)) @@ -462,7 +462,7 @@ def _emit_block( try: reachable = True line : int = -1 - if tkn.kind != LBRACE: + if tkn.kind != lx.LBRACE: raise analysis_error(f"PEP 7: expected '{{', found: {tkn.text}", tkn) escaping_calls = uop.properties.escaping_calls if emit_first_brace: @@ -480,19 +480,19 @@ def _emit_block( _, reload = escaping_calls[tkn] elif tkn == reload: self.emit_reload(storage) - if tkn.kind == LBRACE: + if tkn.kind == lx.LBRACE: self.out.emit(tkn) braces += 
1 - elif tkn.kind == RBRACE: + elif tkn.kind == lx.RBRACE: self._print_storage(storage) braces -= 1 if braces == 0: return reachable, tkn, storage self.out.emit(tkn) - elif tkn.kind == GOTO: + elif tkn.kind == lx.GOTO: reachable = False; self.out.emit(tkn) - elif tkn.kind == IDENTIFIER: + elif tkn.kind == lx.IDENTIFIER: if tkn.text in self._replacers: if not self._replacers[tkn.text](tkn, tkn_iter, uop, storage, inst): reachable = False @@ -503,11 +503,11 @@ def _emit_block( out.defined = True out.in_memory = False break - if tkn.text.startswith(DISPATCH): + if tkn.text.startswith("DISPATCH"): self._print_storage(storage) reachable = False self.out.emit(tkn) - elif tkn.kind == IF: + elif tkn.kind == lx.IF: self.out.emit(tkn) if_reachable, rbrace, storage = self._emit_if(tkn_iter, uop, storage, inst) if reachable: From bc8d2e515af16fa23933a7315413f906fd283da9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 15 Nov 2024 14:10:54 +0100 Subject: [PATCH 17/22] update usages --- Tools/cases_generator/analyzer.py | 2 +- Tools/cases_generator/generators_common.py | 4 ++-- Tools/cases_generator/tier2_generator.py | 11 ++++++----- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 18364f7bbc0772..b9507a263a1867 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -738,7 +738,7 @@ def always_exits(op: parser.InstDef) -> bool: depth -= 1 elif depth > 1: continue - elif tkn.kind == lx.GOTO or tkn.kind == lx.RETURN: + elif tkn.kind in {lx.GOTO, lx.RETURN}: return True elif tkn.kind == lx.KEYWORD: # XXX: This appears to be unreachable since we never diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index af28ff70760062..5750c6f133a4b4 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -148,7 +148,7 @@ def deopt_if( self.emit(lparen) assert lparen.kind == lx.LPAREN first_tkn = tkn_iter.peek() - emit_to(self.out, tkn_iter, lx.LPAREN) + emit_to(self.out, tkn_iter, lx.RPAREN) next(tkn_iter) # Semi colon self.out.emit(", ") assert inst is not None @@ -397,7 +397,7 @@ def _emit_if( tkn = next(tkn_iter) assert tkn.kind == lx.LPAREN self.out.emit(tkn) - rparen = emit_to(self.out, tkn_iter, lx.LPAREN) + rparen = emit_to(self.out, tkn_iter, lx.RPAREN) self.emit(rparen) if_storage = storage.copy() reachable, rbrace, if_storage = self._emit_block(tkn_iter, uop, if_storage, inst, True) diff --git a/Tools/cases_generator/tier2_generator.py b/Tools/cases_generator/tier2_generator.py index dd16a1a7eb28b5..d6731f89970a37 100644 --- a/Tools/cases_generator/tier2_generator.py +++ b/Tools/cases_generator/tier2_generator.py @@ -5,6 +5,7 @@ import argparse +import lexer as lx # for constants from analyzer import ( Analysis, Instruction, @@ -80,10 +81,10 @@ def error_if( self.out.emit_at("if ", tkn) lparen = next(tkn_iter) self.emit(lparen) - assert lparen.kind == "LPAREN" + assert lparen.kind == lx.LPAREN first_tkn = next(tkn_iter) self.out.emit(first_tkn) - emit_to(self.out, tkn_iter, "COMMA") + emit_to(self.out, tkn_iter, lx.COMMA) label = next(tkn_iter).text next(tkn_iter) # RPAREN next(tkn_iter) # Semi colon @@ -116,9 +117,9 @@ def deopt_if( self.out.emit_at("if ", tkn) lparen = next(tkn_iter) self.emit(lparen) - assert lparen.kind == "LPAREN" + assert lparen.kind == lx.LPAREN first_tkn = tkn_iter.peek() - 
emit_to(self.out, tkn_iter, "RPAREN") + emit_to(self.out, tkn_iter, lx.RPAREN) next(tkn_iter) # Semi colon self.emit(") {\n") self.emit("UOP_STAT_INC(uopcode, miss);\n") @@ -138,7 +139,7 @@ def exit_if( # type: ignore[override] lparen = next(tkn_iter) self.emit(lparen) first_tkn = tkn_iter.peek() - emit_to(self.out, tkn_iter, "RPAREN") + emit_to(self.out, tkn_iter, lx.RPAREN) next(tkn_iter) # Semi colon self.emit(") {\n") self.emit("UOP_STAT_INC(uopcode, miss);\n") From a576b2eb4a53524680111edde85404d09220effb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 15 Nov 2024 14:18:40 +0100 Subject: [PATCH 18/22] small optimization --- Tools/cases_generator/analyzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index b9507a263a1867..1fd306e1572075 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -445,7 +445,7 @@ def in_frame_push(idx: int) -> bool: ) if lhs[0].kind == lx.TIMES or any( - t.kind == lx.ARROW or t.kind == lx.LBRACKET for t in lhs[1:] + t.kind in {lx.ARROW, lx.LBRACKET} for t in lhs[1:] ): # Don't handle: *ptr = ..., ptr->field = ..., or ptr[field] = ... # Assume that they are visible to the GC. From 575192014998c65dd95e425513d9960d4c31cc7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 16 Nov 2024 10:11:23 +0100 Subject: [PATCH 19/22] cosmetic changes --- Tools/cases_generator/lexer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Tools/cases_generator/lexer.py b/Tools/cases_generator/lexer.py index 1bcbc647e2bf88..55a5df35eac074 100644 --- a/Tools/cases_generator/lexer.py +++ b/Tools/cases_generator/lexer.py @@ -72,8 +72,9 @@ def choice(*opts: str) -> str: (BACKSLASH := "BACKSLASH"): r"\\", } __all__.extend(operators.keys()) -opmap = {__pattern.replace("\\", "") or "\\": __opname - for __opname, __pattern in operators.items()} +opmap = {pattern.replace("\\", "") or "\\": opname + for opname, pattern in operators.items()} +del opname, pattern # Macros macro = r"#.*\n" @@ -90,7 +91,6 @@ def choice(*opts: str) -> str: decimal_digits = r"(0|[1-9][0-9]*)" decimal = decimal_digits + suffix - exponent = r"""([eE][-+]?[0-9]+)""" fraction = r"""([0-9]*\.[0-9]+)|([0-9]+\.)""" float = "((((" + fraction + ")" + exponent + "?)|([0-9]+" + exponent + "))[FfLl]?)" From 07b66e02ee75e9ab85c70d696b6a7ad157b98716 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 16 Nov 2024 10:21:42 +0100 Subject: [PATCH 20/22] I don't know how to use Python anymore! 
--- Tools/cases_generator/lexer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Tools/cases_generator/lexer.py b/Tools/cases_generator/lexer.py index 55a5df35eac074..7130f728d92e3a 100644 --- a/Tools/cases_generator/lexer.py +++ b/Tools/cases_generator/lexer.py @@ -74,7 +74,6 @@ def choice(*opts: str) -> str: __all__.extend(operators.keys()) opmap = {pattern.replace("\\", "") or "\\": opname for opname, pattern in operators.items()} -del opname, pattern # Macros macro = r"#.*\n" From 72859d37ec7d5f373cea2da34abefaa546fd5d37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 23 Jan 2025 16:06:48 +0100 Subject: [PATCH 21/22] update constants --- Tools/cases_generator/generators_common.py | 18 +++++++++--------- Tools/cases_generator/lexer.py | 2 +- .../opcode_metadata_generator.py | 14 ++++++++++++-- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index be56c33f883e3d..9a286a5071e586 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -352,22 +352,22 @@ def stackref_close_specialized( self.out.emit(tkn) tkn = next(tkn_iter) - assert tkn.kind == "LPAREN" + assert tkn.kind == lx.LPAREN self.out.emit(tkn) name = next(tkn_iter) self.out.emit(name) comma = next(tkn_iter) - if comma.kind != "COMMA": + if comma.kind != lx.COMMA: raise analysis_error("Expected comma", comma) self.out.emit(comma) dealloc = next(tkn_iter) - if dealloc.kind != "IDENTIFIER": + if dealloc.kind != lx.IDENTIFIER: raise analysis_error("Expected identifier", dealloc) self.out.emit(dealloc) - if name.kind == "IDENTIFIER": + if name.kind == lx.IDENTIFIER: escapes = dealloc.text not in NON_ESCAPING_DEALLOCS return self.stackref_kill(name, storage, escapes) - rparen = emit_to(self.out, tkn_iter, "RPAREN") + rparen = emit_to(self.out, tkn_iter, lx.RPAREN) self.emit(rparen) return True @@ -381,13 +381,13 @@ def stackref_steal( ) -> bool: self.out.emit(tkn) tkn = next(tkn_iter) - assert tkn.kind == "LPAREN" + assert tkn.kind == LPAREN self.out.emit(tkn) name = next(tkn_iter) self.out.emit(name) - if name.kind == "IDENTIFIER": + if name.kind == IDENTIFIER: return self.stackref_kill(name, storage, False) - rparen = emit_to(self.out, tkn_iter, "RPAREN") + rparen = emit_to(self.out, tkn_iter, lx.RPAREN) self.emit(rparen) return True @@ -419,7 +419,7 @@ def go_to_instruction( name = next(tkn_iter) next(tkn_iter) next(tkn_iter) - assert name.kind == "IDENTIFIER" + assert name.kind == IDENTIFIER self.emit("\n") self.emit(f"goto PREDICTED_{name.text};\n") return True diff --git a/Tools/cases_generator/lexer.py b/Tools/cases_generator/lexer.py index 8a47da0b4a0316..88112d43b06786 100644 --- a/Tools/cases_generator/lexer.py +++ b/Tools/cases_generator/lexer.py @@ -180,7 +180,7 @@ def choice(*opts: str) -> str: 'macro': (MACRO := "MACRO"), } __all__.extend(keywords.values()) -KEYWORD = 'KEYWORD' +KEYWORD = "KEYWORD" ANNOTATION = "ANNOTATION" annotations = { diff --git a/Tools/cases_generator/opcode_metadata_generator.py b/Tools/cases_generator/opcode_metadata_generator.py index 88b799f2b371d0..364244344fb083 100644 --- a/Tools/cases_generator/opcode_metadata_generator.py +++ b/Tools/cases_generator/opcode_metadata_generator.py @@ -21,12 +21,22 @@ from cwriter import CWriter from dataclasses import dataclass from lexer import ANN_REPLACED, ANN_SPECIALIZING -from typing import TextIO +from typing import TextIO, 
TypedDict from stack import Stack, get_stack_effect, get_stack_effects +class _OpArgDictKind(TypedDict): + OPARG_FULL: int + OPARG_CACHE_1: int + OPARG_CACHE_2: int + OPARG_CACHE_4: int + OPARG_TOP: int + OPARG_BOTTOM: int + OPARG_SAVE_RETURN_OFFSET: int + OPARG_REPLACED: int + # Constants used instead of size for macro expansions. # Note: 1, 2, 4 must match actual cache entry sizes. -OPARG_KINDS = { +OPARG_KINDS: _OpArgDictKind = { "OPARG_FULL": 0, "OPARG_CACHE_1": 1, "OPARG_CACHE_2": 2, From 7978d1b87e8d701c2898700b27642a54a655756c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 23 Feb 2025 11:57:17 +0100 Subject: [PATCH 22/22] mypy fix --- Tools/cases_generator/generators_common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index 7402c4241fc0cc..2013b527c69be2 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -350,11 +350,11 @@ def stackref_steal( ) -> bool: self.out.emit(tkn) tkn = next(tkn_iter) - assert tkn.kind == LPAREN + assert tkn.kind == lx.LPAREN self.out.emit(tkn) name = next(tkn_iter) self.out.emit(name) - if name.kind == IDENTIFIER: + if name.kind == lx.IDENTIFIER: return self.stackref_kill(name, storage, False) rparen = emit_to(self.out, tkn_iter, lx.RPAREN) self.emit(rparen)
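
A note on the `_OpArgDictKind` TypedDict added in the "update constants"
commit: fixing the key set and value type of `OPARG_KINDS` lets mypy
reject a misspelled key at check time instead of letting it surface as a
runtime KeyError. A small sketch of the effect, using hypothetical names
for illustration only:

    from typing import TypedDict

    class _Sizes(TypedDict):
        OPARG_FULL: int
        OPARG_CACHE_1: int

    sizes: _Sizes = {"OPARG_FULL": 0, "OPARG_CACHE_1": 1}
    n: int = sizes["OPARG_FULL"]  # mypy knows the value type is int
    # sizes["OPARG_FULLL"]        # mypy: TypedDict "_Sizes" has no key "OPARG_FULLL"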
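
Taken together, the series replaces the star-import and bare token-kind
names with attribute access on the lexer module, and folds chained `==`
comparisons into set membership where several kinds are accepted (e.g.
`tkn.kind in {lx.GOTO, lx.RETURN}`). Beyond the namespacing, the payoff
is early failure: a misspelled constant raises AttributeError the moment
it is evaluated, whereas a misspelled string literal would simply never
match any token. A minimal sketch of the idiom, runnable from within
Tools/cases_generator (where lexer.py lives) and assuming a token object
with `kind` and `text` attributes as used throughout the generators:

    import lexer as lx  # token-kind constants, e.g. lx.SEMI == "SEMI"

    def ends_statement(kind: str) -> bool:
        # Set membership instead of `kind == lx.SEMI or kind == lx.LBRACE
        # or ...`, mirroring find_assignment_target() in analyzer.py.
        return kind in {lx.SEMI, lx.LBRACE, lx.RBRACE}

    # A typo such as lx.SEMII fails loudly with AttributeError; the string
    # literal "SEMII" would silently never match.

The "update usages" commit also shows why the mechanical rename deserved
a second pass: `emit_to(self.out, tkn_iter, lx.LPAREN)` became
`lx.RPAREN`, since `emit_to()` scans forward to the matching closing
delimiter of the condition it is copying out.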