diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 10db9acbb362c8..ff9b9e0ae92406 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -1,6 +1,7 @@ -from dataclasses import dataclass, field +from dataclasses import dataclass import itertools import lexer +import lexer as lx # for constants import parser import re from typing import Optional @@ -208,7 +209,7 @@ def why_not_viable(self) -> str | None: return None # Adjusts next_instr, but only in tier 1 code if "INSTRUMENTED" in self.name: return "is instrumented" - if "replaced" in self.annotations: + if lx.ANN_REPLACED in self.annotations: return "is replaced" if self.name in ("INTERPRETER_EXIT", "JUMP_BACKWARD"): return "has tier 1 control flow" @@ -225,7 +226,7 @@ def is_viable(self) -> bool: def is_super(self) -> bool: for tkn in self.body: - if tkn.kind == "IDENTIFIER" and tkn.text == "oparg1": + if tkn.kind == lx.IDENTIFIER and tkn.text == "oparg1": return True return False @@ -428,7 +429,7 @@ def find_assignment_target(node: parser.InstDef, idx: int) -> list[lexer.Token]: """Find the tokens that make up the left-hand side of an assignment""" offset = 0 for tkn in reversed(node.block.tokens[: idx]): - if tkn.kind in {"SEMI", "LBRACE", "RBRACE", "CMACRO"}: + if tkn.kind in {lx.SEMI, lx.LBRACE, lx.RBRACE, lx.CMACRO}: return node.block.tokens[idx - offset : idx] offset += 1 return [] @@ -439,17 +440,17 @@ def find_stores_outputs(node: parser.InstDef) -> list[lexer.Token]: outnames = { out.name for out in node.outputs } innames = { out.name for out in node.inputs } for idx, tkn in enumerate(node.block.tokens): - if tkn.kind == "AND": + if tkn.kind == lx.AND: name = node.block.tokens[idx+1] if name.text in outnames: res.append(name) - if tkn.kind != "EQUALS": + if tkn.kind != lx.EQUALS: continue lhs = find_assignment_target(node, idx) assert lhs - while lhs and lhs[0].kind == "COMMENT": + while lhs and lhs[0].kind == lx.COMMENT: lhs = lhs[1:] - if len(lhs) != 1 or lhs[0].kind != "IDENTIFIER": + if len(lhs) != 1 or lhs[0].kind != lx.IDENTIFIER: continue name = lhs[0] if name.text in innames: @@ -463,18 +464,18 @@ def analyze_deferred_refs(node: parser.InstDef) -> dict[lexer.Token, str | None] def in_frame_push(idx: int) -> bool: for tkn in reversed(node.block.tokens[: idx - 1]): - if tkn.kind in {"SEMI", "LBRACE", "RBRACE"}: + if tkn.kind in {lx.SEMI, lx.LBRACE, lx.RBRACE}: return False - if tkn.kind == "IDENTIFIER" and tkn.text == "_PyFrame_PushUnchecked": + if tkn.kind == lx.IDENTIFIER and tkn.text == "_PyFrame_PushUnchecked": return True return False refs: dict[lexer.Token, str | None] = {} for idx, tkn in enumerate(node.block.tokens): - if tkn.kind != "IDENTIFIER" or tkn.text != "PyStackRef_FromPyObjectNew": + if tkn.kind != lx.IDENTIFIER or tkn.text != "PyStackRef_FromPyObjectNew": continue - if idx == 0 or node.block.tokens[idx - 1].kind != "EQUALS": + if idx == 0 or node.block.tokens[idx - 1].kind != lx.EQUALS: if in_frame_push(idx): # PyStackRef_FromPyObjectNew() is called in _PyFrame_PushUnchecked() refs[tkn] = None @@ -487,15 +488,15 @@ def in_frame_push(idx: int) -> bool: "PyStackRef_FromPyObjectNew() must be assigned to an output", tkn ) - if lhs[0].kind == "TIMES" or any( - t.kind == "ARROW" or t.kind == "LBRACKET" for t in lhs[1:] + if lhs[0].kind == lx.TIMES or any( + t.kind in {lx.ARROW, lx.LBRACKET} for t in lhs[1:] ): # Don't handle: *ptr = ..., ptr->field = ..., or ptr[field] = ... # Assume that they are visible to the GC. 
refs[tkn] = None continue - if len(lhs) != 1 or lhs[0].kind != "IDENTIFIER": + if len(lhs) != 1 or lhs[0].kind != lx.IDENTIFIER: raise analysis_error( "PyStackRef_FromPyObjectNew() must be assigned to an output", tkn ) @@ -519,14 +520,16 @@ def in_frame_push(idx: int) -> bool: def variable_used(node: parser.CodeDef, name: str) -> bool: """Determine whether a variable with a given name is used in a node.""" return any( - token.kind == "IDENTIFIER" and token.text == name for token in node.block.tokens + token.kind == lx.IDENTIFIER and token.text == name + for token in node.block.tokens ) def oparg_used(node: parser.CodeDef) -> bool: """Determine whether `oparg` is used in a node.""" return any( - token.kind == "IDENTIFIER" and token.text == "oparg" for token in node.tokens + token.kind == lx.IDENTIFIER and token.text == "oparg" + for token in node.tokens ) @@ -535,8 +538,8 @@ def tier_variable(node: parser.CodeDef) -> int | None: if isinstance(node, parser.LabelDef): return None for token in node.tokens: - if token.kind == "ANNOTATION": - if token.text == "specializing": + if token.kind == lx.ANNOTATION: + if token.text == lx.ANN_SPECIALIZING: return 1 if re.fullmatch(r"tier\d", token.text): return int(token.text[-1]) @@ -679,11 +682,11 @@ def find_stmt_start(node: parser.CodeDef, idx: int) -> lexer.Token: assert idx < len(node.block.tokens) while True: tkn = node.block.tokens[idx-1] - if tkn.kind in {"SEMI", "LBRACE", "RBRACE", "CMACRO"}: + if tkn.kind in {lx.SEMI, lx.LBRACE, lx.RBRACE, lx.CMACRO}: break idx -= 1 assert idx > 0 - while node.block.tokens[idx].kind == "COMMENT": + while node.block.tokens[idx].kind == lx.COMMENT: idx += 1 return node.block.tokens[idx] @@ -693,7 +696,7 @@ def find_stmt_end(node: parser.CodeDef, idx: int) -> lexer.Token: while True: idx += 1 tkn = node.block.tokens[idx] - if tkn.kind == "SEMI": + if tkn.kind == lx.SEMI: return node.block.tokens[idx+1] def check_escaping_calls(instr: parser.CodeDef, escapes: dict[lexer.Token, EscapingCall]) -> None: @@ -701,15 +704,15 @@ def check_escaping_calls(instr: parser.CodeDef, escapes: dict[lexer.Token, Escap in_if = 0 tkn_iter = iter(instr.block.tokens) for tkn in tkn_iter: - if tkn.kind == "IF": + if tkn.kind == lx.IF: next(tkn_iter) in_if = 1 - if tkn.kind == "IDENTIFIER" and tkn.text in ("DEOPT_IF", "ERROR_IF", "EXIT_IF"): + if tkn.kind == lx.IDENTIFIER and tkn.text in ("DEOPT_IF", "ERROR_IF", "EXIT_IF"): next(tkn_iter) in_if = 1 - elif tkn.kind == "LPAREN" and in_if: + elif tkn.kind == lx.LPAREN and in_if: in_if += 1 - elif tkn.kind == "RPAREN": + elif tkn.kind == lx.RPAREN: if in_if: in_if -= 1 elif tkn in calls and in_if: @@ -723,11 +726,11 @@ def find_escaping_api_calls(instr: parser.CodeDef) -> dict[lexer.Token, Escaping next_tkn = tokens[idx+1] except IndexError: break - if tkn.kind == "SWITCH": + if tkn.kind == lx.SWITCH: raise analysis_error(f"switch statements are not supported due to their complex flow control. 
Sorry.", tkn) - if next_tkn.kind != lexer.LPAREN: + if next_tkn.kind != lx.LPAREN: continue - if tkn.kind == lexer.IDENTIFIER: + if tkn.kind == lx.IDENTIFIER: if tkn.text.upper() == tkn.text: # simple macro continue @@ -744,12 +747,12 @@ def find_escaping_api_calls(instr: parser.CodeDef) -> dict[lexer.Token, Escaping continue if tkn.text in NON_ESCAPING_FUNCTIONS: continue - elif tkn.kind == "RPAREN": + elif tkn.kind == lx.RPAREN: prev = tokens[idx-1] if prev.text.endswith("_t") or prev.text == "*" or prev.text == "int": #cast continue - elif tkn.kind != "RBRACKET": + elif tkn.kind != lx.RBRACKET: continue if tkn.text in ("PyStackRef_CLOSE", "PyStackRef_XCLOSE"): if len(tokens) <= idx+2: @@ -778,18 +781,20 @@ def always_exits(op: parser.CodeDef) -> bool: depth = 0 tkn_iter = iter(op.tokens) for tkn in tkn_iter: - if tkn.kind == "LBRACE": + if tkn.kind == lx.LBRACE: depth += 1 - elif tkn.kind == "RBRACE": + elif tkn.kind == lx.RBRACE: depth -= 1 elif depth > 1: continue - elif tkn.kind == "GOTO" or tkn.kind == "RETURN": + elif tkn.kind in {lx.GOTO, lx.RETURN}: return True - elif tkn.kind == "KEYWORD": + elif tkn.kind == lx.KEYWORD: + # XXX: This appears to be unreachable since we never + # set tkn.kind to KEYWORD if tkn.text in EXITS: return True - elif tkn.kind == "IDENTIFIER": + elif tkn.kind == lx.IDENTIFIER: if tkn.text in EXITS: return True if tkn.text == "DEOPT_IF" or tkn.text == "ERROR_IF": @@ -855,8 +860,8 @@ def compute_properties(op: parser.CodeDef) -> Properties: error_with_pop = has_error_with_pop(op) error_without_pop = has_error_without_pop(op) escapes = bool(escaping_calls) - pure = False if isinstance(op, parser.LabelDef) else "pure" in op.annotations - no_save_ip = False if isinstance(op, parser.LabelDef) else "no_save_ip" in op.annotations + pure = False if isinstance(op, parser.LabelDef) else lx.ANN_PURE in op.annotations + no_save_ip = False if isinstance(op, parser.LabelDef) else lx.ANN_NO_SAVE_IP in op.annotations return Properties( escaping_calls=escaping_calls, escapes=escapes, @@ -881,6 +886,7 @@ def compute_properties(op: parser.CodeDef) -> Properties: needs_prev=variable_used(op, "prev_instr"), ) +ANN_REPLICATED = re.compile(rf'^{re.escape(lx.ANN_REPLICATE)}\((\d+)\)$') def make_uop( name: str, @@ -899,7 +905,7 @@ def make_uop( body=op.block.tokens, properties=compute_properties(op), ) - if effect_depends_on_oparg_1(op) and "split" in op.annotations: + if effect_depends_on_oparg_1(op) and lx.ANN_SPLIT in op.annotations: result.properties.oparg_and_1 = True for bit in ("0", "1"): name_x = name + "_" + bit @@ -923,8 +929,8 @@ def make_uop( rep.replicates = result uops[name_x] = rep for anno in op.annotations: - if anno.startswith("replicate"): - result.replicated = int(anno[10:-1]) + if match := ANN_REPLICATED.match(anno): + result.replicated = int(match.group(1)) break else: return result @@ -953,7 +959,7 @@ def make_uop( def add_op(op: parser.InstDef, uops: dict[str, Uop]) -> None: assert op.kind == "op" if op.name in uops: - if "override" not in op.annotations: + if lx.ANN_OVERRIDE not in op.annotations: raise override_error( op.name, op.context, uops[op.name].context, op.tokens[0] ) diff --git a/Tools/cases_generator/cwriter.py b/Tools/cases_generator/cwriter.py index 07a785e4312fa6..80004a7b6bb522 100644 --- a/Tools/cases_generator/cwriter.py +++ b/Tools/cases_generator/cwriter.py @@ -1,5 +1,5 @@ import contextlib -from lexer import Token +from lexer import COMMENT, CMACRO, Token from typing import TextIO, Iterator @@ -90,12 +90,12 @@ def 
emit_multiline_comment(self, tkn: Token) -> None: self.out.write(text) def emit_token(self, tkn: Token) -> None: - if tkn.kind == "COMMENT" and "\n" in tkn.text: + if tkn.kind == COMMENT and "\n" in tkn.text: return self.emit_multiline_comment(tkn) self.maybe_dedent(tkn.text) self.set_position(tkn) self.emit_text(tkn.text) - if tkn.kind == "CMACRO": + if tkn.kind == CMACRO: self.newline = True self.maybe_indent(tkn.text) diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index 6b2ef51b29f4eb..2013b527c69be2 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -1,5 +1,6 @@ from pathlib import Path +import lexer as lx # for constants from analyzer import ( Instruction, Uop, @@ -82,9 +83,9 @@ def emit_to(out: CWriter, tkn_iter: TokenIterator, end: str) -> Token: for tkn in tkn_iter: if tkn.kind == end and parens == 0: return tkn - if tkn.kind == "LPAREN": + if tkn.kind == lx.LPAREN: parens += 1 - if tkn.kind == "RPAREN": + if tkn.kind == lx.RPAREN: parens -= 1 out.emit(tkn) raise analysis_error(f"Expecting {end}. Reached end of file", tkn) @@ -156,9 +157,9 @@ def deopt_if( self.out.start_line() self.out.emit("if (") lparen = next(tkn_iter) - assert lparen.kind == "LPAREN" + assert lparen.kind == lx.LPAREN first_tkn = tkn_iter.peek() - emit_to(self.out, tkn_iter, "RPAREN") + emit_to(self.out, tkn_iter, lx.RPAREN) self.emit(") {\n") next(tkn_iter) # Semi colon assert inst is not None @@ -188,19 +189,19 @@ def error_if( inst: Instruction | None, ) -> bool: lparen = next(tkn_iter) - assert lparen.kind == "LPAREN" + assert lparen.kind == lx.LPAREN first_tkn = tkn_iter.peek() unconditional = always_true(first_tkn) if unconditional: next(tkn_iter) comma = next(tkn_iter) - if comma.kind != "COMMA": + if comma.kind != lx.COMMA: raise analysis_error(f"Expected comma, got '{comma.text}'", comma) self.out.start_line() else: self.out.emit_at("if ", tkn) self.emit(lparen) - emit_to(self.out, tkn_iter, "COMMA") + emit_to(self.out, tkn_iter, lx.COMMA) self.out.emit(") {\n") label = next(tkn_iter).text next(tkn_iter) # RPAREN @@ -320,22 +321,22 @@ def stackref_close_specialized( self.out.emit(tkn) tkn = next(tkn_iter) - assert tkn.kind == "LPAREN" + assert tkn.kind == lx.LPAREN self.out.emit(tkn) name = next(tkn_iter) self.out.emit(name) comma = next(tkn_iter) - if comma.kind != "COMMA": + if comma.kind != lx.COMMA: raise analysis_error("Expected comma", comma) self.out.emit(comma) dealloc = next(tkn_iter) - if dealloc.kind != "IDENTIFIER": + if dealloc.kind != lx.IDENTIFIER: raise analysis_error("Expected identifier", dealloc) self.out.emit(dealloc) - if name.kind == "IDENTIFIER": + if name.kind == lx.IDENTIFIER: escapes = dealloc.text not in NON_ESCAPING_DEALLOCS return self.stackref_kill(name, storage, escapes) - rparen = emit_to(self.out, tkn_iter, "RPAREN") + rparen = emit_to(self.out, tkn_iter, lx.RPAREN) self.emit(rparen) return True @@ -349,13 +350,13 @@ def stackref_steal( ) -> bool: self.out.emit(tkn) tkn = next(tkn_iter) - assert tkn.kind == "LPAREN" + assert tkn.kind == lx.LPAREN self.out.emit(tkn) name = next(tkn_iter) self.out.emit(name) - if name.kind == "IDENTIFIER": + if name.kind == lx.IDENTIFIER: return self.stackref_kill(name, storage, False) - rparen = emit_to(self.out, tkn_iter, "RPAREN") + rparen = emit_to(self.out, tkn_iter, lx.RPAREN) self.emit(rparen) return True @@ -490,20 +491,20 @@ def _emit_if( ) -> tuple[bool, Token, Storage]: """Returns (reachable?, closing '}', stack).""" 
tkn = next(tkn_iter) - assert tkn.kind == "LPAREN" + assert tkn.kind == lx.LPAREN self.out.emit(tkn) - rparen = emit_to(self.out, tkn_iter, "RPAREN") + rparen = emit_to(self.out, tkn_iter, lx.RPAREN) self.emit(rparen) if_storage = storage.copy() reachable, rbrace, if_storage = self._emit_block(tkn_iter, uop, if_storage, inst, True) try: maybe_else = tkn_iter.peek() - if maybe_else and maybe_else.kind == "ELSE": + if maybe_else and maybe_else.kind == lx.ELSE: self._print_storage(storage) self.emit(rbrace) self.emit(next(tkn_iter)) maybe_if = tkn_iter.peek() - if maybe_if and maybe_if.kind == "IF": + if maybe_if and maybe_if.kind == lx.IF: # Emit extra braces around the if to get scoping right self.emit(" {\n") self.emit(next(tkn_iter)) @@ -557,7 +558,7 @@ def _emit_block( try: reachable = True line : int = -1 - if tkn.kind != "LBRACE": + if tkn.kind != lx.LBRACE: raise analysis_error(f"PEP 7: expected '{{', found: {tkn.text}", tkn) escaping_calls = uop.properties.escaping_calls if emit_first_brace: @@ -581,20 +582,20 @@ def _emit_block( reload = escape.end elif tkn == reload: self.emit_reload(storage) - if tkn.kind == "LBRACE": + if tkn.kind == lx.LBRACE: self.out.emit(tkn) braces += 1 - elif tkn.kind == "RBRACE": + elif tkn.kind == lx.RBRACE: self._print_storage(storage) braces -= 1 if braces == 0: return reachable, tkn, storage self.out.emit(tkn) - elif tkn.kind == "GOTO": + elif tkn.kind == lx.GOTO: label_tkn = next(tkn_iter) self.goto_label(tkn, label_tkn, storage) reachable = False - elif tkn.kind == "IDENTIFIER": + elif tkn.kind == lx.IDENTIFIER: if tkn.text in self._replacers: if not self._replacers[tkn.text](tkn, tkn_iter, uop, storage, inst): reachable = False @@ -609,7 +610,7 @@ def _emit_block( self._print_storage(storage) reachable = False self.out.emit(tkn) - elif tkn.kind == "IF": + elif tkn.kind == lx.IF: self.out.emit(tkn) if_reachable, rbrace, storage = self._emit_if(tkn_iter, uop, storage, inst) if reachable: diff --git a/Tools/cases_generator/lexer.py b/Tools/cases_generator/lexer.py index 6afca750be9b19..151cd0054dd800 100644 --- a/Tools/cases_generator/lexer.py +++ b/Tools/cases_generator/lexer.py @@ -2,6 +2,8 @@ # Originally by Mark Shannon (mark@hotpy.org) # https://gist.github.com/markshannon/db7ab649440b5af765451bb77c7dba34 +__all__: list[str] = [] + import re from dataclasses import dataclass from collections.abc import Iterator @@ -13,78 +15,74 @@ def choice(*opts: str) -> str: # Regexes -# Longer operators must go before shorter ones. - -PLUSPLUS = r"\+\+" -MINUSMINUS = r"--" - -# -> -ARROW = r"->" -ELLIPSIS = r"\.\.\." - -# Assignment operators -TIMESEQUAL = r"\*=" -DIVEQUAL = r"/=" -MODEQUAL = r"%=" -PLUSEQUAL = r"\+=" -MINUSEQUAL = r"-=" -LSHIFTEQUAL = r"<<=" -RSHIFTEQUAL = r">>=" -ANDEQUAL = r"&=" -OREQUAL = r"\|=" -XOREQUAL = r"\^=" - -# Operators -PLUS = r"\+" -MINUS = r"-" -TIMES = r"\*" -DIVIDE = r"/" -MOD = r"%" -NOT = r"~" -XOR = r"\^" -LOR = r"\|\|" -LAND = r"&&" -LSHIFT = r"<<" -RSHIFT = r">>" -LE = r"<=" -GE = r">=" -EQ = r"==" -NE = r"!=" -LT = r"<" -GT = r">" -LNOT = r"!" -OR = r"\|" -AND = r"&" -EQUALS = r"=" - -# ? -CONDOP = r"\?" - -# Delimiters -LPAREN = r"\(" -RPAREN = r"\)" -LBRACKET = r"\[" -RBRACKET = r"\]" -LBRACE = r"\{" -RBRACE = r"\}" -COMMA = r"," -PERIOD = r"\." 
-SEMI = r";" -COLON = r":" -BACKSLASH = r"\\" - -operators = {op: pattern for op, pattern in globals().items() if op == op.upper()} -for op in operators: - globals()[op] = op -opmap = {pattern.replace("\\", "") or "\\": op for op, pattern in operators.items()} +# Mapping from operator names to their regular expressions. +operators = { + # Longer operators must go before shorter ones. + (PLUSPLUS := "PLUSPLUS"): r'\+\+', + (MINUSMINUS := "MINUSMINUS"): r"--", + # -> + (ARROW := "ARROW"): r"->", + (ELLIPSIS := "ELLIPSIS"): r"\.\.\.", + # Assignment operators + (TIMESEQUAL := "TIMESEQUAL"): r"\*=", + (DIVEQUAL := "DIVEQUAL"): r"/=", + (MODEQUAL := "MODEQUAL"): r"%=", + (PLUSEQUAL := "PLUSEQUAL"): r"\+=", + (MINUSEQUAL := "MINUSEQUAL"): r"-=", + (LSHIFTEQUAL := "LSHIFTEQUAL"): r"<<=", + (RSHIFTEQUAL := "RSHIFTEQUAL"): r">>=", + (ANDEQUAL := "ANDEQUAL"): r"&=", + (OREQUAL := "OREQUAL"): r"\|=", + (XOREQUAL := "XOREQUAL"): r"\^=", + # Operators + (PLUS := "PLUS"): r"\+", + (MINUS := "MINUS"): r"-", + (TIMES := "TIMES"): r"\*", + (DIVIDE := "DIVIDE"): r"/", + (MOD := "MOD"): r"%", + (NOT := "NOT"): r"~", + (XOR := "XOR"): r"\^", + (LOR := "LOR"): r"\|\|", + (LAND := "LAND"): r"&&", + (LSHIFT := "LSHIFT"): r"<<", + (RSHIFT := "RSHIFT"): r">>", + (LE := "LE"): r"<=", + (GE := "GE"): r">=", + (EQ := "EQ"): r"==", + (NE := "NE"): r"!=", + (LT := "LT"): r"<", + (GT := "GT"): r">", + (LNOT := "LNOT"): r"!", + (OR := "OR"): r"\|", + (AND := "AND"): r"&", + (EQUALS := "EQUALS"): r"=", + # ? + (CONDOP := "CONDOP"): r"\?", + # Delimiters + (LPAREN := "LPAREN"): r"\(", + (RPAREN := "RPAREN"): r"\)", + (LBRACKET := "LBRACKET"): r"\[", + (RBRACKET := "RBRACKET"): r"\]", + (LBRACE := "LBRACE"): r"\{", + (RBRACE := "RBRACE"): r"\}", + (COMMA := "COMMA"): r",", + (PERIOD := "PERIOD"): r"\.", + (SEMI := "SEMI"): r";", + (COLON := "COLON"): r":", + (BACKSLASH := "BACKSLASH"): r"\\", +} +__all__.extend(operators.keys()) +opmap = {pattern.replace("\\", "") or "\\": opname + for opname, pattern in operators.items()} # Macros macro = r"#.*\n" CMACRO = "CMACRO" +__all__.append(CMACRO) id_re = r"[a-zA-Z_][0-9a-zA-Z_]*" IDENTIFIER = "IDENTIFIER" - +__all__.append(IDENTIFIER) suffix = r"([uU]?[lL]?[lL]?)" octal = r"0[0-7]+" + suffix @@ -92,13 +90,13 @@ def choice(*opts: str) -> str: decimal_digits = r"(0|[1-9][0-9]*)" decimal = decimal_digits + suffix - exponent = r"""([eE][-+]?[0-9]+)""" fraction = r"""([0-9]*\.[0-9]+)|([0-9]+\.)""" float = "((((" + fraction + ")" + exponent + "?)|([0-9]+" + exponent + "))[FfLl]?)" number_re = choice(octal, hex, float, decimal) NUMBER = "NUMBER" +__all__.append(NUMBER) simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])""" decimal_escape = r"""(\d+)""" @@ -111,11 +109,16 @@ def choice(*opts: str) -> str: STRING = "STRING" char = r"\'.\'" # TODO: escape sequence CHARACTER = "CHARACTER" +__all__.extend([STRING, CHARACTER]) comment_re = r"(//.*)|/\*([^*]|\*[^/])*\*/" COMMENT = "COMMENT" +__all__.append(COMMENT) newline = r"\n" +NEWLINE = "NEWLINE" +__all__.append(NEWLINE) + invalid = ( r"\S" # A single non-space character that's not caught by any of the other patterns ) @@ -134,109 +137,69 @@ def choice(*opts: str) -> str: ) letter = re.compile(r"[a-zA-Z_]") - -kwds = [] -AUTO = "AUTO" -kwds.append(AUTO) -BREAK = "BREAK" -kwds.append(BREAK) -CASE = "CASE" -kwds.append(CASE) -CHAR = "CHAR" -kwds.append(CHAR) -CONST = "CONST" -kwds.append(CONST) -CONTINUE = "CONTINUE" -kwds.append(CONTINUE) -DEFAULT = "DEFAULT" -kwds.append(DEFAULT) -DO = "DO" -kwds.append(DO) -DOUBLE = "DOUBLE" 
-kwds.append(DOUBLE)
-ELSE = "ELSE"
-kwds.append(ELSE)
-ENUM = "ENUM"
-kwds.append(ENUM)
-EXTERN = "EXTERN"
-kwds.append(EXTERN)
-FLOAT = "FLOAT"
-kwds.append(FLOAT)
-FOR = "FOR"
-kwds.append(FOR)
-GOTO = "GOTO"
-kwds.append(GOTO)
-IF = "IF"
-kwds.append(IF)
-INLINE = "INLINE"
-kwds.append(INLINE)
-INT = "INT"
-kwds.append(INT)
-LONG = "LONG"
-kwds.append(LONG)
-OFFSETOF = "OFFSETOF"
-kwds.append(OFFSETOF)
-RESTRICT = "RESTRICT"
-kwds.append(RESTRICT)
-RETURN = "RETURN"
-kwds.append(RETURN)
-SHORT = "SHORT"
-kwds.append(SHORT)
-SIGNED = "SIGNED"
-kwds.append(SIGNED)
-SIZEOF = "SIZEOF"
-kwds.append(SIZEOF)
-STATIC = "STATIC"
-kwds.append(STATIC)
-STRUCT = "STRUCT"
-kwds.append(STRUCT)
-SWITCH = "SWITCH"
-kwds.append(SWITCH)
-TYPEDEF = "TYPEDEF"
-kwds.append(TYPEDEF)
-UNION = "UNION"
-kwds.append(UNION)
-UNSIGNED = "UNSIGNED"
-kwds.append(UNSIGNED)
-VOID = "VOID"
-kwds.append(VOID)
-VOLATILE = "VOLATILE"
-kwds.append(VOLATILE)
-WHILE = "WHILE"
-kwds.append(WHILE)
-# An instruction in the DSL
-INST = "INST"
-kwds.append(INST)
-# A micro-op in the DSL
-OP = "OP"
-kwds.append(OP)
-# A macro in the DSL
-MACRO = "MACRO"
-kwds.append(MACRO)
-# A label in the DSL
-LABEL = "LABEL"
-kwds.append(LABEL)
-SPILLED = "SPILLED"
-kwds.append(SPILLED)
-keywords = {name.lower(): name for name in kwds}
+# Mapping from keywords to their token kinds.
+keywords = {
+    'auto': (AUTO := "AUTO"),
+    'break': (BREAK := "BREAK"),
+    'case': (CASE := "CASE"),
+    'char': (CHAR := "CHAR"),
+    'const': (CONST := "CONST"),
+    'continue': (CONTINUE := "CONTINUE"),
+    'default': (DEFAULT := "DEFAULT"),
+    'do': (DO := "DO"),
+    'double': (DOUBLE := "DOUBLE"),
+    'else': (ELSE := "ELSE"),
+    'enum': (ENUM := "ENUM"),
+    'extern': (EXTERN := "EXTERN"),
+    'float': (FLOAT := "FLOAT"),
+    'for': (FOR := "FOR"),
+    'goto': (GOTO := "GOTO"),
+    'if': (IF := "IF"),
+    'inline': (INLINE := "INLINE"),
+    'int': (INT := "INT"),
+    'long': (LONG := "LONG"),
+    'offsetof': (OFFSETOF := "OFFSETOF"),
+    'restrict': (RESTRICT := "RESTRICT"),
+    'return': (RETURN := "RETURN"),
+    'short': (SHORT := "SHORT"),
+    'signed': (SIGNED := "SIGNED"),
+    'sizeof': (SIZEOF := "SIZEOF"),
+    'static': (STATIC := "STATIC"),
+    'struct': (STRUCT := "STRUCT"),
+    'switch': (SWITCH := "SWITCH"),
+    'typedef': (TYPEDEF := "TYPEDEF"),
+    'union': (UNION := "UNION"),
+    'unsigned': (UNSIGNED := "UNSIGNED"),
+    'void': (VOID := "VOID"),
+    'volatile': (VOLATILE := "VOLATILE"),
+    'while': (WHILE := "WHILE"),
+    # An instruction in the DSL.
+    'inst': (INST := "INST"),
+    # A micro-op in the DSL.
+    'op': (OP := "OP"),
+    # A macro in the DSL.
+ 'label': (LABEL := "LABEL"), + 'spilled': (SPILLED := "SPILLED"), +} +__all__.extend(keywords.values()) +KEYWORD = "KEYWORD" ANNOTATION = "ANNOTATION" annotations = { - "specializing", - "override", - "register", - "replaced", - "pure", - "split", - "replicate", - "tier1", - "tier2", - "no_save_ip", + ANN_SPECIALIZING := "specializing", + ANN_OVERRIDE := "override", + ANN_REGISTER := "register", + ANN_REPLACED := "replaced", + ANN_PURE := "pure", + ANN_SPLIT := "split", + ANN_REPLICATE := "replicate", + ANN_TIER_1 := "tier1", + ANN_TIER_2 := "tier2", + ANN_NO_SAVE_IP := "no_save_ip", } -__all__ = [] -__all__.extend(kwds) - def make_syntax_error( message: str, @@ -313,7 +276,7 @@ def tokenize(src: str, line: int = 1, filename: str = "") -> Iterator[Token]: elif text == "\n": linestart = start line += 1 - kind = "\n" + kind = NEWLINE elif text[0] == "'": kind = CHARACTER elif text[0] == "#": @@ -342,7 +305,7 @@ def tokenize(src: str, line: int = 1, filename: str = "") -> Iterator[Token]: if kind == CMACRO: linestart = end line += 1 - if kind != "\n": + if kind != NEWLINE: yield Token( filename, kind, text, begin, (line, start - linestart + len(text)) ) @@ -362,7 +325,7 @@ def to_text(tkns: list[Token], dedent: int = 0) -> str: col = 1 + dedent res.append(" " * (c - col)) text = tkn.text - if dedent != 0 and tkn.kind == "COMMENT" and "\n" in text: + if dedent != 0 and tkn.kind == COMMENT and "\n" in text: if dedent < 0: text = text.replace("\n", "\n" + " " * -dedent) # TODO: dedent > 0 diff --git a/Tools/cases_generator/opcode_metadata_generator.py b/Tools/cases_generator/opcode_metadata_generator.py index 453db6905d6842..364244344fb083 100644 --- a/Tools/cases_generator/opcode_metadata_generator.py +++ b/Tools/cases_generator/opcode_metadata_generator.py @@ -20,12 +20,23 @@ ) from cwriter import CWriter from dataclasses import dataclass -from typing import TextIO +from lexer import ANN_REPLACED, ANN_SPECIALIZING +from typing import TextIO, TypedDict from stack import Stack, get_stack_effect, get_stack_effects +class _OpArgDictKind(TypedDict): + OPARG_FULL: int + OPARG_CACHE_1: int + OPARG_CACHE_2: int + OPARG_CACHE_4: int + OPARG_TOP: int + OPARG_BOTTOM: int + OPARG_SAVE_RETURN_OFFSET: int + OPARG_REPLACED: int + # Constants used instead of size for macro expansions. # Note: 1, 2, 4 must match actual cache entry sizes. 
-OPARG_KINDS = { +OPARG_KINDS: _OpArgDictKind = { "OPARG_FULL": 0, "OPARG_CACHE_1": 1, "OPARG_CACHE_2": 2, @@ -343,9 +354,9 @@ def generate_expansion_table(analysis: Analysis, out: CWriter) -> None: size = OPARG_KINDS["OPARG_SAVE_RETURN_OFFSET"] if isinstance(part, Uop): # Skip specializations - if "specializing" in part.annotations: + if ANN_SPECIALIZING in part.annotations: continue - if "replaced" in part.annotations: + if ANN_REPLACED in part.annotations: size = OPARG_KINDS["OPARG_REPLACED"] expansions.append((part.name, size, offset if size else 0)) offset += part.size @@ -380,9 +391,9 @@ def is_viable_expansion(inst: Instruction) -> bool: for part in inst.parts: if isinstance(part, Uop): # Skip specializing and replaced uops - if "specializing" in part.annotations: + if ANN_SPECIALIZING in part.annotations: continue - if "replaced" in part.annotations: + if ANN_REPLACED in part.annotations: continue if part.properties.tier == 1 or not part.is_viable(): return False diff --git a/Tools/cases_generator/parsing.py b/Tools/cases_generator/parsing.py index 011f34de288871..4b006623811788 100644 --- a/Tools/cases_generator/parsing.py +++ b/Tools/cases_generator/parsing.py @@ -209,11 +209,11 @@ def inst_header(self) -> InstHeader | None: # | annotation* op(NAME, (inputs -- outputs)) annotations = [] while anno := self.expect(lx.ANNOTATION): - if anno.text == "replicate": + if anno.text == lx.ANN_REPLICATE: self.require(lx.LPAREN) times = self.require(lx.NUMBER) self.require(lx.RPAREN) - annotations.append(f"replicate({times.text})") + annotations.append(f"{lx.ANN_REPLICATE}({times.text})") else: annotations.append(anno.text) tkn = self.expect(lx.INST) diff --git a/Tools/cases_generator/plexer.py b/Tools/cases_generator/plexer.py index cb6c5375866490..cda490f5f0cf68 100644 --- a/Tools/cases_generator/plexer.py +++ b/Tools/cases_generator/plexer.py @@ -34,7 +34,7 @@ def next(self, raw: bool = False) -> Token | None: while self.pos < len(self.tokens): tok = self.tokens[self.pos] self.pos += 1 - if raw or tok.kind != "COMMENT": + if raw or tok.kind != lx.COMMENT: return tok return None diff --git a/Tools/cases_generator/tier2_generator.py b/Tools/cases_generator/tier2_generator.py index 5e23360cdc0aaf..e1317802f9e755 100644 --- a/Tools/cases_generator/tier2_generator.py +++ b/Tools/cases_generator/tier2_generator.py @@ -5,6 +5,7 @@ import argparse +import lexer as lx # for constants from analyzer import ( Analysis, Instruction, @@ -88,9 +89,9 @@ def deopt_if( self.out.emit_at("if ", tkn) lparen = next(tkn_iter) self.emit(lparen) - assert lparen.kind == "LPAREN" + assert lparen.kind == lx.LPAREN first_tkn = tkn_iter.peek() - emit_to(self.out, tkn_iter, "RPAREN") + emit_to(self.out, tkn_iter, lx.RPAREN) next(tkn_iter) # Semi colon self.emit(") {\n") self.emit("UOP_STAT_INC(uopcode, miss);\n") @@ -110,7 +111,7 @@ def exit_if( # type: ignore[override] lparen = next(tkn_iter) self.emit(lparen) first_tkn = tkn_iter.peek() - emit_to(self.out, tkn_iter, "RPAREN") + emit_to(self.out, tkn_iter, lx.RPAREN) next(tkn_iter) # Semi colon self.emit(") {\n") self.emit("UOP_STAT_INC(uopcode, miss);\n")
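
Note (editorial, not part of the patch): the lexer.py rewrite above replaces the old module-level string constants and the `globals()` scan with a single table whose keys are bound via assignment expressions, so an entry like `(PLUSPLUS := "PLUSPLUS"): r"\+\+"` both defines the importable constant `PLUSPLUS` and registers its regex. Below is a minimal, self-contained sketch of that pattern; the `toy_operators` table and the `kind_of` helper are hypothetical illustrations, not code from the patch.

import re

toy_operators = {
    # Longer operators must go before shorter ones, as in the real table.
    (PLUSPLUS := "PLUSPLUS"): r"\+\+",
    (PLUS := "PLUS"): r"\+",
    (SEMI := "SEMI"): r";",
}

# Compile one alternation with a named group per token kind.
_pattern = re.compile("|".join(f"(?P<{name}>{rx})" for name, rx in toy_operators.items()))

def kind_of(text: str) -> str:
    """Return the token-kind constant matched by `text`."""
    m = _pattern.match(text)
    assert m is not None and m.lastgroup is not None
    return m.lastgroup

assert PLUSPLUS == "PLUSPLUS"     # each constant is simply its own name
assert kind_of("++") == PLUSPLUS  # call sites compare against constants such as
assert kind_of(";") == SEMI       # lx.PLUSPLUS instead of bare string literals

The same walrus-in-literal trick is used for the `keywords` dict and the `annotations` set, which is why call sites throughout the generator can switch from `"replaced" in op.annotations` to `lx.ANN_REPLACED in op.annotations` while the runtime values stay identical strings.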
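
Note (editorial, not part of the patch): in analyzer.py the new module-level `ANN_REPLICATED` regex replaces the old `int(anno[10:-1])` slice, which silently depended on `len("replicate(") == 10`. A small self-checking sketch of the behaviour, with `anno` as a stand-in annotation string:

import re

ANN_REPLICATE = "replicate"  # mirrors lexer.ANN_REPLICATE introduced above
ANN_REPLICATED = re.compile(rf"^{re.escape(ANN_REPLICATE)}\((\d+)\)$")

anno = "replicate(4)"
if match := ANN_REPLICATED.match(anno):
    assert int(match.group(1)) == 4                   # same value as int(anno[10:-1])
assert ANN_REPLICATED.match("replicated(4)") is None  # anchored, so no false match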