From 6b7896026733c2ba48f58ec171daf3a6c01feb09 Mon Sep 17 00:00:00 2001
From: Markus Rosskopf
Date: Sat, 13 Jan 2024 16:41:56 +0100
Subject: [PATCH] Add Token_Stream class as new lexer and ast_link to every token

---
 trlc/ast.py    |  23 +++-
 trlc/lexer.py  |  13 +-
 trlc/parser.py | 349 ++++++++++++++++++++++++++++++++++++++++---------
 trlc/trlc.py   |   4 +-
 4 files changed, 326 insertions(+), 63 deletions(-)

diff --git a/trlc/ast.py b/trlc/ast.py
index 824f268e..5fe0f6d6 100644
--- a/trlc/ast.py
+++ b/trlc/ast.py
@@ -112,6 +112,10 @@ def __init__(self, location):
         assert isinstance(location, Location)
         self.location = location
 
+    def set_ast_link(self, tok):
+        assert isinstance(tok, Token)
+        tok.ast_link = self
+
     def write_indent(self, indent, message):  # pragma: no cover
         # lobster-exclude: Debugging feature
         assert isinstance(indent, int)
@@ -266,7 +270,9 @@ def resolve_imports(self, mh, stab):
             # We can ignore errors here, because that just means we
             # generate more error later.
             try:
-                self.imports.add(stab.lookup(mh, t_import, Package))
+                a_import = stab.lookup(mh, t_import, Package)
+                self.imports.add(a_import)
+                a_import.set_ast_link(t_import)
             except TRLC_Error:
                 pass
 
@@ -2399,6 +2405,10 @@ def dump(self, indent=0):  # pragma: no cover
         self.write_indent(indent + 1, "Declared_Late: %s" % self.declared_late)
         self.symbols.dump(indent + 1, omit_heading=True)
 
+    def __repr__(self):
+        return "%s<%s>" % (self.__class__.__name__,
+                           self.name)
+
 
 class Composite_Type(Concrete_Type, metaclass=ABCMeta):
     """Abstract base for record and tuple types, as they share some
@@ -2495,6 +2505,11 @@ def dump(self, indent=0):  # pragma: no cover
         self.write_indent(indent + 1, "Optional: %s" % self.optional)
         self.write_indent(indent + 1, "Type: %s" % self.n_typ.name)
 
+    def __repr__(self):
+        return "%s<%s>" % (self.__class__.__name__,
+                           self.member_of.fully_qualified_name() + "." +
+                           self.name)
+
 
 class Record_Type(Composite_Type):
     """A user-defined record type.
@@ -2950,6 +2965,12 @@ def perform_checks(self, mh):
 
         return ok
 
+    def __repr__(self):
+        return "%s<%s>" % (self.__class__.__name__,
+                           self.n_package.name + "." +
+                           self.n_typ.name + "." +
+                           self.name)
+
 
 class Section(Entity):
     # lobster-trace: LRM.Section_Declaration
diff --git a/trlc/lexer.py b/trlc/lexer.py
index 55efda21..357777c4 100644
--- a/trlc/lexer.py
+++ b/trlc/lexer.py
@@ -159,7 +159,7 @@ class Token(Token_Base):
         "STRING" : "string literal",
     }
 
-    def __init__(self, location, kind, value=None):
+    def __init__(self, location, kind, value=None, ast_link=None):
         assert kind in Token.KIND
         if kind in ("COMMENT", "IDENTIFIER", "BUILTIN",
                     "KEYWORD", "OPERATOR", "STRING"):
@@ -171,6 +171,7 @@ def __init__(self, location, kind, value=None):
         else:
             assert value is None
         super().__init__(location, kind, value)
+        self.ast_link = ast_link
 
     def __repr__(self):
         if self.value is None:
@@ -186,6 +187,7 @@ def __init__(self, mh, content):
         self.mh = mh
         self.content = content
         self.length = len(self.content)
+        self.tokens = []
 
         self.lexpos = -3
         self.line_no = 0
@@ -643,6 +645,15 @@ def token(self):
         return Token(sref, kind, value)
 
 
+class Token_Stream(TRLC_Lexer):
+
+    def token(self):
+        tok = super().token()
+        if tok is not None:
+            self.tokens.append(tok)
+        return tok
+
+
 def sanity_test():
     # lobster-exclude: Developer test function
     mh = Message_Handler()
diff --git a/trlc/parser.py b/trlc/parser.py
index 5854db8e..9fef0fe1 100644
--- a/trlc/parser.py
+++ b/trlc/parser.py
@@ -330,9 +330,10 @@ def parse_described_name(self):
         if self.peek("STRING"):
             self.match("STRING")
-            return name, self.ct.value
+            t_descr = self.ct
+            return name, t_descr.value, t_descr
         else:
-            return name, None
+            return name, None, None
 
     def parse_qualified_name(self,
                              scope,
@@ -348,12 +349,14 @@ def parse_qualified_name(self,
         if match_ident:
             self.match("IDENTIFIER")
         sym = scope.lookup(self.mh, self.ct)
+        sym.set_ast_link(self.ct)
         if isinstance(sym, ast.Package):
             if not self.cu.is_visible(sym):
                 self.mh.error(self.ct.location,
                               "package must be imported before use")
             self.match("DOT")
+            sym.set_ast_link(self.ct)
             self.match("IDENTIFIER")
             return sym.symbols.lookup(self.mh, self.ct,
                                       required_subclass)
         else:
@@ -374,25 +377,35 @@ def parse_type_declaration(self):
     def parse_enum_declaration(self):
         # lobster-trace: LRM.Enumeration_Declaration
         self.match_kw("enum")
-        name, description = self.parse_described_name()
+        t_enum = self.ct
+        name, description, t_description = self.parse_described_name()
         enum = ast.Enumeration_Type(name = name.value,
                                     description = description,
                                     location = name.location,
                                     package = self.cu.package)
         self.cu.package.symbols.register(self.mh, enum)
+        enum.set_ast_link(t_enum)
+        enum.set_ast_link(name)
+        if t_description:
+            enum.set_ast_link(t_description)
 
         self.match("C_BRA")
+        enum.set_ast_link(self.ct)
         empty = True
         while not self.peek("C_KET"):
-            name, description = self.parse_described_name()
+            name, description, t_description = self.parse_described_name()
             lit = ast.Enumeration_Literal_Spec(name = name.value,
                                                description = description,
                                                location = name.location,
                                                enum = enum)
+            lit.set_ast_link(name)
+            if t_description:
+                lit.set_ast_link(self.ct)
             empty = False
             enum.literals.register(self.mh, lit)
         self.match("C_KET")
+        enum.set_ast_link(self.ct)
 
         if empty:
             # lobster-trace: LRM.No_Empty_Enumerations
@@ -412,10 +425,11 @@ def parse_tuple_field(self,
         assert isinstance(optional_required, bool)
         assert optional_allowed or not optional_required
 
-        field_name, field_description = self.parse_described_name()
+        field_name, field_description, t_descr = self.parse_described_name()
 
         if optional_required or self.peek_kw("optional"):
             self.match_kw("optional")
+            t_optional = self.ct
             if optional_allowed:
                 field_is_optional = True
             else:
@@ -426,26 +440,37 @@ def parse_tuple_field(self,
 
         # lobster-trace: LRM.Tuple_Field_Types
         field_type = self.parse_qualified_name(self.default_scope,
                                                ast.Type)
-
-        return ast.Composite_Component(name = field_name.value,
-                                       description = field_description,
-                                       location = field_name.location,
-                                       member_of = n_tuple,
-                                       n_typ = field_type,
-                                       optional = field_is_optional)
+        comp = ast.Composite_Component(name = field_name.value,
+                                       description = field_description,
+                                       location = field_name.location,
+                                       member_of = n_tuple,
+                                       n_typ = field_type,
+                                       optional = field_is_optional)
+        comp.set_ast_link(field_name)
+        if t_descr:
+            comp.set_ast_link(t_descr)
+        if field_is_optional:
+            comp.set_ast_link(t_optional)
+
+        return comp
 
     def parse_tuple_declaration(self):
         # lobster-trace: LRM.Tuple_Declaration
         self.match_kw("tuple")
-        name, description = self.parse_described_name()
+        t_tuple = self.ct
+        name, description, t_descr = self.parse_described_name()
         n_tuple = ast.Tuple_Type(name = name.value,
                                  description = description,
                                  location = name.location,
                                  package = self.cu.package)
+        n_tuple.set_ast_link(t_tuple)
+        n_tuple.set_ast_link(name)
+        if t_descr:
+            n_tuple.set_ast_link(t_descr)
 
         self.match("C_BRA")
+        n_tuple.set_ast_link(self.ct)
 
         n_field = self.parse_tuple_field(
             n_tuple,
@@ -462,6 +487,7 @@ def parse_tuple_declaration(self):
             if has_separators or self.peek_kw("separator"):
                 has_separators = True
                 self.match_kw("separator")
+                t_sep = self.ct
                 if not separator_allowed:
                     # lobster-trace: LRM.Tuple_Separators_All_Or_None
                     self.mh.error(self.ct.location,
@@ -472,7 +498,10 @@ def parse_tuple_declaration(self):
                    self.peek("COLON") or \
                    self.peek("SEMICOLON"):
                     self.advance()
-                    n_tuple.add_separator(ast.Separator(self.ct))
+                    sep = ast.Separator(self.ct)
+                    sep.set_ast_link(t_sep)
+                    sep.set_ast_link(self.ct)
+                    n_tuple.add_separator(sep)
             else:
                 separator_allowed = False
                 # lobster-trace: LRM.Tuple_Optional_Requires_Separators
@@ -487,6 +516,7 @@ def parse_tuple_declaration(self):
             optional_required |= n_field.optional
 
         self.match("C_KET")
+        n_tuple.set_ast_link(self.ct)
 
         # Final check to ban tuples with separators containing other
         # tuples.
@@ -510,21 +540,26 @@ def parse_tuple_declaration(self):
     def parse_record_component(self, n_record):
         assert isinstance(n_record, ast.Record_Type)
 
-        c_name, c_descr = self.parse_described_name()
+        c_name, c_descr, t_descr = self.parse_described_name()
 
         if self.peek_kw("optional"):
             self.match_kw("optional")
+            t_optional = self.ct
             c_optional = True
         else:
             c_optional = False
         c_typ = self.parse_qualified_name(self.default_scope, ast.Type)
+        c_typ.set_ast_link(self.ct)
 
         if self.peek("S_BRA"):
             self.match("S_BRA")
+            t_s_bra = self.ct
             self.match("INTEGER")
+            t_lo = self.ct
             a_lo = self.ct.value
             loc_lo = self.ct.location
             self.match("RANGE")
+            t_range = self.ct
             a_loc = self.ct.location
             if self.peek("INTEGER"):
                 self.match("INTEGER")
@@ -535,29 +570,45 @@ def parse_record_component(self, n_record):
             else:
                 self.mh.error(self.nt.location,
                               "expected INTEGER or * for upper bound")
+            t_hi = self.ct
             loc_hi = self.ct.location
             self.match("S_KET")
+            t_s_ket = self.ct
             c_typ = ast.Array_Type(location = a_loc,
                                    element_type = c_typ,
                                    lower_bound = a_lo,
                                    upper_bound = a_hi,
                                    loc_lower = loc_lo,
                                    loc_upper = loc_hi)
-
-        return ast.Composite_Component(name = c_name.value,
-                                       description = c_descr,
-                                       location = c_name.location,
-                                       member_of = n_record,
-                                       n_typ = c_typ,
-                                       optional = c_optional)
+            c_typ.set_ast_link(t_s_bra)
+            c_typ.set_ast_link(t_lo)
+            c_typ.set_ast_link(t_range)
+            c_typ.set_ast_link(t_hi)
+            c_typ.set_ast_link(t_s_ket)
+
+        c_comp = ast.Composite_Component(name = c_name.value,
+                                         description = c_descr,
+                                         location = c_name.location,
+                                         member_of = n_record,
+                                         n_typ = c_typ,
+                                         optional = c_optional)
+        c_comp.set_ast_link(c_name)
+        if t_descr:
+            c_comp.set_ast_link(t_descr)
+        if c_optional:
+            c_comp.set_ast_link(t_optional)
+
+        return c_comp
 
     def parse_record_declaration(self):
         if self.peek_kw("abstract"):
             self.match_kw("abstract")
+            t_abstract = self.ct
             is_abstract = True
             is_final = False
         elif self.peek_kw("final"):
             self.match_kw("final")
+            t_final = self.ct
             is_abstract = False
             is_final = True
         else:
@@ -565,12 +616,16 @@ def parse_record_declaration(self):
             is_final = False
 
         self.match_kw("type")
-        name, description = self.parse_described_name()
+        t_type = self.ct
+        name, description, t_description = self.parse_described_name()
 
         if self.peek_kw("extends"):
             self.match_kw("extends")
+            t_extends = self.ct
             root_record = self.parse_qualified_name(self.default_scope,
                                                     ast.Record_Type)
+            root_record.set_ast_link(t_extends)
+            root_record.set_ast_link(self.ct)
         else:
             root_record = None
@@ -592,11 +647,21 @@ def parse_record_declaration(self):
                                n_parent = root_record,
                                is_abstract = is_abstract)
         self.cu.package.symbols.register(self.mh, record)
+        if is_abstract:
+            record.set_ast_link(t_abstract)
+        if is_final:
+            record.set_ast_link(t_final)
+        record.set_ast_link(t_type)
+        record.set_ast_link(name)
+        if t_description:
+            record.set_ast_link(t_description)
 
         self.match("C_BRA")
+        record.set_ast_link(self.ct)
         while not self.peek("C_KET"):
             if self.peek_kw("freeze"):
                 self.match_kw("freeze")
+                t_freeze = self.ct
                 self.match("IDENTIFIER")
                 n_comp = record.components.lookup(self.mh,
                                                   self.ct,
@@ -608,8 +673,12 @@ def parse_record_declaration(self):
                         "duplicate freezing of %s, previously frozen at %s" %
                         (n_comp.name,
                          self.mh.cross_file_reference(n_value.location)))
+                n_comp.set_ast_link(t_freeze)
+                n_comp.set_ast_link(self.ct)
                 self.match("ASSIGN")
+                n_comp.set_ast_link(self.ct)
                 n_value = self.parse_value(n_comp.n_typ)
+                n_value.set_ast_link(self.ct)
 
                 record.frozen[n_comp.name] = n_value
@@ -623,6 +692,7 @@
             record.components.register(self.mh, n_comp)
 
         self.match("C_KET")
+        record.set_ast_link(self.ct)
 
         # Finally mark record final if applicable
         if is_final:
@@ -640,12 +710,14 @@ def parse_expression(self, scope):
             while self.peek_kw("and"):
                 self.match_kw("and")
                 t_op = self.ct
+                a_op = ast.Binary_Operator.LOGICAL_AND
+                t_op.ast_link = a_op
                 n_rhs = self.parse_relation(scope)
                 n_lhs = ast.Binary_Expression(
                     mh = self.mh,
                     location = t_op.location,
                     typ = self.builtin_bool,
-                    operator = ast.Binary_Operator.LOGICAL_AND,
+                    operator = a_op,
                     n_lhs = n_lhs,
                     n_rhs = n_rhs)
 
@@ -653,36 +725,42 @@ def parse_expression(self, scope):
             while self.peek_kw("or"):
                 self.match_kw("or")
                 t_op = self.ct
+                a_op = ast.Binary_Operator.LOGICAL_OR
+                t_op.ast_link = a_op
                 n_rhs = self.parse_relation(scope)
                 n_lhs = ast.Binary_Expression(
                     mh = self.mh,
                     location = t_op.location,
                     typ = self.builtin_bool,
-                    operator = ast.Binary_Operator.LOGICAL_OR,
+                    operator = a_op,
                     n_lhs = n_lhs,
                     n_rhs = n_rhs)
 
         elif self.peek_kw("xor"):
             self.match_kw("xor")
             t_op = self.ct
+            a_op = ast.Binary_Operator.LOGICAL_XOR
+            t_op.ast_link = a_op
             n_rhs = self.parse_relation(scope)
             n_lhs = ast.Binary_Expression(
                 mh = self.mh,
                 location = t_op.location,
                 typ = self.builtin_bool,
-                operator = ast.Binary_Operator.LOGICAL_XOR,
+                operator = a_op,
                 n_lhs = n_lhs,
                 n_rhs = n_rhs)
 
         elif self.peek_kw("implies"):
             self.match_kw("implies")
             t_op = self.ct
+            a_op = ast.Binary_Operator.LOGICAL_IMPLIES
+            t_op.ast_link = a_op
             n_rhs = self.parse_relation(scope)
             n_lhs = ast.Binary_Expression(
                 mh = self.mh,
                 location = t_op.location,
                 typ = self.builtin_bool,
-                operator = ast.Binary_Operator.LOGICAL_IMPLIES,
+                operator = a_op,
                 n_lhs = n_lhs,
                 n_rhs = n_rhs)
 
@@ -706,12 +784,14 @@ def parse_relation(self, scope):
            self.nt.value in Parser.COMPARISON_OPERATOR:
             self.match("OPERATOR")
             t_op = self.ct
+            a_op = relop_mapping[t_op.value]
+            t_op.ast_link = a_op
             n_rhs = self.parse_simple_expression(scope)
             return ast.Binary_Expression(
                 mh = self.mh,
                 location = t_op.location,
                 typ = self.builtin_bool,
-                operator = relop_mapping[t_op.value],
+                operator = a_op,
                 n_lhs = n_lhs,
                 n_rhs = n_rhs)
 
@@ -726,9 +806,13 @@ def parse_relation(self, scope):
             t_in = self.ct
 
             n_a = self.parse_simple_expression(scope)
+            t_n_a = self.ct
             if self.peek("RANGE"):
                 self.match("RANGE")
+                t_range = self.ct
                 n_b = self.parse_simple_expression(scope)
+                n_b.set_ast_link(self.ct)
+                n_a.set_ast_link(t_n_a)
                 rv = ast.Range_Test(
                     mh = self.mh,
                     location = t_in.location,
@@ -736,6 +820,8 @@ def parse_relation(self, scope):
                     n_lhs = n_lhs,
                     n_lower = n_a,
                     n_upper = n_b)
+                rv.set_ast_link(t_range)
+                rv.set_ast_link(t_in)
 
             elif isinstance(n_a.typ, ast.Builtin_String):
                 rv = ast.Binary_Expression(
@@ -745,13 +831,16 @@ def parse_relation(self, scope):
                     operator = ast.Binary_Operator.STRING_CONTAINS,
                     n_lhs = n_lhs,
                     n_rhs = n_a)
+                rv.set_ast_link(t_in)
 
             elif isinstance(n_a.typ, ast.Array_Type):
+                a_op = ast.Binary_Operator.ARRAY_CONTAINS
+                t_in.ast_link = a_op
                 rv = ast.Binary_Expression(
                     mh = self.mh,
                     location = t_in.location,
                     typ = self.builtin_bool,
-                    operator = ast.Binary_Operator.ARRAY_CONTAINS,
+                    operator = a_op,
                     n_lhs = n_lhs,
                     n_rhs = n_a)
 
@@ -762,11 +851,13 @@ def parse_relation(self, scope):
                     " not for %s" % n_a.typ.name)
 
             if t_not is not None:
+                a_unary_op = ast.Unary_Operator.LOGICAL_NOT
+                t_not.ast_link = a_unary_op
                 rv = ast.Unary_Expression(
                     mh = self.mh,
                     location = t_not.location,
                     typ = self.builtin_bool,
-                    operator = ast.Unary_Operator.LOGICAL_NOT,
+                    operator = a_unary_op,
                     n_operand = rv)
 
             return rv
@@ -790,6 +881,8 @@ def parse_simple_expression(self, scope):
            self.nt.value in Parser.ADDING_OPERATOR:
             self.match("OPERATOR")
             t_unary = self.ct
+            a_unary = un_add_map[t_unary.value]
+            t_unary.ast_link = a_unary
             has_explicit_brackets = self.peek("BRA")
         else:
             t_unary = None
@@ -809,7 +902,7 @@ def parse_simple_expression(self, scope):
                 mh = self.mh,
                 location = t_unary.location,
                 typ = n_lhs.typ,
-                operator = un_add_map[t_unary.value],
+                operator = a_unary,
                 n_operand = n_lhs)
 
         if isinstance(n_lhs.typ, ast.Builtin_String):
@@ -821,12 +914,14 @@ def parse_simple_expression(self, scope):
               self.nt.value in Parser.ADDING_OPERATOR:
             self.match("OPERATOR")
             t_op = self.ct
+            a_op = bin_add_map[t_op.value]
+            t_op.ast_link = a_op
             n_rhs = self.parse_term(scope)
             n_lhs = ast.Binary_Expression(
                 mh = self.mh,
                 location = t_op.location,
                 typ = rtyp,
-                operator = bin_add_map[t_op.value],
+                operator = a_op,
                 n_lhs = n_lhs,
                 n_rhs = n_rhs)
 
@@ -846,12 +941,14 @@ def parse_term(self, scope):
               self.nt.value in Parser.MULTIPLYING_OPERATOR:
             self.match("OPERATOR")
             t_op = self.ct
+            a_op = mul_map[t_op.value]
+            t_op.ast_link = a_op
             n_rhs = self.parse_factor(scope)
             n_lhs = ast.Binary_Expression(
                 mh = self.mh,
                 location = t_op.location,
                 typ = n_lhs.typ,
-                operator = mul_map[t_op.value],
+                operator = a_op,
                 n_lhs = n_lhs,
                 n_rhs = n_rhs)
 
@@ -865,22 +962,26 @@ def parse_factor(self, scope):
             self.match_kw("not")
             t_op = self.ct
             n_operand = self.parse_primary(scope)
+            a_not = ast.Unary_Operator.LOGICAL_NOT
+            t_op.ast_link = a_not
             return ast.Unary_Expression(
                 mh = self.mh,
                 location = t_op.location,
                 typ = self.builtin_bool,
-                operator = ast.Unary_Operator.LOGICAL_NOT,
+                operator = a_not,
                 n_operand = n_operand)
 
         elif self.peek_kw("abs"):
             self.match_kw("abs")
             t_op = self.ct
            n_operand = self.parse_primary(scope)
+            a_abs = ast.Unary_Operator.ABSOLUTE_VALUE
+            t_op.ast_link = a_abs
             return ast.Unary_Expression(
                 mh = self.mh,
                 location = t_op.location,
                 typ = n_operand.typ,
-                operator = ast.Unary_Operator.ABSOLUTE_VALUE,
+                operator = a_abs,
                 n_operand = n_operand)
 
         else:
@@ -890,11 +991,13 @@ def parse_factor(self, scope):
                 t_op = self.ct
                 n_rhs = self.parse_primary(scope)
                 rhs_value = n_rhs.evaluate(self.mh, None)
+                a_binary = ast.Binary_Operator.POWER
+                t_op.ast_link = a_binary
                 n_lhs = ast.Binary_Expression(
                     mh = self.mh,
                     location = t_op.location,
                     typ = n_lhs.typ,
-                    operator = ast.Binary_Operator.POWER,
+                    operator = a_binary,
                     n_lhs = n_lhs,
                     n_rhs = n_rhs)
                 if rhs_value.value < 0:
@@ -909,36 +1012,49 @@ def parse_primary(self, scope):
         if self.peek("INTEGER"):
             # lobster-trace: LRM.Integer_Values
             self.match("INTEGER")
-            return ast.Integer_Literal(self.ct, self.builtin_int)
+            int_lit = ast.Integer_Literal(self.ct, self.builtin_int)
+            int_lit.set_ast_link(self.ct)
+            return int_lit
 
         elif self.peek("DECIMAL"):
             # lobster-trace: LRM.Decimal_Values
             self.match("DECIMAL")
-            return ast.Decimal_Literal(self.ct, self.builtin_decimal)
+            dec_lit = ast.Decimal_Literal(self.ct, self.builtin_decimal)
+            dec_lit.set_ast_link(self.ct)
+            return dec_lit
 
         elif self.peek("STRING"):
             # lobster-trace: LRM.String_Values
             self.match("STRING")
-            return ast.String_Literal(self.ct, self.builtin_str)
+            string_lit = ast.String_Literal(self.ct, self.builtin_str)
+            string_lit.set_ast_link(self.ct)
+            return string_lit
 
         elif self.peek_kw("true") or self.peek_kw("false"):
             # lobster-trace: LRM.Boolean_Values
             self.match("KEYWORD")
-            return ast.Boolean_Literal(self.ct, self.builtin_bool)
+            bool_lit = ast.Boolean_Literal(self.ct, self.builtin_bool)
+            bool_lit.set_ast_link(self.ct)
+            return bool_lit
 
         elif self.peek_kw("null"):
             self.match_kw("null")
-            return ast.Null_Literal(self.ct)
+            null_lit = ast.Null_Literal(self.ct)
+            null_lit.set_ast_link(self.ct)
+            return null_lit
 
         elif self.peek("BRA"):
             self.match("BRA")
+            t_bra = self.ct
             if self.peek_kw("forall") or self.peek_kw("exists"):
                 rv = self.parse_quantified_expression(scope)
             elif self.peek_kw("if"):
                 rv = self.parse_conditional_expression(scope)
             else:
                 rv = self.parse_expression(scope)
+            rv.set_ast_link(t_bra)
             self.match("KET")
+            rv.set_ast_link(self.ct)
             return rv
 
         else:
@@ -949,9 +1065,11 @@ def parse_quantified_expression(self, scope):
 
         if self.peek_kw("forall"):
             self.match_kw("forall")
+            t_quantified = self.ct
             universal = True
         else:
             self.match_kw("exists")
+            t_quantified = self.ct
             universal = False
         loc = self.ct.location
         self.match("IDENTIFIER")
@@ -964,17 +1082,21 @@ def parse_quantified_expression(self, scope):
                           pdef.name,
                           self.mh.cross_file_reference(pdef.location)))
         self.match_kw("in")
+        t_in = self.ct
         self.match("IDENTIFIER")
         field = scope.lookup(self.mh, self.ct, ast.Composite_Component)
         n_source = ast.Name_Reference(self.ct.location, field)
+        n_source.set_ast_link(self.ct)
         if not isinstance(field.n_typ, ast.Array_Type):
             self.mh.error(self.ct.location,
                           "you can only quantify over arrays")
         n_var = ast.Quantified_Variable(t_qv.value,
                                         t_qv.location,
                                         field.n_typ.element_type)
+        n_var.set_ast_link(t_qv)
         self.match("ARROW")
+        t_arrow = self.ct
 
         new_table = ast.Symbol_Table()
         new_table.register(self.mh, n_var)
@@ -982,13 +1104,20 @@ def parse_quantified_expression(self, scope):
         n_expr = self.parse_expression(scope)
         scope.pop()
 
-        return ast.Quantified_Expression(mh = self.mh,
-                                         location = loc,
-                                         typ = self.builtin_bool,
-                                         universal = universal,
-                                         n_variable = n_var,
-                                         n_source = n_source,
-                                         n_expr = n_expr)
+        quantified_expression = ast.Quantified_Expression(
+            mh = self.mh,
+            location = loc,
+            typ = self.builtin_bool,
+            universal = universal,
+            n_variable = n_var,
+            n_source = n_source,
+            n_expr = n_expr)
+
+        quantified_expression.set_ast_link(t_quantified)
+        quantified_expression.set_ast_link(t_in)
+        quantified_expression.set_ast_link(t_arrow)
+
+        return quantified_expression
 
     def parse_conditional_expression(self, scope):
         # lobster-trace: LRM.Conditional_Expression
@@ -999,6 +1128,7 @@ def parse_conditional_expression(self, scope):
         t_if = self.ct
         if_cond = self.parse_expression(scope)
         self.match_kw("then")
+        t_then = self.ct
         if_expr = self.parse_expression(scope)
         if if_expr.typ is None:
             self.mh.error(if_expr.location,
@@ -1007,17 +1137,23 @@ def parse_conditional_expression(self, scope):
 
         rv = ast.Conditional_Expression(location = t_if.location,
                                         if_action = if_action)
+        if_action.set_ast_link(t_if)
+        if_action.set_ast_link(t_then)
 
         while self.peek_kw("elsif"):
             self.match_kw("elsif")
             t_elsif = self.ct
             elsif_cond = self.parse_expression(scope)
             self.match_kw("then")
+            t_then = self.ct
             elsif_expr = self.parse_expression(scope)
             elsif_action = ast.Action(self.mh, t_elsif,
                                       elsif_cond, elsif_expr)
+            elsif_action.set_ast_link(t_elsif)
+            elsif_action.set_ast_link(t_then)
             rv.add_elsif(self.mh, elsif_action)
 
         self.match_kw("else")
+        rv.set_ast_link(self.ct)
         else_expr = self.parse_expression(scope)
         rv.set_else_part(self.mh, else_expr)
@@ -1042,14 +1178,22 @@ def parse_builtin(self, scope, n_name, t_name):
 
         # Parse the arguments.
         parameters = []
+        n_name.set_ast_link(self.ct)
         self.match("BRA")
+        n_name.set_ast_link(self.ct)
         while not self.peek("KET"):
-            parameters.append(self.parse_expression(scope))
+            exp = self.parse_expression(scope)
+            if not self.ct.ast_link:
+                exp.set_ast_link(self.ct)
+            parameters.append(exp)
+
             if self.peek("COMMA"):
                 self.match("COMMA")
+                n_name.set_ast_link(self.ct)
             else:
                 break
         self.match("KET")
+        n_name.set_ast_link(self.ct)
 
         # Enforce arity
         if isinstance(n_name, ast.Builtin_Function):
@@ -1184,13 +1328,17 @@ def parse_name(self, scope):
         # Enum literals are a bit different, so we deal with themq
         # first.
         if isinstance(n_name, ast.Enumeration_Type):
+            n_name.set_ast_link(self.ct)
             self.match("DOT")
+            n_name.set_ast_link(self.ct)
             self.match("IDENTIFIER")
             lit = n_name.literals.lookup(self.mh,
                                          self.ct,
                                          ast.Enumeration_Literal_Spec)
-            return ast.Enumeration_Literal(location = self.ct.location,
-                                           literal = lit)
+            enum_lit = ast.Enumeration_Literal(location = self.ct.location,
+                                               literal = lit)
+            enum_lit.set_ast_link(self.ct)
+            return enum_lit
 
         # Anything that remains is either a function call or an actual
         # name. Let's just enforce this for sanity.
@@ -1220,7 +1368,7 @@ def parse_name(self, scope):
         #        | name '[' expression ']'
         n_name = ast.Name_Reference(location = self.ct.location,
                                     entity = n_name)
-
+        n_name.set_ast_link(self.ct)
         while self.peek("DOT") or self.peek("S_BRA"):
             if self.peek("DOT"):
                 if not isinstance(n_name.typ, ast.Tuple_Type):
@@ -1230,15 +1378,18 @@ def parse_name(self, scope):
                                   (n_name.to_string(),
                                    n_name.typ.name))
                 self.match("DOT")
+                t_dot = self.ct
                 self.match("IDENTIFIER")
                 n_field = n_name.typ.components.lookup(
                     self.mh,
                     self.ct,
                     ast.Composite_Component)
+                n_field.set_ast_link(self.ct)
                 n_name = ast.Field_Access_Expression(
                     mh = self.mh,
                     location = self.ct.location,
                     n_prefix = n_name,
                     n_field = n_field)
+                n_name.set_ast_link(t_dot)
 
             elif self.peek("S_BRA"):
                 if not isinstance(n_name.typ, ast.Array_Type):
@@ -1252,12 +1403,15 @@ def parse_name(self, scope):
                 t_bracket = self.ct
                 n_index = self.parse_expression(scope)
                 self.match("S_KET")
+                a_binary = ast.Binary_Operator.INDEX
+                t_bracket.ast_link = a_binary
+                self.ct.ast_link = a_binary
                 n_name = ast.Binary_Expression(
                     mh = self.mh,
                     location = t_bracket.location,
                     typ = n_name.typ.element_type,
-                    operator = ast.Binary_Operator.INDEX,
+                    operator = a_binary,
                     n_lhs = n_name,
                     n_rhs = n_index)
 
@@ -1265,7 +1419,9 @@ def parse_name(self, scope):
 
     def parse_check_block(self):
         # lobster-trace: LRM.Check_Block
+        t_severity = None
         self.match_kw("checks")
+        t_checks = self.ct
         self.match("IDENTIFIER")
         # lobster-trace: LRM.Applicable_Types
         # lobster-trace: LRM.Applicable_Components
@@ -1274,11 +1430,14 @@ def parse_check_block(self):
                                             ast.Composite_Type)
         n_check_block = ast.Check_Block(location = self.ct.location,
                                         n_typ = n_ctype)
+        n_check_block.set_ast_link(t_checks)
+        n_ctype.set_ast_link(self.ct)
         scope = ast.Scope()
         scope.push(self.stab)
         scope.push(self.cu.package.symbols)
         scope.push(n_ctype.components)
         self.match("C_BRA")
+        n_check_block.set_ast_link(self.ct)
         while not self.peek("C_KET"):
             c_expr = self.parse_expression(scope)
             if not isinstance(c_expr.typ, ast.Builtin_Boolean):
@@ -1286,8 +1445,10 @@ def parse_check_block(self):
                               "check expression must be Boolean")
 
             self.match("COMMA")
+            t_first_comma = self.ct
             if self.peek("KEYWORD"):
                 self.match("KEYWORD")
+                t_severity = self.ct
                 if self.ct.value not in ("warning", "error", "fatal"):
                     self.mh.error(self.ct.location,
                                   "expected warning|error|fatal")
@@ -1307,6 +1468,7 @@
             has_anchor = False
             if self.peek("COMMA"):
                 self.match("COMMA")
+                t_second_comma = self.ct
                 if self.peek("IDENTIFIER"):
                     has_anchor = True
                 elif self.peek("STRING"):
@@ -1318,10 +1480,12 @@ def parse_check_block(self):
 
             if has_extrainfo:
                 self.match("STRING")
+                t_extrainfo = self.ct
                 c_extrainfo = self.ct.value
 
                 if self.peek("COMMA"):
                     self.match("COMMA")
+                    t_third_comma = self.ct
                     has_anchor = True
 
             else:
@@ -1329,6 +1493,7 @@ def parse_check_block(self):
 
             if has_anchor:
                 self.match("IDENTIFIER")
+                t_anchor = self.ct
                 c_anchor = n_ctype.components.lookup(self.mh,
                                                      self.ct,
                                                      ast.Composite_Component)
@@ -1342,18 +1507,33 @@ def parse_check_block(self):
                                  t_message = t_msg,
                                  extrainfo = c_extrainfo)
 
+            n_check.set_ast_link(t_first_comma)
+            if t_severity:
+                n_check.set_ast_link(t_severity)
+            n_check.set_ast_link(t_msg)
+            if c_extrainfo or c_anchor:
+                n_check.set_ast_link(t_second_comma)
+            if c_extrainfo:
+                n_check.set_ast_link(t_extrainfo)
+            if c_anchor:
+                c_anchor.set_ast_link(t_anchor)
+            if c_anchor and c_extrainfo:
+                n_check.set_ast_link(t_third_comma)
+
             n_ctype.add_check(n_check)
             n_check_block.add_check(n_check)
 
         assert scope.size() == 3
         self.match("C_KET")
+        n_check_block.set_ast_link(self.ct)
 
         return n_check_block
 
     def parse_section_declaration(self):
         # lobster-trace: LRM.Section_Declaration
         self.match_kw("section")
+        t_section = self.ct
         self.match("STRING")
         if self.section:
             sec = ast.Section(name = self.ct.value,
@@ -1363,11 +1543,15 @@ def parse_section_declaration(self):
             sec = ast.Section(name = self.ct.value,
                               location = self.ct.location,
                               parent = None)
+        sec.set_ast_link(self.ct)
+        sec.set_ast_link(t_section)
         self.section.append(sec)
         self.match("C_BRA")
+        sec.set_ast_link(self.ct)
         while not self.peek("C_KET"):
             self.parse_trlc_entry()
         self.match("C_KET")
+        sec.set_ast_link(self.ct)
         self.section.pop()
 
     def parse_boolean(self):
@@ -1392,15 +1576,18 @@ def parse_value(self, typ):
                 e_op = (ast.Unary_Operator.PLUS
                         if t_op.value == "+"
                         else ast.Unary_Operator.MINUS)
+                t_op.ast_link = e_op
             else:
                 t_op = None
 
             if isinstance(typ, ast.Builtin_Decimal):
                 self.match("DECIMAL")
                 rv = ast.Decimal_Literal(self.ct, self.builtin_decimal)
+                rv.set_ast_link(self.ct)
             elif isinstance(typ, ast.Builtin_Integer):
                 self.match("INTEGER")
                 rv = ast.Integer_Literal(self.ct, self.builtin_int)
+                rv.set_ast_link(self.ct)
             else:
                 assert False
@@ -1420,19 +1607,26 @@ def parse_value(self, typ):
         elif isinstance(typ, ast.Builtin_String):
             # lobster-trace: LRM.String_Values
             self.match("STRING")
-            return ast.String_Literal(self.ct, self.builtin_str)
+            rv = ast.String_Literal(self.ct, self.builtin_str)
+            rv.set_ast_link(self.ct)
+            return rv
 
         elif isinstance(typ, ast.Builtin_Boolean):
-            return self.parse_boolean()
+            rv = self.parse_boolean()
+            rv.set_ast_link(self.ct)
+            return rv
 
         elif isinstance(typ, ast.Array_Type):
             self.match("S_BRA")
             rv = ast.Array_Aggregate(self.ct.location, typ)
+            rv.set_ast_link(self.ct)
             while not self.peek("S_KET"):
-                rv.append(self.parse_value(typ.element_type))
+                array_elem = self.parse_value(typ.element_type)
+                rv.append(array_elem)
                 if self.peek("COMMA"):
                     self.match("COMMA")
+                    rv.set_ast_link(self.ct)
                 elif self.peek("S_KET") or self.nt is None:
                     break
                 else:
@@ -1442,6 +1636,7 @@ def parse_value(self, typ):
                                   fatal = False)
 
             self.match("S_KET")
+            rv.set_ast_link(self.ct)
 
             if len(rv.value) < typ.lower_bound:
                 self.mh.error(self.ct.location,
@@ -1463,10 +1658,12 @@ def parse_value(self, typ):
         elif isinstance(typ, ast.Enumeration_Type):
             enum = self.parse_qualified_name(self.default_scope,
                                              ast.Enumeration_Type)
+            enum.set_ast_link(self.ct)
             if enum != typ:
                 self.mh.error(self.ct.location,
                               "expected %s" % typ.name)
             self.match("DOT")
+            enum.set_ast_link(self.ct)
             self.match("IDENTIFIER")
             lit = enum.literals.lookup(self.mh,
                                        self.ct,
@@ -1479,8 +1676,11 @@ def parse_value(self, typ):
             t_name = self.ct
             if self.peek("DOT"):
                 self.match("DOT")
+                t_dot = self.ct
                 self.match("IDENTIFIER")
                 the_pkg = self.stab.lookup(self.mh, t_name, ast.Package)
+                the_pkg.set_ast_link(t_name)
+                the_pkg.set_ast_link(t_dot)
                 if not self.cu.is_visible(the_pkg):
                     self.mh.error(self.ct.location,
                                   "package must be imported before use")
@@ -1492,6 +1692,7 @@ def parse_value(self, typ):
                 name = t_name.value,
                 typ = typ,
                 package = the_pkg)
+            rv.set_ast_link(t_name)
 
             # We can do an early lookup if the target is known
            if the_pkg.symbols.contains(t_name.value):
@@ -1507,12 +1708,13 @@ def parse_value(self, typ):
                 if isinstance(n_item, ast.Composite_Component):
                     if next_is_optional and n_item.optional:
                         break
-                    rv.assign(n_item.name,
-                              self.parse_value(n_item.n_typ))
+                    value = self.parse_value(n_item.n_typ)
+                    rv.assign(n_item.name, value)
 
                 elif n_item.token.kind in ("AT", "COLON", "SEMICOLON"):
                     if self.peek(n_item.token.kind):
                         self.match(n_item.token.kind)
+                        n_item.set_ast_link(self.ct)
                     else:
                         next_is_optional = True
@@ -1520,6 +1722,7 @@ def parse_value(self, typ):
                     if self.peek("IDENTIFIER") and \
                        self.nt.value == n_item.token.value:
                         self.match("IDENTIFIER")
+                        n_item.set_ast_link(self.ct)
                     else:
                         next_is_optional = True
@@ -1531,6 +1734,7 @@ def parse_value(self, typ):
         elif isinstance(typ, ast.Tuple_Type) and not typ.has_separators():
             self.match("BRA")
             rv = ast.Tuple_Aggregate(self.ct.location, typ)
+            rv.set_ast_link(self.ct)
 
             first = True
             for n_field in typ.iter_sequence():
@@ -1538,10 +1742,12 @@ def parse_value(self, typ):
                     first = False
                 else:
                     self.match("COMMA")
+                    rv.set_ast_link(self.ct)
                 rv.assign(n_field.name, self.parse_value(n_field.n_typ))
 
             self.match("KET")
+            rv.set_ast_link(self.ct)
 
             return rv
 
         else:
@@ -1561,6 +1767,7 @@ def parse_record_object_declaration(self):
         # lobster-trace: LRM.Section_Declaration
         r_typ = self.parse_qualified_name(self.default_scope,
                                           ast.Record_Type)
+        r_typ.set_ast_link(self.ct)
         if r_typ.is_abstract:
             self.mh.error(self.ct.location,
                           "cannot declare object of abstract record type %s" %
@@ -1574,19 +1781,25 @@ def parse_record_object_declaration(self):
             section = self.section[-1] if self.section else None,
             n_package = self.cu.package)
         self.cu.package.symbols.register(self.mh, obj)
+        obj.set_ast_link(self.ct)
 
         self.match("C_BRA")
+        obj.set_ast_link(self.ct)
         while not self.peek("C_KET"):
             self.match("IDENTIFIER")
             comp = r_typ.components.lookup(self.mh,
                                            self.ct,
                                            ast.Composite_Component)
+            comp.set_ast_link(self.ct)
            if r_typ.is_frozen(comp):
                 self.mh.error(self.ct.location,
                               "cannot overwrite frozen component %s" %
                               comp.name)
             self.match("ASSIGN")
+            comp.set_ast_link(self.ct)
             value = self.parse_value(comp.n_typ)
+            if not self.ct.ast_link:
+                value.set_ast_link(self.ct)
             obj.assign(comp, value)
 
         # Check that each non-optional component has been specified
@@ -1602,6 +1815,7 @@ def parse_record_object_declaration(self):
                          self.mh.cross_file_reference(comp.location)))
 
         self.match("C_KET")
+        obj.set_ast_link(self.ct)
 
         return obj
@@ -1621,6 +1835,7 @@ def parse_preamble(self, kind):
         # First, parse package indication, declaring the package if
         # needed
         self.match_kw("package")
+        t_pkg = self.ct
         self.match("IDENTIFIER")
 
         if kind == "rsl":
@@ -1651,6 +1866,9 @@ def parse_preamble(self, kind):
                       (pkg.name,
                        self.mh.cross_file_reference(pkg.location)))
 
+        pkg.set_ast_link(t_pkg)
+        pkg.set_ast_link(self.ct)
+
         # lobster-trace: LRM.Current_Package
         self.cu.set_package(pkg)
@@ -1661,6 +1879,7 @@ def parse_preamble(self, kind):
 
         if kind != "check":
             while self.peek_kw("import"):
                 self.match_kw("import")
+                pkg.set_ast_link(self.ct)
                 self.match("IDENTIFIER")
                 self.cu.add_import(self.mh, self.ct)
@@ -1697,6 +1916,10 @@ def parse_rsl_file(self):
 
         self.match_eof()
 
+        for tok in self.lexer.tokens:
+            if tok.kind == "COMMENT":
+                self.cu.package.set_ast_link(tok)
+
         return ok
 
     def parse_check_file(self):
@@ -1731,6 +1954,10 @@ def parse_check_file(self):
 
         self.match_eof()
 
+        for tok in self.lexer.tokens:
+            if tok.kind == "COMMENT":
+                self.cu.package.set_ast_link(tok)
+
         return ok
 
     def parse_trlc_file(self):
@@ -1773,4 +2000,8 @@ def parse_trlc_file(self):
 
         self.match_eof()
 
+        for tok in self.lexer.tokens:
+            if tok.kind == "COMMENT":
+                self.cu.package.set_ast_link(tok)
+
         return ok
diff --git a/trlc/trlc.py b/trlc/trlc.py
index c1d6352f..e7b41823 100644
--- a/trlc/trlc.py
+++ b/trlc/trlc.py
@@ -30,7 +30,7 @@ from trlc import lint
 from trlc.errors import TRLC_Error, Location, Message_Handler, Kind
 from trlc.parser import Parser
-from trlc.lexer import TRLC_Lexer
+from trlc.lexer import Token_Stream
 from trlc.version import TRLC_VERSION, BUGS_URL
 
 # pylint: disable=unused-import
@@ -160,7 +160,7 @@ def create_parser(self, file_name, file_content=None, primary_file=True):
         assert isinstance(file_content, str) or file_content is None
         assert isinstance(primary_file, bool)
 
-        lexer = TRLC_Lexer(self.mh, file_name, file_content)
+        lexer = Token_Stream(self.mh, file_name, file_content)
 
         return Parser(mh = self.mh,
                       stab = self.stab,
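
Note (editor's sketch, not part of the commit above): the patch wires two things together -- Token_Stream records every token it hands out in lexer.tokens, and the parser then calls Node.set_ast_link() so each token ends up pointing at the AST node it contributed to. The snippet below illustrates that flow at the lexer level. The constructor call mirrors the one in create_parser() in trlc.py above; the file name and the standalone driver wiring are assumptions for illustration only.

    # Editor's sketch only -- "example.rsl" is a placeholder file name.
    from trlc.errors import Message_Handler
    from trlc.lexer import Token_Stream

    mh = Message_Handler()
    # Same call shape as in create_parser() above; passing None for the
    # content means the lexer reads the file from disk.
    lexer = Token_Stream(mh, "example.rsl", None)

    # Token_Stream.token() appends every token it returns to lexer.tokens.
    while lexer.token() is not None:
        pass

    for tok in lexer.tokens:
        # tok.ast_link stays None until a Parser has processed this stream
        # and called set_ast_link(); afterwards it points at the AST node
        # (package, type, component, literal, ...) built from this token.
        print(tok.kind, tok.value, tok.ast_link)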