From 9f6bf436f54160b65d7680aab9094b2d98cc51a4 Mon Sep 17 00:00:00 2001 From: Victor Lei Date: Mon, 29 Aug 2016 03:25:13 +0300 Subject: [PATCH] Track END_STMT tokens and generate END_FUNCTION tokens where appropriate --- smop/lexer.py | 40 +++++++++++++++++++++++++--------------- smop/parse.py | 3 +++ 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/smop/lexer.py b/smop/lexer.py index ba543840..890ec3b1 100644 --- a/smop/lexer.py +++ b/smop/lexer.py @@ -9,6 +9,9 @@ import readline import options +class unbalanced_end(Exception): + pass + class IllegalCharacterError(Exception): pass @@ -20,7 +23,8 @@ class IllegalCharacterError(Exception): "MINUS","MINUSMINUS","MINUSEQ","MUL","MULEQ","NE", "NEG", "NUMBER", "OR","OREQ", "OROR", "PLUS", "PLUSEQ","PLUSPLUS", "RBRACE", "RBRACKET", "RPAREN", "SEMI", "STRING", - "TRANSPOSE", "ERROR_STMT", "COMMENT", "END_FUNCTION","POW", ] + "TRANSPOSE", "ERROR_STMT", "COMMENT", "END_FUNCTION", + "END_UNEXPECTED","POW", ] reserved = { "break" : "BREAK", @@ -149,22 +153,28 @@ def t_IDENT(t): # is illegal, but foo.return=1 is fine. t.type = "FIELD" return t - if t.value == "endfunction": - t.type = "END_FUNCTION" + if (t.value == "end" and (t.lexer.parens > 0 or + t.lexer.brackets > 0 or + t.lexer.braces > 0)): + t.type = "END_EXPR" return t - if t.value in ("endwhile", "endif","endfor", - "endswitch","end_try_catch"): - t.type = "END_STMT" - return t - if t.value == "end": - if (t.lexer.parens > 0 or - t.lexer.brackets > 0 or - t.lexer.braces > 0): - t.type = "END_EXPR" + if t.value in ("end","endif","endfunction","endwhile", + "endfor","endswitch","end_try_catch"): + keyword = t.lexer.stack.pop() # if,while,etc. + #assert keyword == t.value or keyword == "try" + if keyword == "function": + t.type = "END_FUNCTION" else: t.type = "END_STMT" + return t else: t.type = reserved.get(t.value,"IDENT") + if t.value in ("if","function","while", + "for","switch","try"): + # lexer stack may contain only these + # six words, ever, because there is + # one place to push -- here + t.lexer.stack.append(t.value) if (t.type != "IDENT" and t.lexer.lexdata[t.lexer.lexpos]=="'"): t.lexer.begin("afterkeyword") @@ -326,9 +336,9 @@ def t_error(t): lexer = lex.lex(reflags=re.MULTILINE) lexer.brackets = 0 # count open square brackets - lexer.parens = 0 # count open parentheses - lexer.braces = 0 # count open curly braces - lexer.stack = [] + lexer.parens = 0 # count open parentheses + lexer.braces = 0 # count open curly braces + lexer.stack = [] return lexer def main(): diff --git a/smop/parse.py b/smop/parse.py index 78e19380..de18db46 100644 --- a/smop/parse.py +++ b/smop/parse.py @@ -831,6 +831,9 @@ def p_error(p): #print "Discarded comment", p.value parser.errok() return + if p.type == "END_UNEXPECTED": + raise syntax_error(p) + raise syntax_error(p) parser = yacc.yacc(start="top")