From 9f6bf436f54160b65d7680aab9094b2d98cc51a4 Mon Sep 17 00:00:00 2001
From: Victor Lei <victorlei@gmail.com>
Date: Mon, 29 Aug 2016 03:25:13 +0300
Subject: [PATCH] Track END_STMT tokens and generate END_FUNCTION tokens where
 appropriate

---
 smop/lexer.py | 40 +++++++++++++++++++++++++---------------
 smop/parse.py |  3 +++
 2 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/smop/lexer.py b/smop/lexer.py
index ba543840..890ec3b1 100644
--- a/smop/lexer.py
+++ b/smop/lexer.py
@@ -9,6 +9,9 @@
 import readline
 import options
 
+class unbalanced_end(Exception):
+    pass
+
 class IllegalCharacterError(Exception):
     pass
 
@@ -20,7 +23,8 @@ class IllegalCharacterError(Exception):
            "MINUS","MINUSMINUS","MINUSEQ","MUL","MULEQ","NE", "NEG",
            "NUMBER", "OR","OREQ", "OROR", "PLUS", "PLUSEQ","PLUSPLUS",
            "RBRACE", "RBRACKET", "RPAREN", "SEMI", "STRING",
-           "TRANSPOSE", "ERROR_STMT", "COMMENT", "END_FUNCTION","POW", ]
+           "TRANSPOSE", "ERROR_STMT", "COMMENT", "END_FUNCTION",
+           "END_UNEXPECTED","POW", ]
 
 reserved = {
     "break"                  : "BREAK",
@@ -149,22 +153,28 @@ def t_IDENT(t):
             # is illegal, but foo.return=1 is fine.
             t.type = "FIELD"
             return t
-        if t.value == "endfunction":
-            t.type = "END_FUNCTION"
+        if (t.value == "end" and (t.lexer.parens > 0 or
+                                  t.lexer.brackets > 0 or
+                                  t.lexer.braces > 0)):
+            t.type = "END_EXPR"
             return t
-        if t.value in ("endwhile", "endif","endfor",
-                       "endswitch","end_try_catch"):
-            t.type = "END_STMT"
-            return t
-        if t.value == "end":
-            if (t.lexer.parens > 0 or
-                t.lexer.brackets > 0 or
-                t.lexer.braces > 0):
-                t.type = "END_EXPR"
+        if t.value in ("end","endif","endfunction","endwhile",
+                           "endfor","endswitch","end_try_catch"):
+            keyword = t.lexer.stack.pop() # if,while,etc.
+            #assert keyword == t.value or keyword == "try"
+            if keyword == "function":
+                t.type = "END_FUNCTION"
             else:
                 t.type = "END_STMT"
+            return t
         else:
             t.type = reserved.get(t.value,"IDENT")
+            if t.value in ("if","function","while",
+                           "for","switch","try"):
+                # The lexer stack only ever holds these
+                # six keywords, because this is the
+                # only place that pushes onto it.
+                t.lexer.stack.append(t.value)
             if (t.type != "IDENT" and 
                 t.lexer.lexdata[t.lexer.lexpos]=="'"):
                 t.lexer.begin("afterkeyword")
@@ -326,9 +336,9 @@ def t_error(t):
 
     lexer = lex.lex(reflags=re.MULTILINE)
     lexer.brackets = 0  # count open square brackets
-    lexer.parens = 0    # count open parentheses
-    lexer.braces = 0    # count open curly braces
-    lexer.stack  = []
+    lexer.parens   = 0  # count open parentheses
+    lexer.braces   = 0  # count open curly braces
+    lexer.stack    = []
     return lexer
 
 def main():
diff --git a/smop/parse.py b/smop/parse.py
index 78e19380..de18db46 100644
--- a/smop/parse.py
+++ b/smop/parse.py
@@ -831,6 +831,9 @@ def p_error(p):
         #print "Discarded comment", p.value
         parser.errok()
         return
+    if p.type == "END_UNEXPECTED":
+        raise syntax_error(p)
+
     raise syntax_error(p)
 
 parser = yacc.yacc(start="top")