From 8afb2a5a3f050b822e7b49840afc220a2aeafb59 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ely=C3=A9zer=20Rezende?=
Date: Fri, 17 Jun 2016 17:11:26 -0300
Subject: [PATCH] Improve docstring parser

Using split may lead to unpredictable parsing. Use a regex instead to
avoid that. For example:

    """Some content before tokens

    More content, but with : more content

    @token1: value1
    @token2: value2
    """

With the old parser, the docstring above generates an invalid token
like the following:

    Some content before tokens

    More content, but with : more content

In the above snippet, all content up to the `:` becomes the token and
all content after the `:` becomes the value. That is not a valid token
since it does not start with `@`. The new parser, on the other hand,
picks up only the expected tokens.

Also, the new parser expects a blank line after each token value, which
ensures better generation of documentation by Sphinx.
---
 testimony/parser.py | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/testimony/parser.py b/testimony/parser.py
index be9b057..0d1af53 100644
--- a/testimony/parser.py
+++ b/testimony/parser.py
@@ -1,7 +1,11 @@
 # coding=utf-8
 """Docstring parser utilities for Testimony."""
+import re
+
 from testimony.constants import DEFAULT_MINIMUM_TOKENS, DEFAULT_TOKENS
 
+TOKEN_RE = re.compile(r'^@(\w+):\s+([^@]+)(\n|$)', flags=re.MULTILINE)
+
 
 class DocstringParser(object):
     """Parse docstring extracting tokens."""
@@ -46,19 +50,13 @@ def parse(self, docstring=None):
             return {}, {}
         valid_tokens = {}
         invalid_tokens = {}
-        for line in docstring.split('@'):
-            line = line.rstrip()
-            # Sometimes there are double new line characters in the middle. We
-            # need only one of those to print
-            line = line.replace('\n\n', '\n')
-            if len(line) > 0 and ':' in line:
-                token, value = line.split(':', 1)
-                token = token.lower()
-                value = value.strip()
-                if token in self.tokens:
-                    valid_tokens[token] = value
-                else:
-                    invalid_tokens[token] = value
+        for match in TOKEN_RE.finditer(docstring):
+            token = match.group(1).strip().lower()
+            value = match.group(2).strip()
+            if token in self.tokens:
+                valid_tokens[token] = value
+            else:
+                invalid_tokens[token] = value
         return valid_tokens, invalid_tokens
 
     def validate_tokens(self, tokens):
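
Note: the following is a standalone sketch of how the new TOKEN_RE
behaves on the docstring from the commit message. It is illustrative
only and not part of the patch; the `docstring` variable is just the
example above written out as a plain string.

    import re

    # Same pattern the patch adds to testimony/parser.py.
    TOKEN_RE = re.compile(r'^@(\w+):\s+([^@]+)(\n|$)', flags=re.MULTILINE)

    docstring = (
        'Some content before tokens\n'
        '\n'
        'More content, but with : more content\n'
        '\n'
        '@token1: value1\n'
        '@token2: value2\n'
    )

    # The prose before the tokens produces no match, so no bogus token
    # is generated; only the lines starting with @ are captured.
    for match in TOKEN_RE.finditer(docstring):
        print(match.group(1).lower(), '->', match.group(2).strip())

    # Output:
    # token1 -> value1
    # token2 -> value2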