Improve docstring parser
Using split may lead to unpredictable parsing. Use a regex to improve the
parser and avoid that.

For example:

    """Some content before tokens

    More content, but with : more content

    @Token1: value1

    @token2: value2
    """

Using the old parser with the docstring above will generate an invalid
token like the following:

    Some content before tokens
    More content, but with :
      more content

In the above snippet all content up to `:` is the token and all content after
`:` is the value, which is not valid since the token does not start with `@`.
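
Reproducing the old split-based loop from this commit's diff on the example
docstring shows the invalid token concretely (a minimal standalone sketch of
the removed code):

```python
docstring = """Some content before tokens

More content, but with : more content

@Token1: value1

@token2: value2
"""

# Old approach: every '@'-delimited chunk containing ':' becomes a
# token/value pair, even plain prose from before the first token.
tokens = {}
for line in docstring.split('@'):
    line = line.rstrip()
    # Collapse double newlines, as the old parser did.
    line = line.replace('\n\n', '\n')
    if len(line) > 0 and ':' in line:
        token, value = line.split(':', 1)
        tokens[token.lower()] = value.strip()

# Three entries come out; one key is the whole prose prefix
# "some content before tokens\nmore content, but with ".
print(len(tokens))  # → 3
```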

On the other hand, the new parser captures just the expected tokens.

Also, with the new parsing a blank line is expected after each token value,
which ensures better generation of documentation by Sphinx.
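
The new `TOKEN_RE` from this commit's diff, applied to the same example
docstring, yields only the real tokens:

```python
import re

# Regex added by this commit: a line starting with '@name:' followed by a
# value running up to the next '@' character (or the end of the docstring).
TOKEN_RE = re.compile(r'^@(\w+):\s+([^@]+)(\n|$)', flags=re.MULTILINE)

docstring = """Some content before tokens

More content, but with : more content

@Token1: value1

@token2: value2
"""

tokens = {
    match.group(1).lower(): match.group(2).strip()
    for match in TOKEN_RE.finditer(docstring)
}
print(tokens)  # → {'token1': 'value1', 'token2': 'value2'}
```

Note that token names are lowercased, so `@Token1` and `@token1` are treated
as the same token.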
elyezer committed Jun 17, 2016
1 parent 2dae21f commit 8afb2a5
Showing 1 changed file with 11 additions and 13 deletions: testimony/parser.py
@@ -1,7 +1,11 @@
# coding=utf-8
"""Docstring parser utilities for Testimony."""
import re

from testimony.constants import DEFAULT_MINIMUM_TOKENS, DEFAULT_TOKENS

TOKEN_RE = re.compile(r'^@(\w+):\s+([^@]+)(\n|$)', flags=re.MULTILINE)


class DocstringParser(object):
"""Parse docstring extracting tokens."""
@@ -46,19 +50,13 @@ def parse(self, docstring=None):
return {}, {}
valid_tokens = {}
invalid_tokens = {}
for line in docstring.split('@'):
line = line.rstrip()
# Sometimes there are double new line characters in the middle. We
# need only one of those to print
line = line.replace('\n\n', '\n')
if len(line) > 0 and ':' in line:
token, value = line.split(':', 1)
token = token.lower()
value = value.strip()
if token in self.tokens:
valid_tokens[token] = value
else:
invalid_tokens[token] = value
for match in TOKEN_RE.finditer(docstring):
token = match.group(1).strip().lower()
value = match.group(2).strip()
if token in self.tokens:
valid_tokens[token] = value
else:
invalid_tokens[token] = value
return valid_tokens, invalid_tokens

def validate_tokens(self, tokens):
