From 8afb2a5a3f050b822e7b49840afc220a2aeafb59 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ely=C3=A9zer=20Rezende?=
Date: Fri, 17 Jun 2016 17:11:26 -0300
Subject: [PATCH] Improve docstring parser

Using split may lead to unpredictable parsing. Use a regex instead to
avoid that. For example:

    """Some content before tokens

    More content, but with : more content

    @token1: value1
    @token2: value2
    """

With the old parser, the docstring above generates an invalid token
like the following:

    Some content before tokens

    More content, but with : more content

In the above snippet, all content up to the `:` becomes the token and
all content after the `:` becomes the value. That is not a valid token
since it does not start with `@`. The new parser, on the other hand,
picks up only the expected tokens.

Also, the new parser expects a blank line after each token value, which
ensures better generation of documentation by Sphinx.
---
 testimony/parser.py | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/testimony/parser.py b/testimony/parser.py
index be9b057..0d1af53 100644
--- a/testimony/parser.py
+++ b/testimony/parser.py
@@ -1,7 +1,11 @@
 # coding=utf-8
 """Docstring parser utilities for Testimony."""
+import re
+
 from testimony.constants import DEFAULT_MINIMUM_TOKENS, DEFAULT_TOKENS
 
+TOKEN_RE = re.compile(r'^@(\w+):\s+([^@]+)(\n|$)', flags=re.MULTILINE)
+
 
 class DocstringParser(object):
     """Parse docstring extracting tokens."""
@@ -46,19 +50,13 @@ def parse(self, docstring=None):
             return {}, {}
         valid_tokens = {}
         invalid_tokens = {}
-        for line in docstring.split('@'):
-            line = line.rstrip()
-            # Sometimes there are double new line characters in the middle. We
-            # need only one of those to print
-            line = line.replace('\n\n', '\n')
-            if len(line) > 0 and ':' in line:
-                token, value = line.split(':', 1)
-                token = token.lower()
-                value = value.strip()
-                if token in self.tokens:
-                    valid_tokens[token] = value
-                else:
-                    invalid_tokens[token] = value
+        for match in TOKEN_RE.finditer(docstring):
+            token = match.group(1).strip().lower()
+            value = match.group(2).strip()
+            if token in self.tokens:
+                valid_tokens[token] = value
+            else:
+                invalid_tokens[token] = value
         return valid_tokens, invalid_tokens
 
     def validate_tokens(self, tokens):
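
Note: the following is a standalone sketch of how the new TOKEN_RE
behaves on the docstring from the commit message. It is illustrative
only and not part of the patch; the `docstring` variable is just the
example above written out as a plain string.

    import re

    # Same pattern the patch adds to testimony/parser.py.
    TOKEN_RE = re.compile(r'^@(\w+):\s+([^@]+)(\n|$)', flags=re.MULTILINE)

    docstring = (
        'Some content before tokens\n'
        '\n'
        'More content, but with : more content\n'
        '\n'
        '@token1: value1\n'
        '@token2: value2\n'
    )

    # The prose before the tokens produces no match, so no bogus token
    # is generated; only the lines starting with @ are captured.
    for match in TOKEN_RE.finditer(docstring):
        print(match.group(1).lower(), '->', match.group(2).strip())

    # Output:
    # token1 -> value1
    # token2 -> value2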