diff --git a/mdsplit.py b/mdsplit.py index 19e2234..b64da0d 100755 --- a/mdsplit.py +++ b/mdsplit.py @@ -124,9 +124,12 @@ class Line: """ Detect code blocks and ATX headings. - Headings according to commonmark: + Headings are detected according to CommonMark, e.g.: - only 6 valid levels - up to three spaces before the first # is ok + - empty heading is valid + - closing hashes are stripped + - whitespace around the title is stripped """ def __init__(self, line): @@ -134,10 +137,17 @@ def __init__(self, line): self.heading_level = 0 self.heading_title = None - result = re.search("^[ ]?[ ]?[ ]?(#+) (.*)", line) - if result is not None and len(result[1]) <= MAX_HEADING_LEVEL: + result = re.search("^[ ]{0,3}(#+)(.*)", line) + if result is not None and (len(result[1]) <= MAX_HEADING_LEVEL): + title = result[2] + if len(title) > 0 and not (title.startswith(" ") or title.startswith("\t")): + # if there is a title it must start with space or tab + return self.heading_level = len(result[1]) - self.heading_title = result[2] + + # strip surrounding whitespace and closing hashes + title = title.strip().rstrip("#").rstrip() + self.heading_title = title def is_fence(self): for fence in FENCES: diff --git a/test_mdsplit.py b/test_mdsplit.py index 6acee6a..3037d1b 100644 --- a/test_mdsplit.py +++ b/test_mdsplit.py @@ -42,6 +42,22 @@ def test_line(): line = Line(" # four spaces are too much") assert not line.is_heading() + line = Line("#At least one space or tab required after heading") + assert not line.is_heading() + + line = Line("#\ta tab is ok") + assert line.is_heading() + + line = Line("###") # headings without title (also without a space) are allowed + assert line.heading_level == 3 + assert line.heading_title == "" + + line = Line("#\t please strip\t\t ") + assert line.heading_title == "please strip" + + line = Line("## strip rightmost hashes ######### ") + assert line.heading_title == "strip rightmost hashes" + @pytest.mark.parametrize("max_level", [1, 3]) def 
test_split_by_heading_simple(max_level):