From b570e1bda7732c37b748155fb5dc2716c0517a20 Mon Sep 17 00:00:00 2001 From: Robin Stocker Date: Sat, 6 Apr 2024 12:06:32 +0700 Subject: [PATCH] Fix link reference definition parsing with invalid title An input like this: ``` [foo]: /url "title" bad ``` Means the second line is just a paragraph because only spaces/tabs are allowed after a title. The parser used to set the title to "title" in this case and assign the source span of the second line to the definition, which is wrong. Fixes #315. --- CHANGELOG.md | 7 ++++++ .../LinkReferenceDefinitionParser.java | 10 ++++++-- .../LinkReferenceDefinitionParserTest.java | 24 +++++++++++++++++++ .../org/commonmark/test/SourceSpansTest.java | 9 +++++++ 4 files changed, 48 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e31b8d15..df6beb20 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,13 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). This project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html), with the exception that 0.x versions can break between minor versions. +## Unreleased +### Fixed +- Fix parsing of link reference definitions where it looks like it has a title + but it doesn't because it's followed by characters other than space/tab. In that + case, the title was set to the partially-parsed title and the source spans were + wrong (#315). + ## [0.22.0] - 2024-03-15 ### Added - New `MarkdownRenderer` for rendering nodes to Markdown (CommonMark)! diff --git a/commonmark/src/main/java/org/commonmark/internal/LinkReferenceDefinitionParser.java b/commonmark/src/main/java/org/commonmark/internal/LinkReferenceDefinitionParser.java index ebe6ebb2..b58e669e 100644 --- a/commonmark/src/main/java/org/commonmark/internal/LinkReferenceDefinitionParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/LinkReferenceDefinitionParser.java @@ -15,7 +15,7 @@ /** * Parser for link reference definitions at the beginning of a paragraph. * - * @see Link reference definitions + * @see Link reference definitions */ public class LinkReferenceDefinitionParser { @@ -70,6 +70,9 @@ public void parse(SourceLine line) { // Parsing failed, which means we fall back to treating text as a paragraph. if (!success) { state = State.PARAGRAPH; + // If parsing of the title part failed, we still have a valid reference that we can add, and we need to + // do it before the source span for this line is added. + finishReference(); return; } } @@ -218,7 +221,8 @@ private boolean startTitle(Scanner scanner) { private boolean title(Scanner scanner) { Position start = scanner.position(); if (!LinkScanner.scanLinkTitleContent(scanner, titleDelimiter)) { - // Invalid title, stop + // Invalid title, stop. Title collected so far must not be used. + title = null; return false; } @@ -235,6 +239,8 @@ private boolean title(Scanner scanner) { scanner.whitespace(); if (scanner.hasNext()) { // spec: No further non-whitespace characters may occur on the line. + // Title collected so far must not be used. + title = null; return false; } referenceValid = true; diff --git a/commonmark/src/test/java/org/commonmark/internal/LinkReferenceDefinitionParserTest.java b/commonmark/src/test/java/org/commonmark/internal/LinkReferenceDefinitionParserTest.java index b4f57739..3f22adac 100644 --- a/commonmark/src/test/java/org/commonmark/internal/LinkReferenceDefinitionParserTest.java +++ b/commonmark/src/test/java/org/commonmark/internal/LinkReferenceDefinitionParserTest.java @@ -145,6 +145,30 @@ public void testTitleMultiline2() { assertDef(parser.getDefinitions().get(0), "foo", "/url", "\ntitle"); } + @Test + public void testTitleMultiline3() { + parse("[foo]: /url"); + assertEquals(State.START_TITLE, parser.getState()); + // Note that this looks like a valid title until we parse "bad", at which point we need to treat the whole line + // as a paragraph line and discard any already parsed title. + parse("\"title\" bad"); + assertEquals(State.PARAGRAPH, parser.getState()); + + assertDef(parser.getDefinitions().get(0), "foo", "/url", null); + } + + @Test + public void testTitleMultiline4() { + parse("[foo]: /url"); + assertEquals(State.START_TITLE, parser.getState()); + parse("(title"); + assertEquals(State.TITLE, parser.getState()); + parse("foo("); + assertEquals(State.PARAGRAPH, parser.getState()); + + assertDef(parser.getDefinitions().get(0), "foo", "/url", null); + } + @Test public void testTitleInvalid() { assertParagraph("[foo]: /url (invalid("); diff --git a/commonmark/src/test/java/org/commonmark/test/SourceSpansTest.java b/commonmark/src/test/java/org/commonmark/test/SourceSpansTest.java index 95959893..59241e49 100644 --- a/commonmark/src/test/java/org/commonmark/test/SourceSpansTest.java +++ b/commonmark/src/test/java/org/commonmark/test/SourceSpansTest.java @@ -183,6 +183,15 @@ public void linkReferenceDefinitionWithTitle() { assertEquals(List.of(SourceSpan.of(1, 0, 11)), def2.getSourceSpans()); } + @Test + public void linkReferenceDefinitionWithTitleInvalid() { + var doc = PARSER.parse("[foo]: /url\n\"title\" ok\n"); + var def = Nodes.find(doc, LinkReferenceDefinition.class); + var paragraph = Nodes.find(doc, Paragraph.class); + assertEquals(List.of(SourceSpan.of(0, 0, 11)), def.getSourceSpans()); + assertEquals(List.of(SourceSpan.of(1, 0, 10)), paragraph.getSourceSpans()); + } + @Test public void linkReferenceDefinitionHeading() { // This is probably the trickiest because we have a link reference definition at the start of a paragraph