From 4c2f72948f261fd52c81b20b34145f9740362156 Mon Sep 17 00:00:00 2001 From: Robin Stocker Date: Sat, 25 May 2024 22:39:51 +1000 Subject: [PATCH] Allow BlockParsers to return definitions (for lookup during inline parsing) --- .../internal/FootnoteBlockParser.java | 10 +++++ .../internal/FootnoteBracketProcessor.java | 14 +++--- .../ext/footnotes/FootnotesTest.java | 6 +++ .../org/commonmark/internal/Definitions.java | 33 ++++++++++++++ .../commonmark/internal/DocumentParser.java | 45 ++++++++----------- .../internal/InlineParserContextImpl.java | 14 +++--- .../LinkReferenceDefinitionParser.java | 1 + .../commonmark/internal/ParagraphParser.java | 23 ++++++---- .../internal/inline/CoreBracketProcessor.java | 3 +- .../org/commonmark/node/DefinitionMap.java | 30 ++++++++++--- .../parser/InlineParserContext.java | 15 ++++++- .../java/org/commonmark/parser/Parser.java | 3 +- .../parser/block/AbstractBlockParser.java | 8 ++++ .../commonmark/parser/block/BlockParser.java | 9 ++++ .../test/InlineParserContextTest.java | 7 ++- 15 files changed, 166 insertions(+), 55 deletions(-) create mode 100644 commonmark/src/main/java/org/commonmark/internal/Definitions.java diff --git a/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteBlockParser.java b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteBlockParser.java index a0c1ea181..55ca86993 100644 --- a/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteBlockParser.java +++ b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteBlockParser.java @@ -2,8 +2,11 @@ import org.commonmark.ext.footnotes.FootnoteDefinition; import org.commonmark.node.Block; +import org.commonmark.node.DefinitionMap; import org.commonmark.parser.block.*; +import java.util.List; + public class FootnoteBlockParser extends AbstractBlockParser { private final FootnoteDefinition block; @@ -40,6 +43,13 @@ public BlockContinue tryContinue(ParserState parserState) { } } + @Override + public List> getDefinitions() { + var map = new DefinitionMap<>(FootnoteDefinition.class); + map.putIfAbsent(block.getLabel(), block); + return List.of(map); + } + public static class Factory implements BlockParserFactory { @Override diff --git a/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteBracketProcessor.java b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteBracketProcessor.java index 6fa4ac3ee..75f98b0bf 100644 --- a/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteBracketProcessor.java +++ b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteBracketProcessor.java @@ -1,5 +1,6 @@ package org.commonmark.ext.footnotes.internal; +import org.commonmark.ext.footnotes.FootnoteDefinition; import org.commonmark.ext.footnotes.FootnoteReference; import org.commonmark.parser.InlineParserContext; import org.commonmark.parser.beta.BracketInfo; @@ -13,13 +14,14 @@ public BracketResult process(BracketInfo bracketInfo, Scanner scanner, InlinePar // TODO: Does parsing need to be more strict here? var text = bracketInfo.text(); if (text.startsWith("^")) { - // TODO: Do we need to check if a definition exists before doing this? (That would be the same as reference - // links.) - - // For footnotes, we only ever consume the text part of the link, not the label part (if any). - var position = bracketInfo.afterTextBracket(); var label = text.substring(1); - return BracketResult.replaceWith(new FootnoteReference(label), position); + // Check if we have a definition, otherwise ignore (same behavior as for link reference definitions) + var def = context.getDefinition(FootnoteDefinition.class, label); + if (def != null) { + // For footnotes, we only ever consume the text part of the link, not the label part (if any). + var position = bracketInfo.afterTextBracket(); + return BracketResult.replaceWith(new FootnoteReference(label), position); + } } return BracketResult.none(); } diff --git a/commonmark-ext-footnotes/src/test/java/org/commonmark/ext/footnotes/FootnotesTest.java b/commonmark-ext-footnotes/src/test/java/org/commonmark/ext/footnotes/FootnotesTest.java index d9e44b5c4..51b722701 100644 --- a/commonmark-ext-footnotes/src/test/java/org/commonmark/ext/footnotes/FootnotesTest.java +++ b/commonmark-ext-footnotes/src/test/java/org/commonmark/ext/footnotes/FootnotesTest.java @@ -114,6 +114,12 @@ public void testReference() { assertEquals("foo", ref.getLabel()); } + @Test + public void testReferenceNoDefinition() { + var doc = PARSER.parse("Test [^foo]\n"); + assertNull(tryFind(doc, FootnoteReference.class)); + } + // Interesting test cases: // Test [foo][^bar] diff --git a/commonmark/src/main/java/org/commonmark/internal/Definitions.java b/commonmark/src/main/java/org/commonmark/internal/Definitions.java new file mode 100644 index 000000000..0377842c9 --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/internal/Definitions.java @@ -0,0 +1,33 @@ +package org.commonmark.internal; + +import org.commonmark.node.DefinitionMap; + +import java.util.HashMap; +import java.util.Map; + +public class Definitions { + + private final Map, DefinitionMap> definitionsByType = new HashMap<>(); + + public void addDefinitions(DefinitionMap definitionMap) { + var existingMap = getMap(definitionMap.getType()); + if (existingMap == null) { + definitionsByType.put(definitionMap.getType(), definitionMap); + } else { + existingMap.addAll(definitionMap); + } + } + + public V getDefinition(Class type, String label) { + var definitionMap = getMap(type); + if (definitionMap == null) { + return null; + } + return definitionMap.get(label); + } + + private DefinitionMap getMap(Class type) { + //noinspection unchecked + return (DefinitionMap) definitionsByType.get(type); + } +} diff --git a/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java b/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java index af0cb16b7..cff40486b 100644 --- a/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java @@ -76,7 +76,7 @@ public class DocumentParser implements ParserState { private final List bracketProcessors; private final IncludeSourceSpans includeSourceSpans; private final DocumentBlockParser documentBlockParser; - private final DefinitionMap linkReferenceDefinitions = new DefinitionMap<>(); + private final Definitions definitions = new Definitions(); private final List openBlockParsers = new ArrayList<>(); private final List allBlockParsers = new ArrayList<>(); @@ -462,35 +462,11 @@ private BlockStartImpl findBlockStart(BlockParser blockParser) { return null; } - /** - * Finalize a block. Close it and do any necessary postprocessing, e.g. setting the content of blocks and - * collecting link reference definitions from paragraphs. - */ - private void finalize(BlockParser blockParser) { - if (blockParser instanceof ParagraphParser) { - addDefinitionsFrom((ParagraphParser) blockParser); - } - - blockParser.closeBlock(); - } - - private void addDefinitionsFrom(ParagraphParser paragraphParser) { - // TODO: Generalize this allow block parsers to add definitions by their types. - // We'll keep a map for each type, e.g. one for LinkReferenceDefinition, one for FootnoteDefinition, etc :) - // The context then allows lookup with the type and label - for (LinkReferenceDefinition definition : paragraphParser.getDefinitions()) { - // Add nodes into document before paragraph. - paragraphParser.getBlock().insertBefore(definition); - - linkReferenceDefinitions.putIfAbsent(definition.getLabel(), definition); - } - } - /** * Walk through a block & children recursively, parsing string content into inline content where appropriate. */ private void processInlines() { - var context = new InlineParserContextImpl(inlineContentParserFactories, delimiterProcessors, bracketProcessors, linkReferenceDefinitions); + var context = new InlineParserContextImpl(inlineContentParserFactories, delimiterProcessors, bracketProcessors, definitions); var inlineParser = inlineParserFactory.create(context); for (var blockParser : allBlockParsers) { @@ -529,7 +505,7 @@ private Block prepareActiveBlockParserForReplacement() { // block parser got the current paragraph content using MatchedBlockParser#getContentString. In case the // paragraph started with link reference definitions, we parse and strip them before the block parser gets // the content. We want to keep them. - // If no replacement happens, we collect the definitions as part of finalizing paragraph blocks. + // If no replacement happens, we collect the definitions as part of finalizing blocks. addDefinitionsFrom(paragraphParser); } @@ -556,6 +532,21 @@ private void closeBlockParsers(int count) { } } + /** + * Finalize a block. Close it and do any necessary postprocessing, e.g. setting the content of blocks and + * collecting link reference definitions from paragraphs. + */ + private void finalize(BlockParser blockParser) { + addDefinitionsFrom(blockParser); + blockParser.closeBlock(); + } + + private void addDefinitionsFrom(BlockParser blockParser) { + for (var definitionMap : blockParser.getDefinitions()) { + definitions.addDefinitions(definitionMap); + } + } + /** * Prepares the input line replacing {@code \0} */ diff --git a/commonmark/src/main/java/org/commonmark/internal/InlineParserContextImpl.java b/commonmark/src/main/java/org/commonmark/internal/InlineParserContextImpl.java index ef99c0c9c..79fe2a56a 100644 --- a/commonmark/src/main/java/org/commonmark/internal/InlineParserContextImpl.java +++ b/commonmark/src/main/java/org/commonmark/internal/InlineParserContextImpl.java @@ -1,6 +1,5 @@ package org.commonmark.internal; -import org.commonmark.node.DefinitionMap; import org.commonmark.node.LinkReferenceDefinition; import org.commonmark.parser.InlineParserContext; import org.commonmark.parser.beta.BracketProcessor; @@ -14,16 +13,16 @@ public class InlineParserContextImpl implements InlineParserContext { private final List inlineContentParserFactories; private final List delimiterProcessors; private final List bracketProcessors; - private final DefinitionMap linkReferenceDefinitions; + private final Definitions definitions; public InlineParserContextImpl(List inlineContentParserFactories, List delimiterProcessors, List bracketProcessors, - DefinitionMap linkReferenceDefinitions) { + Definitions definitions) { this.inlineContentParserFactories = inlineContentParserFactories; this.delimiterProcessors = delimiterProcessors; this.bracketProcessors = bracketProcessors; - this.linkReferenceDefinitions = linkReferenceDefinitions; + this.definitions = definitions; } @Override @@ -43,6 +42,11 @@ public List getCustomBracketProcessors() { @Override public LinkReferenceDefinition getLinkReferenceDefinition(String label) { - return linkReferenceDefinitions.get(label); + return definitions.getDefinition(LinkReferenceDefinition.class, label); + } + + @Override + public D getDefinition(Class type, String label) { + return definitions.getDefinition(type, label); } } diff --git a/commonmark/src/main/java/org/commonmark/internal/LinkReferenceDefinitionParser.java b/commonmark/src/main/java/org/commonmark/internal/LinkReferenceDefinitionParser.java index b58e669ef..070f29ceb 100644 --- a/commonmark/src/main/java/org/commonmark/internal/LinkReferenceDefinitionParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/LinkReferenceDefinitionParser.java @@ -2,6 +2,7 @@ import org.commonmark.internal.util.Escaping; import org.commonmark.internal.util.LinkScanner; +import org.commonmark.node.DefinitionMap; import org.commonmark.node.LinkReferenceDefinition; import org.commonmark.node.SourceSpan; import org.commonmark.parser.SourceLine; diff --git a/commonmark/src/main/java/org/commonmark/internal/ParagraphParser.java b/commonmark/src/main/java/org/commonmark/internal/ParagraphParser.java index 89328ef2a..18808d499 100644 --- a/commonmark/src/main/java/org/commonmark/internal/ParagraphParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/ParagraphParser.java @@ -1,9 +1,6 @@ package org.commonmark.internal; -import org.commonmark.node.Block; -import org.commonmark.node.LinkReferenceDefinition; -import org.commonmark.node.Paragraph; -import org.commonmark.node.SourceSpan; +import org.commonmark.node.*; import org.commonmark.parser.InlineParser; import org.commonmark.parser.SourceLine; import org.commonmark.parser.SourceLines; @@ -12,6 +9,7 @@ import org.commonmark.parser.block.ParserState; import java.util.List; +import java.util.Map; public class ParagraphParser extends AbstractBlockParser { @@ -49,8 +47,21 @@ public void addSourceSpan(SourceSpan sourceSpan) { linkReferenceDefinitionParser.addSourceSpan(sourceSpan); } + @Override + public List> getDefinitions() { + var map = new DefinitionMap<>(LinkReferenceDefinition.class); + for (var def : linkReferenceDefinitionParser.getDefinitions()) { + map.putIfAbsent(def.getLabel(), def); + } + return List.of(map); + } + @Override public void closeBlock() { + for (var def : linkReferenceDefinitionParser.getDefinitions()) { + block.insertBefore(def); + } + if (linkReferenceDefinitionParser.getParagraphLines().isEmpty()) { block.unlink(); } else { @@ -69,8 +80,4 @@ public void parseInlines(InlineParser inlineParser) { public SourceLines getParagraphLines() { return linkReferenceDefinitionParser.getParagraphLines(); } - - public List getDefinitions() { - return linkReferenceDefinitionParser.getDefinitions(); - } } diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/CoreBracketProcessor.java b/commonmark/src/main/java/org/commonmark/internal/inline/CoreBracketProcessor.java index f208f76e2..ce5fc27af 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/CoreBracketProcessor.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/CoreBracketProcessor.java @@ -2,6 +2,7 @@ import org.commonmark.node.Image; import org.commonmark.node.Link; +import org.commonmark.node.LinkReferenceDefinition; import org.commonmark.parser.InlineParserContext; import org.commonmark.parser.beta.BracketInfo; import org.commonmark.parser.beta.BracketProcessor; @@ -14,7 +15,7 @@ public class CoreBracketProcessor implements BracketProcessor { public BracketResult process(BracketInfo bracketInfo, Scanner scanner, InlineParserContext context) { var label = bracketInfo.label(); var ref = label != null && !label.isEmpty() ? label : bracketInfo.text(); - var def = context.getLinkReferenceDefinition(ref); + var def = context.getDefinition(LinkReferenceDefinition.class, ref); if (def != null) { if (bracketInfo.openerType() == BracketInfo.OpenerType.IMAGE) { return BracketResult.wrapTextIn(new Image(def.getDestination(), def.getTitle()), scanner.position()); diff --git a/commonmark/src/main/java/org/commonmark/node/DefinitionMap.java b/commonmark/src/main/java/org/commonmark/node/DefinitionMap.java index bc0c0b221..82f553ff1 100644 --- a/commonmark/src/main/java/org/commonmark/node/DefinitionMap.java +++ b/commonmark/src/main/java/org/commonmark/node/DefinitionMap.java @@ -2,6 +2,7 @@ import org.commonmark.internal.util.Escaping; +import java.util.Collection; import java.util.LinkedHashMap; import java.util.Map; @@ -9,17 +10,32 @@ * A map that can be used to store and lookup reference definitions by a label. The labels are case-insensitive and * normalized, the same way as for {@link LinkReferenceDefinition} nodes. * - * @param the type of value + * @param the type of value */ -public class DefinitionMap { +public class DefinitionMap { + private final Class type; // LinkedHashMap for determinism and to preserve document order - private final Map definitions = new LinkedHashMap<>(); + private final Map definitions = new LinkedHashMap<>(); + + public DefinitionMap(Class type) { + this.type = type; + } + + public Class getType() { + return type; + } + + public void addAll(DefinitionMap that) { + for (var entry : that.definitions.entrySet()) { + definitions.putIfAbsent(entry.getKey(), entry.getValue()); + } + } /** * Store a new definition unless one is already in the map. */ - public void putIfAbsent(String label, V definition) { + public void putIfAbsent(String label, D definition) { String normalizedLabel = Escaping.normalizeLabelContent(label); // spec: When there are multiple matching link reference definitions, the first is used @@ -31,8 +47,12 @@ public void putIfAbsent(String label, V definition) { * * @return the value or null */ - public V get(String label) { + public D get(String label) { String normalizedLabel = Escaping.normalizeLabelContent(label); return definitions.get(normalizedLabel); } + + public Collection values() { + return definitions.values(); + } } diff --git a/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java b/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java index 6c3c9cbc6..4a2951e70 100644 --- a/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java +++ b/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java @@ -32,10 +32,23 @@ public interface InlineParserContext { /** * Look up a {@link LinkReferenceDefinition} for a given label. *

- * Note that the label is not normalized yet; implementations are responsible for normalizing before lookup. + * Note that the passed in label does not need to be normalized; implementations are responsible for doing the + * normalization before lookup. * * @param label the link label to look up * @return the definition if one exists, {@code null} otherwise + * @deprecated use {@link #getDefinition} with {@link LinkReferenceDefinition} instead */ + @Deprecated LinkReferenceDefinition getLinkReferenceDefinition(String label); + + /** + * Look up a definition of a type for a given label. + *

+ * Note that the passed in label does not need to be normalized; implementations are responsible for doing the + * normalization before lookup. + * + * @return the definition if one exists, null otherwise + */ + D getDefinition(Class type, String label); } diff --git a/commonmark/src/main/java/org/commonmark/parser/Parser.java b/commonmark/src/main/java/org/commonmark/parser/Parser.java index 6a626b2bc..e09d45da3 100644 --- a/commonmark/src/main/java/org/commonmark/parser/Parser.java +++ b/commonmark/src/main/java/org/commonmark/parser/Parser.java @@ -1,6 +1,7 @@ package org.commonmark.parser; import org.commonmark.Extension; +import org.commonmark.internal.Definitions; import org.commonmark.internal.DocumentParser; import org.commonmark.internal.InlineParserContextImpl; import org.commonmark.internal.InlineParserImpl; @@ -49,7 +50,7 @@ private Parser(Builder builder) { // Try to construct an inline parser. Invalid configuration might result in an exception, which we want to // detect as soon as possible. var context = new InlineParserContextImpl( - inlineContentParserFactories, delimiterProcessors, bracketProcessors, new DefinitionMap<>()); + inlineContentParserFactories, delimiterProcessors, bracketProcessors, new Definitions()); this.inlineParserFactory.create(context); } diff --git a/commonmark/src/main/java/org/commonmark/parser/block/AbstractBlockParser.java b/commonmark/src/main/java/org/commonmark/parser/block/AbstractBlockParser.java index 3d4cbb77b..4fb1a05ac 100644 --- a/commonmark/src/main/java/org/commonmark/parser/block/AbstractBlockParser.java +++ b/commonmark/src/main/java/org/commonmark/parser/block/AbstractBlockParser.java @@ -1,10 +1,13 @@ package org.commonmark.parser.block; import org.commonmark.node.Block; +import org.commonmark.node.DefinitionMap; import org.commonmark.node.SourceSpan; import org.commonmark.parser.InlineParser; import org.commonmark.parser.SourceLine; +import java.util.List; + public abstract class AbstractBlockParser implements BlockParser { @Override @@ -31,6 +34,11 @@ public void addSourceSpan(SourceSpan sourceSpan) { getBlock().addSourceSpan(sourceSpan); } + @Override + public List> getDefinitions() { + return List.of(); + } + @Override public void closeBlock() { } diff --git a/commonmark/src/main/java/org/commonmark/parser/block/BlockParser.java b/commonmark/src/main/java/org/commonmark/parser/block/BlockParser.java index addd90d1a..32ff2a474 100644 --- a/commonmark/src/main/java/org/commonmark/parser/block/BlockParser.java +++ b/commonmark/src/main/java/org/commonmark/parser/block/BlockParser.java @@ -1,10 +1,13 @@ package org.commonmark.parser.block; import org.commonmark.node.Block; +import org.commonmark.node.DefinitionMap; import org.commonmark.node.SourceSpan; import org.commonmark.parser.InlineParser; import org.commonmark.parser.SourceLine; +import java.util.List; + /** * Parser for a specific block node. *

@@ -49,6 +52,12 @@ public interface BlockParser { */ void addSourceSpan(SourceSpan sourceSpan); + /** + * Return definitions parsed by this parser. The definitions returned here can later be accessed during inline + * parsing via {@link org.commonmark.parser.InlineParserContext#getDefinition}. + */ + List> getDefinitions(); + void closeBlock(); void parseInlines(InlineParser inlineParser); diff --git a/commonmark/src/test/java/org/commonmark/test/InlineParserContextTest.java b/commonmark/src/test/java/org/commonmark/test/InlineParserContextTest.java index c9aa50dc2..07b94c076 100644 --- a/commonmark/src/test/java/org/commonmark/test/InlineParserContextTest.java +++ b/commonmark/src/test/java/org/commonmark/test/InlineParserContextTest.java @@ -59,8 +59,13 @@ public List getCustomBracketProcessors() { @Override public LinkReferenceDefinition getLinkReferenceDefinition(String label) { + return getDefinition(LinkReferenceDefinition.class, label); + } + + @Override + public D getDefinition(Class type, String label) { lookups.add(label); - return inlineParserContext.getLinkReferenceDefinition(label); + return inlineParserContext.getDefinition(type, label); } };