Skip to content

Commit

Permalink
Allow BlockParsers to return definitions (for lookup during inline pa…
Browse files Browse the repository at this point in the history
…rsing)
  • Loading branch information
robinst committed May 25, 2024
1 parent 04ba63f commit 4c2f729
Show file tree
Hide file tree
Showing 15 changed files with 166 additions and 55 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@

import org.commonmark.ext.footnotes.FootnoteDefinition;
import org.commonmark.node.Block;
import org.commonmark.node.DefinitionMap;
import org.commonmark.parser.block.*;

import java.util.List;

public class FootnoteBlockParser extends AbstractBlockParser {

private final FootnoteDefinition block;
Expand Down Expand Up @@ -40,6 +43,13 @@ public BlockContinue tryContinue(ParserState parserState) {
}
}

/**
 * Reports the footnote definition owned by this parser, keyed by its label, wrapped in a single
 * {@link DefinitionMap} for {@link FootnoteDefinition}.
 *
 * @return a one-element list containing the map with this parser's definition
 */
@Override
public List<DefinitionMap<?>> getDefinitions() {
    DefinitionMap<FootnoteDefinition> footnotes = new DefinitionMap<>(FootnoteDefinition.class);
    footnotes.putIfAbsent(block.getLabel(), block);
    return List.of(footnotes);
}

public static class Factory implements BlockParserFactory {

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.commonmark.ext.footnotes.internal;

import org.commonmark.ext.footnotes.FootnoteDefinition;
import org.commonmark.ext.footnotes.FootnoteReference;
import org.commonmark.parser.InlineParserContext;
import org.commonmark.parser.beta.BracketInfo;
Expand All @@ -13,13 +14,14 @@ public BracketResult process(BracketInfo bracketInfo, Scanner scanner, InlinePar
// TODO: Does parsing need to be more strict here?
var text = bracketInfo.text();
if (text.startsWith("^")) {
// TODO: Do we need to check if a definition exists before doing this? (That would be the same as reference
// links.)

// For footnotes, we only ever consume the text part of the link, not the label part (if any).
var position = bracketInfo.afterTextBracket();
var label = text.substring(1);
return BracketResult.replaceWith(new FootnoteReference(label), position);
// Check if we have a definition, otherwise ignore (same behavior as for link reference definitions)
var def = context.getDefinition(FootnoteDefinition.class, label);
if (def != null) {
// For footnotes, we only ever consume the text part of the link, not the label part (if any).
var position = bracketInfo.afterTextBracket();
return BracketResult.replaceWith(new FootnoteReference(label), position);
}
}
return BracketResult.none();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,12 @@ public void testReference() {
assertEquals("foo", ref.getLabel());
}

// A "[^foo]" bracket without a matching footnote definition must not yield a FootnoteReference node.
@Test
public void testReferenceNoDefinition() {
    var reference = tryFind(PARSER.parse("Test [^foo]\n"), FootnoteReference.class);
    assertNull(reference);
}

// Interesting test cases:

// Test [foo][^bar]
Expand Down
33 changes: 33 additions & 0 deletions commonmark/src/main/java/org/commonmark/internal/Definitions.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package org.commonmark.internal;

import org.commonmark.node.DefinitionMap;

import java.util.HashMap;
import java.util.Map;

/**
 * Holds one {@link DefinitionMap} per definition type and allows looking up a definition by type and label.
 */
public class Definitions {

    // Keyed by the type each DefinitionMap reports via getType().
    private final Map<Class<?>, DefinitionMap<?>> definitionsByType = new HashMap<>();

    /**
     * Adds the definitions of the given map. The first map seen for a type is stored as-is (it is not copied);
     * a map arriving later for the same type is merged into the stored one via {@link DefinitionMap#addAll}.
     *
     * @param definitionMap the definitions to add
     * @param <D> the type of definition
     */
    public <D> void addDefinitions(DefinitionMap<D> definitionMap) {
        Class<D> type = definitionMap.getType();
        DefinitionMap<D> known = getMap(type);
        if (known != null) {
            known.addAll(definitionMap);
            return;
        }
        definitionsByType.put(type, definitionMap);
    }

    /**
     * Looks up a definition of the given type by label.
     *
     * @param type the type of definition to look for
     * @param label the label, passed on unchanged to {@link DefinitionMap#get}
     * @param <V> the type of definition
     * @return the definition, or {@code null} if there is no map for the type or the map has no such label
     */
    public <V> V getDefinition(Class<V> type, String label) {
        DefinitionMap<V> candidates = getMap(type);
        return candidates == null ? null : candidates.get(label);
    }

    /**
     * Returns the map stored for the given type, or {@code null} if there is none. The cast relies on
     * {@link #addDefinitions} only ever storing a map under its own {@code getType()}.
     */
    private <V> DefinitionMap<V> getMap(Class<V> type) {
        //noinspection unchecked
        return (DefinitionMap<V>) definitionsByType.get(type);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ public class DocumentParser implements ParserState {
private final List<BracketProcessor> bracketProcessors;
private final IncludeSourceSpans includeSourceSpans;
private final DocumentBlockParser documentBlockParser;
private final DefinitionMap<LinkReferenceDefinition> linkReferenceDefinitions = new DefinitionMap<>();
private final Definitions definitions = new Definitions();

private final List<OpenBlockParser> openBlockParsers = new ArrayList<>();
private final List<BlockParser> allBlockParsers = new ArrayList<>();
Expand Down Expand Up @@ -462,35 +462,11 @@ private BlockStartImpl findBlockStart(BlockParser blockParser) {
return null;
}

/**
 * Finalize a block. Close it and do any necessary postprocessing, e.g. setting the content of blocks and
 * collecting link reference definitions from paragraphs.
 *
 * @param blockParser the block parser to finalize
 */
private void finalize(BlockParser blockParser) {
// Only paragraph parsers are asked for definitions here, and this happens before closeBlock() is called.
if (blockParser instanceof ParagraphParser) {
addDefinitionsFrom((ParagraphParser) blockParser);
}

blockParser.closeBlock();
}

/**
 * Inserts every link reference definition collected by the paragraph parser into the document, directly before
 * the paragraph's block, and records it for lookup by label.
 *
 * @param paragraphParser the paragraph parser to take the definitions from
 */
private void addDefinitionsFrom(ParagraphParser paragraphParser) {
// TODO: Generalize this to allow block parsers to add definitions by their types.
// We'll keep a map for each type, e.g. one for LinkReferenceDefinition, one for FootnoteDefinition, etc :)
// The context then allows lookup with the type and label
for (LinkReferenceDefinition definition : paragraphParser.getDefinitions()) {
// Add nodes into document before paragraph.
paragraphParser.getBlock().insertBefore(definition);

// putIfAbsent: a definition already recorded for the same label is kept.
linkReferenceDefinitions.putIfAbsent(definition.getLabel(), definition);
}
}

/**
* Walk through a block & children recursively, parsing string content into inline content where appropriate.
*/
private void processInlines() {
var context = new InlineParserContextImpl(inlineContentParserFactories, delimiterProcessors, bracketProcessors, linkReferenceDefinitions);
var context = new InlineParserContextImpl(inlineContentParserFactories, delimiterProcessors, bracketProcessors, definitions);
var inlineParser = inlineParserFactory.create(context);

for (var blockParser : allBlockParsers) {
Expand Down Expand Up @@ -529,7 +505,7 @@ private Block prepareActiveBlockParserForReplacement() {
// block parser got the current paragraph content using MatchedBlockParser#getContentString. In case the
// paragraph started with link reference definitions, we parse and strip them before the block parser gets
// the content. We want to keep them.
// If no replacement happens, we collect the definitions as part of finalizing paragraph blocks.
// If no replacement happens, we collect the definitions as part of finalizing blocks.
addDefinitionsFrom(paragraphParser);
}

Expand All @@ -556,6 +532,21 @@ private void closeBlockParsers(int count) {
}
}

/**
 * Finalize a block. Collect the definitions that the block parser provides (of any type, not just link
 * reference definitions from paragraphs), then close it, which does any necessary postprocessing such as
 * setting the content of blocks.
 *
 * @param blockParser the block parser to finalize
 */
private void finalize(BlockParser blockParser) {
// Definitions are collected before closeBlock() is called; keep this order.
addDefinitionsFrom(blockParser);
blockParser.closeBlock();
}

/**
 * Registers every definition map that the given block parser reports with the document-wide definitions.
 *
 * @param blockParser the block parser to take the definitions from
 */
private void addDefinitionsFrom(BlockParser blockParser) {
    blockParser.getDefinitions().forEach(reported -> definitions.addDefinitions(reported));
}

/**
* Prepares the input line replacing {@code \0}
*/
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package org.commonmark.internal;

import org.commonmark.node.DefinitionMap;
import org.commonmark.node.LinkReferenceDefinition;
import org.commonmark.parser.InlineParserContext;
import org.commonmark.parser.beta.BracketProcessor;
Expand All @@ -14,16 +13,16 @@ public class InlineParserContextImpl implements InlineParserContext {
private final List<InlineContentParserFactory> inlineContentParserFactories;
private final List<DelimiterProcessor> delimiterProcessors;
private final List<BracketProcessor> bracketProcessors;
private final DefinitionMap<LinkReferenceDefinition> linkReferenceDefinitions;
private final Definitions definitions;

public InlineParserContextImpl(List<InlineContentParserFactory> inlineContentParserFactories,
List<DelimiterProcessor> delimiterProcessors,
List<BracketProcessor> bracketProcessors,
DefinitionMap<LinkReferenceDefinition> linkReferenceDefinitions) {
Definitions definitions) {
this.inlineContentParserFactories = inlineContentParserFactories;
this.delimiterProcessors = delimiterProcessors;
this.bracketProcessors = bracketProcessors;
this.linkReferenceDefinitions = linkReferenceDefinitions;
this.definitions = definitions;
}

@Override
Expand All @@ -43,6 +42,11 @@ public List<BracketProcessor> getCustomBracketProcessors() {

@Override
public LinkReferenceDefinition getLinkReferenceDefinition(String label) {
return linkReferenceDefinitions.get(label);
return definitions.getDefinition(LinkReferenceDefinition.class, label);
}

/**
 * Looks up a definition of the given type by label, delegating to the {@code Definitions} instance this context
 * was constructed with.
 *
 * @return whatever the underlying {@code Definitions#getDefinition} returns for the type and label
 */
@Override
public <D> D getDefinition(Class<D> type, String label) {
return definitions.getDefinition(type, label);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import org.commonmark.internal.util.Escaping;
import org.commonmark.internal.util.LinkScanner;
import org.commonmark.node.DefinitionMap;
import org.commonmark.node.LinkReferenceDefinition;
import org.commonmark.node.SourceSpan;
import org.commonmark.parser.SourceLine;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
package org.commonmark.internal;

import org.commonmark.node.Block;
import org.commonmark.node.LinkReferenceDefinition;
import org.commonmark.node.Paragraph;
import org.commonmark.node.SourceSpan;
import org.commonmark.node.*;
import org.commonmark.parser.InlineParser;
import org.commonmark.parser.SourceLine;
import org.commonmark.parser.SourceLines;
Expand All @@ -12,6 +9,7 @@
import org.commonmark.parser.block.ParserState;

import java.util.List;
import java.util.Map;

public class ParagraphParser extends AbstractBlockParser {

Expand Down Expand Up @@ -49,8 +47,21 @@ public void addSourceSpan(SourceSpan sourceSpan) {
linkReferenceDefinitionParser.addSourceSpan(sourceSpan);
}

/**
 * Reports the link reference definitions found by the link reference definition parser, keyed by label, in a
 * single {@link DefinitionMap}. If several definitions share a label, the one encountered first is kept.
 *
 * @return a one-element list containing the map of link reference definitions (the map may be empty)
 */
@Override
public List<DefinitionMap<?>> getDefinitions() {
    DefinitionMap<LinkReferenceDefinition> linkDefinitions = new DefinitionMap<>(LinkReferenceDefinition.class);
    linkReferenceDefinitionParser.getDefinitions()
            .forEach(definition -> linkDefinitions.putIfAbsent(definition.getLabel(), definition));
    return List.of(linkDefinitions);
}

@Override
public void closeBlock() {
for (var def : linkReferenceDefinitionParser.getDefinitions()) {
block.insertBefore(def);
}

if (linkReferenceDefinitionParser.getParagraphLines().isEmpty()) {
block.unlink();
} else {
Expand All @@ -69,8 +80,4 @@ public void parseInlines(InlineParser inlineParser) {
/**
 * Returns the paragraph lines as reported by the link reference definition parser.
 * NOTE(review): presumably these are the lines left over after leading link reference definitions were
 * parsed off; confirm against LinkReferenceDefinitionParser.
 */
public SourceLines getParagraphLines() {
return linkReferenceDefinitionParser.getParagraphLines();
}

/**
 * Returns the link reference definitions collected by the link reference definition parser, exactly as that
 * parser reports them.
 */
public List<LinkReferenceDefinition> getDefinitions() {
return linkReferenceDefinitionParser.getDefinitions();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import org.commonmark.node.Image;
import org.commonmark.node.Link;
import org.commonmark.node.LinkReferenceDefinition;
import org.commonmark.parser.InlineParserContext;
import org.commonmark.parser.beta.BracketInfo;
import org.commonmark.parser.beta.BracketProcessor;
Expand All @@ -14,7 +15,7 @@ public class CoreBracketProcessor implements BracketProcessor {
public BracketResult process(BracketInfo bracketInfo, Scanner scanner, InlineParserContext context) {
var label = bracketInfo.label();
var ref = label != null && !label.isEmpty() ? label : bracketInfo.text();
var def = context.getLinkReferenceDefinition(ref);
var def = context.getDefinition(LinkReferenceDefinition.class, ref);
if (def != null) {
if (bracketInfo.openerType() == BracketInfo.OpenerType.IMAGE) {
return BracketResult.wrapTextIn(new Image(def.getDestination(), def.getTitle()), scanner.position());
Expand Down
30 changes: 25 additions & 5 deletions commonmark/src/main/java/org/commonmark/node/DefinitionMap.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,40 @@

import org.commonmark.internal.util.Escaping;

import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.Map;

/**
* A map that can be used to store and lookup reference definitions by a label. The labels are case-insensitive and
* normalized, the same way as for {@link LinkReferenceDefinition} nodes.
*
* @param <V> the type of value
* @param <D> the type of value
*/
public class DefinitionMap<V> {
public class DefinitionMap<D> {

private final Class<D> type;
// LinkedHashMap for determinism and to preserve document order
private final Map<String, V> definitions = new LinkedHashMap<>();
private final Map<String, D> definitions = new LinkedHashMap<>();

/**
 * Creates an empty map for definitions of the given type.
 *
 * @param type the class of the definitions stored in this map; returned unchanged by {@code getType()}
 */
public DefinitionMap(Class<D> type) {
this.type = type;
}

/**
 * @return the class of definitions that this map was created for
 */
public Class<D> getType() {
return type;
}

/**
 * Copies the definitions of another map into this one. An entry already present in this map is kept; only
 * labels that are not yet present are added. The other map's keys are used as they are, without being
 * normalized again.
 *
 * @param that the map to copy definitions from
 */
public void addAll(DefinitionMap<D> that) {
    that.definitions.forEach(definitions::putIfAbsent);
}

/**
* Store a new definition unless one is already in the map.
*/
public void putIfAbsent(String label, V definition) {
public void putIfAbsent(String label, D definition) {
String normalizedLabel = Escaping.normalizeLabelContent(label);

// spec: When there are multiple matching link reference definitions, the first is used
Expand All @@ -31,8 +47,12 @@ public void putIfAbsent(String label, V definition) {
*
* @return the value or null
*/
public V get(String label) {
public D get(String label) {
String normalizedLabel = Escaping.normalizeLabelContent(label);
return definitions.get(normalizedLabel);
}

/**
 * Returns the definitions stored in this map. This is the values view of the backing map, not a copy.
 *
 * @return the stored definitions
 */
public Collection<D> values() {
return definitions.values();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,23 @@ public interface InlineParserContext {
/**
* Look up a {@link LinkReferenceDefinition} for a given label.
* <p>
* Note that the label is not normalized yet; implementations are responsible for normalizing before lookup.
* Note that the passed in label does not need to be normalized; implementations are responsible for doing the
* normalization before lookup.
*
* @param label the link label to look up
* @return the definition if one exists, {@code null} otherwise
* @deprecated use {@link #getDefinition} with {@link LinkReferenceDefinition} instead
*/
@Deprecated
LinkReferenceDefinition getLinkReferenceDefinition(String label);

/**
 * Look up a definition of a type for a given label.
 * <p>
 * Note that the passed in label does not need to be normalized; implementations are responsible for doing the
 * normalization before lookup.
 *
 * @param type the class of the definition to look up, e.g. {@link LinkReferenceDefinition}
 * @param label the label to look up
 * @param <D> the type of the definition
 * @return the definition if one exists, {@code null} otherwise
 */
<D> D getDefinition(Class<D> type, String label);
}
3 changes: 2 additions & 1 deletion commonmark/src/main/java/org/commonmark/parser/Parser.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.commonmark.parser;

import org.commonmark.Extension;
import org.commonmark.internal.Definitions;
import org.commonmark.internal.DocumentParser;
import org.commonmark.internal.InlineParserContextImpl;
import org.commonmark.internal.InlineParserImpl;
Expand Down Expand Up @@ -49,7 +50,7 @@ private Parser(Builder builder) {
// Try to construct an inline parser. Invalid configuration might result in an exception, which we want to
// detect as soon as possible.
var context = new InlineParserContextImpl(
inlineContentParserFactories, delimiterProcessors, bracketProcessors, new DefinitionMap<>());
inlineContentParserFactories, delimiterProcessors, bracketProcessors, new Definitions());
this.inlineParserFactory.create(context);
}

Expand Down
Loading

0 comments on commit 4c2f729

Please sign in to comment.