Skip to content

Commit 216e13b

Browse files
committed
Implement SLR parse table generation
1 parent a3fac9a commit 216e13b

File tree

5 files changed

+195
-184
lines changed

5 files changed

+195
-184
lines changed

org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/grammar/ISymbol.java

+13
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,23 @@
77
public interface ISymbol {
88

99
String name();
10+
1011
boolean isNullable();
12+
1113
void setNullable(boolean nullable);
14+
15+
ICharacterClass getFirst();
16+
17+
void setFirst(ICharacterClass first);
18+
19+
ICharacterClass getFollow();
20+
21+
void setFollow(ICharacterClass follow);
22+
1223
ICharacterClass followRestriction();
24+
1325
List<ICharacterClass[]> followRestrictionLookahead();
26+
1427
org.metaborg.parsetable.symbols.ISymbol toParseTableSymbol();
1528
void normalizeFollowRestrictionLookahead();
1629

org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/grammar/Symbol.java

+25-6
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,8 @@
55
import java.util.Map;
66
import java.util.Set;
77

8+
import org.metaborg.parsetable.characterclasses.CharacterClassFactory;
89
import org.metaborg.parsetable.characterclasses.ICharacterClass;
9-
10-
import org.metaborg.sdf2table.grammar.ISymbol;
1110
import org.metaborg.parsetable.symbols.SortCardinality;
1211
import org.metaborg.parsetable.symbols.SyntaxContext;
1312
import org.metaborg.sdf2table.deepconflicts.Context;
@@ -24,12 +23,15 @@ public abstract class Symbol implements Serializable, ISymbol {
2423
protected List<ICharacterClass[]> followRestrictionsLookahead;
2524

2625
private boolean nullable = false;
26+
private ICharacterClass first = CharacterClassFactory.EMPTY_CHARACTER_CLASS;
27+
private ICharacterClass follow = CharacterClassFactory.EMPTY_CHARACTER_CLASS;
2728

28-
/* (non-Javadoc)
29+
/*
30+
* (non-Javadoc)
31+
*
2932
* @see org.metaborg.sdf2table.grammar.ISymbol#name()
3033
*/
31-
@Override
32-
public abstract String name();
34+
@Override public abstract String name();
3335

3436
public boolean isNullable() {
3537
return nullable;
@@ -39,6 +41,22 @@ public void setNullable(boolean nullable) {
3941
this.nullable = nullable;
4042
}
4143

44+
@Override public ICharacterClass getFirst() {
45+
return first;
46+
}
47+
48+
@Override public void setFirst(ICharacterClass first) {
49+
this.first = first;
50+
}
51+
52+
@Override public ICharacterClass getFollow() {
53+
return follow;
54+
}
55+
56+
@Override public void setFollow(ICharacterClass follow) {
57+
this.follow = follow;
58+
}
59+
4260
@Override public String toString() {
4361
return name();
4462
}
@@ -147,5 +165,6 @@ public org.metaborg.parsetable.symbols.ISymbol toParseTableSymbol() {
147165
return toParseTableSymbol(null, null);
148166
}
149167

150-
public abstract org.metaborg.parsetable.symbols.ISymbol toParseTableSymbol(SyntaxContext syntaxContext, SortCardinality cardinality);
168+
public abstract org.metaborg.parsetable.symbols.ISymbol toParseTableSymbol(SyntaxContext syntaxContext,
169+
SortCardinality cardinality);
151170
}

org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/parsetable/ParseTable.java

+140-59
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,19 @@
11
package org.metaborg.sdf2table.parsetable;
22

33
import java.io.Serializable;
4-
import java.util.Collections;
5-
import java.util.List;
6-
import java.util.Map;
7-
import java.util.Queue;
8-
import java.util.Set;
4+
import java.util.*;
95

106
import org.metaborg.parsetable.IParseTable;
117
import org.metaborg.parsetable.states.IState;
12-
import org.metaborg.sdf2table.deepconflicts.Context;
13-
import org.metaborg.sdf2table.deepconflicts.ContextPosition;
14-
import org.metaborg.sdf2table.deepconflicts.ContextType;
15-
import org.metaborg.sdf2table.deepconflicts.ContextualProduction;
16-
import org.metaborg.sdf2table.deepconflicts.ContextualSymbol;
17-
import org.metaborg.sdf2table.deepconflicts.DeepConflictsAnalyzer;
18-
import org.metaborg.sdf2table.grammar.ContextFreeSymbol;
19-
import org.metaborg.sdf2table.grammar.GeneralAttribute;
20-
import org.metaborg.sdf2table.grammar.IAttribute;
21-
import org.metaborg.sdf2table.grammar.IProduction;
22-
import org.metaborg.sdf2table.grammar.ISymbol;
23-
import org.metaborg.sdf2table.grammar.IterSepSymbol;
24-
import org.metaborg.sdf2table.grammar.IterStarSepSymbol;
25-
import org.metaborg.sdf2table.grammar.IterStarSymbol;
26-
import org.metaborg.sdf2table.grammar.IterSymbol;
27-
import org.metaborg.sdf2table.grammar.Layout;
28-
import org.metaborg.sdf2table.grammar.LexicalSymbol;
29-
import org.metaborg.sdf2table.grammar.NormGrammar;
30-
import org.metaborg.sdf2table.grammar.OptionalSymbol;
31-
import org.metaborg.sdf2table.grammar.Priority;
32-
import org.metaborg.sdf2table.grammar.Production;
33-
import org.metaborg.sdf2table.grammar.Sort;
34-
import org.metaborg.sdf2table.grammar.Symbol;
8+
import org.metaborg.sdf2table.deepconflicts.*;
9+
import org.metaborg.sdf2table.grammar.*;
3510
import org.metaborg.sdf2table.util.CheckOverlap;
3611
import org.metaborg.sdf2table.util.Graph;
3712
import org.metaborg.sdf2table.util.SCCNodes;
3813
import org.metaborg.util.log.ILogger;
3914
import org.metaborg.util.log.LoggerUtils;
4015

41-
import com.google.common.collect.BiMap;
42-
import com.google.common.collect.HashBiMap;
43-
import com.google.common.collect.HashMultimap;
44-
import com.google.common.collect.Lists;
45-
import com.google.common.collect.Maps;
46-
import com.google.common.collect.Queues;
47-
import com.google.common.collect.SetMultimap;
48-
import com.google.common.collect.Sets;
16+
import com.google.common.collect.*;
4917

5018
public class ParseTable implements IParseTable, Serializable {
5119

@@ -59,7 +27,7 @@ public class ParseTable implements IParseTable, Serializable {
5927

6028
public static final int FIRST_PRODUCTION_LABEL = 257;
6129
public static final int INITIAL_STATE_NUMBER = 0;
62-
public static final int VERSION_NUMBER = 6;
30+
public static final int VERSION_NUMBER = 7;
6331

6432
private NormGrammar grammar;
6533

@@ -140,6 +108,10 @@ public ParseTable(NormGrammar grammar, ParseTableConfiguration config) {
140108
// create JSGLR parse table productions
141109
createJSGLRParseTableProductions(productionLabels);
142110

111+
// calculate FIRST-set and FOLLOW-set
112+
calculateFirst();
113+
calculateFollow();
114+
143115
// create states if the table should not be generated dynamically
144116
initialProduction = grammar.getInitialProduction();
145117

@@ -186,6 +158,137 @@ private void calculateNullable() {
186158
} while(markedNullable);
187159
}
188160

161+
// Based on https://compilers.iecc.com/comparch/article/01-04-079
162+
private void calculateFirst() {
163+
SetMultimap<ISymbol, IProduction> symbolProductionsMapping = grammar.getSymbolProductionsMapping();
164+
Set<ISymbol> symbols = grammar.getSymbols();
165+
SetMultimap<ISymbol, ISymbol> containsTheFirstOf = HashMultimap.create();
166+
167+
for(ISymbol s : symbols) {
168+
// The FIRST set of a CharacterClass symbol is equal to the character class it represents.
169+
if(s instanceof CharacterClassSymbol) {
170+
s.setFirst(((CharacterClassSymbol) s).getCC());
171+
continue;
172+
}
173+
174+
for(IProduction p : symbolProductionsMapping.get(s)) {
175+
// Direct contributions:
176+
// If p is of the shape A = A0 ... Ak a Am ... An where all symbols up to Ak are nullable
177+
for(ISymbol rhs : p.rightHand()) {
178+
// Then, a is in FIRST(A).
179+
if(rhs instanceof CharacterClassSymbol) {
180+
s.setFirst(((CharacterClassSymbol) rhs).getCC());
181+
break;
182+
}
183+
184+
// Indirect contributions: calculate contains-the-FIRSTs-of
185+
// If p is of the shape A = A0 ... Ak B Am ... An where all symbols up to Ak are nullable
186+
// Then, A contains-the-FIRSTs-of B
187+
containsTheFirstOf.put(s, rhs);
188+
189+
if(!rhs.isNullable())
190+
break;
191+
}
192+
}
193+
}
194+
195+
// Indirect contributions: Tarjan's algorithm for strongly connected components
196+
final int DONE = symbols.size();
197+
final Map<ISymbol, Integer> low = new HashMap<>();
198+
final Stack<ISymbol> stack = new Stack<>();
199+
for(ISymbol v : symbols) {
200+
if(low.get(v) == null /* CLEAN */)
201+
traverseFirst(v, containsTheFirstOf, DONE, low, stack);
202+
}
203+
}
204+
205+
private void traverseFirst(ISymbol v, SetMultimap<ISymbol, ISymbol> containsTheFirstOf, int DONE,
206+
Map<ISymbol, Integer> low, Stack<ISymbol> stack) {
207+
stack.push(v);
208+
int top1 = stack.size() - 1;
209+
low.put(v, top1);
210+
for(ISymbol w : containsTheFirstOf.get(v)) {
211+
if(low.get(w) == null /* CLEAN */) {
212+
traverseFirst(w, containsTheFirstOf, DONE, low, stack);
213+
}
214+
// Change compared to the article at compilers.iecc.com: this line is moved outside of the previous if-block
215+
v.setFirst(v.getFirst().union(w.getFirst())); // union!
216+
if(low.get(w) < low.get(v))
217+
low.put(v, low.get(w));
218+
}
219+
if(low.get(v) == top1) // v is the root of this SCC
220+
while(stack.size() - 1 >= top1) {
221+
ISymbol w = stack.pop();
222+
w.setFirst(v.getFirst()); // distribute!
223+
low.put(w, DONE);
224+
}
225+
}
226+
227+
// Based on https://compilers.iecc.com/comparch/article/01-04-079
228+
// and Modern Compiler Implementation in Java, Second Edition - Andrew Appel, 2004
229+
private void calculateFollow() {
230+
SetMultimap<ISymbol, IProduction> symbolProductionsMapping = grammar.getSymbolProductionsMapping();
231+
Set<ISymbol> symbols = grammar.getSymbols();
232+
SetMultimap<ISymbol, ISymbol> containsTheFirstOf = HashMultimap.create();
233+
SetMultimap<ISymbol, ISymbol> containsTheFollowOf = HashMultimap.create();
234+
235+
for(ISymbol s : symbols) {
236+
for(IProduction p : symbolProductionsMapping.get(s)) {
237+
List<ISymbol> rightHand = p.rightHand();
238+
for(int i = 0, rightHandSize = rightHand.size(); i < rightHandSize; i++) {
239+
ISymbol symbolI = rightHand.get(i);
240+
241+
// If p is of the shape A = A0 ... Ai Ak ... Am Aj ... An
242+
for(int j = i + 1; j < rightHandSize; j++) {
243+
// If Ak ... Am are all nullable, FOLLOW(Ai) contains FIRST(Aj)
244+
ISymbol symbolJ = rightHand.get(j);
245+
containsTheFirstOf.put(symbolI, symbolJ);
246+
247+
if(!symbolJ.isNullable())
248+
break;
249+
}
250+
251+
// If Ak ... An are all nullable, FOLLOW(Ai) contains FOLLOW(A)
252+
containsTheFollowOf.put(symbolI, s);
253+
}
254+
}
255+
}
256+
257+
// Indirect contributions: Tarjan's algorithm for strongly connected components
258+
final int DONE = symbols.size();
259+
final Map<ISymbol, Integer> low = new HashMap<>();
260+
final Stack<ISymbol> stack = new Stack<>();
261+
for(ISymbol v : symbols) {
262+
if(low.get(v) == null /* CLEAN */)
263+
traverseFollow(v, containsTheFirstOf, containsTheFollowOf, DONE, low, stack);
264+
}
265+
}
266+
267+
private void traverseFollow(ISymbol v, SetMultimap<ISymbol, ISymbol> containsTheFirstOf,
268+
SetMultimap<ISymbol, ISymbol> containsTheFollowOf, int DONE, Map<ISymbol, Integer> low, Stack<ISymbol> stack) {
269+
stack.push(v);
270+
int top1 = stack.size() - 1;
271+
low.put(v, top1);
272+
for(ISymbol w : containsTheFirstOf.get(v)) {
273+
v.setFollow(v.getFollow().union(w.getFirst())); // union!
274+
}
275+
for(ISymbol w : containsTheFollowOf.get(v)) {
276+
if(low.get(w) == null /* CLEAN */) {
277+
traverseFollow(w, containsTheFirstOf, containsTheFollowOf, DONE, low, stack);
278+
}
279+
// Change compared to the article at compilers.iecc.com: this line is moved outside of the previous if-block
280+
v.setFollow(v.getFollow().union(w.getFollow())); // union!
281+
if(low.get(w) < low.get(v))
282+
low.put(v, low.get(w));
283+
}
284+
if(low.get(v) == top1) // v is the root of this SCC
285+
while(stack.size() - 1 >= top1) {
286+
ISymbol w = stack.pop();
287+
w.setFollow(v.getFollow()); // distribute!
288+
low.put(w, DONE);
289+
}
290+
}
291+
189292
private void calculateRecursion() {
190293
// direct and indirect left recursion :
191294
// depth first search, whenever finding a cycle, those symbols are left recursive with respect to each other
@@ -440,28 +543,6 @@ private boolean mutuallyRecursive(Priority p) {
440543
|| grammar.getRightRecursiveSymbolsMapping().get(p.higher().getLhs()).contains(p.lower().leftHand());
441544
}
442545

443-
/*
444-
* TODO calculate first and follow sets private void calculateFirstFollow() { for(IProduction p :
445-
* getGrammar().prods.values()) { p.calculateDependencies(getGrammar()); }
446-
*
447-
* tarjanStack = new Stack<>(); first_components = Sets.newHashSet(); for(IProduction p :
448-
* getGrammar().prods.values()) { if(p.firstSet().index == -1) { stronglyConnectedTarjan(p.firstSet(),
449-
* first_components); } } }
450-
*
451-
*
452-
* private void stronglyConnectedTarjan(TableSet v, Set<Set<TableSet>> components) { // Set the depth index for v to
453-
* the smallest unused index v.index = index; v.low_link = index; index++; tarjanStack.push(v); v.onStack = true;
454-
*
455-
* for(TableSet d : v.depends_on) { if(d.index == -1) { // Successor w has not yet been visited; recurse on it
456-
* stronglyConnectedTarjan(d, components); v.add(d.value); d.low_link = Math.min(v.low_link, d.low_link); } else
457-
* if(d.onStack) { // Successor w is in stack S and hence in the current SCC v.low_link = Math.min(v.low_link,
458-
* d.index); } }
459-
*
460-
* TableSet t; // If v is a root node, pop the stack and generate an SCC if(v.low_link == v.index) { Set<TableSet>
461-
* component = Sets.newHashSet(); do { t = tarjanStack.pop(); t.onStack = false; t.add(v.value); component.add(t); }
462-
* while(t != v); components.add(component); } }
463-
*/
464-
465546
private void extractExpressionGrammars(SCCNodes<ISymbol> scc) {
466547

467548
for(ISymbol s : grammar.getSymbols()) {

org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/parsetable/State.java

+17-12
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,21 @@
55
import java.util.Map;
66
import java.util.Set;
77

8-
import org.metaborg.parsetable.characterclasses.CharacterClassFactory;
9-
import org.metaborg.parsetable.query.IActionQuery;
10-
import org.metaborg.parsetable.states.IState;
118
import org.metaborg.parsetable.actions.IAction;
129
import org.metaborg.parsetable.actions.IGoto;
1310
import org.metaborg.parsetable.actions.IReduce;
11+
import org.metaborg.parsetable.characterclasses.CharacterClassFactory;
1412
import org.metaborg.parsetable.characterclasses.ICharacterClass;
15-
import org.metaborg.sdf2table.grammar.IProduction;
16-
import org.metaborg.sdf2table.grammar.ISymbol;
17-
import org.metaborg.sdf2table.grammar.CharacterClassSymbol;
18-
import org.metaborg.sdf2table.grammar.Symbol;
1913
import org.metaborg.parsetable.query.ActionsForCharacterDisjointSorted;
2014
import org.metaborg.parsetable.query.ActionsPerCharacterClass;
15+
import org.metaborg.parsetable.query.IActionQuery;
2116
import org.metaborg.parsetable.query.IActionsForCharacter;
22-
17+
import org.metaborg.parsetable.states.IState;
18+
import org.metaborg.sdf2table.deepconflicts.ContextualSymbol;
19+
import org.metaborg.sdf2table.grammar.CharacterClassSymbol;
20+
import org.metaborg.sdf2table.grammar.IProduction;
21+
import org.metaborg.sdf2table.grammar.ISymbol;
22+
import org.metaborg.sdf2table.grammar.Symbol;
2323

2424
import com.google.common.collect.LinkedHashMultimap;
2525
import com.google.common.collect.Maps;
@@ -137,19 +137,24 @@ public void doShift() {
137137

138138
public void doReduces() {
139139
// for each item p_i : A = A0 ... AN .
140-
// add a reduce action reduce([0-256] / follow(A), p_i)
140+
// add a reduce action reduce(FOLLOW(A) / follow-restriction(A), p_i) -- SLR(1) parsing
141141
for(LRItem item : items) {
142142

143143
if(item.getDotPosition() == item.getProd().rightHand().size()) {
144144
int prod_label = pt.productionLabels().get(item.getProd());
145145

146146
ISymbol leftHandSymbol = item.getProd().leftHand();
147147
ICharacterClass fr = leftHandSymbol.followRestriction();
148+
149+
ICharacterClass final_range = leftHandSymbol instanceof ContextualSymbol
150+
? ((ContextualSymbol) leftHandSymbol).getOrigSymbol().getFollow() : leftHandSymbol.getFollow();
151+
// Previous line used to be the following in LR(0). TODO add option to switch between LR(0) and SLR(1)
152+
// ICharacterClass final_range = CharacterClassFactory.FULL_RANGE;
153+
148154
if((fr == null || fr.isEmpty()) && leftHandSymbol.followRestrictionLookahead() == null) {
149-
addReduceAction(item.getProd(), prod_label, CharacterClassFactory.FULL_RANGE, null);
155+
addReduceAction(item.getProd(), prod_label, final_range, null);
150156
} else {
151-
ICharacterClass final_range = CharacterClassFactory.FULL_RANGE;
152-
// Not based on first and follow sets thus, only considering the follow restrictions
157+
// Considering the follow restrictions
153158
if(fr != null && !fr.isEmpty()) {
154159
final_range = final_range.difference(leftHandSymbol.followRestriction());
155160
}

0 commit comments

Comments
 (0)