Skip to content

Commit ca85161

Browse files
committed
Implement SLR parse table generation
1 parent ba9191d commit ca85161

File tree

5 files changed

+188
-139
lines changed

5 files changed

+188
-139
lines changed

org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/grammar/ISymbol.java

+13
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,23 @@
77
public interface ISymbol {
88

99
String name();
10+
1011
boolean isNullable();
12+
1113
void setNullable(boolean nullable);
14+
15+
ICharacterClass getFirst();
16+
17+
void setFirst(ICharacterClass first);
18+
19+
ICharacterClass getFollow();
20+
21+
void setFollow(ICharacterClass follow);
22+
1223
ICharacterClass followRestriction();
24+
1325
List<ICharacterClass[]> followRestrictionLookahead();
26+
1427
org.metaborg.parsetable.symbols.ISymbol toParseTableSymbol();
1528
void normalizeFollowRestrictionLookahead();
1629

org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/grammar/Symbol.java

+25-6
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,8 @@
55
import java.util.Map;
66
import java.util.Set;
77

8+
import org.metaborg.parsetable.characterclasses.CharacterClassFactory;
89
import org.metaborg.parsetable.characterclasses.ICharacterClass;
9-
10-
import org.metaborg.sdf2table.grammar.ISymbol;
1110
import org.metaborg.parsetable.symbols.SortCardinality;
1211
import org.metaborg.parsetable.symbols.SyntaxContext;
1312
import org.metaborg.sdf2table.deepconflicts.Context;
@@ -24,12 +23,15 @@ public abstract class Symbol implements Serializable, ISymbol {
2423
protected List<ICharacterClass[]> followRestrictionsLookahead;
2524

2625
private boolean nullable = false;
26+
private ICharacterClass first = CharacterClassFactory.EMPTY_CHARACTER_CLASS;
27+
private ICharacterClass follow = CharacterClassFactory.EMPTY_CHARACTER_CLASS;
2728

28-
/* (non-Javadoc)
29+
/*
30+
* (non-Javadoc)
31+
*
2932
* @see org.metaborg.sdf2table.grammar.ISymbol#name()
3033
*/
31-
@Override
32-
public abstract String name();
34+
@Override public abstract String name();
3335

3436
public boolean isNullable() {
3537
return nullable;
@@ -39,6 +41,22 @@ public void setNullable(boolean nullable) {
3941
this.nullable = nullable;
4042
}
4143

44+
@Override public ICharacterClass getFirst() {
45+
return first;
46+
}
47+
48+
@Override public void setFirst(ICharacterClass first) {
49+
this.first = first;
50+
}
51+
52+
@Override public ICharacterClass getFollow() {
53+
return follow;
54+
}
55+
56+
@Override public void setFollow(ICharacterClass follow) {
57+
this.follow = follow;
58+
}
59+
4260
@Override public String toString() {
4361
return name();
4462
}
@@ -147,5 +165,6 @@ public org.metaborg.parsetable.symbols.ISymbol toParseTableSymbol() {
147165
return toParseTableSymbol(null, null);
148166
}
149167

150-
public abstract org.metaborg.parsetable.symbols.ISymbol toParseTableSymbol(SyntaxContext syntaxContext, SortCardinality cardinality);
168+
public abstract org.metaborg.parsetable.symbols.ISymbol toParseTableSymbol(SyntaxContext syntaxContext,
169+
SortCardinality cardinality);
151170
}

org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/parsetable/ParseTable.java

+135-22
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,10 @@ public ParseTable(NormGrammar grammar, ParseTableConfiguration config) {
115115
// create JSGLR parse table productions
116116
createJSGLRParseTableProductions(productionLabels);
117117

118+
// calculate FIRST-set and FOLLOW-set
119+
calculateFirst();
120+
calculateFollow();
121+
118122
// create states if the table should not be generated dynamically
119123
initialProduction = grammar.getInitialProduction();
120124

@@ -162,6 +166,137 @@ private void calculateNullable() {
162166
} while(markedNullable);
163167
}
164168

169+
// Based on https://compilers.iecc.com/comparch/article/01-04-079
170+
private void calculateFirst() {
171+
SetMultimap<ISymbol, IProduction> symbolProductionsMapping = grammar.getSymbolProductionsMapping();
172+
Set<ISymbol> symbols = grammar.getSymbols();
173+
SetMultimap<ISymbol, ISymbol> containsTheFirstOf = HashMultimap.create();
174+
175+
for(ISymbol s : symbols) {
176+
// The FIRST set of a CharacterClass symbol is equal to the character class it represents.
177+
if(s instanceof CharacterClassSymbol) {
178+
s.setFirst(((CharacterClassSymbol) s).getCC());
179+
continue;
180+
}
181+
182+
for(IProduction p : symbolProductionsMapping.get(s)) {
183+
// Direct contributions:
184+
// If p is of the shape A = A0 ... Ak a Am ... An where all symbols up to Ak are nullable
185+
for(ISymbol rhs : p.rightHand()) {
186+
// Then, a is in FIRST(A).
187+
if(rhs instanceof CharacterClassSymbol) {
188+
s.setFirst(((CharacterClassSymbol) rhs).getCC());
189+
break;
190+
}
191+
192+
// Indirect contributions: calculate contains-the-FIRSTs-of
193+
// If p is of the shape A = A0 ... Ak B Am ... An where all symbols up to Ak are nullable
194+
// Then, A contains-the-FIRSTs-of B
195+
containsTheFirstOf.put(s, rhs);
196+
197+
if(!rhs.isNullable())
198+
break;
199+
}
200+
}
201+
}
202+
203+
// Indirect contributions: Tarjan's algorithm for strongly connected components
204+
final int DONE = symbols.size();
205+
final Map<ISymbol, Integer> low = new HashMap<>();
206+
final Stack<ISymbol> stack = new Stack<>();
207+
for(ISymbol v : symbols) {
208+
if(low.get(v) == null /* CLEAN */)
209+
traverseFirst(v, containsTheFirstOf, DONE, low, stack);
210+
}
211+
}
212+
213+
private void traverseFirst(ISymbol v, SetMultimap<ISymbol, ISymbol> containsTheFirstOf, int DONE,
214+
Map<ISymbol, Integer> low, Stack<ISymbol> stack) {
215+
stack.push(v);
216+
int top1 = stack.size() - 1;
217+
low.put(v, top1);
218+
for(ISymbol w : containsTheFirstOf.get(v)) {
219+
if(low.get(w) == null /* CLEAN */) {
220+
traverseFirst(w, containsTheFirstOf, DONE, low, stack);
221+
}
222+
// Change compared to the article at compilers.iecc.com: this line is moved outside of the previous if-block
223+
v.setFirst(v.getFirst().union(w.getFirst())); // union!
224+
if(low.get(w) < low.get(v))
225+
low.put(v, low.get(w));
226+
}
227+
if(low.get(v) == top1) // v is the root of this SCC
228+
while(stack.size() - 1 >= top1) {
229+
ISymbol w = stack.pop();
230+
w.setFirst(v.getFirst()); // distribute!
231+
low.put(w, DONE);
232+
}
233+
}
234+
235+
// Based on https://compilers.iecc.com/comparch/article/01-04-079
236+
// and Modern Compiler Implementation in Java, Second Edition - Andrew Appel, 2004
237+
private void calculateFollow() {
238+
SetMultimap<ISymbol, IProduction> symbolProductionsMapping = grammar.getSymbolProductionsMapping();
239+
Set<ISymbol> symbols = grammar.getSymbols();
240+
SetMultimap<ISymbol, ISymbol> containsTheFirstOf = HashMultimap.create();
241+
SetMultimap<ISymbol, ISymbol> containsTheFollowOf = HashMultimap.create();
242+
243+
for(ISymbol s : symbols) {
244+
for(IProduction p : symbolProductionsMapping.get(s)) {
245+
List<ISymbol> rightHand = p.rightHand();
246+
for(int i = 0, rightHandSize = rightHand.size(); i < rightHandSize; i++) {
247+
ISymbol symbolI = rightHand.get(i);
248+
249+
// If p is of the shape A = A0 ... Ai Ak ... Am Aj ... An
250+
for(int j = i + 1; j < rightHandSize; j++) {
251+
// If Ak ... Am are all nullable, FOLLOW(Ai) contains FIRST(Aj)
252+
ISymbol symbolJ = rightHand.get(j);
253+
containsTheFirstOf.put(symbolI, symbolJ);
254+
255+
if(!symbolJ.isNullable())
256+
break;
257+
}
258+
259+
// If Ak ... An are all nullable, FOLLOW(Ai) contains FOLLOW(A)
260+
containsTheFollowOf.put(symbolI, s);
261+
}
262+
}
263+
}
264+
265+
// Indirect contributions: Tarjan's algorithm for strongly connected components
266+
final int DONE = symbols.size();
267+
final Map<ISymbol, Integer> low = new HashMap<>();
268+
final Stack<ISymbol> stack = new Stack<>();
269+
for(ISymbol v : symbols) {
270+
if(low.get(v) == null /* CLEAN */)
271+
traverseFollow(v, containsTheFirstOf, containsTheFollowOf, DONE, low, stack);
272+
}
273+
}
274+
275+
private void traverseFollow(ISymbol v, SetMultimap<ISymbol, ISymbol> containsTheFirstOf,
276+
SetMultimap<ISymbol, ISymbol> containsTheFollowOf, int DONE, Map<ISymbol, Integer> low, Stack<ISymbol> stack) {
277+
stack.push(v);
278+
int top1 = stack.size() - 1;
279+
low.put(v, top1);
280+
for(ISymbol w : containsTheFirstOf.get(v)) {
281+
v.setFollow(v.getFollow().union(w.getFirst())); // union!
282+
}
283+
for(ISymbol w : containsTheFollowOf.get(v)) {
284+
if(low.get(w) == null /* CLEAN */) {
285+
traverseFollow(w, containsTheFirstOf, containsTheFollowOf, DONE, low, stack);
286+
}
287+
// Change compared to the article at compilers.iecc.com: this line is moved outside of the previous if-block
288+
v.setFollow(v.getFollow().union(w.getFollow())); // union!
289+
if(low.get(w) < low.get(v))
290+
low.put(v, low.get(w));
291+
}
292+
if(low.get(v) == top1) // v is the root of this SCC
293+
while(stack.size() - 1 >= top1) {
294+
ISymbol w = stack.pop();
295+
w.setFollow(v.getFollow()); // distribute!
296+
low.put(w, DONE);
297+
}
298+
}
299+
165300
private void calculateRecursion() {
166301
// direct and indirect left recursion :
167302
// depth first search, whenever finding a cycle, those symbols are left recursive with respect to each other
@@ -415,28 +550,6 @@ private boolean mutuallyRecursive(Priority p) {
415550
|| grammar.getRightRecursiveSymbolsMapping().get(p.higher().getLhs()).contains(p.lower().leftHand());
416551
}
417552

418-
/*
419-
* TODO calculate first and follow sets private void calculateFirstFollow() { for(IProduction p :
420-
* getGrammar().prods.values()) { p.calculateDependencies(getGrammar()); }
421-
*
422-
* tarjanStack = new Stack<>(); first_components = Sets.newHashSet(); for(IProduction p :
423-
* getGrammar().prods.values()) { if(p.firstSet().index == -1) { stronglyConnectedTarjan(p.firstSet(),
424-
* first_components); } } }
425-
*
426-
*
427-
* private void stronglyConnectedTarjan(TableSet v, Set<Set<TableSet>> components) { // Set the depth index for v to
428-
* the smallest unused index v.index = index; v.low_link = index; index++; tarjanStack.push(v); v.onStack = true;
429-
*
430-
* for(TableSet d : v.depends_on) { if(d.index == -1) { // Successor w has not yet been visited; recurse on it
431-
* stronglyConnectedTarjan(d, components); v.add(d.value); d.low_link = Math.min(v.low_link, d.low_link); } else
432-
* if(d.onStack) { // Successor w is in stack S and hence in the current SCC v.low_link = Math.min(v.low_link,
433-
* d.index); } }
434-
*
435-
* TableSet t; // If v is a root node, pop the stack and generate an SCC if(v.low_link == v.index) { Set<TableSet>
436-
* component = Sets.newHashSet(); do { t = tarjanStack.pop(); t.onStack = false; t.add(v.value); component.add(t); }
437-
* while(t != v); components.add(component); } }
438-
*/
439-
440553
private void extractExpressionGrammars(SCCNodes<ISymbol> scc) {
441554

442555
for(ISymbol s : grammar.getSymbols()) {

org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/parsetable/State.java

+15-4
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,12 @@
1919
import org.metaborg.sdf2table.grammar.IProduction;
2020
import org.metaborg.sdf2table.grammar.ISymbol;
2121
import org.metaborg.sdf2table.grammar.Symbol;
22+
import org.metaborg.parsetable.states.IState;
23+
import org.metaborg.sdf2table.deepconflicts.ContextualSymbol;
24+
import org.metaborg.sdf2table.grammar.CharacterClassSymbol;
25+
import org.metaborg.sdf2table.grammar.IProduction;
26+
import org.metaborg.sdf2table.grammar.ISymbol;
27+
import org.metaborg.sdf2table.grammar.Symbol;
2228

2329
import com.google.common.collect.LinkedHashMultimap;
2430
import com.google.common.collect.Maps;
@@ -135,19 +141,24 @@ public void doShift() {
135141

136142
public void doReduces() {
137143
// for each item p_i : A = A0 ... AN .
138-
// add a reduce action reduce([0-MAX_CHAR,eof] / follow(A), p_i)
144+
// add a reduce action reduce(FOLLOW(A) / follow-restriction(A), p_i) -- SLR(1) parsing
139145
for(LRItem item : items) {
140146

141147
if(item.getDotPosition() == item.getProd().arity()) {
142148
int prod_label = pt.productionLabels().get(item.getProd());
143149

144150
ISymbol leftHandSymbol = item.getProd().leftHand();
145151
ICharacterClass fr = leftHandSymbol.followRestriction();
152+
153+
ICharacterClass final_range = leftHandSymbol instanceof ContextualSymbol
154+
? ((ContextualSymbol) leftHandSymbol).getOrigSymbol().getFollow() : leftHandSymbol.getFollow();
155+
// Previous line used to be the following in LR(0). TODO add option to switch between LR(0) and SLR(1)
156+
// ICharacterClass final_range = CharacterClassFactory.FULL_RANGE;
157+
146158
if((fr == null || fr.isEmpty()) && leftHandSymbol.followRestrictionLookahead() == null) {
147-
addReduceAction(item.getProd(), prod_label, CharacterClassFactory.FULL_RANGE, null);
159+
addReduceAction(item.getProd(), prod_label, final_range, null);
148160
} else {
149-
ICharacterClass final_range = CharacterClassFactory.FULL_RANGE;
150-
// Not based on first and follow sets thus, only considering the follow restrictions
161+
// Considering the follow restrictions
151162
if(fr != null && !fr.isEmpty()) {
152163
final_range = final_range.difference(leftHandSymbol.followRestriction());
153164
}

0 commit comments

Comments
 (0)