1
1
package org .metaborg .sdf2table .parsetable ;
2
2
3
3
import java .io .Serializable ;
4
- import java .util .Collections ;
5
- import java .util .List ;
6
- import java .util .Map ;
7
- import java .util .Queue ;
8
- import java .util .Set ;
4
+ import java .util .*;
9
5
10
6
import org .metaborg .parsetable .IParseTable ;
11
7
import org .metaborg .parsetable .states .IState ;
12
- import org .metaborg .sdf2table .deepconflicts .Context ;
13
- import org .metaborg .sdf2table .deepconflicts .ContextPosition ;
14
- import org .metaborg .sdf2table .deepconflicts .ContextType ;
15
- import org .metaborg .sdf2table .deepconflicts .ContextualProduction ;
16
- import org .metaborg .sdf2table .deepconflicts .ContextualSymbol ;
17
- import org .metaborg .sdf2table .deepconflicts .DeepConflictsAnalyzer ;
18
- import org .metaborg .sdf2table .grammar .ContextFreeSymbol ;
19
- import org .metaborg .sdf2table .grammar .GeneralAttribute ;
20
- import org .metaborg .sdf2table .grammar .IAttribute ;
21
- import org .metaborg .sdf2table .grammar .IProduction ;
22
- import org .metaborg .sdf2table .grammar .ISymbol ;
23
- import org .metaborg .sdf2table .grammar .IterSepSymbol ;
24
- import org .metaborg .sdf2table .grammar .IterStarSepSymbol ;
25
- import org .metaborg .sdf2table .grammar .IterStarSymbol ;
26
- import org .metaborg .sdf2table .grammar .IterSymbol ;
27
- import org .metaborg .sdf2table .grammar .Layout ;
28
- import org .metaborg .sdf2table .grammar .LexicalSymbol ;
29
- import org .metaborg .sdf2table .grammar .NormGrammar ;
30
- import org .metaborg .sdf2table .grammar .OptionalSymbol ;
31
- import org .metaborg .sdf2table .grammar .Priority ;
32
- import org .metaborg .sdf2table .grammar .Production ;
33
- import org .metaborg .sdf2table .grammar .Sort ;
34
- import org .metaborg .sdf2table .grammar .Symbol ;
8
+ import org .metaborg .sdf2table .deepconflicts .*;
9
+ import org .metaborg .sdf2table .grammar .*;
35
10
import org .metaborg .sdf2table .util .CheckOverlap ;
36
11
import org .metaborg .sdf2table .util .Graph ;
37
12
import org .metaborg .sdf2table .util .SCCNodes ;
38
13
import org .metaborg .util .log .ILogger ;
39
14
import org .metaborg .util .log .LoggerUtils ;
40
15
41
- import com .google .common .collect .BiMap ;
42
- import com .google .common .collect .HashBiMap ;
43
- import com .google .common .collect .HashMultimap ;
44
- import com .google .common .collect .Lists ;
45
- import com .google .common .collect .Maps ;
46
- import com .google .common .collect .Queues ;
47
- import com .google .common .collect .SetMultimap ;
48
- import com .google .common .collect .Sets ;
16
+ import com .google .common .collect .*;
49
17
50
18
public class ParseTable implements IParseTable , Serializable {
51
19
@@ -59,7 +27,7 @@ public class ParseTable implements IParseTable, Serializable {
59
27
60
28
public static final int FIRST_PRODUCTION_LABEL = 257 ;
61
29
public static final int INITIAL_STATE_NUMBER = 0 ;
62
- public static final int VERSION_NUMBER = 6 ;
30
+ public static final int VERSION_NUMBER = 7 ;
63
31
64
32
private NormGrammar grammar ;
65
33
@@ -140,6 +108,10 @@ public ParseTable(NormGrammar grammar, ParseTableConfiguration config) {
140
108
// create JSGLR parse table productions
141
109
createJSGLRParseTableProductions (productionLabels );
142
110
111
+ // calculate FIRST-set and FOLLOW-set
112
+ calculateFirst ();
113
+ calculateFollow ();
114
+
143
115
// create states if the table should not be generated dynamically
144
116
initialProduction = grammar .getInitialProduction ();
145
117
@@ -186,6 +158,137 @@ private void calculateNullable() {
186
158
} while (markedNullable );
187
159
}
188
160
161
+ // Based on https://compilers.iecc.com/comparch/article/01-04-079
162
+ private void calculateFirst () {
163
+ SetMultimap <ISymbol , IProduction > symbolProductionsMapping = grammar .getSymbolProductionsMapping ();
164
+ Set <ISymbol > symbols = grammar .getSymbols ();
165
+ SetMultimap <ISymbol , ISymbol > containsTheFirstOf = HashMultimap .create ();
166
+
167
+ for (ISymbol s : symbols ) {
168
+ // The FIRST set of a CharacterClass symbol is equal to the character class it represents.
169
+ if (s instanceof CharacterClassSymbol ) {
170
+ s .setFirst (((CharacterClassSymbol ) s ).getCC ());
171
+ continue ;
172
+ }
173
+
174
+ for (IProduction p : symbolProductionsMapping .get (s )) {
175
+ // Direct contributions:
176
+ // If p is of the shape A = A0 ... Ak a Am ... An where all symbols up to Ak are nullable
177
+ for (ISymbol rhs : p .rightHand ()) {
178
+ // Then, a is in FIRST(A).
179
+ if (rhs instanceof CharacterClassSymbol ) {
180
+ s .setFirst (((CharacterClassSymbol ) rhs ).getCC ());
181
+ break ;
182
+ }
183
+
184
+ // Indirect contributions: calculate contains-the-FIRSTs-of
185
+ // If p is of the shape A = A0 ... Ak B Am ... An where all symbols up to Ak are nullable
186
+ // Then, A contains-the-FIRSTs-of B
187
+ containsTheFirstOf .put (s , rhs );
188
+
189
+ if (!rhs .isNullable ())
190
+ break ;
191
+ }
192
+ }
193
+ }
194
+
195
+ // Indirect contributions: Tarjan's algorithm for strongly connected components
196
+ final int DONE = symbols .size ();
197
+ final Map <ISymbol , Integer > low = new HashMap <>();
198
+ final Stack <ISymbol > stack = new Stack <>();
199
+ for (ISymbol v : symbols ) {
200
+ if (low .get (v ) == null /* CLEAN */ )
201
+ traverseFirst (v , containsTheFirstOf , DONE , low , stack );
202
+ }
203
+ }
204
+
205
+ private void traverseFirst (ISymbol v , SetMultimap <ISymbol , ISymbol > containsTheFirstOf , int DONE ,
206
+ Map <ISymbol , Integer > low , Stack <ISymbol > stack ) {
207
+ stack .push (v );
208
+ int top1 = stack .size () - 1 ;
209
+ low .put (v , top1 );
210
+ for (ISymbol w : containsTheFirstOf .get (v )) {
211
+ if (low .get (w ) == null /* CLEAN */ ) {
212
+ traverseFirst (w , containsTheFirstOf , DONE , low , stack );
213
+ }
214
+ // Change compared to the article at compilers.iecc.com: this line is moved outside of the previous if-block
215
+ v .setFirst (v .getFirst ().union (w .getFirst ())); // union!
216
+ if (low .get (w ) < low .get (v ))
217
+ low .put (v , low .get (w ));
218
+ }
219
+ if (low .get (v ) == top1 ) // v is the root of this SCC
220
+ while (stack .size () - 1 >= top1 ) {
221
+ ISymbol w = stack .pop ();
222
+ w .setFirst (v .getFirst ()); // distribute!
223
+ low .put (w , DONE );
224
+ }
225
+ }
226
+
227
+ // Based on https://compilers.iecc.com/comparch/article/01-04-079
228
+ // and Modern Compiler Implementation in Java, Second Edition - Andrew Appel, 2004
229
+ private void calculateFollow () {
230
+ SetMultimap <ISymbol , IProduction > symbolProductionsMapping = grammar .getSymbolProductionsMapping ();
231
+ Set <ISymbol > symbols = grammar .getSymbols ();
232
+ SetMultimap <ISymbol , ISymbol > containsTheFirstOf = HashMultimap .create ();
233
+ SetMultimap <ISymbol , ISymbol > containsTheFollowOf = HashMultimap .create ();
234
+
235
+ for (ISymbol s : symbols ) {
236
+ for (IProduction p : symbolProductionsMapping .get (s )) {
237
+ List <ISymbol > rightHand = p .rightHand ();
238
+ for (int i = 0 , rightHandSize = rightHand .size (); i < rightHandSize ; i ++) {
239
+ ISymbol symbolI = rightHand .get (i );
240
+
241
+ // If p is of the shape A = A0 ... Ai Ak ... Am Aj ... An
242
+ for (int j = i + 1 ; j < rightHandSize ; j ++) {
243
+ // If Ak ... Am are all nullable, FOLLOW(Ai) contains FIRST(Aj)
244
+ ISymbol symbolJ = rightHand .get (j );
245
+ containsTheFirstOf .put (symbolI , symbolJ );
246
+
247
+ if (!symbolJ .isNullable ())
248
+ break ;
249
+ }
250
+
251
+ // If Ak ... An are all nullable, FOLLOW(Ai) contains FOLLOW(A)
252
+ containsTheFollowOf .put (symbolI , s );
253
+ }
254
+ }
255
+ }
256
+
257
+ // Indirect contributions: Tarjan's algorithm for strongly connected components
258
+ final int DONE = symbols .size ();
259
+ final Map <ISymbol , Integer > low = new HashMap <>();
260
+ final Stack <ISymbol > stack = new Stack <>();
261
+ for (ISymbol v : symbols ) {
262
+ if (low .get (v ) == null /* CLEAN */ )
263
+ traverseFollow (v , containsTheFirstOf , containsTheFollowOf , DONE , low , stack );
264
+ }
265
+ }
266
+
267
+ private void traverseFollow (ISymbol v , SetMultimap <ISymbol , ISymbol > containsTheFirstOf ,
268
+ SetMultimap <ISymbol , ISymbol > containsTheFollowOf , int DONE , Map <ISymbol , Integer > low , Stack <ISymbol > stack ) {
269
+ stack .push (v );
270
+ int top1 = stack .size () - 1 ;
271
+ low .put (v , top1 );
272
+ for (ISymbol w : containsTheFirstOf .get (v )) {
273
+ v .setFollow (v .getFollow ().union (w .getFirst ())); // union!
274
+ }
275
+ for (ISymbol w : containsTheFollowOf .get (v )) {
276
+ if (low .get (w ) == null /* CLEAN */ ) {
277
+ traverseFollow (w , containsTheFirstOf , containsTheFollowOf , DONE , low , stack );
278
+ }
279
+ // Change compared to the article at compilers.iecc.com: this line is moved outside of the previous if-block
280
+ v .setFollow (v .getFollow ().union (w .getFollow ())); // union!
281
+ if (low .get (w ) < low .get (v ))
282
+ low .put (v , low .get (w ));
283
+ }
284
+ if (low .get (v ) == top1 ) // v is the root of this SCC
285
+ while (stack .size () - 1 >= top1 ) {
286
+ ISymbol w = stack .pop ();
287
+ w .setFollow (v .getFollow ()); // distribute!
288
+ low .put (w , DONE );
289
+ }
290
+ }
291
+
189
292
private void calculateRecursion () {
190
293
// direct and indirect left recursion :
191
294
// depth first search, whenever finding a cycle, those symbols are left recursive with respect to each other
@@ -440,28 +543,6 @@ private boolean mutuallyRecursive(Priority p) {
440
543
|| grammar .getRightRecursiveSymbolsMapping ().get (p .higher ().getLhs ()).contains (p .lower ().leftHand ());
441
544
}
442
545
443
- /*
444
- * TODO calculate first and follow sets private void calculateFirstFollow() { for(IProduction p :
445
- * getGrammar().prods.values()) { p.calculateDependencies(getGrammar()); }
446
- *
447
- * tarjanStack = new Stack<>(); first_components = Sets.newHashSet(); for(IProduction p :
448
- * getGrammar().prods.values()) { if(p.firstSet().index == -1) { stronglyConnectedTarjan(p.firstSet(),
449
- * first_components); } } }
450
- *
451
- *
452
- * private void stronglyConnectedTarjan(TableSet v, Set<Set<TableSet>> components) { // Set the depth index for v to
453
- * the smallest unused index v.index = index; v.low_link = index; index++; tarjanStack.push(v); v.onStack = true;
454
- *
455
- * for(TableSet d : v.depends_on) { if(d.index == -1) { // Successor w has not yet been visited; recurse on it
456
- * stronglyConnectedTarjan(d, components); v.add(d.value); d.low_link = Math.min(v.low_link, d.low_link); } else
457
- * if(d.onStack) { // Successor w is in stack S and hence in the current SCC v.low_link = Math.min(v.low_link,
458
- * d.index); } }
459
- *
460
- * TableSet t; // If v is a root node, pop the stack and generate an SCC if(v.low_link == v.index) { Set<TableSet>
461
- * component = Sets.newHashSet(); do { t = tarjanStack.pop(); t.onStack = false; t.add(v.value); component.add(t); }
462
- * while(t != v); components.add(component); } }
463
- */
464
-
465
546
private void extractExpressionGrammars (SCCNodes <ISymbol > scc ) {
466
547
467
548
for (ISymbol s : grammar .getSymbols ()) {
0 commit comments