@@ -115,6 +115,10 @@ public ParseTable(NormGrammar grammar, ParseTableConfiguration config) {
115
115
// create JSGLR parse table productions
116
116
createJSGLRParseTableProductions (productionLabels );
117
117
118
+ // calculate FIRST-set and FOLLOW-set
119
+ calculateFirst ();
120
+ calculateFollow ();
121
+
118
122
// create states if the table should not be generated dynamically
119
123
initialProduction = grammar .getInitialProduction ();
120
124
@@ -162,6 +166,137 @@ private void calculateNullable() {
162
166
} while (markedNullable );
163
167
}
164
168
169
+ // Based on https://compilers.iecc.com/comparch/article/01-04-079
170
+ private void calculateFirst () {
171
+ SetMultimap <ISymbol , IProduction > symbolProductionsMapping = grammar .getSymbolProductionsMapping ();
172
+ Set <ISymbol > symbols = grammar .getSymbols ();
173
+ SetMultimap <ISymbol , ISymbol > containsTheFirstOf = HashMultimap .create ();
174
+
175
+ for (ISymbol s : symbols ) {
176
+ // The FIRST set of a CharacterClass symbol is equal to the character class it represents.
177
+ if (s instanceof CharacterClassSymbol ) {
178
+ s .setFirst (((CharacterClassSymbol ) s ).getCC ());
179
+ continue ;
180
+ }
181
+
182
+ for (IProduction p : symbolProductionsMapping .get (s )) {
183
+ // Direct contributions:
184
+ // If p is of the shape A = A0 ... Ak a Am ... An where all symbols up to Ak are nullable
185
+ for (ISymbol rhs : p .rightHand ()) {
186
+ // Then, a is in FIRST(A).
187
+ if (rhs instanceof CharacterClassSymbol ) {
188
+ s .setFirst (((CharacterClassSymbol ) rhs ).getCC ());
189
+ break ;
190
+ }
191
+
192
+ // Indirect contributions: calculate contains-the-FIRSTs-of
193
+ // If p is of the shape A = A0 ... Ak B Am ... An where all symbols up to Ak are nullable
194
+ // Then, A contains-the-FIRSTs-of B
195
+ containsTheFirstOf .put (s , rhs );
196
+
197
+ if (!rhs .isNullable ())
198
+ break ;
199
+ }
200
+ }
201
+ }
202
+
203
+ // Indirect contributions: Tarjan's algorithm for strongly connected components
204
+ final int DONE = symbols .size ();
205
+ final Map <ISymbol , Integer > low = new HashMap <>();
206
+ final Stack <ISymbol > stack = new Stack <>();
207
+ for (ISymbol v : symbols ) {
208
+ if (low .get (v ) == null /* CLEAN */ )
209
+ traverseFirst (v , containsTheFirstOf , DONE , low , stack );
210
+ }
211
+ }
212
+
213
+ private void traverseFirst (ISymbol v , SetMultimap <ISymbol , ISymbol > containsTheFirstOf , int DONE ,
214
+ Map <ISymbol , Integer > low , Stack <ISymbol > stack ) {
215
+ stack .push (v );
216
+ int top1 = stack .size () - 1 ;
217
+ low .put (v , top1 );
218
+ for (ISymbol w : containsTheFirstOf .get (v )) {
219
+ if (low .get (w ) == null /* CLEAN */ ) {
220
+ traverseFirst (w , containsTheFirstOf , DONE , low , stack );
221
+ }
222
+ // Change compared to the article at compilers.iecc.com: this line is moved outside of the previous if-block
223
+ v .setFirst (v .getFirst ().union (w .getFirst ())); // union!
224
+ if (low .get (w ) < low .get (v ))
225
+ low .put (v , low .get (w ));
226
+ }
227
+ if (low .get (v ) == top1 ) // v is the root of this SCC
228
+ while (stack .size () - 1 >= top1 ) {
229
+ ISymbol w = stack .pop ();
230
+ w .setFirst (v .getFirst ()); // distribute!
231
+ low .put (w , DONE );
232
+ }
233
+ }
234
+
235
+ // Based on https://compilers.iecc.com/comparch/article/01-04-079
236
+ // and Modern Compiler Implementation in Java, Second Edition - Andrew Appel, 2004
237
+ private void calculateFollow () {
238
+ SetMultimap <ISymbol , IProduction > symbolProductionsMapping = grammar .getSymbolProductionsMapping ();
239
+ Set <ISymbol > symbols = grammar .getSymbols ();
240
+ SetMultimap <ISymbol , ISymbol > containsTheFirstOf = HashMultimap .create ();
241
+ SetMultimap <ISymbol , ISymbol > containsTheFollowOf = HashMultimap .create ();
242
+
243
+ for (ISymbol s : symbols ) {
244
+ for (IProduction p : symbolProductionsMapping .get (s )) {
245
+ List <ISymbol > rightHand = p .rightHand ();
246
+ for (int i = 0 , rightHandSize = rightHand .size (); i < rightHandSize ; i ++) {
247
+ ISymbol symbolI = rightHand .get (i );
248
+
249
+ // If p is of the shape A = A0 ... Ai Ak ... Am Aj ... An
250
+ for (int j = i + 1 ; j < rightHandSize ; j ++) {
251
+ // If Ak ... Am are all nullable, FOLLOW(Ai) contains FIRST(Aj)
252
+ ISymbol symbolJ = rightHand .get (j );
253
+ containsTheFirstOf .put (symbolI , symbolJ );
254
+
255
+ if (!symbolJ .isNullable ())
256
+ break ;
257
+ }
258
+
259
+ // If Ak ... An are all nullable, FOLLOW(Ai) contains FOLLOW(A)
260
+ containsTheFollowOf .put (symbolI , s );
261
+ }
262
+ }
263
+ }
264
+
265
+ // Indirect contributions: Tarjan's algorithm for strongly connected components
266
+ final int DONE = symbols .size ();
267
+ final Map <ISymbol , Integer > low = new HashMap <>();
268
+ final Stack <ISymbol > stack = new Stack <>();
269
+ for (ISymbol v : symbols ) {
270
+ if (low .get (v ) == null /* CLEAN */ )
271
+ traverseFollow (v , containsTheFirstOf , containsTheFollowOf , DONE , low , stack );
272
+ }
273
+ }
274
+
275
+ private void traverseFollow (ISymbol v , SetMultimap <ISymbol , ISymbol > containsTheFirstOf ,
276
+ SetMultimap <ISymbol , ISymbol > containsTheFollowOf , int DONE , Map <ISymbol , Integer > low , Stack <ISymbol > stack ) {
277
+ stack .push (v );
278
+ int top1 = stack .size () - 1 ;
279
+ low .put (v , top1 );
280
+ for (ISymbol w : containsTheFirstOf .get (v )) {
281
+ v .setFollow (v .getFollow ().union (w .getFirst ())); // union!
282
+ }
283
+ for (ISymbol w : containsTheFollowOf .get (v )) {
284
+ if (low .get (w ) == null /* CLEAN */ ) {
285
+ traverseFollow (w , containsTheFirstOf , containsTheFollowOf , DONE , low , stack );
286
+ }
287
+ // Change compared to the article at compilers.iecc.com: this line is moved outside of the previous if-block
288
+ v .setFollow (v .getFollow ().union (w .getFollow ())); // union!
289
+ if (low .get (w ) < low .get (v ))
290
+ low .put (v , low .get (w ));
291
+ }
292
+ if (low .get (v ) == top1 ) // v is the root of this SCC
293
+ while (stack .size () - 1 >= top1 ) {
294
+ ISymbol w = stack .pop ();
295
+ w .setFollow (v .getFollow ()); // distribute!
296
+ low .put (w , DONE );
297
+ }
298
+ }
299
+
165
300
private void calculateRecursion () {
166
301
// direct and indirect left recursion :
167
302
// depth first search, whenever finding a cycle, those symbols are left recursive with respect to each other
@@ -415,28 +550,6 @@ private boolean mutuallyRecursive(Priority p) {
415
550
|| grammar .getRightRecursiveSymbolsMapping ().get (p .higher ().getLhs ()).contains (p .lower ().leftHand ());
416
551
}
417
552
418
- /*
419
- * TODO calculate first and follow sets private void calculateFirstFollow() { for(IProduction p :
420
- * getGrammar().prods.values()) { p.calculateDependencies(getGrammar()); }
421
- *
422
- * tarjanStack = new Stack<>(); first_components = Sets.newHashSet(); for(IProduction p :
423
- * getGrammar().prods.values()) { if(p.firstSet().index == -1) { stronglyConnectedTarjan(p.firstSet(),
424
- * first_components); } } }
425
- *
426
- *
427
- * private void stronglyConnectedTarjan(TableSet v, Set<Set<TableSet>> components) { // Set the depth index for v to
428
- * the smallest unused index v.index = index; v.low_link = index; index++; tarjanStack.push(v); v.onStack = true;
429
- *
430
- * for(TableSet d : v.depends_on) { if(d.index == -1) { // Successor w has not yet been visited; recurse on it
431
- * stronglyConnectedTarjan(d, components); v.add(d.value); d.low_link = Math.min(v.low_link, d.low_link); } else
432
- * if(d.onStack) { // Successor w is in stack S and hence in the current SCC v.low_link = Math.min(v.low_link,
433
- * d.index); } }
434
- *
435
- * TableSet t; // If v is a root node, pop the stack and generate an SCC if(v.low_link == v.index) { Set<TableSet>
436
- * component = Sets.newHashSet(); do { t = tarjanStack.pop(); t.onStack = false; t.add(v.value); component.add(t); }
437
- * while(t != v); components.add(component); } }
438
- */
439
-
440
553
private void extractExpressionGrammars (SCCNodes <ISymbol > scc ) {
441
554
442
555
for (ISymbol s : grammar .getSymbols ()) {
0 commit comments