Improvements to Lexer Errors and Errors in General (#79)

Added better unexpected messages to lexer instructions, allowed JumpTable to have better unexpected messages, cut unexpected messages short at first newline OR space.
j-mie6 · Mar 5, 2021 · 4515ca3 · 4515ca3
1 parent edafd97
commit 4515ca3
Show file tree

Hide file tree

Showing 10 changed files with 54 additions and 50 deletions.
diff --git a/src/main/scala/parsley/internal/deepembedding/AlternativeEmbedding.scala b/src/main/scala/parsley/internal/deepembedding/AlternativeEmbedding.scala
@@ -88,9 +88,9 @@ private [parsley] final class <|>[A, B](_p: =>Parsley[A], _q: =>Parsley[B]) exte
             val end = state.freshLabel()
             val default = state.freshLabel()
             val merge = state.freshLabel()
-            val (roots, leads, ls, expecteds) = foldTablified(tablified, state, mutable.Map.empty, Nil, Nil, mutable.Map.empty)
+            val (roots, leads, ls, size, expecteds) = foldTablified(tablified, state, mutable.Map.empty, Nil, Nil, 0, mutable.Set.empty)
             //println(leads, tablified)
-            instrs += new instructions.JumpTable(leads, ls, default, merge, expecteds)
+            instrs += new instructions.JumpTable(leads, ls, default, merge, size, expecteds)
             codeGenRoots(roots, ls, end) >> {
                 instrs += new instructions.Catch(merge) //This instruction is reachable as default - 1
                 instrs += new instructions.Label(default)
@@ -157,29 +157,29 @@ private [parsley] final class <|>[A, B](_p: =>Parsley[A], _q: =>Parsley[B]) exte
                                roots: mutable.Map[Char, List[Parsley[_]]],
                                leads: List[Char],
                                labels: List[Int],
-                               expecteds: mutable.Map[Char, Set[ErrorItem]]):
-        (List[List[Parsley[_]]], List[Char], List[Int], Map[Char, Set[ErrorItem]]) = tablified match {
-        case (_, None)::tablified_ => foldTablified(tablified_, labelGen, roots, leads, labels, expecteds)
+                               size: Int,
+                               expecteds: mutable.Set[ErrorItem]):
+        (List[List[Parsley[_]]], List[Char], List[Int], Int, Set[ErrorItem]) = tablified match {
+        case (_, None)::tablified_ => foldTablified(tablified_, labelGen, roots, leads, labels, size, expecteds)
         case (root, Some(lead))::tablified_ =>
-            val (c: Char, expected: ErrorItem) = lead match {
-                case ct@CharTok(d) => (d, ct.expected.fold[ErrorItem](Raw(d))(Desc(_)))
-                case st@StringTok(s) => (s.head, st.expected.fold[ErrorItem](Raw(s))(Desc(_)))
-                case st@Specific(s) => (s.head, Desc(st.expected.getOrElse(s)))
-                case op@MaxOp(o) => (o.head, Desc(op.expected.getOrElse(o)))
-                case sl: StringLiteral => ('"', Desc(sl.expected.getOrElse("string")))
-                case rs: RawStringLiteral => ('"', Desc(rs.expected.getOrElse("string")))
+            val (c: Char, expected: ErrorItem, _size: Int) = lead match {
+                case ct@CharTok(d) => (d, ct.expected.fold[ErrorItem](Raw(d))(Desc(_)), 1)
+                case st@StringTok(s) => (s.head, st.expected.fold[ErrorItem](Raw(s))(Desc(_)), s.size)
+                case st@Specific(s) => (s.head, Desc(st.expected.getOrElse(s)), s.size)
+                case op@MaxOp(o) => (o.head, Desc(op.expected.getOrElse(o)), o.size)
+                case sl: StringLiteral => ('"', Desc(sl.expected.getOrElse("string")), 1)
+                case rs: RawStringLiteral => ('"', Desc(rs.expected.getOrElse("string")), 1)
             }
+            expecteds += expected
             if (roots.contains(c)) {
                 roots(c) = root::roots(c)
-                expecteds(c) = expecteds(c) + expected
-                foldTablified(tablified_, labelGen, roots, leads, labelGen.freshLabel() :: labels, expecteds)
+                foldTablified(tablified_, labelGen, roots, leads, labelGen.freshLabel() :: labels, Math.max(size, _size), expecteds)
             }
             else {
                 roots(c) = root::Nil
-                expecteds(c) = Set(expected)
-                foldTablified(tablified_, labelGen, roots, c::leads, labelGen.freshLabel() :: labels, expecteds)
+                foldTablified(tablified_, labelGen, roots, c::leads, labelGen.freshLabel() :: labels, Math.max(size, _size), expecteds)
             }
-        case Nil => (leads.map(roots(_)), leads, labels, expecteds.toMap)
+        case Nil => (leads.map(roots(_)), leads, labels, size, expecteds.toSet)
     }
     @tailrec private def tablable(p: Parsley[_]): Option[Parsley[_]] = p match {
         // CODO: Numeric parsers by leading digit (This one would require changing the foldTablified function a bit)

diff --git a/src/main/scala/parsley/internal/errors/Errors.scala b/src/main/scala/parsley/internal/errors/Errors.scala
@@ -81,7 +81,8 @@ private [internal] case class Raw(cs: String) extends ErrorItem {
         case "\t"            => "tab"
         case " "             => "space"
         case Unprintable(up) => f"unprintable character (\\u${up.head.toInt}%04X)"
-        case cs              => "\"" + cs.takeWhile(_ != '\n') + "\""
+        // Do we want this only in unexpecteds?
+        case cs              => "\"" + cs.takeWhile(c => c != '\n' && c != ' ') + "\""
     }
 }
 private [internal] object Raw {

diff --git a/src/main/scala/parsley/internal/machine/Context.scala b/src/main/scala/parsley/internal/machine/Context.scala
@@ -6,7 +6,7 @@ import parsley.{Failure, Result, Success}
 import parsley.internal.errors.{ErrorItem, LineBuilder}
 import parsley.internal.machine.errors.{
     ErrorItemBuilder,
-    DefuncError, ClassicExpectedError, ClassicExpectedErrorWithReason, ClassicFancyError, ClassicUnexpectedError, WithHints,
+    DefuncError, ClassicExpectedError, ClassicExpectedErrorWithReason, ClassicFancyError, ClassicUnexpectedError, WithHints, TokenError,
     DefuncHints, EmptyHints, MergeHints, ReplaceHint, PopHints, AddError
 }
 
@@ -182,6 +182,9 @@ private [parsley] final class Context(private [machine] var instrs: Array[Instr]
     private [machine] def expectedFail(expected: Option[ErrorItem], reason: String): Unit = {
         this.fail(new ClassicExpectedErrorWithReason(offset, line, col, expected, reason))
     }
+    private [machine] def expectedTokenFail(expected: Option[ErrorItem], size: Int): Unit = {
+        this.fail(new TokenError(offset, line, col, expected, size))
+    }
     private [machine] def fail(error: DefuncError): Unit = {
         this.pushError(error)
         this.fail()

diff --git a/src/main/scala/parsley/internal/machine/errors/DefuncError.scala b/src/main/scala/parsley/internal/machine/errors/DefuncError.scala
@@ -41,7 +41,7 @@ private [errors] object BaseError {
         case err: ClassicUnexpectedError => Some(err.expected)
         case err: EmptyError => Some(err.expected)
         case err: EmptyErrorWithReason => Some(err.expected)
-        case err: StringTokError => Some(err.expected)
+        case err: TokenError => Some(err.expected)
         case _ => None
     }
 }
@@ -82,7 +82,7 @@ private [machine] case class EmptyError(offset: Int, line: Int, col: Int, expect
         TrivialError(offset, line, col, None, expectedSet(expected), ParseError.NoReason)
     }
 }
-private [machine] case class StringTokError(offset: Int, line: Int, col: Int, expected: Option[ErrorItem], size: Int) extends DefuncError {
+private [machine] case class TokenError(offset: Int, line: Int, col: Int, expected: Option[ErrorItem], size: Int) extends DefuncError {
     val isTrivialError: Boolean = true
     val isExpectedEmpty: Boolean = expected.isEmpty
     override def asParseError(implicit builder: ErrorItemBuilder): ParseError = {
@@ -96,11 +96,11 @@ private [machine] case class EmptyErrorWithReason(offset: Int, line: Int, col: I
         TrivialError(offset, line, col, None, expectedSet(expected), Set(reason))
     }
 }
-private [machine] case class MultiExpectedError(offset: Int, line: Int, col: Int, expected: Set[ErrorItem]) extends DefuncError {
+private [machine] case class MultiExpectedError(offset: Int, line: Int, col: Int, expected: Set[ErrorItem], size: Int) extends DefuncError {
     val isTrivialError: Boolean = true
     val isExpectedEmpty: Boolean = expected.isEmpty
     override def asParseError(implicit builder: ErrorItemBuilder): ParseError = {
-        TrivialError(offset, line, col, Some(builder(offset)), expected, ParseError.NoReason)
+        TrivialError(offset, line, col, Some(builder(offset, size)), expected, ParseError.NoReason)
     }
 }
 

diff --git a/src/main/scala/parsley/internal/machine/instructions/IntrinsicInstrs.scala b/src/main/scala/parsley/internal/machine/instructions/IntrinsicInstrs.scala
@@ -2,7 +2,7 @@ package parsley.internal.machine.instructions
 
 import parsley.internal.errors.{Desc, Raw}
 import parsley.internal.machine.{Context, Good}
-import parsley.internal.machine.errors.{EmptyError, EmptyErrorWithReason, StringTokError}
+import parsley.internal.machine.errors.{EmptyError, EmptyErrorWithReason}
 
 import scala.annotation.tailrec
 
@@ -77,9 +77,8 @@ private [internal] final class StringTok private [instructions] (s: String, x: A
         if (j < sz && i < ctx.inputsz && ctx.input.charAt(i) == cs(j)) go(ctx, i + 1, j + 1)
         else if (j < sz) {
             // The offset, line and column haven't been edited yet, so are in the right place
-            val err = new StringTokError(ctx.offset, ctx.line, ctx.col, errorItem, sz)
+            ctx.expectedTokenFail(errorItem, sz)
             ctx.offset = i
-            ctx.fail(err)
         }
         else {
             ctx.col = colAdjust(ctx.col)

diff --git a/src/main/scala/parsley/internal/machine/instructions/OptInstrs.scala b/src/main/scala/parsley/internal/machine/instructions/OptInstrs.scala
@@ -104,10 +104,10 @@ private [internal] final class AlwaysRecoverWith[A](x: A) extends Instr {
 private [internal] final class JumpTable(prefixes: List[Char], labels: List[Int],
         private [this] var default: Int,
         private [this] var merge: Int,
-        _expecteds: Map[Char, Set[ErrorItem]]) extends Instr {
+        private [this] val size: Int,
+        private [this] val errorItems: Set[ErrorItem]) extends Instr {
     private [this] var defaultPreamble: Int = _
     private [this] val jumpTable = mutable.LongMap(prefixes.map(_.toLong).zip(labels): _*)
-    val errorItems = _expecteds.toSet[(Char, Set[ErrorItem])].flatMap(_._2)
 
     override def apply(ctx: Context): Unit = {
         if (ctx.moreInput) {
@@ -127,7 +127,7 @@ private [internal] final class JumpTable(prefixes: List[Char], labels: List[Int]
     }
 
     private def addErrors(ctx: Context): Unit = {
-        ctx.errs = new ErrorStack(new MultiExpectedError(ctx.offset, ctx.line, ctx.col, errorItems), ctx.errs)
+        ctx.errs = new ErrorStack(new MultiExpectedError(ctx.offset, ctx.line, ctx.col, errorItems, size), ctx.errs)
         ctx.pushHandler(merge)
     }
 

diff --git a/src/main/scala/parsley/internal/machine/instructions/TokenInstrs.scala b/src/main/scala/parsley/internal/machine/instructions/TokenInstrs.scala
@@ -55,7 +55,7 @@ private [instructions] abstract class WhiteSpaceLike(start: String, end: String,
         spaces(ctx)
         val startsMulti = ctx.moreInput && ctx.input.startsWith(start, ctx.offset)
         if (startsMulti && multiLineComment(ctx)) multisOnly(ctx)
-        else if (startsMulti) ctx.expectedFail(expected = endOfComment)
+        else if (startsMulti) ctx.expectedTokenFail(expected = endOfComment, end.length)
         else ctx.pushAndContinue(())
     }
 
@@ -68,7 +68,7 @@ private [instructions] abstract class WhiteSpaceLike(start: String, end: String,
         if (ctx.moreInput && ctx.input.startsWith(sharedPrefix, ctx.offset)) {
             val startsMulti = ctx.input.startsWith(factoredStart, ctx.offset + sharedPrefix.length)
             if (startsMulti && multiLineComment(ctx)) singlesAndMultis(ctx)
-            else if (startsMulti) ctx.expectedFail(expected = endOfComment)
+            else if (startsMulti) ctx.expectedTokenFail(expected = endOfComment, end.length)
             else if (ctx.input.startsWith(factoredLine, ctx.offset + sharedPrefix.length)) {
                 singleLineComment(ctx)
                 singlesAndMultis(ctx)
@@ -89,13 +89,14 @@ private [instructions] abstract class WhiteSpaceLike(start: String, end: String,
 
 private [internal] final class TokenComment(start: String, end: String, line: String, nested: Boolean) extends CommentLexer(start, end, line, nested) {
     private [this] final val comment = Some(Desc("comment"))
+    private [this] final val openingSize = Math.max(start.size, line.size)
 
     // PRE: one of the comments is supported
     // PRE: Multi-line comments may not prefix single-line, but single-line may prefix multi-line
     override def apply(ctx: Context): Unit = {
         val startsMulti = multiAllowed && ctx.input.startsWith(start, ctx.offset)
         // If neither comment is available we fail
-        if (!ctx.moreInput || (!lineAllowed || !ctx.input.startsWith(line, ctx.offset)) && !startsMulti) ctx.expectedFail(expected = comment)
+        if (!ctx.moreInput || (!lineAllowed || !ctx.input.startsWith(line, ctx.offset)) && !startsMulti) ctx.expectedTokenFail(expected = comment, openingSize)
         // One of the comments must be available
         else if (startsMulti && multiLineComment(ctx)) ctx.pushAndContinue(())
         else if (startsMulti) ctx.expectedFail(expected = endOfComment)
@@ -177,7 +178,7 @@ private [instructions] abstract class TokenSpecificAllowTrailing(_specific: Stri
 
     @tailrec final private def readSpecific(ctx: Context, i: Int, j: Int): Unit = {
         if (j < strsz && readCharCaseHandled(ctx, i) == specific(j)) readSpecific(ctx, i + 1, j + 1)
-        else if (j < strsz) ctx.expectedFail(expected)
+        else if (j < strsz) ctx.expectedTokenFail(expected, strsz)
         else {
             ctx.saveState()
             ctx.fastUncheckedConsumeChars(strsz)
@@ -187,15 +188,15 @@ private [instructions] abstract class TokenSpecificAllowTrailing(_specific: Stri
 
     final override def apply(ctx: Context): Unit = {
         if (ctx.inputsz >= ctx.offset + strsz) readSpecific(ctx, ctx.offset, 0)
-        else ctx.expectedFail(expected)
+        else ctx.expectedTokenFail(expected, strsz)
     }
 }
 
 private [internal] final class TokenSpecific(_specific: String, letter: TokenSet, caseSensitive: Boolean, expected: Option[String])
     extends TokenSpecificAllowTrailing(_specific, caseSensitive, expected) {
     override def postprocess(ctx: Context, i: Int): Unit = {
         if (i < ctx.inputsz && letter(ctx.input.charAt(i))) {
-            ctx.expectedFail(expectedEnd)
+            ctx.expectedFail(expectedEnd) //This should only report a single token
             ctx.restoreState()
         }
         else {
@@ -219,7 +220,7 @@ private [internal] final class TokenMaxOp(operator: String, _ops: Set[String], e
         lazy val ops_ = ops.suffixes(ctx.input.charAt(i))
         val possibleOpsRemain = i < ctx.inputsz && ops.nonEmpty
         if (possibleOpsRemain && ops_.contains("")) {
-            ctx.expectedFail(expectedEnd)
+            ctx.expectedFail(expectedEnd) //This should only report a single token
             ctx.restoreState()
         }
         else if (possibleOpsRemain) go(ctx, i + 1, ops_)

diff --git a/src/main/scala/parsley/internal/machine/instructions/TokenStringInstrs.scala b/src/main/scala/parsley/internal/machine/instructions/TokenStringInstrs.scala
@@ -11,7 +11,7 @@ private [internal] class TokenEscape(_expected: Option[String]) extends Instr wi
     override def apply(ctx: Context): Unit = escape(ctx) match {
         case TokenEscape.EscapeChar(escapeChar) =>ctx.pushAndContinue(escapeChar)
         case TokenEscape.BadCode => ctx.expectedFail(expected, reason = "invalid escape sequence")
-        case TokenEscape.NoParse => ctx.expectedFail(expected)
+        case TokenEscape.NoParse => ctx.expectedTokenFail(expected, 3)
     }
 
     private final def consumeAndReturn(ctx: Context, n: Int, c: Char) = {

diff --git a/src/test/scala/parsley/internal/machine/errors/DefuncErrorTests.scala b/src/test/scala/parsley/internal/machine/errors/DefuncErrorTests.scala
@@ -59,14 +59,14 @@ class DefuncErrorTests extends ParsleyTest {
         EmptyError(0, 0, 0, Some(EndOfInput)) should not be 'expectedEmpty
     }
 
-    "StringTokError" should "evaluate to TrivialError" in {
-        val err = StringTokError(0, 0, 0, None, 1)
+    "TokenError" should "evaluate to TrivialError" in {
+        val err = TokenError(0, 0, 0, None, 1)
         err should be a 'trivialError
         err.asParseError shouldBe a [TrivialError]
     }
     it should "only be empty when its label is" in {
-        StringTokError(0, 0, 0, None, 1) shouldBe 'expectedEmpty
-        StringTokError(0, 0, 0, Some(EndOfInput), 1) should not be 'expectedEmpty
+        TokenError(0, 0, 0, None, 1) shouldBe 'expectedEmpty
+        TokenError(0, 0, 0, Some(EndOfInput), 1) should not be 'expectedEmpty
     }
 
     "EmptyErrorWithReason" should "evaluate to TrivialError" in {
@@ -80,17 +80,17 @@ class DefuncErrorTests extends ParsleyTest {
     }
 
     "MultiExpectedError" should "evaluate to TrivialError" in {
-        val err = MultiExpectedError(0, 0, 0, Set.empty)
+        val err = MultiExpectedError(0, 0, 0, Set.empty, 1)
         err should be a 'trivialError
         err.asParseError shouldBe a [TrivialError]
     }
     it should "only be empty when its label is" in {
-        MultiExpectedError(0, 0, 0, Set.empty) shouldBe 'expectedEmpty
-        MultiExpectedError(0, 0, 0, Set(EndOfInput)) should not be 'expectedEmpty
+        MultiExpectedError(0, 0, 0, Set.empty, 1) shouldBe 'expectedEmpty
+        MultiExpectedError(0, 0, 0, Set(EndOfInput), 1) should not be 'expectedEmpty
     }
 
     "MergedErrors" should "be trivial if both children are" in {
-        val err = MergedErrors(EmptyError(0, 0, 0, None), MultiExpectedError(0, 0, 0, Set.empty))
+        val err = MergedErrors(EmptyError(0, 0, 0, None), MultiExpectedError(0, 0, 0, Set.empty, 1))
         err should be a 'trivialError
         err.asParseError shouldBe a [TrivialError]
     }
@@ -135,8 +135,8 @@ class DefuncErrorTests extends ParsleyTest {
         MergedErrors(EmptyError(0, 0, 0, Some(EndOfInput)), EmptyError(0, 0, 0, Some(EndOfInput))) should not be 'expectedEmpty
     }
     they should "contain all the expecteds from both branches when appropriate" in {
-        val err = MergedErrors(MultiExpectedError(0, 0, 0, Set(Raw("a"), Raw("b"))),
-                               MultiExpectedError(0, 0, 0, Set(Raw("b"), Raw("c"))))
+        val err = MergedErrors(MultiExpectedError(0, 0, 0, Set(Raw("a"), Raw("b")), 1),
+                               MultiExpectedError(0, 0, 0, Set(Raw("b"), Raw("c")), 1))
         err.asParseError.asInstanceOf[TrivialError].expecteds should contain only (Raw("a"), Raw("b"), Raw("c"))
     }
 
@@ -187,8 +187,8 @@ class DefuncErrorTests extends ParsleyTest {
         WithLabel(EmptyError(0, 0, 0, Some(Desc("x"))), "a") should not be 'expectedEmpty
     }
     it should "replace all expected" in {
-        val errShow = WithLabel(MultiExpectedError(0, 0, 0, Set(Raw("a"), Raw("b"))), "x")
-        val errHide = WithLabel(MultiExpectedError(0, 0, 0, Set(Raw("a"), Raw("b"))), "")
+        val errShow = WithLabel(MultiExpectedError(0, 0, 0, Set(Raw("a"), Raw("b")), 1), "x")
+        val errHide = WithLabel(MultiExpectedError(0, 0, 0, Set(Raw("a"), Raw("b")), 1), "")
         errShow.asParseError.asInstanceOf[TrivialError].expecteds should contain only (Desc("x"))
         errHide.asParseError.asInstanceOf[TrivialError].expecteds shouldBe empty
     }

diff --git a/src/test/scala/parsley/internal/machine/errors/DefuncHintsTests.scala b/src/test/scala/parsley/internal/machine/errors/DefuncHintsTests.scala
@@ -12,7 +12,7 @@ import scala.annotation.nowarn
 class DefuncHintsTests extends ParsleyTest {
     def mkErr(labels: String*): DefuncError = {
         assert(labels.nonEmpty)
-        MultiExpectedError(0, 0, 0, labels.map(Desc(_)).toSet)
+        MultiExpectedError(0, 0, 0, labels.map(Desc(_)).toSet, 1)
     }
 
     "EmptyHints" should "have size 0" in {