From 4515ca3c02e728ec396c0ccb44cb550b80cc2bf7 Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Fri, 5 Mar 2021 18:57:27 +0000 Subject: [PATCH] Improvements to Lexer Errors and Errors in General (#79) Added better unexpected messages to lexer instructions, allowed JumpTable to have better unexpected messages, cut unexpected messages short at first newline OR space. --- .../deepembedding/AlternativeEmbedding.scala | 34 +++++++++---------- .../parsley/internal/errors/Errors.scala | 3 +- .../parsley/internal/machine/Context.scala | 5 ++- .../internal/machine/errors/DefuncError.scala | 8 ++--- .../instructions/IntrinsicInstrs.scala | 5 ++- .../machine/instructions/OptInstrs.scala | 6 ++-- .../machine/instructions/TokenInstrs.scala | 15 ++++---- .../instructions/TokenStringInstrs.scala | 2 +- .../machine/errors/DefuncErrorTests.scala | 24 ++++++------- .../machine/errors/DefuncHintsTests.scala | 2 +- 10 files changed, 54 insertions(+), 50 deletions(-) diff --git a/src/main/scala/parsley/internal/deepembedding/AlternativeEmbedding.scala b/src/main/scala/parsley/internal/deepembedding/AlternativeEmbedding.scala index 594cbe6a4..da3624084 100644 --- a/src/main/scala/parsley/internal/deepembedding/AlternativeEmbedding.scala +++ b/src/main/scala/parsley/internal/deepembedding/AlternativeEmbedding.scala @@ -88,9 +88,9 @@ private [parsley] final class <|>[A, B](_p: =>Parsley[A], _q: =>Parsley[B]) exte val end = state.freshLabel() val default = state.freshLabel() val merge = state.freshLabel() - val (roots, leads, ls, expecteds) = foldTablified(tablified, state, mutable.Map.empty, Nil, Nil, mutable.Map.empty) + val (roots, leads, ls, size, expecteds) = foldTablified(tablified, state, mutable.Map.empty, Nil, Nil, 0, mutable.Set.empty) //println(leads, tablified) - instrs += new instructions.JumpTable(leads, ls, default, merge, expecteds) + instrs += new instructions.JumpTable(leads, ls, default, merge, size, expecteds) codeGenRoots(roots, ls, end) >> { instrs += new instructions.Catch(merge) //This instruction is reachable as default - 1 instrs += new instructions.Label(default) @@ -157,29 +157,29 @@ private [parsley] final class <|>[A, B](_p: =>Parsley[A], _q: =>Parsley[B]) exte roots: mutable.Map[Char, List[Parsley[_]]], leads: List[Char], labels: List[Int], - expecteds: mutable.Map[Char, Set[ErrorItem]]): - (List[List[Parsley[_]]], List[Char], List[Int], Map[Char, Set[ErrorItem]]) = tablified match { - case (_, None)::tablified_ => foldTablified(tablified_, labelGen, roots, leads, labels, expecteds) + size: Int, + expecteds: mutable.Set[ErrorItem]): + (List[List[Parsley[_]]], List[Char], List[Int], Int, Set[ErrorItem]) = tablified match { + case (_, None)::tablified_ => foldTablified(tablified_, labelGen, roots, leads, labels, size, expecteds) case (root, Some(lead))::tablified_ => - val (c: Char, expected: ErrorItem) = lead match { - case ct@CharTok(d) => (d, ct.expected.fold[ErrorItem](Raw(d))(Desc(_))) - case st@StringTok(s) => (s.head, st.expected.fold[ErrorItem](Raw(s))(Desc(_))) - case st@Specific(s) => (s.head, Desc(st.expected.getOrElse(s))) - case op@MaxOp(o) => (o.head, Desc(op.expected.getOrElse(o))) - case sl: StringLiteral => ('"', Desc(sl.expected.getOrElse("string"))) - case rs: RawStringLiteral => ('"', Desc(rs.expected.getOrElse("string"))) + val (c: Char, expected: ErrorItem, _size: Int) = lead match { + case ct@CharTok(d) => (d, ct.expected.fold[ErrorItem](Raw(d))(Desc(_)), 1) + case st@StringTok(s) => (s.head, st.expected.fold[ErrorItem](Raw(s))(Desc(_)), s.size) + case st@Specific(s) => (s.head, Desc(st.expected.getOrElse(s)), s.size) + case op@MaxOp(o) => (o.head, Desc(op.expected.getOrElse(o)), o.size) + case sl: StringLiteral => ('"', Desc(sl.expected.getOrElse("string")), 1) + case rs: RawStringLiteral => ('"', Desc(rs.expected.getOrElse("string")), 1) } + expecteds += expected if (roots.contains(c)) { roots(c) = root::roots(c) - expecteds(c) = expecteds(c) + expected - foldTablified(tablified_, labelGen, roots, leads, labelGen.freshLabel() :: labels, expecteds) + foldTablified(tablified_, labelGen, roots, leads, labelGen.freshLabel() :: labels, Math.max(size, _size), expecteds) } else { roots(c) = root::Nil - expecteds(c) = Set(expected) - foldTablified(tablified_, labelGen, roots, c::leads, labelGen.freshLabel() :: labels, expecteds) + foldTablified(tablified_, labelGen, roots, c::leads, labelGen.freshLabel() :: labels, Math.max(size, _size), expecteds) } - case Nil => (leads.map(roots(_)), leads, labels, expecteds.toMap) + case Nil => (leads.map(roots(_)), leads, labels, size, expecteds.toSet) } @tailrec private def tablable(p: Parsley[_]): Option[Parsley[_]] = p match { // CODO: Numeric parsers by leading digit (This one would require changing the foldTablified function a bit) diff --git a/src/main/scala/parsley/internal/errors/Errors.scala b/src/main/scala/parsley/internal/errors/Errors.scala index 92176db9b..9835911de 100644 --- a/src/main/scala/parsley/internal/errors/Errors.scala +++ b/src/main/scala/parsley/internal/errors/Errors.scala @@ -81,7 +81,8 @@ private [internal] case class Raw(cs: String) extends ErrorItem { case "\t" => "tab" case " " => "space" case Unprintable(up) => f"unprintable character (\\u${up.head.toInt}%04X)" - case cs => "\"" + cs.takeWhile(_ != '\n') + "\"" + // Do we want this only in unexpecteds? + case cs => "\"" + cs.takeWhile(c => c != '\n' && c != ' ') + "\"" } } private [internal] object Raw { diff --git a/src/main/scala/parsley/internal/machine/Context.scala b/src/main/scala/parsley/internal/machine/Context.scala index 614d0f958..9d38fb9c6 100644 --- a/src/main/scala/parsley/internal/machine/Context.scala +++ b/src/main/scala/parsley/internal/machine/Context.scala @@ -6,7 +6,7 @@ import parsley.{Failure, Result, Success} import parsley.internal.errors.{ErrorItem, LineBuilder} import parsley.internal.machine.errors.{ ErrorItemBuilder, - DefuncError, ClassicExpectedError, ClassicExpectedErrorWithReason, ClassicFancyError, ClassicUnexpectedError, WithHints, + DefuncError, ClassicExpectedError, ClassicExpectedErrorWithReason, ClassicFancyError, ClassicUnexpectedError, WithHints, TokenError, DefuncHints, EmptyHints, MergeHints, ReplaceHint, PopHints, AddError } @@ -182,6 +182,9 @@ private [parsley] final class Context(private [machine] var instrs: Array[Instr] private [machine] def expectedFail(expected: Option[ErrorItem], reason: String): Unit = { this.fail(new ClassicExpectedErrorWithReason(offset, line, col, expected, reason)) } + private [machine] def expectedTokenFail(expected: Option[ErrorItem], size: Int): Unit = { + this.fail(new TokenError(offset, line, col, expected, size)) + } private [machine] def fail(error: DefuncError): Unit = { this.pushError(error) this.fail() diff --git a/src/main/scala/parsley/internal/machine/errors/DefuncError.scala b/src/main/scala/parsley/internal/machine/errors/DefuncError.scala index c0a990a17..f09183207 100644 --- a/src/main/scala/parsley/internal/machine/errors/DefuncError.scala +++ b/src/main/scala/parsley/internal/machine/errors/DefuncError.scala @@ -41,7 +41,7 @@ private [errors] object BaseError { case err: ClassicUnexpectedError => Some(err.expected) case err: EmptyError => Some(err.expected) case err: EmptyErrorWithReason => Some(err.expected) - case err: StringTokError => Some(err.expected) + case err: TokenError => Some(err.expected) case _ => None } } @@ -82,7 +82,7 @@ private [machine] case class EmptyError(offset: Int, line: Int, col: Int, expect TrivialError(offset, line, col, None, expectedSet(expected), ParseError.NoReason) } } -private [machine] case class StringTokError(offset: Int, line: Int, col: Int, expected: Option[ErrorItem], size: Int) extends DefuncError { +private [machine] case class TokenError(offset: Int, line: Int, col: Int, expected: Option[ErrorItem], size: Int) extends DefuncError { val isTrivialError: Boolean = true val isExpectedEmpty: Boolean = expected.isEmpty override def asParseError(implicit builder: ErrorItemBuilder): ParseError = { @@ -96,11 +96,11 @@ private [machine] case class EmptyErrorWithReason(offset: Int, line: Int, col: I TrivialError(offset, line, col, None, expectedSet(expected), Set(reason)) } } -private [machine] case class MultiExpectedError(offset: Int, line: Int, col: Int, expected: Set[ErrorItem]) extends DefuncError { +private [machine] case class MultiExpectedError(offset: Int, line: Int, col: Int, expected: Set[ErrorItem], size: Int) extends DefuncError { val isTrivialError: Boolean = true val isExpectedEmpty: Boolean = expected.isEmpty override def asParseError(implicit builder: ErrorItemBuilder): ParseError = { - TrivialError(offset, line, col, Some(builder(offset)), expected, ParseError.NoReason) + TrivialError(offset, line, col, Some(builder(offset, size)), expected, ParseError.NoReason) } } diff --git a/src/main/scala/parsley/internal/machine/instructions/IntrinsicInstrs.scala b/src/main/scala/parsley/internal/machine/instructions/IntrinsicInstrs.scala index e3a5a5e3c..11f44c054 100644 --- a/src/main/scala/parsley/internal/machine/instructions/IntrinsicInstrs.scala +++ b/src/main/scala/parsley/internal/machine/instructions/IntrinsicInstrs.scala @@ -2,7 +2,7 @@ package parsley.internal.machine.instructions import parsley.internal.errors.{Desc, Raw} import parsley.internal.machine.{Context, Good} -import parsley.internal.machine.errors.{EmptyError, EmptyErrorWithReason, StringTokError} +import parsley.internal.machine.errors.{EmptyError, EmptyErrorWithReason} import scala.annotation.tailrec @@ -77,9 +77,8 @@ private [internal] final class StringTok private [instructions] (s: String, x: A if (j < sz && i < ctx.inputsz && ctx.input.charAt(i) == cs(j)) go(ctx, i + 1, j + 1) else if (j < sz) { // The offset, line and column haven't been edited yet, so are in the right place - val err = new StringTokError(ctx.offset, ctx.line, ctx.col, errorItem, sz) + ctx.expectedTokenFail(errorItem, sz) ctx.offset = i - ctx.fail(err) } else { ctx.col = colAdjust(ctx.col) diff --git a/src/main/scala/parsley/internal/machine/instructions/OptInstrs.scala b/src/main/scala/parsley/internal/machine/instructions/OptInstrs.scala index af316237e..127ec6d35 100644 --- a/src/main/scala/parsley/internal/machine/instructions/OptInstrs.scala +++ b/src/main/scala/parsley/internal/machine/instructions/OptInstrs.scala @@ -104,10 +104,10 @@ private [internal] final class AlwaysRecoverWith[A](x: A) extends Instr { private [internal] final class JumpTable(prefixes: List[Char], labels: List[Int], private [this] var default: Int, private [this] var merge: Int, - _expecteds: Map[Char, Set[ErrorItem]]) extends Instr { + private [this] val size: Int, + private [this] val errorItems: Set[ErrorItem]) extends Instr { private [this] var defaultPreamble: Int = _ private [this] val jumpTable = mutable.LongMap(prefixes.map(_.toLong).zip(labels): _*) - val errorItems = _expecteds.toSet[(Char, Set[ErrorItem])].flatMap(_._2) override def apply(ctx: Context): Unit = { if (ctx.moreInput) { @@ -127,7 +127,7 @@ private [internal] final class JumpTable(prefixes: List[Char], labels: List[Int] } private def addErrors(ctx: Context): Unit = { - ctx.errs = new ErrorStack(new MultiExpectedError(ctx.offset, ctx.line, ctx.col, errorItems), ctx.errs) + ctx.errs = new ErrorStack(new MultiExpectedError(ctx.offset, ctx.line, ctx.col, errorItems, size), ctx.errs) ctx.pushHandler(merge) } diff --git a/src/main/scala/parsley/internal/machine/instructions/TokenInstrs.scala b/src/main/scala/parsley/internal/machine/instructions/TokenInstrs.scala index 8df0d90c1..5467cc08f 100644 --- a/src/main/scala/parsley/internal/machine/instructions/TokenInstrs.scala +++ b/src/main/scala/parsley/internal/machine/instructions/TokenInstrs.scala @@ -55,7 +55,7 @@ private [instructions] abstract class WhiteSpaceLike(start: String, end: String, spaces(ctx) val startsMulti = ctx.moreInput && ctx.input.startsWith(start, ctx.offset) if (startsMulti && multiLineComment(ctx)) multisOnly(ctx) - else if (startsMulti) ctx.expectedFail(expected = endOfComment) + else if (startsMulti) ctx.expectedTokenFail(expected = endOfComment, end.length) else ctx.pushAndContinue(()) } @@ -68,7 +68,7 @@ private [instructions] abstract class WhiteSpaceLike(start: String, end: String, if (ctx.moreInput && ctx.input.startsWith(sharedPrefix, ctx.offset)) { val startsMulti = ctx.input.startsWith(factoredStart, ctx.offset + sharedPrefix.length) if (startsMulti && multiLineComment(ctx)) singlesAndMultis(ctx) - else if (startsMulti) ctx.expectedFail(expected = endOfComment) + else if (startsMulti) ctx.expectedTokenFail(expected = endOfComment, end.length) else if (ctx.input.startsWith(factoredLine, ctx.offset + sharedPrefix.length)) { singleLineComment(ctx) singlesAndMultis(ctx) @@ -89,13 +89,14 @@ private [instructions] abstract class WhiteSpaceLike(start: String, end: String, private [internal] final class TokenComment(start: String, end: String, line: String, nested: Boolean) extends CommentLexer(start, end, line, nested) { private [this] final val comment = Some(Desc("comment")) + private [this] final val openingSize = Math.max(start.size, line.size) // PRE: one of the comments is supported // PRE: Multi-line comments may not prefix single-line, but single-line may prefix multi-line override def apply(ctx: Context): Unit = { val startsMulti = multiAllowed && ctx.input.startsWith(start, ctx.offset) // If neither comment is available we fail - if (!ctx.moreInput || (!lineAllowed || !ctx.input.startsWith(line, ctx.offset)) && !startsMulti) ctx.expectedFail(expected = comment) + if (!ctx.moreInput || (!lineAllowed || !ctx.input.startsWith(line, ctx.offset)) && !startsMulti) ctx.expectedTokenFail(expected = comment, openingSize) // One of the comments must be available else if (startsMulti && multiLineComment(ctx)) ctx.pushAndContinue(()) else if (startsMulti) ctx.expectedFail(expected = endOfComment) @@ -177,7 +178,7 @@ private [instructions] abstract class TokenSpecificAllowTrailing(_specific: Stri @tailrec final private def readSpecific(ctx: Context, i: Int, j: Int): Unit = { if (j < strsz && readCharCaseHandled(ctx, i) == specific(j)) readSpecific(ctx, i + 1, j + 1) - else if (j < strsz) ctx.expectedFail(expected) + else if (j < strsz) ctx.expectedTokenFail(expected, strsz) else { ctx.saveState() ctx.fastUncheckedConsumeChars(strsz) @@ -187,7 +188,7 @@ private [instructions] abstract class TokenSpecificAllowTrailing(_specific: Stri final override def apply(ctx: Context): Unit = { if (ctx.inputsz >= ctx.offset + strsz) readSpecific(ctx, ctx.offset, 0) - else ctx.expectedFail(expected) + else ctx.expectedTokenFail(expected, strsz) } } @@ -195,7 +196,7 @@ private [internal] final class TokenSpecific(_specific: String, letter: TokenSet extends TokenSpecificAllowTrailing(_specific, caseSensitive, expected) { override def postprocess(ctx: Context, i: Int): Unit = { if (i < ctx.inputsz && letter(ctx.input.charAt(i))) { - ctx.expectedFail(expectedEnd) + ctx.expectedFail(expectedEnd) //This should only report a single token ctx.restoreState() } else { @@ -219,7 +220,7 @@ private [internal] final class TokenMaxOp(operator: String, _ops: Set[String], e lazy val ops_ = ops.suffixes(ctx.input.charAt(i)) val possibleOpsRemain = i < ctx.inputsz && ops.nonEmpty if (possibleOpsRemain && ops_.contains("")) { - ctx.expectedFail(expectedEnd) + ctx.expectedFail(expectedEnd) //This should only report a single token ctx.restoreState() } else if (possibleOpsRemain) go(ctx, i + 1, ops_) diff --git a/src/main/scala/parsley/internal/machine/instructions/TokenStringInstrs.scala b/src/main/scala/parsley/internal/machine/instructions/TokenStringInstrs.scala index da9cbf7c8..c3ae86344 100644 --- a/src/main/scala/parsley/internal/machine/instructions/TokenStringInstrs.scala +++ b/src/main/scala/parsley/internal/machine/instructions/TokenStringInstrs.scala @@ -11,7 +11,7 @@ private [internal] class TokenEscape(_expected: Option[String]) extends Instr wi override def apply(ctx: Context): Unit = escape(ctx) match { case TokenEscape.EscapeChar(escapeChar) =>ctx.pushAndContinue(escapeChar) case TokenEscape.BadCode => ctx.expectedFail(expected, reason = "invalid escape sequence") - case TokenEscape.NoParse => ctx.expectedFail(expected) + case TokenEscape.NoParse => ctx.expectedTokenFail(expected, 3) } private final def consumeAndReturn(ctx: Context, n: Int, c: Char) = { diff --git a/src/test/scala/parsley/internal/machine/errors/DefuncErrorTests.scala b/src/test/scala/parsley/internal/machine/errors/DefuncErrorTests.scala index 9c197833a..3beb5f748 100644 --- a/src/test/scala/parsley/internal/machine/errors/DefuncErrorTests.scala +++ b/src/test/scala/parsley/internal/machine/errors/DefuncErrorTests.scala @@ -59,14 +59,14 @@ class DefuncErrorTests extends ParsleyTest { EmptyError(0, 0, 0, Some(EndOfInput)) should not be 'expectedEmpty } - "StringTokError" should "evaluate to TrivialError" in { - val err = StringTokError(0, 0, 0, None, 1) + "TokenError" should "evaluate to TrivialError" in { + val err = TokenError(0, 0, 0, None, 1) err should be a 'trivialError err.asParseError shouldBe a [TrivialError] } it should "only be empty when its label is" in { - StringTokError(0, 0, 0, None, 1) shouldBe 'expectedEmpty - StringTokError(0, 0, 0, Some(EndOfInput), 1) should not be 'expectedEmpty + TokenError(0, 0, 0, None, 1) shouldBe 'expectedEmpty + TokenError(0, 0, 0, Some(EndOfInput), 1) should not be 'expectedEmpty } "EmptyErrorWithReason" should "evaluate to TrivialError" in { @@ -80,17 +80,17 @@ class DefuncErrorTests extends ParsleyTest { } "MultiExpectedError" should "evaluate to TrivialError" in { - val err = MultiExpectedError(0, 0, 0, Set.empty) + val err = MultiExpectedError(0, 0, 0, Set.empty, 1) err should be a 'trivialError err.asParseError shouldBe a [TrivialError] } it should "only be empty when its label is" in { - MultiExpectedError(0, 0, 0, Set.empty) shouldBe 'expectedEmpty - MultiExpectedError(0, 0, 0, Set(EndOfInput)) should not be 'expectedEmpty + MultiExpectedError(0, 0, 0, Set.empty, 1) shouldBe 'expectedEmpty + MultiExpectedError(0, 0, 0, Set(EndOfInput), 1) should not be 'expectedEmpty } "MergedErrors" should "be trivial if both children are" in { - val err = MergedErrors(EmptyError(0, 0, 0, None), MultiExpectedError(0, 0, 0, Set.empty)) + val err = MergedErrors(EmptyError(0, 0, 0, None), MultiExpectedError(0, 0, 0, Set.empty, 1)) err should be a 'trivialError err.asParseError shouldBe a [TrivialError] } @@ -135,8 +135,8 @@ class DefuncErrorTests extends ParsleyTest { MergedErrors(EmptyError(0, 0, 0, Some(EndOfInput)), EmptyError(0, 0, 0, Some(EndOfInput))) should not be 'expectedEmpty } they should "contain all the expecteds from both branches when appropriate" in { - val err = MergedErrors(MultiExpectedError(0, 0, 0, Set(Raw("a"), Raw("b"))), - MultiExpectedError(0, 0, 0, Set(Raw("b"), Raw("c")))) + val err = MergedErrors(MultiExpectedError(0, 0, 0, Set(Raw("a"), Raw("b")), 1), + MultiExpectedError(0, 0, 0, Set(Raw("b"), Raw("c")), 1)) err.asParseError.asInstanceOf[TrivialError].expecteds should contain only (Raw("a"), Raw("b"), Raw("c")) } @@ -187,8 +187,8 @@ class DefuncErrorTests extends ParsleyTest { WithLabel(EmptyError(0, 0, 0, Some(Desc("x"))), "a") should not be 'expectedEmpty } it should "replace all expected" in { - val errShow = WithLabel(MultiExpectedError(0, 0, 0, Set(Raw("a"), Raw("b"))), "x") - val errHide = WithLabel(MultiExpectedError(0, 0, 0, Set(Raw("a"), Raw("b"))), "") + val errShow = WithLabel(MultiExpectedError(0, 0, 0, Set(Raw("a"), Raw("b")), 1), "x") + val errHide = WithLabel(MultiExpectedError(0, 0, 0, Set(Raw("a"), Raw("b")), 1), "") errShow.asParseError.asInstanceOf[TrivialError].expecteds should contain only (Desc("x")) errHide.asParseError.asInstanceOf[TrivialError].expecteds shouldBe empty } diff --git a/src/test/scala/parsley/internal/machine/errors/DefuncHintsTests.scala b/src/test/scala/parsley/internal/machine/errors/DefuncHintsTests.scala index a4079fbe4..623347b2d 100644 --- a/src/test/scala/parsley/internal/machine/errors/DefuncHintsTests.scala +++ b/src/test/scala/parsley/internal/machine/errors/DefuncHintsTests.scala @@ -12,7 +12,7 @@ import scala.annotation.nowarn class DefuncHintsTests extends ParsleyTest { def mkErr(labels: String*): DefuncError = { assert(labels.nonEmpty) - MultiExpectedError(0, 0, 0, labels.map(Desc(_)).toSet) + MultiExpectedError(0, 0, 0, labels.map(Desc(_)).toSet, 1) } "EmptyHints" should "have size 0" in {