From 5451ccee2c03955ddf2c025915c711156a0ea851 Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Fri, 1 Jan 2021 00:26:07 +0000 Subject: [PATCH 01/34] Factored out the common logic in TokenMaxOp --- .../internal/instructions/TokenInstrs.scala | 102 ++++++++---------- 1 file changed, 45 insertions(+), 57 deletions(-) diff --git a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala index 254dd8e59..36022688d 100644 --- a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala @@ -822,22 +822,19 @@ private [internal] final class TokenUserOperator(start: TokenSet, letter: TokenS private [internal] final class TokenOperator(start: TokenSet, letter: TokenSet, reservedOps: Set[String], _expected: UnsafeOption[String]) extends TokenLexi("operator", "non-reserved operator")(start, letter, reservedOps.andThen(!_), _expected) -private [instructions] abstract class TokenSpecific(_specific: String, letter: TokenSet, caseSensitive: Boolean, _expected: UnsafeOption[String]) extends Instr +private [instructions] abstract class TokenSpecific(_specific: String, caseSensitive: Boolean, _expected: UnsafeOption[String]) extends Instr { private final val expected = if (_expected == null) _specific else _expected - private final val expectedEnd = if (_expected == null) "end of " + _specific else _expected + protected final val expectedEnd = if (_expected == null) "end of " + _specific else _expected private final val specific = (if (caseSensitive) _specific else _specific.toLowerCase).toCharArray - + private final val strsz = specific.length + protected def postprocess(ctx: Context, i: Int): Unit final override def apply(ctx: Context): Unit = { - ctx.saveState() - val strsz = this.specific.length - val inputsz = ctx.inputsz val input = ctx.input var i = ctx.offset var j = 0 - val specific = this.specific - if (inputsz >= i + strsz) + if 
(ctx.inputsz >= i + strsz) { while (j < strsz) { @@ -850,76 +847,67 @@ private [instructions] abstract class TokenSpecific(_specific: String, letter: T i += 1 j += 1 } + ctx.saveState() ctx.fastUncheckedConsumeChars(strsz) - if (i < inputsz && letter(input(i))) ctx.fail(expectedEnd) - else ctx.pushAndContinue(()) + postprocess(ctx, i) } else ctx.fail(expected) - if (ctx.status eq Good) ctx.states = ctx.states.tail - else ctx.restoreState() } - final override def toString: String = s"TokenSpecific(${_specific})" + override def toString: String = s"TokenSpecific(${_specific})" +} + +private [internal] abstract class TokenSpecificNoTrailLetter(keyword: String, letter: TokenSet, caseSensitive: Boolean, expected: UnsafeOption[String]) + extends TokenSpecific(keyword, caseSensitive, expected) { + final override def postprocess(ctx: Context, i: Int): Unit = { + if (i < ctx.inputsz && letter(ctx.input(i))) { + ctx.fail(expectedEnd) + ctx.restoreState() + } + else { + ctx.states = ctx.states.tail + ctx.pushAndContinue(()) + } + } } private [internal] final class TokenKeyword(keyword: String, letter: TokenSet, caseSensitive: Boolean, expected: UnsafeOption[String]) - extends TokenSpecific(keyword, letter, caseSensitive, expected) + extends TokenSpecificNoTrailLetter(keyword, letter, caseSensitive, expected) private [internal] final class TokenOperator_(operator: String, letter: TokenSet, expected: UnsafeOption[String]) - extends TokenSpecific(operator, letter, true, expected) + extends TokenSpecificNoTrailLetter(operator, letter, true, expected) // This can be combined into the above -private [internal] class TokenMaxOp(_operator: String, _ops: Set[String], _expected: UnsafeOption[String]) extends Instr +private [internal] class TokenMaxOp(operator: String, _ops: Set[String], expected: UnsafeOption[String]) + extends TokenSpecific(operator, true, expected) { - val expected: UnsafeOption[String] = if (_expected == null) _operator else _expected - val expectedEnd: 
UnsafeOption[String] = if (_expected == null) "end of " + _operator else _expected - val operator = _operator.toCharArray - val ops = for (op <- _ops.toList if op.length > _operator.length && op.startsWith(_operator)) yield op.substring(_operator.length) + // TODO: We want a Trie backed map here, not whatever this is + private val ops = for (op <- _ops.toList if op.length > operator.length && op.startsWith(operator)) yield op.substring(operator.length) - override def apply(ctx: Context): Unit = - { - val inputsz: Int = ctx.inputsz - val input = ctx.input - var i = ctx.offset - var j = 0 - val operator = this.operator - val strsz: Int = operator.length - if (inputsz >= i + strsz) + override def postprocess(ctx: Context, _i: Int): Unit = { + var i = _i + if (i < ctx.inputsz) { - while (j < strsz) + var ops = this.ops + while (ops.nonEmpty && i < ctx.inputsz) { - if (input(i) != operator(j)) + val c = ctx.input(i) + ops = for (op <- ops if op.charAt(0) == c) yield { - ctx.fail(expected) - return - } - i += 1 - j += 1 - } - if (i < inputsz) - { - var ops = this.ops - while (ops.nonEmpty && i < inputsz) - { - val c = input(i) - ops = for (op <- ops if op.charAt(0) == c) yield + val op_ = op.substring(1) + if (op_.isEmpty) { - val op_ = op.substring(1) - if (op_.isEmpty) - { - ctx.fail(expectedEnd) - return - } - op_ + ctx.fail(expectedEnd) + ctx.restoreState() + return } - i += 1 + op_ } + i += 1 } - ctx.fastUncheckedConsumeChars(strsz) - ctx.pushAndContinue(()) } - else ctx.fail(expected) + ctx.states = ctx.states.tail + ctx.pushAndContinue(()) } - - override def toString: String = s"TokenMaxOp(${_operator})" + override def toString: String = s"TokenMaxOp(${operator})" } \ No newline at end of file From 1038e864911910a40421169d78b3f27656655768 Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Fri, 1 Jan 2021 12:21:46 +0000 Subject: [PATCH 02/34] Added test coverage, hopefully? 
--- .github/workflows/ci.yaml | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index afe167ae2..d4fbc75d1 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -3,11 +3,13 @@ on: [push, pull_request] env: CI: true CI_SNAPSHOT_RELEASE: +publishSigned - SCALA_VERSION: 2.12.12 + SCALA_VERSION: 2.13.4 + JAVA_VERSION: adopt@1.11 + UBUNTU: ubuntu-20.04 jobs: validate: name: Scala ${{ matrix.scala }}, Java ${{ matrix.java }} - runs-on: ubuntu-20.04 + runs-on: $UBUNTU strategy: fail-fast: false matrix: @@ -34,3 +36,28 @@ jobs: run: sbt ++$SCALA_VERSION test - name: Scaladoc run: sbt ++$SCALA_VERSION doc + coverage: + needs: [validate] + name: Test Coverage + runs-on: $UBUNTU + steps: + - uses: actions/checkout@v2.3.4 + - uses: olafurpg/setup-scala@v10 + with: + java-version: $JAVA_VERSION + - uses: actions/cache@v1 + with: + path: ~/.cache/coursier + key: sbt-coursier-cache + - uses: actions/cache@v1 + with: + path: ~/.sbt + key: sbt-${{ hashFiles('**/build.sbt') }} + - run: sbt clean coverage test + - uses: paambaati/codeclimate-action@v2.7.5 + env: + CC_TEST_REPORTER_ID: ${{ secrets.CC_TEST_REPORTER_ID }} + with: + coverageCommand: sbt coverageReport + coverageLocations: ${{github.workspace}}/target/scala-2.13/coverage-report/cobertura.xml:cobertura + debug: true \ No newline at end of file From 732bd9b39f46865c4b8910cbdfdcb3e33e33036e Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Fri, 1 Jan 2021 12:23:39 +0000 Subject: [PATCH 03/34] Removed ubuntu env --- .github/workflows/ci.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index d4fbc75d1..0ff8de8ae 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -5,11 +5,10 @@ env: CI_SNAPSHOT_RELEASE: +publishSigned SCALA_VERSION: 2.13.4 JAVA_VERSION: adopt@1.11 - UBUNTU: ubuntu-20.04 jobs: validate: 
name: Scala ${{ matrix.scala }}, Java ${{ matrix.java }} - runs-on: $UBUNTU + runs-on: ubuntu-20.04 strategy: fail-fast: false matrix: @@ -39,7 +38,7 @@ jobs: coverage: needs: [validate] name: Test Coverage - runs-on: $UBUNTU + runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v2.3.4 - uses: olafurpg/setup-scala@v10 From 63fda90f8bfa8acda580b75ca681381cde614709 Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Fri, 1 Jan 2021 12:27:46 +0000 Subject: [PATCH 04/34] Hopefully fixed it properly this time, surely there is a way of making github action variables... --- .github/workflows/ci.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 0ff8de8ae..4c864864a 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -4,7 +4,6 @@ env: CI: true CI_SNAPSHOT_RELEASE: +publishSigned SCALA_VERSION: 2.13.4 - JAVA_VERSION: adopt@1.11 jobs: validate: name: Scala ${{ matrix.scala }}, Java ${{ matrix.java }} @@ -43,7 +42,7 @@ jobs: - uses: actions/checkout@v2.3.4 - uses: olafurpg/setup-scala@v10 with: - java-version: $JAVA_VERSION + java-version: adopt@1.11 - uses: actions/cache@v1 with: path: ~/.cache/coursier From 3de9d6e72056b7c4c7e5fa008977cda902bc5535 Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Fri, 1 Jan 2021 12:49:50 +0000 Subject: [PATCH 05/34] Added new badges to readme --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b423abb24..26a75eae1 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,8 @@ its competitive performance, but for best effect a parser should be compiled onc To make recursive parsers work in this AST format, you must ensure that recursion is done by knot-tying: you should define all recursive parsers with `val` and introduce `lazy val` where necessary for the compiler to accept the definition. 
-## Bug Reports [![Percentage of issues still open](https://isitmaintained.com/badge/open/j-mie6/parsley.svg)](https://isitmaintained.com/project/j-mie6/parsley "Percentage of issues still open") [![Maintainability](https://api.codeclimate.com/v1/badges/337556ceb02f4d6dc599/maintainability)](https://codeclimate.com/github/j-mie6/parsley/maintainability) +## Bug Reports [![Percentage of issues still open](https://isitmaintained.com/badge/open/j-mie6/Parsley.svg)](https://isitmaintained.com/project/j-mie6/Parsley "Percentage of issues still open") [![Maintainability](https://img.shields.io/codeclimate/maintainability/j-mie6/Parsley)](https://codeclimate.com/github/j-mie6/Parsley) [![Test Coverage](https://img.shields.io/codeclimate/coverage-letter/j-mie6/Parsley)](https://codeclimate.com/github/j-mie6/Parsley) + If you encounter a bug when using Parsley, try and minimise the example of the parser (and the input) that triggers the bug. If possible, make a self contained example: this will help me to identify the issue without too much issue. 
From 379049a353530a409a417a1dca835b35313e1251 Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Fri, 1 Jan 2021 14:35:16 +0000 Subject: [PATCH 06/34] Updated the coverage to exclude non-tested debug code --- src/main/scala/parsley/Implicits.scala | 2 ++ .../parsley/internal/deepembedding/Cont.scala | 7 ++-- .../deepembedding/GeneralisedEmbedding.scala | 10 ++++++ .../internal/deepembedding/Parsley.scala | 4 +++ .../deepembedding/PrimitiveEmbedding.scala | 2 ++ .../deepembedding/TokenEmbedding.scala | 4 ++- .../internal/instructions/Context.scala | 2 ++ .../internal/instructions/CoreInstrs.scala | 34 +++++++++++++++++++ .../instructions/IntrinsicInstrs.scala | 30 ++++++++++++++++ .../instructions/IterativeInstrs.scala | 16 +++++++++ .../internal/instructions/OptInstrs.scala | 14 ++++++++ .../instructions/PrimitiveInstrs.scala | 18 ++++++++++ .../internal/instructions/TokenInstrs.scala | 24 +++++++++++++ .../internal/instructions/package.scala | 8 +++++ src/main/scala/parsley/package.scala | 2 ++ 15 files changed, 171 insertions(+), 6 deletions(-) diff --git a/src/main/scala/parsley/Implicits.scala b/src/main/scala/parsley/Implicits.scala index 48c973332..0a2d1df26 100644 --- a/src/main/scala/parsley/Implicits.scala +++ b/src/main/scala/parsley/Implicits.scala @@ -10,7 +10,9 @@ import scala.language.implicitConversions */ object Implicits { + // $COVERAGE-OFF$ @inline implicit def voidImplicitly[P](p: P)(implicit con: P => Parsley[_]): Parsley[Unit] = void(p) @inline implicit def stringLift(str: String): Parsley[String] = string(str) @inline implicit def charLift(c: Char): Parsley[Char] = char(c) + // $COVERAGE-ON$ } diff --git a/src/main/scala/parsley/internal/deepembedding/Cont.scala b/src/main/scala/parsley/internal/deepembedding/Cont.scala index d3e89f780..a18299416 100644 --- a/src/main/scala/parsley/internal/deepembedding/Cont.scala +++ b/src/main/scala/parsley/internal/deepembedding/Cont.scala @@ -21,8 +21,10 @@ private [deepembedding] abstract class 
ContOps[Cont[_, +_]] def unwrap[R](wrapped: Cont[R, R]): R def map[R, A, B](c: =>Cont[R, A], f: A => B): Cont[R, B] def flatMap[R, A, B](c: =>Cont[R, A], f: A => Cont[R, B]): Cont[R, B] + // $COVERAGE-OFF$ def >>[R, A, B](c: =>Cont[R, A], k: =>Cont[R, B]): Cont[R, B] = flatMap[R, A, B](c, _ => k) def |>[R, A, B](c: =>Cont[R, A], x: =>B): Cont[R, B] = map[R, A, B](c, _ => x) + // $COVERAGE-ON$ } private [deepembedding] object ContOps { @@ -65,11 +67,6 @@ private [deepembedding] object Cont new Cont(k => new Thunk(() => mx.cont(_ => k(y)))) } } - - def callCC[R, A, B](f: (A => Cont[R, B]) => Cont[R, A]): Cont[R, A] = - { - new Cont[R, A](k => f(x => new Cont[R, B](_ => k(x))).cont(k)) - } } private [deepembedding] class Id[R, +A](val x: A) extends AnyVal diff --git a/src/main/scala/parsley/internal/deepembedding/GeneralisedEmbedding.scala b/src/main/scala/parsley/internal/deepembedding/GeneralisedEmbedding.scala index 2e18aa105..de3f68cf5 100644 --- a/src/main/scala/parsley/internal/deepembedding/GeneralisedEmbedding.scala +++ b/src/main/scala/parsley/internal/deepembedding/GeneralisedEmbedding.scala @@ -13,7 +13,9 @@ private [parsley] abstract class Singleton[A](pretty: String, instr: instruction final override def codeGen[Cont[_, +_]: ContOps](implicit instrs: InstrBuffer, state: CodeGenState): Cont[Unit, Unit] = { result(instrs += instr) } + // $COVERAGE-OFF$ final override def prettyASTAux[Cont[_, +_]: ContOps]: Cont[String, String] = result(pretty) + // $COVERAGE-ON$ } private [deepembedding] abstract class SingletonExpect[A](pretty: String, builder: UnsafeOption[String] => SingletonExpect[A], instr: instructions.Instr) @@ -27,7 +29,9 @@ private [deepembedding] abstract class SingletonExpect[A](pretty: String, builde final override def codeGen[Cont[_, +_]: ContOps](implicit instrs: InstrBuffer, state: CodeGenState): Cont[Unit, Unit] = { result(instrs += instr) } + // $COVERAGE-OFF$ final override def prettyASTAux[Cont[_, +_]: ContOps]: Cont[String, String] = 
result(pretty) + // $COVERAGE-ON$ } private [deepembedding] abstract class Unary[A, B](_p: =>Parsley[A])(pretty: String => String, empty: String => Unary[A, B]) extends Parsley[B] { @@ -51,7 +55,9 @@ private [deepembedding] abstract class Unary[A, B](_p: =>Parsley[A])(pretty: Str size = p.size + numInstrs this } + // $COVERAGE-OFF$ override def prettyASTAux[Cont[_, +_]: ContOps]: Cont[String,String] = for (c <- p.prettyASTAux) yield pretty(c) + // $COVERAGE-ON$ } private [deepembedding] abstract class Binary[A, B, C](_left: =>Parsley[A], _right: =>Parsley[B])(pretty: (String, String) => String, empty: =>Binary[A, B, C]) @@ -80,9 +86,11 @@ private [deepembedding] abstract class Binary[A, B, C](_left: =>Parsley[A], _rig size = leftRepeats * left.size + rightRepeats * right.size + numInstrs this } + // $COVERAGE-OFF$ override def prettyASTAux[Cont[_, +_]: ContOps]: Cont[String,String] = { for (l <- left.prettyASTAux; r <- right.prettyASTAux) yield pretty(l, r) } + // $COVERAGE-ON$ } private [deepembedding] abstract class Ternary[A, B, C, D](_first: =>Parsley[A], _second: =>Parsley[B], _third: =>Parsley[C]) @@ -113,6 +121,8 @@ private [deepembedding] abstract class Ternary[A, B, C, D](_first: =>Parsley[A], third = _third first.findLets >> second.findLets >> third.findLets } + // $COVERAGE-OFF$ override def prettyASTAux[Cont[_, +_]: ContOps]: Cont[String, String] = for (f <- first.prettyASTAux; s <- second.prettyASTAux; t <- third.prettyASTAux) yield pretty(f, s, t) + // $COVERAGE-ON$ } \ No newline at end of file diff --git a/src/main/scala/parsley/internal/deepembedding/Parsley.scala b/src/main/scala/parsley/internal/deepembedding/Parsley.scala index 6f73267d6..d93d28e23 100644 --- a/src/main/scala/parsley/internal/deepembedding/Parsley.scala +++ b/src/main/scala/parsley/internal/deepembedding/Parsley.scala @@ -25,7 +25,9 @@ private [parsley] abstract class Parsley[+A] private [deepembedding] final protected type U = Any final protected type V = Any + // 
$COVERAGE-OFF$ final private [parsley] def prettyAST: String = {force(); safeCall((g: GenOps) => perform(prettyASTAux(g))(g))} + // $COVERAGE-ON$ final def unsafe(): Unit = safe = false final def force(): Unit = instrs @@ -182,5 +184,7 @@ private [parsley] class LetFinderState { private [parsley] class SubMap(val subMap: Map[Parsley[_], Parsley[_]]) extends AnyVal { def apply[A](p: Parsley[A]): Parsley[A] = subMap.getOrElse(p, p).asInstanceOf[Parsley[A]] + // $COVERAGE-OFF$ override def toString: String = subMap.toString + // $COVERAGE-ON$ } \ No newline at end of file diff --git a/src/main/scala/parsley/internal/deepembedding/PrimitiveEmbedding.scala b/src/main/scala/parsley/internal/deepembedding/PrimitiveEmbedding.scala index 7b7632ca1..79a2381d7 100644 --- a/src/main/scala/parsley/internal/deepembedding/PrimitiveEmbedding.scala +++ b/src/main/scala/parsley/internal/deepembedding/PrimitiveEmbedding.scala @@ -74,11 +74,13 @@ private [parsley] final class ErrorRelabel[+A](_p: =>Parsley[A], msg: String) ex else p.optimised } override def findLetsAux[Cont[_, +_]: ContOps](implicit seen: Set[Parsley[_]], state: LetFinderState): Cont[Unit, Unit] = p.findLets + // $COVERAGE-OFF$ override def optimise: Parsley[A] = throw new Exception("Error relabelling should not be in optimisation!") override def codeGen[Cont[_, +_]: ContOps](implicit instrs: InstrBuffer, state: CodeGenState): Cont[Unit, Unit] = { throw new Exception("Error relabelling should not be in code gen!") } override def prettyASTAux[Cont[_, +_]: ContOps]: Cont[String, String] = for (c <- p.prettyASTAux) yield s"($c ? 
$msg)" + // $COVERAGE-ON$ } private [parsley] final class Debug[A](_p: =>Parsley[A], name: String, break: Breakpoint) extends Unary[A, A](_p)(identity[String], _ => Debug.empty(name, break)) { diff --git a/src/main/scala/parsley/internal/deepembedding/TokenEmbedding.scala b/src/main/scala/parsley/internal/deepembedding/TokenEmbedding.scala index 468d269a9..b3fb435dd 100644 --- a/src/main/scala/parsley/internal/deepembedding/TokenEmbedding.scala +++ b/src/main/scala/parsley/internal/deepembedding/TokenEmbedding.scala @@ -63,6 +63,7 @@ private [parsley] object Sign { } } +// $COVERAGE-OFF$ private [deepembedding] object Keyword { def unapply(self: Keyword): Option[String] = Some(self.keyword) } @@ -71,4 +72,5 @@ private [deepembedding] object Operator { } private [deepembedding] object MaxOp { def unapply(self: MaxOp): Option[String] = Some(self.operator) -} \ No newline at end of file +} +// $COVERAGE-ON$ \ No newline at end of file diff --git a/src/main/scala/parsley/internal/instructions/Context.scala b/src/main/scala/parsley/internal/instructions/Context.scala index 77c00fb26..a433cd916 100644 --- a/src/main/scala/parsley/internal/instructions/Context.scala +++ b/src/main/scala/parsley/internal/instructions/Context.scala @@ -47,6 +47,7 @@ final class Context private [parsley] (private [instructions] var instrs: Array[ private [instructions] var startcol: Int = 1 var sourceName: String = "input" + // $COVERAGE-OFF$ //override def toString: String = pretty private [instructions] def pretty: String = { s"""[ @@ -64,6 +65,7 @@ final class Context private [parsley] (private [instructions] var instrs: Array[ | registers = ${regs.zipWithIndex.map{case (r, i) => s"r$i = $r"}.mkString("\n ")} |]""".stripMargin } + // $COVERAGE-ON$ def pos: (Int, Int) = (startline, startcol) def pos_=(pos: (Int, Int)): Unit = { diff --git a/src/main/scala/parsley/internal/instructions/CoreInstrs.scala b/src/main/scala/parsley/internal/instructions/CoreInstrs.scala index 
ded0779ef..87b9d7c1a 100644 --- a/src/main/scala/parsley/internal/instructions/CoreInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/CoreInstrs.scala @@ -10,7 +10,9 @@ import scala.annotation.tailrec // Stack Manipulators private [internal] final class Push[A](x: A) extends Instr { override def apply(ctx: Context): Unit = ctx.pushAndContinue(x) + // $COVERAGE-OFF$ override def toString: String = s"Push($x)" + // $COVERAGE-ON$ } private [internal] object Pop extends Instr { @@ -18,7 +20,9 @@ private [internal] object Pop extends Instr { ctx.stack.pop_() ctx.inc() } + // $COVERAGE-OFF$ override def toString: String = "Pop" + // $COVERAGE-ON$ } private [internal] object Flip extends Instr { @@ -27,7 +31,9 @@ private [internal] object Flip extends Instr { ctx.stack(1) = ctx.stack.upeek ctx.exchangeAndContinue(x) } + // $COVERAGE-OFF$ override def toString: String = "Flip" + // $COVERAGE-ON$ } // Applicative Functors @@ -37,14 +43,18 @@ private [internal] object Apply extends Instr { val f = ctx.stack.peek[Any => Any] ctx.exchangeAndContinue(f(x)) } + // $COVERAGE-OFF$ override def toString: String = "Apply" + // $COVERAGE-ON$ } // Monadic private [internal] final class DynCall[-A](f: A => Array[Instr], expected: UnsafeOption[String]) extends Instr { private [DynCall] val g = f.asInstanceOf[Any => Array[Instr]] override def apply(ctx: Context): Unit = ctx.call(g(ctx.stack.upop()), 0, expected) + // $COVERAGE-OFF$ override def toString: String = "DynCall(?)" + // $COVERAGE-ON$ } // Control Flow @@ -55,17 +65,23 @@ private [internal] final class Call(_instrs: =>Array[Instr], expected: UnsafeOpt } override def apply(ctx: Context): Unit = ctx.call(stateSafeCopy(instrs, pindices), 0, expected) + // $COVERAGE-OFF$ override def toString: String = "Call" + // $COVERAGE-ON$ } private [internal] final class GoSub(var label: Int, expected: UnsafeOption[String]) extends JumpInstr { override def apply(ctx: Context): Unit = ctx.call(ctx.instrs, label, expected) + // 
$COVERAGE-OFF$ override def toString: String = s"GoSub($label)" + // $COVERAGE-ON$ } private [internal] object Return extends Instr { override def apply(ctx: Context): Unit = ctx.ret() + // $COVERAGE-OFF$ override def toString: String = "Return" + // $COVERAGE-ON$ } private [internal] final class Empty(expected: UnsafeOption[String]) extends Instr { @@ -74,7 +90,9 @@ private [internal] final class Empty(expected: UnsafeOption[String]) extends Ins ctx.fail(expected) if (strip) ctx.unexpected = null } + // $COVERAGE-OFF$ override def toString: String = "Empty" + // $COVERAGE-ON$ } private [internal] final class PushHandler(var label: Int) extends JumpInstr { @@ -83,7 +101,9 @@ private [internal] final class PushHandler(var label: Int) extends JumpInstr { ctx.saveState() ctx.inc() } + // $COVERAGE-OFF$ override def toString: String = s"PushHandler($label)" + // $COVERAGE-ON$ } private [internal] final class PushFallthrough(var label: Int) extends JumpInstr { @@ -91,7 +111,9 @@ private [internal] final class PushFallthrough(var label: Int) extends JumpInstr ctx.pushHandler(label) ctx.inc() } + // $COVERAGE-OFF$ override def toString: String = s"PushFallthrough($label)" + // $COVERAGE-ON$ } private [internal] final class InputCheck(var label: Int) extends JumpInstr { @@ -100,12 +122,16 @@ private [internal] final class InputCheck(var label: Int) extends JumpInstr { ctx.pushHandler(label) ctx.inc() } + // $COVERAGE-OFF$ override def toString: String = s"InputCheck($label)" + // $COVERAGE-ON$ } private [internal] final class Jump(var label: Int) extends JumpInstr { override def apply(ctx: Context): Unit = ctx.pc = label + // $COVERAGE-OFF$ override def toString: String = s"Jump($label)" + // $COVERAGE-ON$ } private [internal] final class JumpGood(var label: Int) extends JumpInstr { @@ -114,14 +140,18 @@ private [internal] final class JumpGood(var label: Int) extends JumpInstr { ctx.checkStack = ctx.checkStack.tail ctx.pc = label } + // $COVERAGE-OFF$ override def 
toString: String = s"JumpGood($label)" + // $COVERAGE-ON$ } private [internal] object Catch extends Instr { override def apply(ctx: Context): Unit = ctx.catchNoConsumed { ctx.inc() } + // $COVERAGE-OFF$ override def toString: String = s"Catch" + // $COVERAGE-ON$ } // Debugging Instructions @@ -155,7 +185,9 @@ private [internal] final class LogBegin(var label: Int, val name: String, break: ctx.pushHandler(label) ctx.inc() } + // $COVERAGE-OFF$ override def toString: String = s"LogBegin($label, $name)" + // $COVERAGE-ON$ } private [internal] final class LogEnd(val name: String, break: Boolean) extends Instr with Logger { @@ -173,5 +205,7 @@ private [internal] final class LogEnd(val name: String, break: Boolean) extends println(preludeString('<', ctx, end)) if (break) doBreak(ctx) } + // $COVERAGE-OFF$ override def toString: String = s"LogEnd($name)" + // $COVERAGE-ON$ } \ No newline at end of file diff --git a/src/main/scala/parsley/internal/instructions/IntrinsicInstrs.scala b/src/main/scala/parsley/internal/instructions/IntrinsicInstrs.scala index b536a7e27..dd84d8d95 100644 --- a/src/main/scala/parsley/internal/instructions/IntrinsicInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/IntrinsicInstrs.scala @@ -11,7 +11,9 @@ private [internal] final class Lift2[A, B, C](f: (A, B) => C) extends Instr { val y = ctx.stack.upop() ctx.exchangeAndContinue(g(ctx.stack.peek, y)) } + // $COVERAGE-OFF$ override def toString: String = "Lift2(f)" + // $COVERAGE-ON$ } private [internal] final class Lift3[A, B, C, D](f: (A, B, C) => D) extends Instr { @@ -21,7 +23,9 @@ private [internal] final class Lift3[A, B, C, D](f: (A, B, C) => D) extends Inst val y = ctx.stack.upop() ctx.exchangeAndContinue(g(ctx.stack.peek, y, z)) } + // $COVERAGE-OFF$ override def toString: String = "Lift3(f)" + // $COVERAGE-ON$ } private [internal] class CharTok(c: Char, x: Any, _expected: UnsafeOption[String]) extends Instr { @@ -33,7 +37,9 @@ private [internal] class CharTok(c: Char, x: 
Any, _expected: UnsafeOption[String } else ctx.fail(expected) } + // $COVERAGE-OFF$ override def toString: String = if (x == c) s"Chr($c)" else s"ChrPerform($c, $x)" + // $COVERAGE-ON$ } private [internal] final class StringTok private [instructions] (s: String, x: Any, _expected: UnsafeOption[String]) extends Instr { @@ -74,7 +80,9 @@ private [internal] final class StringTok private [instructions] (s: String, x: A } override def apply(ctx: Context): Unit = go(ctx, ctx.offset, 0) + // $COVERAGE-OFF$ override def toString: String = if (x.isInstanceOf[String] && (s eq x.asInstanceOf[String])) s"Str($s)" else s"StrPerform($s, $x)" + // $COVERAGE-ON$ } private [internal] final class If(var label: Int) extends JumpInstr { @@ -82,7 +90,9 @@ private [internal] final class If(var label: Int) extends JumpInstr { if (ctx.stack.pop()) ctx.pc = label else ctx.inc() } + // $COVERAGE-OFF$ override def toString: String = s"If(true: $label)" + // $COVERAGE-ON$ } private [internal] final class Filter[A](pred: A=>Boolean, expected: UnsafeOption[String]) extends Instr { @@ -95,7 +105,9 @@ private [internal] final class Filter[A](pred: A=>Boolean, expected: UnsafeOptio if (strip) ctx.unexpected = null } } + // $COVERAGE-OFF$ override def toString: String = "Filter(?)" + // $COVERAGE-ON$ } private [internal] final class Guard[A](pred: A=>Boolean, msg: String, expected: UnsafeOption[String]) extends Instr { @@ -104,7 +116,9 @@ private [internal] final class Guard[A](pred: A=>Boolean, msg: String, expected: if (pred_(ctx.stack.upeek)) ctx.inc() else ctx.failWithMessage(expected, msg) } + // $COVERAGE-OFF$ override def toString: String = s"Guard(?, $msg)" + // $COVERAGE-ON$ } private [internal] final class FastGuard[A](pred: A=>Boolean, msggen: A=>String, expected: UnsafeOption[String]) extends Instr { @@ -114,19 +128,25 @@ private [internal] final class FastGuard[A](pred: A=>Boolean, msggen: A=>String, if (pred_(ctx.stack.upeek)) ctx.inc() else ctx.failWithMessage(expected, 
msggen_(ctx.stack.upop())) } + // $COVERAGE-OFF$ override def toString: String = "FastGuard(?, ?)" + // $COVERAGE-ON$ } private [internal] final class FastFail[A](msggen: A=>String, expected: UnsafeOption[String]) extends Instr { private [this] val msggen_ = msggen.asInstanceOf[Any => String] override def apply(ctx: Context): Unit = ctx.failWithMessage(expected, msggen_(ctx.stack.upop())) + // $COVERAGE-OFF$ override def toString: String = "FastFail(?)" + // $COVERAGE-ON$ } private [internal] final class FastUnexpected[A](msggen: A=>String, expected: UnsafeOption[String]) extends Instr { private [this] val msggen_ = msggen.asInstanceOf[Any => String] override def apply(ctx: Context): Unit = ctx.unexpectedFail(expected = expected, unexpected = msggen_(ctx.stack.upop())) + // $COVERAGE-OFF$ override def toString: String = "FastUnexpected(?)" + // $COVERAGE-ON$ } private [internal] final class NotFollowedBy(expected: UnsafeOption[String]) extends Instr { @@ -144,13 +164,17 @@ private [internal] final class NotFollowedBy(expected: UnsafeOption[String]) ext ctx.pushAndContinue(()) } } + // $COVERAGE-OFF$ override def toString: String = "NotFollowedBy" + // $COVERAGE-ON$ } private [internal] class Eof(_expected: UnsafeOption[String]) extends Instr { val expected: String = if (_expected == null) "end of input" else _expected override def apply(ctx: Context): Unit = if (ctx.offset == ctx.inputsz) ctx.pushAndContinue(()) else ctx.fail(expected) + // $COVERAGE-OFF$ override final def toString: String = "Eof" + // $COVERAGE-ON$ } private [internal] final class Modify[S](v: Int, f: S => S) extends Instr { @@ -159,7 +183,9 @@ private [internal] final class Modify[S](v: Int, f: S => S) extends Instr { ctx.copyOnWrite(v, g(ctx.regs(v))) ctx.pushAndContinue(()) } + // $COVERAGE-OFF$ override def toString: String = s"Modify($v, f)" + // $COVERAGE-ON$ } private [internal] final class LocalEntry(v: Int) extends Instr { @@ -169,7 +195,9 @@ private [internal] final class LocalEntry(v: 
Int) extends Instr { ctx.copyOnWrite(v, ctx.stack.upop()) ctx.inc() } + // $COVERAGE-OFF$ override def toString: String = s"LocalEntry($v)" + // $COVERAGE-ON$ } private [internal] final class LocalExit[S](v: Int) extends Instr { @@ -181,7 +209,9 @@ private [internal] final class LocalExit[S](v: Int) extends Instr { else ctx.fail() ctx.states = ctx.states.tail } + // $COVERAGE-OFF$ override def toString: String = s"LocalExit($v)" + // $COVERAGE-ON$ } // Companion Objects diff --git a/src/main/scala/parsley/internal/instructions/IterativeInstrs.scala b/src/main/scala/parsley/internal/instructions/IterativeInstrs.scala index 4a3dee5e4..511fadacf 100644 --- a/src/main/scala/parsley/internal/instructions/IterativeInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/IterativeInstrs.scala @@ -22,7 +22,9 @@ private [internal] final class Many(var label: Int) extends JumpInstr with State acc.clear() } } + // $COVERAGE-OFF$ override def toString: String = s"Many($label)" + // $COVERAGE-ON$ override def copy: Many = new Many(label) } private [internal] final class SkipMany(var label: Int) extends JumpInstr { @@ -37,7 +39,9 @@ private [internal] final class SkipMany(var label: Int) extends JumpInstr { ctx.pushAndContinue(()) } } + // $COVERAGE-OFF$ override def toString: String = s"SkipMany($label)" + // $COVERAGE-ON$ } private [internal] final class ChainPost(var label: Int) extends JumpInstr with Stateful { @@ -67,7 +71,9 @@ private [internal] final class ChainPost(var label: Int) extends JumpInstr with acc = null } } + // $COVERAGE-OFF$ override def toString: String = s"ChainPost($label)" + // $COVERAGE-ON$ override def copy: ChainPost = new ChainPost(label) } @@ -90,7 +96,9 @@ private [internal] final class ChainPre(var label: Int) extends JumpInstr with S acc = null } } + // $COVERAGE-OFF$ override def toString: String = s"ChainPre($label)" + // $COVERAGE-ON$ override def copy: ChainPre = new ChainPre(label) } private [internal] final class Chainl[A, B](var 
label: Int, _wrap: A => B) extends JumpInstr with Stateful { @@ -121,7 +129,9 @@ private [internal] final class Chainl[A, B](var label: Int, _wrap: A => B) exten acc = null } } + // $COVERAGE-OFF$ override def toString: String = s"Chainl($label)" + // $COVERAGE-ON$ override def copy: Chainl[A, B] = new Chainl(label, wrap) } @@ -169,7 +179,9 @@ private [internal] final class Chainr[A, B](var label: Int, _wrap: A => B) exten acc = null } } + // $COVERAGE-OFF$ override def toString: String = s"Chainr($label)" + // $COVERAGE-ON$ override def copy: Chainr[A, B] = new Chainr(label, wrap) } @@ -196,7 +208,9 @@ private [internal] final class SepEndBy1(var label: Int) extends JumpInstr with acc.clear() } } + // $COVERAGE-OFF$ override def toString: String = s"SepEndBy1($label)" + // $COVERAGE-ON$ override def copy: SepEndBy1 = new SepEndBy1(label) } @@ -218,6 +232,8 @@ private [internal] final class ManyUntil(var label: Int) extends JumpInstr with // ManyUntil is a fallthrough handler, it must be visited during failure, but does nothing to the external state else { acc.clear(); ctx.fail() } } + // $COVERAGE-OFF$ override def toString: String = s"ManyUntil($label)" + // $COVERAGE-ON$ override def copy: ManyUntil = new ManyUntil(label) } \ No newline at end of file diff --git a/src/main/scala/parsley/internal/instructions/OptInstrs.scala b/src/main/scala/parsley/internal/instructions/OptInstrs.scala index d9fd022e7..31df9d41c 100644 --- a/src/main/scala/parsley/internal/instructions/OptInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/OptInstrs.scala @@ -11,12 +11,16 @@ import scala.collection.mutable private [internal] final class Perform[-A, +B](f: A => B) extends Instr { private [Perform] val g = f.asInstanceOf[Any => B] override def apply(ctx: Context): Unit = ctx.exchangeAndContinue(g(ctx.stack.upeek)) + // $COVERAGE-OFF$ override def toString: String = "Perform(?)" + // $COVERAGE-ON$ } private [internal] final class Exchange[A](private [Exchange] val x: A) 
extends Instr { override def apply(ctx: Context): Unit = ctx.exchangeAndContinue(x) + // $COVERAGE-OFF$ override def toString: String = s"Ex($x)" + // $COVERAGE-ON$ } private [internal] final class SatisfyExchange[A](f: Char => Boolean, x: A, expected: UnsafeOption[String]) extends Instr { @@ -27,7 +31,9 @@ private [internal] final class SatisfyExchange[A](f: Char => Boolean, x: A, expe } else ctx.fail(expected) } + // $COVERAGE-OFF$ override def toString: String = s"SatEx(?, $x)" + // $COVERAGE-ON$ } private [internal] final class JumpGoodAttempt(var label: Int) extends JumpInstr { @@ -43,14 +49,18 @@ private [internal] final class JumpGoodAttempt(var label: Int) extends JumpInstr ctx.inc() } } + // $COVERAGE-OFF$ override def toString: String = s"JumpGood'($label)" + // $COVERAGE-ON$ } private [internal] final class RecoverWith[A](x: A) extends Instr { override def apply(ctx: Context): Unit = ctx.catchNoConsumed { ctx.pushAndContinue(x) } + // $COVERAGE-OFF$ override def toString: String = s"Recover($x)" + // $COVERAGE-ON$ } private [internal] final class AlwaysRecoverWith[A](x: A) extends Instr { @@ -66,7 +76,9 @@ private [internal] final class AlwaysRecoverWith[A](x: A) extends Instr { ctx.pushAndContinue(x) } } + // $COVERAGE-OFF$ override def toString: String = s"AlwaysRecover($x)" + // $COVERAGE-ON$ } private [internal] final class JumpTable(prefixes: List[Char], labels: List[Int], private [this] var default: Int, _expecteds: List[UnsafeOption[String]]) @@ -112,7 +124,9 @@ private [internal] final class JumpTable(prefixes: List[Char], labels: List[Int] default = labels(default) defaultPreamble = default - 1 } + // $COVERAGE-OFF$ override def toString: String = s"JumpTable(${jumpTable.map{case (k, v) => k.toChar -> v}.mkString(", ")}, _ -> $default)" + // $COVERAGE-ON$ } private [internal] object CharTokFastPerform { diff --git a/src/main/scala/parsley/internal/instructions/PrimitiveInstrs.scala 
b/src/main/scala/parsley/internal/instructions/PrimitiveInstrs.scala index d84c94bde..101648fb0 100644 --- a/src/main/scala/parsley/internal/instructions/PrimitiveInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/PrimitiveInstrs.scala @@ -9,17 +9,23 @@ private [internal] final class Satisfies(f: Char => Boolean, expected: UnsafeOpt if (ctx.moreInput && f(ctx.nextChar)) ctx.pushAndContinue(ctx.consumeChar()) else ctx.fail(expected) } + // $COVERAGE-OFF$ override def toString: String = "Sat(?)" + // $COVERAGE-ON$ } private [internal] final class Fail(msg: String, expected: UnsafeOption[String]) extends Instr { override def apply(ctx: Context): Unit = ctx.failWithMessage(expected, msg) + // $COVERAGE-OFF$ override def toString: String = s"Fail($msg)" + // $COVERAGE-ON$ } private [internal] final class Unexpected(msg: String, expected: UnsafeOption[String]) extends Instr { override def apply(ctx: Context): Unit = ctx.unexpectedFail(expected = expected, unexpected = msg) + // $COVERAGE-OFF$ override def toString: String = s"Unexpected($msg)" + // $COVERAGE-ON$ } private [internal] object Attempt extends Instr { @@ -36,7 +42,9 @@ private [internal] object Attempt extends Instr { ctx.fail() } } + // $COVERAGE-OFF$ override def toString: String = "Attempt" + // $COVERAGE-ON$ } private [internal] object Look extends Instr { @@ -51,24 +59,32 @@ private [internal] object Look extends Instr { ctx.fail() } } + // $COVERAGE-OFF$ override def toString: String = "Look" + // $COVERAGE-ON$ } // Position Extractors private [internal] object Line extends Instr { override def apply(ctx: Context): Unit = ctx.pushAndContinue(ctx.line) + // $COVERAGE-OFF$ override def toString: String = "Line" + // $COVERAGE-ON$ } private [internal] object Col extends Instr { override def apply(ctx: Context): Unit = ctx.pushAndContinue(ctx.col) + // $COVERAGE-OFF$ override def toString: String = "Col" + // $COVERAGE-ON$ } // Register-Manipulators private [internal] final class Get(v: Int) 
extends Instr { override def apply(ctx: Context): Unit = ctx.pushAndContinue(ctx.regs(v)) + // $COVERAGE-OFF$ override def toString: String = s"Get($v)" + // $COVERAGE-ON$ } private [internal] final class Put(v: Int) extends Instr { @@ -76,5 +92,7 @@ private [internal] final class Put(v: Int) extends Instr { ctx.copyOnWrite(v, ctx.stack.peekAndExchange(())) ctx.inc() } + // $COVERAGE-OFF$ override def toString: String = s"Put($v)" + // $COVERAGE-ON$ } \ No newline at end of file diff --git a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala index 36022688d..3519b8731 100644 --- a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala @@ -81,7 +81,9 @@ private [internal] class TokenComment(start: String, end: String, line: String, } true } + // $COVERAGE-OFF$ override def toString: String = "TokenComment" + // $COVERAGE-ON$ } // TODO This is considered as a VERY rough implementation of the intrinsic, just to get it working, it will be optimised later @@ -114,7 +116,9 @@ private [internal] final class TokenSkipComments(start: String, end: String, lin } ctx.pushAndContinue(()) } + // $COVERAGE-OFF$ override def toString: String = "TokenSkipComments" + // $COVERAGE-ON$ } private [internal] final class TokenWhiteSpace(ws: TokenSet, start: String, end: String, line: String, nested: Boolean) @@ -163,7 +167,9 @@ private [internal] final class TokenWhiteSpace(ws: TokenSet, start: String, end: } private def spaces(ctx: Context): Unit = while (ctx.moreInput && ws(ctx.nextChar)) ctx.consumeChar() + // $COVERAGE-OFF$ override def toString: String = "TokenWhiteSpace" + // $COVERAGE-ON$ } private [internal] final class TokenSign(ty: SignType, _expected: UnsafeOption[String]) extends Instr @@ -193,7 +199,9 @@ private [internal] final class TokenSign(ty: SignType, _expected: UnsafeOption[S ctx.inc() } + // $COVERAGE-OFF$ override 
def toString: String = "TokenSign" + // $COVERAGE-ON$ } private [instructions] sealed trait NumericReader { @@ -288,7 +296,9 @@ private [internal] final class TokenNatural(_expected: UnsafeOption[String]) ext else ctx.fail(expected) } + // $COVERAGE-OFF$ override def toString: String = "TokenNatural" + // $COVERAGE-ON$ } private [internal] final class TokenFloat(_expected: UnsafeOption[String]) extends Instr @@ -366,7 +376,9 @@ private [internal] final class TokenFloat(_expected: UnsafeOption[String]) exten else true } + // $COVERAGE-OFF$ override def toString: String = "TokenFloat" + // $COVERAGE-ON$ } private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends Instr with Stateful with NumericReader @@ -644,7 +656,9 @@ private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends In else false } + // $COVERAGE-OFF$ override def toString: String = "TokenEscape" + // $COVERAGE-ON$ override def copy: TokenEscape = new TokenEscape(expected) } @@ -718,7 +732,9 @@ private [internal] final class TokenString(ws: TokenSet, _expected: UnsafeOption n } + // $COVERAGE-OFF$ override def toString: String = "TokenString" + // $COVERAGE-ON$ override def copy: TokenString = new TokenString(ws, _expected) } @@ -766,7 +782,9 @@ private [internal] final class TokenRawString(_expected: UnsafeOption[String]) e else ctx.fail(expectedEos) } + // $COVERAGE-OFF$ override def toString: String = "TokenRawString" + // $COVERAGE-ON$ } private [instructions] abstract class TokenLexi(name: String, illegalName: String) @@ -810,7 +828,9 @@ private [instructions] abstract class TokenLexi(name: String, illegalName: Strin } } + // $COVERAGE-OFF$ final override def toString: String = s"TokenLexi($name)" + // $COVERAGE-ON$ } private [internal] final class TokenIdentifier(start: TokenSet, letter: TokenSet, keywords: Set[String], _expected: UnsafeOption[String]) @@ -854,7 +874,9 @@ private [instructions] abstract class TokenSpecific(_specific: String, caseSensi else 
ctx.fail(expected) } + // $COVERAGE-OFF$ override def toString: String = s"TokenSpecific(${_specific})" + // $COVERAGE-ON$ } private [internal] abstract class TokenSpecificNoTrailLetter(keyword: String, letter: TokenSet, caseSensitive: Boolean, expected: UnsafeOption[String]) @@ -909,5 +931,7 @@ private [internal] class TokenMaxOp(operator: String, _ops: Set[String], expecte ctx.states = ctx.states.tail ctx.pushAndContinue(()) } + // $COVERAGE-OFF$ override def toString: String = s"TokenMaxOp(${operator})" + // $COVERAGE-ON$ } \ No newline at end of file diff --git a/src/main/scala/parsley/internal/instructions/package.scala b/src/main/scala/parsley/internal/instructions/package.scala index f40f2b8a2..be07b9cff 100644 --- a/src/main/scala/parsley/internal/instructions/package.scala +++ b/src/main/scala/parsley/internal/instructions/package.scala @@ -24,9 +24,12 @@ package object instructions // It's 2018 and Labels are making a come-back, along with 2 pass assembly private [internal] final class Label(val i: Int) extends Instr { + // $COVERAGE-OFF$ def apply(ctx: Context): Unit = throw new Exception("Cannot execute label") + // $COVERAGE-ON$ } + // $COVERAGE-OFF$ final private [parsley] def pretty(instrs: Array[Instr]): String = { val n = instrs.length val digits = if (n != 0) Math.log10(n).toInt + 1 else 0 @@ -43,6 +46,7 @@ package object instructions s"$paddedIdx [$paddedHex]: $instr" }.mkString(";\n") } + // $COVERAGE-ON$ final private [internal] def stateSafeCopy(instrs: Array[Instr], pindices: Array[Int]): Array[Instr] = { val nstateful = pindices.length @@ -73,8 +77,10 @@ package object instructions def empty[A]: Stack[A] = null @inline def isEmpty(s: Stack[_]): Boolean = s == null @tailrec def drop[A](s: Stack[A], n: Int): Stack[A] = if (n > 0 && !isEmpty(s)) drop(s.tail, n - 1) else s + // $COVERAGE-OFF$ def map[A, B](s: Stack[A], f: A => B): Stack[B] = if (!isEmpty(s)) new Stack(f(s.head), map(s.tail, f)) else empty def mkString(s: Stack[_], sep: String): 
String = if (isEmpty(s)) "" else s.head.toString + sep + mkString(s.tail, sep) + // $COVERAGE-ON$ def push[A](s: Stack[A], x: A): Stack[A] = new Stack(x, s) } @@ -118,9 +124,11 @@ package object instructions // This is off by one, but that's fine, if everything is also off by one :P def usize: Int = sp + // $COVERAGE-OFF$ def size: Int = usize + 1 def isEmpty: Boolean = sp == -1 def mkString(sep: String): String = array.take(sp + 1).reverse.mkString(sep) + // $COVERAGE-ON$ def clear(): Unit = { sp = -1 var i = array.length-1 diff --git a/src/main/scala/parsley/package.scala b/src/main/scala/parsley/package.scala index 57c0e59bb..2f38c4dc6 100644 --- a/src/main/scala/parsley/package.scala +++ b/src/main/scala/parsley/package.scala @@ -6,6 +6,7 @@ import scala.language.implicitConversions package object parsley { + // $COVERAGE-OFF$ // Public API /** This method is responsible for actually executing parsers. Given a `Parsley[A]` and an input * string, will parse the string with the parser. The result is either a `Success` or a `Failure`. 
@@ -81,4 +82,5 @@ package object parsley @implicitAmbiguous("Must specify the type for get operation; S cannot be Nothing") implicit def neqAmbig1[A] : A =!= A = null implicit def neqAmbig2[A] : A =!= A = null + // $COVERAGE-ON$ } From 8b936f833afe2449ea8471148722979b876ef872 Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Fri, 1 Jan 2021 15:21:55 +0000 Subject: [PATCH 07/34] Factored out some of the common string logic --- .../parsley/internal/deepembedding/Cont.scala | 10 +- .../internal/instructions/TokenInstrs.scala | 174 +++++++++--------- 2 files changed, 84 insertions(+), 100 deletions(-) diff --git a/src/main/scala/parsley/internal/deepembedding/Cont.scala b/src/main/scala/parsley/internal/deepembedding/Cont.scala index a18299416..0f026eaf3 100644 --- a/src/main/scala/parsley/internal/deepembedding/Cont.scala +++ b/src/main/scala/parsley/internal/deepembedding/Cont.scala @@ -62,10 +62,6 @@ private [deepembedding] object Cont { new Cont(k => new Thunk(() => mx.cont(_ => my.cont(k)))) } - override def |>[R, A, B](mx: => Cont[R, A], y: => B): Cont[R, B] = - { - new Cont(k => new Thunk(() => mx.cont(_ => k(y)))) - } } } @@ -79,10 +75,6 @@ private [deepembedding] object Id override def map[R, A, B](c: =>Id[R, A], f: A => B): Id[R, B] = new Id(f(c.x)) override def flatMap[R, A, B](c: =>Id[R, A], f: A => Id[R, B]): Id[R, B] = f(c.x) override def >>[R, A, B](c: => Id[R, A], k: => Id[R, B]): Id[R, B] = {c; k} - override def |>[R, A, B](c: => Id[R, A], x: => B): Id[R, B] = - { - c.x - new Id(x) - } + override def |>[R, A, B](c: => Id[R, A], x: => B): Id[R, B] = {c; new Id(x)} } } diff --git a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala index 3519b8731..c831808fb 100644 --- a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala @@ -662,17 +662,15 @@ private [internal] class TokenEscape(_expected: 
UnsafeOption[String]) extends In override def copy: TokenEscape = new TokenEscape(expected) } -private [internal] final class TokenString(ws: TokenSet, _expected: UnsafeOption[String]) extends TokenEscape(_expected) -{ - val expectedString = if (_expected == null) "string" else _expected - val expectedEos = if (_expected == null) "end of string" else _expected - val expectedEscape = if (_expected == null) "escape code" else _expected - val expectedGap = if (_expected == null) "end of string gap" else _expected - val expectedChar = if (_expected == null) "string character" else _expected +private [instructions] sealed trait TokenStringLike extends Instr { + protected val expected: UnsafeOption[String] + final protected lazy val expectedString = if (expected == null) "string" else expected + final protected lazy val expectedEos = if (expected == null) "end of string" else expected + final protected lazy val expectedChar = if (expected == null) "string character" else expected - override def apply(ctx: Context): Unit = + protected def restOfString(ctx: Context, builder: StringBuilder): Unit + final override def apply(ctx: Context): Unit = { - badCode = false if (ctx.moreInput && ctx.nextChar == '"') { ctx.fastUncheckedConsumeChars(1) @@ -680,111 +678,105 @@ private [internal] final class TokenString(ws: TokenSet, _expected: UnsafeOption } else ctx.fail(expectedString) } +} - @tailrec def restOfString(ctx: Context, builder: StringBuilder): Unit = - { - if (ctx.moreInput) ctx.nextChar match - { - case '"' => - ctx.fastUncheckedConsumeChars(1) - ctx.pushAndContinue(builder.toString) - case '\\' => - ctx.fastUncheckedConsumeChars(1) - if (spaces(ctx) != 0) - { - if (ctx.moreInput && ctx.nextChar == '\\') - { +private [internal] final class TokenRawString(_expected: UnsafeOption[String]) extends TokenStringLike +{ + override val expected = _expected + override def restOfString(ctx: Context, builder: StringBuilder): Unit = { + @tailrec def go(): Unit = { + if 
(ctx.moreInput) ctx.nextChar match { + case '"' => + ctx.fastUncheckedConsumeChars(1) + ctx.pushAndContinue(builder.toString) + case '\\' => + ctx.fastUncheckedConsumeChars(1) + builder += '\\' + if (ctx.moreInput && ctx.nextChar > '\u0016') { + builder += ctx.nextChar ctx.fastUncheckedConsumeChars(1) - restOfString(ctx, builder) + go() } - else ctx.fail(expectedGap) - } - else if (ctx.moreInput && ctx.nextChar == '&') - { + else ctx.fail(expectedChar) + case c if c > '\u0016' => + builder += c ctx.fastUncheckedConsumeChars(1) - restOfString(ctx, builder) - } - else if (escape(ctx)) restOfString(ctx, builder += escapeChar) - else - { - ctx.fail(expectedEscape) - if (badCode) ctx.raw ::= "invalid escape sequence" - } - case c => - if (c > '\u0016') - { - ctx.fastUncheckedConsumeChars(1) - restOfString(ctx, builder += c) - } - else ctx.fail(expectedChar) - } - else ctx.fail(expectedEos) - } - - private def spaces(ctx: Context): Int = - { - var n = 0 - while (ctx.moreInput && ws(ctx.nextChar)) - { - ctx.consumeChar() - n += 1 + go() + case _ => ctx.fail(expectedChar) + } + else ctx.fail(expectedEos) } - n + go() } // $COVERAGE-OFF$ - override def toString: String = "TokenString" + override def toString: String = "TokenRawString" // $COVERAGE-ON$ - override def copy: TokenString = new TokenString(ws, _expected) } -private [internal] final class TokenRawString(_expected: UnsafeOption[String]) extends Instr +private [internal] final class TokenString(ws: TokenSet, _expected: UnsafeOption[String]) extends TokenEscape(_expected) with TokenStringLike { - val expectedString = if (_expected == null) "string" else _expected - val expectedEos = if (_expected == null) "end of string" else _expected - val expectedChar = if (_expected == null) "string character" else _expected + override val expected = _expected + val expectedEscape = if (_expected == null) "escape code" else _expected + val expectedGap = if (_expected == null) "end of string gap" else _expected - override def 
apply(ctx: Context): Unit = - { - if (ctx.moreInput && ctx.nextChar == '"') - { - ctx.fastUncheckedConsumeChars(1) - restOfString(ctx, new StringBuilder()) + override def restOfString(ctx: Context, builder: StringBuilder): Unit = { + @tailrec def go(): Unit = { + if (ctx.moreInput) ctx.nextChar match { + case '"' => + ctx.fastUncheckedConsumeChars(1) + ctx.pushAndContinue(builder.toString) + case '\\' => + ctx.fastUncheckedConsumeChars(1) + if (spaces(ctx) != 0) + { + if (ctx.moreInput && ctx.nextChar == '\\') + { + ctx.fastUncheckedConsumeChars(1) + go() + } + else ctx.fail(expectedGap) + } + else if (ctx.moreInput && ctx.nextChar == '&') + { + ctx.fastUncheckedConsumeChars(1) + go() + } + else if (escape(ctx)) { + builder += escapeChar + go() + } + else + { + ctx.fail(expectedEscape) + if (badCode) ctx.raw ::= "invalid escape sequence" + } + case c if c > '\u0016' => + builder += c + ctx.fastUncheckedConsumeChars(1) + go() + case _ => ctx.fail(expectedChar) + } + else ctx.fail(expectedEos) } - else ctx.fail(expectedString) + go() } - @tailrec def restOfString(ctx: Context, builder: StringBuilder): Unit = + private def spaces(ctx: Context): Int = { - if (ctx.moreInput) ctx.nextChar match + var n = 0 + while (ctx.moreInput && ws(ctx.nextChar)) { - case '"' => - ctx.fastUncheckedConsumeChars(1) - ctx.pushAndContinue(builder.toString) - case '\\' => - ctx.fastUncheckedConsumeChars(1) - builder += '\\' - if (ctx.moreInput && ctx.nextChar > '\u0016') - { - builder += ctx.nextChar - ctx.fastUncheckedConsumeChars(1) - restOfString(ctx, builder) - } - else ctx.fail(expectedChar) - case c => - if (c > '\u0016') - { - ctx.fastUncheckedConsumeChars(1) - restOfString(ctx, builder += c) - } - else ctx.fail(expectedChar) + ctx.consumeChar() + n += 1 } - else ctx.fail(expectedEos) + n } // $COVERAGE-OFF$ - override def toString: String = "TokenRawString" + override def toString: String = "TokenString" // $COVERAGE-ON$ + override def copy: TokenString = new TokenString(ws, 
_expected) } private [instructions] abstract class TokenLexi(name: String, illegalName: String) From 99c5d5563af8a01346fb1747538c376a7e512088 Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Fri, 1 Jan 2021 15:35:05 +0000 Subject: [PATCH 08/34] Further factoring out --- .../internal/instructions/TokenInstrs.scala | 116 ++++++++---------- 1 file changed, 54 insertions(+), 62 deletions(-) diff --git a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala index c831808fb..6868bae52 100644 --- a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala @@ -668,7 +668,24 @@ private [instructions] sealed trait TokenStringLike extends Instr { final protected lazy val expectedEos = if (expected == null) "end of string" else expected final protected lazy val expectedChar = if (expected == null) "string character" else expected - protected def restOfString(ctx: Context, builder: StringBuilder): Unit + // All failures must be handled by this function + protected def handleEscaped(ctx: Context, builder: StringBuilder): Boolean + @tailrec private final def restOfString(ctx: Context, builder: StringBuilder): Unit = { + if (ctx.moreInput) ctx.nextChar match { + case '"' => + ctx.fastUncheckedConsumeChars(1) + ctx.pushAndContinue(builder.toString) + case '\\' => + ctx.fastUncheckedConsumeChars(1) + if (handleEscaped(ctx, builder)) restOfString(ctx, builder) + case c if c > '\u0016' => + builder += c + ctx.fastUncheckedConsumeChars(1) + restOfString(ctx, builder) + case _ => ctx.fail(expectedChar) + } + else ctx.fail(expectedEos) + } final override def apply(ctx: Context): Unit = { if (ctx.moreInput && ctx.nextChar == '"') @@ -683,30 +700,17 @@ private [instructions] sealed trait TokenStringLike extends Instr { private [internal] final class TokenRawString(_expected: UnsafeOption[String]) extends TokenStringLike { override val expected 
= _expected - override def restOfString(ctx: Context, builder: StringBuilder): Unit = { - @tailrec def go(): Unit = { - if (ctx.moreInput) ctx.nextChar match { - case '"' => - ctx.fastUncheckedConsumeChars(1) - ctx.pushAndContinue(builder.toString) - case '\\' => - ctx.fastUncheckedConsumeChars(1) - builder += '\\' - if (ctx.moreInput && ctx.nextChar > '\u0016') { - builder += ctx.nextChar - ctx.fastUncheckedConsumeChars(1) - go() - } - else ctx.fail(expectedChar) - case c if c > '\u0016' => - builder += c - ctx.fastUncheckedConsumeChars(1) - go() - case _ => ctx.fail(expectedChar) - } - else ctx.fail(expectedEos) + override def handleEscaped(ctx: Context, builder: StringBuilder): Boolean = { + builder += '\\' + if (ctx.moreInput && ctx.nextChar > '\u0016') { + builder += ctx.nextChar + ctx.fastUncheckedConsumeChars(1) + true + } + else { + ctx.fail(expectedChar) + false } - go() } // $COVERAGE-OFF$ @@ -720,46 +724,34 @@ private [internal] final class TokenString(ws: TokenSet, _expected: UnsafeOption val expectedEscape = if (_expected == null) "escape code" else _expected val expectedGap = if (_expected == null) "end of string gap" else _expected - override def restOfString(ctx: Context, builder: StringBuilder): Unit = { - @tailrec def go(): Unit = { - if (ctx.moreInput) ctx.nextChar match { - case '"' => - ctx.fastUncheckedConsumeChars(1) - ctx.pushAndContinue(builder.toString) - case '\\' => - ctx.fastUncheckedConsumeChars(1) - if (spaces(ctx) != 0) - { - if (ctx.moreInput && ctx.nextChar == '\\') - { - ctx.fastUncheckedConsumeChars(1) - go() - } - else ctx.fail(expectedGap) - } - else if (ctx.moreInput && ctx.nextChar == '&') - { - ctx.fastUncheckedConsumeChars(1) - go() - } - else if (escape(ctx)) { - builder += escapeChar - go() - } - else - { - ctx.fail(expectedEscape) - if (badCode) ctx.raw ::= "invalid escape sequence" - } - case c if c > '\u0016' => - builder += c - ctx.fastUncheckedConsumeChars(1) - go() - case _ => ctx.fail(expectedChar) + override def 
handleEscaped(ctx: Context, builder: StringBuilder): Boolean = { + if (spaces(ctx) != 0) + { + if (ctx.moreInput && ctx.nextChar == '\\') + { + ctx.fastUncheckedConsumeChars(1) + true } - else ctx.fail(expectedEos) + else { + ctx.fail(expectedGap) + false + } + } + else if (ctx.moreInput && ctx.nextChar == '&') + { + ctx.fastUncheckedConsumeChars(1) + true + } + else if (escape(ctx)) { + builder += escapeChar + true + } + else + { + ctx.fail(expectedEscape) + if (badCode) ctx.raw ::= "invalid escape sequence" + false } - go() } private def spaces(ctx: Context): Int = From c2a0465890921b756e208d3d813112a9a46a7600 Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Fri, 1 Jan 2021 16:01:14 +0000 Subject: [PATCH 09/34] Improved complexity of of various things --- .../internal/instructions/TokenInstrs.scala | 150 +++++++----------- 1 file changed, 56 insertions(+), 94 deletions(-) diff --git a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala index 6868bae52..af9ba429c 100644 --- a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala @@ -686,10 +686,8 @@ private [instructions] sealed trait TokenStringLike extends Instr { } else ctx.fail(expectedEos) } - final override def apply(ctx: Context): Unit = - { - if (ctx.moreInput && ctx.nextChar == '"') - { + final override def apply(ctx: Context): Unit = { + if (ctx.moreInput && ctx.nextChar == '"') { ctx.fastUncheckedConsumeChars(1) restOfString(ctx, new StringBuilder()) } @@ -697,8 +695,7 @@ private [instructions] sealed trait TokenStringLike extends Instr { } } -private [internal] final class TokenRawString(_expected: UnsafeOption[String]) extends TokenStringLike -{ +private [internal] final class TokenRawString(_expected: UnsafeOption[String]) extends TokenStringLike { override val expected = _expected override def handleEscaped(ctx: Context, builder: StringBuilder): 
Boolean = { builder += '\\' @@ -718,51 +715,38 @@ private [internal] final class TokenRawString(_expected: UnsafeOption[String]) e // $COVERAGE-ON$ } -private [internal] final class TokenString(ws: TokenSet, _expected: UnsafeOption[String]) extends TokenEscape(_expected) with TokenStringLike -{ +private [internal] final class TokenString(ws: TokenSet, _expected: UnsafeOption[String]) extends TokenEscape(_expected) with TokenStringLike { override val expected = _expected - val expectedEscape = if (_expected == null) "escape code" else _expected - val expectedGap = if (_expected == null) "end of string gap" else _expected + private val expectedEscape = if (_expected == null) "escape code" else _expected + private val expectedGap = if (_expected == null) "end of string gap" else _expected override def handleEscaped(ctx: Context, builder: StringBuilder): Boolean = { if (spaces(ctx) != 0) { - if (ctx.moreInput && ctx.nextChar == '\\') - { - ctx.fastUncheckedConsumeChars(1) - true - } + val completedGap = ctx.moreInput && ctx.nextChar == '\\' + if (completedGap) ctx.fastUncheckedConsumeChars(1) + else ctx.fail(expectedGap) + completedGap + } + else { + val isDeadChar = ctx.moreInput && ctx.nextChar == '&' + lazy val isEscapeChar = escape(ctx) + if (isDeadChar) ctx.fastUncheckedConsumeChars(1) + else if (isEscapeChar) builder += escapeChar else { - ctx.fail(expectedGap) - false + ctx.fail(expectedEscape) + if (badCode) ctx.raw ::= "invalid escape sequence" } - } - else if (ctx.moreInput && ctx.nextChar == '&') - { - ctx.fastUncheckedConsumeChars(1) - true - } - else if (escape(ctx)) { - builder += escapeChar - true - } - else - { - ctx.fail(expectedEscape) - if (badCode) ctx.raw ::= "invalid escape sequence" - false + isDeadChar || isEscapeChar } } - private def spaces(ctx: Context): Int = - { - var n = 0 - while (ctx.moreInput && ws(ctx.nextChar)) - { + @tailrec private def spaces(ctx: Context, n: Int = 0): Int = { + if (ctx.moreInput && ws(ctx.nextChar)) { 
ctx.consumeChar() - n += 1 + spaces(ctx, n + 1) } - n + else n } // $COVERAGE-OFF$ @@ -772,14 +756,11 @@ private [internal] final class TokenString(ws: TokenSet, _expected: UnsafeOption } private [instructions] abstract class TokenLexi(name: String, illegalName: String) - (start: TokenSet, letter: TokenSet, illegal: String => Boolean, _expected: UnsafeOption[String]) extends Instr -{ + (start: TokenSet, letter: TokenSet, illegal: String => Boolean, _expected: UnsafeOption[String]) extends Instr { private val expected = if (_expected == null) name else _expected - final override def apply(ctx: Context): Unit = - { - if (ctx.moreInput && start(ctx.nextChar)) - { + final override def apply(ctx: Context): Unit = { + if (ctx.moreInput && start(ctx.nextChar)) { val name = new StringBuilder() name += ctx.nextChar ctx.offset += 1 @@ -788,24 +769,19 @@ private [instructions] abstract class TokenLexi(name: String, illegalName: Strin else ctx.fail(expected) } - @tailrec private final def restOfToken(ctx: Context, tok: StringBuilder): Unit = - { - if (ctx.moreInput && letter(ctx.nextChar)) - { + @tailrec private final def restOfToken(ctx: Context, tok: StringBuilder): Unit = { + if (ctx.moreInput && letter(ctx.nextChar)) { tok += ctx.nextChar ctx.offset += 1 restOfToken(ctx, tok) } - else - { + else { val tokStr = tok.toString - if (illegal(tokStr)) - { + if (illegal(tokStr)) { ctx.offset -= tokStr.length ctx.unexpectedFail(expected = expected, unexpected = s"$illegalName $tokStr") } - else - { + else { ctx.col += tokStr.length ctx.pushAndContinue(tokStr) } @@ -826,35 +802,28 @@ private [internal] final class TokenUserOperator(start: TokenSet, letter: TokenS private [internal] final class TokenOperator(start: TokenSet, letter: TokenSet, reservedOps: Set[String], _expected: UnsafeOption[String]) extends TokenLexi("operator", "non-reserved operator")(start, letter, reservedOps.andThen(!_), _expected) -private [instructions] abstract class TokenSpecific(_specific: String, 
caseSensitive: Boolean, _expected: UnsafeOption[String]) extends Instr -{ +private [instructions] abstract class TokenSpecific(_specific: String, caseSensitive: Boolean, _expected: UnsafeOption[String]) extends Instr { private final val expected = if (_expected == null) _specific else _expected protected final val expectedEnd = if (_expected == null) "end of " + _specific else _expected private final val specific = (if (caseSensitive) _specific else _specific.toLowerCase).toCharArray private final val strsz = specific.length protected def postprocess(ctx: Context, i: Int): Unit - final override def apply(ctx: Context): Unit = - { - val input = ctx.input - var i = ctx.offset - var j = 0 - if (ctx.inputsz >= i + strsz) - { - while (j < strsz) - { - val c = if (caseSensitive) input(i) else input(i).toLower - if (c != specific(j)) - { - ctx.fail(expected) - return - } - i += 1 - j += 1 - } + + @tailrec final private def readSpecific(ctx: Context, i: Int, j: Int): Unit = { + if (j < strsz) { + val c = if (caseSensitive) ctx.input(i) else ctx.input(i).toLower + if (c != specific(j)) ctx.fail(expected) + else readSpecific(ctx, i + 1, j + 1) + } + else { ctx.saveState() ctx.fastUncheckedConsumeChars(strsz) postprocess(ctx, i) } + } + + final override def apply(ctx: Context): Unit = { + if (ctx.inputsz >= ctx.offset + strsz) readSpecific(ctx, ctx.offset, 0) else ctx.fail(expected) } @@ -885,32 +854,25 @@ private [internal] final class TokenOperator_(operator: String, letter: TokenSet // This can be combined into the above private [internal] class TokenMaxOp(operator: String, _ops: Set[String], expected: UnsafeOption[String]) - extends TokenSpecific(operator, true, expected) -{ + extends TokenSpecific(operator, true, expected) { // TODO: We want a Trie backed map here, not whatever this is private val ops = for (op <- _ops.toList if op.length > operator.length && op.startsWith(operator)) yield op.substring(operator.length) override def postprocess(ctx: Context, _i: Int): 
Unit = { var i = _i - if (i < ctx.inputsz) - { - var ops = this.ops - while (ops.nonEmpty && i < ctx.inputsz) - { - val c = ctx.input(i) - ops = for (op <- ops if op.charAt(0) == c) yield - { - val op_ = op.substring(1) - if (op_.isEmpty) - { - ctx.fail(expectedEnd) - ctx.restoreState() - return - } - op_ + var ops = this.ops + while (i < ctx.inputsz && ops.nonEmpty) { + val c = ctx.input(i) + ops = for (op <- ops if op.charAt(0) == c) yield { + val op_ = op.substring(1) + if (op_.isEmpty) { + ctx.fail(expectedEnd) + ctx.restoreState() + return } - i += 1 + op_ } + i += 1 } ctx.states = ctx.states.tail ctx.pushAndContinue(()) From d8564890acb333bca357897efa5745c221a37218 Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Fri, 1 Jan 2021 16:51:09 +0000 Subject: [PATCH 10/34] Factored out the pipeline, this will ensure no funny business with the instances... --- .github/workflows/ci.yaml | 3 +- .../parsley/internal/deepembedding/Cont.scala | 3 +- .../deepembedding/GeneralisedEmbedding.scala | 10 +++---- .../internal/deepembedding/Parsley.scala | 29 +++++++++++++------ .../deepembedding/PrimitiveEmbedding.scala | 4 +-- .../internal/instructions/TokenInstrs.scala | 2 +- 6 files changed, 31 insertions(+), 20 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 4c864864a..51076ec6a 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -57,5 +57,4 @@ jobs: CC_TEST_REPORTER_ID: ${{ secrets.CC_TEST_REPORTER_ID }} with: coverageCommand: sbt coverageReport - coverageLocations: ${{github.workspace}}/target/scala-2.13/coverage-report/cobertura.xml:cobertura - debug: true \ No newline at end of file + coverageLocations: ${{github.workspace}}/target/scala-2.13/coverage-report/cobertura.xml:cobertura \ No newline at end of file diff --git a/src/main/scala/parsley/internal/deepembedding/Cont.scala b/src/main/scala/parsley/internal/deepembedding/Cont.scala index 0f026eaf3..4f478f647 100644 --- 
a/src/main/scala/parsley/internal/deepembedding/Cont.scala +++ b/src/main/scala/parsley/internal/deepembedding/Cont.scala @@ -38,9 +38,10 @@ private [deepembedding] object ContOps def result[R, A, Cont[_, +_]](x: A)(implicit canWrap: ContOps[Cont]): Cont[R, A] = canWrap.wrap(x) def perform[R, Cont[_, +_]](wrapped: Cont[R, R])(implicit canUnwrap: ContOps[Cont]): R = canUnwrap.unwrap(wrapped) type GenOps = ContOps[({type C[_, +_]})#C] - def safeCall[A](task: GenOps => A): A = + def safeCall[A](task: GenOps => A): A = { try task(Id.ops.asInstanceOf[GenOps]) catch { case _: StackOverflowError => task(Cont.ops.asInstanceOf[GenOps]) } + } } private [deepembedding] class Cont[R, +A](val cont: (A => Bounce[R]) => Bounce[R]) extends AnyVal diff --git a/src/main/scala/parsley/internal/deepembedding/GeneralisedEmbedding.scala b/src/main/scala/parsley/internal/deepembedding/GeneralisedEmbedding.scala index de3f68cf5..1fd8192bb 100644 --- a/src/main/scala/parsley/internal/deepembedding/GeneralisedEmbedding.scala +++ b/src/main/scala/parsley/internal/deepembedding/GeneralisedEmbedding.scala @@ -8,7 +8,7 @@ import scala.language.higherKinds // Core Embedding private [parsley] abstract class Singleton[A](pretty: String, instr: instructions.Instr) extends Parsley[A] { final override def preprocess[Cont[_, +_]: ContOps, A_ >: A](implicit seen: Set[Parsley[_]], sub: SubMap, - label: UnsafeOption[String]): Cont[Parsley[_], Parsley[A_]] = result(this) + label: UnsafeOption[String]): Cont[Unit, Parsley[A_]] = result(this) final override def findLetsAux[Cont[_, +_]: ContOps](implicit seen: Set[Parsley[_]], state: LetFinderState): Cont[Unit, Unit] = result(()) final override def codeGen[Cont[_, +_]: ContOps](implicit instrs: InstrBuffer, state: CodeGenState): Cont[Unit, Unit] = { result(instrs += instr) @@ -21,7 +21,7 @@ private [parsley] abstract class Singleton[A](pretty: String, instr: instruction private [deepembedding] abstract class SingletonExpect[A](pretty: String, builder: 
UnsafeOption[String] => SingletonExpect[A], instr: instructions.Instr) extends Parsley[A] { final override def preprocess[Cont[_, +_]: ContOps, A_ >: A](implicit seen: Set[Parsley[_]], sub: SubMap, - label: UnsafeOption[String]): Cont[Parsley[_], Parsley[A]] = { + label: UnsafeOption[String]): Cont[Unit, Parsley[A]] = { if (label == null) result(this) else result(builder(label)) } @@ -44,7 +44,7 @@ private [deepembedding] abstract class Unary[A, B](_p: =>Parsley[A])(pretty: Str p.findLets } override def preprocess[Cont[_, +_]: ContOps, B_ >: B](implicit seen: Set[Parsley[_]], sub: SubMap, - label: UnsafeOption[String]): Cont[Parsley[_], Parsley[B_]] = + label: UnsafeOption[String]): Cont[Unit, Parsley[B_]] = if (label == null && processed) result(this) else for (p <- this.p.optimised) yield { val self = if (label == null) this else empty(label) self.ready(p) @@ -74,7 +74,7 @@ private [deepembedding] abstract class Binary[A, B, C](_left: =>Parsley[A], _rig left.findLets >> right.findLets } override def preprocess[Cont[_, +_]: ContOps, C_ >: C](implicit seen: Set[Parsley[_]], sub: SubMap, - label: UnsafeOption[String]): Cont[Parsley[_], Parsley[C_]] = + label: UnsafeOption[String]): Cont[Unit, Parsley[C_]] = if (label == null && processed) result(this) else for (left <- this.left.optimised; right <- this.right.optimised) yield { val self = if (label == null) this else empty self.ready(left, right) @@ -100,7 +100,7 @@ private [deepembedding] abstract class Ternary[A, B, C, D](_first: =>Parsley[A], protected var third: Parsley[C] = _ protected val numInstrs: Int override def preprocess[Cont[_, +_]: ContOps, D_ >: D](implicit seen: Set[Parsley[_]], sub: SubMap, - label: UnsafeOption[String]): Cont[Parsley[_], Parsley[D_]] = + label: UnsafeOption[String]): Cont[Unit, Parsley[D_]] = if (label == null && processed) result(this) else for (first <- this.first.optimised; second <- this.second.optimised; third <- this.third.optimised) yield { val self = if (label == null) this 
else empty diff --git a/src/main/scala/parsley/internal/deepembedding/Parsley.scala b/src/main/scala/parsley/internal/deepembedding/Parsley.scala index d93d28e23..ca81d56b5 100644 --- a/src/main/scala/parsley/internal/deepembedding/Parsley.scala +++ b/src/main/scala/parsley/internal/deepembedding/Parsley.scala @@ -51,7 +51,7 @@ private [parsley] abstract class Parsley[+A] private [deepembedding] } final private [deepembedding] def optimised[Cont[_, +_]: ContOps, A_ >: A](implicit seen: Set[Parsley[_]], sub: SubMap, - label: UnsafeOption[String]): Cont[Parsley[_], Parsley[A_]] = { + label: UnsafeOption[String] = null): Cont[Unit, Parsley[A_]] = { for (p <- this.fix.preprocess(implicitly[ContOps[Cont]], seen + this, sub, label)) yield p.optimise } final private [deepembedding] var safe = true @@ -59,12 +59,15 @@ private [parsley] abstract class Parsley[+A] private [deepembedding] final private [deepembedding] var size: Int = 1 final private [deepembedding] var processed = false - final private def computeInstrs(implicit ops: GenOps): Array[Instr] = { - val instrs: InstrBuffer = new ResizableArray() - val state = new CodeGenState - val letFinderState = new LetFinderState - perform(findLets(ops, Set.empty, letFinderState)) - perform(perform(optimised(ops, Set.empty, new SubMap(letFinderState.lets), null)).codeGen(ops, instrs, state)) + final private def pipeline[Cont[_, +_]: ContOps](implicit instrs: InstrBuffer, state: CodeGenState): Unit = { + perform { + implicit val letFinderState: LetFinderState = new LetFinderState + implicit val seenSet: Set[Parsley[_]] = Set.empty + findLets >> { + implicit val subMap: SubMap = new SubMap(letFinderState.lets) + optimised.flatMap(_.codeGen) + } + } if (state.map.nonEmpty) { val end = state.freshLabel() instrs += new instructions.Jump(end) @@ -73,11 +76,19 @@ private [parsley] abstract class Parsley[+A] private [deepembedding] val p = state.nextSub() val label = map(p) instrs += new instructions.Label(label) - 
perform(p.codeGen(ops, instrs, state)) + perform(p.codeGen) instrs += instructions.Return } instrs += new instructions.Label(end) } + } + + final private def computeInstrs(ops: GenOps): Array[Instr] = { + val instrs: InstrBuffer = new ResizableArray() + val state = new CodeGenState + + pipeline(ops, instrs, state) + val instrsOversize = instrs.toArray val labelMapping = new Array[Int](state.nlabels) @tailrec def findLabels(instrs: Array[Instr], labels: Array[Int], n: Int, i: Int, off: Int): Int = if (i + off < n) instrs(i + off) match { @@ -116,7 +127,7 @@ private [parsley] abstract class Parsley[+A] private [deepembedding] // Sub-tree optimisation and Rec calculation - Bottom-up protected def preprocess[Cont[_, +_]: ContOps, A_ >: A](implicit seen: Set[Parsley[_]], sub: SubMap, - label: UnsafeOption[String]): Cont[Parsley[_], Parsley[A_]] + label: UnsafeOption[String]): Cont[Unit, Parsley[A_]] // Let-finder recursion protected def findLetsAux[Cont[_, +_]: ContOps](implicit seen: Set[Parsley[_]], state: LetFinderState): Cont[Unit, Unit] // Optimisation - Bottom-up diff --git a/src/main/scala/parsley/internal/deepembedding/PrimitiveEmbedding.scala b/src/main/scala/parsley/internal/deepembedding/PrimitiveEmbedding.scala index 79a2381d7..78daaae2a 100644 --- a/src/main/scala/parsley/internal/deepembedding/PrimitiveEmbedding.scala +++ b/src/main/scala/parsley/internal/deepembedding/PrimitiveEmbedding.scala @@ -44,7 +44,7 @@ private [parsley] final class Subroutine[A](_p: =>Parsley[A], val expected: Unsa override val childRepeats = 0 override def preprocess[Cont[_, +_]: ContOps, A_ >: A](implicit seen: Set[Parsley[_]], sub: SubMap, - label: UnsafeOption[String]): Cont[Parsley[_], Parsley[A_]] = { + label: UnsafeOption[String]): Cont[Unit, Parsley[A_]] = { val self = if (label == null) this else Subroutine(p, label) if (!processed) for (p <- this.p.optimised(implicitly[ContOps[Cont]], seen, sub, null)) yield self.ready(p) else result(self) @@ -69,7 +69,7 @@ private 
[parsley] final class Put[S](private [Put] val v: Var, _p: =>Parsley[S]) private [parsley] final class ErrorRelabel[+A](_p: =>Parsley[A], msg: String) extends Parsley[A] { lazy val p = _p override def preprocess[Cont[_, +_]: ContOps, A_ >: A](implicit seen: Set[Parsley[_]], sub: SubMap, - label: UnsafeOption[String]): Cont[Parsley[_], Parsley[A_]] = { + label: UnsafeOption[String]): Cont[Unit, Parsley[A_]] = { if (label == null) p.optimised(implicitly[ContOps[Cont]], seen, sub, msg) else p.optimised } diff --git a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala index af9ba429c..19bac1998 100644 --- a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala @@ -1,6 +1,6 @@ package parsley.internal.instructions -import parsley.internal.deepembedding.Sign._ +import parsley.internal.deepembedding.Sign.{SignType, IntType, DoubleType} import parsley.TokenParser.TokenSet import parsley.internal.UnsafeOption From edf7814d4c02199154c060efa9439f57c2522578 Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Fri, 1 Jan 2021 17:24:15 +0000 Subject: [PATCH 11/34] Improved code in computeInstrs --- .../internal/deepembedding/Parsley.scala | 25 ++++++++----------- .../internal/instructions/OptInstrs.scala | 2 +- .../internal/instructions/package.scala | 2 ++ 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/src/main/scala/parsley/internal/deepembedding/Parsley.scala b/src/main/scala/parsley/internal/deepembedding/Parsley.scala index ca81d56b5..17e05429d 100644 --- a/src/main/scala/parsley/internal/deepembedding/Parsley.scala +++ b/src/main/scala/parsley/internal/deepembedding/Parsley.scala @@ -42,11 +42,10 @@ private [parsley] abstract class Parsley[+A] private [deepembedding] } final private def fix(implicit seen: Set[Parsley[_]], sub: SubMap, label: UnsafeOption[String]): Parsley[A] = { // We use the seen set here 
to prevent cascading sub-routines + val wasSeen = seen(this) val self = sub(this) - if (seen(this)) { - if (self == this) new Rec(this, label) - else this - } + if (wasSeen && (self eq this)) new Rec(this, label) + else if (wasSeen) this else self } final private [deepembedding] def optimised[Cont[_, +_]: ContOps, A_ >: A](implicit seen: Set[Parsley[_]], @@ -89,27 +88,23 @@ private [parsley] abstract class Parsley[+A] private [deepembedding] pipeline(ops, instrs, state) - val instrsOversize = instrs.toArray - val labelMapping = new Array[Int](state.nlabels) @tailrec def findLabels(instrs: Array[Instr], labels: Array[Int], n: Int, i: Int, off: Int): Int = if (i + off < n) instrs(i + off) match { - case label: Label => instrs(i + off) = null; labels(label.i) = i; findLabels(instrs, labels, n, i, off + 1) + case label: Label => + instrs(i + off) = null + labels(label.i) = i + findLabels(instrs, labels, n, i, off + 1) case _ => findLabels(instrs, labels, n, i + 1, off) } else i @tailrec def applyLabels(srcs: Array[Instr], labels: Array[Int], dests: Array[Instr], n: Int, i: Int, off: Int): Unit = if (i < n) srcs(i + off) match { case null => applyLabels(srcs, labels, dests, n, i, off + 1) - case jump: JumpInstr => - jump.label = labels(jump.label) - dests(i) = jump - applyLabels(srcs, labels, dests, n, i + 1, off) - case table: JumpTable => - table.relabel(labels) - dests(i) = table - applyLabels(srcs, labels, dests, n, i + 1, off) case instr => + instr.relabel(labels) dests(i) = instr applyLabels(srcs, labels, dests, n, i + 1, off) } + val instrsOversize = instrs.toArray + val labelMapping = new Array[Int](state.nlabels) val size = findLabels(instrsOversize, labelMapping, instrs.length, 0, 0) val instrs_ = new Array[Instr](size) applyLabels(instrsOversize, labelMapping, instrs_, instrs_.length, 0, 0) diff --git a/src/main/scala/parsley/internal/instructions/OptInstrs.scala b/src/main/scala/parsley/internal/instructions/OptInstrs.scala index 31df9d41c..88cc57f7c 100644 
--- a/src/main/scala/parsley/internal/instructions/OptInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/OptInstrs.scala @@ -119,7 +119,7 @@ private [internal] final class JumpTable(prefixes: List[Char], labels: List[Int] } } - private [internal] def relabel(labels: Array[Int]): Unit = { + override def relabel(labels: Array[Int]): Unit = { jumpTable.mapValuesInPlace((_, v) => labels(v)) default = labels(default) defaultPreamble = default - 1 diff --git a/src/main/scala/parsley/internal/instructions/package.scala b/src/main/scala/parsley/internal/instructions/package.scala index be07b9cff..a2b2b9b3e 100644 --- a/src/main/scala/parsley/internal/instructions/package.scala +++ b/src/main/scala/parsley/internal/instructions/package.scala @@ -12,6 +12,7 @@ package object instructions private [internal] abstract class Instr { def apply(ctx: Context): Unit + def relabel(labels: Array[Int]): Unit = () // Instructions should override this if they have mutable state inside! def copy: Instr = this } @@ -20,6 +21,7 @@ package object instructions private [internal] abstract class JumpInstr extends Instr { var label: Int + override def relabel(labels: Array[Int]): Unit = label = labels(label) } // It's 2018 and Labels are making a come-back, along with 2 pass assembly From 410a651d008b624442e4593dbd012383a4a61d12 Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Fri, 1 Jan 2021 17:33:19 +0000 Subject: [PATCH 12/34] Reformatted TokenInstrs --- .../internal/instructions/TokenInstrs.scala | 291 ++++++------------ 1 file changed, 97 insertions(+), 194 deletions(-) diff --git a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala index 19bac1998..e349766df 100644 --- a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala @@ -7,41 +7,32 @@ import parsley.internal.UnsafeOption import scala.annotation.{switch, tailrec} // 
TODO This is considered as a VERY rough implementation of the intrinsic, just to get it working, it will be optimised later -private [internal] class TokenComment(start: String, end: String, line: String, nested: Boolean) extends Instr -{ +private [internal] class TokenComment(start: String, end: String, line: String, nested: Boolean) extends Instr { protected final val noLine = line.isEmpty protected final val noMulti = start.isEmpty - override def apply(ctx: Context): Unit = - { + override def apply(ctx: Context): Unit = { if (!ctx.moreInput) ctx.fail("comment") else if (noLine && noMulti) ctx.fail("comment") - else if (noLine) - { + else if (noLine) { if (!ctx.input.startsWith(start, ctx.offset)) ctx.fail("comment") - else - { + else { if (!multiLineComment(ctx)) return ctx.pushAndContinue(()) } } - else if (noMulti) - { + else if (noMulti) { if (!ctx.input.startsWith(line, ctx.offset)) ctx.fail("comment") - else - { + else { singleLineComment(ctx) ctx.pushAndContinue(()) } } - else - { + else { val startsSingle = ctx.input.startsWith(line, ctx.offset) val startsMulti = ctx.input.startsWith(start, ctx.offset) if (!startsSingle && !startsMulti) ctx.fail("comment") - else - { - if (startsMulti) - { + else { + if (startsMulti) { if (!multiLineComment(ctx)) return } else singleLineComment(ctx) @@ -50,31 +41,25 @@ private [internal] class TokenComment(start: String, end: String, line: String, } } - protected final def singleLineComment(ctx: Context): Unit = - { + protected final def singleLineComment(ctx: Context): Unit = { ctx.fastUncheckedConsumeChars(line.length) while (ctx.moreInput && ctx.nextChar != '\n') ctx.consumeChar() } - protected final def multiLineComment(ctx: Context): Boolean = - { + protected final def multiLineComment(ctx: Context): Boolean = { ctx.fastUncheckedConsumeChars(start.length) var n = 1 - while (n != 0) - { - if (ctx.input.startsWith(end, ctx.offset)) - { + while (n != 0) { + if (ctx.input.startsWith(end, ctx.offset)) { 
ctx.fastUncheckedConsumeChars(end.length) n -= 1 } - else if (nested && ctx.input.startsWith(start, ctx.offset)) - { + else if (nested && ctx.input.startsWith(start, ctx.offset)) { ctx.fastUncheckedConsumeChars(start.length) n += 1 } else if (ctx.moreInput) ctx.consumeChar() - else - { + else { ctx.fail("end of comment") return false } @@ -87,26 +72,19 @@ private [internal] class TokenComment(start: String, end: String, line: String, } // TODO This is considered as a VERY rough implementation of the intrinsic, just to get it working, it will be optimised later -private [internal] final class TokenSkipComments(start: String, end: String, line: String, nested: Boolean) extends TokenComment(start, end, line, nested) -{ - override def apply(ctx: Context): Unit = - { - if (noLine && !noMulti) - { +private [internal] final class TokenSkipComments(start: String, end: String, line: String, nested: Boolean) extends TokenComment(start, end, line, nested) { + override def apply(ctx: Context): Unit = { + if (noLine && !noMulti) { while (ctx.moreInput && ctx.input.startsWith(start, ctx.offset)) if (!multiLineComment(ctx)) return } - else if (noMulti && !noLine) - { + else if (noMulti && !noLine) { while (ctx.moreInput && ctx.input.startsWith(line, ctx.offset)) singleLineComment(ctx) } - else if (!noLine && !noMulti) - { + else if (!noLine && !noMulti) { var startsSingle = ctx.input.startsWith(line, ctx.offset) var startsMulti = ctx.input.startsWith(start, ctx.offset) - while (ctx.moreInput && (startsSingle || startsMulti)) - { - if (startsMulti) - { + while (ctx.moreInput && (startsSingle || startsMulti)) { + if (startsMulti) { if (!multiLineComment(ctx)) return } else singleLineComment(ctx) @@ -122,39 +100,30 @@ private [internal] final class TokenSkipComments(start: String, end: String, lin } private [internal] final class TokenWhiteSpace(ws: TokenSet, start: String, end: String, line: String, nested: Boolean) - extends TokenComment(start, end, line, nested) -{ - override def 
apply(ctx: Context): Unit = - { + extends TokenComment(start, end, line, nested) { + override def apply(ctx: Context): Unit = { if (noLine && noMulti) spaces(ctx) - else if (noLine) - { + else if (noLine) { spaces(ctx) - while (ctx.moreInput && ctx.input.startsWith(start, ctx.offset)) - { + while (ctx.moreInput && ctx.input.startsWith(start, ctx.offset)) { if (!multiLineComment(ctx)) return spaces(ctx) } } - else if (noMulti) - { + else if (noMulti) { spaces(ctx) - while (ctx.moreInput && ctx.input.startsWith(line, ctx.offset)) - { + while (ctx.moreInput && ctx.input.startsWith(line, ctx.offset)) { singleLineComment(ctx) spaces(ctx) } } - else - { + else { spaces(ctx) // TODO This is considered as a VERY rough implementation of the intrinsic, just to get it working, it will be optimised later var startsSingle = ctx.input.startsWith(line, ctx.offset) var startsMulti = ctx.input.startsWith(start, ctx.offset) - while (ctx.moreInput && (startsSingle || startsMulti)) - { - if (startsMulti) - { + while (ctx.moreInput && (startsSingle || startsMulti)) { + if (startsMulti) { if (!multiLineComment(ctx)) return } else singleLineComment(ctx) @@ -172,22 +141,17 @@ private [internal] final class TokenWhiteSpace(ws: TokenSet, start: String, end: // $COVERAGE-ON$ } -private [internal] final class TokenSign(ty: SignType, _expected: UnsafeOption[String]) extends Instr -{ +private [internal] final class TokenSign(ty: SignType, _expected: UnsafeOption[String]) extends Instr { val expected = if (_expected == null) "sign" else _expected - val neg: Any => Any = ty match - { + val neg: Any => Any = ty match { case IntType => ((x: Int) => -x).asInstanceOf[Any => Any] case DoubleType => ((x: Double) => -x).asInstanceOf[Any => Any] } val pos: Any => Any = x => x - override def apply(ctx: Context): Unit = - { - if (ctx.moreInput) - { - if (ctx.nextChar == '-') - { + override def apply(ctx: Context): Unit = { + if (ctx.moreInput) { + if (ctx.nextChar == '-') { ctx.fastUncheckedConsumeChars(1) 
ctx.stack.push(neg) } @@ -222,12 +186,9 @@ private [instructions] sealed trait NumericReader { protected final def decimal(ctx: Context, firstDigit: Int = 0): Int = subDecimal(10, '9', ctx)(firstDigit) protected final def octal(ctx: Context, firstDigit: Int = 0): Int = subDecimal(8, '7', ctx)(firstDigit) - @tailrec protected final def hexadecimal(ctx: Context, x: Int = 0): Int = - { - if (ctx.moreInput) - { - (ctx.nextChar: @switch) match - { + @tailrec protected final def hexadecimal(ctx: Context, x: Int = 0): Int = { + if (ctx.moreInput) { + (ctx.nextChar: @switch) match { case d@('0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'A' | 'B' | 'C' | 'D' | 'E' | 'F') => @@ -240,26 +201,19 @@ private [instructions] sealed trait NumericReader { } } -private [internal] final class TokenNatural(_expected: UnsafeOption[String]) extends Instr with NumericReader -{ +private [internal] final class TokenNatural(_expected: UnsafeOption[String]) extends Instr with NumericReader { val expected = if (_expected == null) "natural" else _expected - override def apply(ctx: Context): Unit = - { - if (ctx.moreInput) (ctx.nextChar: @switch) match - { + override def apply(ctx: Context): Unit = { + if (ctx.moreInput) (ctx.nextChar: @switch) match { case '0' => ctx.fastUncheckedConsumeChars(1) if (!ctx.moreInput) ctx.pushAndContinue(0) - else - { - (ctx.nextChar: @switch) match - { + else { + (ctx.nextChar: @switch) match { case 'x' | 'X' => ctx.fastUncheckedConsumeChars(1) - if (ctx.moreInput) - { - (ctx.nextChar: @switch) match - { + if (ctx.moreInput) { + (ctx.nextChar: @switch) match { case d@('0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'A' | 'B' | 'C' | 'D' | 'E' | 'F') => @@ -271,11 +225,9 @@ private [internal] final class TokenNatural(_expected: UnsafeOption[String]) ext else ctx.fail(expected) case 'o' | 'O' => ctx.fastUncheckedConsumeChars(1) - if (ctx.moreInput) - { + if 
(ctx.moreInput) { val d = ctx.nextChar - if (d >= '0' && d <= '7') - { + if (d >= '0' && d <= '7') { ctx.fastUncheckedConsumeChars(1) ctx.pushAndContinue(octal(ctx, d.asDigit)) } @@ -301,27 +253,21 @@ private [internal] final class TokenNatural(_expected: UnsafeOption[String]) ext // $COVERAGE-ON$ } -private [internal] final class TokenFloat(_expected: UnsafeOption[String]) extends Instr -{ +private [internal] final class TokenFloat(_expected: UnsafeOption[String]) extends Instr { val expected = if (_expected == null) "unsigned float" else _expected - override def apply(ctx: Context): Unit = - { + override def apply(ctx: Context): Unit = { var failed = false - if (ctx.moreInput) (ctx.nextChar: @switch) match - { + if (ctx.moreInput) (ctx.nextChar: @switch) match { case d@('0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9') => ctx.fastUncheckedConsumeChars(1) val builder = new StringBuilder() failed = decimal(ctx, builder += d, false) - if (ctx.moreInput) (ctx.nextChar: @switch) match - { + if (ctx.moreInput) (ctx.nextChar: @switch) match { case '.' 
=> // fraction ctx.fastUncheckedConsumeChars(1) failed = decimal(ctx, builder += '.') - if (!failed) - { - if (ctx.moreInput && (ctx.nextChar == 'e' || ctx.nextChar == 'E')) - { + if (!failed) { + if (ctx.moreInput && (ctx.nextChar == 'e' || ctx.nextChar == 'E')) { ctx.fastUncheckedConsumeChars(1) failed = exponent(ctx, builder += 'e') } @@ -343,13 +289,10 @@ private [internal] final class TokenFloat(_expected: UnsafeOption[String]) exten else ctx.inc() } - @tailrec private def decimal(ctx: Context, x: StringBuilder, first: Boolean = true): Boolean = - { - if (ctx.moreInput) - { + @tailrec private def decimal(ctx: Context, x: StringBuilder, first: Boolean = true): Boolean = { + if (ctx.moreInput) { val d = ctx.nextChar - if (d >= '0' && d <= '9') - { + if (d >= '0' && d <= '9') { ctx.fastUncheckedConsumeChars(1) decimal(ctx, x += d, false) } @@ -358,12 +301,9 @@ private [internal] final class TokenFloat(_expected: UnsafeOption[String]) exten else first } - private def exponent(ctx: Context, x: StringBuilder): Boolean = - { - if (ctx.moreInput) - { - ctx.nextChar match - { + private def exponent(ctx: Context, x: StringBuilder): Boolean = { + if (ctx.moreInput) { + ctx.nextChar match { case '+' => ctx.fastUncheckedConsumeChars(1) decimal(ctx, x) @@ -381,28 +321,22 @@ private [internal] final class TokenFloat(_expected: UnsafeOption[String]) exten // $COVERAGE-ON$ } -private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends Instr with Stateful with NumericReader -{ +private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends Instr with Stateful with NumericReader { private [this] final val expected = if (_expected == null) "escape code" else _expected protected var escapeChar: Char = _ protected var badCode: Boolean = _ - override def apply(ctx: Context): Unit = - { + override def apply(ctx: Context): Unit = { badCode = false if (escape(ctx)) ctx.pushAndContinue(escapeChar) - else - { + else { ctx.fail(expected) if (badCode) 
ctx.raw ::= "invalid escape sequence" } } - protected final def escape(ctx: Context): Boolean = - { - if (ctx.moreInput) - { - (ctx.nextChar: @switch) match - { + protected final def escape(ctx: Context): Boolean = { + if (ctx.moreInput) { + (ctx.nextChar: @switch) match { case 'a' => ctx.fastUncheckedConsumeChars(1); escapeChar = '\u0007' case 'b' => ctx.fastUncheckedConsumeChars(1); escapeChar = '\b' case 'f' => ctx.fastUncheckedConsumeChars(1); escapeChar = '\u000c' @@ -417,17 +351,14 @@ private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends In ctx.fastUncheckedConsumeChars(1) val escapeCode = decimal(ctx, d.asDigit) if (escapeCode <= 0x10FFFF) escapeChar = escapeCode.toChar - else - { + else { badCode = true return false } case 'x' => ctx.fastUncheckedConsumeChars(1) - if (ctx.moreInput) - { - (ctx.nextChar: @switch) match - { + if (ctx.moreInput) { + (ctx.nextChar: @switch) match { case d@('0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'A' | 'B' | 'C' | 'D' | 'E' | 'F') => @@ -445,16 +376,13 @@ private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends In else return false case 'o' => ctx.fastUncheckedConsumeChars(1) - if (ctx.moreInput) - { + if (ctx.moreInput) { val d = ctx.nextChar - if (d >= '0' && d <= '7') - { + if (d >= '0' && d <= '7') { ctx.fastUncheckedConsumeChars(1) val escapeCode = octal(ctx, d.asDigit) if (escapeCode <= 0x10FFFF) escapeChar = escapeCode.toChar - else - { + else { badCode = true return false } @@ -464,11 +392,9 @@ private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends In else return false case '^' => ctx.fastUncheckedConsumeChars(1) - if (ctx.moreInput) - { + if (ctx.moreInput) { val c = ctx.nextChar - if (c >= 'A' && c <= 'Z') - { + if (c >= 'A' && c <= 'Z') { ctx.fastUncheckedConsumeChars(1) escapeChar = (c - 'A' + 1).toChar } @@ -476,41 +402,34 @@ private [internal] class TokenEscape(_expected: UnsafeOption[String]) 
extends In } else return false case 'A' => //ACK - if (ctx.offset + 2 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'C' && ctx.input(ctx.offset + 2) == 'K') - { + if (ctx.offset + 2 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'C' && ctx.input(ctx.offset + 2) == 'K') { ctx.fastUncheckedConsumeChars(3) escapeChar = '\u0006' } else return false case 'B' => //BS BEL - if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') - { + if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') { ctx.fastUncheckedConsumeChars(2) escapeChar = '\u0008' } - else if (ctx.offset + 2 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'E' && ctx.input(ctx.offset + 2) == 'L') - { + else if (ctx.offset + 2 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'E' && ctx.input(ctx.offset + 2) == 'L') { ctx.fastUncheckedConsumeChars(3) escapeChar = '\u0007' } else return false case 'C' => //CR CAN - if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'R') - { + if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'R') { ctx.fastUncheckedConsumeChars(2) escapeChar = '\u000d' } - else if (ctx.offset + 2 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'A' && ctx.input(ctx.offset + 2) == 'N') - { + else if (ctx.offset + 2 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'A' && ctx.input(ctx.offset + 2) == 'N') { ctx.fastUncheckedConsumeChars(3) escapeChar = '\u0018' } else return false case 'D' => //DC1 DC2 DC3 DC4 DEL DLE - if (ctx.offset + 2 < ctx.inputsz) (ctx.input(ctx.offset + 1): @switch) match - { - case 'C' => (ctx.input(ctx.offset + 2): @switch) match - { + if (ctx.offset + 2 < ctx.inputsz) (ctx.input(ctx.offset + 1): @switch) match { + case 'C' => (ctx.input(ctx.offset + 2): @switch) match { case '1' => ctx.fastUncheckedConsumeChars(3); escapeChar = '\u0011' case '2' => ctx.fastUncheckedConsumeChars(3); escapeChar = '\u0012' case '3' => ctx.fastUncheckedConsumeChars(3); escapeChar = '\u0013' @@ -527,13 +446,11 @@ private [internal] class 
TokenEscape(_expected: UnsafeOption[String]) extends In } else return false case 'E' => //EM ETX ETB ESC EOT ENQ - if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'M') - { + if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'M') { ctx.fastUncheckedConsumeChars(2) escapeChar = '\u0019' } - else if (ctx.offset + 2 < ctx.inputsz) (ctx.input(ctx.offset + 1): @switch) match - { + else if (ctx.offset + 2 < ctx.inputsz) (ctx.input(ctx.offset + 1): @switch) match { case 'N' => if (ctx.input(ctx.offset + 2) == 'Q') { ctx.fastUncheckedConsumeChars(3); escapeChar = '\u0005' } else return false @@ -551,75 +468,63 @@ private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends In } else return false case 'F' => //FF FS - if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'F') - { + if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'F') { ctx.fastUncheckedConsumeChars(2) escapeChar = '\u000c' } - else if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') - { + else if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') { ctx.fastUncheckedConsumeChars(2) escapeChar = '\u001c' } else return false case 'G' => //GS - if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') - { + if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') { ctx.fastUncheckedConsumeChars(2) escapeChar = '\u001d' } else return false case 'H' => //HT - if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'T') - { + if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'T') { ctx.fastUncheckedConsumeChars(2) escapeChar = '\u0009' } else return false case 'L' => //LF - if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'F') - { + if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'F') { ctx.fastUncheckedConsumeChars(2) escapeChar = '\n' } else return false case 'N' => //NUL NAK - if (ctx.offset + 2 < ctx.inputsz && 
ctx.input(ctx.offset + 1) == 'U' && ctx.input(ctx.offset + 2) == 'L') - { + if (ctx.offset + 2 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'U' && ctx.input(ctx.offset + 2) == 'L') { ctx.fastUncheckedConsumeChars(3) escapeChar = '\u0000' } - else if (ctx.offset + 2 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'A' && ctx.input(ctx.offset + 2) == 'K') - { + else if (ctx.offset + 2 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'A' && ctx.input(ctx.offset + 2) == 'K') { ctx.fastUncheckedConsumeChars(3) escapeChar = '\u0015' } else return false case 'R' => //RS - if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') - { + if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') { ctx.fastUncheckedConsumeChars(2) escapeChar = '\u001e' } else return false case 'S' => //SO SI SP SOH STX SYN SUB - if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'O') - { + if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'O') { ctx.fastUncheckedConsumeChars(2) escapeChar = '\u000e' } - else if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'I') - { + else if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'I') { ctx.fastUncheckedConsumeChars(2) escapeChar = '\u000f' } - else if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'P') - { + else if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'P') { ctx.fastUncheckedConsumeChars(2) escapeChar = '\u0020' } - else if (ctx.offset + 2 < ctx.inputsz) (ctx.input(ctx.offset + 1): @switch) match - { + else if (ctx.offset + 2 < ctx.inputsz) (ctx.input(ctx.offset + 1): @switch) match { case 'O' => if (ctx.input(ctx.offset + 2) == 'H') { ctx.fastUncheckedConsumeChars(3); escapeChar = '\u0001' } else return false @@ -636,15 +541,13 @@ private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends In } else return false case 'U' => //US - if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') - { + if 
(ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') { ctx.fastUncheckedConsumeChars(2) escapeChar = '\u001f' } else return false case 'V' => //VT - if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'T') - { + if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'T') { ctx.fastUncheckedConsumeChars(2) escapeChar = '\u000b' } From 2d5af0742993900baae6e9163f44a91a9436ffa5 Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Fri, 1 Jan 2021 22:38:33 +0000 Subject: [PATCH 13/34] Added radix, improving the complexity of TokenMaxOp by a factor of N and removed a loop --- src/main/scala/parsley/internal/Radix.scala | 71 +++++++++++++++++++ .../internal/instructions/TokenInstrs.scala | 35 +++++---- 2 files changed, 88 insertions(+), 18 deletions(-) create mode 100644 src/main/scala/parsley/internal/Radix.scala diff --git a/src/main/scala/parsley/internal/Radix.scala b/src/main/scala/parsley/internal/Radix.scala new file mode 100644 index 000000000..15e3df670 --- /dev/null +++ b/src/main/scala/parsley/internal/Radix.scala @@ -0,0 +1,71 @@ +package parsley.internal + +import scala.collection.mutable +import scala.language.implicitConversions + +class Radix[A] { + import Radix.Entry + private var x = Option.empty[A] + private val m = mutable.Map.empty[Char, Entry[A]] + + def get(key: String): Option[A] = { + if (key.isEmpty) x + else for + { + e <- m.get(key.head) + if key.startsWith(e.prefix) + v <- e.radix.get(key.drop(e.prefix.length)) + } yield v + } + + def isEmpty: Boolean = x.isEmpty && m.isEmpty + def nonEmpty: Boolean = !isEmpty + + def suffixes(c: Char): Radix[A] = m.get(c) match { + case Some(e) => + // We have to form a new root + if (e.prefix.length > 1) Radix(new Entry(e.prefix.tail, e.radix)) + else e.radix + case None => Radix.empty + } + + def contains(key: String): Boolean = get(key).nonEmpty + def apply(key: String): A = get(key).getOrElse(throw new NoSuchElementException(key)) + + def update(key: String, value: A): 
Unit = + if (key.isEmpty) x = Some(value) + else { + val e = m.getOrElseUpdate(key.head, new Entry(key, Radix.empty[A])) + if (key.startsWith(e.prefix)) e.radix(key.drop(e.prefix.length)) = value + else { + // Need to split the tree: find their common prefix first + val common = key.view.zip(e.prefix).takeWhile(Function.tupled(_ == _)).map(_._1).mkString + e.dropInPlace(common.length) + val radix = Radix(e) + // Continue inserting the key + radix(key.drop(common.length)) = value + // Insert our new entry + m(common.head) = new Entry(common, radix) + } + } +} + +object Radix { + def empty[A]: Radix[A] = new Radix + + private def apply[A](e: Entry[A]): Radix[A] = { + val radix = empty[A] + radix.m(e.prefix.head) = e + radix + } + + def apply[A](xs: Iterable[String]): Radix[Unit] = { + val r = Radix.empty[Unit] + for (x <- xs) r(x) = () + r + } + + private class Entry[A](var prefix: String, val radix: Radix[A]) { + def dropInPlace(n: Int) = prefix = prefix.drop(n) + } +} \ No newline at end of file diff --git a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala index e349766df..2d619740b 100644 --- a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala @@ -2,7 +2,7 @@ package parsley.internal.instructions import parsley.internal.deepembedding.Sign.{SignType, IntType, DoubleType} import parsley.TokenParser.TokenSet -import parsley.internal.UnsafeOption +import parsley.internal.{Radix, UnsafeOption} import scala.annotation.{switch, tailrec} @@ -755,31 +755,30 @@ private [internal] final class TokenKeyword(keyword: String, letter: TokenSet, c private [internal] final class TokenOperator_(operator: String, letter: TokenSet, expected: UnsafeOption[String]) extends TokenSpecificNoTrailLetter(operator, letter, true, expected) -// This can be combined into the above private [internal] class TokenMaxOp(operator: String, 
_ops: Set[String], expected: UnsafeOption[String]) extends TokenSpecific(operator, true, expected) { - // TODO: We want a Trie backed map here, not whatever this is - private val ops = for (op <- _ops.toList if op.length > operator.length && op.startsWith(operator)) yield op.substring(operator.length) - - override def postprocess(ctx: Context, _i: Int): Unit = { - var i = _i - var ops = this.ops - while (i < ctx.inputsz && ops.nonEmpty) { - val c = ctx.input(i) - ops = for (op <- ops if op.charAt(0) == c) yield { - val op_ = op.substring(1) - if (op_.isEmpty) { + private val ops = Radix(_ops.collect { + case op if op.length > operator.length && op.startsWith(operator) => op.substring(operator.length) + }) + + override def postprocess(ctx: Context, i: Int): Unit = { + @tailrec def go(i: Int, ops: Radix[Unit]): Unit = { + if (i < ctx.inputsz && ops.nonEmpty) { + val ops_ = ops.suffixes(ctx.input(i)) + if (ops_.contains("")) { ctx.fail(expectedEnd) ctx.restoreState() - return } - op_ + else go(i + 1, ops_) + } + else { + ctx.states = ctx.states.tail + ctx.pushAndContinue(()) } - i += 1 } - ctx.states = ctx.states.tail - ctx.pushAndContinue(()) + go(i, ops) } + // $COVERAGE-OFF$ override def toString: String = s"TokenMaxOp(${operator})" // $COVERAGE-ON$ From f22f1ea00ca3a88b3eae8e5a543082af848c81ce Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Fri, 1 Jan 2021 22:50:08 +0000 Subject: [PATCH 14/34] Refactored TokenMaxOp for the last time --- src/main/scala/parsley/internal/Radix.scala | 4 +-- .../internal/instructions/TokenInstrs.scala | 30 +++++++++---------- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/src/main/scala/parsley/internal/Radix.scala b/src/main/scala/parsley/internal/Radix.scala index 15e3df670..2971fb548 100644 --- a/src/main/scala/parsley/internal/Radix.scala +++ b/src/main/scala/parsley/internal/Radix.scala @@ -1,10 +1,10 @@ package parsley.internal +import Radix.Entry import scala.collection.mutable import 
scala.language.implicitConversions class Radix[A] { - import Radix.Entry private var x = Option.empty[A] private val m = mutable.Map.empty[Char, Entry[A]] @@ -66,6 +66,6 @@ object Radix { } private class Entry[A](var prefix: String, val radix: Radix[A]) { - def dropInPlace(n: Int) = prefix = prefix.drop(n) + def dropInPlace(n: Int): Unit = prefix = prefix.drop(n) } } \ No newline at end of file diff --git a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala index 2d619740b..d54896aab 100644 --- a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala @@ -755,30 +755,28 @@ private [internal] final class TokenKeyword(keyword: String, letter: TokenSet, c private [internal] final class TokenOperator_(operator: String, letter: TokenSet, expected: UnsafeOption[String]) extends TokenSpecificNoTrailLetter(operator, letter, true, expected) -private [internal] class TokenMaxOp(operator: String, _ops: Set[String], expected: UnsafeOption[String]) +private [internal] final class TokenMaxOp(operator: String, _ops: Set[String], expected: UnsafeOption[String]) extends TokenSpecific(operator, true, expected) { private val ops = Radix(_ops.collect { case op if op.length > operator.length && op.startsWith(operator) => op.substring(operator.length) }) - override def postprocess(ctx: Context, i: Int): Unit = { - @tailrec def go(i: Int, ops: Radix[Unit]): Unit = { - if (i < ctx.inputsz && ops.nonEmpty) { - val ops_ = ops.suffixes(ctx.input(i)) - if (ops_.contains("")) { - ctx.fail(expectedEnd) - ctx.restoreState() - } - else go(i + 1, ops_) - } - else { - ctx.states = ctx.states.tail - ctx.pushAndContinue(()) - } + @tailrec private def go(ctx: Context, i: Int, ops: Radix[Unit]): Unit = { + lazy val ops_ = ops.suffixes(ctx.input(i)) + val possibleOpsRemain = i < ctx.inputsz && ops.nonEmpty + if (possibleOpsRemain && ops_.contains("")) { 
+ ctx.fail(expectedEnd) + ctx.restoreState() + } + else if (possibleOpsRemain) go(ctx, i + 1, ops_) + else { + ctx.states = ctx.states.tail + ctx.pushAndContinue(()) } - go(i, ops) } + override def postprocess(ctx: Context, i: Int): Unit = go(ctx, i, ops) + // $COVERAGE-OFF$ override def toString: String = s"TokenMaxOp(${operator})" // $COVERAGE-ON$ From af8c7e054b56ad1b0b4076bf99ecd7bd7c052a88 Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Mon, 4 Jan 2021 17:44:40 +0000 Subject: [PATCH 15/34] Optimised some of the instruction generation --- .gitignore | 2 +- src/main/scala/parsley/Token.scala | 13 +- .../deepembedding/PrimitiveEmbedding.scala | 7 +- .../internal/instructions/TokenInstrs.scala | 229 +++++++++--------- 4 files changed, 129 insertions(+), 122 deletions(-) diff --git a/.gitignore b/.gitignore index 20c8de0a8..eac6f7a5d 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,6 @@ target/ .cache-main .classpath .project +.jvmopts *.class *.log -parsley*.jar diff --git a/src/main/scala/parsley/Token.scala b/src/main/scala/parsley/Token.scala index 3652bd5de..cd4a447db 100644 --- a/src/main/scala/parsley/Token.scala +++ b/src/main/scala/parsley/Token.scala @@ -46,7 +46,9 @@ final case class LanguageDef(commentStart: String, keywords: Set[String], operators: Set[String], caseSensitive: Boolean, - space: Impl) + space: Impl) { + private [parsley] val supportsComments = commentStart.nonEmpty || commentEnd.nonEmpty || commentLine.nonEmpty +} object LanguageDef { val plain = LanguageDef("", "", "", false, NotRequired, NotRequired, NotRequired, NotRequired, Set.empty, Set.empty, true, NotRequired) @@ -213,6 +215,7 @@ final class TokenParser(lang: LanguageDef) { case BitSetImpl(ws) => new Parsley(new deepembedding.StringLiteral(ws)) case Predicate(ws) => new Parsley(new deepembedding.StringLiteral(ws)) + case NotRequired => new Parsley(new deepembedding.StringLiteral(_ => false)) case _ => between('"' ? "string", '"' ? 
"end of string", many(stringChar)) <#> (_.flatten.mkString) } @@ -340,14 +343,18 @@ final class TokenParser(lang: LanguageDef) new Parsley(new deepembedding.WhiteSpace(ws, lang.commentStart, lang.commentEnd, lang.commentLine, lang.nestedComments)) case Predicate(ws) => new Parsley(new deepembedding.WhiteSpace(ws, lang.commentStart, lang.commentEnd, lang.commentLine, lang.nestedComments)) - case Parser(space_) => + case Parser(space_) if lang.supportsComments => skipMany(new Parsley(new deepembedding.Comment(lang.commentStart, lang.commentEnd, lang.commentLine, lang.nestedComments)) <\> space_) + case Parser(space_) => skipMany(space_) case NotRequired => skipComments } /**Parses any comments and skips them, this includes both line comments and block comments.*/ lazy val skipComments: Parsley[Unit] = { - new Parsley(new deepembedding.SkipComments(lang.commentStart, lang.commentEnd, lang.commentLine, lang.nestedComments)) + if (!lang.supportsComments) unit + else { + new Parsley(new deepembedding.SkipComments(lang.commentStart, lang.commentEnd, lang.commentLine, lang.nestedComments)) + } } private def enclosing[A](p: =>Parsley[A], open: Char, close: Char, singular: String, plural: String) = diff --git a/src/main/scala/parsley/internal/deepembedding/PrimitiveEmbedding.scala b/src/main/scala/parsley/internal/deepembedding/PrimitiveEmbedding.scala index 78daaae2a..cb00f7cc5 100644 --- a/src/main/scala/parsley/internal/deepembedding/PrimitiveEmbedding.scala +++ b/src/main/scala/parsley/internal/deepembedding/PrimitiveEmbedding.scala @@ -27,7 +27,12 @@ private [deepembedding] sealed abstract class ScopedUnary[A, B](_p: =>Parsley[A] private [parsley] final class Attempt[A](_p: =>Parsley[A]) extends ScopedUnary[A, A](_p, "attempt", _ => Attempt.empty, instructions.Attempt) private [parsley] final class Look[A](_p: =>Parsley[A]) extends ScopedUnary[A, A](_p, "lookAhead", _ => Look.empty, instructions.Look) private [parsley] final class NotFollowedBy[A](_p: =>Parsley[A], val 
expected: UnsafeOption[String] = null) - extends ScopedUnary[A, Unit](_p, "notFollowedBy", NotFollowedBy.empty, new instructions.NotFollowedBy(expected)) + extends ScopedUnary[A, Unit](_p, "notFollowedBy", NotFollowedBy.empty, new instructions.NotFollowedBy(expected)) { + override def optimise: Parsley[Unit] = p match { + case z: MZero => new Pure(()) + case _ => this + } +} private [parsley] final class Fail(private [Fail] val msg: String, val expected: UnsafeOption[String] = null) extends SingletonExpect[Nothing](s"fail($msg)", new Fail(msg, _), new instructions.Fail(msg, expected)) with MZero diff --git a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala index d54896aab..ee26c8427 100644 --- a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala @@ -321,40 +321,40 @@ private [internal] final class TokenFloat(_expected: UnsafeOption[String]) exten // $COVERAGE-ON$ } +object TokenEscape { + private [instructions] sealed trait Escape + private [instructions] case class EscapeChar(escapeChar: Char) extends Escape + private [instructions] case object BadCode extends Escape + private [instructions] case object NoParse extends Escape +} private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends Instr with Stateful with NumericReader { private [this] final val expected = if (_expected == null) "escape code" else _expected protected var escapeChar: Char = _ protected var badCode: Boolean = _ - override def apply(ctx: Context): Unit = { - badCode = false - if (escape(ctx)) ctx.pushAndContinue(escapeChar) - else { - ctx.fail(expected) - if (badCode) ctx.raw ::= "invalid escape sequence" - } + override def apply(ctx: Context): Unit = escape(ctx) match { + case TokenEscape.EscapeChar(escapeChar) =>ctx.pushAndContinue(escapeChar) + case TokenEscape.BadCode => ctx.failWithMessage(expected, msg = "invalid 
escape sequence") + case TokenEscape.NoParse => ctx.fail(expected) } - protected final def escape(ctx: Context): Boolean = { + protected final def escape(ctx: Context): TokenEscape.Escape = { if (ctx.moreInput) { (ctx.nextChar: @switch) match { - case 'a' => ctx.fastUncheckedConsumeChars(1); escapeChar = '\u0007' - case 'b' => ctx.fastUncheckedConsumeChars(1); escapeChar = '\b' - case 'f' => ctx.fastUncheckedConsumeChars(1); escapeChar = '\u000c' - case 'n' => ctx.fastUncheckedConsumeChars(1); escapeChar = '\n' - case 'r' => ctx.fastUncheckedConsumeChars(1); escapeChar = '\r' - case 't' => ctx.fastUncheckedConsumeChars(1); escapeChar = '\t' - case 'v' => ctx.fastUncheckedConsumeChars(1); escapeChar = '\u000b' - case '\\' => ctx.fastUncheckedConsumeChars(1); escapeChar = '\\' - case '\"' => ctx.fastUncheckedConsumeChars(1); escapeChar = '\"' - case '\'' => ctx.fastUncheckedConsumeChars(1); escapeChar = '\'' + case 'a' => ctx.fastUncheckedConsumeChars(1); TokenEscape.EscapeChar('\u0007') + case 'b' => ctx.fastUncheckedConsumeChars(1); TokenEscape.EscapeChar('\b') + case 'f' => ctx.fastUncheckedConsumeChars(1); TokenEscape.EscapeChar('\u000c') + case 'n' => ctx.fastUncheckedConsumeChars(1); TokenEscape.EscapeChar('\n') + case 'r' => ctx.fastUncheckedConsumeChars(1); TokenEscape.EscapeChar('\r') + case 't' => ctx.fastUncheckedConsumeChars(1); TokenEscape.EscapeChar('\t') + case 'v' => ctx.fastUncheckedConsumeChars(1); TokenEscape.EscapeChar('\u000b') + case '\\' => ctx.fastUncheckedConsumeChars(1); TokenEscape.EscapeChar('\\') + case '\"' => ctx.fastUncheckedConsumeChars(1); TokenEscape.EscapeChar('\"') + case '\'' => ctx.fastUncheckedConsumeChars(1); TokenEscape.EscapeChar('\'') case d@('0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9') => ctx.fastUncheckedConsumeChars(1) val escapeCode = decimal(ctx, d.asDigit) - if (escapeCode <= 0x10FFFF) escapeChar = escapeCode.toChar - else { - badCode = true - return false - } + if (escapeCode <= 0x10FFFF) 
TokenEscape.EscapeChar(escapeCode.toChar) + else TokenEscape.BadCode case 'x' => ctx.fastUncheckedConsumeChars(1) if (ctx.moreInput) { @@ -364,16 +364,12 @@ private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends In | 'A' | 'B' | 'C' | 'D' | 'E' | 'F') => ctx.fastUncheckedConsumeChars(1) val escapeCode = hexadecimal(ctx, d.asDigit) - if (escapeCode <= 0x10FFFF) escapeChar = escapeCode.toChar - else - { - badCode = true - return false - } - case _ => return false + if (escapeCode <= 0x10FFFF) TokenEscape.EscapeChar(escapeCode.toChar) + else TokenEscape.BadCode + case _ => TokenEscape.NoParse } } - else return false + else TokenEscape.NoParse case 'o' => ctx.fastUncheckedConsumeChars(1) if (ctx.moreInput) { @@ -381,182 +377,178 @@ private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends In if (d >= '0' && d <= '7') { ctx.fastUncheckedConsumeChars(1) val escapeCode = octal(ctx, d.asDigit) - if (escapeCode <= 0x10FFFF) escapeChar = escapeCode.toChar - else { - badCode = true - return false - } + if (escapeCode <= 0x10FFFF) TokenEscape.EscapeChar(escapeCode.toChar) + else TokenEscape.BadCode } - else return false + else TokenEscape.NoParse } - else return false + else TokenEscape.NoParse case '^' => ctx.fastUncheckedConsumeChars(1) if (ctx.moreInput) { val c = ctx.nextChar if (c >= 'A' && c <= 'Z') { ctx.fastUncheckedConsumeChars(1) - escapeChar = (c - 'A' + 1).toChar + TokenEscape.EscapeChar((c - 'A' + 1).toChar) } - else return false + else TokenEscape.NoParse } - else return false + else TokenEscape.NoParse case 'A' => //ACK if (ctx.offset + 2 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'C' && ctx.input(ctx.offset + 2) == 'K') { ctx.fastUncheckedConsumeChars(3) - escapeChar = '\u0006' + TokenEscape.EscapeChar('\u0006') } - else return false + else TokenEscape.NoParse case 'B' => //BS BEL if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') { ctx.fastUncheckedConsumeChars(2) - escapeChar = '\u0008' + 
TokenEscape.EscapeChar('\u0008') } else if (ctx.offset + 2 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'E' && ctx.input(ctx.offset + 2) == 'L') { ctx.fastUncheckedConsumeChars(3) - escapeChar = '\u0007' + TokenEscape.EscapeChar('\u0007') } - else return false + else TokenEscape.NoParse case 'C' => //CR CAN if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'R') { ctx.fastUncheckedConsumeChars(2) - escapeChar = '\u000d' + TokenEscape.EscapeChar('\u000d') } else if (ctx.offset + 2 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'A' && ctx.input(ctx.offset + 2) == 'N') { ctx.fastUncheckedConsumeChars(3) - escapeChar = '\u0018' + TokenEscape.EscapeChar('\u0018') } - else return false + else TokenEscape.NoParse case 'D' => //DC1 DC2 DC3 DC4 DEL DLE if (ctx.offset + 2 < ctx.inputsz) (ctx.input(ctx.offset + 1): @switch) match { case 'C' => (ctx.input(ctx.offset + 2): @switch) match { - case '1' => ctx.fastUncheckedConsumeChars(3); escapeChar = '\u0011' - case '2' => ctx.fastUncheckedConsumeChars(3); escapeChar = '\u0012' - case '3' => ctx.fastUncheckedConsumeChars(3); escapeChar = '\u0013' - case '4' => ctx.fastUncheckedConsumeChars(3); escapeChar = '\u0014' - case _ => return false + case '1' => ctx.fastUncheckedConsumeChars(3); TokenEscape.EscapeChar('\u0011') + case '2' => ctx.fastUncheckedConsumeChars(3); TokenEscape.EscapeChar('\u0012') + case '3' => ctx.fastUncheckedConsumeChars(3); TokenEscape.EscapeChar('\u0013') + case '4' => ctx.fastUncheckedConsumeChars(3); TokenEscape.EscapeChar('\u0014') + case _ => TokenEscape.NoParse } case 'E' => - if (ctx.input(ctx.offset + 2) == 'L') { ctx.fastUncheckedConsumeChars(3); escapeChar = '\u001f' } - else return false + if (ctx.input(ctx.offset + 2) == 'L') { ctx.fastUncheckedConsumeChars(3); TokenEscape.EscapeChar('\u001f') } + else TokenEscape.NoParse case 'L' => - if (ctx.input(ctx.offset + 2) == 'E') { ctx.fastUncheckedConsumeChars(3); escapeChar = '\u0010' } - else return false - case _ => return false + if 
(ctx.input(ctx.offset + 2) == 'E') { ctx.fastUncheckedConsumeChars(3); TokenEscape.EscapeChar('\u0010') } + else TokenEscape.NoParse + case _ => TokenEscape.NoParse } - else return false + else TokenEscape.NoParse case 'E' => //EM ETX ETB ESC EOT ENQ if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'M') { ctx.fastUncheckedConsumeChars(2) - escapeChar = '\u0019' + TokenEscape.EscapeChar('\u0019') } else if (ctx.offset + 2 < ctx.inputsz) (ctx.input(ctx.offset + 1): @switch) match { case 'N' => - if (ctx.input(ctx.offset + 2) == 'Q') { ctx.fastUncheckedConsumeChars(3); escapeChar = '\u0005' } - else return false + if (ctx.input(ctx.offset + 2) == 'Q') { ctx.fastUncheckedConsumeChars(3); TokenEscape.EscapeChar('\u0005') } + else TokenEscape.NoParse case 'O' => - if (ctx.input(ctx.offset + 2) == 'T') { ctx.fastUncheckedConsumeChars(3); escapeChar = '\u0004' } - else return false + if (ctx.input(ctx.offset + 2) == 'T') { ctx.fastUncheckedConsumeChars(3); TokenEscape.EscapeChar('\u0004') } + else TokenEscape.NoParse case 'S' => - if (ctx.input(ctx.offset + 2) == 'C') { ctx.fastUncheckedConsumeChars(3); escapeChar = '\u001b' } - else return false + if (ctx.input(ctx.offset + 2) == 'C') { ctx.fastUncheckedConsumeChars(3); TokenEscape.EscapeChar('\u001b') } + else TokenEscape.NoParse case 'T' => - if (ctx.input(ctx.offset + 2) == 'X') { ctx.fastUncheckedConsumeChars(3); escapeChar = '\u0003' } - else if (ctx.input(ctx.offset + 2) == 'B') { ctx.fastUncheckedConsumeChars(3); escapeChar = '\u0017' } - else return false - case _ => return false + if (ctx.input(ctx.offset + 2) == 'X') { ctx.fastUncheckedConsumeChars(3); TokenEscape.EscapeChar('\u0003') } + else if (ctx.input(ctx.offset + 2) == 'B') { ctx.fastUncheckedConsumeChars(3); TokenEscape.EscapeChar('\u0017') } + else TokenEscape.NoParse + case _ => TokenEscape.NoParse } - else return false + else TokenEscape.NoParse case 'F' => //FF FS if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'F') { 
ctx.fastUncheckedConsumeChars(2) - escapeChar = '\u000c' + TokenEscape.EscapeChar('\u000c') } else if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') { ctx.fastUncheckedConsumeChars(2) - escapeChar = '\u001c' + TokenEscape.EscapeChar('\u001c') } - else return false + else TokenEscape.NoParse case 'G' => //GS if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') { ctx.fastUncheckedConsumeChars(2) - escapeChar = '\u001d' + TokenEscape.EscapeChar('\u001d') } - else return false + else TokenEscape.NoParse case 'H' => //HT if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'T') { ctx.fastUncheckedConsumeChars(2) - escapeChar = '\u0009' + TokenEscape.EscapeChar('\u0009') } - else return false + else TokenEscape.NoParse case 'L' => //LF if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'F') { ctx.fastUncheckedConsumeChars(2) - escapeChar = '\n' + TokenEscape.EscapeChar('\n') } - else return false + else TokenEscape.NoParse case 'N' => //NUL NAK if (ctx.offset + 2 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'U' && ctx.input(ctx.offset + 2) == 'L') { ctx.fastUncheckedConsumeChars(3) - escapeChar = '\u0000' + TokenEscape.EscapeChar('\u0000') } else if (ctx.offset + 2 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'A' && ctx.input(ctx.offset + 2) == 'K') { ctx.fastUncheckedConsumeChars(3) - escapeChar = '\u0015' + TokenEscape.EscapeChar('\u0015') } - else return false + else TokenEscape.NoParse case 'R' => //RS if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') { ctx.fastUncheckedConsumeChars(2) - escapeChar = '\u001e' + TokenEscape.EscapeChar('\u001e') } - else return false + else TokenEscape.NoParse case 'S' => //SO SI SP SOH STX SYN SUB if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'O') { ctx.fastUncheckedConsumeChars(2) - escapeChar = '\u000e' + TokenEscape.EscapeChar('\u000e') } else if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'I') { 
ctx.fastUncheckedConsumeChars(2) - escapeChar = '\u000f' + TokenEscape.EscapeChar('\u000f') } else if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'P') { ctx.fastUncheckedConsumeChars(2) - escapeChar = '\u0020' + TokenEscape.EscapeChar('\u0020') } else if (ctx.offset + 2 < ctx.inputsz) (ctx.input(ctx.offset + 1): @switch) match { case 'O' => - if (ctx.input(ctx.offset + 2) == 'H') { ctx.fastUncheckedConsumeChars(3); escapeChar = '\u0001' } - else return false + if (ctx.input(ctx.offset + 2) == 'H') { ctx.fastUncheckedConsumeChars(3); TokenEscape.EscapeChar('\u0001') } + else TokenEscape.NoParse case 'T' => - if (ctx.input(ctx.offset + 2) == 'X') { ctx.fastUncheckedConsumeChars(3); escapeChar = '\u0002' } - else return false + if (ctx.input(ctx.offset + 2) == 'X') { ctx.fastUncheckedConsumeChars(3); TokenEscape.EscapeChar('\u0002') } + else TokenEscape.NoParse case 'Y' => - if (ctx.input(ctx.offset + 2) == 'N') { ctx.fastUncheckedConsumeChars(3); escapeChar = '\u0016' } - else return false + if (ctx.input(ctx.offset + 2) == 'N') { ctx.fastUncheckedConsumeChars(3); TokenEscape.EscapeChar('\u0016') } + else TokenEscape.NoParse case 'U' => - if (ctx.input(ctx.offset + 2) == 'B') { ctx.fastUncheckedConsumeChars(3); escapeChar = '\u001a' } - else return false - case _ => return false + if (ctx.input(ctx.offset + 2) == 'B') { ctx.fastUncheckedConsumeChars(3); TokenEscape.EscapeChar('\u001a') } + else TokenEscape.NoParse + case _ => TokenEscape.NoParse } - else return false + else TokenEscape.NoParse case 'U' => //US if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') { ctx.fastUncheckedConsumeChars(2) - escapeChar = '\u001f' + TokenEscape.EscapeChar('\u001f') } - else return false + else TokenEscape.NoParse case 'V' => //VT if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'T') { ctx.fastUncheckedConsumeChars(2) - escapeChar = '\u000b' + TokenEscape.EscapeChar('\u000b') } - else return false - case _ => return false + else 
TokenEscape.NoParse + case _ => TokenEscape.NoParse } - true } - else false + else TokenEscape.NoParse } // $COVERAGE-OFF$ @@ -624,23 +616,26 @@ private [internal] final class TokenString(ws: TokenSet, _expected: UnsafeOption private val expectedGap = if (_expected == null) "end of string gap" else _expected override def handleEscaped(ctx: Context, builder: StringBuilder): Boolean = { - if (spaces(ctx) != 0) - { + if (spaces(ctx) != 0) { val completedGap = ctx.moreInput && ctx.nextChar == '\\' if (completedGap) ctx.fastUncheckedConsumeChars(1) else ctx.fail(expectedGap) completedGap } - else { - val isDeadChar = ctx.moreInput && ctx.nextChar == '&' - lazy val isEscapeChar = escape(ctx) - if (isDeadChar) ctx.fastUncheckedConsumeChars(1) - else if (isEscapeChar) builder += escapeChar - else { + else if (ctx.moreInput && ctx.nextChar == '&') { + ctx.fastUncheckedConsumeChars(1) + true + } + else escape(ctx) match { + case TokenEscape.EscapeChar(c) => + builder += c + true + case TokenEscape.BadCode => + ctx.failWithMessage(expectedEscape, "invalid escape sequence") + false + case TokenEscape.NoParse => ctx.fail(expectedEscape) - if (badCode) ctx.raw ::= "invalid escape sequence" - } - isDeadChar || isEscapeChar + false } } From 124cc83bd8a9e8a4cec452879363951d21fd5482 Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Mon, 4 Jan 2021 18:00:25 +0000 Subject: [PATCH 16/34] Slight cognitive complexity reductions --- .../internal/instructions/TokenInstrs.scala | 144 +++++++++--------- 1 file changed, 72 insertions(+), 72 deletions(-) diff --git a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala index ee26c8427..71f9b9545 100644 --- a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala @@ -171,13 +171,10 @@ private [internal] final class TokenSign(ty: SignType, _expected: UnsafeOption[S private [instructions] sealed 
trait NumericReader { private final def subDecimal(base: Int, maxDigit: Char, ctx: Context): Int => Int = { @tailrec def go(x: Int): Int = { - if (ctx.moreInput) { - val d = ctx.nextChar - if (d >= '0' && d <= maxDigit) { - ctx.fastUncheckedConsumeChars(1) - go(x * base + d.asDigit) - } - else x + if (ctx.moreInput && ctx.nextChar >= '0' && ctx.nextChar <= maxDigit) { + val d = ctx.nextChar.asDigit + ctx.fastUncheckedConsumeChars(1) + go(x * base + d) } else x } @@ -340,20 +337,20 @@ private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends In protected final def escape(ctx: Context): TokenEscape.Escape = { if (ctx.moreInput) { (ctx.nextChar: @switch) match { - case 'a' => ctx.fastUncheckedConsumeChars(1); TokenEscape.EscapeChar('\u0007') - case 'b' => ctx.fastUncheckedConsumeChars(1); TokenEscape.EscapeChar('\b') - case 'f' => ctx.fastUncheckedConsumeChars(1); TokenEscape.EscapeChar('\u000c') - case 'n' => ctx.fastUncheckedConsumeChars(1); TokenEscape.EscapeChar('\n') - case 'r' => ctx.fastUncheckedConsumeChars(1); TokenEscape.EscapeChar('\r') - case 't' => ctx.fastUncheckedConsumeChars(1); TokenEscape.EscapeChar('\t') - case 'v' => ctx.fastUncheckedConsumeChars(1); TokenEscape.EscapeChar('\u000b') - case '\\' => ctx.fastUncheckedConsumeChars(1); TokenEscape.EscapeChar('\\') - case '\"' => ctx.fastUncheckedConsumeChars(1); TokenEscape.EscapeChar('\"') - case '\'' => ctx.fastUncheckedConsumeChars(1); TokenEscape.EscapeChar('\'') + case 'a' => ctx.fastUncheckedConsumeChars(1); new TokenEscape.EscapeChar('\u0007') + case 'b' => ctx.fastUncheckedConsumeChars(1); new TokenEscape.EscapeChar('\b') + case 'f' => ctx.fastUncheckedConsumeChars(1); new TokenEscape.EscapeChar('\u000c') + case 'n' => ctx.fastUncheckedConsumeChars(1); new TokenEscape.EscapeChar('\n') + case 'r' => ctx.fastUncheckedConsumeChars(1); new TokenEscape.EscapeChar('\r') + case 't' => ctx.fastUncheckedConsumeChars(1); new TokenEscape.EscapeChar('\t') + case 'v' => 
ctx.fastUncheckedConsumeChars(1); new TokenEscape.EscapeChar('\u000b') + case '\\' => ctx.fastUncheckedConsumeChars(1); new TokenEscape.EscapeChar('\\') + case '\"' => ctx.fastUncheckedConsumeChars(1); new TokenEscape.EscapeChar('\"') + case '\'' => ctx.fastUncheckedConsumeChars(1); new TokenEscape.EscapeChar('\'') case d@('0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9') => ctx.fastUncheckedConsumeChars(1) val escapeCode = decimal(ctx, d.asDigit) - if (escapeCode <= 0x10FFFF) TokenEscape.EscapeChar(escapeCode.toChar) + if (escapeCode <= 0x10FFFF) new TokenEscape.EscapeChar(escapeCode.toChar) else TokenEscape.BadCode case 'x' => ctx.fastUncheckedConsumeChars(1) @@ -364,7 +361,7 @@ private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends In | 'A' | 'B' | 'C' | 'D' | 'E' | 'F') => ctx.fastUncheckedConsumeChars(1) val escapeCode = hexadecimal(ctx, d.asDigit) - if (escapeCode <= 0x10FFFF) TokenEscape.EscapeChar(escapeCode.toChar) + if (escapeCode <= 0x10FFFF) new TokenEscape.EscapeChar(escapeCode.toChar) else TokenEscape.BadCode case _ => TokenEscape.NoParse } @@ -377,7 +374,7 @@ private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends In if (d >= '0' && d <= '7') { ctx.fastUncheckedConsumeChars(1) val escapeCode = octal(ctx, d.asDigit) - if (escapeCode <= 0x10FFFF) TokenEscape.EscapeChar(escapeCode.toChar) + if (escapeCode <= 0x10FFFF) new TokenEscape.EscapeChar(escapeCode.toChar) else TokenEscape.BadCode } else TokenEscape.NoParse @@ -389,7 +386,7 @@ private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends In val c = ctx.nextChar if (c >= 'A' && c <= 'Z') { ctx.fastUncheckedConsumeChars(1) - TokenEscape.EscapeChar((c - 'A' + 1).toChar) + new TokenEscape.EscapeChar((c - 'A' + 1).toChar) } else TokenEscape.NoParse } @@ -397,43 +394,43 @@ private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends In case 'A' => //ACK if (ctx.offset + 2 < ctx.inputsz && ctx.input(ctx.offset + 1) == 
'C' && ctx.input(ctx.offset + 2) == 'K') { ctx.fastUncheckedConsumeChars(3) - TokenEscape.EscapeChar('\u0006') + new TokenEscape.EscapeChar('\u0006') } else TokenEscape.NoParse case 'B' => //BS BEL if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') { ctx.fastUncheckedConsumeChars(2) - TokenEscape.EscapeChar('\u0008') + new TokenEscape.EscapeChar('\u0008') } else if (ctx.offset + 2 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'E' && ctx.input(ctx.offset + 2) == 'L') { ctx.fastUncheckedConsumeChars(3) - TokenEscape.EscapeChar('\u0007') + new TokenEscape.EscapeChar('\u0007') } else TokenEscape.NoParse case 'C' => //CR CAN if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'R') { ctx.fastUncheckedConsumeChars(2) - TokenEscape.EscapeChar('\u000d') + new TokenEscape.EscapeChar('\u000d') } else if (ctx.offset + 2 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'A' && ctx.input(ctx.offset + 2) == 'N') { ctx.fastUncheckedConsumeChars(3) - TokenEscape.EscapeChar('\u0018') + new TokenEscape.EscapeChar('\u0018') } else TokenEscape.NoParse case 'D' => //DC1 DC2 DC3 DC4 DEL DLE if (ctx.offset + 2 < ctx.inputsz) (ctx.input(ctx.offset + 1): @switch) match { case 'C' => (ctx.input(ctx.offset + 2): @switch) match { - case '1' => ctx.fastUncheckedConsumeChars(3); TokenEscape.EscapeChar('\u0011') - case '2' => ctx.fastUncheckedConsumeChars(3); TokenEscape.EscapeChar('\u0012') - case '3' => ctx.fastUncheckedConsumeChars(3); TokenEscape.EscapeChar('\u0013') - case '4' => ctx.fastUncheckedConsumeChars(3); TokenEscape.EscapeChar('\u0014') + case '1' => ctx.fastUncheckedConsumeChars(3); new TokenEscape.EscapeChar('\u0011') + case '2' => ctx.fastUncheckedConsumeChars(3); new TokenEscape.EscapeChar('\u0012') + case '3' => ctx.fastUncheckedConsumeChars(3); new TokenEscape.EscapeChar('\u0013') + case '4' => ctx.fastUncheckedConsumeChars(3); new TokenEscape.EscapeChar('\u0014') case _ => TokenEscape.NoParse } case 'E' => - if (ctx.input(ctx.offset + 2) == 'L') { 
ctx.fastUncheckedConsumeChars(3); TokenEscape.EscapeChar('\u001f') } + if (ctx.input(ctx.offset + 2) == 'L') { ctx.fastUncheckedConsumeChars(3); new TokenEscape.EscapeChar('\u001f') } else TokenEscape.NoParse case 'L' => - if (ctx.input(ctx.offset + 2) == 'E') { ctx.fastUncheckedConsumeChars(3); TokenEscape.EscapeChar('\u0010') } + if (ctx.input(ctx.offset + 2) == 'E') { ctx.fastUncheckedConsumeChars(3); new TokenEscape.EscapeChar('\u0010') } else TokenEscape.NoParse case _ => TokenEscape.NoParse } @@ -441,21 +438,21 @@ private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends In case 'E' => //EM ETX ETB ESC EOT ENQ if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'M') { ctx.fastUncheckedConsumeChars(2) - TokenEscape.EscapeChar('\u0019') + new TokenEscape.EscapeChar('\u0019') } else if (ctx.offset + 2 < ctx.inputsz) (ctx.input(ctx.offset + 1): @switch) match { case 'N' => - if (ctx.input(ctx.offset + 2) == 'Q') { ctx.fastUncheckedConsumeChars(3); TokenEscape.EscapeChar('\u0005') } + if (ctx.input(ctx.offset + 2) == 'Q') { ctx.fastUncheckedConsumeChars(3); new TokenEscape.EscapeChar('\u0005') } else TokenEscape.NoParse case 'O' => - if (ctx.input(ctx.offset + 2) == 'T') { ctx.fastUncheckedConsumeChars(3); TokenEscape.EscapeChar('\u0004') } + if (ctx.input(ctx.offset + 2) == 'T') { ctx.fastUncheckedConsumeChars(3); new TokenEscape.EscapeChar('\u0004') } else TokenEscape.NoParse case 'S' => - if (ctx.input(ctx.offset + 2) == 'C') { ctx.fastUncheckedConsumeChars(3); TokenEscape.EscapeChar('\u001b') } + if (ctx.input(ctx.offset + 2) == 'C') { ctx.fastUncheckedConsumeChars(3); new TokenEscape.EscapeChar('\u001b') } else TokenEscape.NoParse case 'T' => - if (ctx.input(ctx.offset + 2) == 'X') { ctx.fastUncheckedConsumeChars(3); TokenEscape.EscapeChar('\u0003') } - else if (ctx.input(ctx.offset + 2) == 'B') { ctx.fastUncheckedConsumeChars(3); TokenEscape.EscapeChar('\u0017') } + if (ctx.input(ctx.offset + 2) == 'X') { 
ctx.fastUncheckedConsumeChars(3); new TokenEscape.EscapeChar('\u0003') } + else if (ctx.input(ctx.offset + 2) == 'B') { ctx.fastUncheckedConsumeChars(3); new TokenEscape.EscapeChar('\u0017') } else TokenEscape.NoParse case _ => TokenEscape.NoParse } @@ -463,72 +460,72 @@ private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends In case 'F' => //FF FS if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'F') { ctx.fastUncheckedConsumeChars(2) - TokenEscape.EscapeChar('\u000c') + new TokenEscape.EscapeChar('\u000c') } else if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') { ctx.fastUncheckedConsumeChars(2) - TokenEscape.EscapeChar('\u001c') + new TokenEscape.EscapeChar('\u001c') } else TokenEscape.NoParse case 'G' => //GS if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') { ctx.fastUncheckedConsumeChars(2) - TokenEscape.EscapeChar('\u001d') + new TokenEscape.EscapeChar('\u001d') } else TokenEscape.NoParse case 'H' => //HT if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'T') { ctx.fastUncheckedConsumeChars(2) - TokenEscape.EscapeChar('\u0009') + new TokenEscape.EscapeChar('\u0009') } else TokenEscape.NoParse case 'L' => //LF if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'F') { ctx.fastUncheckedConsumeChars(2) - TokenEscape.EscapeChar('\n') + new TokenEscape.EscapeChar('\n') } else TokenEscape.NoParse case 'N' => //NUL NAK if (ctx.offset + 2 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'U' && ctx.input(ctx.offset + 2) == 'L') { ctx.fastUncheckedConsumeChars(3) - TokenEscape.EscapeChar('\u0000') + new TokenEscape.EscapeChar('\u0000') } else if (ctx.offset + 2 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'A' && ctx.input(ctx.offset + 2) == 'K') { ctx.fastUncheckedConsumeChars(3) - TokenEscape.EscapeChar('\u0015') + new TokenEscape.EscapeChar('\u0015') } else TokenEscape.NoParse case 'R' => //RS if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') 
{ ctx.fastUncheckedConsumeChars(2) - TokenEscape.EscapeChar('\u001e') + new TokenEscape.EscapeChar('\u001e') } else TokenEscape.NoParse case 'S' => //SO SI SP SOH STX SYN SUB if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'O') { ctx.fastUncheckedConsumeChars(2) - TokenEscape.EscapeChar('\u000e') + new TokenEscape.EscapeChar('\u000e') } else if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'I') { ctx.fastUncheckedConsumeChars(2) - TokenEscape.EscapeChar('\u000f') + new TokenEscape.EscapeChar('\u000f') } else if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'P') { ctx.fastUncheckedConsumeChars(2) - TokenEscape.EscapeChar('\u0020') + new TokenEscape.EscapeChar('\u0020') } else if (ctx.offset + 2 < ctx.inputsz) (ctx.input(ctx.offset + 1): @switch) match { case 'O' => - if (ctx.input(ctx.offset + 2) == 'H') { ctx.fastUncheckedConsumeChars(3); TokenEscape.EscapeChar('\u0001') } + if (ctx.input(ctx.offset + 2) == 'H') { ctx.fastUncheckedConsumeChars(3); new TokenEscape.EscapeChar('\u0001') } else TokenEscape.NoParse case 'T' => - if (ctx.input(ctx.offset + 2) == 'X') { ctx.fastUncheckedConsumeChars(3); TokenEscape.EscapeChar('\u0002') } + if (ctx.input(ctx.offset + 2) == 'X') { ctx.fastUncheckedConsumeChars(3); new TokenEscape.EscapeChar('\u0002') } else TokenEscape.NoParse case 'Y' => - if (ctx.input(ctx.offset + 2) == 'N') { ctx.fastUncheckedConsumeChars(3); TokenEscape.EscapeChar('\u0016') } + if (ctx.input(ctx.offset + 2) == 'N') { ctx.fastUncheckedConsumeChars(3); new TokenEscape.EscapeChar('\u0016') } else TokenEscape.NoParse case 'U' => - if (ctx.input(ctx.offset + 2) == 'B') { ctx.fastUncheckedConsumeChars(3); TokenEscape.EscapeChar('\u001a') } + if (ctx.input(ctx.offset + 2) == 'B') { ctx.fastUncheckedConsumeChars(3); new TokenEscape.EscapeChar('\u001a') } else TokenEscape.NoParse case _ => TokenEscape.NoParse } @@ -536,13 +533,13 @@ private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends In 
case 'U' => //US if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') { ctx.fastUncheckedConsumeChars(2) - TokenEscape.EscapeChar('\u001f') + new TokenEscape.EscapeChar('\u001f') } else TokenEscape.NoParse case 'V' => //VT if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'T') { ctx.fastUncheckedConsumeChars(2) - TokenEscape.EscapeChar('\u000b') + new TokenEscape.EscapeChar('\u000b') } else TokenEscape.NoParse case _ => TokenEscape.NoParse @@ -615,13 +612,15 @@ private [internal] final class TokenString(ws: TokenSet, _expected: UnsafeOption private val expectedEscape = if (_expected == null) "escape code" else _expected private val expectedGap = if (_expected == null) "end of string gap" else _expected + private def readGap(ctx: Context): Boolean = { + val completedGap = ctx.moreInput && ctx.nextChar == '\\' + if (completedGap) ctx.fastUncheckedConsumeChars(1) + else ctx.fail(expectedGap) + completedGap + } + override def handleEscaped(ctx: Context, builder: StringBuilder): Boolean = { - if (spaces(ctx) != 0) { - val completedGap = ctx.moreInput && ctx.nextChar == '\\' - if (completedGap) ctx.fastUncheckedConsumeChars(1) - else ctx.fail(expectedGap) - completedGap - } + if (spaces(ctx) != 0) readGap(ctx) else if (ctx.moreInput && ctx.nextChar == '&') { ctx.fastUncheckedConsumeChars(1) true @@ -667,23 +666,24 @@ private [instructions] abstract class TokenLexi(name: String, illegalName: Strin else ctx.fail(expected) } + private def ensureLegal(ctx: Context, tok: String) = { + if (illegal(tok)) { + ctx.offset -= tok.length + ctx.unexpectedFail(expected = expected, unexpected = s"$illegalName $tok") + } + else { + ctx.col += tok.length + ctx.pushAndContinue(tok) + } + } + @tailrec private final def restOfToken(ctx: Context, tok: StringBuilder): Unit = { if (ctx.moreInput && letter(ctx.nextChar)) { tok += ctx.nextChar ctx.offset += 1 restOfToken(ctx, tok) } - else { - val tokStr = tok.toString - if (illegal(tokStr)) { - ctx.offset -= 
tokStr.length - ctx.unexpectedFail(expected = expected, unexpected = s"$illegalName $tokStr") - } - else { - ctx.col += tokStr.length - ctx.pushAndContinue(tokStr) - } - } + else ensureLegal(ctx, tok.toString) } // $COVERAGE-OFF$ From 12bc1476594b95ba4d2064c3a11b24e899c7a3f9 Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Mon, 4 Jan 2021 18:12:01 +0000 Subject: [PATCH 17/34] Improved TokenSpecific just a little more --- .../internal/instructions/TokenInstrs.scala | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala index 71f9b9545..d765e8b4e 100644 --- a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala @@ -318,16 +318,8 @@ private [internal] final class TokenFloat(_expected: UnsafeOption[String]) exten // $COVERAGE-ON$ } -object TokenEscape { - private [instructions] sealed trait Escape - private [instructions] case class EscapeChar(escapeChar: Char) extends Escape - private [instructions] case object BadCode extends Escape - private [instructions] case object NoParse extends Escape -} -private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends Instr with Stateful with NumericReader { +private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends Instr with NumericReader { private [this] final val expected = if (_expected == null) "escape code" else _expected - protected var escapeChar: Char = _ - protected var badCode: Boolean = _ override def apply(ctx: Context): Unit = escape(ctx) match { case TokenEscape.EscapeChar(escapeChar) =>ctx.pushAndContinue(escapeChar) case TokenEscape.BadCode => ctx.failWithMessage(expected, msg = "invalid escape sequence") @@ -551,7 +543,12 @@ private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends In // $COVERAGE-OFF$ override def 
toString: String = "TokenEscape" // $COVERAGE-ON$ - override def copy: TokenEscape = new TokenEscape(expected) +} +object TokenEscape { + private [instructions] sealed trait Escape + private [instructions] case class EscapeChar(escapeChar: Char) extends Escape + private [instructions] case object BadCode extends Escape + private [instructions] case object NoParse extends Escape } private [instructions] sealed trait TokenStringLike extends Instr { @@ -649,7 +646,6 @@ private [internal] final class TokenString(ws: TokenSet, _expected: UnsafeOption // $COVERAGE-OFF$ override def toString: String = "TokenString" // $COVERAGE-ON$ - override def copy: TokenString = new TokenString(ws, _expected) } private [instructions] abstract class TokenLexi(name: String, illegalName: String) @@ -707,10 +703,14 @@ private [instructions] abstract class TokenSpecific(_specific: String, caseSensi private final val strsz = specific.length protected def postprocess(ctx: Context, i: Int): Unit + val readCharCaseHandled = { + if (caseSensitive) (ctx: Context, i: Int) => ctx.input(i) + else (ctx: Context, i: Int) => ctx.input(i).toLower + } + @tailrec final private def readSpecific(ctx: Context, i: Int, j: Int): Unit = { if (j < strsz) { - val c = if (caseSensitive) ctx.input(i) else ctx.input(i).toLower - if (c != specific(j)) ctx.fail(expected) + if (readCharCaseHandled(ctx, i) != specific(j)) ctx.fail(expected) else readSpecific(ctx, i + 1, j + 1) } else { From 27f7819001edef79a8baf93084281b40f110f780 Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Mon, 4 Jan 2021 18:14:54 +0000 Subject: [PATCH 18/34] Improved it for the last time --- .../scala/parsley/internal/instructions/TokenInstrs.scala | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala index d765e8b4e..5baa5da0e 100644 --- 
a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala @@ -709,10 +709,8 @@ private [instructions] abstract class TokenSpecific(_specific: String, caseSensi } @tailrec final private def readSpecific(ctx: Context, i: Int, j: Int): Unit = { - if (j < strsz) { - if (readCharCaseHandled(ctx, i) != specific(j)) ctx.fail(expected) - else readSpecific(ctx, i + 1, j + 1) - } + if (j < strsz && readCharCaseHandled(ctx, i) == specific(j)) readSpecific(ctx, i + 1, j + 1) + else if (j < strsz) ctx.fail(expected) else { ctx.saveState() ctx.fastUncheckedConsumeChars(strsz) From 80453f006c1d2a386ba94ce7d41710b73c23c135 Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Mon, 4 Jan 2021 19:24:03 +0000 Subject: [PATCH 19/34] Massively reduced size of esc --- .../internal/instructions/TokenInstrs.scala | 175 ++++++------------ 1 file changed, 53 insertions(+), 122 deletions(-) diff --git a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala index 5baa5da0e..083f0cbc8 100644 --- a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala @@ -326,19 +326,24 @@ private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends In case TokenEscape.NoParse => ctx.fail(expected) } + private final def consumeAndReturn(ctx: Context, n: Int, c: Char): TokenEscape.Escape = { + ctx.fastUncheckedConsumeChars(n) + new TokenEscape.EscapeChar(c) + } + protected final def escape(ctx: Context): TokenEscape.Escape = { if (ctx.moreInput) { (ctx.nextChar: @switch) match { - case 'a' => ctx.fastUncheckedConsumeChars(1); new TokenEscape.EscapeChar('\u0007') - case 'b' => ctx.fastUncheckedConsumeChars(1); new TokenEscape.EscapeChar('\b') - case 'f' => ctx.fastUncheckedConsumeChars(1); new TokenEscape.EscapeChar('\u000c') - case 'n' => ctx.fastUncheckedConsumeChars(1); new 
TokenEscape.EscapeChar('\n') - case 'r' => ctx.fastUncheckedConsumeChars(1); new TokenEscape.EscapeChar('\r') - case 't' => ctx.fastUncheckedConsumeChars(1); new TokenEscape.EscapeChar('\t') - case 'v' => ctx.fastUncheckedConsumeChars(1); new TokenEscape.EscapeChar('\u000b') - case '\\' => ctx.fastUncheckedConsumeChars(1); new TokenEscape.EscapeChar('\\') - case '\"' => ctx.fastUncheckedConsumeChars(1); new TokenEscape.EscapeChar('\"') - case '\'' => ctx.fastUncheckedConsumeChars(1); new TokenEscape.EscapeChar('\'') + case 'a' => consumeAndReturn(ctx, 1, '\u0007') + case 'b' => consumeAndReturn(ctx, 1, '\b') + case 'f' => consumeAndReturn(ctx, 1, '\u000c') + case 'n' => consumeAndReturn(ctx, 1, '\n') + case 'r' => consumeAndReturn(ctx, 1, '\r') + case 't' => consumeAndReturn(ctx, 1, '\t') + case 'v' => consumeAndReturn(ctx, 1, '\u000b') + case '\\' => consumeAndReturn(ctx, 1, '\\') + case '\"' => consumeAndReturn(ctx, 1, '\"') + case '\'' => consumeAndReturn(ctx, 1, '\'') case d@('0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9') => ctx.fastUncheckedConsumeChars(1) val escapeCode = decimal(ctx, d.asDigit) @@ -376,163 +381,89 @@ private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends In ctx.fastUncheckedConsumeChars(1) if (ctx.moreInput) { val c = ctx.nextChar - if (c >= 'A' && c <= 'Z') { - ctx.fastUncheckedConsumeChars(1) - new TokenEscape.EscapeChar((c - 'A' + 1).toChar) - } + if (c >= 'A' && c <= 'Z') consumeAndReturn(ctx, 1, (c - 'A' + 1).toChar) else TokenEscape.NoParse } else TokenEscape.NoParse case 'A' => //ACK - if (ctx.offset + 2 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'C' && ctx.input(ctx.offset + 2) == 'K') { - ctx.fastUncheckedConsumeChars(3) - new TokenEscape.EscapeChar('\u0006') - } + if (ctx.offset + 2 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'C' && ctx.input(ctx.offset + 2) == 'K') consumeAndReturn(ctx, 3, '\u0006') else TokenEscape.NoParse case 'B' => //BS BEL - if (ctx.offset + 1 < ctx.inputsz && 
ctx.input(ctx.offset + 1) == 'S') { - ctx.fastUncheckedConsumeChars(2) - new TokenEscape.EscapeChar('\u0008') - } + if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') consumeAndReturn(ctx, 2, '\u0008') else if (ctx.offset + 2 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'E' && ctx.input(ctx.offset + 2) == 'L') { - ctx.fastUncheckedConsumeChars(3) - new TokenEscape.EscapeChar('\u0007') + consumeAndReturn(ctx, 3, '\u0007') } else TokenEscape.NoParse case 'C' => //CR CAN - if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'R') { - ctx.fastUncheckedConsumeChars(2) - new TokenEscape.EscapeChar('\u000d') - } + if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'R') consumeAndReturn(ctx, 2, '\u000d') else if (ctx.offset + 2 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'A' && ctx.input(ctx.offset + 2) == 'N') { - ctx.fastUncheckedConsumeChars(3) - new TokenEscape.EscapeChar('\u0018') + consumeAndReturn(ctx, 3, '\u0018') } else TokenEscape.NoParse case 'D' => //DC1 DC2 DC3 DC4 DEL DLE - if (ctx.offset + 2 < ctx.inputsz) (ctx.input(ctx.offset + 1): @switch) match { + if (ctx.offset + 2 < ctx.inputsz) ctx.input(ctx.offset + 1) match { case 'C' => (ctx.input(ctx.offset + 2): @switch) match { - case '1' => ctx.fastUncheckedConsumeChars(3); new TokenEscape.EscapeChar('\u0011') - case '2' => ctx.fastUncheckedConsumeChars(3); new TokenEscape.EscapeChar('\u0012') - case '3' => ctx.fastUncheckedConsumeChars(3); new TokenEscape.EscapeChar('\u0013') - case '4' => ctx.fastUncheckedConsumeChars(3); new TokenEscape.EscapeChar('\u0014') + case '1' => consumeAndReturn(ctx, 3, '\u0011') + case '2' => consumeAndReturn(ctx, 3, '\u0012') + case '3' => consumeAndReturn(ctx, 3, '\u0013') + case '4' => consumeAndReturn(ctx, 3, '\u0014') case _ => TokenEscape.NoParse } - case 'E' => - if (ctx.input(ctx.offset + 2) == 'L') { ctx.fastUncheckedConsumeChars(3); new TokenEscape.EscapeChar('\u001f') } - else TokenEscape.NoParse - case 'L' => - if 
(ctx.input(ctx.offset + 2) == 'E') { ctx.fastUncheckedConsumeChars(3); new TokenEscape.EscapeChar('\u0010') } - else TokenEscape.NoParse + case 'E' if ctx.input(ctx.offset + 2) == 'L' => consumeAndReturn(ctx, 3, '\u001f') + case 'L' if ctx.input(ctx.offset + 2) == 'E' => consumeAndReturn(ctx, 3, '\u0010') case _ => TokenEscape.NoParse } else TokenEscape.NoParse case 'E' => //EM ETX ETB ESC EOT ENQ - if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'M') { - ctx.fastUncheckedConsumeChars(2) - new TokenEscape.EscapeChar('\u0019') - } - else if (ctx.offset + 2 < ctx.inputsz) (ctx.input(ctx.offset + 1): @switch) match { - case 'N' => - if (ctx.input(ctx.offset + 2) == 'Q') { ctx.fastUncheckedConsumeChars(3); new TokenEscape.EscapeChar('\u0005') } - else TokenEscape.NoParse - case 'O' => - if (ctx.input(ctx.offset + 2) == 'T') { ctx.fastUncheckedConsumeChars(3); new TokenEscape.EscapeChar('\u0004') } - else TokenEscape.NoParse - case 'S' => - if (ctx.input(ctx.offset + 2) == 'C') { ctx.fastUncheckedConsumeChars(3); new TokenEscape.EscapeChar('\u001b') } - else TokenEscape.NoParse - case 'T' => - if (ctx.input(ctx.offset + 2) == 'X') { ctx.fastUncheckedConsumeChars(3); new TokenEscape.EscapeChar('\u0003') } - else if (ctx.input(ctx.offset + 2) == 'B') { ctx.fastUncheckedConsumeChars(3); new TokenEscape.EscapeChar('\u0017') } - else TokenEscape.NoParse + if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'M') consumeAndReturn(ctx, 2, '\u0019') + else if (ctx.offset + 2 < ctx.inputsz) ctx.input(ctx.offset + 1) match { + case 'N' if ctx.input(ctx.offset + 2) == 'Q' => consumeAndReturn(ctx, 3, '\u0005') + case 'O' if ctx.input(ctx.offset + 2) == 'T' => consumeAndReturn(ctx, 3, '\u0004') + case 'S' if ctx.input(ctx.offset + 2) == 'C' => consumeAndReturn(ctx, 3, '\u001b') + case 'T' if ctx.input(ctx.offset + 2) == 'X' => consumeAndReturn(ctx, 3, '\u0003') + case 'T' if ctx.input(ctx.offset + 2) == 'B' => consumeAndReturn(ctx, 3, '\u0017') case _ => 
TokenEscape.NoParse } else TokenEscape.NoParse case 'F' => //FF FS - if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'F') { - ctx.fastUncheckedConsumeChars(2) - new TokenEscape.EscapeChar('\u000c') - } - else if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') { - ctx.fastUncheckedConsumeChars(2) - new TokenEscape.EscapeChar('\u001c') - } + if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'F') consumeAndReturn(ctx, 2, '\u000c') + else if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') consumeAndReturn(ctx, 2, '\u001c') else TokenEscape.NoParse case 'G' => //GS - if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') { - ctx.fastUncheckedConsumeChars(2) - new TokenEscape.EscapeChar('\u001d') - } + if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') consumeAndReturn(ctx, 2, '\u001d') else TokenEscape.NoParse case 'H' => //HT - if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'T') { - ctx.fastUncheckedConsumeChars(2) - new TokenEscape.EscapeChar('\u0009') - } + if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'T') consumeAndReturn(ctx, 2, '\u0009') else TokenEscape.NoParse case 'L' => //LF - if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'F') { - ctx.fastUncheckedConsumeChars(2) - new TokenEscape.EscapeChar('\n') - } + if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'F') consumeAndReturn(ctx, 2, '\n') else TokenEscape.NoParse case 'N' => //NUL NAK - if (ctx.offset + 2 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'U' && ctx.input(ctx.offset + 2) == 'L') { - ctx.fastUncheckedConsumeChars(3) - new TokenEscape.EscapeChar('\u0000') - } + if (ctx.offset + 2 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'U' && ctx.input(ctx.offset + 2) == 'L') consumeAndReturn(ctx, 3, '\u0000') else if (ctx.offset + 2 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'A' && ctx.input(ctx.offset + 2) == 'K') { - 
ctx.fastUncheckedConsumeChars(3) - new TokenEscape.EscapeChar('\u0015') + consumeAndReturn(ctx, 3, '\u0015') } else TokenEscape.NoParse case 'R' => //RS - if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') { - ctx.fastUncheckedConsumeChars(2) - new TokenEscape.EscapeChar('\u001e') - } + if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') consumeAndReturn(ctx, 2, '\u001e') else TokenEscape.NoParse case 'S' => //SO SI SP SOH STX SYN SUB - if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'O') { - ctx.fastUncheckedConsumeChars(2) - new TokenEscape.EscapeChar('\u000e') - } - else if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'I') { - ctx.fastUncheckedConsumeChars(2) - new TokenEscape.EscapeChar('\u000f') - } - else if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'P') { - ctx.fastUncheckedConsumeChars(2) - new TokenEscape.EscapeChar('\u0020') - } - else if (ctx.offset + 2 < ctx.inputsz) (ctx.input(ctx.offset + 1): @switch) match { - case 'O' => - if (ctx.input(ctx.offset + 2) == 'H') { ctx.fastUncheckedConsumeChars(3); new TokenEscape.EscapeChar('\u0001') } - else TokenEscape.NoParse - case 'T' => - if (ctx.input(ctx.offset + 2) == 'X') { ctx.fastUncheckedConsumeChars(3); new TokenEscape.EscapeChar('\u0002') } - else TokenEscape.NoParse - case 'Y' => - if (ctx.input(ctx.offset + 2) == 'N') { ctx.fastUncheckedConsumeChars(3); new TokenEscape.EscapeChar('\u0016') } - else TokenEscape.NoParse - case 'U' => - if (ctx.input(ctx.offset + 2) == 'B') { ctx.fastUncheckedConsumeChars(3); new TokenEscape.EscapeChar('\u001a') } - else TokenEscape.NoParse + if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'O') consumeAndReturn(ctx, 2, '\u000e') + else if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'I') consumeAndReturn(ctx, 2, '\u000f') + else if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'P') consumeAndReturn(ctx, 2, '\u0020') + else if 
(ctx.offset + 2 < ctx.inputsz) ctx.input(ctx.offset + 1) match { + case 'O' if ctx.input(ctx.offset + 2) == 'H' => consumeAndReturn(ctx, 3, '\u0001') + case 'T' if ctx.input(ctx.offset + 2) == 'X' => consumeAndReturn(ctx, 3, '\u0002') + case 'Y' if ctx.input(ctx.offset + 2) == 'N' => consumeAndReturn(ctx, 3, '\u0016') + case 'U' if ctx.input(ctx.offset + 2) == 'B' => consumeAndReturn(ctx, 3, '\u001a') case _ => TokenEscape.NoParse } else TokenEscape.NoParse case 'U' => //US - if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') { - ctx.fastUncheckedConsumeChars(2) - new TokenEscape.EscapeChar('\u001f') - } + if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') consumeAndReturn(ctx, 2, '\u001f') else TokenEscape.NoParse case 'V' => //VT - if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'T') { - ctx.fastUncheckedConsumeChars(2) - new TokenEscape.EscapeChar('\u000b') - } + if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'T') consumeAndReturn(ctx, 2, '\u000b') else TokenEscape.NoParse case _ => TokenEscape.NoParse } From be319cf6d428a9b4526040419ff3596088ba3fef Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Mon, 4 Jan 2021 22:04:27 +0000 Subject: [PATCH 20/34] More refactoring of escape --- .../internal/instructions/TokenInstrs.scala | 111 ++++++++---------- 1 file changed, 48 insertions(+), 63 deletions(-) diff --git a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala index 083f0cbc8..ec1afd267 100644 --- a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala @@ -331,9 +331,13 @@ private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends In new TokenEscape.EscapeChar(c) } + private final def lookAhead(ctx: Context, n: Int): Char = ctx.input(ctx.offset + n) + private final def lookAhead(ctx: Context, n: Int, c: Char): Boolean 
= ctx.offset + n < ctx.inputsz && lookAhead(ctx, n) == c + protected final def escape(ctx: Context): TokenEscape.Escape = { + val threeAvailable = ctx.offset + 2 < ctx.inputsz if (ctx.moreInput) { - (ctx.nextChar: @switch) match { + ctx.nextChar match { case 'a' => consumeAndReturn(ctx, 1, '\u0007') case 'b' => consumeAndReturn(ctx, 1, '\b') case 'f' => consumeAndReturn(ctx, 1, '\u000c') @@ -352,10 +356,10 @@ private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends In case 'x' => ctx.fastUncheckedConsumeChars(1) if (ctx.moreInput) { - (ctx.nextChar: @switch) match { - case d@('0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' - | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' - | 'A' | 'B' | 'C' | 'D' | 'E' | 'F') => + ctx.nextChar match { + case d@( '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' + | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' + | 'A' | 'B' | 'C' | 'D' | 'E' | 'F') => ctx.fastUncheckedConsumeChars(1) val escapeCode = hexadecimal(ctx, d.asDigit) if (escapeCode <= 0x10FFFF) new TokenEscape.EscapeChar(escapeCode.toChar) @@ -385,86 +389,67 @@ private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends In else TokenEscape.NoParse } else TokenEscape.NoParse - case 'A' => //ACK - if (ctx.offset + 2 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'C' && ctx.input(ctx.offset + 2) == 'K') consumeAndReturn(ctx, 3, '\u0006') - else TokenEscape.NoParse + case 'A' if threeAvailable && lookAhead(ctx, 1) == 'C' && lookAhead(ctx, 2) == 'K' => consumeAndReturn(ctx, 3, '\u0006') //ACK case 'B' => //BS BEL - if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') consumeAndReturn(ctx, 2, '\u0008') - else if (ctx.offset + 2 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'E' && ctx.input(ctx.offset + 2) == 'L') { + if (lookAhead(ctx, 1, 'S')) consumeAndReturn(ctx, 2, '\u0008') + else if (lookAhead(ctx, 2, 'L') && lookAhead(ctx, 1) == 'E') { consumeAndReturn(ctx, 3, '\u0007') } else TokenEscape.NoParse case 'C' => //CR CAN - 
if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'R') consumeAndReturn(ctx, 2, '\u000d') - else if (ctx.offset + 2 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'A' && ctx.input(ctx.offset + 2) == 'N') { - consumeAndReturn(ctx, 3, '\u0018') - } + if (lookAhead(ctx, 1, 'R')) consumeAndReturn(ctx, 2, '\u000d') + else if (lookAhead(ctx, 2, 'N') && lookAhead(ctx, 1) == 'A') consumeAndReturn(ctx, 3, '\u0018') else TokenEscape.NoParse - case 'D' => //DC1 DC2 DC3 DC4 DEL DLE - if (ctx.offset + 2 < ctx.inputsz) ctx.input(ctx.offset + 1) match { - case 'C' => (ctx.input(ctx.offset + 2): @switch) match { - case '1' => consumeAndReturn(ctx, 3, '\u0011') - case '2' => consumeAndReturn(ctx, 3, '\u0012') - case '3' => consumeAndReturn(ctx, 3, '\u0013') - case '4' => consumeAndReturn(ctx, 3, '\u0014') - case _ => TokenEscape.NoParse - } - case 'E' if ctx.input(ctx.offset + 2) == 'L' => consumeAndReturn(ctx, 3, '\u001f') - case 'L' if ctx.input(ctx.offset + 2) == 'E' => consumeAndReturn(ctx, 3, '\u0010') + case 'D' if threeAvailable => //DC1 DC2 DC3 DC4 DEL DLE + val c = lookAhead(ctx, 2) + lookAhead(ctx, 1) match { + case 'C' if c == '1' => consumeAndReturn(ctx, 3, '\u0011') + case 'C' if c == '2' => consumeAndReturn(ctx, 3, '\u0012') + case 'C' if c == '3' => consumeAndReturn(ctx, 3, '\u0013') + case 'C' if c == '4' => consumeAndReturn(ctx, 3, '\u0014') + case 'E' if c == 'L' => consumeAndReturn(ctx, 3, '\u001f') + case 'L' if c == 'E' => consumeAndReturn(ctx, 3, '\u0010') case _ => TokenEscape.NoParse } - else TokenEscape.NoParse case 'E' => //EM ETX ETB ESC EOT ENQ - if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'M') consumeAndReturn(ctx, 2, '\u0019') - else if (ctx.offset + 2 < ctx.inputsz) ctx.input(ctx.offset + 1) match { - case 'N' if ctx.input(ctx.offset + 2) == 'Q' => consumeAndReturn(ctx, 3, '\u0005') - case 'O' if ctx.input(ctx.offset + 2) == 'T' => consumeAndReturn(ctx, 3, '\u0004') - case 'S' if ctx.input(ctx.offset + 2) == 'C' => 
consumeAndReturn(ctx, 3, '\u001b') - case 'T' if ctx.input(ctx.offset + 2) == 'X' => consumeAndReturn(ctx, 3, '\u0003') - case 'T' if ctx.input(ctx.offset + 2) == 'B' => consumeAndReturn(ctx, 3, '\u0017') + if (lookAhead(ctx, 1, 'M')) consumeAndReturn(ctx, 2, '\u0019') + else if (threeAvailable) lookAhead(ctx, 1) match { + case 'N' if lookAhead(ctx, 2) == 'Q' => consumeAndReturn(ctx, 3, '\u0005') + case 'O' if lookAhead(ctx, 2) == 'T' => consumeAndReturn(ctx, 3, '\u0004') + case 'S' if lookAhead(ctx, 2) == 'C' => consumeAndReturn(ctx, 3, '\u001b') + case 'T' if lookAhead(ctx, 2) == 'X' => consumeAndReturn(ctx, 3, '\u0003') + case 'T' if lookAhead(ctx, 2) == 'B' => consumeAndReturn(ctx, 3, '\u0017') case _ => TokenEscape.NoParse } else TokenEscape.NoParse case 'F' => //FF FS - if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'F') consumeAndReturn(ctx, 2, '\u000c') - else if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') consumeAndReturn(ctx, 2, '\u001c') - else TokenEscape.NoParse - case 'G' => //GS - if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') consumeAndReturn(ctx, 2, '\u001d') - else TokenEscape.NoParse - case 'H' => //HT - if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'T') consumeAndReturn(ctx, 2, '\u0009') - else TokenEscape.NoParse - case 'L' => //LF - if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'F') consumeAndReturn(ctx, 2, '\n') + if (lookAhead(ctx, 1, 'F')) consumeAndReturn(ctx, 2, '\u000c') + else if (lookAhead(ctx, 1, 'S')) consumeAndReturn(ctx, 2, '\u001c') else TokenEscape.NoParse + case 'G' if lookAhead(ctx, 1, 'S') => consumeAndReturn(ctx, 2, '\u001d') //GS + case 'H' if lookAhead(ctx, 1, 'T') => consumeAndReturn(ctx, 2, '\u0009') //HT + case 'L' if lookAhead(ctx, 1, 'F') => consumeAndReturn(ctx, 2, '\n') //LF case 'N' => //NUL NAK - if (ctx.offset + 2 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'U' && ctx.input(ctx.offset + 2) == 'L') 
consumeAndReturn(ctx, 3, '\u0000') - else if (ctx.offset + 2 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'A' && ctx.input(ctx.offset + 2) == 'K') { + if (threeAvailable && lookAhead(ctx, 1) == 'U' && lookAhead(ctx, 2) == 'L') consumeAndReturn(ctx, 3, '\u0000') + else if (threeAvailable && lookAhead(ctx, 1) == 'A' && lookAhead(ctx, 2) == 'K') { consumeAndReturn(ctx, 3, '\u0015') } else TokenEscape.NoParse - case 'R' => //RS - if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') consumeAndReturn(ctx, 2, '\u001e') - else TokenEscape.NoParse + case 'R' if lookAhead(ctx, 1, 'S') => consumeAndReturn(ctx, 2, '\u001e') //RS case 'S' => //SO SI SP SOH STX SYN SUB - if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'O') consumeAndReturn(ctx, 2, '\u000e') - else if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'I') consumeAndReturn(ctx, 2, '\u000f') - else if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'P') consumeAndReturn(ctx, 2, '\u0020') - else if (ctx.offset + 2 < ctx.inputsz) ctx.input(ctx.offset + 1) match { - case 'O' if ctx.input(ctx.offset + 2) == 'H' => consumeAndReturn(ctx, 3, '\u0001') - case 'T' if ctx.input(ctx.offset + 2) == 'X' => consumeAndReturn(ctx, 3, '\u0002') - case 'Y' if ctx.input(ctx.offset + 2) == 'N' => consumeAndReturn(ctx, 3, '\u0016') - case 'U' if ctx.input(ctx.offset + 2) == 'B' => consumeAndReturn(ctx, 3, '\u001a') + if (lookAhead(ctx, 1, 'O')) consumeAndReturn(ctx, 2, '\u000e') + else if (lookAhead(ctx, 1, 'I')) consumeAndReturn(ctx, 2, '\u000f') + else if (lookAhead(ctx, 1, 'P')) consumeAndReturn(ctx, 2, '\u0020') + else if (threeAvailable) lookAhead(ctx, 1) match { + case 'O' if lookAhead(ctx, 2) == 'H' => consumeAndReturn(ctx, 3, '\u0001') + case 'T' if lookAhead(ctx, 2) == 'X' => consumeAndReturn(ctx, 3, '\u0002') + case 'Y' if lookAhead(ctx, 2) == 'N' => consumeAndReturn(ctx, 3, '\u0016') + case 'U' if lookAhead(ctx, 2) == 'B' => consumeAndReturn(ctx, 3, '\u001a') case 
_ => TokenEscape.NoParse } else TokenEscape.NoParse - case 'U' => //US - if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'S') consumeAndReturn(ctx, 2, '\u001f') - else TokenEscape.NoParse - case 'V' => //VT - if (ctx.offset + 1 < ctx.inputsz && ctx.input(ctx.offset + 1) == 'T') consumeAndReturn(ctx, 2, '\u000b') - else TokenEscape.NoParse + case 'U' if lookAhead(ctx, 1, 'S') => consumeAndReturn(ctx, 2, '\u001f') //US + case 'V' if lookAhead(ctx, 1, 'T') => consumeAndReturn(ctx, 2, '\u000b') //VT case _ => TokenEscape.NoParse } } From f2414f7e4c650d148c8595f432134b58f272e455 Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Mon, 4 Jan 2021 22:16:21 +0000 Subject: [PATCH 21/34] factored more code out --- .../internal/instructions/TokenInstrs.scala | 96 +++++++++++-------- 1 file changed, 54 insertions(+), 42 deletions(-) diff --git a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala index ec1afd267..0cba1dee4 100644 --- a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala @@ -326,7 +326,7 @@ private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends In case TokenEscape.NoParse => ctx.fail(expected) } - private final def consumeAndReturn(ctx: Context, n: Int, c: Char): TokenEscape.Escape = { + private final def consumeAndReturn(ctx: Context, n: Int, c: Char) = { ctx.fastUncheckedConsumeChars(n) new TokenEscape.EscapeChar(c) } @@ -334,6 +334,55 @@ private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends In private final def lookAhead(ctx: Context, n: Int): Char = ctx.input(ctx.offset + n) private final def lookAhead(ctx: Context, n: Int, c: Char): Boolean = ctx.offset + n < ctx.inputsz && lookAhead(ctx, n) == c + private final def decimalEscape(ctx: Context, d: Int) = { + ctx.fastUncheckedConsumeChars(1) + val escapeCode = decimal(ctx, d) + if (escapeCode 
<= 0x10FFFF) new TokenEscape.EscapeChar(escapeCode.toChar) + else TokenEscape.BadCode + } + + private final def hexadecimalEscape(ctx: Context) = { + ctx.fastUncheckedConsumeChars(1) + if (ctx.moreInput) { + ctx.nextChar match { + case d@( '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' + | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' + | 'A' | 'B' | 'C' | 'D' | 'E' | 'F') => + ctx.fastUncheckedConsumeChars(1) + val escapeCode = hexadecimal(ctx, d.asDigit) + if (escapeCode <= 0x10FFFF) new TokenEscape.EscapeChar(escapeCode.toChar) + else TokenEscape.BadCode + case _ => TokenEscape.NoParse + } + } + else TokenEscape.NoParse + } + + private final def octalEscape(ctx: Context) = { + ctx.fastUncheckedConsumeChars(1) + if (ctx.moreInput) { + val d = ctx.nextChar.asDigit + if (d >= 0 && d <= 7) { + ctx.fastUncheckedConsumeChars(1) + val escapeCode = octal(ctx, d) + if (escapeCode <= 0x10FFFF) new TokenEscape.EscapeChar(escapeCode.toChar) + else TokenEscape.BadCode + } + else TokenEscape.NoParse + } + else TokenEscape.NoParse + } + + private final def caretEscape(ctx: Context) = { + ctx.fastUncheckedConsumeChars(1) + if (ctx.moreInput) { + val c = ctx.nextChar + if (c >= 'A' && c <= 'Z') consumeAndReturn(ctx, 1, (c - 'A' + 1).toChar) + else TokenEscape.NoParse + } + else TokenEscape.NoParse + } + protected final def escape(ctx: Context): TokenEscape.Escape = { val threeAvailable = ctx.offset + 2 < ctx.inputsz if (ctx.moreInput) { @@ -348,47 +397,10 @@ private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends In case '\\' => consumeAndReturn(ctx, 1, '\\') case '\"' => consumeAndReturn(ctx, 1, '\"') case '\'' => consumeAndReturn(ctx, 1, '\'') - case d@('0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9') => - ctx.fastUncheckedConsumeChars(1) - val escapeCode = decimal(ctx, d.asDigit) - if (escapeCode <= 0x10FFFF) new TokenEscape.EscapeChar(escapeCode.toChar) - else TokenEscape.BadCode - case 'x' => - ctx.fastUncheckedConsumeChars(1) - if 
(ctx.moreInput) { - ctx.nextChar match { - case d@( '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' - | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' - | 'A' | 'B' | 'C' | 'D' | 'E' | 'F') => - ctx.fastUncheckedConsumeChars(1) - val escapeCode = hexadecimal(ctx, d.asDigit) - if (escapeCode <= 0x10FFFF) new TokenEscape.EscapeChar(escapeCode.toChar) - else TokenEscape.BadCode - case _ => TokenEscape.NoParse - } - } - else TokenEscape.NoParse - case 'o' => - ctx.fastUncheckedConsumeChars(1) - if (ctx.moreInput) { - val d = ctx.nextChar - if (d >= '0' && d <= '7') { - ctx.fastUncheckedConsumeChars(1) - val escapeCode = octal(ctx, d.asDigit) - if (escapeCode <= 0x10FFFF) new TokenEscape.EscapeChar(escapeCode.toChar) - else TokenEscape.BadCode - } - else TokenEscape.NoParse - } - else TokenEscape.NoParse - case '^' => - ctx.fastUncheckedConsumeChars(1) - if (ctx.moreInput) { - val c = ctx.nextChar - if (c >= 'A' && c <= 'Z') consumeAndReturn(ctx, 1, (c - 'A' + 1).toChar) - else TokenEscape.NoParse - } - else TokenEscape.NoParse + case d@('0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9') => decimalEscape(ctx, d.asDigit) + case 'x' => hexadecimalEscape(ctx) + case 'o' => octalEscape(ctx) + case '^' => caretEscape(ctx) case 'A' if threeAvailable && lookAhead(ctx, 1) == 'C' && lookAhead(ctx, 2) == 'K' => consumeAndReturn(ctx, 3, '\u0006') //ACK case 'B' => //BS BEL if (lookAhead(ctx, 1, 'S')) consumeAndReturn(ctx, 2, '\u0008') From d024e9f3ebffce3f58f31359717b61909296ccaf Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Mon, 4 Jan 2021 22:52:22 +0000 Subject: [PATCH 22/34] Reduced complexity of several helpers --- .../internal/instructions/TokenInstrs.scala | 36 +++++++------------ 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala index 0cba1dee4..02e951220 100644 ---
a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala @@ -343,43 +343,31 @@ private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends In private final def hexadecimalEscape(ctx: Context) = { ctx.fastUncheckedConsumeChars(1) - if (ctx.moreInput) { - ctx.nextChar match { - case d@( '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' - | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' - | 'A' | 'B' | 'C' | 'D' | 'E' | 'F') => - ctx.fastUncheckedConsumeChars(1) - val escapeCode = hexadecimal(ctx, d.asDigit) - if (escapeCode <= 0x10FFFF) new TokenEscape.EscapeChar(escapeCode.toChar) - else TokenEscape.BadCode - case _ => TokenEscape.NoParse - } + if (ctx.moreInput && (ctx.nextChar.isDigit || (ctx.nextChar.toLower >= 'a' && ctx.nextChar.toLower <= 'f'))) { + val d = ctx.nextChar.asDigit + ctx.fastUncheckedConsumeChars(1) + val escapeCode = hexadecimal(ctx, d) + if (escapeCode <= 0x10FFFF) new TokenEscape.EscapeChar(escapeCode.toChar) + else TokenEscape.BadCode } else TokenEscape.NoParse } private final def octalEscape(ctx: Context) = { ctx.fastUncheckedConsumeChars(1) - if (ctx.moreInput) { + if (ctx.moreInput && ctx.nextChar >= '0' && ctx.nextChar <= '7') { val d = ctx.nextChar.asDigit - if (d >= 0 && d <= 7) { - ctx.fastUncheckedConsumeChars(1) - val escapeCode = octal(ctx, d) - if (escapeCode <= 0x10FFFF) new TokenEscape.EscapeChar(escapeCode.toChar) - else TokenEscape.BadCode - } - else TokenEscape.NoParse + ctx.fastUncheckedConsumeChars(1) + val escapeCode = octal(ctx, d) + if (escapeCode <= 0x10FFFF) new TokenEscape.EscapeChar(escapeCode.toChar) + else TokenEscape.BadCode } else TokenEscape.NoParse } private final def caretEscape(ctx: Context) = { ctx.fastUncheckedConsumeChars(1) - if (ctx.moreInput) { - val c = ctx.nextChar - if (c >= 'A' && c <= 'Z') consumeAndReturn(ctx, 1, (c - 'A' + 1).toChar) - else TokenEscape.NoParse - } + if (ctx.moreInput && ctx.nextChar >= 'A' && 
ctx.nextChar <= 'Z') consumeAndReturn(ctx, 1, (ctx.nextChar - 'A' + 1).toChar) else TokenEscape.NoParse } From 87acfdd94ba15889ff0581b04c042683ab6063e8 Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Mon, 4 Jan 2021 23:03:25 +0000 Subject: [PATCH 23/34] Factored out more --- .../internal/instructions/TokenInstrs.scala | 28 ++++++------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala index 02e951220..baff52743 100644 --- a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala @@ -334,37 +334,25 @@ private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends In private final def lookAhead(ctx: Context, n: Int): Char = ctx.input(ctx.offset + n) private final def lookAhead(ctx: Context, n: Int, c: Char): Boolean = ctx.offset + n < ctx.inputsz && lookAhead(ctx, n) == c - private final def decimalEscape(ctx: Context, d: Int) = { + private final def numericEscape(ctx: Context, code: =>Int) = { ctx.fastUncheckedConsumeChars(1) - val escapeCode = decimal(ctx, d) + val escapeCode = code if (escapeCode <= 0x10FFFF) new TokenEscape.EscapeChar(escapeCode.toChar) else TokenEscape.BadCode } - private final def hexadecimalEscape(ctx: Context) = { + private final def nonDecimalNumericEscape(ctx: Context, lexer: (Context, Int) => Int, validDigit: Char => Boolean) = { ctx.fastUncheckedConsumeChars(1) - if (ctx.moreInput && (ctx.nextChar.isDigit || (ctx.nextChar.toLower >= 'a' && ctx.nextChar.toLower <= 'f'))) { + if (ctx.moreInput && validDigit(ctx.nextChar)) { val d = ctx.nextChar.asDigit - ctx.fastUncheckedConsumeChars(1) - val escapeCode = hexadecimal(ctx, d) - if (escapeCode <= 0x10FFFF) new TokenEscape.EscapeChar(escapeCode.toChar) - else TokenEscape.BadCode - } - else TokenEscape.NoParse - } - - private final def octalEscape(ctx: 
Context) = { - ctx.fastUncheckedConsumeChars(1) - if (ctx.moreInput && ctx.nextChar >= '0' && ctx.nextChar <= '7') { - val d = ctx.nextChar.asDigit - ctx.fastUncheckedConsumeChars(1) - val escapeCode = octal(ctx, d) - if (escapeCode <= 0x10FFFF) new TokenEscape.EscapeChar(escapeCode.toChar) - else TokenEscape.BadCode + numericEscape(ctx, lexer(ctx, d)) } else TokenEscape.NoParse } + private final def decimalEscape(ctx: Context, d: Int) = numericEscape(ctx, decimal(ctx, d)) + private final def hexadecimalEscape(ctx: Context) = nonDecimalNumericEscape(ctx, hexadecimal, c => c.isDigit || (c.toLower >= 'a' && c.toLower <= 'f')) + private final def octalEscape(ctx: Context) = nonDecimalNumericEscape(ctx, octal, c => c >= '0' && c <= '7') private final def caretEscape(ctx: Context) = { ctx.fastUncheckedConsumeChars(1) if (ctx.moreInput && ctx.nextChar >= 'A' && ctx.nextChar <= 'Z') consumeAndReturn(ctx, 1, (ctx.nextChar - 'A' + 1).toChar) From 07ec0b330a50a4245a0b3e2e097957f062fad11f Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Mon, 4 Jan 2021 23:39:33 +0000 Subject: [PATCH 24/34] Minor refactors --- .../internal/instructions/TokenInstrs.scala | 127 +++++++++--------- 1 file changed, 60 insertions(+), 67 deletions(-) diff --git a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala index baff52743..c1dcb21b7 100644 --- a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala @@ -201,43 +201,36 @@ private [instructions] sealed trait NumericReader { private [internal] final class TokenNatural(_expected: UnsafeOption[String]) extends Instr with NumericReader { val expected = if (_expected == null) "natural" else _expected override def apply(ctx: Context): Unit = { - if (ctx.moreInput) (ctx.nextChar: @switch) match { + if (ctx.moreInput) ctx.nextChar match { case '0' => ctx.fastUncheckedConsumeChars(1) if 
(!ctx.moreInput) ctx.pushAndContinue(0) - else { - (ctx.nextChar: @switch) match { - case 'x' | 'X' => - ctx.fastUncheckedConsumeChars(1) - if (ctx.moreInput) { - (ctx.nextChar: @switch) match { - case d@('0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' - | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' - | 'A' | 'B' | 'C' | 'D' | 'E' | 'F') => - ctx.fastUncheckedConsumeChars(1) - ctx.pushAndContinue(hexadecimal(ctx, d.asDigit)) - case _ => ctx.fail(expected) - } - } - else ctx.fail(expected) - case 'o' | 'O' => - ctx.fastUncheckedConsumeChars(1) - if (ctx.moreInput) { - val d = ctx.nextChar - if (d >= '0' && d <= '7') { - ctx.fastUncheckedConsumeChars(1) - ctx.pushAndContinue(octal(ctx, d.asDigit)) - } - else ctx.fail(expected) - } - else ctx.fail(expected) - case d@('0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9') => + else ctx.nextChar match { + case 'x' | 'X' => + ctx.fastUncheckedConsumeChars(1) + if (ctx.moreInput) ctx.nextChar match { + case d@('0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' + | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' + | 'A' | 'B' | 'C' | 'D' | 'E' | 'F') => + ctx.fastUncheckedConsumeChars(1) + ctx.pushAndContinue(hexadecimal(ctx, d.asDigit)) + case _ => ctx.fail(expected) + } + else ctx.fail(expected) + case 'o' | 'O' => + ctx.fastUncheckedConsumeChars(1) + if (ctx.moreInput && ctx.nextChar >= '0' && ctx.nextChar <= '7') { + val d = ctx.nextChar ctx.fastUncheckedConsumeChars(1) - ctx.pushAndContinue(decimal(ctx, d.asDigit)) - case _ => ctx.pushAndContinue(0) - } + ctx.pushAndContinue(octal(ctx, d.asDigit)) + } + else ctx.fail(expected) + case d if d.isDigit => + ctx.fastUncheckedConsumeChars(1) + ctx.pushAndContinue(decimal(ctx, d.asDigit)) + case _ => ctx.pushAndContinue(0) } - case d@('1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9') => + case d if d.isDigit => ctx.fastUncheckedConsumeChars(1) ctx.pushAndContinue(decimal(ctx, d.asDigit)) case _ => ctx.fail(expected) @@ -253,49 +246,49 @@ private [internal] final class 
TokenNatural(_expected: UnsafeOption[String]) ext private [internal] final class TokenFloat(_expected: UnsafeOption[String]) extends Instr { val expected = if (_expected == null) "unsigned float" else _expected override def apply(ctx: Context): Unit = { - var failed = false - if (ctx.moreInput) (ctx.nextChar: @switch) match { - case d@('0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9') => - ctx.fastUncheckedConsumeChars(1) - val builder = new StringBuilder() - failed = decimal(ctx, builder += d, false) - if (ctx.moreInput) (ctx.nextChar: @switch) match { - case '.' => // fraction - ctx.fastUncheckedConsumeChars(1) - failed = decimal(ctx, builder += '.') - if (!failed) { - if (ctx.moreInput && (ctx.nextChar == 'e' || ctx.nextChar == 'E')) { - ctx.fastUncheckedConsumeChars(1) - failed = exponent(ctx, builder += 'e') + if (ctx.moreInput && ctx.nextChar.isDigit) { + val d = ctx.nextChar + ctx.fastUncheckedConsumeChars(1) + val builder = new StringBuilder() + if (decimal(ctx, builder += d, false) && ctx.moreInput) ctx.nextChar match { + case '.' 
=> // fraction + ctx.fastUncheckedConsumeChars(1) + if (decimal(ctx, builder += '.')) { + if (ctx.moreInput && (ctx.nextChar == 'e' || ctx.nextChar == 'E')) { + ctx.fastUncheckedConsumeChars(1) + if (exponent(ctx, builder += 'e')) { + try ctx.pushAndContinue(builder.toString.toDouble) + catch { case _: NumberFormatException => ctx.fail(expected) } } - if (!failed) try ctx.stack.push(builder.toString.toDouble) - catch { case _: NumberFormatException => failed = true } + else ctx.fail(expected) } - case 'e' | 'E' => // exponent - ctx.fastUncheckedConsumeChars(1) - failed = exponent(ctx, builder += 'e') - if (!failed) try ctx.stack.push(builder.toString.toDouble) + else { + try ctx.pushAndContinue(builder.toString.toDouble) + catch { case _: NumberFormatException => ctx.fail(expected) } + } + } + else ctx.fail(expected) + case 'e' | 'E' => // exponent + ctx.fastUncheckedConsumeChars(1) + if (exponent(ctx, builder += 'e')) { + try ctx.pushAndContinue(builder.toString.toDouble) catch { case _: NumberFormatException => ctx.fail(expected) } - case _ => failed = true - } - else failed = true - case _ => failed = true + } + else ctx.fail(expected) + case _ => ctx.fail(expected) + } + else ctx.fail(expected) } - else failed = true - if (failed) ctx.fail(expected) - else ctx.inc() + else ctx.fail(expected) } @tailrec private def decimal(ctx: Context, x: StringBuilder, first: Boolean = true): Boolean = { - if (ctx.moreInput) { + if (ctx.moreInput && ctx.nextChar >= '0' && ctx.nextChar <= '9') { val d = ctx.nextChar - if (d >= '0' && d <= '9') { - ctx.fastUncheckedConsumeChars(1) - decimal(ctx, x += d, false) - } - else first + ctx.fastUncheckedConsumeChars(1) + decimal(ctx, x += d, false) } - else first + else !first } private def exponent(ctx: Context, x: StringBuilder): Boolean = { @@ -310,7 +303,7 @@ private [internal] final class TokenFloat(_expected: UnsafeOption[String]) exten case _ => decimal(ctx, x) } } - else true + else false } // $COVERAGE-OFF$ From 
4303d001c8c730dd5731517c0ff875f041759b15 Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Mon, 4 Jan 2021 23:51:00 +0000 Subject: [PATCH 25/34] Refactor float --- .../internal/instructions/TokenInstrs.scala | 60 +++++++++---------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala index c1dcb21b7..8788a6527 100644 --- a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala @@ -250,62 +250,62 @@ private [internal] final class TokenFloat(_expected: UnsafeOption[String]) exten val d = ctx.nextChar ctx.fastUncheckedConsumeChars(1) val builder = new StringBuilder() - if (decimal(ctx, builder += d, false) && ctx.moreInput) ctx.nextChar match { - case '.' => // fraction + if (decimal(ctx, builder += d, false) && ctx.moreInput) { + if (ctx.nextChar == '.') { // fraction ctx.fastUncheckedConsumeChars(1) - if (decimal(ctx, builder += '.')) { - if (ctx.moreInput && (ctx.nextChar == 'e' || ctx.nextChar == 'E')) { - ctx.fastUncheckedConsumeChars(1) - if (exponent(ctx, builder += 'e')) { - try ctx.pushAndContinue(builder.toString.toDouble) - catch { case _: NumberFormatException => ctx.fail(expected) } - } - else ctx.fail(expected) - } - else { - try ctx.pushAndContinue(builder.toString.toDouble) - catch { case _: NumberFormatException => ctx.fail(expected) } - } - } + if (decimal(ctx, builder += '.')) lexExponent(ctx, builder, missingOk = true) else ctx.fail(expected) - case 'e' | 'E' => // exponent - ctx.fastUncheckedConsumeChars(1) - if (exponent(ctx, builder += 'e')) { - try ctx.pushAndContinue(builder.toString.toDouble) - catch { case _: NumberFormatException => ctx.fail(expected) } - } - else ctx.fail(expected) - case _ => ctx.fail(expected) + } + else lexExponent(ctx, builder, missingOk = false) } else ctx.fail(expected) } else ctx.fail(expected) } - 
@tailrec private def decimal(ctx: Context, x: StringBuilder, first: Boolean = true): Boolean = { + @tailrec private final def decimal(ctx: Context, builder: StringBuilder, first: Boolean = true): Boolean = { if (ctx.moreInput && ctx.nextChar >= '0' && ctx.nextChar <= '9') { val d = ctx.nextChar ctx.fastUncheckedConsumeChars(1) - decimal(ctx, x += d, false) + decimal(ctx, builder += d, false) } else !first } - private def exponent(ctx: Context, x: StringBuilder): Boolean = { + private final def exponent(ctx: Context, builder: StringBuilder): Boolean = { if (ctx.moreInput) { ctx.nextChar match { case '+' => ctx.fastUncheckedConsumeChars(1) - decimal(ctx, x) + decimal(ctx, builder) case '-' => ctx.fastUncheckedConsumeChars(1) - decimal(ctx, x += '-') - case _ => decimal(ctx, x) + decimal(ctx, builder += '-') + case _ => decimal(ctx, builder) } } else false } + private final def attemptCastAndContinue(ctx: Context, builder: StringBuilder): Unit = { + try ctx.pushAndContinue(builder.toString.toDouble) + catch { + case _: NumberFormatException => ctx.fail(expected) + } + } + + private final def lexExponent(ctx: Context, builder: StringBuilder, missingOk: Boolean): Unit = { + if (ctx.moreInput && (ctx.nextChar == 'e' || ctx.nextChar == 'E')) { + ctx.fastUncheckedConsumeChars(1) + if (exponent(ctx, builder += 'e')) { + attemptCastAndContinue(ctx, builder) + } + else ctx.fail(expected) + } + else if (missingOk) attemptCastAndContinue(ctx, builder) + else ctx.fail(expected) + } + // $COVERAGE-OFF$ override def toString: String = "TokenFloat" // $COVERAGE-ON$ From e1835947db4c79a370bfd52cfcb65e8420dadd14 Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Tue, 5 Jan 2021 00:01:10 +0000 Subject: [PATCH 26/34] final refactor of TokenFloat --- .../internal/instructions/TokenInstrs.scala | 49 +++++++++---------- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala 
b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala index 8788a6527..8814fa1eb 100644 --- a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala @@ -246,18 +246,12 @@ private [internal] final class TokenNatural(_expected: UnsafeOption[String]) ext private [internal] final class TokenFloat(_expected: UnsafeOption[String]) extends Instr { val expected = if (_expected == null) "unsigned float" else _expected override def apply(ctx: Context): Unit = { + if (ctx.moreInput && ctx.nextChar.isDigit) { - val d = ctx.nextChar - ctx.fastUncheckedConsumeChars(1) val builder = new StringBuilder() - if (decimal(ctx, builder += d, false) && ctx.moreInput) { - if (ctx.nextChar == '.') { // fraction - ctx.fastUncheckedConsumeChars(1) - if (decimal(ctx, builder += '.')) lexExponent(ctx, builder, missingOk = true) - else ctx.fail(expected) - } - else lexExponent(ctx, builder, missingOk = false) - } + builder += ctx.nextChar + ctx.fastUncheckedConsumeChars(1) + if (decimal(ctx, builder, false) && ctx.moreInput) lexFraction(ctx, builder) else ctx.fail(expected) } else ctx.fail(expected) @@ -265,24 +259,22 @@ private [internal] final class TokenFloat(_expected: UnsafeOption[String]) exten @tailrec private final def decimal(ctx: Context, builder: StringBuilder, first: Boolean = true): Boolean = { if (ctx.moreInput && ctx.nextChar >= '0' && ctx.nextChar <= '9') { - val d = ctx.nextChar + builder += ctx.nextChar ctx.fastUncheckedConsumeChars(1) - decimal(ctx, builder += d, false) + decimal(ctx, builder, false) } else !first } private final def exponent(ctx: Context, builder: StringBuilder): Boolean = { + ctx.fastUncheckedConsumeChars(1) if (ctx.moreInput) { - ctx.nextChar match { - case '+' => - ctx.fastUncheckedConsumeChars(1) - decimal(ctx, builder) - case '-' => - ctx.fastUncheckedConsumeChars(1) - decimal(ctx, builder += '-') - case _ => decimal(ctx, builder) + if (ctx.nextChar == '+') 
ctx.fastUncheckedConsumeChars(1) + else if (ctx.nextChar == '-') { + ctx.fastUncheckedConsumeChars(1) + builder += '-' } + decimal(ctx, builder) } else false } @@ -295,15 +287,20 @@ private [internal] final class TokenFloat(_expected: UnsafeOption[String]) exten } private final def lexExponent(ctx: Context, builder: StringBuilder, missingOk: Boolean): Unit = { - if (ctx.moreInput && (ctx.nextChar == 'e' || ctx.nextChar == 'E')) { + val requireExponent = ctx.moreInput && ctx.nextChar == 'e' || ctx.nextChar == 'E' + if (requireExponent && exponent(ctx, builder += 'e')) attemptCastAndContinue(ctx, builder) + else if (requireExponent) ctx.fail(expected) + else if (missingOk) attemptCastAndContinue(ctx, builder) + else ctx.fail(expected) + } + + private final def lexFraction(ctx: Context, builder: StringBuilder) = { + if (ctx.nextChar == '.') { ctx.fastUncheckedConsumeChars(1) - if (exponent(ctx, builder += 'e')) { - attemptCastAndContinue(ctx, builder) - } + if (decimal(ctx, builder += '.')) lexExponent(ctx, builder, missingOk = true) else ctx.fail(expected) } - else if (missingOk) attemptCastAndContinue(ctx, builder) - else ctx.fail(expected) + else lexExponent(ctx, builder, missingOk = false) } // $COVERAGE-OFF$ From 31e7c5ebdab84ed8129e4540de39403c0cd1af6c Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Tue, 5 Jan 2021 00:05:00 +0000 Subject: [PATCH 27/34] Fixed broken condition --- src/main/scala/parsley/internal/instructions/TokenInstrs.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala index 8814fa1eb..718695aff 100644 --- a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala @@ -287,7 +287,7 @@ private [internal] final class TokenFloat(_expected: UnsafeOption[String]) exten } private final def lexExponent(ctx: Context, builder: 
StringBuilder, missingOk: Boolean): Unit = { - val requireExponent = ctx.moreInput && ctx.nextChar == 'e' || ctx.nextChar == 'E' + val requireExponent = ctx.moreInput && (ctx.nextChar == 'e' || ctx.nextChar == 'E') if (requireExponent && exponent(ctx, builder += 'e')) attemptCastAndContinue(ctx, builder) else if (requireExponent) ctx.fail(expected) else if (missingOk) attemptCastAndContinue(ctx, builder) From 7810c9a44745d5951ce8be729005aef8edbd60b9 Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Tue, 5 Jan 2021 10:54:16 +0000 Subject: [PATCH 28/34] Removed last duplicated code --- .../internal/instructions/TokenInstrs.scala | 125 ++++++++---------- 1 file changed, 57 insertions(+), 68 deletions(-) diff --git a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala index 718695aff..bd4c79c81 100644 --- a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala @@ -147,19 +147,18 @@ private [internal] final class TokenSign(ty: SignType, _expected: UnsafeOption[S case IntType => ((x: Int) => -x).asInstanceOf[Any => Any] case DoubleType => ((x: Double) => -x).asInstanceOf[Any => Any] } - val pos: Any => Any = x => x + val pos = (x: Any) => x override def apply(ctx: Context): Unit = { - if (ctx.moreInput) { - if (ctx.nextChar == '-') { - ctx.fastUncheckedConsumeChars(1) - ctx.stack.push(neg) - } - else { - if (ctx.nextChar == '+') ctx.fastUncheckedConsumeChars(1) - ctx.stack.push(pos) - } + if (ctx.moreInput && ctx.nextChar == '-') { + ctx.fastUncheckedConsumeChars(1) + ctx.stack.push(neg) } + else if (ctx.moreInput && ctx.nextChar == '+') { + ctx.fastUncheckedConsumeChars(1) + ctx.stack.push(pos) + } + else ctx.stack.push(pos) ctx.inc() } @@ -169,73 +168,63 @@ private [internal] final class TokenSign(ty: SignType, _expected: UnsafeOption[S } private [instructions] sealed trait NumericReader { - private final def 
subDecimal(base: Int, maxDigit: Char, ctx: Context): Int => Int = { - @tailrec def go(x: Int): Int = { + private final def subDecimal(base: Int, maxDigit: Char, ctx: Context): (Int, Boolean) => Option[Int] = { + @tailrec def go(x: Int, first: Boolean): Option[Int] = { if (ctx.moreInput && ctx.nextChar >= '0' && ctx.nextChar <= maxDigit) { val d = ctx.nextChar.asDigit ctx.fastUncheckedConsumeChars(1) - go(x * base + d) + go(x * base + d, false) } - else x + else if (first) None + else Some(x) } go } - protected final def decimal(ctx: Context, firstDigit: Int = 0): Int = subDecimal(10, '9', ctx)(firstDigit) - protected final def octal(ctx: Context, firstDigit: Int = 0): Int = subDecimal(8, '7', ctx)(firstDigit) + protected final def decimal(ctx: Context, x: Int, first: Boolean): Option[Int] = subDecimal(10, '9', ctx)(x, first) + protected final def octal(ctx: Context, x: Int, first: Boolean): Option[Int] = subDecimal(8, '7', ctx)(x, first) - @tailrec protected final def hexadecimal(ctx: Context, x: Int = 0): Int = { + @tailrec protected final def hexadecimal(ctx: Context, x: Int, first: Boolean): Option[Int] = { if (ctx.moreInput) { (ctx.nextChar: @switch) match { - case d@('0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' - | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' - | 'A' | 'B' | 'C' | 'D' | 'E' | 'F') => + case d@( '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' + | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' + | 'A' | 'B' | 'C' | 'D' | 'E' | 'F') => ctx.fastUncheckedConsumeChars(1) - hexadecimal(ctx, x * 16 + d.asDigit) - case _ => x + hexadecimal(ctx, x * 16 + d.asDigit, false) + case _ => Some(x) } } - else x + else if (first) None + else Some(x) } } private [internal] final class TokenNatural(_expected: UnsafeOption[String]) extends Instr with NumericReader { val expected = if (_expected == null) "natural" else _expected override def apply(ctx: Context): Unit = { - if (ctx.moreInput) ctx.nextChar match { - case '0' => - ctx.fastUncheckedConsumeChars(1) - if 
(!ctx.moreInput) ctx.pushAndContinue(0) - else ctx.nextChar match { - case 'x' | 'X' => - ctx.fastUncheckedConsumeChars(1) - if (ctx.moreInput) ctx.nextChar match { - case d@('0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' - | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' - | 'A' | 'B' | 'C' | 'D' | 'E' | 'F') => - ctx.fastUncheckedConsumeChars(1) - ctx.pushAndContinue(hexadecimal(ctx, d.asDigit)) - case _ => ctx.fail(expected) - } - else ctx.fail(expected) - case 'o' | 'O' => - ctx.fastUncheckedConsumeChars(1) - if (ctx.moreInput && ctx.nextChar >= '0' && ctx.nextChar <= '7') { - val d = ctx.nextChar - ctx.fastUncheckedConsumeChars(1) - ctx.pushAndContinue(octal(ctx, d.asDigit)) - } - else ctx.fail(expected) - case d if d.isDigit => - ctx.fastUncheckedConsumeChars(1) - ctx.pushAndContinue(decimal(ctx, d.asDigit)) - case _ => ctx.pushAndContinue(0) - } - case d if d.isDigit => - ctx.fastUncheckedConsumeChars(1) - ctx.pushAndContinue(decimal(ctx, d.asDigit)) - case _ => ctx.fail(expected) + if (ctx.moreInput && ctx.nextChar == '0') { + ctx.fastUncheckedConsumeChars(1) + if (!ctx.moreInput) ctx.pushAndContinue(0) + else ctx.nextChar match { + case 'x' | 'X' => + ctx.fastUncheckedConsumeChars(1) + hexadecimal(ctx, 0, true) match { + case Some(x) => ctx.pushAndContinue(x) + case None => ctx.fail(expected) + } + case 'o' | 'O' => + ctx.fastUncheckedConsumeChars(1) + octal(ctx, 0, true) match { + case Some(x) => ctx.pushAndContinue(x) + case None => ctx.fail(expected) + } + case _ => ctx.pushAndContinue(decimal(ctx, 0, true).getOrElse(0)) + } + } + else decimal(ctx, 0, true) match { + case Some(x) => ctx.pushAndContinue(x) + case None => ctx.fail(expected) } - else ctx.fail(expected) } // $COVERAGE-OFF$ @@ -324,25 +313,25 @@ private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends In private final def lookAhead(ctx: Context, n: Int): Char = ctx.input(ctx.offset + n) private final def lookAhead(ctx: Context, n: Int, c: Char): Boolean = ctx.offset + n < 
ctx.inputsz && lookAhead(ctx, n) == c - private final def numericEscape(ctx: Context, code: =>Int) = { - ctx.fastUncheckedConsumeChars(1) - val escapeCode = code + private final def numericEscape(ctx: Context, escapeCode: Int) = { if (escapeCode <= 0x10FFFF) new TokenEscape.EscapeChar(escapeCode.toChar) else TokenEscape.BadCode } - private final def nonDecimalNumericEscape(ctx: Context, lexer: (Context, Int) => Int, validDigit: Char => Boolean) = { + private final def nonDecimalNumericEscape(ctx: Context, lexer: (Context, Int, Boolean) => Option[Int]) = { ctx.fastUncheckedConsumeChars(1) - if (ctx.moreInput && validDigit(ctx.nextChar)) { - val d = ctx.nextChar.asDigit - numericEscape(ctx, lexer(ctx, d)) + lexer(ctx, 0, true) match { + case Some(x) => numericEscape(ctx, x) + case None => TokenEscape.NoParse } - else TokenEscape.NoParse } - private final def decimalEscape(ctx: Context, d: Int) = numericEscape(ctx, decimal(ctx, d)) - private final def hexadecimalEscape(ctx: Context) = nonDecimalNumericEscape(ctx, hexadecimal, c => c.isDigit || (c.toLower >= 'a' && c.toLower <= 'f')) - private final def octalEscape(ctx: Context) = nonDecimalNumericEscape(ctx, octal, c => c >= '0' && c <= '7') + private final def decimalEscape(ctx: Context, d: Int) = { + ctx.fastUncheckedConsumeChars(1) + numericEscape(ctx, decimal(ctx, d, false).get) + } + private final def hexadecimalEscape(ctx: Context) = nonDecimalNumericEscape(ctx, hexadecimal) + private final def octalEscape(ctx: Context) = nonDecimalNumericEscape(ctx, octal) private final def caretEscape(ctx: Context) = { ctx.fastUncheckedConsumeChars(1) if (ctx.moreInput && ctx.nextChar >= 'A' && ctx.nextChar <= 'Z') consumeAndReturn(ctx, 1, (ctx.nextChar - 'A' + 1).toChar) From 1630791339fc95a1a4666cd6cc38d267378e13e3 Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Tue, 5 Jan 2021 11:22:33 +0000 Subject: [PATCH 29/34] Further refactoring of the numerics --- .../internal/instructions/TokenInstrs.scala | 77 ++++++------------- 
1 file changed, 25 insertions(+), 52 deletions(-) diff --git a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala index bd4c79c81..ee6bba8d7 100644 --- a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala @@ -168,35 +168,21 @@ private [internal] final class TokenSign(ty: SignType, _expected: UnsafeOption[S } private [instructions] sealed trait NumericReader { - private final def subDecimal(base: Int, maxDigit: Char, ctx: Context): (Int, Boolean) => Option[Int] = { - @tailrec def go(x: Int, first: Boolean): Option[Int] = { - if (ctx.moreInput && ctx.nextChar >= '0' && ctx.nextChar <= maxDigit) { + private final def subDecimal(base: Int, isDigit: Char => Boolean): (Context, Int, Boolean) => Option[Int] = { + @tailrec def go(ctx: Context, x: Int, first: Boolean): Option[Int] = { + if (ctx.moreInput && isDigit(ctx.nextChar)) { val d = ctx.nextChar.asDigit ctx.fastUncheckedConsumeChars(1) - go(x * base + d, false) + go(ctx, x * base + d, false) } else if (first) None else Some(x) } go } - protected final def decimal(ctx: Context, x: Int, first: Boolean): Option[Int] = subDecimal(10, '9', ctx)(x, first) - protected final def octal(ctx: Context, x: Int, first: Boolean): Option[Int] = subDecimal(8, '7', ctx)(x, first) - - @tailrec protected final def hexadecimal(ctx: Context, x: Int, first: Boolean): Option[Int] = { - if (ctx.moreInput) { - (ctx.nextChar: @switch) match { - case d@( '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' - | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' - | 'A' | 'B' | 'C' | 'D' | 'E' | 'F') => - ctx.fastUncheckedConsumeChars(1) - hexadecimal(ctx, x * 16 + d.asDigit, false) - case _ => Some(x) - } - } - else if (first) None - else Some(x) - } + protected final val decimal = subDecimal(10, _.isDigit) + protected final val octal = subDecimal(8, parsley.Char.isOctDigit) + protected final val 
hexadecimal = subDecimal(16, parsley.Char.isHexDigit) } private [internal] final class TokenNatural(_expected: UnsafeOption[String]) extends Instr with NumericReader { @@ -204,22 +190,16 @@ private [internal] final class TokenNatural(_expected: UnsafeOption[String]) ext override def apply(ctx: Context): Unit = { if (ctx.moreInput && ctx.nextChar == '0') { ctx.fastUncheckedConsumeChars(1) - if (!ctx.moreInput) ctx.pushAndContinue(0) - else ctx.nextChar match { - case 'x' | 'X' => - ctx.fastUncheckedConsumeChars(1) - hexadecimal(ctx, 0, true) match { - case Some(x) => ctx.pushAndContinue(x) - case None => ctx.fail(expected) - } - case 'o' | 'O' => - ctx.fastUncheckedConsumeChars(1) - octal(ctx, 0, true) match { - case Some(x) => ctx.pushAndContinue(x) - case None => ctx.fail(expected) - } - case _ => ctx.pushAndContinue(decimal(ctx, 0, true).getOrElse(0)) + lazy val hexa = ctx.nextChar == 'x' || ctx.nextChar == 'X' + lazy val octa = ctx.nextChar == 'o' || ctx.nextChar == 'O' + if (ctx.moreInput && (hexa || octa)) { + ctx.fastUncheckedConsumeChars(1) + (if (hexa) hexadecimal else octal)(ctx, 0, true) match { + case Some(x) => ctx.pushAndContinue(x) + case None => ctx.fail(expected) + } } + else ctx.pushAndContinue(decimal(ctx, 0, true).getOrElse(0)) } else decimal(ctx, 0, true) match { case Some(x) => ctx.pushAndContinue(x) @@ -235,13 +215,9 @@ private [internal] final class TokenNatural(_expected: UnsafeOption[String]) ext private [internal] final class TokenFloat(_expected: UnsafeOption[String]) extends Instr { val expected = if (_expected == null) "unsigned float" else _expected override def apply(ctx: Context): Unit = { - - if (ctx.moreInput && ctx.nextChar.isDigit) { - val builder = new StringBuilder() - builder += ctx.nextChar - ctx.fastUncheckedConsumeChars(1) - if (decimal(ctx, builder, false) && ctx.moreInput) lexFraction(ctx, builder) - else ctx.fail(expected) + val builder = new StringBuilder() + if (decimal(ctx, builder)) { + lexFraction(ctx, builder) } 
else ctx.fail(expected) } @@ -257,15 +233,12 @@ private [internal] final class TokenFloat(_expected: UnsafeOption[String]) exten private final def exponent(ctx: Context, builder: StringBuilder): Boolean = { ctx.fastUncheckedConsumeChars(1) - if (ctx.moreInput) { - if (ctx.nextChar == '+') ctx.fastUncheckedConsumeChars(1) - else if (ctx.nextChar == '-') { - ctx.fastUncheckedConsumeChars(1) - builder += '-' - } - decimal(ctx, builder) + if (ctx.moreInput && ctx.nextChar == '+') ctx.fastUncheckedConsumeChars(1) + else if (ctx.moreInput && ctx.nextChar == '-') { + ctx.fastUncheckedConsumeChars(1) + builder += '-' } - else false + decimal(ctx, builder) } private final def attemptCastAndContinue(ctx: Context, builder: StringBuilder): Unit = { @@ -284,7 +257,7 @@ private [internal] final class TokenFloat(_expected: UnsafeOption[String]) exten } private final def lexFraction(ctx: Context, builder: StringBuilder) = { - if (ctx.nextChar == '.') { + if (ctx.moreInput && ctx.nextChar == '.') { ctx.fastUncheckedConsumeChars(1) if (decimal(ctx, builder += '.')) lexExponent(ctx, builder, missingOk = true) else ctx.fail(expected) From 8db314c2d884d20dca223ecd917e23245ea55f5a Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Tue, 5 Jan 2021 12:36:10 +0000 Subject: [PATCH 30/34] Refactored whitespace and comments a little --- .../internal/instructions/TokenInstrs.scala | 161 +++++++++--------- 1 file changed, 76 insertions(+), 85 deletions(-) diff --git a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala index ee6bba8d7..6d24ec2d0 100644 --- a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala @@ -6,107 +6,48 @@ import parsley.internal.{Radix, UnsafeOption} import scala.annotation.{switch, tailrec} -// TODO This is considered as a VERY rough implementation of the intrinsic, just to get it working, it will be optimised later 
-private [internal] class TokenComment(start: String, end: String, line: String, nested: Boolean) extends Instr { +private [instructions] abstract class CommentLexer(start: String, end: String, line: String, nested: Boolean) extends Instr { protected final val noLine = line.isEmpty protected final val noMulti = start.isEmpty - override def apply(ctx: Context): Unit = { - if (!ctx.moreInput) ctx.fail("comment") - else if (noLine && noMulti) ctx.fail("comment") - else if (noLine) { - if (!ctx.input.startsWith(start, ctx.offset)) ctx.fail("comment") - else { - if (!multiLineComment(ctx)) return - ctx.pushAndContinue(()) - } - } - else if (noMulti) { - if (!ctx.input.startsWith(line, ctx.offset)) ctx.fail("comment") - else { - singleLineComment(ctx) - ctx.pushAndContinue(()) - } - } - else { - val startsSingle = ctx.input.startsWith(line, ctx.offset) - val startsMulti = ctx.input.startsWith(start, ctx.offset) - if (!startsSingle && !startsMulti) ctx.fail("comment") - else { - if (startsMulti) { - if (!multiLineComment(ctx)) return - } - else singleLineComment(ctx) - ctx.pushAndContinue(()) - } - } - } protected final def singleLineComment(ctx: Context): Unit = { ctx.fastUncheckedConsumeChars(line.length) while (ctx.moreInput && ctx.nextChar != '\n') ctx.consumeChar() } - protected final def multiLineComment(ctx: Context): Boolean = { - ctx.fastUncheckedConsumeChars(start.length) - var n = 1 - while (n != 0) { - if (ctx.input.startsWith(end, ctx.offset)) { - ctx.fastUncheckedConsumeChars(end.length) - n -= 1 - } - else if (nested && ctx.input.startsWith(start, ctx.offset)) { - ctx.fastUncheckedConsumeChars(start.length) - n += 1 - } - else if (ctx.moreInput) ctx.consumeChar() - else { - ctx.fail("end of comment") - return false - } + @tailrec private final def wellNested(ctx: Context, unmatched: Int): Boolean = { + if (unmatched == 0) true + else if (ctx.input.startsWith(end, ctx.offset)) { + ctx.fastUncheckedConsumeChars(end.length) + wellNested(ctx, unmatched - 1) } - 
true - } - // $COVERAGE-OFF$ - override def toString: String = "TokenComment" - // $COVERAGE-ON$ -} - -// TODO This is considered as a VERY rough implementation of the intrinsic, just to get it working, it will be optimised later -private [internal] final class TokenSkipComments(start: String, end: String, line: String, nested: Boolean) extends TokenComment(start, end, line, nested) { - override def apply(ctx: Context): Unit = { - if (noLine && !noMulti) { - while (ctx.moreInput && ctx.input.startsWith(start, ctx.offset)) if (!multiLineComment(ctx)) return + else if (nested && ctx.input.startsWith(start, ctx.offset)) { + ctx.fastUncheckedConsumeChars(start.length) + wellNested(ctx, unmatched + 1) } - else if (noMulti && !noLine) { - while (ctx.moreInput && ctx.input.startsWith(line, ctx.offset)) singleLineComment(ctx) - } - else if (!noLine && !noMulti) { - var startsSingle = ctx.input.startsWith(line, ctx.offset) - var startsMulti = ctx.input.startsWith(start, ctx.offset) - while (ctx.moreInput && (startsSingle || startsMulti)) { - if (startsMulti) { - if (!multiLineComment(ctx)) return - } - else singleLineComment(ctx) - startsSingle = ctx.input.startsWith(line, ctx.offset) - startsMulti = ctx.input.startsWith(start, ctx.offset) - } + else if (ctx.moreInput) { + ctx.consumeChar() + wellNested(ctx, unmatched) } - ctx.pushAndContinue(()) + else false + } + + protected final def multiLineComment(ctx: Context): Boolean = { + ctx.fastUncheckedConsumeChars(start.length) + wellNested(ctx, 1) } - // $COVERAGE-OFF$ - override def toString: String = "TokenSkipComments" - // $COVERAGE-ON$ } -private [internal] final class TokenWhiteSpace(ws: TokenSet, start: String, end: String, line: String, nested: Boolean) - extends TokenComment(start, end, line, nested) { - override def apply(ctx: Context): Unit = { +private [instructions] abstract class WhiteSpaceLike(start: String, end: String, line: String, nested: Boolean) extends CommentLexer(start, end, line, nested) { + override 
final def apply(ctx: Context): Unit = { if (noLine && noMulti) spaces(ctx) else if (noLine) { spaces(ctx) while (ctx.moreInput && ctx.input.startsWith(start, ctx.offset)) { - if (!multiLineComment(ctx)) return + if (!multiLineComment(ctx)) { + ctx.fail("end of comment") + return + } spaces(ctx) } } @@ -124,7 +65,10 @@ private [internal] final class TokenWhiteSpace(ws: TokenSet, start: String, end: var startsMulti = ctx.input.startsWith(start, ctx.offset) while (ctx.moreInput && (startsSingle || startsMulti)) { if (startsMulti) { - if (!multiLineComment(ctx)) return + if (!multiLineComment(ctx)) { + ctx.fail("end of comment") + return + } } else singleLineComment(ctx) spaces(ctx) @@ -135,12 +79,59 @@ private [internal] final class TokenWhiteSpace(ws: TokenSet, start: String, end: ctx.pushAndContinue(()) } - private def spaces(ctx: Context): Unit = while (ctx.moreInput && ws(ctx.nextChar)) ctx.consumeChar() + protected def spaces(ctx: Context): Unit +} + +// TODO This is considered as a VERY rough implementation of the intrinsic, just to get it working, it will be optimised later +private [internal] final class TokenComment(start: String, end: String, line: String, nested: Boolean) extends CommentLexer(start, end, line, nested) { + override def apply(ctx: Context): Unit = { + if (!ctx.moreInput) ctx.fail("comment") + else if (noLine && noMulti) ctx.fail() + else if (noLine) { + if (!ctx.input.startsWith(start, ctx.offset)) ctx.fail("comment") + else if (multiLineComment(ctx)) ctx.pushAndContinue(()) + else ctx.fail("end of comment") + } + else if (noMulti) { + if (!ctx.input.startsWith(line, ctx.offset)) ctx.fail("comment") + else { + singleLineComment(ctx) + ctx.pushAndContinue(()) + } + } + else { + val startsSingle = ctx.input.startsWith(line, ctx.offset) + val startsMulti = ctx.input.startsWith(start, ctx.offset) + if (!startsSingle && !startsMulti) ctx.fail("comment") + else if (startsMulti && multiLineComment(ctx)) ctx.pushAndContinue(()) + else if 
(startsMulti) ctx.fail("end of comment") + else { + singleLineComment(ctx) + ctx.pushAndContinue(()) + } + } + } + + // $COVERAGE-OFF$ + override def toString: String = "TokenComment" + // $COVERAGE-ON$ +} + +private [internal] final class TokenWhiteSpace(ws: TokenSet, start: String, end: String, line: String, nested: Boolean) + extends WhiteSpaceLike(start, end, line, nested) { + override def spaces(ctx: Context): Unit = while (ctx.moreInput && ws(ctx.nextChar)) ctx.consumeChar() // $COVERAGE-OFF$ override def toString: String = "TokenWhiteSpace" // $COVERAGE-ON$ } +private [internal] final class TokenSkipComments(start: String, end: String, line: String, nested: Boolean) extends WhiteSpaceLike(start, end, line, nested) { + override def spaces(ctx: Context): Unit = () + // $COVERAGE-OFF$ + override def toString: String = "TokenSkipComments" + // $COVERAGE-ON$ +} + private [internal] final class TokenSign(ty: SignType, _expected: UnsafeOption[String]) extends Instr { val expected = if (_expected == null) "sign" else _expected val neg: Any => Any = ty match { From 98e792dd4b1a08c1a3692485ee1cd0e87b8439d1 Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Tue, 5 Jan 2021 12:52:51 +0000 Subject: [PATCH 31/34] Simplified logic of token comment --- .../internal/instructions/TokenInstrs.scala | 34 ++++++------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala index 6d24ec2d0..9e57e0ee3 100644 --- a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala @@ -85,30 +85,18 @@ private [instructions] abstract class WhiteSpaceLike(start: String, end: String, // TODO This is considered as a VERY rough implementation of the intrinsic, just to get it working, it will be optimised later private [internal] final class TokenComment(start: String, end: String, 
line: String, nested: Boolean) extends CommentLexer(start, end, line, nested) { override def apply(ctx: Context): Unit = { - if (!ctx.moreInput) ctx.fail("comment") - else if (noLine && noMulti) ctx.fail() - else if (noLine) { - if (!ctx.input.startsWith(start, ctx.offset)) ctx.fail("comment") - else if (multiLineComment(ctx)) ctx.pushAndContinue(()) - else ctx.fail("end of comment") - } - else if (noMulti) { - if (!ctx.input.startsWith(line, ctx.offset)) ctx.fail("comment") - else { - singleLineComment(ctx) - ctx.pushAndContinue(()) - } - } + val startsSingle = ctx.input.startsWith(line, ctx.offset) + val startsMulti = ctx.input.startsWith(start, ctx.offset) + if (noLine && noMulti) ctx.fail() + // If neither comment is available we fail + else if (!ctx.moreInput || (!noLine && !startsSingle) && (!noMulti && !startsMulti)) ctx.fail("comment") + // One of the comments must be available + else if (!noMulti && startsMulti && multiLineComment(ctx)) ctx.pushAndContinue(()) + else if (!noMulti && startsMulti) ctx.fail("end of comment") + // It clearly wasn't the multi-line comment, so we are left with single line else { - val startsSingle = ctx.input.startsWith(line, ctx.offset) - val startsMulti = ctx.input.startsWith(start, ctx.offset) - if (!startsSingle && !startsMulti) ctx.fail("comment") - else if (startsMulti && multiLineComment(ctx)) ctx.pushAndContinue(()) - else if (startsMulti) ctx.fail("end of comment") - else { - singleLineComment(ctx) - ctx.pushAndContinue(()) - } + singleLineComment(ctx) + ctx.pushAndContinue(()) } } From 1f60cfcaf13bf6d7ea7dd232240d92b7c53c91a3 Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Tue, 5 Jan 2021 13:57:20 +0000 Subject: [PATCH 32/34] Finished comment refactor --- src/main/scala/parsley/Token.scala | 10 ++- .../internal/instructions/TokenInstrs.scala | 84 ++++++++++--------- 2 files changed, 53 insertions(+), 41 deletions(-) diff --git a/src/main/scala/parsley/Token.scala b/src/main/scala/parsley/Token.scala index 
cd4a447db..91d58b1f2 100644 --- a/src/main/scala/parsley/Token.scala +++ b/src/main/scala/parsley/Token.scala @@ -47,7 +47,15 @@ final case class LanguageDef(commentStart: String, operators: Set[String], caseSensitive: Boolean, space: Impl) { - private [parsley] val supportsComments = commentStart.nonEmpty || commentEnd.nonEmpty || commentLine.nonEmpty + private [parsley] lazy val supportsComments = { + val on = (commentStart.nonEmpty && commentEnd.nonEmpty) || commentLine.nonEmpty + if (on && commentStart.nonEmpty && commentLine.startsWith(commentStart)) { + throw new IllegalArgumentException( + "multi-line comments which are a valid prefix of a single-line comment are not allowed as this causes ambiguity in the parser" + ) + } + on + } } object LanguageDef { diff --git a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala index 9e57e0ee3..b8fc5db57 100644 --- a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala @@ -8,7 +8,7 @@ import scala.annotation.{switch, tailrec} private [instructions] abstract class CommentLexer(start: String, end: String, line: String, nested: Boolean) extends Instr { protected final val noLine = line.isEmpty - protected final val noMulti = start.isEmpty + protected final val noMulti = start.isEmpty || end.isEmpty protected final def singleLineComment(ctx: Context): Unit = { ctx.fastUncheckedConsumeChars(line.length) @@ -39,60 +39,64 @@ private [instructions] abstract class CommentLexer(start: String, end: String, l } private [instructions] abstract class WhiteSpaceLike(start: String, end: String, line: String, nested: Boolean) extends CommentLexer(start, end, line, nested) { - override final def apply(ctx: Context): Unit = { - if (noLine && noMulti) spaces(ctx) - else if (noLine) { + private final def singlesOnly(ctx: Context): Unit = { + spaces(ctx) + while (ctx.moreInput && 
ctx.input.startsWith(line, ctx.offset)) { + singleLineComment(ctx) spaces(ctx) - while (ctx.moreInput && ctx.input.startsWith(start, ctx.offset)) { - if (!multiLineComment(ctx)) { - ctx.fail("end of comment") - return - } - spaces(ctx) - } } - else if (noMulti) { + ctx.pushAndContinue(()) + } + + @tailrec private final def multisOnly(ctx: Context): Unit = { + spaces(ctx) + val startsMulti = ctx.moreInput && ctx.input.startsWith(start, ctx.offset) + if (startsMulti && multiLineComment(ctx)) { spaces(ctx) - while (ctx.moreInput && ctx.input.startsWith(line, ctx.offset)) { + multisOnly(ctx) + } + else if (startsMulti) ctx.fail("end of comment") + else ctx.pushAndContinue(()) + } + + private val sharedPrefix = line.view.zip(start).takeWhile(Function.tupled(_ == _)).map(_._1).mkString + private val factoredStart = start.drop(sharedPrefix.length) + private val factoredLine = line.drop(sharedPrefix.length) + // PRE: Multi-line comments may not prefix single-line, but single-line may prefix multi-line + @tailrec final def singlesAndMultis(ctx: Context): Unit = { + spaces(ctx) + if (ctx.moreInput && ctx.input.startsWith(sharedPrefix, ctx.offset)) { + val startsMulti = ctx.input.startsWith(factoredStart, ctx.offset + sharedPrefix.length) + if (startsMulti && multiLineComment(ctx)) singlesAndMultis(ctx) + else if (startsMulti) ctx.fail("end of comment") + else if (ctx.input.startsWith(factoredLine, ctx.offset + sharedPrefix.length)) { singleLineComment(ctx) - spaces(ctx) + singlesAndMultis(ctx) } } - else { - spaces(ctx) - // TODO This is considered as a VERY rough implementation of the intrinsic, just to get it working, it will be optimised later - var startsSingle = ctx.input.startsWith(line, ctx.offset) - var startsMulti = ctx.input.startsWith(start, ctx.offset) - while (ctx.moreInput && (startsSingle || startsMulti)) { - if (startsMulti) { - if (!multiLineComment(ctx)) { - ctx.fail("end of comment") - return - } - } - else singleLineComment(ctx) - spaces(ctx) - 
startsSingle = ctx.input.startsWith(line, ctx.offset) - startsMulti = ctx.input.startsWith(start, ctx.offset) - } - } - ctx.pushAndContinue(()) + else ctx.pushAndContinue(()) + } + + private final val impl = { + if (noLine) multisOnly(_) + else if (noMulti) singlesOnly(_) + else singlesAndMultis(_) } + override final def apply(ctx: Context): Unit = impl(ctx) protected def spaces(ctx: Context): Unit } -// TODO This is considered as a VERY rough implementation of the intrinsic, just to get it working, it will be optimised later private [internal] final class TokenComment(start: String, end: String, line: String, nested: Boolean) extends CommentLexer(start, end, line, nested) { + // PRE: one of the comments is supported + // PRE: Multi-line comments may not prefix single-line, but single-line may prefix multi-line override def apply(ctx: Context): Unit = { - val startsSingle = ctx.input.startsWith(line, ctx.offset) - val startsMulti = ctx.input.startsWith(start, ctx.offset) - if (noLine && noMulti) ctx.fail() + val startsMulti = !noMulti && ctx.input.startsWith(start, ctx.offset) // If neither comment is available we fail - else if (!ctx.moreInput || (!noLine && !startsSingle) && (!noMulti && !startsMulti)) ctx.fail("comment") + if (!ctx.moreInput || (!noLine && !ctx.input.startsWith(line, ctx.offset)) && (!noMulti && !startsMulti)) ctx.fail("comment") // One of the comments must be available - else if (!noMulti && startsMulti && multiLineComment(ctx)) ctx.pushAndContinue(()) - else if (!noMulti && startsMulti) ctx.fail("end of comment") + else if (startsMulti && multiLineComment(ctx)) ctx.pushAndContinue(()) + else if (startsMulti) ctx.fail("end of comment") // It clearly wasn't the multi-line comment, so we are left with single line else { singleLineComment(ctx) From 08807aaf3dd3b6ebbb55830c9b667c332834a7b1 Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Tue, 5 Jan 2021 17:21:30 +0000 Subject: [PATCH 33/34] Split up the token instructions --- 
.../internal/instructions/TokenInstrs.scala | 378 +----------------- .../instructions/TokenNumericInstrs.scala | 135 +++++++ .../instructions/TokenStringInstrs.scala | 240 +++++++++++ 3 files changed, 381 insertions(+), 372 deletions(-) create mode 100644 src/main/scala/parsley/internal/instructions/TokenNumericInstrs.scala create mode 100644 src/main/scala/parsley/internal/instructions/TokenStringInstrs.scala diff --git a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala index b8fc5db57..3d8263433 100644 --- a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala @@ -4,7 +4,7 @@ import parsley.internal.deepembedding.Sign.{SignType, IntType, DoubleType} import parsley.TokenParser.TokenSet import parsley.internal.{Radix, UnsafeOption} -import scala.annotation.{switch, tailrec} +import scala.annotation.tailrec private [instructions] abstract class CommentLexer(start: String, end: String, line: String, nested: Boolean) extends Instr { protected final val noLine = line.isEmpty @@ -39,22 +39,19 @@ private [instructions] abstract class CommentLexer(start: String, end: String, l } private [instructions] abstract class WhiteSpaceLike(start: String, end: String, line: String, nested: Boolean) extends CommentLexer(start, end, line, nested) { - private final def singlesOnly(ctx: Context): Unit = { + @tailrec private final def singlesOnly(ctx: Context): Unit = { spaces(ctx) - while (ctx.moreInput && ctx.input.startsWith(line, ctx.offset)) { + if (ctx.moreInput && ctx.input.startsWith(line, ctx.offset)) { singleLineComment(ctx) - spaces(ctx) + singlesOnly(ctx) } - ctx.pushAndContinue(()) + else ctx.pushAndContinue(()) } @tailrec private final def multisOnly(ctx: Context): Unit = { spaces(ctx) val startsMulti = ctx.moreInput && ctx.input.startsWith(start, ctx.offset) - if (startsMulti && multiLineComment(ctx)) { - spaces(ctx) 
- multisOnly(ctx) - } + if (startsMulti && multiLineComment(ctx)) multisOnly(ctx) else if (startsMulti) ctx.fail("end of comment") else ctx.pushAndContinue(()) } @@ -124,369 +121,6 @@ private [internal] final class TokenSkipComments(start: String, end: String, lin // $COVERAGE-ON$ } -private [internal] final class TokenSign(ty: SignType, _expected: UnsafeOption[String]) extends Instr { - val expected = if (_expected == null) "sign" else _expected - val neg: Any => Any = ty match { - case IntType => ((x: Int) => -x).asInstanceOf[Any => Any] - case DoubleType => ((x: Double) => -x).asInstanceOf[Any => Any] - } - val pos = (x: Any) => x - - override def apply(ctx: Context): Unit = { - if (ctx.moreInput && ctx.nextChar == '-') { - ctx.fastUncheckedConsumeChars(1) - ctx.stack.push(neg) - } - else if (ctx.moreInput && ctx.nextChar == '+') { - ctx.fastUncheckedConsumeChars(1) - ctx.stack.push(pos) - } - else ctx.stack.push(pos) - ctx.inc() - } - - // $COVERAGE-OFF$ - override def toString: String = "TokenSign" - // $COVERAGE-ON$ -} - -private [instructions] sealed trait NumericReader { - private final def subDecimal(base: Int, isDigit: Char => Boolean): (Context, Int, Boolean) => Option[Int] = { - @tailrec def go(ctx: Context, x: Int, first: Boolean): Option[Int] = { - if (ctx.moreInput && isDigit(ctx.nextChar)) { - val d = ctx.nextChar.asDigit - ctx.fastUncheckedConsumeChars(1) - go(ctx, x * base + d, false) - } - else if (first) None - else Some(x) - } - go - } - protected final val decimal = subDecimal(10, _.isDigit) - protected final val octal = subDecimal(8, parsley.Char.isOctDigit) - protected final val hexadecimal = subDecimal(16, parsley.Char.isHexDigit) -} - -private [internal] final class TokenNatural(_expected: UnsafeOption[String]) extends Instr with NumericReader { - val expected = if (_expected == null) "natural" else _expected - override def apply(ctx: Context): Unit = { - if (ctx.moreInput && ctx.nextChar == '0') { - ctx.fastUncheckedConsumeChars(1) - lazy 
val hexa = ctx.nextChar == 'x' || ctx.nextChar == 'X' - lazy val octa = ctx.nextChar == 'o' || ctx.nextChar == 'O' - if (ctx.moreInput && (hexa || octa)) { - ctx.fastUncheckedConsumeChars(1) - (if (hexa) hexadecimal else octal)(ctx, 0, true) match { - case Some(x) => ctx.pushAndContinue(x) - case None => ctx.fail(expected) - } - } - else ctx.pushAndContinue(decimal(ctx, 0, true).getOrElse(0)) - } - else decimal(ctx, 0, true) match { - case Some(x) => ctx.pushAndContinue(x) - case None => ctx.fail(expected) - } - } - - // $COVERAGE-OFF$ - override def toString: String = "TokenNatural" - // $COVERAGE-ON$ -} - -private [internal] final class TokenFloat(_expected: UnsafeOption[String]) extends Instr { - val expected = if (_expected == null) "unsigned float" else _expected - override def apply(ctx: Context): Unit = { - val builder = new StringBuilder() - if (decimal(ctx, builder)) { - lexFraction(ctx, builder) - } - else ctx.fail(expected) - } - - @tailrec private final def decimal(ctx: Context, builder: StringBuilder, first: Boolean = true): Boolean = { - if (ctx.moreInput && ctx.nextChar >= '0' && ctx.nextChar <= '9') { - builder += ctx.nextChar - ctx.fastUncheckedConsumeChars(1) - decimal(ctx, builder, false) - } - else !first - } - - private final def exponent(ctx: Context, builder: StringBuilder): Boolean = { - ctx.fastUncheckedConsumeChars(1) - if (ctx.moreInput && ctx.nextChar == '+') ctx.fastUncheckedConsumeChars(1) - else if (ctx.moreInput && ctx.nextChar == '-') { - ctx.fastUncheckedConsumeChars(1) - builder += '-' - } - decimal(ctx, builder) - } - - private final def attemptCastAndContinue(ctx: Context, builder: StringBuilder): Unit = { - try ctx.pushAndContinue(builder.toString.toDouble) - catch { - case _: NumberFormatException => ctx.fail(expected) - } - } - - private final def lexExponent(ctx: Context, builder: StringBuilder, missingOk: Boolean): Unit = { - val requireExponent = ctx.moreInput && (ctx.nextChar == 'e' || ctx.nextChar == 'E') - if 
(requireExponent && exponent(ctx, builder += 'e')) attemptCastAndContinue(ctx, builder) - else if (requireExponent) ctx.fail(expected) - else if (missingOk) attemptCastAndContinue(ctx, builder) - else ctx.fail(expected) - } - - private final def lexFraction(ctx: Context, builder: StringBuilder) = { - if (ctx.moreInput && ctx.nextChar == '.') { - ctx.fastUncheckedConsumeChars(1) - if (decimal(ctx, builder += '.')) lexExponent(ctx, builder, missingOk = true) - else ctx.fail(expected) - } - else lexExponent(ctx, builder, missingOk = false) - } - - // $COVERAGE-OFF$ - override def toString: String = "TokenFloat" - // $COVERAGE-ON$ -} - -private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends Instr with NumericReader { - private [this] final val expected = if (_expected == null) "escape code" else _expected - override def apply(ctx: Context): Unit = escape(ctx) match { - case TokenEscape.EscapeChar(escapeChar) =>ctx.pushAndContinue(escapeChar) - case TokenEscape.BadCode => ctx.failWithMessage(expected, msg = "invalid escape sequence") - case TokenEscape.NoParse => ctx.fail(expected) - } - - private final def consumeAndReturn(ctx: Context, n: Int, c: Char) = { - ctx.fastUncheckedConsumeChars(n) - new TokenEscape.EscapeChar(c) - } - - private final def lookAhead(ctx: Context, n: Int): Char = ctx.input(ctx.offset + n) - private final def lookAhead(ctx: Context, n: Int, c: Char): Boolean = ctx.offset + n < ctx.inputsz && lookAhead(ctx, n) == c - - private final def numericEscape(ctx: Context, escapeCode: Int) = { - if (escapeCode <= 0x10FFFF) new TokenEscape.EscapeChar(escapeCode.toChar) - else TokenEscape.BadCode - } - - private final def nonDecimalNumericEscape(ctx: Context, lexer: (Context, Int, Boolean) => Option[Int]) = { - ctx.fastUncheckedConsumeChars(1) - lexer(ctx, 0, true) match { - case Some(x) => numericEscape(ctx, x) - case None => TokenEscape.NoParse - } - } - - private final def decimalEscape(ctx: Context, d: Int) = { - 
ctx.fastUncheckedConsumeChars(1) - numericEscape(ctx, decimal(ctx, d, false).get) - } - private final def hexadecimalEscape(ctx: Context) = nonDecimalNumericEscape(ctx, hexadecimal) - private final def octalEscape(ctx: Context) = nonDecimalNumericEscape(ctx, octal) - private final def caretEscape(ctx: Context) = { - ctx.fastUncheckedConsumeChars(1) - if (ctx.moreInput && ctx.nextChar >= 'A' && ctx.nextChar <= 'Z') consumeAndReturn(ctx, 1, (ctx.nextChar - 'A' + 1).toChar) - else TokenEscape.NoParse - } - - protected final def escape(ctx: Context): TokenEscape.Escape = { - val threeAvailable = ctx.offset + 2 < ctx.inputsz - if (ctx.moreInput) { - ctx.nextChar match { - case 'a' => consumeAndReturn(ctx, 1, '\u0007') - case 'b' => consumeAndReturn(ctx, 1, '\b') - case 'f' => consumeAndReturn(ctx, 1, '\u000c') - case 'n' => consumeAndReturn(ctx, 1, '\n') - case 'r' => consumeAndReturn(ctx, 1, '\r') - case 't' => consumeAndReturn(ctx, 1, '\t') - case 'v' => consumeAndReturn(ctx, 1, '\u000b') - case '\\' => consumeAndReturn(ctx, 1, '\\') - case '\"' => consumeAndReturn(ctx, 1, '\"') - case '\'' => consumeAndReturn(ctx, 1, '\'') - case d@('0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9') => decimalEscape(ctx, d.asDigit) - case 'x' => hexadecimalEscape(ctx) - case 'o' => octalEscape(ctx) - case '^' => caretEscape(ctx) - case 'A' if threeAvailable && lookAhead(ctx, 1) == 'C' && lookAhead(ctx, 2) == 'K' => consumeAndReturn(ctx, 3, '\u0006') //ACK - case 'B' => //BS BEL - if (lookAhead(ctx, 1, 'S')) consumeAndReturn(ctx, 2, '\u0008') - else if (lookAhead(ctx, 2, 'L') && lookAhead(ctx, 1) == 'E') { - consumeAndReturn(ctx, 3, '\u0007') - } - else TokenEscape.NoParse - case 'C' => //CR CAN - if (lookAhead(ctx, 1, 'R')) consumeAndReturn(ctx, 2, '\u000d') - else if (lookAhead(ctx, 2, 'N') && lookAhead(ctx, 1) == 'A') consumeAndReturn(ctx, 3, '\u0018') - else TokenEscape.NoParse - case 'D' if threeAvailable => //DC1 DC2 DC3 DC4 DEL DLE - val c = lookAhead(ctx, 2) - 
lookAhead(ctx, 1) match { - case 'C' if c == '1' => consumeAndReturn(ctx, 3, '\u0011') - case 'C' if c == '2' => consumeAndReturn(ctx, 3, '\u0012') - case 'C' if c == '3' => consumeAndReturn(ctx, 3, '\u0013') - case 'C' if c == '4' => consumeAndReturn(ctx, 3, '\u0014') - case 'E' if c == 'L' => consumeAndReturn(ctx, 3, '\u001f') - case 'L' if c == 'E' => consumeAndReturn(ctx, 3, '\u0010') - case _ => TokenEscape.NoParse - } - case 'E' => //EM ETX ETB ESC EOT ENQ - if (lookAhead(ctx, 1, 'M')) consumeAndReturn(ctx, 2, '\u0019') - else if (threeAvailable) lookAhead(ctx, 1) match { - case 'N' if lookAhead(ctx, 2) == 'Q' => consumeAndReturn(ctx, 3, '\u0005') - case 'O' if lookAhead(ctx, 2) == 'T' => consumeAndReturn(ctx, 3, '\u0004') - case 'S' if lookAhead(ctx, 2) == 'C' => consumeAndReturn(ctx, 3, '\u001b') - case 'T' if lookAhead(ctx, 2) == 'X' => consumeAndReturn(ctx, 3, '\u0003') - case 'T' if lookAhead(ctx, 2) == 'B' => consumeAndReturn(ctx, 3, '\u0017') - case _ => TokenEscape.NoParse - } - else TokenEscape.NoParse - case 'F' => //FF FS - if (lookAhead(ctx, 1, 'F')) consumeAndReturn(ctx, 2, '\u000c') - else if (lookAhead(ctx, 1, 'S')) consumeAndReturn(ctx, 2, '\u001c') - else TokenEscape.NoParse - case 'G' if lookAhead(ctx, 1, 'S') => consumeAndReturn(ctx, 2, '\u001d') //GS - case 'H' if lookAhead(ctx, 1, 'T') => consumeAndReturn(ctx, 2, '\u0009') //HT - case 'L' if lookAhead(ctx, 1, 'F') => consumeAndReturn(ctx, 2, '\n') //LF - case 'N' => //NUL NAK - if (threeAvailable && lookAhead(ctx, 1) == 'U' && lookAhead(ctx, 2) == 'L') consumeAndReturn(ctx, 3, '\u0000') - else if (threeAvailable && lookAhead(ctx, 1) == 'A' && lookAhead(ctx, 2) == 'K') { - consumeAndReturn(ctx, 3, '\u0015') - } - else TokenEscape.NoParse - case 'R' if lookAhead(ctx, 1, 'S') => consumeAndReturn(ctx, 2, '\u001e') //RS - case 'S' => //SO SI SP SOH STX SYN SUB - if (lookAhead(ctx, 1, 'O')) consumeAndReturn(ctx, 2, '\u000e') - else if (lookAhead(ctx, 1, 'I')) consumeAndReturn(ctx, 2, '\u000f') 
- else if (lookAhead(ctx, 1, 'P')) consumeAndReturn(ctx, 2, '\u0020') - else if (threeAvailable) lookAhead(ctx, 1) match { - case 'O' if lookAhead(ctx, 2) == 'H' => consumeAndReturn(ctx, 3, '\u0001') - case 'T' if lookAhead(ctx, 2) == 'X' => consumeAndReturn(ctx, 3, '\u0002') - case 'Y' if lookAhead(ctx, 2) == 'N' => consumeAndReturn(ctx, 3, '\u0016') - case 'U' if lookAhead(ctx, 2) == 'B' => consumeAndReturn(ctx, 3, '\u001a') - case _ => TokenEscape.NoParse - } - else TokenEscape.NoParse - case 'U' if lookAhead(ctx, 1, 'S') => consumeAndReturn(ctx, 2, '\u001f') //US - case 'V' if lookAhead(ctx, 1, 'T') => consumeAndReturn(ctx, 2, '\u000b') //VT - case _ => TokenEscape.NoParse - } - } - else TokenEscape.NoParse - } - - // $COVERAGE-OFF$ - override def toString: String = "TokenEscape" - // $COVERAGE-ON$ -} -object TokenEscape { - private [instructions] sealed trait Escape - private [instructions] case class EscapeChar(escapeChar: Char) extends Escape - private [instructions] case object BadCode extends Escape - private [instructions] case object NoParse extends Escape -} - -private [instructions] sealed trait TokenStringLike extends Instr { - protected val expected: UnsafeOption[String] - final protected lazy val expectedString = if (expected == null) "string" else expected - final protected lazy val expectedEos = if (expected == null) "end of string" else expected - final protected lazy val expectedChar = if (expected == null) "string character" else expected - - // All failures must be handled by this function - protected def handleEscaped(ctx: Context, builder: StringBuilder): Boolean - @tailrec private final def restOfString(ctx: Context, builder: StringBuilder): Unit = { - if (ctx.moreInput) ctx.nextChar match { - case '"' => - ctx.fastUncheckedConsumeChars(1) - ctx.pushAndContinue(builder.toString) - case '\\' => - ctx.fastUncheckedConsumeChars(1) - if (handleEscaped(ctx, builder)) restOfString(ctx, builder) - case c if c > '\u0016' => - builder += c - 
ctx.fastUncheckedConsumeChars(1) - restOfString(ctx, builder) - case _ => ctx.fail(expectedChar) - } - else ctx.fail(expectedEos) - } - final override def apply(ctx: Context): Unit = { - if (ctx.moreInput && ctx.nextChar == '"') { - ctx.fastUncheckedConsumeChars(1) - restOfString(ctx, new StringBuilder()) - } - else ctx.fail(expectedString) - } -} - -private [internal] final class TokenRawString(_expected: UnsafeOption[String]) extends TokenStringLike { - override val expected = _expected - override def handleEscaped(ctx: Context, builder: StringBuilder): Boolean = { - builder += '\\' - if (ctx.moreInput && ctx.nextChar > '\u0016') { - builder += ctx.nextChar - ctx.fastUncheckedConsumeChars(1) - true - } - else { - ctx.fail(expectedChar) - false - } - } - - // $COVERAGE-OFF$ - override def toString: String = "TokenRawString" - // $COVERAGE-ON$ -} - -private [internal] final class TokenString(ws: TokenSet, _expected: UnsafeOption[String]) extends TokenEscape(_expected) with TokenStringLike { - override val expected = _expected - private val expectedEscape = if (_expected == null) "escape code" else _expected - private val expectedGap = if (_expected == null) "end of string gap" else _expected - - private def readGap(ctx: Context): Boolean = { - val completedGap = ctx.moreInput && ctx.nextChar == '\\' - if (completedGap) ctx.fastUncheckedConsumeChars(1) - else ctx.fail(expectedGap) - completedGap - } - - override def handleEscaped(ctx: Context, builder: StringBuilder): Boolean = { - if (spaces(ctx) != 0) readGap(ctx) - else if (ctx.moreInput && ctx.nextChar == '&') { - ctx.fastUncheckedConsumeChars(1) - true - } - else escape(ctx) match { - case TokenEscape.EscapeChar(c) => - builder += c - true - case TokenEscape.BadCode => - ctx.failWithMessage(expectedEscape, "invalid escape sequence") - false - case TokenEscape.NoParse => - ctx.fail(expectedEscape) - false - } - } - - @tailrec private def spaces(ctx: Context, n: Int = 0): Int = { - if (ctx.moreInput && 
ws(ctx.nextChar)) { - ctx.consumeChar() - spaces(ctx, n + 1) - } - else n - } - - // $COVERAGE-OFF$ - override def toString: String = "TokenString" - // $COVERAGE-ON$ -} - private [instructions] abstract class TokenLexi(name: String, illegalName: String) (start: TokenSet, letter: TokenSet, illegal: String => Boolean, _expected: UnsafeOption[String]) extends Instr { private val expected = if (_expected == null) name else _expected diff --git a/src/main/scala/parsley/internal/instructions/TokenNumericInstrs.scala b/src/main/scala/parsley/internal/instructions/TokenNumericInstrs.scala new file mode 100644 index 000000000..34c58509a --- /dev/null +++ b/src/main/scala/parsley/internal/instructions/TokenNumericInstrs.scala @@ -0,0 +1,135 @@ +package parsley.internal.instructions + +import parsley.internal.deepembedding.Sign.{SignType, IntType, DoubleType} +import parsley.internal.UnsafeOption + +import scala.annotation.tailrec + +private [internal] final class TokenSign(ty: SignType, _expected: UnsafeOption[String]) extends Instr { + val expected = if (_expected == null) "sign" else _expected + val neg: Any => Any = ty match { + case IntType => ((x: Int) => -x).asInstanceOf[Any => Any] + case DoubleType => ((x: Double) => -x).asInstanceOf[Any => Any] + } + val pos = (x: Any) => x + + override def apply(ctx: Context): Unit = { + if (ctx.moreInput && ctx.nextChar == '-') { + ctx.fastUncheckedConsumeChars(1) + ctx.stack.push(neg) + } + else if (ctx.moreInput && ctx.nextChar == '+') { + ctx.fastUncheckedConsumeChars(1) + ctx.stack.push(pos) + } + else ctx.stack.push(pos) + ctx.inc() + } + + // $COVERAGE-OFF$ + override def toString: String = "TokenSign" + // $COVERAGE-ON$ +} + +private [instructions] trait NumericReader { + private final def subDecimal(base: Int, isDigit: Char => Boolean): (Context, Int, Boolean) => Option[Int] = { + @tailrec def go(ctx: Context, x: Int, first: Boolean): Option[Int] = { + if (ctx.moreInput && isDigit(ctx.nextChar)) { + val d = 
ctx.nextChar.asDigit + ctx.fastUncheckedConsumeChars(1) + go(ctx, x * base + d, false) + } + else if (first) None + else Some(x) + } + go + } + protected final val decimal = subDecimal(10, _.isDigit) + protected final val octal = subDecimal(8, parsley.Char.isOctDigit) + protected final val hexadecimal = subDecimal(16, parsley.Char.isHexDigit) +} + +private [internal] final class TokenNatural(_expected: UnsafeOption[String]) extends Instr with NumericReader { + val expected = if (_expected == null) "natural" else _expected + override def apply(ctx: Context): Unit = { + if (ctx.moreInput && ctx.nextChar == '0') { + ctx.fastUncheckedConsumeChars(1) + lazy val hexa = ctx.nextChar == 'x' || ctx.nextChar == 'X' + lazy val octa = ctx.nextChar == 'o' || ctx.nextChar == 'O' + if (ctx.moreInput && (hexa || octa)) { + ctx.fastUncheckedConsumeChars(1) + (if (hexa) hexadecimal else octal)(ctx, 0, true) match { + case Some(x) => ctx.pushAndContinue(x) + case None => ctx.fail(expected) + } + } + else ctx.pushAndContinue(decimal(ctx, 0, true).getOrElse(0)) + } + else decimal(ctx, 0, true) match { + case Some(x) => ctx.pushAndContinue(x) + case None => ctx.fail(expected) + } + } + + // $COVERAGE-OFF$ + override def toString: String = "TokenNatural" + // $COVERAGE-ON$ +} + +private [internal] final class TokenFloat(_expected: UnsafeOption[String]) extends Instr { + val expected = if (_expected == null) "unsigned float" else _expected + override def apply(ctx: Context): Unit = { + val builder = new StringBuilder() + if (decimal(ctx, builder)) { + lexFraction(ctx, builder) + } + else ctx.fail(expected) + } + + @tailrec private final def decimal(ctx: Context, builder: StringBuilder, first: Boolean = true): Boolean = { + if (ctx.moreInput && ctx.nextChar >= '0' && ctx.nextChar <= '9') { + builder += ctx.nextChar + ctx.fastUncheckedConsumeChars(1) + decimal(ctx, builder, false) + } + else !first + } + + private final def exponent(ctx: Context, builder: StringBuilder): Boolean = { + 
ctx.fastUncheckedConsumeChars(1) + if (ctx.moreInput && ctx.nextChar == '+') ctx.fastUncheckedConsumeChars(1) + else if (ctx.moreInput && ctx.nextChar == '-') { + ctx.fastUncheckedConsumeChars(1) + builder += '-' + } + decimal(ctx, builder) + } + + private final def attemptCastAndContinue(ctx: Context, builder: StringBuilder): Unit = { + try ctx.pushAndContinue(builder.toString.toDouble) + catch { + case _: NumberFormatException => ctx.fail(expected) + } + } + + private final def lexExponent(ctx: Context, builder: StringBuilder, missingOk: Boolean): Unit = { + val requireExponent = ctx.moreInput && (ctx.nextChar == 'e' || ctx.nextChar == 'E') + if (requireExponent && exponent(ctx, builder += 'e')) attemptCastAndContinue(ctx, builder) + else if (requireExponent) ctx.fail(expected) + else if (missingOk) attemptCastAndContinue(ctx, builder) + else ctx.fail(expected) + } + + private final def lexFraction(ctx: Context, builder: StringBuilder) = { + if (ctx.moreInput && ctx.nextChar == '.') { + ctx.fastUncheckedConsumeChars(1) + if (decimal(ctx, builder += '.')) lexExponent(ctx, builder, missingOk = true) + else ctx.fail(expected) + } + else lexExponent(ctx, builder, missingOk = false) + } + + // $COVERAGE-OFF$ + override def toString: String = "TokenFloat" + // $COVERAGE-ON$ +} \ No newline at end of file diff --git a/src/main/scala/parsley/internal/instructions/TokenStringInstrs.scala b/src/main/scala/parsley/internal/instructions/TokenStringInstrs.scala new file mode 100644 index 000000000..499d18568 --- /dev/null +++ b/src/main/scala/parsley/internal/instructions/TokenStringInstrs.scala @@ -0,0 +1,240 @@ +package parsley.internal.instructions + +import parsley.internal.UnsafeOption +import parsley.TokenParser.TokenSet + +import scala.annotation.tailrec + +private [internal] class TokenEscape(_expected: UnsafeOption[String]) extends Instr with NumericReader { + private [this] final val expected = if (_expected == null) "escape code" else _expected + override def 
apply(ctx: Context): Unit = escape(ctx) match { + case TokenEscape.EscapeChar(escapeChar) =>ctx.pushAndContinue(escapeChar) + case TokenEscape.BadCode => ctx.failWithMessage(expected, msg = "invalid escape sequence") + case TokenEscape.NoParse => ctx.fail(expected) + } + + private final def consumeAndReturn(ctx: Context, n: Int, c: Char) = { + ctx.fastUncheckedConsumeChars(n) + new TokenEscape.EscapeChar(c) + } + + private final def lookAhead(ctx: Context, n: Int): Char = ctx.input(ctx.offset + n) + private final def lookAhead(ctx: Context, n: Int, c: Char): Boolean = ctx.offset + n < ctx.inputsz && lookAhead(ctx, n) == c + + private final def numericEscape(ctx: Context, escapeCode: Int) = { + if (escapeCode <= 0x10FFFF) new TokenEscape.EscapeChar(escapeCode.toChar) + else TokenEscape.BadCode + } + + private final def nonDecimalNumericEscape(ctx: Context, lexer: (Context, Int, Boolean) => Option[Int]) = { + ctx.fastUncheckedConsumeChars(1) + lexer(ctx, 0, true) match { + case Some(x) => numericEscape(ctx, x) + case None => TokenEscape.NoParse + } + } + + private final def decimalEscape(ctx: Context, d: Int) = { + ctx.fastUncheckedConsumeChars(1) + numericEscape(ctx, decimal(ctx, d, false).get) + } + private final def hexadecimalEscape(ctx: Context) = nonDecimalNumericEscape(ctx, hexadecimal) + private final def octalEscape(ctx: Context) = nonDecimalNumericEscape(ctx, octal) + private final def caretEscape(ctx: Context) = { + ctx.fastUncheckedConsumeChars(1) + if (ctx.moreInput && ctx.nextChar >= 'A' && ctx.nextChar <= 'Z') consumeAndReturn(ctx, 1, (ctx.nextChar - 'A' + 1).toChar) + else TokenEscape.NoParse + } + + protected final def escape(ctx: Context): TokenEscape.Escape = { + val threeAvailable = ctx.offset + 2 < ctx.inputsz + if (ctx.moreInput) { + ctx.nextChar match { + case 'a' => consumeAndReturn(ctx, 1, '\u0007') + case 'b' => consumeAndReturn(ctx, 1, '\b') + case 'f' => consumeAndReturn(ctx, 1, '\u000c') + case 'n' => consumeAndReturn(ctx, 1, '\n') + case 
'r' => consumeAndReturn(ctx, 1, '\r') + case 't' => consumeAndReturn(ctx, 1, '\t') + case 'v' => consumeAndReturn(ctx, 1, '\u000b') + case '\\' => consumeAndReturn(ctx, 1, '\\') + case '\"' => consumeAndReturn(ctx, 1, '\"') + case '\'' => consumeAndReturn(ctx, 1, '\'') + case d@('0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9') => decimalEscape(ctx, d.asDigit) + case 'x' => hexadecimalEscape(ctx) + case 'o' => octalEscape(ctx) + case '^' => caretEscape(ctx) + case 'A' if threeAvailable && lookAhead(ctx, 1) == 'C' && lookAhead(ctx, 2) == 'K' => consumeAndReturn(ctx, 3, '\u0006') //ACK + case 'B' => //BS BEL + if (lookAhead(ctx, 1, 'S')) consumeAndReturn(ctx, 2, '\u0008') + else if (lookAhead(ctx, 2, 'L') && lookAhead(ctx, 1) == 'E') { + consumeAndReturn(ctx, 3, '\u0007') + } + else TokenEscape.NoParse + case 'C' => //CR CAN + if (lookAhead(ctx, 1, 'R')) consumeAndReturn(ctx, 2, '\u000d') + else if (lookAhead(ctx, 2, 'N') && lookAhead(ctx, 1) == 'A') consumeAndReturn(ctx, 3, '\u0018') + else TokenEscape.NoParse + case 'D' if threeAvailable => //DC1 DC2 DC3 DC4 DEL DLE + val c = lookAhead(ctx, 2) + lookAhead(ctx, 1) match { + case 'C' if c == '1' => consumeAndReturn(ctx, 3, '\u0011') + case 'C' if c == '2' => consumeAndReturn(ctx, 3, '\u0012') + case 'C' if c == '3' => consumeAndReturn(ctx, 3, '\u0013') + case 'C' if c == '4' => consumeAndReturn(ctx, 3, '\u0014') + case 'E' if c == 'L' => consumeAndReturn(ctx, 3, '\u001f') + case 'L' if c == 'E' => consumeAndReturn(ctx, 3, '\u0010') + case _ => TokenEscape.NoParse + } + case 'E' => //EM ETX ETB ESC EOT ENQ + if (lookAhead(ctx, 1, 'M')) consumeAndReturn(ctx, 2, '\u0019') + else if (threeAvailable) lookAhead(ctx, 1) match { + case 'N' if lookAhead(ctx, 2) == 'Q' => consumeAndReturn(ctx, 3, '\u0005') + case 'O' if lookAhead(ctx, 2) == 'T' => consumeAndReturn(ctx, 3, '\u0004') + case 'S' if lookAhead(ctx, 2) == 'C' => consumeAndReturn(ctx, 3, '\u001b') + case 'T' if lookAhead(ctx, 2) == 'X' => 
consumeAndReturn(ctx, 3, '\u0003') + case 'T' if lookAhead(ctx, 2) == 'B' => consumeAndReturn(ctx, 3, '\u0017') + case _ => TokenEscape.NoParse + } + else TokenEscape.NoParse + case 'F' => //FF FS + if (lookAhead(ctx, 1, 'F')) consumeAndReturn(ctx, 2, '\u000c') + else if (lookAhead(ctx, 1, 'S')) consumeAndReturn(ctx, 2, '\u001c') + else TokenEscape.NoParse + case 'G' if lookAhead(ctx, 1, 'S') => consumeAndReturn(ctx, 2, '\u001d') //GS + case 'H' if lookAhead(ctx, 1, 'T') => consumeAndReturn(ctx, 2, '\u0009') //HT + case 'L' if lookAhead(ctx, 1, 'F') => consumeAndReturn(ctx, 2, '\n') //LF + case 'N' => //NUL NAK + if (threeAvailable && lookAhead(ctx, 1) == 'U' && lookAhead(ctx, 2) == 'L') consumeAndReturn(ctx, 3, '\u0000') + else if (threeAvailable && lookAhead(ctx, 1) == 'A' && lookAhead(ctx, 2) == 'K') { + consumeAndReturn(ctx, 3, '\u0015') + } + else TokenEscape.NoParse + case 'R' if lookAhead(ctx, 1, 'S') => consumeAndReturn(ctx, 2, '\u001e') //RS + case 'S' => //SO SI SP SOH STX SYN SUB + if (lookAhead(ctx, 1, 'O')) consumeAndReturn(ctx, 2, '\u000e') + else if (lookAhead(ctx, 1, 'I')) consumeAndReturn(ctx, 2, '\u000f') + else if (lookAhead(ctx, 1, 'P')) consumeAndReturn(ctx, 2, '\u0020') + else if (threeAvailable) lookAhead(ctx, 1) match { + case 'O' if lookAhead(ctx, 2) == 'H' => consumeAndReturn(ctx, 3, '\u0001') + case 'T' if lookAhead(ctx, 2) == 'X' => consumeAndReturn(ctx, 3, '\u0002') + case 'Y' if lookAhead(ctx, 2) == 'N' => consumeAndReturn(ctx, 3, '\u0016') + case 'U' if lookAhead(ctx, 2) == 'B' => consumeAndReturn(ctx, 3, '\u001a') + case _ => TokenEscape.NoParse + } + else TokenEscape.NoParse + case 'U' if lookAhead(ctx, 1, 'S') => consumeAndReturn(ctx, 2, '\u001f') //US + case 'V' if lookAhead(ctx, 1, 'T') => consumeAndReturn(ctx, 2, '\u000b') //VT + case _ => TokenEscape.NoParse + } + } + else TokenEscape.NoParse + } + + // $COVERAGE-OFF$ + override def toString: String = "TokenEscape" + // $COVERAGE-ON$ +} +object TokenEscape { + private 
[instructions] sealed trait Escape + private [instructions] case class EscapeChar(escapeChar: Char) extends Escape + private [instructions] case object BadCode extends Escape + private [instructions] case object NoParse extends Escape +} + +private [instructions] sealed trait TokenStringLike extends Instr { + protected val expected: UnsafeOption[String] + final protected lazy val expectedString = if (expected == null) "string" else expected + final protected lazy val expectedEos = if (expected == null) "end of string" else expected + final protected lazy val expectedChar = if (expected == null) "string character" else expected + + // All failures must be handled by this function + protected def handleEscaped(ctx: Context, builder: StringBuilder): Boolean + @tailrec private final def restOfString(ctx: Context, builder: StringBuilder): Unit = { + if (ctx.moreInput) ctx.nextChar match { + case '"' => + ctx.fastUncheckedConsumeChars(1) + ctx.pushAndContinue(builder.toString) + case '\\' => + ctx.fastUncheckedConsumeChars(1) + if (handleEscaped(ctx, builder)) restOfString(ctx, builder) + case c if c > '\u0016' => + builder += c + ctx.fastUncheckedConsumeChars(1) + restOfString(ctx, builder) + case _ => ctx.fail(expectedChar) + } + else ctx.fail(expectedEos) + } + final override def apply(ctx: Context): Unit = { + if (ctx.moreInput && ctx.nextChar == '"') { + ctx.fastUncheckedConsumeChars(1) + restOfString(ctx, new StringBuilder()) + } + else ctx.fail(expectedString) + } +} + +private [internal] final class TokenRawString(_expected: UnsafeOption[String]) extends TokenStringLike { + override val expected = _expected + override def handleEscaped(ctx: Context, builder: StringBuilder): Boolean = { + builder += '\\' + if (ctx.moreInput && ctx.nextChar > '\u0016') { + builder += ctx.nextChar + ctx.fastUncheckedConsumeChars(1) + true + } + else { + ctx.fail(expectedChar) + false + } + } + + // $COVERAGE-OFF$ + override def toString: String = "TokenRawString" + // $COVERAGE-ON$ 
+} + +private [internal] final class TokenString(ws: TokenSet, _expected: UnsafeOption[String]) extends TokenEscape(_expected) with TokenStringLike { + override val expected = _expected + private val expectedEscape = if (_expected == null) "escape code" else _expected + private val expectedGap = if (_expected == null) "end of string gap" else _expected + + private def readGap(ctx: Context): Boolean = { + val completedGap = ctx.moreInput && ctx.nextChar == '\\' + if (completedGap) ctx.fastUncheckedConsumeChars(1) + else ctx.fail(expectedGap) + completedGap + } + + override def handleEscaped(ctx: Context, builder: StringBuilder): Boolean = { + if (spaces(ctx) != 0) readGap(ctx) + else if (ctx.moreInput && ctx.nextChar == '&') { + ctx.fastUncheckedConsumeChars(1) + true + } + else escape(ctx) match { + case TokenEscape.EscapeChar(c) => + builder += c + true + case TokenEscape.BadCode => + ctx.failWithMessage(expectedEscape, "invalid escape sequence") + false + case TokenEscape.NoParse => + ctx.fail(expectedEscape) + false + } + } + + @tailrec private def spaces(ctx: Context, n: Int = 0): Int = { + if (ctx.moreInput && ws(ctx.nextChar)) { + ctx.consumeChar() + spaces(ctx, n + 1) + } + else n + } + + // $COVERAGE-OFF$ + override def toString: String = "TokenString" + // $COVERAGE-ON$ +} From 6d79f01ab3493d21e5426c82811c16465bccf41f Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Tue, 5 Jan 2021 20:13:12 +0000 Subject: [PATCH 34/34] Removed a bunch of redundant AST Nodes and instructions --- src/main/scala/parsley/Token.scala | 46 ++++++++--------- .../deepembedding/AlternativeEmbedding.scala | 5 +- .../deepembedding/TokenEmbedding.scala | 50 ++++++++----------- .../internal/instructions/TokenInstrs.scala | 41 +++++---------- 4 files changed, 57 insertions(+), 85 deletions(-) diff --git a/src/main/scala/parsley/Token.scala b/src/main/scala/parsley/Token.scala index 91d58b1f2..6ab33e626 100644 --- a/src/main/scala/parsley/Token.scala +++ 
b/src/main/scala/parsley/Token.scala @@ -114,14 +114,18 @@ object BitGen */ final class TokenParser(lang: LanguageDef) { - private def keyOrOp(startImpl: Impl, letterImpl: Impl, parser: Parsley[String], predicate: String => Boolean, name: String, - builder: (TokenSet, TokenSet) => deepembedding.Parsley[String]) = (startImpl, letterImpl) match - { - case (BitSetImpl(start), BitSetImpl(letter)) => lexeme(new Parsley(builder(start, letter))) - case (BitSetImpl(start), Predicate(letter)) => lexeme(new Parsley(builder(start, letter))) - case (Predicate(start), BitSetImpl(letter)) => lexeme(new Parsley(builder(start, letter))) - case (Predicate(start), Predicate(letter)) => lexeme(new Parsley(builder(start, letter))) - case _ => lexeme(attempt(parser.guard(predicate, s"unexpected $name " + _))) + private def keyOrOp(startImpl: Impl, letterImpl: Impl, parser: Parsley[String], predicate: String => Boolean, + combinatorName: String, name: String, illegalName: String) = { + val builder = (start: TokenSet, letter: TokenSet) => + new Parsley(new deepembedding.NonSpecific(combinatorName, name, illegalName, start, letter, !predicate(_))) + lexeme((startImpl, letterImpl) match + { + case (BitSetImpl(start), BitSetImpl(letter)) => builder(start, letter) + case (BitSetImpl(start), Predicate(letter)) => builder(start, letter) + case (Predicate(start), BitSetImpl(letter)) => builder(start, letter) + case (Predicate(start), Predicate(letter)) => builder(start, letter) + case _ => attempt((parser ? name).guard(predicate, s"unexpected $illegalName " + _)) + }) } // Identifiers & Reserved words @@ -129,15 +133,14 @@ final class TokenParser(lang: LanguageDef) * fail on identifiers that are reserved words (i.e. keywords). Legal identifier characters and * keywords are defined in the `LanguageDef` provided to the token parser. 
An identifier is treated * as a single token using `attempt`.*/ - lazy val identifier: Parsley[String] = keyOrOp(lang.identStart, lang.identLetter, ident, !isReservedName(_), "keyword", - (start, letter) => new deepembedding.Identifier(start, letter, theReservedNames)) + lazy val identifier: Parsley[String] = keyOrOp(lang.identStart, lang.identLetter, ident, !isReservedName(_), "identifier", "identifier", "keyword") /**The lexeme parser `keyword(name)` parses the symbol `name`, but it also checks that the `name` * is not a prefix of a valid identifier. A `keyword` is treated as a single token using `attempt`.*/ def keyword(name: String): Parsley[Unit] = lang.identLetter match { - case BitSetImpl(letter) => lexeme(new Parsley(new deepembedding.Keyword(name, letter, lang.caseSensitive))) - case Predicate(letter) => lexeme(new Parsley(new deepembedding.Keyword(name, letter, lang.caseSensitive))) + case BitSetImpl(letter) => lexeme(new Parsley(new deepembedding.Specific("keyword", name, letter, lang.caseSensitive))) + case Predicate(letter) => lexeme(new Parsley(new deepembedding.Specific("keyword", name, letter, lang.caseSensitive))) case _ => lexeme(attempt(caseString(name) *> notFollowedBy(identLetter) ? ("end of " + name))) } @@ -151,21 +154,19 @@ final class TokenParser(lang: LanguageDef) private val theReservedNames = if (lang.caseSensitive) lang.keywords else lang.keywords.map(_.toLowerCase) private lazy val identStart = toParser(lang.identStart) private lazy val identLetter = toParser(lang.identLetter) - private lazy val ident = lift2((c: Char, cs: List[Char]) => (c::cs).mkString, identStart, many(identLetter)) ? "identifier" + private lazy val ident = lift2((c: Char, cs: List[Char]) => (c::cs).mkString, identStart, many(identLetter)) // Operators & Reserved ops /**This lexeme parser parses a legal operator. Returns the name of the operator. This parser * will fail on any operators that are reserved operators. 
Legal operator characters and * reserved operators are defined in the `LanguageDef` provided to the token parser. A * `userOp` is treated as a single token using `attempt`.*/ - lazy val userOp: Parsley[String] = keyOrOp(lang.opStart, lang.opLetter, oper, !isReservedOp(_), "reserved operator", - (start, letter) => new deepembedding.UserOp(start, letter, lang.operators)) + lazy val userOp: Parsley[String] = keyOrOp(lang.opStart, lang.opLetter, oper, !isReservedOp(_), "userOp", "operator", "reserved operator") /**This non-lexeme parser parses a reserved operator. Returns the name of the operator. * Legal operator characters and reserved operators are defined in the `LanguageDef` * provided to the token parser. A `reservedOp_` is treated as a single token using `attempt`.*/ - lazy val reservedOp_ : Parsley[String] = keyOrOp(lang.opStart, lang.opLetter, oper, isReservedOp(_), "non-reserved operator", - (start, letter) => new deepembedding.ReservedOp(start, letter, lang.operators)) + lazy val reservedOp_ : Parsley[String] = keyOrOp(lang.opStart, lang.opLetter, oper, isReservedOp(_), "reservedOp", "operator", "non-reserved operator") /**This lexeme parser parses a reserved operator. Returns the name of the operator. Legal * operator characters and reserved operators are defined in the `LanguageDef` provided @@ -182,8 +183,8 @@ final class TokenParser(lang: LanguageDef) * `attempt`.*/ def operator_(name: String): Parsley[Unit] = lang.opLetter match { - case BitSetImpl(letter) => new Parsley(new deepembedding.Operator(name, letter)) - case Predicate(letter) => new Parsley(new deepembedding.Operator(name, letter)) + case BitSetImpl(letter) => new Parsley(new deepembedding.Specific("operator", name, letter, true)) + case Predicate(letter) => new Parsley(new deepembedding.Specific("operator", name, letter, true)) case _ => attempt(name *> notFollowedBy(opLetter) ? 
("end of " + name)) } @@ -200,7 +201,7 @@ final class TokenParser(lang: LanguageDef) private def isReservedOp(op: String): Boolean = lang.operators.contains(op) private lazy val opStart = toParser(lang.opStart) private lazy val opLetter = toParser(lang.opLetter) - private lazy val oper = lift2((c: Char, cs: List[Char]) => (c::cs).mkString, opStart, many(opLetter)) ? "operator" + private lazy val oper = lift2((c: Char, cs: List[Char]) => (c::cs).mkString, opStart, many(opLetter)) // Chars & Strings /**This lexeme parser parses a single literal character. Returns the literal character value. @@ -312,10 +313,7 @@ final class TokenParser(lang: LanguageDef) * or "0O". Returns the value of the number.*/ lazy val octal: Parsley[Int] = lexeme('0' *> octal_) - private def number(base: Int, baseDigit: Parsley[Char]): Parsley[Int] = - { - for (digits <- some(baseDigit)) yield digits.foldLeft(0)((x, d) => base*x + d.asDigit) - } + private def number(base: Int, baseDigit: Parsley[Char]): Parsley[Int] = baseDigit.foldLeft(0)((x, d) => base*x + d.asDigit) // White space & symbols /**Lexeme parser `symbol(s)` parses `string(s)` and skips trailing white space.*/ diff --git a/src/main/scala/parsley/internal/deepembedding/AlternativeEmbedding.scala b/src/main/scala/parsley/internal/deepembedding/AlternativeEmbedding.scala index fc0feebf5..15f4d8735 100644 --- a/src/main/scala/parsley/internal/deepembedding/AlternativeEmbedding.scala +++ b/src/main/scala/parsley/internal/deepembedding/AlternativeEmbedding.scala @@ -137,8 +137,7 @@ private [parsley] final class <|>[A, B](_p: =>Parsley[A], _q: =>Parsley[B]) exte val (c, expected) = lead match { case ct@CharTok(d) => (d, ct.expected) case st@StringTok(s) => (s.head, if (st.expected == null) "\"" + s + "\"" else st.expected) - case kw@Keyword(k) => (k.head, if (kw.expected == null) k else kw.expected) - case op@Operator(o) => (o.head, if (op.expected == null) o else op.expected) + case st@Specific(s) => (s.head, if (st.expected == null) s 
else st.expected) case op@MaxOp(o) => (o.head, if (op.expected == null) o else op.expected) case sl: StringLiteral => ('"', if (sl.expected == null) "string" else sl.expected) case rs: RawStringLiteral => ('"', if (rs.expected == null) "string" else rs.expected) @@ -155,7 +154,7 @@ private [parsley] final class <|>[A, B](_p: =>Parsley[A], _q: =>Parsley[B]) exte } @tailrec private def tablable(p: Parsley[_]): Option[Parsley[_]] = p match { // CODO: Numeric parsers by leading digit (This one would require changing the foldTablified function a bit) - case t@(_: CharTok | _: StringTok | _: Keyword | _: StringLiteral | _: RawStringLiteral | _: Operator | _: MaxOp) => Some(t) + case t@(_: CharTok | _: StringTok | _: Specific | _: StringLiteral | _: RawStringLiteral | _: MaxOp) => Some(t) case Attempt(t) => tablable(t) case (_: Pure[_]) <*> t => tablable(t) case Lift2(_, t, _) => tablable(t) diff --git a/src/main/scala/parsley/internal/deepembedding/TokenEmbedding.scala b/src/main/scala/parsley/internal/deepembedding/TokenEmbedding.scala index b3fb435dd..c9c68ca37 100644 --- a/src/main/scala/parsley/internal/deepembedding/TokenEmbedding.scala +++ b/src/main/scala/parsley/internal/deepembedding/TokenEmbedding.scala @@ -4,51 +4,44 @@ import parsley.TokenParser.TokenSet import Sign.SignType import parsley.internal.{instructions, UnsafeOption} -private [parsley] class WhiteSpace(ws: TokenSet, start: String, end: String, line: String, nested: Boolean) +private [parsley] final class WhiteSpace(ws: TokenSet, start: String, end: String, line: String, nested: Boolean) extends Singleton[Unit]("whiteSpace", new instructions.TokenWhiteSpace(ws, start, end, line, nested)) -private [parsley] class SkipComments(start: String, end: String, line: String, nested: Boolean) +private [parsley] final class SkipComments(start: String, end: String, line: String, nested: Boolean) extends Singleton[Unit]("skipComments", new instructions.TokenSkipComments(start, end, line, nested)) -private 
[parsley] class Comment(start: String, end: String, line: String, nested: Boolean) +private [parsley] final class Comment(start: String, end: String, line: String, nested: Boolean) extends Singleton[Unit]("comment", new instructions.TokenComment(start, end, line, nested)) -private [parsley] class Sign[A](ty: SignType, val expected: UnsafeOption[String] = null) +private [parsley] final class Sign[A](ty: SignType, val expected: UnsafeOption[String] = null) extends SingletonExpect[A => A]("sign", new Sign(ty, _), new instructions.TokenSign(ty, expected)) -private [parsley] class Natural(val expected: UnsafeOption[String] = null) +private [parsley] final class Natural(val expected: UnsafeOption[String] = null) extends SingletonExpect[Int]("natural", new Natural(_), new instructions.TokenNatural(expected)) -private [parsley] class Float(val expected: UnsafeOption[String] = null) +private [parsley] final class Float(val expected: UnsafeOption[String] = null) extends SingletonExpect[Double]("float", new Float(_), new instructions.TokenFloat(expected)) -private [parsley] class Escape(val expected: UnsafeOption[String] = null) +private [parsley] final class Escape(val expected: UnsafeOption[String] = null) extends SingletonExpect[Char]("escape", new Escape(_), new instructions.TokenEscape(expected)) -private [parsley] class StringLiteral(ws: TokenSet, val expected: UnsafeOption[String] = null) +private [parsley] final class StringLiteral(ws: TokenSet, val expected: UnsafeOption[String] = null) extends SingletonExpect[String]("stringLiteral", new StringLiteral(ws, _), new instructions.TokenString(ws, expected)) -private [parsley] class RawStringLiteral(val expected: UnsafeOption[String] = null) +private [parsley] final class RawStringLiteral(val expected: UnsafeOption[String] = null) extends SingletonExpect[String]("rawStringLiteral", new RawStringLiteral(_), new instructions.TokenRawString(expected)) -private [parsley] class Identifier(start: TokenSet, letter: TokenSet, 
keywords: Set[String], val expected: UnsafeOption[String] = null) - extends SingletonExpect[String]("identifier", new Identifier(start, letter, keywords, _), - new instructions.TokenIdentifier(start, letter, keywords, expected)) +private [parsley] class NonSpecific(combinatorName: String, name: String, illegalName: String, start: TokenSet, + letter: TokenSet, illegal: String => Boolean, val expected: UnsafeOption[String] = null) + extends SingletonExpect[String](combinatorName, new NonSpecific(combinatorName, name, illegalName, start, letter, illegal, _), + new instructions.TokenNonSpecific(name, illegalName)(start, letter, illegal, expected)) -private [parsley] class UserOp(start: TokenSet, letter: TokenSet, ops: Set[String], val expected: UnsafeOption[String] = null) - extends SingletonExpect[String]("userOp", new UserOp(start, letter, ops, _), new instructions.TokenUserOperator(start, letter, ops, expected)) +private [parsley] final class Specific(name: String, private [Specific] val specific: String, + letter: TokenSet, caseSensitive: Boolean, val expected: UnsafeOption[String] = null) + extends SingletonExpect[Unit](s"$name($specific)", new Specific(name, specific, letter, caseSensitive, _), + new instructions.TokenSpecific(specific, letter, caseSensitive, expected)) -private [parsley] class ReservedOp(start: TokenSet, letter: TokenSet, ops: Set[String], val expected: UnsafeOption[String] = null) - extends SingletonExpect[String]("reservedOp", new ReservedOp(start, letter, ops, _), new instructions.TokenOperator(start, letter, ops, expected)) - -private [parsley] class Keyword(private [Keyword] val keyword: String, letter: TokenSet, caseSensitive: Boolean, val expected: UnsafeOption[String] = null) - extends SingletonExpect[Unit](s"keyword($keyword)", new Keyword(keyword, letter, caseSensitive, _), - new instructions.TokenKeyword(keyword, letter, caseSensitive, expected)) - -private [parsley] class Operator(private [Operator] val operator: String, letter: 
TokenSet, val expected: UnsafeOption[String] = null) - extends SingletonExpect[Unit](s"operator($operator)", new Operator(operator, letter, _), new instructions.TokenOperator_(operator, letter, expected)) - -private [parsley] class MaxOp(private [MaxOp] val operator: String, ops: Set[String], val expected: UnsafeOption[String] = null) +private [parsley] final class MaxOp(private [MaxOp] val operator: String, ops: Set[String], val expected: UnsafeOption[String] = null) extends SingletonExpect[Unit](s"maxOp($operator)", new MaxOp(operator, ops, _), new instructions.TokenMaxOp(operator, ops, expected)) private [parsley] object Sign { @@ -64,11 +57,8 @@ private [parsley] object Sign { } // $COVERAGE-OFF$ -private [deepembedding] object Keyword { - def unapply(self: Keyword): Option[String] = Some(self.keyword) -} -private [deepembedding] object Operator { - def unapply(self: Operator): Option[String] = Some(self.operator) +private [deepembedding] object Specific { + def unapply(self: Specific): Option[String] = Some(self.specific) } private [deepembedding] object MaxOp { def unapply(self: MaxOp): Option[String] = Some(self.operator) diff --git a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala index 3d8263433..b0f144255 100644 --- a/src/main/scala/parsley/internal/instructions/TokenInstrs.scala +++ b/src/main/scala/parsley/internal/instructions/TokenInstrs.scala @@ -121,11 +121,11 @@ private [internal] final class TokenSkipComments(start: String, end: String, lin // $COVERAGE-ON$ } -private [instructions] abstract class TokenLexi(name: String, illegalName: String) +private [internal] final class TokenNonSpecific(name: String, illegalName: String) (start: TokenSet, letter: TokenSet, illegal: String => Boolean, _expected: UnsafeOption[String]) extends Instr { private val expected = if (_expected == null) name else _expected - final override def apply(ctx: Context): Unit = { + override def 
apply(ctx: Context): Unit = { if (ctx.moreInput && start(ctx.nextChar)) { val name = new StringBuilder() name += ctx.nextChar @@ -146,7 +146,7 @@ private [instructions] abstract class TokenLexi(name: String, illegalName: Strin } } - @tailrec private final def restOfToken(ctx: Context, tok: StringBuilder): Unit = { + @tailrec private def restOfToken(ctx: Context, tok: StringBuilder): Unit = { if (ctx.moreInput && letter(ctx.nextChar)) { tok += ctx.nextChar ctx.offset += 1 @@ -156,20 +156,11 @@ private [instructions] abstract class TokenLexi(name: String, illegalName: Strin } // $COVERAGE-OFF$ - final override def toString: String = s"TokenLexi($name)" + override def toString: String = s"TokenNonSpecific($name)" // $COVERAGE-ON$ } -private [internal] final class TokenIdentifier(start: TokenSet, letter: TokenSet, keywords: Set[String], _expected: UnsafeOption[String]) - extends TokenLexi("identifier", "keyword")(start, letter, keywords, _expected) - -private [internal] final class TokenUserOperator(start: TokenSet, letter: TokenSet, reservedOps: Set[String], _expected: UnsafeOption[String]) - extends TokenLexi("operator", "reserved operator")(start, letter, reservedOps, _expected) - -private [internal] final class TokenOperator(start: TokenSet, letter: TokenSet, reservedOps: Set[String], _expected: UnsafeOption[String]) - extends TokenLexi("operator", "non-reserved operator")(start, letter, reservedOps.andThen(!_), _expected) - -private [instructions] abstract class TokenSpecific(_specific: String, caseSensitive: Boolean, _expected: UnsafeOption[String]) extends Instr { +private [instructions] abstract class TokenSpecificAllowTrailing(_specific: String, caseSensitive: Boolean, _expected: UnsafeOption[String]) extends Instr { private final val expected = if (_expected == null) _specific else _expected protected final val expectedEnd = if (_expected == null) "end of " + _specific else _expected private final val specific = (if (caseSensitive) _specific else 
_specific.toLowerCase).toCharArray @@ -195,15 +186,11 @@ private [instructions] abstract class TokenSpecific(_specific: String, caseSensi if (ctx.inputsz >= ctx.offset + strsz) readSpecific(ctx, ctx.offset, 0) else ctx.fail(expected) } - - // $COVERAGE-OFF$ - override def toString: String = s"TokenSpecific(${_specific})" - // $COVERAGE-ON$ } -private [internal] abstract class TokenSpecificNoTrailLetter(keyword: String, letter: TokenSet, caseSensitive: Boolean, expected: UnsafeOption[String]) - extends TokenSpecific(keyword, caseSensitive, expected) { - final override def postprocess(ctx: Context, i: Int): Unit = { +private [internal] final class TokenSpecific(_specific: String, letter: TokenSet, caseSensitive: Boolean, expected: UnsafeOption[String]) + extends TokenSpecificAllowTrailing(_specific, caseSensitive, expected) { + override def postprocess(ctx: Context, i: Int): Unit = { if (i < ctx.inputsz && letter(ctx.input(i))) { ctx.fail(expectedEnd) ctx.restoreState() @@ -213,16 +200,14 @@ private [internal] abstract class TokenSpecificNoTrailLetter(keyword: String, le ctx.pushAndContinue(()) } } -} -private [internal] final class TokenKeyword(keyword: String, letter: TokenSet, caseSensitive: Boolean, expected: UnsafeOption[String]) - extends TokenSpecificNoTrailLetter(keyword, letter, caseSensitive, expected) - -private [internal] final class TokenOperator_(operator: String, letter: TokenSet, expected: UnsafeOption[String]) - extends TokenSpecificNoTrailLetter(operator, letter, true, expected) + // $COVERAGE-OFF$ + override def toString: String = s"TokenSpecific(${_specific})" + // $COVERAGE-ON$ +} private [internal] final class TokenMaxOp(operator: String, _ops: Set[String], expected: UnsafeOption[String]) - extends TokenSpecific(operator, true, expected) { + extends TokenSpecificAllowTrailing(operator, true, expected) { private val ops = Radix(_ops.collect { case op if op.length > operator.length && op.startsWith(operator) => op.substring(operator.length) })