From 2c2410d0bde1a2c23db7a9c781a72f6e501c19d3 Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Sun, 5 Jan 2025 19:37:40 +0000 Subject: [PATCH] feat: backport implementation --- .../backend/SequenceEmbedding.scala | 5 +- .../deepembedding/backend/StrictParsley.scala | 82 ++++--------------- .../deepembedding/frontend/LazyParsley.scala | 11 +-- .../machine/instructions/CoreInstrs.scala | 6 +- .../instructions/PrimitiveInstrs.scala | 67 +-------------- .../src/main/scala/parsley/registers.scala | 9 +- .../src/test/scala/parsley/CoreTests.scala | 6 +- 7 files changed, 34 insertions(+), 152 deletions(-) diff --git a/parsley/shared/src/main/scala/parsley/internal/deepembedding/backend/SequenceEmbedding.scala b/parsley/shared/src/main/scala/parsley/internal/deepembedding/backend/SequenceEmbedding.scala index 4cda3f864..f7466d696 100644 --- a/parsley/shared/src/main/scala/parsley/internal/deepembedding/backend/SequenceEmbedding.scala +++ b/parsley/shared/src/main/scala/parsley/internal/deepembedding/backend/SequenceEmbedding.scala @@ -93,10 +93,9 @@ private [deepembedding] final class >>=[A, B](val p: StrictParsley[A], private [ } override def codeGen[M[_, +_]: ContOps, R](producesResults: Boolean)(implicit instrs: InstrBuffer, state: CodeGenState): M[R, Unit] = { suspend(p.codeGen[M, R](producesResults = true)) |> { - instrs += instructions.DynCall[A] { x => + instrs += instructions.DynCall[A] { (x, refsSz) => val p = f(x) - // FIXME: suppress results within p, then can remove pop - p.demandCalleeSave(state.numRegs) + p.setMinReferenceAllocation(refsSz) if (implicitly[ContOps[M]].isStackSafe) p.overflows() p.instrs } diff --git a/parsley/shared/src/main/scala/parsley/internal/deepembedding/backend/StrictParsley.scala b/parsley/shared/src/main/scala/parsley/internal/deepembedding/backend/StrictParsley.scala index 3ed341c8d..ab2c83d15 100644 --- a/parsley/shared/src/main/scala/parsley/internal/deepembedding/backend/StrictParsley.scala +++ b/parsley/shared/src/main/scala/parsley/internal/deepembedding/backend/StrictParsley.scala @@ -16,7 +16,7 @@ import parsley.internal.deepembedding.ContOps, ContOps.{perform, ContAdapter} import parsley.internal.machine.instructions, instructions.{Instr, Label} import StrictParsley.* -import org.typelevel.scalaccompat.annotation.{nowarn, nowarn3} +import org.typelevel.scalaccompat.annotation.nowarn3 /** This is the root type of the parsley "backend": it represents a combinator tree * where the join-points in the tree (recursive or otherwise) have been factored into @@ -44,14 +44,15 @@ private [deepembedding] trait StrictParsley[+A] { * @param state the code generator state * @return the final array of instructions for this parser */ - final private [deepembedding] def generateInstructions[M[_, +_]: ContOps](numRegsUsedByParent: Int, usedRefs: Set[Ref[_]], + final private [deepembedding] def generateInstructions[M[_, +_]: ContOps](minRef: Int, usedRefs: Set[Ref[_]], bodyMap: Map[Let[_], StrictParsley[_]]) (implicit state: CodeGenState): Array[Instr] = { implicit val instrs: InstrBuffer = newInstrBuffer perform { - generateCalleeSave[M, Array[Instr]](numRegsUsedByParent, this.codeGen(producesResults = true), usedRefs) |> { - // When `numRegsUsedByParent` is -1 this is top level, otherwise it is a flatMap - instrs += (if (numRegsUsedByParent >= 0) instructions.Return else instructions.Halt) + allocateAndExpandRefs(minRef, usedRefs) + this.codeGen[M, Array[Instr]](producesResults = true) |> { + // When `minRef` is -1 this is top level, otherwise it is a flatMap + instrs += (if (minRef >= 0) instructions.Return else instructions.Halt) val letRets = finaliseLets(bodyMap) generateHandlers(state.handlers) finaliseInstrs(instrs, state.nlabels, letRets) @@ -98,51 +99,6 @@ private [deepembedding] object StrictParsley { /** Make a fresh instruction buffer */ private def newInstrBuffer: InstrBuffer = new ResizableArray() - /** Given a set of in-use registers, this function will allocate those that are currented - * unallocated, giving them addresses not currently in use by the allocated registers - * - * @param unallocatedRegs the set of registers that need allocating - * @param regs the set of all registers used by a specific parser - * @return the list of slots that have been freshly allocated to - */ - private def allocateRegisters(unallocatedRegs: Set[Ref[_]], regs: Set[Ref[_]]): List[Int] = { - // Global registers cannot occupy the same slot as another global register - // In a flatMap, that means a newly discovered global register must be allocated to a new slot: this may resize the register pool - assert(unallocatedRegs == regs.filterNot(_.allocated)) - if (unallocatedRegs.nonEmpty) { - val usedSlots = regs.collect { - case reg if reg.allocated => reg.addr - }: @nowarn - val freeSlots = (0 until regs.size).filterNot(usedSlots) - applyAllocation(unallocatedRegs, freeSlots) - } - else Nil - } - - /** Given a set of unallocated registers and a supply of unoccupied slots, allocates each - * register to one of the slots. - * - * @param regs the set of registers that require allocation - * @param freeSlots the supply of slots that are currently not in-use - * @return the slots that were used for allocation - */ - private def applyAllocation(refs: Set[Ref[_]] @nowarn3, freeSlots: Iterable[Int]): List[Int] = { - val allocatedSlots = mutable.ListBuffer.empty[Int] - // TODO: For scala 2.12, use lazyZip and foreach! - /*for ((ref, addr) <- refs.zip(freeSlots)) { - ref.allocate(addr) - allocatedSlots += addr - }*/ // FIXME: until 5.0.0 we need to suppress warnings, and Scala 3 is being annoying (refreshing change) - type Ref_ = Ref[_] - refs.zip(freeSlots).foreach { (refAndAddr: (Ref[_], Int) @nowarn3) => - val ref: Ref_ @nowarn3 = refAndAddr._1 - val addr = refAndAddr._2 - ref.allocate(addr) - allocatedSlots += addr - } - allocatedSlots.toList - } - /** If required, generates callee-save around a main body of instructions. * * This is needed when using `flatMap`, as it is unaware of the register @@ -160,23 +116,17 @@ private [deepembedding] object StrictParsley { * @param instrs the instruction buffer * @param state the code generation state, for label generation */ - private def generateCalleeSave[M[_, +_]: ContOps, R](numRegsUsedByParent: Int, bodyGen: =>M[R, Unit], usedRefs: Set[Ref[_]]) - (implicit instrs: InstrBuffer, state: CodeGenState): M[R, Unit] = { - val reqRegs = usedRefs.size - val localRegs: Set[Ref[_]] @nowarn3 = usedRefs.filterNot(_.allocated): @nowarn3 - val allocatedRegs = allocateRegisters(localRegs, usedRefs) - val calleeSaveRequired = numRegsUsedByParent >= 0 // if this is -1, then we are the top level and have no parent, otherwise it needs to be done - if (calleeSaveRequired && localRegs.nonEmpty) { - val end = state.freshLabel() - val calleeSave = state.freshLabel() - instrs += new instructions.Label(calleeSave) - instrs += new instructions.CalleeSave(end, localRegs, reqRegs, allocatedRegs, numRegsUsedByParent) - bodyGen |> { - instrs += new instructions.Jump(calleeSave) - instrs += new instructions.Label(end) - } + private def allocateAndExpandRefs(minRef: Int, usedRefs: Set[Ref[_]])(implicit instrs: InstrBuffer): Unit = { + var nextSlot = math.max(minRef, 0) + for (r <- usedRefs if !r.allocated) { + r.allocate(nextSlot) + nextSlot += 1 + } + val totalSlotsRequired = nextSlot + // if this is -1, then we are the top level and have no parent, otherwise it needs to be done + if (minRef >= 0 && (minRef < totalSlotsRequired)) { + instrs += new instructions.ExpandRefs(totalSlotsRequired) } - else bodyGen } /** Generates each of the shared, non-recursive, parsers that have been ''used'' by diff --git a/parsley/shared/src/main/scala/parsley/internal/deepembedding/frontend/LazyParsley.scala b/parsley/shared/src/main/scala/parsley/internal/deepembedding/frontend/LazyParsley.scala index eafd61166..300df525d 100644 --- a/parsley/shared/src/main/scala/parsley/internal/deepembedding/frontend/LazyParsley.scala +++ b/parsley/shared/src/main/scala/parsley/internal/deepembedding/frontend/LazyParsley.scala @@ -44,12 +44,7 @@ private [parsley] abstract class LazyParsley[+A] private [deepembedding] { // The instructions used to execute this parser along with the number of registers it uses final private [parsley] lazy val (instrs: Array[Instr], numRegs: Int) = computeInstrs - /** This parser is the result of a `flatMap` operation, and as such must perform - * callee-save on `numRegs` registers (which belong to its parent) - * - * @param numRegs the number of registers the parent uses (these must be saved) - */ - private [deepembedding] def demandCalleeSave(numRegs: Int): Unit = numRegsUsedByParent = numRegs + private [deepembedding] def setMinReferenceAllocation(minRef: Int): Unit = this.minRef = minRef // Internals // To ensure that stack-overflow cannot occur during the processing of particularly @@ -87,7 +82,7 @@ private [parsley] abstract class LazyParsley[+A] private [deepembedding] { final private var cps = false final private [deepembedding] def isCps: Boolean = cps /** how many registers are used by the ''parent'' of this combinator (this combinator is part of a `flatMap` when this is not -1) */ - final private var numRegsUsedByParent = -1 + final private var minRef = -1 /** Computes the instructions associated with this parser as well as the number of * registers it requires in a (possibly) stack-safe way. @@ -120,7 +115,7 @@ private [parsley] abstract class LazyParsley[+A] private [deepembedding] { implicit val letMap: LetMap = LetMap(letFinderState.lets, letFinderState.recs) for { sp <- this.optimised } yield { implicit val state: backend.CodeGenState = new backend.CodeGenState(letFinderState.numRegs) - sp.generateInstructions(numRegsUsedByParent, usedRefs, letMap.bodies) + sp.generateInstructions(minRef, usedRefs, letMap.bodies) } } }, letFinderState.numRegs) diff --git a/parsley/shared/src/main/scala/parsley/internal/machine/instructions/CoreInstrs.scala b/parsley/shared/src/main/scala/parsley/internal/machine/instructions/CoreInstrs.scala index 028f2f3f8..c98d12aa8 100644 --- a/parsley/shared/src/main/scala/parsley/internal/machine/instructions/CoreInstrs.scala +++ b/parsley/shared/src/main/scala/parsley/internal/machine/instructions/CoreInstrs.scala @@ -72,17 +72,17 @@ private [internal] object Apply extends Instr { } // Monadic -private [internal] final class DynCall(f: Any => Array[Instr]) extends Instr { +private [internal] final class DynCall(f: (Any, Int) => Array[Instr]) extends Instr { override def apply(ctx: Context): Unit = { ensureRegularInstruction(ctx) - ctx.call(f(ctx.stack.upop())) + ctx.call(f(ctx.stack.upop(), ctx.regs.size)) } // $COVERAGE-OFF$ override def toString: String = "DynCall(?)" // $COVERAGE-ON$ } private [internal] object DynCall { - def apply[A](f: A => Array[Instr]): DynCall = new DynCall(f.asInstanceOf[Any => Array[Instr]]) + def apply[A](f: (A, Int) => Array[Instr]): DynCall = new DynCall(f.asInstanceOf[(Any, Int) => Array[Instr]]) } // Control Flow diff --git a/parsley/shared/src/main/scala/parsley/internal/machine/instructions/PrimitiveInstrs.scala b/parsley/shared/src/main/scala/parsley/internal/machine/instructions/PrimitiveInstrs.scala index 47e514b01..eaa11efe9 100644 --- a/parsley/shared/src/main/scala/parsley/internal/machine/instructions/PrimitiveInstrs.scala +++ b/parsley/shared/src/main/scala/parsley/internal/machine/instructions/PrimitiveInstrs.scala @@ -5,15 +5,12 @@ */ package parsley.internal.machine.instructions -import parsley.state.Ref import parsley.token.errors.LabelConfig import parsley.internal.errors.ExpectDesc import parsley.internal.machine.Context import parsley.internal.machine.XAssert._ -import org.typelevel.scalaccompat.annotation.nowarn3 - private [internal] final class Satisfies(f: Char => Boolean, expected: Iterable[ExpectDesc]) extends Instr { def this(f: Char => Boolean, expected: LabelConfig) = this(f, expected.asExpectDescs) override def apply(ctx: Context): Unit = { @@ -156,67 +153,11 @@ private [internal] object Span extends Instr { // $COVERAGE-ON$ } -// This instruction holds mutate state, but it is safe to do so, because it's always the first instruction of a DynCall. -private [parsley] final class CalleeSave(var label: Int, localRegs: Set[Ref[_]] @nowarn3, reqSize: Int, slots: List[(Int, Int)], saveArray: Array[AnyRef]) - extends InstrWithLabel { - private def this(label: Int, localRegs: Set[Ref[_]], reqSize: Int, slots: List[Int]) = - this(label, localRegs, reqSize, slots.zipWithIndex, new Array[AnyRef](slots.length)) - // this filters out the slots to ensure we only do callee-save on registers that might exist in the parent - def this(label: Int, localRefs: Set[Ref[_]], reqSize: Int, slots: List[Int], numRegsInContext: Int) = - this(label, localRefs, reqSize, slots.takeWhile(_ < numRegsInContext)) - private var inUse = false - private var oldRegs: Array[AnyRef] = null - - private def save(ctx: Context): Unit = { - for ((slot, idx) <- slots) { - saveArray(idx) = ctx.regs(slot) - ctx.regs(slot) = null - } - // If this is known to increase the size of the register pool, then we need to keep the old array to the side - if (reqSize > ctx.regs.size) { - oldRegs = ctx.regs - ctx.regs = java.util.Arrays.copyOf(oldRegs, reqSize) - } - } - - private def restore(ctx: Context): Unit = { - if (oldRegs != null) { - java.lang.System.arraycopy(ctx.regs, 0, oldRegs, 0, oldRegs.size) - ctx.regs = oldRegs - oldRegs = null - } - for ((slot, idx) <- slots) { - ctx.regs(slot) = saveArray(idx) - saveArray(idx) = null - } - // This is the only way to get them reallocated on the next invocation - localRegs.foreach(_.deallocate()): @nowarn3 - } - - private def continue(ctx: Context): Unit = { - ctx.handlers = ctx.handlers.tail - if (ctx.good) ctx.pc = label - else ctx.fail() - } - +private [parsley] final class ExpandRefs(newSz: Int) extends Instr { override def apply(ctx: Context): Unit = { - // Second-entry, callee-restore and either jump or fail - if (inUse) { - restore(ctx) - inUse = false - continue(ctx) - } - // Entry for the first time, register as a handle, callee-save and inc - else { - ensureRegularInstruction(ctx) - save(ctx) - inUse = true - ctx.pushHandler(ctx.pc) - ctx.inc() + if (newSz > ctx.regs.size) { + ctx.regs = java.util.Arrays.copyOf(ctx.regs, newSz) } + ctx.inc() } - - // $COVERAGE-OFF$ - override def toString: String = s"CalleeSave($label, newSz = $reqSize, slotsToSave = $slots)" - // $COVERAGE-ON$ } diff --git a/parsley/shared/src/main/scala/parsley/registers.scala b/parsley/shared/src/main/scala/parsley/registers.scala index 3c7c51b69..8f6485cce 100644 --- a/parsley/shared/src/main/scala/parsley/registers.scala +++ b/parsley/shared/src/main/scala/parsley/registers.scala @@ -429,8 +429,6 @@ object registers { assert(!allocated) this._v = v } - // This must ONLY be used by CalleeSave in flatMap - private [parsley] def deallocate(): Unit = _v = -1 //override def toString: String = s"Reg(${if (allocated) addr else "unallocated"})" } // $COVERAGE-OFF$ @@ -442,13 +440,12 @@ object registers { /** This function creates a new (global) register of a given type. * * The register created by this function is not allocated to any specific parser until it has been - * used by a parser. It should not be used with multiple different parsers. + * used by a parser. It should not be used with multiple different parsers: while this ''may'' work, + * there is a chance that two such registers collide in allocation, which is undefined behaviour. * * @tparam A the type to be contained in this register during runtime * @return a new register which can contain the given type. - * @note registers created in this manner ''must'' be initialised in the top-level parser and not - * inside a `flatMap`, as this may make them corrupt other registers. They should be used with - * caution. It is recommended to use `makeReg` and `fillReg` where possible. + * * @note They should be used with caution. It is recommended to use `makeReg` and `fillReg` where possible. * @since 2.2.0 */ def make[A]: Reg[A] = new Reg diff --git a/parsley/shared/src/test/scala/parsley/CoreTests.scala b/parsley/shared/src/test/scala/parsley/CoreTests.scala index 316c61d26..9c4d008f8 100644 --- a/parsley/shared/src/test/scala/parsley/CoreTests.scala +++ b/parsley/shared/src/test/scala/parsley/CoreTests.scala @@ -396,10 +396,10 @@ class CoreTests extends ParsleyTest { "flatMap" should "consistently generate a callee-save instruction if needed" in { import parsley.state._ val r = Ref.make[Int] - val p = pure(7).flatMap { _ => - r.set(4) *> r.get + val p = unit.flatMap { _ => + r.update(_ + 1) *> r.get } - (p *> p).parse("") shouldBe Success(4) + (unit.flatMap(_ => r.set(0)) *> p *> p).parse("") shouldBe Success(2) } "span" should "return all the input parsed by a parser, exactly as it was" in {