Skip to content

Commit

Permalink
New Reference Expansion (#248)
Browse files Browse the repository at this point in the history
`CalleeSave` is a problematic instruction that has possible
race-conditions, as well as being badly behaved for recursively bound
`flatMap`s. This system was designed to allow for references to reuse
slots in the array when necessary. However, it is much safer to think of
all the references in the universe being independent; the problem, of
course, is that not all of these references are visible at the
top-level, so there is likely not enough space to accommodate them.

This PR reworks things so that the `Context` feeds back information to a
soon-to-be-generated `flatMap` body about the currently largest
residency found for references. The `flatMap` will always allocate its
references above this, and emit a new instruction that *just* resizes
the refs array.

It is possible that global references find themselves occupying the same
slot as another if it was used across two parsers, and this is more
explicitly checked now up-front in the allocator. Whereas before only if
a reference was out of range would it crash, now it will crash
immediately as soon as a clash for the references of a parser appear:
this avoids the undefined behaviour where two references would write
over each other.

In all, a better system. Fixes #241
  • Loading branch information
j-mie6 authored Jan 5, 2025
2 parents 0f980db + 62d86d8 commit 5712969
Show file tree
Hide file tree
Showing 12 changed files with 57 additions and 210 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ trait PlatformSpecific {
} yield {
src.close()
val internal = p.internal
new Context(internal.instrs, input, internal.numRegs, Some(file.getName)).run()
new Context(internal.instrs, input, internal.numRefs, Some(file.getName)).run()
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion parsley/shared/src/main/scala/parsley/Parsley.scala
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ final class Parsley[+A] private [parsley] (private [parsley] val internal: front
* @group run
*/
def parse[Err: ErrorBuilder](input: String): Result[Err, A] = {
try new Context(internal.instrs, input, internal.numRegs, None).run()
try new Context(internal.instrs, input, internal.numRefs, None).run()
catch {
// $COVERAGE-OFF$
case UserException(err) => throw err // scalastyle:ignore throw
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ package parsley.exceptions

// $COVERAGE-OFF$
private [parsley] class CorruptedReferenceException
extends ParsleyException("A reference has been used across two different parsers in separate calls to parse, causing it to be misallocated")
extends ParsleyException("A reference has been used across two different parsers in separate calls to parse, causing it to clash with another reference")
// $COVERAGE-ON$
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,9 @@ private [deepembedding] final class >>=[A, B](val p: StrictParsley[A], private [
}
override def codeGen[M[_, +_]: ContOps, R](producesResults: Boolean)(implicit instrs: InstrBuffer, state: CodeGenState): M[R, Unit] = {
suspend(p.codeGen[M, R](producesResults = true)) |> {
instrs += instructions.DynCall[A] { x =>
instrs += instructions.DynCall[A] { (x, refsSz) =>
val q = f(x)
q.demandCalleeSave(state.numRegs)
q.setMinReferenceAllocation(refsSz)
if (implicitly[ContOps[M]].isStackSafe) q.overflows()
q.instrs
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import scala.annotation.tailrec
import scala.collection.mutable

import parsley.XAssert._
import parsley.exceptions.CorruptedReferenceException
import parsley.state.Ref

import parsley.internal.collection.mutable.ResizableArray
Expand Down Expand Up @@ -36,21 +37,21 @@ private [deepembedding] trait StrictParsley[+A] {
* 1. any sharable, fail-only, handler instructions are then generated at the end of the instruction stream
* 1. jump-labels in the code are removed, and tail-call optimisation is applied
*
* @param numRegsUsedByParent the number of registers in use by the parent (should one exist), required by callee-save
* @param usedRefs the registers used by this parser (these may require allocation)
* @param minRef the number of references determined to currently exist according to the context (or -1 if this is root)
* @param usedRefs the references used by this parser (these may require allocation)
* @param recs a stream of pairs of rec nodes and the generators for their strict parsers
* (this is just because more `Cont` operations are performed later)
* @param state the code generator state
* @return the final array of instructions for this parser
*/
final private [deepembedding] def generateInstructions[M[_, +_]: ContOps](numRegsUsedByParent: Int, usedRefs: Set[Ref[_]],
bodyMap: Map[Let[_], StrictParsley[_]])
final private [deepembedding] def generateInstructions[M[_, +_]: ContOps](minRef: Int, usedRefs: Set[Ref[_]], bodyMap: Map[Let[_], StrictParsley[_]])
(implicit state: CodeGenState): Array[Instr] = {
implicit val instrs: InstrBuffer = newInstrBuffer
perform {
generateCalleeSave[M, Array[Instr]](numRegsUsedByParent, this.codeGen(producesResults = true), usedRefs) |> {
// When `numRegsUsedByParent` is -1 this is top level, otherwise it is a flatMap
instrs += (if (numRegsUsedByParent >= 0) instructions.Return else instructions.Halt)
allocateAndExpandRefs(minRef, usedRefs)
this.codeGen[M, Array[Instr]](producesResults = true) |> {
// When `minRef` is -1 this is top level, otherwise it is a flatMap
instrs += (if (minRef >= 0) instructions.Return else instructions.Halt)
val letRets = finaliseLets(bodyMap)
generateHandlers(state.handlers)
finaliseInstrs(instrs, state.nlabels, letRets)
Expand Down Expand Up @@ -97,80 +98,30 @@ private [deepembedding] object StrictParsley {
/** Make a fresh instruction buffer */
private def newInstrBuffer: InstrBuffer = new ResizableArray()

/** Given a set of in-use registers, this function will allocate those that are currented
* unallocated, giving them addresses not currently in use by the allocated registers
*
* @param unallocatedRegs the set of registers that need allocating
* @param regs the set of all registers used by a specific parser
* @return the list of slots that have been freshly allocated to
*/
private def allocateRegisters(unallocatedRegs: Set[Ref[_]], regs: Set[Ref[_]]): List[Int] = {
// Global registers cannot occupy the same slot as another global register
// In a flatMap, that means a newly discovered global register must be allocated to a new slot: this may resize the register pool
assert(unallocatedRegs == regs.filterNot(_.allocated))
if (unallocatedRegs.nonEmpty) {
val usedSlots = regs.collect {
case reg if reg.allocated => reg.addr
}
val freeSlots = (0 until regs.size).filterNot(usedSlots)
applyAllocation(unallocatedRegs, freeSlots)
}
else Nil
}

/** Given a set of unallocated registers and a supply of unoccupied slots, allocates each
* register to one of the slots.
*
* @param regs the set of registers that require allocation
* @param freeSlots the supply of slots that are currently not in-use
* @return the slots that were used for allocation
*/
private def applyAllocation(refs: Set[Ref[_]], freeSlots: Iterable[Int]): List[Int] = {
val allocatedSlots = mutable.ListBuffer.empty[Int]
// TODO: For scala 2.12, use lazyZip and foreach!
for ((ref, addr) <- refs.zip(freeSlots)) {
ref.allocate(addr)
allocatedSlots += addr
}
allocatedSlots.toList
}

/** If required, generates callee-save around a main body of instructions.
/** Allocates references, and, if required, generates an instruction to expand array size.
*
* This is needed when using `flatMap`, as it is unaware of the register
* context of its parent, other than the number used. The expectation is
* that such a parser will save all the registers used by its parents that
* it itself does not explicitly use and restore them when it is completed
* (if the parent has not allocated a register it may be assumed that
* it is local to the `flatMap`: this is a documented limitation of the
* system).
* context of its parents.
*
* @param numRegsUsedByParent the size of the current register array as dictated by the parent
* @param bodyGen a computation that generates instructions into the instruction buffer
* @param reqRegs the number of registered used by the parser in question
* @param allocatedRegs the slots used by the registered allocated local to the parser
* @param minRef the number of references in existance, according to the context
* @param usedRefs the referenced used in this parser that may need allocation
* @param instrs the instruction buffer
* @param state the code generation state, for label generation
*/
private def generateCalleeSave[M[_, +_]: ContOps, R](numRegsUsedByParent: Int, bodyGen: =>M[R, Unit], usedRefs: Set[Ref[_]])
(implicit instrs: InstrBuffer, state: CodeGenState): M[R, Unit] = {
val reqRegs = usedRefs.size
val localRegs = usedRefs.filterNot(_.allocated)
val allocatedRegs = allocateRegisters(localRegs, usedRefs)
val calleeSaveRequired = numRegsUsedByParent >= 0 // if this is -1, then we are the top level and have no parent, otherwise it needs to be done
if (calleeSaveRequired && localRegs.nonEmpty) {
val end = state.freshLabel()
val calleeSave = state.freshLabel()
instrs += new instructions.Push(false) // callee-save is not active
instrs += new instructions.Label(calleeSave)
instrs += new instructions.CalleeSave(end, localRegs, reqRegs, allocatedRegs, numRegsUsedByParent)
bodyGen |> {
instrs += new instructions.Push(true) // callee-save is active
instrs += new instructions.Jump(calleeSave)
instrs += new instructions.Label(end)
}
private def allocateAndExpandRefs(minRef: Int, usedRefs: Set[Ref[_]])(implicit instrs: InstrBuffer): Unit = {
var nextSlot = math.max(minRef, 0)
for (r <- usedRefs if !r.allocated) {
r.allocate(nextSlot)
nextSlot += 1
}
// check that no two references have the same address!
if (usedRefs.groupBy(_.addr).valuesIterator.exists(_.size > 1)) {
throw new CorruptedReferenceException() // scalastyle:ignore throw
}
val totalSlotsRequired = nextSlot
// if this is -1, then we are the top level and have no parent, otherwise it needs to be done
if (minRef >= 0 && (minRef < totalSlotsRequired)) {
instrs += new instructions.ExpandRefs(totalSlotsRequired)
}
else bodyGen
}

/** Generates each of the shared, non-recursive, parsers that have been ''used'' by
Expand Down Expand Up @@ -284,9 +235,9 @@ private [deepembedding] trait MZero extends StrictParsley[Nothing]
/** This is the escapulated state required for code generation,
* which is threaded through the entire backend.
*
* @param numRegs the number of registers required by the parser being generated
* @param numRefs the number of references required by the parser being generated
*/
private [deepembedding] class CodeGenState(val numRegs: Int) {
private [deepembedding] class CodeGenState(val numRefs: Int) {
/** The next jump-label identifier. */
private var current = 0
/** The shared-parsers that have been referenced at some point in the generation so far. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,15 @@ private [parsley] abstract class LazyParsley[+A] private [deepembedding] {
// $COVERAGE-ON$

// The instructions used to execute this parser along with the number of registers it uses
final private [parsley] lazy val (instrs: Array[Instr], numRegs: Int) = computeInstrs
final private [parsley] lazy val (instrs: Array[Instr], numRefs: Int) = computeInstrs

/** This parser is the result of a `flatMap` operation, and as such must perform
* callee-save on `numRegs` registers (which belong to its parent)
/** This parser is the result of a `flatMap` operation, and as such may need to expand
* the refs set. If so, it needs to know what the minimum free slot is according to
* the context.
*
* @param numRegs the number of registers the parent uses (these must be saved)
* @param minRef the smallest ref we are allowed to allocate to.
*/
private [deepembedding] def demandCalleeSave(numRegs: Int): Unit = numRegsUsedByParent = numRegs
private [deepembedding] def setMinReferenceAllocation(minRef: Int): Unit = this.minRef = minRef

// Internals
// To ensure that stack-overflow cannot occur during the processing of particularly
Expand Down Expand Up @@ -82,7 +83,7 @@ private [parsley] abstract class LazyParsley[+A] private [deepembedding] {
final private var cps = false
final private [deepembedding] def isCps: Boolean = cps
/** how many registers are used by the ''parent'' of this combinator (this combinator is part of a `flatMap` when this is not -1) */
final private var numRegsUsedByParent = -1
final private var minRef = -1

/** Computes the instructions associated with this parser as well as the number of
* registers it requires in a (possibly) stack-safe way.
Expand Down Expand Up @@ -114,11 +115,11 @@ private [parsley] abstract class LazyParsley[+A] private [deepembedding] {
val usedRefs: Set[Ref[_]] = letFinderState.usedRefs
implicit val letMap: LetMap = LetMap(letFinderState.lets, letFinderState.recs)
for { sp <- this.optimised } yield {
implicit val state: backend.CodeGenState = new backend.CodeGenState(letFinderState.numRegs)
sp.generateInstructions(numRegsUsedByParent, usedRefs, letMap.bodies)
implicit val state: backend.CodeGenState = new backend.CodeGenState(letFinderState.numRefs)
sp.generateInstructions(minRef, usedRefs, letMap.bodies)
}
}
}, letFinderState.numRegs)
}, letFinderState.numRefs)
}

// Pass 1
Expand Down Expand Up @@ -242,7 +243,7 @@ private [deepembedding] class LetFinderState {
/** Returns all the registers used by the parser */
private [frontend] def usedRefs: Set[Ref[_]] = _usedRefs.toSet
/** Returns the number of registers used by the parser */
private [frontend] def numRegs: Int = _usedRefs.size
private [frontend] def numRefs: Int = _usedRefs.size
}

/** Represents a map of let-bound lazy parsers to their strict equivalents. */
Expand Down
18 changes: 1 addition & 17 deletions parsley/shared/src/main/scala/parsley/internal/diagnostics.scala
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
*/
package parsley.internal.diagnostics

import parsley.exceptions.{BadLazinessException, CorruptedReferenceException, ParsleyException}
import parsley.exceptions.{BadLazinessException, ParsleyException}

private [parsley] object UserException {
def unapply(e: Throwable): Option[Throwable] = e match {
Expand All @@ -25,22 +25,6 @@ private [parsley] object UserException {
}
}

private [parsley] object RegisterOutOfBoundsException {
def unapply(e: Throwable): Option[Throwable] = e match {
case e: ArrayIndexOutOfBoundsException => e.getStackTrace.headOption.collect {
// this exception was thrown plainly during the execution of an instruction
// only register arrays are accessed raw like this: therefore it must be an
// out of bounds register.
case ste if ste.getMethodName == "apply"
&& ste.getClassName.startsWith("parsley.internal.machine.instructions") =>
val err = new CorruptedReferenceException
err.addSuppressed(e)
err
}
case _ => None
}
}

private [parsley] object NullParserException {
def unapply(e: Throwable): Option[Throwable] = e match {
// this should only be true when the null was tripped from within the parsley namespace,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ import parsley.Success
import parsley.XAssert._
import parsley.errors.ErrorBuilder

import parsley.internal.diagnostics.RegisterOutOfBoundsException
import parsley.internal.errors.{CaretWidth, ExpectItem, LineBuilder, UnexpectDesc}
import parsley.internal.machine.errors.{ClassicFancyError, DefuncError, DefuncHints, EmptyHints,
ErrorItemBuilder, ExpectedError, ExpectedErrorWithReason, UnexpectedError}
Expand Down Expand Up @@ -125,15 +124,7 @@ private [parsley] final class Context(private [machine] var instrs: Array[Instr]
}
// $COVERAGE-ON$

private [parsley] def run[Err: ErrorBuilder, A](): Result[Err, A] = {
try go[Err, A]()
catch {
// additional diagnostic checks
// $COVERAGE-OFF$
case RegisterOutOfBoundsException(err) => throw err // scalastyle:ignore throw
// $COVERAGE-ON$
}
}
private [parsley] def run[Err: ErrorBuilder, A](): Result[Err, A] = go[Err, A]()
@tailrec private def go[Err: ErrorBuilder, A](): Result[Err, A] = {
//println(pretty)
if (running) { // this is the likeliest branch, so should be executed with fewest comparisons
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,17 +72,17 @@ private [internal] object Apply extends Instr {
}

// Monadic
private [internal] final class DynCall(f: Any => Array[Instr]) extends Instr {
private [internal] final class DynCall(f: (Any, Int) => Array[Instr]) extends Instr {
override def apply(ctx: Context): Unit = {
ensureRegularInstruction(ctx)
ctx.call(f(ctx.stack.upop()))
ctx.call(f(ctx.stack.upop(), ctx.regs.size))
}
// $COVERAGE-OFF$
override def toString: String = "DynCall(?)"
// $COVERAGE-ON$
}
private [internal] object DynCall {
def apply[A](f: A => Array[Instr]): DynCall = new DynCall(f.asInstanceOf[Any => Array[Instr]])
def apply[A](f: (A, Int) => Array[Instr]): DynCall = new DynCall(f.asInstanceOf[(Any, Int) => Array[Instr]])
}

// Control Flow
Expand Down
Loading

0 comments on commit 5712969

Please sign in to comment.