Skip to content

Commit

Permalink
Merge pull request #197 from UQ-PAC/grammar-update
Browse files Browse the repository at this point in the history
Semantics.g4 grammar update
  • Loading branch information
l-kent authored Jun 6, 2024
2 parents 4b3a278 + 3080012 commit 727d098
Show file tree
Hide file tree
Showing 395 changed files with 180 additions and 163 deletions.
51 changes: 29 additions & 22 deletions src/main/antlr4/Semantics.g4
Original file line number Diff line number Diff line change
Expand Up @@ -3,53 +3,59 @@ grammar Semantics;
// See aslp/libASL/asl.ott for the reference grammar; Bap-ali-plugin/asli_lifer.ml may also be useful for
// visitors

statements: QUOTE stmt QUOTE (COMMA QUOTE stmt QUOTE)*;

stmt: 'Stmt_Assign' OPEN_PAREN lexpr COMMA expr CLOSE_PAREN # Assign
| 'Stmt_ConstDecl' OPEN_PAREN type COMMA lvar=ID COMMA expr CLOSE_PAREN # ConstDecl
| 'Stmt_VarDecl' OPEN_PAREN type COMMA lvar=ID COMMA expr CLOSE_PAREN # VarDecl
| 'Stmt_ConstDecl' OPEN_PAREN type COMMA lvar=ident COMMA expr CLOSE_PAREN # ConstDecl
| 'Stmt_VarDecl' OPEN_PAREN type COMMA lvar=ident COMMA expr CLOSE_PAREN # VarDecl
| 'Stmt_VarDeclsNoInit' OPEN_PAREN type COMMA OPEN_BRACKET lvars CLOSE_BRACKET CLOSE_PAREN # VarDeclsNoInit
| 'Stmt_Assert' OPEN_PAREN expr CLOSE_PAREN # Assert
| 'Stmt_TCall' OPEN_PAREN name=ID COMMA OPEN_BRACKET tes=exprs? CLOSE_BRACKET COMMA OPEN_BRACKET args=exprs? CLOSE_BRACKET CLOSE_PAREN # TCall
| 'Stmt_If' OPEN_PAREN cond=expr COMMA OPEN_BRACKET stmt+ CLOSE_BRACKET COMMA OPEN_BRACKET CLOSE_BRACKET COMMA elseStmt? CLOSE_PAREN # If // theoretically some sort of 'Elsif' could be within the middle brackets?
| 'Stmt_Throw' OPEN_PAREN str=ID* CLOSE_PAREN # Throw
| 'Stmt_TCall' OPEN_PAREN name=ident COMMA OPEN_BRACKET tes=exprs? CLOSE_BRACKET COMMA OPEN_BRACKET args=exprs? CLOSE_BRACKET CLOSE_PAREN # TCall
| 'Stmt_If' OPEN_PAREN cond=expr COMMA OPEN_BRACKET thenStmts=stmts? CLOSE_BRACKET COMMA OPEN_BRACKET CLOSE_BRACKET COMMA OPEN_BRACKET elseStmts=stmts? CLOSE_BRACKET CLOSE_PAREN # If // ASLp transforms else-if into nested if/else statements.
| 'Stmt_Throw' OPEN_PAREN str=ident* CLOSE_PAREN # Throw
;

lvars: OPEN_PAREN ID CLOSE_PAREN (SCOLON OPEN_PAREN ID CLOSE_PAREN)* ;
statements: QUOTE stmt QUOTE (COMMA QUOTE stmt QUOTE)*;

lvars: ident (SCOLON ident)*;

exprs: OPEN_PAREN expr CLOSE_PAREN (SCOLON OPEN_PAREN expr CLOSE_PAREN)*;
exprs: expr (SCOLON expr)*;

elseStmt: OPEN_PAREN 'else' stmt+ CLOSE_PAREN;
stmts: stmt (SCOLON stmt)*;

type: 'Type_Bits' OPEN_PAREN size=expr CLOSE_PAREN # TypeBits
| 'Type_Constructor' OPEN_PAREN str=ID CLOSE_PAREN # TypeConstructor
| 'Type_Register' OPEN_PAREN QUOTE size=(DEC | BINARY) QUOTE COMMA regfield (COMMA regfield)* CLOSE_PAREN # TypeRegister
| 'Type_Constructor' OPEN_PAREN str=ident CLOSE_PAREN # TypeConstructor
| 'Type_Register' OPEN_PAREN QUOTE size=integer QUOTE COMMA regfield (COMMA regfield)* CLOSE_PAREN # TypeRegister
;

regfield: OPEN_PAREN OPEN_BRACKET slice CLOSE_BRACKET COMMA id=ID CLOSE_PAREN;
regfield: OPEN_PAREN OPEN_BRACKET slice CLOSE_BRACKET COMMA id=ident CLOSE_PAREN;

lexpr: 'LExpr_Var' OPEN_PAREN ID CLOSE_PAREN # LExprVar
| 'LExpr_Field' OPEN_PAREN lexpr COMMA field=ID CLOSE_PAREN # LExprField
lexpr: 'LExpr_Var' OPEN_PAREN ident CLOSE_PAREN # LExprVar
| 'LExpr_Field' OPEN_PAREN lexpr COMMA field=ident CLOSE_PAREN # LExprField
| 'LExpr_Array' OPEN_PAREN lexpr COMMA index=expr CLOSE_PAREN # LExprArray
;

expr: 'Expr_Var' OPEN_PAREN ID CLOSE_PAREN # ExprVar
| 'Expr_TApply' OPEN_PAREN ID COMMA OPEN_BRACKET tes=exprs? CLOSE_BRACKET COMMA OPEN_BRACKET args=exprs? CLOSE_BRACKET CLOSE_PAREN # ExprTApply
expr: 'Expr_Var' OPEN_PAREN ident CLOSE_PAREN # ExprVar
| 'Expr_TApply' OPEN_PAREN ident COMMA OPEN_BRACKET tes=exprs? CLOSE_BRACKET COMMA OPEN_BRACKET args=exprs? CLOSE_BRACKET CLOSE_PAREN # ExprTApply
| 'Expr_Slices' OPEN_PAREN expr COMMA OPEN_BRACKET slices CLOSE_BRACKET CLOSE_PAREN # ExprSlices
| 'Expr_Field' OPEN_PAREN expr COMMA field=ID CLOSE_PAREN # ExprField
| 'Expr_Field' OPEN_PAREN expr COMMA field=ident CLOSE_PAREN # ExprField
| 'Expr_Array' OPEN_PAREN array=expr COMMA index=expr CLOSE_PAREN # ExprArray
| 'Expr_LitInt' OPEN_PAREN QUOTE value=(DEC | BINARY) QUOTE CLOSE_PAREN # ExprLitInt
| 'Expr_LitBits' OPEN_PAREN QUOTE value=BINARY QUOTE CLOSE_PAREN # ExprLitBits // in future may need to account for case where whitespace is in the binary string
| integer # ExprLitInt
| bits # ExprLitBits
;

// Slice_HiLo only ever appears within Type_Register fields
slice: 'Slice_LoWd' OPEN_PAREN lo=expr COMMA wd=expr CLOSE_PAREN # Slice_LoWd
| 'Slice_HiLo' OPEN_PAREN hi=expr COMMA lo=expr CLOSE_PAREN # Slice_HiLo
;

// multiple of 'slice' - distinct from Expr_Slices
slices: OPEN_PAREN slice CLOSE_PAREN (SCOLON OPEN_PAREN slice CLOSE_PAREN)*;
slices: slice (SCOLON slice)*;

ident: QUOTE ID QUOTE;

integer: DEC;
bits: BINARY; // in future may need to account for case where whitespace is in the binary string

BINARY: [0-1]+;
BINARY: SQUOTE [0-1]+ SQUOTE;
DEC: [0-9]+;
ID: [a-zA-Z_][a-zA-Z0-9_.]*;

Expand All @@ -60,6 +66,7 @@ COMMA: ',';
OPEN_BRACKET: '[';
CLOSE_BRACKET: ']';
QUOTE: '"';
SQUOTE: '\'';
SCOLON: ';';

// Ignored
Expand Down
159 changes: 78 additions & 81 deletions src/main/scala/translating/GTIRBToIR.scala
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,13 @@ class GTIRBToIR(mods: Seq[Module], parserMap: immutable.Map[String, Array[Array[
}
}

/** Returns the register that the final statement of `block` assigns to the program counter.
  *
  * The last statement must be a `LocalAssign` whose left-hand side is a register named `_PC` and whose
  * right-hand side is itself a register; that right-hand register is returned.
  *
  * @param block the block whose trailing program counter assignment is inspected
  * @return the register on the right-hand side of the trailing `_PC` assignment
  * @throws Exception if the block has no statements or does not end with such an assignment
  */
private def getPCTarget(block: Block): Register = {
  // lastOption instead of last: a block with no statements should fail with the descriptive
  // message below rather than with a generic empty-collection error
  block.statements.lastOption match {
    case Some(LocalAssign(lhs: Register, rhs: Register, _)) if lhs.name == "_PC" => rhs
    case _ => throw Exception(s"expected block ${block.label} to have a program counter assignment at its end")
  }
}

/** Encodes the bytes of `byteString` as URL-safe Base64 text.
  *
  * Used in this file to print edge target UUIDs in exception messages.
  */
private def byteStringToString(byteString: ByteString): String = {
  val urlSafeEncoder = Base64.getUrlEncoder
  val rawBytes = byteString.toByteArray
  urlSafeEncoder.encodeToString(rawBytes)
}
Expand Down Expand Up @@ -304,26 +311,6 @@ class GTIRBToIR(mods: Seq[Module], parserMap: immutable.Map[String, Array[Array[
procedure.addBlocks(newBlocks)
newBlockCount += newBlocks.size

if (queue.nonEmpty) {
currentBlock = queue.dequeue()
currentStatement = currentBlock.statements.head
} else {
breakLoop = true
}
// assignment to program counter not associated with an edge
// caused by indirect call that DDisasm fails to identify
// potentially requires splitting block
case l: LocalAssign if l.lhs == Register("_PC", BitVecType(64)) =>
val newBlocks = handleUnidentifiedIndirectCall(l, currentBlock, block.label, newBlockCount)
procedure.addBlocks(newBlocks)
newBlockCount += newBlocks.size

for (n <- newBlocks) {
if (n.statements.nonEmpty) {
queue.enqueue(n)
}
}

if (queue.nonEmpty) {
currentBlock = queue.dequeue()
currentStatement = currentBlock.statements.head
Expand All @@ -343,60 +330,6 @@ class GTIRBToIR(mods: Seq[Module], parserMap: immutable.Map[String, Array[Array[
}
}

// Handles assignments to the program counter that are not related to edges in the GTIRB CFG.
// These are likely blr instructions (which are indirect calls) that DDisasm failed to identify as branching.
// If the PC assignment is mid-block, the block is split into two, and an indirect call is created at the end of the first block.
// If the PC assignment is at the end of the block, an indirect call is added to the block.
// The PC assignment is removed in all cases.
// No other cases of unhandled program counter assignments have been identified yet.
//
// Parameters:
//   l               - the assignment to the program counter being handled
//   currentBlock    - the block that currently contains l; its jump is replaced and l is removed from it
//   parentLabel     - label prefix used when naming any newly created block
//   newBlockCountIn - counter value used as the numeric suffix of the next new block label
// Returns the blocks created here (none, or a single block). The incremented counter is not returned,
// so the caller has to advance its own counter by the size of the result.
// Throws an Exception if l does not assign a register, if l is the last statement and the block's jump is not
// a GoTo with at least one target, or if the statement before l is not an assignment to R30.
private def handleUnidentifiedIndirectCall(l: LocalAssign, currentBlock: Block, parentLabel: String, newBlockCountIn: Int): ArrayBuffer[Block] = {
val newBlocks = ArrayBuffer[Block]()
var newBlockCount = newBlockCountIn

// the call target is the register being assigned to the program counter
val target = l.rhs match {
case r: Register => r
case _ => throw Exception(s"unhandled indirect call $l does not assign a register to __PC")
}
// the block that execution continues in once the indirect call returns
val returnTarget = if (currentBlock.statements.hasNext(l)) {
// unidentified indirect call is mid-block
// NOTE(review): splitOn presumably detaches the statements following l from currentBlock and returns them - confirm
val afterStatements = currentBlock.statements.splitOn(l)
val afterBlock = Block(parentLabel + "$__" + newBlockCount, None, afterStatements)
newBlockCount += 1
newBlocks.append(afterBlock)
// the new trailing block takes over the original block's jump
afterBlock.replaceJump(currentBlock.jump)
// we are assuming this is a blr instruction and so R30 has been set to point to the next instruction
afterBlock
} else {
// unidentified indirect call is at end of block with fallthrough edge
currentBlock.jump match {
case g: GoTo if g.targets.nonEmpty =>
if (g.targets.size == 1) {
// a single target can be used directly as the return target
g.targets.head
} else {
// case where goto has multiple targets: create an extra block and point to that
val afterBlock = Block(parentLabel + "$__" + newBlockCount)
newBlockCount += 1
newBlocks.append(afterBlock)
afterBlock.replaceJump(currentBlock.jump)
afterBlock
}
case _ =>
throw Exception(s"unhandled indirect call $l is at end of block ${currentBlock.label} that ends in call ${currentBlock.jump}")
}
}
// check that R30 has been set by previous statement - if it did not then this is a case that requires further investigation
// NOTE(review): this check runs after the block may already have been split above - confirm that ordering is intended
currentBlock.statements.getPrev(l) match {
case LocalAssign(Register("R30", BitVecType(64)), _, _) =>
case _ => throw Exception("unhandled assignment to PC did not set R30 beforehand")
}

// replace the block's jump with the indirect call and drop the now-redundant PC assignment
val indirectCall = IndirectCall(target, Some(returnTarget))
currentBlock.replaceJump(indirectCall)
currentBlock.statements.remove(l)

newBlocks
}

// handles if statements that are not related to conditional edges in the GTIRB CFG
// this creates new blocks for the contents of the if statements and removes the TempIfs
private def handleIfStatement(i: TempIf, currentBlock: Block, parentLabel: String, newBlockCountIn: Int): ArrayBuffer[Block] = {
Expand Down Expand Up @@ -549,7 +482,7 @@ class GTIRBToIR(mods: Seq[Module], parserMap: immutable.Map[String, Array[Array[
IndirectCall(Register("R30", BitVecType(64)), None)

} else if (edgeLabels.forall { (e: EdgeLabel) => !e.conditional && !e.direct && e.`type` == Type_Branch }) {
// resolved indirect call
// resolved indirect call with multiple blocks as targets
val targets = mutable.Set[Block]()
for (edge <- outgoingEdges) {
if (uuidToBlock.contains(edge.targetUuid)) {
Expand Down Expand Up @@ -579,12 +512,11 @@ class GTIRBToIR(mods: Seq[Module], parserMap: immutable.Map[String, Array[Array[
handleDirectCallWithReturn(edge0, edge1, block)
case (EdgeLabel(false, true, Type_Call, _), EdgeLabel(false, true, Type_Fallthrough, _)) =>
handleDirectCallWithReturn(edge1, edge0, block)
/*
these are probably what blr should resolve to once that's fixed?
case (EdgeLabel(false, true, Type_Fallthrough, _), EdgeLabel(false, false, Type_Call, _)) =>
case (EdgeLabel(false, false, Type_Call, _), EdgeLabel(false, true, Type_Fallthrough, _)) =>
*/

// indirect call with return target
case (EdgeLabel(false, true, Type_Fallthrough, _), EdgeLabel(false, false, Type_Call, _)) =>
handleIndirectCallWithReturn(edge0, edge1, block)
case (EdgeLabel(false, false, Type_Call, _), EdgeLabel(false, true, Type_Fallthrough, _)) =>
handleIndirectCallWithReturn(edge1, edge0, block)
// conditional branch
case (EdgeLabel(true, true, Type_Fallthrough, _), EdgeLabel(true, true, Type_Branch, _)) =>
handleConditionalBranch(edge0, edge1, block, procedure)
Expand All @@ -593,11 +525,76 @@ class GTIRBToIR(mods: Seq[Module], parserMap: immutable.Map[String, Array[Array[
case _ =>
throw Exception(s"cannot resolve outgoing edges from block ${block.label}")
}
} else if (edgeLabels.forall { (e: EdgeLabel) => !e.conditional }) {
// resolved indirect call with multiple procedure targets and fallthrough?
val fallthroughs = ArrayBuffer[Edge]()
val indirectCallTargets = ArrayBuffer[Edge]()
for (edge <- outgoingEdges) {
edge.getLabel match {
case EdgeLabel(false, true, Type_Fallthrough, _) =>
fallthroughs.addOne(edge)
case EdgeLabel(false, false, Type_Call, _) =>
indirectCallTargets.addOne(edge)
case _ =>
}
}
// unhandled case if there is more than one fallthrough, no fallthrough, or no indirect call targets
if (fallthroughs.size != 1 || indirectCallTargets.isEmpty) {
throw Exception(s"cannot resolve outgoing edges from block ${block.label}")
}
handleIndirectCallMultipleResolvedTargets(fallthroughs.head, indirectCallTargets, block, procedure)
} else {
throw Exception(s"cannot resolve outgoing edges from block ${block.label}")
}
}

/** Builds the jump for a block whose indirect call has been resolved to several candidate procedures.
  *
  * For every call edge a new block is created that first assumes the register assigned to the program
  * counter equals the callee's address and then directly calls that procedure, returning to the block
  * targeted by the fallthrough edge. The trailing program counter assignment is removed from `block`, the
  * new blocks are added to `procedure`, and a `GoTo` over all of the new blocks is returned.
  *
  * @param fallthrough         edge to the block that execution continues in after the call returns
  * @param indirectCallTargets edges to the procedures the indirect call may resolve to
  * @param block               the block ending in the indirect call
  * @param procedure           the procedure the new blocks are added to
  * @return a `GoTo` whose targets are the newly created blocks
  * @throws Exception if the fallthrough edge does not point to a known block, if a call edge does not point
  *                   to a known procedure, if a target procedure has no address, or if `block` does not end
  *                   with a program counter assignment
  */
private def handleIndirectCallMultipleResolvedTargets(fallthrough: Edge, indirectCallTargets: ArrayBuffer[Edge], block: Block, procedure: Procedure): GoTo = {
  if (!uuidToBlock.contains(fallthrough.targetUuid)) {
    throw Exception(s"block ${block.label} has fallthrough edge to ${byteStringToString(fallthrough.targetUuid)} that does not point to a known block")
  }
  val returnTarget = uuidToBlock(fallthrough.targetUuid)

  // must be read before removePCAssign below strips the assignment from the block
  val targetRegister = getPCTarget(block)

  val newBlocks = indirectCallTargets.map { call =>
    // it's odd if an indirect call is only partially resolved, so throw an exception for now because this case will require further investigation
    if (!entranceUUIDtoProcedure.contains(call.targetUuid)) {
      throw Exception(s"block ${block.label} has resolved indirect call edge to ${byteStringToString(call.targetUuid)} that does not point to a known procedure")
    }

    val target = entranceUUIDtoProcedure(call.targetUuid)
    // the assume below needs a concrete address; report a missing one explicitly instead of
    // failing with a bare NoSuchElementException from Option.get
    val address = target.address.getOrElse(
      throw Exception(s"block ${block.label} has resolved indirect call edge to procedure ${target.name} that has no known address")
    )
    val resolvedCall = DirectCall(target, Some(returnTarget))

    val assume = Assume(BinaryExpr(BVEQ, targetRegister, BitVecLiteral(address, 64)))
    val label = block.label + "$" + target.name
    Block(label, None, ArrayBuffer(assume), resolvedCall)
  }
  removePCAssign(block)
  procedure.addBlocks(newBlocks)
  GoTo(newBlocks)
}

/** Creates the call for a block that has one fallthrough edge and one indirect call edge.
  *
  * The fallthrough edge must point to a known block, which becomes the return target. If the call edge
  * points to a known procedure entrance, a `DirectCall` to that procedure is produced; otherwise an
  * `IndirectCall` on the register assigned to the program counter at the end of `block` is produced. In
  * both cases the program counter assignment is removed from `block`.
  *
  * @throws Exception if the fallthrough edge does not point to a known block
  */
private def handleIndirectCallWithReturn(fallthrough: Edge, call: Edge, block: Block): Call = {
  val fallthroughKnown = uuidToBlock.contains(fallthrough.targetUuid)
  if (!fallthroughKnown) {
    throw Exception(s"block ${block.label} has fallthrough edge to ${byteStringToString(fallthrough.targetUuid)} that does not point to a known block")
  }
  val resumeBlock = uuidToBlock(fallthrough.targetUuid)

  if (entranceUUIDtoProcedure.contains(call.targetUuid)) {
    // resolved indirect call
    val callee = entranceUUIDtoProcedure(call.targetUuid)
    removePCAssign(block)
    DirectCall(callee, Some(resumeBlock))
  } else {
    // unresolved indirect call: the target register has to be read before the assignment is removed
    val pcRegister = getPCTarget(block)
    removePCAssign(block)
    IndirectCall(pcRegister, Some(resumeBlock))
  }
}

private def handleDirectCallWithReturn(fallthrough: Edge, call: Edge, block: Block): DirectCall = {
if (!entranceUUIDtoProcedure.contains(call.targetUuid)) {
throw Exception(s"block ${block.label} has direct call edge to ${byteStringToString(call.targetUuid)} that does not point to a known procedure")
Expand Down
Loading

0 comments on commit 727d098

Please sign in to comment.