Skip to content

Commit

Permalink
Merge branch 'main' into yousif-memory-region-analysis
Browse files Browse the repository at this point in the history
  • Loading branch information
yousifpatti committed Oct 31, 2023
2 parents 63d5909 + e082890 commit 26398dd
Show file tree
Hide file tree
Showing 399 changed files with 70,918 additions and 39,096 deletions.
1,276 changes: 618 additions & 658 deletions examples/simplified_http_parse_basic/example.adt

Large diffs are not rendered by default.

726 changes: 353 additions & 373 deletions examples/simplified_http_parse_basic/example.bir

Large diffs are not rendered by default.

811 changes: 811 additions & 0 deletions examples/simplified_http_parse_basic/example.bpl

Large diffs are not rendered by default.

17 changes: 9 additions & 8 deletions examples/simplified_http_parse_basic/example.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,34 @@
#include <string.h>
#include <stdio.h>

#define MALLOC_SIZE 10

#define MALLOC_SIZE 4
// times out with 64 bit buffer

// cntlm 22

char *buf;
char password = 7; // secret value; has to be a variable so that we get a Gamma_password variable
char stext[11] = "sometext";
char stext[11] = "hel";



int main() {
char *pos = NULL, *dom = NULL;


stext[5] = password;
stext[2] = password;
buf = malloc(strlen(stext) + 1);
memcpy(buf, stext, strlen(stext) + 1); // inlined by -O2

puts(buf);

// find the split between username and password ("username:password")
pos = buf + 2;

*pos = 0;
pos = buf + 1;

// including this makes verification fail
// *pos = 0;

memset(buf, 0, strlen(buf));
memset(buf, 0, strlen(buf) + 1);
free(buf); // requires secret[i] == true

}
Expand Down
223 changes: 121 additions & 102 deletions examples/simplified_http_parse_basic/example.relf

Large diffs are not rendered by default.

44 changes: 20 additions & 24 deletions examples/simplified_http_parse_basic/example.spec
Original file line number Diff line number Diff line change
Expand Up @@ -4,54 +4,50 @@ buf: long
stext: char[11]


DIRECT functions: gamma_load64, gamma_load8, memory_load8_le, bvult64, bvule64, bvsub64
DIRECT functions: gamma_load64, gamma_load8, memory_load8_le, bvult64, bvule64, bvsub64, gamma_load32


Subroutine: #free
Requires DIRECT: "gamma_load64(Gamma_mem, R0)"
Ensures DIRECT: "Gamma_R0 == true"
Requires DIRECT: "gamma_load8(Gamma_mem, bvadd64(R0, 0bv64)) == true"
Requires DIRECT: "gamma_load8(Gamma_mem, bvadd64(R0, 1bv64)) == true"
Requires DIRECT: "gamma_load8(Gamma_mem, bvadd64(R0, 2bv64)) == true"
Requires DIRECT: "gamma_load8(Gamma_mem, bvadd64(R0, 3bv64)) == true"
Ensures DIRECT: "Gamma_R0 == true"


Subroutine: main
Requires: Gamma_password == false

Requires DIRECT: "gamma_load32(Gamma_mem, memory_load64_le(mem, $stext_addr))"
Requires DIRECT: "R31 == 100bv64"

Subroutine: malloc
Modifies: R0
Ensures: buf == old(buf) && password == old(password)
Ensures: buf == old(buf) && password == old(password) && stext==old(stext)
// modifies R0, Gamma_R0;
Ensures DIRECT: "R0 == 990000bv64"
Ensures DIRECT: "R0 == 990000000bv64"
Ensures DIRECT: "Gamma_R0 == true"


Subroutine: memcpy
Modifies: mem
Ensures: buf == old(buf) && password == old(password) && stext==old(stext)
Ensures DIRECT: "((memory_load64_le(mem, $buf_addr) == old(memory_load64_le(mem, $buf_addr))) && (memory_load8_le(mem, $password_addr) == old(memory_load8_le(mem, $password_addr))))"
// Works
//ensures (forall i: bv64 :: {mem[i]} (memory_load8_le(mem, i) == if (bvule64(R0, i) && bvult64(i,bvadd64(R0, R2))) then memory_load8_le(old(mem), bvadd64(bvsub64(i, R0), R1)) else old(memory_load8_le(mem, i))));
// Faster
Modifies: mem
Ensures: buf == old(buf) && password == old(password) && stext==old(stext)
Ensures DIRECT: "(forall i: bv64 :: (Gamma_mem[i] == if (bvule64(R0, i) && bvult64(i,bvadd64(R0, R2))) then gamma_load8(old(Gamma_mem), bvadd64(bvsub64(i, R0), R1)) else old(gamma_load8(Gamma_mem, i))))"
Ensures DIRECT: "(forall i: bv64 :: (mem[i] == if (bvule64(R0, i) && bvult64(i,bvadd64(R0, R2))) then memory_load8_le(old(mem), bvadd64(bvsub64(i, R0), R1)) else old(memory_load8_le(mem, i))))"
// Does not verify with removed \0 write
// ensures (forall i: bv64 :: (memory_load8_le(mem, i) == if (bvule64(R0, i) && bvult64(i,bvadd64(R0, R2))) then memory_load8_le(old(mem), bvadd64(bvsub64(i, R0), R1)) else old(memory_load8_le(mem, i))));

// forall i <= n, Gamma_mem[R0] low
Subroutine: memset
Modifies: mem
Ensures: buf == old(buf) && password == old(password) && stext==old(stext)
Ensures DIRECT: "(forall i: bv64 :: (Gamma_mem[i] == if (bvule64(R0, i) && bvult64(i,bvadd64(R0, R2))) then Gamma_R1 else old(Gamma_mem[i])))"
// Works
//ensures (forall i: bv64 :: {mem[i]} (memory_load8_le(mem, i) == if (bvule64(R0, i) && bvult64(i,bvadd64(R0, R2))) then R1[8:0] else old(memory_load8_le(mem, i))));
// Faster
Ensures DIRECT: "(forall i: bv64 :: (mem[i] == if (bvule64(R0, i) && bvult64(i,bvadd64(R0, R2))) then R1[8:0] else old(memory_load8_le(mem, i))))"
// Does not verify with removed \0 write
// ensures (forall i: bv64 :: (memory_load8_le(mem, i) == if (bvule64(R0, i) && bvult64(i,bvadd64(R0, R2))) then R1[8:0] else old(memory_load8_le(mem, i))));


Subroutine: strlen
Ensures: buf == old(buf) && password == old(password)
Modifies: R0
Requires DIRECT: "(memory_load8_le(mem, R0) == 0bv8) || (memory_load8_le(mem, bvadd64(R0, 1bv64)) == 0bv8)|| (memory_load8_le(mem, bvadd64(R0, 2bv64)) == 0bv8)|| (memory_load8_le(mem, bvadd64(R0, 3bv64)) == 0bv8)"
Ensures: buf == old(buf) && password == old(password) && stext==old(stext)
Ensures DIRECT: "Gamma_R0 == true"
Ensures DIRECT: "(forall i: bv64 :: (((bvule64(0bv64, i)) && (bvult64(i, R0)) ==> (mem[bvadd64(old(R0), i)] != 0bv8))))"
Ensures DIRECT: "(mem[bvadd64(old(R0), R0)] == 0bv8)"
Ensures DIRECT: "(bvule64(old(R0), bvadd64(old(R0), R0)))" // doesnt overflow

Ensures DIRECT: "(forall i: bv64 :: (bvule64(old(R0), i)) && (bvult64(i, bvadd64(old(R0), R0))) ==> mem[i] != 0bv8)"
Ensures DIRECT: "(memory_load8_le(mem, bvadd64(old(R0), R0)) == 0bv8)"
Ensures DIRECT: "(bvule64(old(R0), bvadd64(old(R0), R0)))"

9 changes: 4 additions & 5 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -197,16 +197,15 @@ The binary (i.e `*.out`) can then be generated from a C source file using:
See [src/test/correct/liftone.sh](https://github.com/UQ-PAC/bil-to-boogie-translator/blob/main/src/test/correct/liftone.sh) for more examples
for flag combinations that work.

## Verifying the generated boogie (the programming language), by running boogie (the progam) on the boogie (the program)
## Verifying the generated Boogie file

[Boogie](https://github.com/boogie-org/boogie#installation) can be installed by following the instructions in the given link.

Boogie can be run on the output `*.bpl` file with the command `boogie *.bpl`.
Boogie can be run on the output `*.bpl` file with the command `boogie \useArrayAxioms *.bpl`.

A recent boogie version is needed, for example `Boogie program verifier version 2.4.1.10503, Copyright (c) 2003-2014, Microsoft.`.
It is recommended to use Boogie version 3.0.4 and Z3 version 4.8.8 (which is recommended by Boogie). Other versions and combinations may not have been tested.

With older versions and recent versions of z3 (e.g. `Z3 version 4.8.12 - 64 bit`), Z3 emits warnings about `model_compress`, since the
parameter name was changed. This does not prevent it from working however.
The `\useArrayAxioms` flag is necessary for Boogie versions 2.16.8 and greater; for earlier versions it can be removed.

Boogie can be installed through dotnet and requires dotnet 6.

Expand Down
204 changes: 204 additions & 0 deletions scripts/format_adt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
#!/usr/bin/env python3
# vim: ts=2:sw=2:expandtab:autoindent

"""
format_adt.py implements pretty-printing of BAP .adt files
by translating the ADT into Python syntax, then parsing and
formatting the python.
Although this eval()s, it is made safe by using ast.literal_eval
which only supports parsing of a literal Python expression.
"""

import io
import re
import ast
import sys
import time
import pprint
import logging
import argparse
import dataclasses
import contextlib

# grep -E "'([A-Z][a-zA-Z]+)'" src/main/antlr4/BAP_ADT.g4 --only-matching --no-filename | sort | uniq | xargs printf "'%s', " | fold -w80 -s
heads = [
'AND', 'Annotation', 'Arg', 'Args', 'ARSHIFT', 'Attr', 'Attrs', 'BigEndian',
'Blk', 'Blks', 'Both', 'Call', 'Concat', 'Def', 'Defs', 'Direct', 'DIVIDE',
'EQ', 'Extract', 'Goto', 'HIGH', 'Imm', 'In', 'Indirect', 'Int', 'Jmp', 'Jmps',
'LE', 'LittleEndian', 'Load', 'LOW', 'LSHIFT', 'LT', 'Mem', 'Memmap', 'MINUS',
'MOD', 'NEG', 'NEQ', 'NOT', 'OR', 'Out', 'Phi', 'Phis', 'PLUS', 'Program',
'Project', 'Region', 'RSHIFT', 'SDIVIDE', 'Section', 'Sections', 'SIGNED',
'SLE', 'SLT', 'SMOD', 'Store', 'Sub', 'Subs', 'Tid', 'TIMES', 'UNSIGNED',
'Var', 'XOR',
]
heads_joined = '|'.join(heads)
heads_joined_b = heads_joined.encode('ascii')

log = logging.getLogger()

notspace_re = re.compile(rb'\S')
head_re = re.compile(rb'[^\s(]+')
num_re = re.compile(rb'[_0-9xa-fA-F]+')
string_re = re.compile(rb'"(?:[^"\\]|\\.)*"')

@dataclasses.dataclass
class Context:
begin: int
closer: bytes
multiline: bool

flip = {
b'(': b')',
b')': b'(',
b'[': b']',
b']': b'[',
}

def pretty(outfile, data: bytes, spaces: int):
# stack of expression beginning parentheses and their start position
stack: list[Context] = []
i = 0
depth = 0
head = b''

indent = b' ' * spaces

i0 = i - 1
length = len(data)
while i < length:
assert i0 != i
i0 = i

c = bytes((data[i],))
if c.isspace():
while i < length and data[i] in b' \t\r\n':
i += 1
elif c.isdigit():
m = num_re.match(data, i)
assert m
outfile.write(m[0])
i = m.end(0)
elif c == b',':
outfile.write(b',')
i += 1
if stack[-1].multiline:
outfile.write(b'\n')
outfile.write(indent * depth)
else:
outfile.write(b' ')
elif c.isupper():
m = head_re.match(data, i)
assert m
i = m.end(0)
head = m[0]
outfile.write(head)
elif c in b'([':
outfile.write(c)
i += 1
islist = c == b'[' and ']' != chr(data[i])
multiline = islist or head in (b'Project', b'Def', b'Goto', b'Call', b'Sub', b'Blk', b'Arg')
if multiline:
depth += 1
outfile.write(b'\n')
outfile.write(indent * depth)

stack.append(Context(i, flip[c], multiline))
elif c in b')]':
outfile.write(c)
i += 1

s = stack.pop()
if c != s.closer:
raise ValueError(f"mismatched bracket: {flip[s.closer]} at byte {s.begin} closed by {c} at {i}.")
if s.multiline:
depth -= 1
if not stack:
outfile.write(b'\n')
elif c == b'"':
string = string_re.match(data, i)
if not string:
raise ValueError(f"unclosed string beginning at byte {i+1}.")
outfile.write(string[0])
i = string.end(0)
else:
sys.stderr.buffer.write(b'\npreceding text:\n')
sys.stderr.buffer.write(data[max(0,i-100):i])
raise ValueError(f"unsupported @ {i} = {c}")

if stack:
closers = ''.join(chr(x.closer[0]) for x in reversed(stack))
log.warning(f"unclosed brackets. expected: '{closers}'. malformed adt?")


@contextlib.contextmanager
def measure_time(context: str):
log.info(f'starting {context}', stacklevel=3)
start = time.perf_counter()
yield lambda: time.perf_counter() - start
log.debug(f'... done in {time.perf_counter() - start:.3f} seconds', stacklevel=3)


def main(args):
infile = args.input
outfile = args.output
update = args.update
spaces = args.spaces

with measure_time('read'):
data = infile.read()
infile.close()
log.debug(f' read {len(data):,} bytes')

if len(data) > 5000000:
log.warning(f'large input of {len(data):,} bytes. formatting may be slow.')

outbuf = None
if update:
outfile = outbuf = io.BytesIO()

with measure_time('pretty + write' if not update else 'pretty'):
pretty(outfile, data, spaces)

if update:
with measure_time('write'), open(infile.name, 'wb') as outfile:
assert outbuf
outfile.write(outbuf.getbuffer())

if __name__ == '__main__':
logging.basicConfig(format='[%(asctime)s %(module)s:%(lineno)-3d %(levelname)-7s] %(message)s')

argp = argparse.ArgumentParser(description="pretty formats BAP ADT files.")
argp.add_argument('input', nargs='?', type=argparse.FileType('rb'), default=sys.stdin.buffer,
help="input .adt file (default: stdin)")
excl = argp.add_mutually_exclusive_group()
excl.add_argument('output', nargs='?', type=argparse.FileType('wb'), default=sys.stdout.buffer,
help="output file name (default: stdout)")

argp.add_argument('--spaces', '-s', default=1, type=int,
help="indent size in spaces (default: 1)")

excl.add_argument('--update', '-i', action='store_true',
help="write output back to the input file (default: false)")

argp.add_argument('--verbose', '-v', action='count', default=0,
help="print logging output to stderr (default: no, repeatable)")

args = argp.parse_args()

if args.input is sys.stdin and args.update:
argp.error('argument --update/-i: not allowed with stdin input')

if args.verbose == 0:
level = logging.WARN
elif args.verbose == 1:
level = logging.INFO
else:
level = logging.DEBUG
log.setLevel(level)

log.debug(str(args))

with measure_time('format_adt.main'):
main(args)

Loading

0 comments on commit 26398dd

Please sign in to comment.