Merge branch 'main' into yousif-memory-region-analysis

UQ-PAC · Oct 31, 2023 · 26398dd · 26398dd
2 parents 63d5909 + e082890
commit 26398dd
Show file tree

Hide file tree

Showing 399 changed files with 70,918 additions and 39,096 deletions.
diff --git a/examples/simplified_http_parse_basic/example.adt b/examples/simplified_http_parse_basic/example.adt
diff --git a/examples/simplified_http_parse_basic/example.bir b/examples/simplified_http_parse_basic/example.bir
diff --git a/examples/simplified_http_parse_basic/example.bpl b/examples/simplified_http_parse_basic/example.bpl
diff --git a/examples/simplified_http_parse_basic/example.c b/examples/simplified_http_parse_basic/example.c
@@ -2,33 +2,34 @@
 #include <string.h>
 #include <stdio.h>
 
-#define MALLOC_SIZE 10
-
+#define MALLOC_SIZE 4
+// times out with 64 bit buffer
 
 // cntlm 22
 
 char *buf;
 char password = 7; // secret value; has to be a variable so that we get a Gamma_password variable
-char stext[11] = "sometext";
+char stext[11] = "hel";
 
 
 
 int main() {
     char *pos = NULL, *dom = NULL;
 
 
-   stext[5] = password;
+   stext[2] = password;
    buf = malloc(strlen(stext) + 1);
    memcpy(buf, stext, strlen(stext) + 1); // inlined by -O2
 
    puts(buf);
 
    // find the split between username and password ("username:password")
-   pos = buf + 2;
-
-  *pos = 0;
+   pos = buf + 1;
+
+  // including this makes verification fail
+ // *pos = 0;
 
-  memset(buf, 0, strlen(buf));
+  memset(buf, 0, strlen(buf) + 1);
   free(buf); // requires secret[i] == true
 
 }

diff --git a/examples/simplified_http_parse_basic/example.relf b/examples/simplified_http_parse_basic/example.relf
diff --git a/examples/simplified_http_parse_basic/example.spec b/examples/simplified_http_parse_basic/example.spec
@@ -4,54 +4,50 @@ buf: long
 stext: char[11]
 
 
-DIRECT functions: gamma_load64, gamma_load8, memory_load8_le, bvult64, bvule64, bvsub64
+DIRECT functions: gamma_load64, gamma_load8, memory_load8_le, bvult64, bvule64, bvsub64, gamma_load32
 
 
 Subroutine: #free
-Requires DIRECT: "gamma_load64(Gamma_mem, R0)"
-Ensures DIRECT: "Gamma_R0 == true"
+  Requires DIRECT: "gamma_load8(Gamma_mem, bvadd64(R0, 0bv64)) == true"
+  Requires DIRECT: "gamma_load8(Gamma_mem, bvadd64(R0, 1bv64)) == true"
+  Requires DIRECT: "gamma_load8(Gamma_mem, bvadd64(R0, 2bv64)) == true"
+  Requires DIRECT: "gamma_load8(Gamma_mem, bvadd64(R0, 3bv64)) == true"
+  Ensures DIRECT: "Gamma_R0 == true"
 
 
 Subroutine: main
 Requires: Gamma_password == false
-
+Requires DIRECT: "gamma_load32(Gamma_mem, memory_load64_le(mem, $stext_addr))"
+Requires DIRECT: "R31 == 100bv64"
 
 Subroutine: malloc
 Modifies: R0
-Ensures: buf == old(buf) && password == old(password)
+Ensures: buf == old(buf) && password == old(password) && stext==old(stext)
 // modifies R0, Gamma_R0;
-Ensures DIRECT: "R0 == 990000bv64"
+Ensures DIRECT: "R0 == 990000000bv64"
 Ensures DIRECT: "Gamma_R0 == true"
 
 
 Subroutine: memcpy
-Modifies: mem
-Ensures: buf == old(buf) && password == old(password) && stext==old(stext)
-  Ensures DIRECT: "((memory_load64_le(mem, $buf_addr) == old(memory_load64_le(mem, $buf_addr))) && (memory_load8_le(mem, $password_addr) == old(memory_load8_le(mem, $password_addr))))"
-  // Works
-  //ensures (forall i: bv64 :: {mem[i]} (memory_load8_le(mem, i) == if (bvule64(R0, i) && bvult64(i,bvadd64(R0, R2))) then memory_load8_le(old(mem), bvadd64(bvsub64(i, R0), R1)) else old(memory_load8_le(mem, i))));
-  // Faster 
+  Modifies: mem
+  Ensures: buf == old(buf) && password == old(password) && stext==old(stext)
+  Ensures DIRECT: "(forall i: bv64 :: (Gamma_mem[i] == if (bvule64(R0, i) && bvult64(i,bvadd64(R0, R2))) then gamma_load8(old(Gamma_mem), bvadd64(bvsub64(i, R0), R1)) else old(gamma_load8(Gamma_mem, i))))"
   Ensures DIRECT: "(forall i: bv64 :: (mem[i] == if (bvule64(R0, i) && bvult64(i,bvadd64(R0, R2))) then memory_load8_le(old(mem), bvadd64(bvsub64(i, R0), R1)) else old(memory_load8_le(mem, i))))"
-  // Does not verify with removed \0 write
-  // ensures (forall i: bv64 :: (memory_load8_le(mem, i) == if (bvule64(R0, i) && bvult64(i,bvadd64(R0, R2))) then memory_load8_le(old(mem), bvadd64(bvsub64(i, R0), R1)) else old(memory_load8_le(mem, i))));
 
 // forall i <= n, Gamma_mem[R0] low
 Subroutine: memset
   Modifies: mem
+  Ensures: buf == old(buf) && password == old(password) && stext==old(stext)
   Ensures DIRECT: "(forall i: bv64 :: (Gamma_mem[i] == if (bvule64(R0, i) && bvult64(i,bvadd64(R0, R2))) then Gamma_R1 else old(Gamma_mem[i])))"
-  // Works
-  //ensures (forall i: bv64 :: {mem[i]} (memory_load8_le(mem, i) == if (bvule64(R0, i) && bvult64(i,bvadd64(R0, R2))) then R1[8:0] else old(memory_load8_le(mem, i))));
-  // Faster 
   Ensures DIRECT: "(forall i: bv64 :: (mem[i] == if (bvule64(R0, i) && bvult64(i,bvadd64(R0, R2))) then R1[8:0] else old(memory_load8_le(mem, i))))"
-  // Does not verify with removed \0 write
-  // ensures (forall i: bv64 :: (memory_load8_le(mem, i) == if (bvule64(R0, i) && bvult64(i,bvadd64(R0, R2))) then R1[8:0] else old(memory_load8_le(mem, i))));
 
 
 Subroutine: strlen
-  Ensures: buf == old(buf) && password == old(password)
+  Modifies: R0
+  Requires DIRECT: "(memory_load8_le(mem, R0) == 0bv8) || (memory_load8_le(mem, bvadd64(R0, 1bv64)) == 0bv8)|| (memory_load8_le(mem, bvadd64(R0, 2bv64)) == 0bv8)|| (memory_load8_le(mem, bvadd64(R0, 3bv64)) == 0bv8)"
+  Ensures: buf == old(buf) && password == old(password) && stext==old(stext)
   Ensures DIRECT: "Gamma_R0 == true"
-  Ensures DIRECT: "(forall i: bv64 :: (((bvule64(0bv64, i)) && (bvult64(i, R0)) ==> (mem[bvadd64(old(R0), i)] != 0bv8))))"
-  Ensures DIRECT: "(mem[bvadd64(old(R0), R0)] == 0bv8)"
-  Ensures DIRECT: "(bvule64(old(R0), bvadd64(old(R0), R0)))" // doesnt overflow
-
+  Ensures DIRECT: "(forall i: bv64 :: (bvule64(old(R0), i)) && (bvult64(i, bvadd64(old(R0), R0))) ==> mem[i] != 0bv8)"
+  Ensures DIRECT: "(memory_load8_le(mem, bvadd64(old(R0), R0)) == 0bv8)"
+  Ensures DIRECT: "(bvule64(old(R0), bvadd64(old(R0), R0)))"
 
diff --git a/readme.md b/readme.md
@@ -197,16 +197,15 @@ The binary (i.e `*.out`) can then be generated from a C source file using:
 See [src/test/correct/liftone.sh](https://github.com/UQ-PAC/bil-to-boogie-translator/blob/main/src/test/correct/liftone.sh) for more examples
 for flag combinations that work. 
 
-## Verifying the generated boogie (the programming language), by running boogie (the progam) on the boogie (the program)
+## Verifying the generated Boogie file
 
 [Boogie](https://github.com/boogie-org/boogie#installation) can be installed by following the instructions in the given link.
 
-Boogie can be run on the output `*.bpl` file with the command `boogie *.bpl`. 
+Boogie can be run on the output `*.bpl` file with the command `boogie \useArrayAxioms *.bpl`. 
 
-A recent boogie version is needed, for example `Boogie program verifier version 2.4.1.10503, Copyright (c) 2003-2014, Microsoft.`. 
+It is recommended to use Boogie version 3.0.4 and Z3 version 4.8.8 (which is recommended by Boogie). Other versions and combinations may not have been tested.
 
-With older versions and recent versions of z3 (e.g. `Z3 version 4.8.12 - 64 bit`), Z3 emits warnings about `model_compress`, since the 
-parameter name was changed. This does not prevent it from working however. 
+The `\useArrayAxioms` flag is necessary for Boogie versions 2.16.8 and greater; for earlier versions it can be removed.
 
 Boogie can be installed through dotnet and requires dotnet 6.
 

diff --git a/scripts/format_adt.py b/scripts/format_adt.py
@@ -0,0 +1,204 @@
+#!/usr/bin/env python3
+# vim: ts=2:sw=2:expandtab:autoindent
+
+"""
+format_adt.py implements pretty-printing of BAP .adt files
+by translating the ADT into Python syntax, then parsing and
+formatting the python.
+
+Although this eval()s, it is made safe by using ast.literal_eval
+which only supports parsing of a literal Python expression.
+"""
+
+import io
+import re
+import ast
+import sys
+import time
+import pprint
+import logging
+import argparse
+import dataclasses
+import contextlib
+
+# grep -E "'([A-Z][a-zA-Z]+)'" src/main/antlr4/BAP_ADT.g4 --only-matching --no-filename | sort | uniq | xargs printf "'%s', " | fold -w80 -s
+heads = [
+  'AND', 'Annotation', 'Arg', 'Args', 'ARSHIFT', 'Attr', 'Attrs', 'BigEndian', 
+  'Blk', 'Blks', 'Both', 'Call', 'Concat', 'Def', 'Defs', 'Direct', 'DIVIDE', 
+  'EQ', 'Extract', 'Goto', 'HIGH', 'Imm', 'In', 'Indirect', 'Int', 'Jmp', 'Jmps', 
+  'LE', 'LittleEndian', 'Load', 'LOW', 'LSHIFT', 'LT', 'Mem', 'Memmap', 'MINUS', 
+  'MOD', 'NEG', 'NEQ', 'NOT', 'OR', 'Out', 'Phi', 'Phis', 'PLUS', 'Program', 
+  'Project', 'Region', 'RSHIFT', 'SDIVIDE', 'Section', 'Sections', 'SIGNED', 
+  'SLE', 'SLT', 'SMOD', 'Store', 'Sub', 'Subs', 'Tid', 'TIMES', 'UNSIGNED', 
+  'Var', 'XOR',
+]
+heads_joined = '|'.join(heads)
+heads_joined_b = heads_joined.encode('ascii')
+
+log = logging.getLogger()
+
+notspace_re = re.compile(rb'\S')
+head_re = re.compile(rb'[^\s(]+')
+num_re = re.compile(rb'[_0-9xa-fA-F]+')
+string_re = re.compile(rb'"(?:[^"\\]|\\.)*"')
+
+@dataclasses.dataclass
+class Context:
+  begin: int
+  closer: bytes
+  multiline: bool
+
+flip = {
+  b'(': b')',
+  b')': b'(',
+  b'[': b']',
+  b']': b'[',
+}
+
+def pretty(outfile, data: bytes, spaces: int):
+  # stack of expression beginning parentheses and their start position
+  stack: list[Context] = []
+  i = 0
+  depth = 0
+  head = b''
+
+  indent = b' ' * spaces
+
+  i0 = i - 1
+  length = len(data)
+  while i < length:
+    assert i0 != i
+    i0 = i
+
+    c = bytes((data[i],))
+    if c.isspace(): 
+      while i < length and data[i] in b' \t\r\n':
+        i += 1
+    elif c.isdigit():
+      m = num_re.match(data, i)
+      assert m
+      outfile.write(m[0])
+      i = m.end(0)
+    elif c == b',':
+      outfile.write(b',')
+      i += 1
+      if stack[-1].multiline:
+        outfile.write(b'\n')
+        outfile.write(indent * depth)
+      else:
+        outfile.write(b' ')
+    elif c.isupper():
+      m = head_re.match(data, i)
+      assert m
+      i = m.end(0)
+      head = m[0]
+      outfile.write(head)
+    elif c in b'([': 
+      outfile.write(c)
+      i += 1
+      islist = c == b'[' and ']' != chr(data[i])
+      multiline = islist or head in (b'Project', b'Def', b'Goto', b'Call', b'Sub', b'Blk', b'Arg')
+      if multiline:
+        depth += 1
+        outfile.write(b'\n')
+        outfile.write(indent * depth)
+
+      stack.append(Context(i, flip[c], multiline))
+    elif c in b')]':
+      outfile.write(c)
+      i += 1
+
+      s = stack.pop()
+      if c != s.closer:
+        raise ValueError(f"mismatched bracket: {flip[s.closer]} at byte {s.begin} closed by {c} at {i}.")
+      if s.multiline:
+        depth -= 1
+      if not stack:
+        outfile.write(b'\n')
+    elif c == b'"':
+      string = string_re.match(data, i)
+      if not string:
+        raise ValueError(f"unclosed string beginning at byte {i+1}.")
+      outfile.write(string[0])
+      i = string.end(0)
+    else:
+      sys.stderr.buffer.write(b'\npreceding text:\n')
+      sys.stderr.buffer.write(data[max(0,i-100):i])
+      raise ValueError(f"unsupported @ {i} = {c}")
+
+  if stack:
+    closers = ''.join(chr(x.closer[0]) for x in reversed(stack))
+    log.warning(f"unclosed brackets. expected: '{closers}'. malformed adt?")
+
+
+@contextlib.contextmanager
+def measure_time(context: str):
+  log.info(f'starting {context}', stacklevel=3)
+  start = time.perf_counter()
+  yield lambda: time.perf_counter() - start
+  log.debug(f'... done in {time.perf_counter() - start:.3f} seconds', stacklevel=3)
+
+
+def main(args):
+  infile = args.input
+  outfile = args.output
+  update = args.update
+  spaces = args.spaces
+
+  with measure_time('read'):
+    data = infile.read()
+    infile.close()
+    log.debug(f'    read {len(data):,} bytes')
+
+  if len(data) > 5000000:
+    log.warning(f'large input of {len(data):,} bytes. formatting may be slow.')
+
+  outbuf = None
+  if update:
+    outfile = outbuf = io.BytesIO()
+
+  with measure_time('pretty + write' if not update else 'pretty'):
+    pretty(outfile, data, spaces)
+
+  if update:
+    with measure_time('write'), open(infile.name, 'wb') as outfile:
+      assert outbuf
+      outfile.write(outbuf.getbuffer())
+
+if __name__ == '__main__':
+  logging.basicConfig(format='[%(asctime)s %(module)s:%(lineno)-3d %(levelname)-7s] %(message)s')
+
+  argp = argparse.ArgumentParser(description="pretty formats BAP ADT files.")
+  argp.add_argument('input', nargs='?', type=argparse.FileType('rb'), default=sys.stdin.buffer,
+                    help="input .adt file (default: stdin)")
+  excl = argp.add_mutually_exclusive_group()
+  excl.add_argument('output', nargs='?', type=argparse.FileType('wb'), default=sys.stdout.buffer,
+                    help="output file name (default: stdout)")
+
+  argp.add_argument('--spaces', '-s', default=1, type=int, 
+                    help="indent size in spaces (default: 1)")
+
+  excl.add_argument('--update', '-i', action='store_true',
+                    help="write output back to the input file (default: false)")
+
+  argp.add_argument('--verbose', '-v', action='count', default=0,
+                    help="print logging output to stderr (default: no, repeatable)")
+
+  args = argp.parse_args()
+
+  if args.input is sys.stdin and args.update:
+    argp.error('argument --update/-i: not allowed with stdin input')
+
+  if args.verbose == 0:
+    level = logging.WARN
+  elif args.verbose == 1:
+    level = logging.INFO
+  else:
+    level = logging.DEBUG
+  log.setLevel(level)
+
+  log.debug(str(args))
+
+  with measure_time('format_adt.main'):
+    main(args)
+