-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add .adt pretty-printer in format_adt.py
This makes it easier to inspect .adt files manually without the burden of fully parsing them. The formatter works by exploiting the almost-Python syntax of the ADT. It tweaks the ADT into valid Python, then parses it with Python's ast.literal_eval function and formats it with the pprint module. After this, it reverses the transformation to produce something very much like BAP's own output.
- Loading branch information
1 parent
c4406e9
commit aacd974
Showing
1 changed file
with
118 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
#!/usr/bin/env python3 | ||
# vim: noai:ts=2:sw=2:expandtab | ||
|
||
""" | ||
format_adt.py implements pretty-printing of BAP .adt files | ||
by translating the ADT into Python syntax, then parsing and | ||
formatting the python. | ||
Although this eval()s, it is made safe by using ast.literal_eval | ||
which only supports parsing of a literal Python expression. | ||
""" | ||
|
||
import re | ||
import ast | ||
import sys | ||
import pprint | ||
import typing | ||
import argparse | ||
|
||
# Head names; the list can be regenerated with:
# grep -E '[A-Z][a-zA-Z]+\(' *.adt --only-matching | sort | uniq | tr -d '(' | xargs -n1 printf "'%s', "
heads = ['Annotation', 'Arg', 'Args', 'ARSHIFT', 'Attr', 'Attrs', 'Blk', 'Blks', 'Both', 'Call', 'Concat', 'Def', 'Defs', 'Direct', 'EQ', 'Extract', 'Goto', 'Imm', 'In', 'Indirect', 'Int', 'Jmps', 'LittleEndian', 'Load', 'LOW', 'Mem', 'Memmap', 'NEQ', 'NOT', 'Out', 'Phis', 'PLUS', 'Program', 'Project', 'Region', 'Section', 'Sections', 'SIGNED', 'Store', 'Sub', 'Subs', 'Tid', 'UNSIGNED', 'Var']
heads_joined = '|'.join(heads)

# Matches either a complete quoted string literal (either quote style, with
# backslash escapes honoured) or a known head followed by "(".  Group 1 is
# set only for the head alternative, which lets the substitution callback
# leave string literals untouched.  The \b stops a head from matching the
# tail of a longer identifier (e.g. "Int(" inside "XInt(").
_head_call_re = re.compile(
  r'"[^"\\]*(?:\\.[^"\\]*)*"'
  r"|'[^'\\]*(?:\\.[^'\\]*)*'"
  r'|\b(' + heads_joined + r')[(]',
  re.DOTALL,
)


def preprocess(data: str) -> str:
  """
  Preprocesses BAP ADT intrinsics (like Program, Subs, ...) into tuple syntax.
  For example, Program(1, 2, 3) becomes ('Program', 1, 2, 3).

  Only the names listed in `heads` are rewritten, and only when they appear
  as a whole identifier outside of a quoted string. Text inside string
  literals is passed through unchanged, so a value such as "Load(x)" is not
  mangled.

  Returns the rewritten text; the input string is not modified.
  """
  def to_tuple_open(match):
    head = match.group(1)
    if head is None:
      # A string literal was matched: keep it exactly as written.
      return match.group(0)
    return '(' + repr(head) + ', '

  return _head_call_re.sub(to_tuple_open, data)
|
||
class DoubleQuoteStr(str):
  """
  A str whose repr() is always a double-quoted literal.

  Python's own repr() prefers single quotes; this subclass converts that
  form so the formatted output uses double-quoted strings throughout.
  The result is still a valid Python string literal that evaluates back
  to the original text.
  """

  def __repr__(self):
    plain = super().__repr__()
    if plain[0] == '"':
      # Python only picks double quotes when the text contains ' but no ",
      # so the literal is already in the desired form.
      return plain

    def requote(match):
      token = match.group(0)
      if token == '"':
        # A bare double quote must be escaped inside a "..." literal.
        return '\\"'
      if token == "\\'":
        # An escaped apostrophe no longer needs its backslash.
        return "'"
      # Any other escape sequence (including \\) is kept verbatim.
      return token

    # Walk the body one escape sequence at a time so that the second half of
    # an escaped backslash is never mistaken for the start of a \' escape.
    body = re.sub(r'\\.|"', requote, plain[1:-1], flags=re.DOTALL)
    return '"' + body + '"'
|
||
class UnderscoreInt(int):
  """
  An int whose repr() groups digits with underscores, e.g. 1_234_567.
  """

  def __repr__(self):
    # The '_' format spec inserts an underscore between each group of
    # three decimal digits.
    return format(self, '_')
|
||
Exp = tuple | list | str | int


def clean(data: Exp) -> Exp:
  """
  Intermediate step before formatting to tweak pprint's formatting.
  This ensures we match BAP as close as possible with double-quoted strings
  and underscores in Tid.

  Strings become DoubleQuoteStr, the integer id of a ("Tid", id, ...) tuple
  becomes UnderscoreInt, and lists/tuples are rebuilt with every element
  cleaned recursively. Any other value is returned unchanged.
  """
  if isinstance(data, str):
    return DoubleQuoteStr(data)

  if isinstance(data, tuple):
    # Guard the length before indexing so that empty or one-element tuples
    # fall through to the generic sequence handling instead of raising.
    # Only a genuine integer id is wrapped; anything else is left as-is
    # rather than being coerced through int().
    is_raw_tid = (
      len(data) >= 2
      and data[0] == 'Tid'
      and isinstance(data[1], int)
      and not isinstance(data[1], UnderscoreInt)
    )
    if is_raw_tid:
      return clean((data[0], UnderscoreInt(data[1])) + data[2:])

  if isinstance(data, (list, tuple)):
    return data.__class__(map(clean, data))

  return data
|
||
def postprocess(data: str) -> str:
  """
  Postprocesses the formatted Python expression to restore the BAP-style intrinsics.

  ("Program", 1, 2) becomes Program(1, 2). A line break directly after the
  head is kept, any other single whitespace character after it is dropped.
  A zero-argument head such as ("In",) becomes In(). The trailing comma of
  any other one-element tuple is removed. The contents of quoted strings
  are never altered.
  """
  token_re = re.compile(
    # A head marker: "(" (optionally padded with spaces, which pprint emits
    # when indent > 1), the quoted head name and a comma, followed by either
    # one whitespace character (group 2) or the closing parenthesis of a
    # zero-argument head.
    rf'[(] *"({heads_joined})",(?:(\s)|(?=[)]))'
    # A complete string literal, matched only so that it can be skipped.
    r'|"[^"\\]*(?:\\.[^"\\]*)*"'
    r"|'[^'\\]*(?:\\.[^'\\]*)*'"
    # The trailing comma of a one-element tuple.
    r'|,[)]',
    re.DOTALL,
  )

  def restore(match):
    head = match.group(1)
    if head is not None:
      return head + '(' + ('\n' if match.group(2) == '\n' else '')
    if match.group(0) == ',)':
      return ')'
    # String literal: leave untouched.
    return match.group(0)

  return token_re.sub(restore, data)
|
||
|
||
def main(args):
  """
  Reads ADT text from args.input, pretty-prints it and writes the result.

  args is expected to provide `input` (readable text file), `output`
  (writable text file), `width` (passed to pprint as the indent size) and
  `update` (when true, the result is written back to the file named by
  args.input instead of to args.output).
  """
  source, sink, indent, in_place = args.input, args.output, args.width, args.update

  text = source.read()

  # ADT text -> Python literal -> cleaned tree -> formatted text -> ADT text.
  tree = clean(ast.literal_eval(preprocess(text)))
  formatted = postprocess(
    pprint.pformat(tree, indent=indent, underscore_numbers=False)
  )

  if not in_place:
    sink.write(formatted)
    sink.write('\n')
    sink.flush()
    return

  # Release the read handle before reopening the same path for writing.
  source.close()
  with open(source.name, 'w') as rewritten:
    rewritten.write(formatted)
    rewritten.write('\n')
|
||
if __name__ == '__main__':
  # Command-line entry point: build the parser, validate the option
  # combination, then hand over to main().
  parser = argparse.ArgumentParser(description="pretty formats BAP ADT files.")

  parser.add_argument(
    'input',
    nargs='?',
    type=argparse.FileType('r'),
    default=sys.stdin,
    help="input .adt file (default: stdin)",
  )

  # An explicit output file and in-place update cannot be combined.
  destination = parser.add_mutually_exclusive_group()
  destination.add_argument(
    'output',
    nargs='?',
    type=argparse.FileType('w'),
    default=sys.stdout,
    help="output file name (default: stdout)",
  )

  parser.add_argument(
    '--width', '-w',
    default=1,
    type=int,
    help="indent size in spaces (default: 1)",
  )

  destination.add_argument(
    '--update', '-i',
    action='store_true',
    help="write output back to the input file (default: false)",
  )

  args = parser.parse_args()

  # There is no file to write back to when the input comes from stdin.
  if args.update and args.input is sys.stdin:
    parser.error('argument --update/-i: not allowed with stdin input')

  main(args)
|