Skip to content

Commit

Permalink
Added a compiler example.
Browse files Browse the repository at this point in the history
  • Loading branch information
theq629 committed Sep 8, 2012
1 parent bfb7114 commit b7ee42d
Show file tree
Hide file tree
Showing 84 changed files with 612 additions and 0 deletions.
4 changes: 4 additions & 0 deletions tests/compiler/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Put the repository root (two levels up from tests/compiler) on the Python
# module search path for the commands below.
# NOTE(review): presumably this is what lets check-compiler's `import check`
# resolve -- confirm against the repository layout.
export PYTHONPATH=../../

# Run the checker script over the answer/ and testcases/ directories.
# NOTE(review): the meaning of -d and -o is defined by the checker, not here.
test:
	./check-compiler -d -o out answer/ testcases/
14 changes: 14 additions & 0 deletions tests/compiler/README
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
This is a very simple compiler with LLVM, to demonstrate how to set up complex checks. In particular, interesting features are that:
- the command differs for each testcase rather than only for each testcase group
- the command outputs intermediate files which are then checked.

The compiler is for a simple arithmetic expression language. A program consists of a series of statements, one on each line. A statement has one of the following forms, where tokens are separated by one or more whitespace characters:
read ID
print ID
set ID UNIOP ID
set ID ID BINOP ID
ID is any identifier token, which can be any string that does not contain whitespace characters. UNIOP is any character in "-". BINOP is any character in "-+*/%".

"read ID" reads an integer from a line of standard input. "print ID" prints an integer to a line of standard output. "set ID UNIOP ID" does operation UNIOP on the second ID argument and sets the first ID argument to the result. "set ID ID BINOP ID" does operation BINOP on the second and third ID arguments, and sets the first ID argument to the result. Operations do what you probably expect them to do. ID tokens used as argument to operations must have been previously assigned to.

The compiler from source code to LLVM code simply prints out text rather than using the LLVM libraries. The rest of the compiling process to produce a native executable is done with the LLVM tool programs.
83 changes: 83 additions & 0 deletions tests/compiler/answer/codegen
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#!/usr/bin/env python2

import sys

# NOTE: this deliberately named class shadows the builtin SyntaxError inside
# this script.
class SyntaxError(Exception):
    """Raised when a source statement does not match an accepted form."""


class SemanticError(Exception):
    """Raised when a statement uses an identifier that was never assigned."""

# Binary operator token -> LLVM integer instruction mnemonic.
bin_ops = dict([
    ("+", "add"),
    ("-", "sub"),
    ("*", "mul"),
    ("/", "sdiv"),
    ("%", "srem"),
])
# Unary operator token -> (LLVM mnemonic, constant used as the first operand);
# negation is emitted as a subtraction from 0.
uni_ops = dict([
    ("-", ("sub", 0)),
])

# Emit the fixed module prologue: declarations of the two runtime functions
# (@read and @print) and the opening of @main with its entry block.
print """
declare i32 @read()
declare void @print(i32)
define i32 @main() {
entry:
"""

# Maps each source identifier to the LLVM value name that currently holds it.
symbol_table = {}
# Number used to build the next LLVM value name handed out by assign().
next_id = 0
def assign(var):
    """Bind source identifier *var* to a fresh LLVM value name.

    A comment line recording the binding is written to standard output, the
    symbol table entry for *var* is (re)pointed at the new name, and the
    global counter is advanced. Returns the new LLVM value name.
    """
    global next_id
    fresh_name = "%%%i" % (next_id)
    print("\t; %s now %s" % (fresh_name, var))
    symbol_table[var] = fresh_name
    next_id = next_id + 1
    return fresh_name
def lookup(var):
    """Return the LLVM value name currently bound to identifier *var*.

    A comment line recording the use is written to standard output.
    Raises SemanticError when *var* has no entry in the symbol table.
    """
    try:
        bound_name = symbol_table[var]
    except KeyError:
        raise SemanticError()
    print("\t; %s is %s" % (bound_name, var))
    return bound_name

# Translate the program on standard input, one statement per line, into LLVM
# instructions on standard output. Every input line is echoed as an LLVM
# comment first. Raises SyntaxError for a malformed statement and (via
# lookup) SemanticError for use of an unassigned identifier.
for line_num, line in enumerate(sys.stdin, 1):
    print("\t; LINE %i: %s" % (line_num, line.strip("\n")))
    tokens = line.split()
    if len(tokens) < 2:
        raise SyntaxError("line %i: expected an instruction and an identifier" % (line_num))
    instr = tokens[0]
    var = tokens[1]
    if instr == 'read':
        if len(tokens) != 2:
            raise SyntaxError("line %i: read takes exactly one identifier" % (line_num))
        llvm_var = assign(var)
        print("\t%s = call i32 @read()" % (llvm_var))
    elif instr == 'set':
        # Resolve the operands BEFORE binding the destination: otherwise a
        # statement such as "set x x + y" would read the brand-new (still
        # undefined) value of x instead of its previous one, and
        # "set a - a" with an unassigned a would be silently accepted.
        if len(tokens) == 4:
            op = tokens[2]
            arg = tokens[3]
            if op not in uni_ops:
                raise SyntaxError("line %i: unknown unary operator %s" % (line_num, op))
            llvm_op, llvm_arg1 = uni_ops[op]
            llvm_arg2 = lookup(arg)
        elif len(tokens) == 5:
            arg1 = tokens[2]
            op = tokens[3]
            arg2 = tokens[4]
            if op not in bin_ops:
                raise SyntaxError("line %i: unknown binary operator %s" % (line_num, op))
            llvm_op = bin_ops[op]
            llvm_arg1, llvm_arg2 = [lookup(a) for a in [arg1, arg2]]
        else:
            raise SyntaxError("line %i: set takes 2 or 3 tokens after the identifier" % (line_num))
        llvm_var = assign(var)
        print("\t%s = %s i32 %s, %s" % (llvm_var, llvm_op, llvm_arg1, llvm_arg2))
    elif instr == 'print':
        if len(tokens) != 2:
            raise SyntaxError("line %i: print takes exactly one identifier" % (line_num))
        llvm_var = lookup(var)
        print("\tcall void @print(i32 %s)" % (llvm_var))
    else:
        # Unknown instructions used to be dropped silently.
        raise SyntaxError("line %i: unknown instruction %s" % (line_num, instr))

# Close @main with a zero exit status.
print("""
ret i32 0
}
""")
13 changes: 13 additions & 0 deletions tests/compiler/answer/stdlib.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#include <stdio.h>

/* Write x in decimal, followed by a newline, to standard output. */
void print(int x)
{
    fprintf(stdout, "%d\n", x);
}

/*
 * Read one decimal integer from standard input and return it.
 *
 * Returns 0 when no integer could be read (end of input or malformed text);
 * previously the result of scanf was ignored and an uninitialised value was
 * returned in that case.
 */
int read()
{
    int i = 0;

    if (scanf("%d", &i) != 1) {
        return 0;
    }
    return i;
}
86 changes: 86 additions & 0 deletions tests/compiler/check-compiler
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#!/usr/bin/env python2

import check
import os
import os.path
import difflib
import re
import sys

# Path of the code generator; passed to the testcase runner with -c and listed
# as a source file in check_defaults.
codegen = "./codegen"
# Extension appended to a testcase prefix to name its source file.
source_extension = ".source"
# File name of the per-testcase runner script, joined onto check_dir.
testcase_run = "testcase-run"
# C runtime file; passed to the testcase runner with -l.
stdlib = "stdlib.c"

def testcase_prefix(testcases_path, group, testcase):
return os.path.join(testcases_path, group, testcase)

def load_files(*paths):
    """Return the contents of each file in *paths* as a list of lines.

    The result has one list per path, in the order given; lines keep their
    trailing newline. Each file is opened, read and closed in turn, so a
    failure to open a later file no longer leaks the handles opened before
    it. Raises IOError if a file cannot be opened or read.
    """
    contents = []
    for path in paths:
        with open(path) as stream:
            contents.append(list(stream))
    return contents

def command(**args):
    """Build the argument list that runs the testcase runner for one testcase.

    Reads the 'testcases_path', 'group', 'testcase', 'check_dir' and
    'log_dir' entries of *args*. The result invokes the runner found under
    check_dir with the code generator (-c), the C runtime (-l), the
    testcase's source file, the log directory, the group and the testcase.
    """
    source_file = testcase_prefix(args['testcases_path'], args['group'], args['testcase']) + source_extension
    runner = os.path.join(args['check_dir'], testcase_run)
    argv = [runner, "-c", codegen, "-l", stdlib, source_file]
    argv.extend([args['log_dir'], args['group'], args['testcase']])
    return argv

def diff_exact(a, b, output):
    """Compare two line lists for exact equality.

    Returns True when *a* equals *b*. Otherwise writes a heading followed by
    a unified diff of the two to *output* and returns False.
    """
    if a == b:
        return True
    output.write("Diff in output:\n")
    delta = difflib.unified_diff(a, b)
    output.writelines(delta)
    return False

def make_diff_exit_status(fail_fail_msg, fail_succeed_msg):
    """Return a comparison function for files holding an exit status.

    fail_fail_msg    -- written when the first (gold) status is 0 but the
                        second status is non-zero
    fail_succeed_msg -- written when the first (gold) status is non-zero but
                        the second status is 0

    The returned function diff(a, b, output) takes two line lists, each of
    which must contain exactly one line holding an integer. Statuses are
    normalized to 0 or 1 before comparison, so any two non-zero statuses
    match. It returns True on a match; otherwise it writes an explanation to
    *output* and returns False.
    """
    def normalize(lines):
        # Explicit validation instead of assert, which is stripped under -O.
        if len(lines) != 1:
            raise ValueError("expected exactly one line")
        return 0 if int(lines[0].strip()) == 0 else 1

    def diff(a, b, output):
        try:
            # Normalize to 0 or 1 rather than checking exact error codes
            a, b = normalize(a), normalize(b)
        except (TypeError, ValueError, AttributeError, LookupError):
            # Only malformed input is reported here; unrelated exceptions
            # such as KeyboardInterrupt are no longer swallowed.
            output.write("Expected an exit status number but got something else.\n")
            return False
        if a != b:
            if a == 0:
                output.write("%s\n" % (fail_fail_msg))
            else:
                output.write("%s\n" % (fail_succeed_msg))
            output.write("Diff in output:\n")
            output.writelines(difflib.unified_diff([str(a)], [str(b)]))
            return False
        return True
    return diff

def gold_output_paths(suffix, **args):
    """Return (gold_path, output_path) for one testcase file.

    Both paths are the testcase prefix GROUP/TESTCASE plus *suffix*; the gold
    path is rooted at args['testcases_path'], the output path at
    args['log_dir'].
    """
    located = []
    for root_key in ('testcases_path', 'log_dir'):
        prefix = testcase_prefix(args[root_key], args['group'], args['testcase'])
        located.append(prefix + suffix)
    gold_path, output_path = located
    return gold_path, output_path

def make_file_check_llvm_err(**args):
    """Describe the file check on the code generation exit status.

    Compares the testcase's ".llvm.ret" gold and output files with the
    normalized exit-status comparison; the gold default is ['0'].
    """
    gold_path, output_path = gold_output_paths(".llvm.ret", **args)
    compare_status = make_diff_exit_status(
        "Testcase was expected to succeed but failed.",
        "Testcase was expected to fail but succeeded.")
    file_check = {
        'gold': gold_path,
        'output': output_path,
        'check': compare_status,
        'load_lines': True,
        'backup': False,
        'gold_default': ['0'],
        'name': "code generation exit status",
    }
    return file_check

def make_file_check_run_out(**args):
    """Describe the file check on the compiled program's standard output.

    Compares the testcase's ".run.out" gold and output files for exact
    equality; the gold default is an empty list.
    """
    gold_path, output_path = gold_output_paths(".run.out", **args)
    file_check = {
        'gold': gold_path,
        'output': output_path,
        'check': diff_exact,
        'load_lines': True,
        'backup': False,
        'gold_default': [],
        'name': "final output from compiled program",
    }
    return file_check

# Testcase groups handed to check.check_all, each with an empty settings dict.
# NOTE(review): presumably per-group settings override check_defaults --
# confirm against the check module.
checks = {
    "noarith": {},
    "singleop": {},
    "long": {}
}

# Settings passed to check.check_all alongside the groups above.
check_defaults = {
    # builds the argument list that runs one testcase
    'command': command,
    'source_files': [codegen],
    'stdout': None,
    'stderr': None,
    # functions returning the description of each file comparison
    'file_checks': [make_file_check_llvm_err, make_file_check_run_out]
}

# Module-level entry point: run everything through the check library.
check.check_all(checks, check_defaults)
162 changes: 162 additions & 0 deletions tests/compiler/testcase-run
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
#!/usr/bin/env python

"""
usage: %s [-c CODEGEN] [-l STDLIB] SOURCE-FILE [LOG-DIR [GROUP TESTCASE]]
SOURCE-FILE the source code input file
LOG-DIR an optional directory to put output in
GROUP an optional group name for organizing the output files
TESTCASE an optional testcase name for organizing the output files
Options
-c CODEGEN path to compiler codegen executable
-l STDLIB path to stdlib C file
Output files are as follows:
PREFIX.STAGE main result from STAGE
PREFIX.STAGE.out standard output from STAGE
PREFIX.STAGE.err standard error from STAGE
PREFIX.STAGE.ret exit status from STAGE
Stages are:
llvm source code to LLVM code generation
bc assembly to LLVM bitcode
s bitcode to native code
exec linking to make native executable
run running the final executable
Prefix is determined by which arguments are given:
SOURCE-FILE PREFIX is ./NAME
SOURCE-FILE LOG-DIR PREFIX is LOG-DIR/NAME
SOURCE-FILE LOG-DIR GROUP TESTCASE PREFIX is LOG-DIR/GROUP/TESTCASE
NAME is the basename of SOURCE-FILE if SOURCE-FILE has the extension %s, and
otherwise is a unique name generated to avoid conflicting with existing files.
Environment variables:
LLVMAS LLVM assembler, defaults to llvm-as
LLC LLVM native code compiler, defaults to llc -disable-cfi
CC C compiler for linking, defaults to gcc
CODEGEN default for the source code to LLVM code compiler, defaults to %s
STDLIB default for the stdlib C file, defaults to %s
"""

import subprocess
import sys
import os
import os.path
import shutil
import shlex
import tempfile

gen_name_prefix = "llvm-run" # filename prefix to use if we have to make up a name for output
source_extension = ".source" # extension that marks a source file whose basename is reused for output
default_codegen = "./codegen"
default_stdlib = "./stdlib.c"
codegen_llvm_out_source = "out" # which codegen output file (PREFIX.llvm.<this>) holds the generated LLVM code
codegen_env_var = "CODEGEN"
stdlib_env_var = "STDLIB"

# Tool commands; each can be overridden through the environment.
llvmas = os.environ.get('LLVMAS') or 'llvm-as'
# The documented variable is LLC. The misspelt 'LCC' that used to be read here
# is still honoured as a fallback so existing setups keep working.
llc = os.environ.get('LLC') or os.environ.get('LCC') or 'llc -disable-cfi'
cc = os.environ.get('CC') or 'gcc'
codegen = os.environ.get(codegen_env_var) or os.path.join('.', default_codegen)
stdlib = os.environ.get(stdlib_env_var) or default_stdlib

def touch(fname, times=None):
    """Create *fname* if it does not exist and set its access/modified times.

    *times* is passed straight to os.utime; None means the current time.
    Existing content is left untouched because the file is opened for append.
    """
    handle = open(fname, 'a')
    try:
        os.utime(fname, times)
    finally:
        handle.close()

def printfile(fname, ostream):
    """Copy the file *fname* line by line to the stream *ostream*.

    An IOError while opening, reading or writing is not propagated; a
    message naming the file and the stream goes to standard error instead.
    """
    try:
        with open(fname, 'r') as source:
            # readline returns '' only at end of file
            for text_line in iter(source.readline, ''):
                ostream.write(text_line)
    except IOError:
        print >>sys.stderr, 'could not read', fname, 'and print to', ostream

def run(msg, cmd, suffix, inpath, out_prefix):
    """Run one stage command and record its results next to the output prefix.

    msg        -- progress text printed to standard error
    cmd        -- command line as a single string; split with shlex
    suffix     -- appended to out_prefix to form the stage's output path
    inpath     -- file fed to the command's standard input, or None
    out_prefix -- common prefix of the output files

    The command's standard output, standard error and exit status are
    written to OUTPATH.out, OUTPATH.err and OUTPATH.ret, and the two
    captured streams are then echoed to this process's standard output and
    standard error. Returns True when the command exited with status 0.
    """
    outpath = out_prefix + suffix
    print >>sys.stderr, msg + '...',
    # Start from None so the cleanup below is safe even when one of the
    # open() calls fails; previously that raised NameError in the finally
    # clause, hiding the real error and leaking the files already opened.
    infile = outoutfile = outerrfile = None
    try:
        if inpath is not None:
            infile = open(inpath, 'r')
        outoutfile = open(outpath + '.out', 'w')
        outerrfile = open(outpath + '.err', 'w')
        retval = subprocess.call(shlex.split(cmd), stdin=infile, stdout=outoutfile, stderr=outerrfile)
    finally:
        for stream in (infile, outoutfile, outerrfile):
            if stream is not None:
                stream.close()
    if retval == 0:
        print >>sys.stderr, 'ok'
    else:
        print >>sys.stderr, "failed (%d)" % (retval)
    with open(outpath + '.ret', 'w') as ostream:
        ostream.write("%d\n" % (retval))
    printfile(outpath + '.out', sys.stdout)
    printfile(outpath + '.err', sys.stderr)
    return retval == 0

def name_for_source_file(source_file_path, dir):
    """Choose the output path prefix inside *dir* for a source file.

    When the file name ends with the source extension, the prefix is *dir*
    joined with the name minus that extension. Otherwise a new, uniquely
    named file is created in *dir* with tempfile.mkstemp and its path is
    returned.
    """
    basename = os.path.basename(source_file_path)
    if not basename.endswith(source_extension):
        descriptor, generated_path = tempfile.mkstemp(dir=dir, prefix=gen_name_prefix + ".", suffix="")
        os.close(descriptor)
        return generated_path
    stem = basename[:-len(source_extension)]
    return os.path.join(dir, stem)

# Script entry point: parse the command line, work out the output prefix, then
# drive the pipeline codegen -> llvm-as -> llc -> cc -> run.
if __name__ == '__main__':
    import getopt

    try:
        opts, args = getopt.getopt(sys.argv[1:], "c:l:")
        for opt, value in opts:
            if opt == "-c":
                codegen = value
            elif opt == "-l":
                stdlib = value
        if len(args) not in [1, 2, 4]:
            raise getopt.GetoptError("Not enough arguments.")
    except getopt.GetoptError:
        print >>sys.stderr, __doc__ % (sys.argv[0], source_extension, default_codegen, default_stdlib)
        sys.exit(2)

    source_file = args[0]
    if len(args) == 1:
        out_prefix = name_for_source_file(source_file, ".")
    else:
        log_dir = args[1]
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)
        if len(args) == 2:
            out_prefix = name_for_source_file(source_file, log_dir)
        else:
            # Only four arguments remain possible after the check above.
            group = args[2]
            testcase = args[3]
            out_prefix = os.path.join(log_dir, group, testcase)

    print >>sys.stderr, "output prefix: %s" % (out_prefix)
    print >>sys.stderr, "llvmas: %s" % (llvmas)
    print >>sys.stderr, "llc: %s" % (llc)
    print >>sys.stderr, "cc: %s" % (cc)
    print >>sys.stderr, "codegen: %s" % (codegen)
    print >>sys.stderr, "stdlib: %s" % (stdlib)

    out_dir = os.path.dirname(out_prefix)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    if run("generating llvm code", codegen, ".llvm", source_file, out_prefix):
        shutil.copy2("%s.llvm.%s" % (out_prefix, codegen_llvm_out_source), "%s.llvm" % (out_prefix))
        build_stages = [
            ("assembling to bitcode", "%s \"%s.llvm\" -o \"%s.llvm.bc\"" % (llvmas, out_prefix, out_prefix), ".llvm.bc"),
            ("converting to native code", "%s \"%s.llvm.bc\" -o \"%s.llvm.s\"" % (llc, out_prefix, out_prefix), ".llvm.s"),
            ("linking", "%s -o \"%s.llvm.exec\" \"%s.llvm.s\" \"%s\"" % (cc, out_prefix, out_prefix, stdlib), ".exec"),
        ]
        built = True
        for stage_msg, stage_cmd, stage_suffix in build_stages:
            # Stop at the first failing stage: the later ones would only fail
            # on its missing output, and the final step could crash on a
            # missing executable or run a stale one from an earlier build.
            if not run(stage_msg, stage_cmd, stage_suffix, None, out_prefix):
                built = False
                break
        if built:
            run("running", "%s.llvm.exec" % (out_prefix), ".run", None, out_prefix)
        else:
            # if the build failed, make sure the output file exists for the check
            touch("%s.run.out" % (out_prefix))
    else:
        # if codegen failed, leave blank files for the check
        touch("%s.llvm.ret" % (out_prefix))
        touch("%s.run.out" % (out_prefix))

    sys.exit(0)
5 changes: 5 additions & 0 deletions tests/compiler/testcases/long/1.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
1
2
3
4
5
1 change: 1 addition & 0 deletions tests/compiler/testcases/long/1.run.out
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
4
11 changes: 11 additions & 0 deletions tests/compiler/testcases/long/1.source
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
read a
read b
set c a + b
read d
read e
set f d * e
set g c - f
set h - g
read i
set j h % i
print j
2 changes: 2 additions & 0 deletions tests/compiler/testcases/long/2.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
100
200
1 change: 1 addition & 0 deletions tests/compiler/testcases/long/2.run.out
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
100
5 changes: 5 additions & 0 deletions tests/compiler/testcases/long/2.source
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
read +
set - - +
read bar
set @#!$ bar + -
print @#!$
Loading

0 comments on commit b7ee42d

Please sign in to comment.