Added a compiler example.

anoopsarkar · Sep 8, 2012 · b7ee42d · b7ee42d
1 parent bfb7114
commit b7ee42d
Show file tree

Hide file tree

Showing 84 changed files with 612 additions and 0 deletions.
diff --git a/tests/compiler/Makefile b/tests/compiler/Makefile
@@ -0,0 +1,4 @@
+# Add the directory two levels up to Python's module search path
+# (presumably where the `check` module imported by check-compiler lives -- TODO confirm).
+export PYTHONPATH=../../
+
+# Run the checker script, passing it the answer/ and testcases/ directories.
+test:
+	./check-compiler -d -o out answer/ testcases/
diff --git a/tests/compiler/README b/tests/compiler/README
@@ -0,0 +1,14 @@
+This is a very simple compiler with LLVM, to demonstrate how to set up complex checks. In particular, interesting features are that:
+- the command differs for each testcase rather than only for each testcase group
+- the command outputs intermediate files which are then checked.
+
+The compiler is for a simple arithmetic expression language. A program consists of a series of statements, one on each line. A statement has one of the following forms, where tokens are separated by one or more whitespace characters:
+	read ID
+	print ID
+	set ID UNIOP ID
+	set ID ID BINOP ID
+ID is any identifier token, which can be any string that does not contain whitespace characters. UNIOP is any character in "-". BINOP is any character in "-+*/%".
+
+"read ID" reads an integer from a line of standard input. "print ID" prints an integer to a line of standard output. "set ID UNIOP ID" does operation UNIOP on the second ID argument and sets the first ID argument to the result. "set ID ID BINOP ID" does operation BINOP on the second and third ID arguments, and sets the first ID argument to the result. Operations do what you probably expect them to do. ID tokens used as argument to operations must have been previously assigned to.
+
+The compiler from source code to LLVM code simply prints out text rather than using the LLVM libraries. The rest of the compiling process to produce a native executable is done with the LLVM tool programs.
diff --git a/tests/compiler/answer/codegen b/tests/compiler/answer/codegen
@@ -0,0 +1,83 @@
+#!/usr/bin/env python2
+
+import sys
+
+# Raised for a malformed statement: too few tokens, an unknown operator,
+# or a `set` statement with the wrong number of tokens.
+# NOTE(review): this name shadows Python's built-in SyntaxError inside this script.
+class SyntaxError(Exception):
+	pass
+# Raised by lookup() when a variable is used before it has been assigned.
+class SemanticError(Exception):
+	pass
+
+# Maps each binary operator token to the LLVM instruction emitted for it.
+bin_ops = {
+	"+": "add",
+	"-": "sub",
+	"*": "mul",
+	"/": "sdiv",
+	"%": "srem"
+}
+# Maps each unary operator token to (LLVM instruction, constant first operand);
+# unary minus is emitted as "sub i32 0, x".
+uni_ops = {
+	"-": ("sub", 0)
+}
+
+# Emit the module prologue: declarations of the external @read/@print
+# functions and the opening of @main with its entry label.
+print """
+declare i32 @read()
+declare void @print(i32)
+
+define i32 @main() {
+	entry:
+"""
+
+symbol_table = {}
+next_id = 0
+def assign(var):
+	global next_id
+	llvm_var = "%%%i" % (next_id)
+	print "\t; %s now %s" % (llvm_var, var)
+	symbol_table[var] = llvm_var
+	next_id += 1
+	return llvm_var
+def lookup(var):
+	if var not in symbol_table:
+		raise SemanticError()
+	llvm_var = symbol_table[var]
+	print "\t; %s is %s" % (llvm_var, var)
+	return llvm_var
+
+for line_num, line in enumerate(sys.stdin, 1):
+	print "\t; LINE %i: %s" % (line_num, line.strip("\n"))
+	tokens = line.split()
+	if len(tokens) < 2:
+	  raise SyntaxError()
+	instr = tokens[0]
+	var = tokens[1]
+	if instr == 'read':
+		llvm_var = assign(var)
+		print "\t%s = call i32 @read()" % (llvm_var)
+	elif instr == 'set':
+		llvm_var = assign(var)
+		if len(tokens) == 4:
+			op = tokens[2]
+			arg = tokens[3]
+			if op not in uni_ops:
+				raise SyntaxError()
+			llvm_op, llvm_arg1 = uni_ops[op]
+			llvm_arg2 = lookup(arg)
+			print "\t%s = %s i32 %s, %s" % (llvm_var, llvm_op, llvm_arg1, llvm_arg2)
+		elif len(tokens) == 5:
+			arg1 = tokens[2]
+			op = tokens[3]
+			arg2 = tokens[4]
+			if op not in bin_ops:
+				raise SyntaxError()
+			llvm_op = bin_ops[op]
+			llvm_arg1, llvm_arg2 = [lookup(a) for a in [arg1, arg2]]
+			print "\t%s = %s i32 %s, %s" % (llvm_var, llvm_op, llvm_arg1, llvm_arg2)
+		else:
+			raise SyntaxError()
+	elif instr == 'print':
+		llvm_var = lookup(var)
+  		print "\tcall void @print(i32 %s)" % (llvm_var)
+
+# Emit the module epilogue: return 0 from @main and close its body.
+print """
+	ret i32 0
+}
+"""
diff --git a/tests/compiler/answer/stdlib.c b/tests/compiler/answer/stdlib.c
@@ -0,0 +1,13 @@
+#include <stdio.h>
+
+/* Print x in decimal, followed by a newline, on standard output. */
+void print(int x)
+{
+  printf("%d\n", x);
+}
+
+/*
+ * Read one decimal integer from standard input and return it.
+ * Returns 0 when no integer can be read (end of input or malformed text),
+ * rather than returning an uninitialised value.
+ */
+int read()
+{
+  int i = 0;
+  if (scanf("%d", &i) != 1)
+    return 0;
+  return i;
+}
diff --git a/tests/compiler/check-compiler b/tests/compiler/check-compiler
@@ -0,0 +1,86 @@
+#!/usr/bin/env python2
+
+import check
+import os
+import os.path
+import difflib
+import re
+import sys
+
+# Path of the code generator, passed to the runner script with -c.
+codegen = "./codegen"
+# Extension appended to a testcase prefix to locate its source file.
+source_extension = ".source"
+# Name of the runner script, looked up inside the check directory.
+testcase_run = "testcase-run"
+# C support file, passed to the runner script with -l.
+stdlib = "stdlib.c"
+
+def testcase_prefix(testcases_path, group, testcase):
+    return os.path.join(testcases_path, group, testcase)
+
+def load_files(*paths):
+    files = [open(fn) for fn in paths]
+    try:
+        return [list(f) for f in files]
+    finally:
+        for file in files:
+            file.close()
+
+def command(**args):
+    source_file = testcase_prefix(args['testcases_path'], args['group'], (args['testcase'])) + source_extension
+    return [os.path.join(args['check_dir'], testcase_run), "-c", codegen, "-l", stdlib, source_file, args['log_dir'], args['group'], args['testcase']]
+
+def diff_exact(a, b, output):
+    if a != b:
+        output.write("Diff in output:\n")
+        output.writelines(difflib.unified_diff(a, b))
+        return False
+    return True
+
+def make_diff_exit_status(fail_fail_msg, fail_succeed_msg):
+    def diff(a, b, output):
+        try:
+            # Normalize to 0 or 1 rather than checking exact error codes
+            assert len(a) == 1
+            assert len(b) == 1
+            a, b = [0 if x == 0 else 1 for x in [int(y[0].strip()) for y in [a, b]]]
+        except:
+            output.write("Expected an exit status number but got something else.\n")
+            return False
+        if a != b:
+            if a == 0:
+                output.write("%s\n" % (fail_fail_msg))
+            else:
+                output.write("%s\n" % (fail_succeed_msg))
+            output.write("Diff in output:\n")
+            output.writelines(difflib.unified_diff([str(a)], [str(b)]))
+            return False
+        return True
+    return diff
+
+def gold_output_paths(suffix, **args):
+    gold_path = testcase_prefix(args['testcases_path'], args['group'], (args['testcase'])) + suffix
+    output_path = testcase_prefix(args['log_dir'], args['group'], (args['testcase'])) + suffix
+    return gold_path, output_path
+
+def make_file_check_llvm_err(**args):
+    gold_path, output_path = gold_output_paths(".llvm.ret", **args)
+    diff_exit_status = make_diff_exit_status("Testcase was expected to succeed but failed.", "Testcase was expected to fail but succeeded.")
+    return { 'gold': gold_path, 'output': output_path, 'check': diff_exit_status, 'load_lines': True, 'backup': False, 'gold_default': ['0'], 'name': "code generation exit status" }
+
+def make_file_check_run_out(**args):
+    gold_path, output_path = gold_output_paths(".run.out", **args)
+    return { 'gold': gold_path, 'output': output_path, 'check': diff_exact, 'load_lines': True, 'backup': False, 'gold_default': [], 'name': "final output from compiled program" }
+
+# Testcase groups to check; none of them overrides the defaults below.
+checks = {
+        "noarith": {},
+        "singleop": {},
+        "long": {}
+    }
+
+# Settings shared by every group: the command builder, the files the check
+# depends on, and the per-testcase file checks. stdout and stderr are set to
+# None (presumably meaning "do not check them" -- confirm against the check module).
+check_defaults = {
+        'command': command,
+        'source_files': [codegen],
+        'stdout': None,
+        'stderr': None,
+        'file_checks': [make_file_check_llvm_err, make_file_check_run_out]
+    } 
+
+# Hand both tables to the check module.
+check.check_all(checks, check_defaults)
diff --git a/tests/compiler/testcase-run b/tests/compiler/testcase-run
@@ -0,0 +1,162 @@
+#!/usr/bin/env python
+
+"""
+usage: %s [-c CODEGEN] [-l STDLIB] SOURCE-FILE [LOG-DIR [GROUP TESTCASE]]
+
+SOURCE-FILE  the source code input file
+LOG-DIR     an optional directory to put output in
+GROUP       an optional group name for organizing the output files
+TESTCASE    an optional testcase name for organizing the output files
+
+Options
+-c CODEGEN    path to compiler codegen executable
+-l STDLIB     path to stdlib C file
+
+Output files are as follows:
+PREFIX.STAGE      main result from STAGE
+PREFIX.STAGE.out  standard output from STAGE
+PREFIX.STAGE.err  standard error from STAGE
+PREFIX.STAGE.ret  exit status from STAGE
+
+Stages are:
+llvm  source code to LLVM code generation
+bc    assembly to LLVM bitcode
+s     bitcode to native code
+exec  linking to make native executable
+run   running the final executable
+
+Prefix is determined by which arguments are given:
+SOURCE-FILE                          PREFIX is ./NAME
+SOURCE-FILE LOG-DIR                  PREFIX is LOG-DIR/NAME
+SOURCE-FILE LOG-DIR GROUP TESTCASE   PREFIX is LOG-DIR/GROUP/TESTCASE
+
+NAME is the basename of SOURCE-FILE if SOURCE-FILE has the extension %s, and
+otherwise is a unique name generated to avoid conflicting with existing files.
+
+Environment variables:
+LLVMAS        LLVM assembler, defaults to llvm-as
+LLC           LLVM native code compiler, defaults to llc
+CC            C compiler for linking, defaults to gcc
+CODEGEN       default for the source code to LLVM code compiler, defaults to %s
+STDLIB        default for the stdlib C file, defaults to %s
+"""
+
+import subprocess
+import sys
+import os
+import os.path
+import shutil
+import shlex
+import tempfile
+
+gen_name_prefix = "llvm-run" # filename prefix to use if we have to make up a name for output
+source_extension = ".source"
+default_codegen = "./codegen"
+default_stdlib = "./stdlib.c"
+codegen_llvm_out_source = "out"
+codegen_env_var = "CODEGEN"
+stdlib_env_var = "STDLIB"
+
+llvmas = os.environ.get('LLVMAS') or 'llvm-as'
+llc = os.environ.get('LCC') or 'llc -disable-cfi'
+cc = os.environ.get('CC') or 'gcc'
+codegen = os.environ.get(codegen_env_var) or os.path.join('.', default_codegen)
+stdlib = os.environ.get(stdlib_env_var) or default_stdlib
+
+def touch(fname, times=None):
+    with open(fname, 'a'):
+        os.utime(fname, times)
+
+def printfile(fname, ostream):
+    try:
+    	with open(fname, 'r') as istream:
+            for line in istream:
+    		    ostream.write(line)
+    except IOError:
+    	print >>sys.stderr, 'could not read', fname, 'and print to', ostream
+
+def run(msg, cmd, suffix, inpath, out_prefix):
+    outpath = out_prefix + suffix
+    print >>sys.stderr, msg + '...',
+    try:
+        infile = open(inpath, 'r') if inpath is not None else None
+        outoutfile = open(outpath + '.out', 'w')
+        outerrfile = open(outpath + '.err', 'w')
+        retval = subprocess.call(shlex.split(cmd), stdin=infile, stdout=outoutfile, stderr=outerrfile)
+    finally:
+        if infile is not None:
+            infile.close()
+        outoutfile.close()
+        outerrfile.close()
+    if retval == 0: 
+    	print >>sys.stderr, 'ok'
+    else:
+    	print >>sys.stderr, "failed (%d)" % (retval)
+    with open(outpath + '.ret', 'w') as ostream:
+    	ostream.write("%d\n" % (retval))
+    printfile(outpath + '.out', sys.stdout)
+    printfile(outpath + '.err', sys.stderr)
+    return retval == 0
+
+def name_for_source_file(source_file_path, dir):
+    basename = os.path.basename(source_file_path)
+    if basename.endswith(source_extension):
+        return os.path.join(dir, basename[:-len(source_extension)])
+    else:
+        file, path = tempfile.mkstemp(dir=dir, prefix=gen_name_prefix + ".", suffix="")
+        os.close(file)
+        return path
+
+if __name__ == '__main__':
+    import getopt
+
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "c:l:")
+        for opt, value in opts:
+            if opt == "-c":
+                codegen = value
+            elif opt == "-l":
+                stdlib = value
+        if len(args) not in [1, 2, 4]:
+            raise getopt.GetoptError("Not enough arguments.")
+    except getopt.GetoptError, e:
+        print >>sys.stderr, __doc__ % (sys.argv[0], source_extension, default_codegen, default_stdlib)
+        sys.exit(2)
+
+    source_file = args[0]
+    if len(args) == 1:
+        out_prefix = name_for_source_file(source_file, ".")
+    else:
+      log_dir = args[1]
+      if not os.path.exists(log_dir):
+          os.makedirs(log_dir)
+      if len(args) == 2:
+          out_prefix = name_for_source_file(source_file, log_dir)
+      elif len(args) == 4:
+          group = args[2]
+          testcase = args[3]
+          out_prefix = os.path.join(log_dir, group, testcase)
+
+    print >>sys.stderr, "output prefix: %s" % (out_prefix)
+    print >>sys.stderr, "llvmas: %s" % (llvmas)
+    print >>sys.stderr, "llc: %s" % (llc)
+    print >>sys.stderr, "cc: %s" % (cc)
+    print >>sys.stderr, "codegen: %s" % (codegen)
+    print >>sys.stderr, "stdlib: %s" % (stdlib)
+
+    dir = os.path.dirname(out_prefix)
+    if not os.path.exists(dir):
+        os.makedirs(dir)
+
+    if run("generating llvm code", codegen, ".llvm", source_file, out_prefix):
+        shutil.copy2("%s.llvm.%s" % (out_prefix, codegen_llvm_out_source), "%s.llvm" % (out_prefix))
+        run("assembling to bitcode", "%s \"%s.llvm\" -o \"%s.llvm.bc\"" % (llvmas, out_prefix, out_prefix), ".llvm.bc", None, out_prefix)
+        run("converting to native code", "%s \"%s.llvm.bc\" -o \"%s.llvm.s\"" % (llc, out_prefix, out_prefix), ".llvm.s", None, out_prefix)
+        run("linking", "%s -o \"%s.llvm.exec\" \"%s.llvm.s\" \"%s\"" % (cc, out_prefix, out_prefix, stdlib), ".exec", None, out_prefix)
+        run("running", "%s.llvm.exec" % (out_prefix), ".run", None, out_prefix)
+    else:
+        # if codegen failed, leave blank files for the check
+        touch("%s.llvm.ret" % (out_prefix))
+        touch("%s.run.out" % (out_prefix))
+
+    sys.exit(0)
diff --git a/tests/compiler/testcases/long/1.in b/tests/compiler/testcases/long/1.in
@@ -0,0 +1,5 @@
+1
+2
+3
+4
+5
diff --git a/tests/compiler/testcases/long/1.run.out b/tests/compiler/testcases/long/1.run.out
@@ -0,0 +1 @@
+4
diff --git a/tests/compiler/testcases/long/1.source b/tests/compiler/testcases/long/1.source
@@ -0,0 +1,11 @@
+read a
+read b
+set c a + b
+read d
+read e
+set f d * e
+set g c - f
+set h - g
+read i
+set j h % i
+print j
diff --git a/tests/compiler/testcases/long/2.in b/tests/compiler/testcases/long/2.in
@@ -0,0 +1,2 @@
+100
+200
diff --git a/tests/compiler/testcases/long/2.run.out b/tests/compiler/testcases/long/2.run.out
@@ -0,0 +1 @@
+100
diff --git a/tests/compiler/testcases/long/2.source b/tests/compiler/testcases/long/2.source
@@ -0,0 +1,5 @@
+read +
+set - - +
+read bar
+set @#!$ bar + -
+print @#!$