Skip to content

Commit

Permalink
Merge pull request #103 from JuliaGPU/tb/compile
Browse files Browse the repository at this point in the history
Make the compilation example first-class functionality.
  • Loading branch information
SimonDanisch authored Aug 29, 2017
2 parents 89da86f + b92664a commit d9b78c5
Show file tree
Hide file tree
Showing 7 changed files with 124 additions and 106 deletions.
3 changes: 0 additions & 3 deletions examples/compilation/README.md

This file was deleted.

69 changes: 0 additions & 69 deletions examples/compilation/usage.jl

This file was deleted.

30 changes: 30 additions & 0 deletions examples/vadd.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Example: element-wise addition of two arrays on the GPU, using a CUDA C kernel
# that is compiled at run time through `CUDArt.@compile`.
using CUDAdrv, CUDArt
using Base.Test

using Compat

dev = CuDevice(0)
ctx = CuContext(dev)

# Compile the CUDA C source below for `dev` and bind the resulting kernel to
# the variable `kernel_vadd`.
CUDArt.@compile dev kernel_vadd """
__global__ void kernel_vadd(const float *a, const float *b, float *c)
{
int i = blockIdx.x *blockDim.x + threadIdx.x;
c[i] = a[i] + b[i];
}
"""

# Host inputs: random Float32 values scaled by 100 and rounded.
dims = (3,4)
a = round.(rand(Float32, dims) * 100)
b = round.(rand(Float32, dims) * 100)

# Device copies of the inputs, plus an output buffer shaped like `d_a`.
d_a = CuArray(a)
d_b = CuArray(b)
d_c = similar(d_a)

# The kernel performs no bounds check, so the launch has to cover exactly `len`
# indices.
# NOTE(review): `len, 1` is presumably (blocks, threads per block) -- confirm
# against the `cudacall` signature.
len = prod(dims)
cudacall(kernel_vadd, len, 1, Tuple{Ptr{Cfloat},Ptr{Cfloat},Ptr{Cfloat}}, d_a, d_b, d_c)
c = Array(d_c)

# Check the result copied back from the device against the host-side sum.
@test a+b ≈ c

destroy!(ctx)
1 change: 1 addition & 0 deletions src/CUDArt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ include("device.jl")
include("stream.jl")
include("pointer.jl")
include("arrays.jl")
include("compile.jl")
include("execute.jl")

include("precompile.jl")
Expand Down
57 changes: 23 additions & 34 deletions examples/compilation/library.jl → src/compile.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
# EXCLUDE FROM TESTING

using CUDArt
using Compat
export CompileError

# Generate a temporary file with specific suffix
# NOTE: mkstemps is glibc 2.19+, so emulate its behavior
Expand All @@ -28,9 +25,8 @@ macro compile(dev, kernel, code)
:($(esc(kernel)) = _compile($(esc(dev)), $kernel_name, $code, $containing_file)))
end

type CompileError <: Base.WrappedException
immutable CompileError <: Exception
message::String
error
end

const builddir = joinpath(@__DIR__, ".cache")
Expand All @@ -43,7 +39,7 @@ function _compile(dev, kernel, code, containing_file)
mkpath(builddir)
end

# Check if we need to compile
# check if we need to compile
codehash = hex(hash(code))
output = "$builddir/$(kernel)_$(codehash)-$(arch).ptx"
if isfile(output)
Expand All @@ -52,51 +48,44 @@ function _compile(dev, kernel, code, containing_file)
need_compile = true
end

# Compile the source, if necessary
# compile the source, if necessary
if need_compile
# Write the source into a compilable file
# write the source to a compilable file
(source, io) = mkstemps(".cu")
write(io, """
extern "C"
{
$code
}
""")
close(io)
Base.close(io)

compile_flags = vcat(CUDArt.toolchain_flags, ["--gpu-architecture", arch])
try
# TODO: capture STDERR
run(pipeline(`$(CUDArt.toolchain_nvcc) $(compile_flags) -ptx -o $output $source`, stderr=DevNull))
catch ex
isa(ex, ErrorException) || rethrow(ex)
rethrow(CompileError("compilation of kernel $kernel failed (typo in C++ source?)", ex))
finally
rm(source)
err = Pipe()
cmd = `$(CUDArt.toolchain_nvcc) $(compile_flags) -ptx -o $output $source`
result = success(pipeline(cmd; stdout=DevNull, stderr=err))
Base.close(err.in)
rm(source)

errors = readstring(err)
if !result
throw(CompileError("compilation of kernel $kernel failed\n$errors"))
elseif !isempty(errors)
warn("during compilation of kernel $kernel:\n$errors")
end

if !isfile(output)
error("compilation of kernel $kernel failed (no output generated)")
end
end

# Pass the module to the CUDA driver
mod = try
CuModuleFile(output)
catch ex
rethrow(CompileError("loading of kernel $kernel failed (invalid CUDA code?)", ex))
end

# Load the function pointer
func = try
CuFunction(mod, kernel)
catch ex
rethrow(CompileError("could not find kernel $kernel in the compiled binary (wrong function name?)", ex))
end

return func
mod = CUDAdrv.CuModuleFile(output)
return CUDAdrv.CuFunction(mod, kernel)
end

function clean_cache()
rm(builddir; recursive=true)
if ispath(builddir)
@assert isdir(builddir)
rm(builddir; recursive=true)
end
end
69 changes: 69 additions & 0 deletions test/compile.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Tests for run-time compilation of CUDA C kernels through `CUDArt.@compile`.
using CUDArt
import CUDAdrv
using Base.Test

dev = CUDAdrv.CuDevice(0)
ctx = CUDAdrv.CuContext(dev)

CUDArt.clean_cache() # for deterministic testing purposes


## basic compilation & execution

# An empty kernel must compile and be launchable without arguments.
let
CUDArt.@compile dev kernel """
__global__ void kernel()
{
}
"""

CUDAdrv.cudacall(kernel, 1, 1, ())
end

# Source that is not valid CUDA C must be reported as a `CompileError`.
@test_throws CompileError let
CUDArt.@compile dev kernel """
__global__ void kernel()
{
invalid code
}
"""
end

# The requested name (`wrongname`) does not match the function defined in the
# source (`kernel`); this is expected to surface as a `CUDAdrv.CuError`.
@test_throws CUDAdrv.CuError let
CUDArt.@compile dev wrongname """
__global__ void kernel()
{
}
"""
end


## argument passing

dims = (16, 16)
len = prod(dims)

# Kernel that copies `input` to `output` element by element.
CUDArt.@compile dev kernel_copy """
__global__ void kernel_copy(const float *input, float *output)
{
int i = blockIdx.x * blockDim.x + threadIdx.x;
output[i] = input[i];
}
"""

let
input = round.(rand(Cfloat, dims) * 100)

input_dev = CUDAdrv.CuArray(input)
output_dev = CUDAdrv.CuArray{Cfloat}(dims)

# The kernel performs no bounds check, so the launch has to cover exactly
# `len` indices.
# NOTE(review): `1, len` is presumably (blocks, threads per block) -- confirm
# against the `cudacall` signature.
CUDAdrv.cudacall(kernel_copy, 1, len,
Tuple{Ptr{Cfloat}, Ptr{Cfloat}},
input_dev, output_dev)
output = Array(output_dev)

# The data copied back from the device must match what was uploaded.
@test input ≈ output
end


CUDAdrv.destroy!(ctx)
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
include("gc.jl")
include("test.jl")
include("compile.jl")
include("examples.jl")

0 comments on commit d9b78c5

Please sign in to comment.