@@ -1,24 +1,26 @@
+import math
+import sys
+
+import numpy as np
+import pycuda.autoinit
+import pycuda.driver
+import pycuda.gpuarray
+
 import cgen as c
 from cgen.cuda import CudaGlobal
 
 from codepy.bpl import BoostPythonModule
 from codepy.cuda import CudaModule
+from codepy.toolchain import guess_nvcc_toolchain, guess_toolchain
 
 
 # This file tests the ability to compile and link CUDA code into the
 # Python interpreter. Running this test requires PyCUDA
 # as well as CUDA 3.0beta (or greater)
 
-
 # The host module should include a function which is callable from Python
 host_mod = BoostPythonModule()
 
-import math
-
-# Are we on a 32 or 64 bit platform?
-import sys
-
-
 bitness = math.log(sys.maxsize) + 1
 ptr_sz_uint_conv = "K" if bitness > 32 else "I"
 
@@ -49,15 +51,14 @@
     "PyObject* remoteResult = PyObject_Call(GPUArrayClass, args, kwargs)",
     "return remoteResult"]
 
-
 host_mod.add_function(
     c.FunctionBody(
-        c.FunctionDeclaration(c.Pointer(c.Value("PyObject", "adjacentDifference")),
-                              [c.Pointer(c.Value("PyObject", "gpuArray"))]),
+        c.FunctionDeclaration(
+            c.Pointer(c.Value("PyObject", "adjacentDifference")),
+            [c.Pointer(c.Value("PyObject", "gpuArray"))]),
         c.Block([c.Statement(x) for x in statements])))
 host_mod.add_to_preamble([c.Include("boost/python/extract.hpp")])
 
-
 cuda_mod = CudaModule(host_mod)
 cuda_mod.add_to_preamble([c.Include("cuda.h")])
 
@@ -72,55 +73,46 @@
 
 diff = [
     c.Template("typename T",
-        CudaGlobal(c.FunctionDeclaration(c.Value("void", "diffKernel"),
-            [c.Value("T*", "inputPtr"),
-             c.Value("int", "length"),
-             c.Value("T*", "outputPtr")]))),
-    c.Block([c.Statement(global_index),
-        c.If("index == 0",
-            c.Statement("outputPtr[0] = inputPtr[0]"),
-            c.If("index < length",
-                c.Statement(compute_diff),
-                c.Statement("")))]),
+               CudaGlobal(c.FunctionDeclaration(c.Value("void", "diffKernel"),
+                                                [c.Value("T*", "inputPtr"),
+                                                 c.Value("int", "length"),
+                                                 c.Value("T*", "outputPtr")]))),
+    c.Block([
+        c.Statement(global_index),
+        c.If("index == 0",
+             c.Statement("outputPtr[0] = inputPtr[0]"),
+             c.If("index < length",
+                  c.Statement(compute_diff),
+                  c.Statement("")))]),
 
     c.Template("typename T",
-        c.FunctionDeclaration(c.Value("CUdeviceptr", "difference"),
-            [c.Value("CUdeviceptr", "inputPtr"),
-             c.Value("int", "length")])),
+               c.FunctionDeclaration(c.Value("CUdeviceptr", "difference"),
+                                     [c.Value("CUdeviceptr", "inputPtr"),
+                                      c.Value("int", "length")])),
     c.Block([c.Statement(x) for x in launch])]
-
 cuda_mod.add_to_module(diff)
+
+# CudaModule.add_function also adds a declaration of this function to the
+# BoostPythonModule which is responsible for the host function.
+
 diff_instance = c.FunctionBody(
     c.FunctionDeclaration(c.Value("CUdeviceptr", "diffInstance"),
-        [c.Value("CUdeviceptr", "inputPtr"),
-         c.Value("int", "length")]),
+                          [c.Value("CUdeviceptr", "inputPtr"),
+                          c.Value("int", "length")]),
     c.Block([c.Statement("return difference<int>(inputPtr, length)")]))
-
-# CudaModule.add_function also adds a declaration of this
-# function to the BoostPythonModule which
-# is responsible for the host function.
 cuda_mod.add_function(diff_instance)
 
-import codepy.jit
-import codepy.toolchain
-
-
-gcc_toolchain = codepy.toolchain.guess_toolchain()
-nvcc_toolchain = codepy.toolchain.guess_nvcc_toolchain()
-
+gcc_toolchain = guess_toolchain()
+nvcc_toolchain = guess_nvcc_toolchain()
 module = cuda_mod.compile(gcc_toolchain, nvcc_toolchain, debug=True)
-import numpy as np
-import pycuda.autoinit
-import pycuda.driver
-import pycuda.gpuarray
-
 
-length = 25
-constant_value = 2
 # This is a strange way to create a GPUArray, but is meant to illustrate
 # how to construct a GPUArray if the GPU buffer it owns has been
 # created by something else
 
+length = 25
+constant_value = 2
+
 pointer = pycuda.driver.mem_alloc(length * 4)
 pycuda.driver.memset_d32(pointer, constant_value, length)
 a = pycuda.gpuarray.GPUArray((length,), np.int32, gpudata=pointer)
@@ -129,6 +121,7 @@
 golden = [constant_value] + [0] * (length - 1)
 difference = [(x-y)*(x-y) for x, y in zip(b, golden, strict=True)]
 error = sum(difference)
+
 if error == 0:
     print("Test passed!")
 else:
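
A note on the first hunk's `bitness` and `ptr_sz_uint_conv` lines, which this commit keeps: "K" and "I" are the `Py_BuildValue`/`PyArg_ParseTuple` format characters for `unsigned long long` and `unsigned int`, so the test picks whichever integer conversion is wide enough to round-trip a `CUdeviceptr` through Python. Note that `math.log(sys.maxsize) + 1` is a natural logarithm, so `bitness` is not literally the word size, but it still lands on the correct side of 32 on both 32-bit (ln 2^31 ≈ 21.5) and 64-bit (ln 2^63 ≈ 43.7) interpreters. A sketch of the same selection done directly (the `fmt` name is illustrative, not from the commit):

    import struct

    # Width of a native pointer in bits; "P" is struct's code for void*.
    pointer_bits = struct.calcsize("P") * 8

    # Py_BuildValue/PyArg_ParseTuple conversion wide enough for a pointer:
    # "K" = unsigned long long, "I" = unsigned int.
    fmt = "K" if pointer_bits > 32 else "I"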
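The host-module machinery exercised in the second hunk is independent of CUDA and easier to see in isolation: a `BoostPythonModule` accumulates cgen ASTs and hands back an imported extension module when compiled. A minimal CPU-only sketch of that pattern, assuming a Boost.Python installation that `guess_toolchain()` can locate (the `twice` function is invented for illustration and is not part of this commit):

    import cgen as c

    from codepy.bpl import BoostPythonModule
    from codepy.toolchain import guess_toolchain

    mod = BoostPythonModule()

    # Each added FunctionBody is emitted as C++ and exposed to Python
    # via Boost.Python when the module is compiled.
    mod.add_function(
        c.FunctionBody(
            c.FunctionDeclaration(c.Value("int", "twice"),
                                  [c.Value("int", "x")]),
            c.Block([c.Statement("return 2 * x")])))

    ext = mod.compile(guess_toolchain())
    assert ext.twice(21) == 42

The CUDA variant in the diff layers a `CudaModule` on top of this, so the device half is compiled with nvcc and linked against the Boost.Python host half; that is what `cuda_mod.compile(gcc_toolchain, nvcc_toolchain, debug=True)` does.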
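The unchanged context between the last two hunks (old lines 127-128), which is what actually binds `b`, is not shown in this diff. The following is only a hedged guess at its shape, based on the `adjacentDifference` wrapper defined above and PyCUDA's usual `GPUArray.get()` device-to-host copy:

    # Hypothetical reconstruction, NOT part of the commit: invoke the
    # compiled Boost.Python entry point on `a`, then copy the result to
    # the host for the elementwise comparison against `golden`.
    b = module.adjacentDifference(a).get()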