-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Start * Complete 1D * Complete 2D * Complete 3D * Minor change
- Loading branch information
Showing
7 changed files
with
351 additions
and
380 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,32 +1,86 @@ | ||
# Kernel configurators determine the number of threads and blocks used to | ||
# launch a kernel so that GPU resources are used efficiently. | ||
|
||
# Kernel configurator for 1D CUDA array | ||
function configurator_1d(kernel::HostKernel, array::CuArray{<:Any, 1}) | ||
config = launch_configuration(kernel.fun) | ||
# 1D kernel configurator | ||
# We hardcode 32 threads per block for 1D kernels | ||
function kernel_configurator_1d(kernel::HostKernel, x::Int) | ||
# config = launch_configuration(kernel.fun) # not used in this case | ||
|
||
threads = min(length(array), config.threads) | ||
blocks = cld(length(array), threads) | ||
threads = 32 # warp size is 32, if block size is less than 32, it will be padded to 32 | ||
blocks = cld(x, threads[1]) | ||
|
||
return (threads = threads, blocks = blocks) | ||
end | ||
|
||
# Kernel configurator for 2D CUDA array | ||
function configurator_2d(kernel::HostKernel, array::CuArray{<:Any, 2}) | ||
config = launch_configuration(kernel.fun) | ||
# 2D kernel configurator | ||
# We hardcode 32 threads per block for the x dimension; the y dimension is derived | ||
# from the thread count returned by the launch configuration, capped by the problem size | ||
function kernel_configurator_2d(kernel::HostKernel, x::Int, y::Int) | ||
config = launch_configuration(kernel.fun) # get the number of threads | ||
|
||
threads = Tuple(fill(Int(floor((min(maximum(size(array)), config.threads))^(1 / 2))), 2)) | ||
blocks = map(cld, size(array), threads) | ||
# y dimension | ||
dims_y1 = cld(x * y, 32) | ||
dims_y2 = max(fld(config.threads, 32), 1) | ||
|
||
dims_y = min(dims_y1, dims_y2) | ||
|
||
# x dimension is hardcoded to warp size 32 | ||
threads = (32, dims_y) | ||
blocks = (cld(x, threads[1]), cld(y, threads[2])) | ||
|
||
return (threads = threads, blocks = blocks) | ||
end | ||
|
||
# Kernel configurator for 3D CUDA array | ||
function configurator_3d(kernel::HostKernel, array::CuArray{<:Any, 3}) | ||
config = launch_configuration(kernel.fun) | ||
# 3D kernel configurator | ||
# We hardcode 32 threads per block for the x dimension; the y and z dimensions are derived | ||
# from the thread count returned by the launch configuration, capped by the problem size | ||
function kernel_configurator_3d(kernel::HostKernel, x::Int, y::Int, z::Int) | ||
config = launch_configuration(kernel.fun) # get the number of threads | ||
|
||
# y dimension | ||
dims_y1 = cld(x * y, 32) | ||
dims_y2 = max(fld(config.threads, 32), 1) | ||
|
||
dims_y = min(dims_y1, dims_y2) | ||
|
||
threads = Tuple(fill(Int(floor((min(maximum(size(array)), config.threads))^(1 / 3))), 3)) | ||
blocks = map(cld, size(array), threads) | ||
# z dimension | ||
dims_z1 = cld(x * y * z, 32 * dims_y) | ||
dims_z2 = max(fld(config.threads, 32 * dims_y), 1) | ||
|
||
dims_z = min(dims_z1, dims_z2) | ||
|
||
# x dimension is hardcoded to warp size 32 | ||
threads = (32, dims_y, dims_z) | ||
blocks = (cld(x, threads[1]), cld(y, threads[2]), cld(z, threads[3])) | ||
|
||
return (threads = threads, blocks = blocks) | ||
end | ||
|
||
# Deprecated old kernel configurators below | ||
|
||
# function configurator_1d(kernel::HostKernel, array::CuArray{<:Any, 1}) | ||
# config = launch_configuration(kernel.fun) | ||
|
||
# threads = min(length(array), config.threads) | ||
# blocks = cld(length(array), threads) | ||
|
||
# return (threads = threads, blocks = blocks) | ||
# end | ||
|
||
# function configurator_2d(kernel::HostKernel, array::CuArray{<:Any, 2}) | ||
# config = launch_configuration(kernel.fun) | ||
|
||
# threads = Tuple(fill(Int(floor((min(maximum(size(array)), config.threads))^(1 / 2))), 2)) | ||
# blocks = map(cld, size(array), threads) | ||
|
||
# return (threads = threads, blocks = blocks) | ||
# end | ||
|
||
# function configurator_3d(kernel::HostKernel, array::CuArray{<:Any, 3}) | ||
# config = launch_configuration(kernel.fun) | ||
|
||
# threads = Tuple(fill(Int(floor((min(maximum(size(array)), config.threads))^(1 / 3))), 3)) | ||
# blocks = map(cld, size(array), threads) | ||
|
||
# return (threads = threads, blocks = blocks) | ||
# end |
Oops, something went wrong.