Skip to content

Commit

Permalink
Feature: Improve Kernel Decorator (#69)
Browse files Browse the repository at this point in the history
This pull request addresses issue #68 by changing the implementation of
the kernel decorator, so that the function runs multiple times depending on the
number of blocks and the number of threads in each block.

---------

Co-authored-by: EmilyBourne <[email protected]>
Co-authored-by: bauom <[email protected]>
  • Loading branch information
3 people committed Jan 8, 2025
1 parent 593d592 commit 8bce3e1
Show file tree
Hide file tree
Showing 8 changed files with 181 additions and 3 deletions.
18 changes: 18 additions & 0 deletions docs/cuda.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,22 @@ def my_kernel():
my_kernel[1, 1]()

```
## CUDA Device Methods
The following methods are available for CUDA devices in Pyccel and can be called from either kernels or device functions. Currently, the only import syntax supported is:
```python
from pyccel import cuda
```
Using an alias for the import is not supported, so this is not allowed:

```python
from pyccel import cuda as py_cu
```

| Method | Description |
|--------|-------------|






88 changes: 88 additions & 0 deletions pyccel/cuda/cuda_thread_indexing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#------------------------------------------------------------------------------------------#
# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. #
#------------------------------------------------------------------------------------------#
"""
This module contains all the CUDA thread indexing methods
"""
class CudaThreadIndexing:
    """
    Class representing the CUDA thread indexing.

    An instance stores the indices of one (block, thread) pair and reports
    them back through methods named after the CUDA indexing built-ins. Only
    the x-dimension is tracked.

    Parameters
    ----------
    block_idx : int
        The index of the block in the x-dimension.
    thread_idx : int
        The index of the thread in the x-dimension.
    block_dim : int, default=0
        The number of threads per block in the x-dimension. The default of 0
        keeps the behaviour of callers which do not provide a block size.
    """
    def __init__(self, block_idx, thread_idx, block_dim=0):
        self._block_idx = block_idx
        self._thread_idx = thread_idx
        self._block_dim = block_dim

    def threadIdx(self, dim):
        """
        Get the thread index.

        Return the thread index that was given to the constructor. Only the
        x-dimension is stored, so the same value is returned whatever the
        value of `dim`.

        Parameters
        ----------
        dim : int
            The dimension of the indexing. It can be:
            - 0 for the x-dimension
            - 1 for the y-dimension
            - 2 for the z-dimension

        Returns
        -------
        int
            The index of the thread in the specified dimension of its block.
        """
        return self._thread_idx

    def blockIdx(self, dim):
        """
        Get the block index.

        Return the block index that was given to the constructor. Only the
        x-dimension is stored, so the same value is returned whatever the
        value of `dim`.

        Parameters
        ----------
        dim : int
            The dimension of the indexing. It can be:
            - 0 for the x-dimension
            - 1 for the y-dimension
            - 2 for the z-dimension

        Returns
        -------
        int
            The index of the block in the specified dimension.
        """
        return self._block_idx

    def blockDim(self, dim):
        """
        Get the block dimension.

        Return the block size that was given to the constructor. Only the
        x-dimension is stored, so the same value is returned whatever the
        value of `dim`.

        Parameters
        ----------
        dim : int
            The dimension of the indexing. It can be:
            - 0 for the x-dimension
            - 1 for the y-dimension
            - 2 for the z-dimension

        Returns
        -------
        int
            The size of the block in the specified dimension, or 0 if no
            block size was provided when the object was created.
        """
        return self._block_dim

20 changes: 19 additions & 1 deletion pyccel/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"""
This module contains all the provided decorator methods.
"""
from pyccel.cuda.cuda_thread_indexing import CudaThreadIndexing
import warnings

__all__ = (
Expand Down Expand Up @@ -139,7 +140,24 @@ class KernelAccessor:
def __init__(self, f):
    """
    Keep a reference to the wrapped function.

    Parameters
    ----------
    f : function
        The object stored on the instance as ``_f``.
    """
    self._f = f
def __getitem__(self, args):
    """
    Build the function which runs the kernel for a launch configuration.

    Called when the accessor is subscripted, e.g. ``my_kernel[2, 4]``. The
    function which is returned calls the wrapped function once for every
    (block, thread) pair of the configuration.

    Parameters
    ----------
    args : tuple of int
        The launch configuration ``(num_blocks, num_threads)``.

    Returns
    -------
    function
        A function which forwards its arguments to the wrapped function
        ``num_blocks * num_threads`` times and returns None.
    """
    num_blocks, num_threads = args
    # Namespace in which the wrapped function resolves the name `cuda`.
    # It is the same dict on every iteration so it is fetched once.
    kernel_globals = self._f.__globals__

    def internal_loop(*kernel_args, **kernel_kwargs):
        """
        The internal loop for kernel execution.

        Call the wrapped function once per (block, thread) pair. Before
        each call the indexing methods reachable through the name `cuda`
        in the wrapped function's globals are replaced with ones bound
        to the current pair.

        Parameters
        ----------
        *kernel_args : tuple
            Positional arguments forwarded to the wrapped function.
        **kernel_kwargs : dict
            Keyword arguments forwarded to the wrapped function.
        """
        for b in range(num_blocks):
            for t in range(num_threads):
                cu = CudaThreadIndexing(b, t)
                if 'cuda' in kernel_globals:
                    # `cuda` already exists (e.g. it was imported): only
                    # override its indexing methods so that its other
                    # attributes remain available.
                    cuda_namespace = kernel_globals['cuda']
                    cuda_namespace.threadIdx = cu.threadIdx
                    cuda_namespace.blockIdx = cu.blockIdx
                    cuda_namespace.blockDim = cu.blockDim
                else:
                    # No `cuda` name yet: expose the indexing object itself.
                    kernel_globals['cuda'] = cu
                self._f(*kernel_args, **kernel_kwargs)
    return internal_loop

return KernelAccessor(f)

Expand Down
15 changes: 15 additions & 0 deletions tests/pyccel/scripts/kernel/block_idx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# pylint: disable=missing-function-docstring, missing-module-docstring
from pyccel.decorators import kernel
from pyccel import cuda

@kernel
def print_block():
    # Print the value returned by cuda.blockIdx for dimension 0.
    print(cuda.blockIdx(0)) # pylint: disable=no-member

def f():
    # Subscript the kernel with the launch configuration [5,5], call the
    # result without arguments, then call cuda.synchronize().
    print_block[5,5]()
    cuda.synchronize()

# Only run f() when this file is executed as a script.
if __name__ == '__main__':
    f()

2 changes: 1 addition & 1 deletion tests/pyccel/scripts/kernel/device_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# pylint: disable=missing-function-docstring, missing-module-docstring
from pyccel.decorators import device, kernel
from pyccel import cuda
from pyccel import cuda

@device
def device_call():
Expand Down
2 changes: 1 addition & 1 deletion tests/pyccel/scripts/kernel/hello_kernel.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# pylint: disable=missing-function-docstring, missing-module-docstring
from pyccel.decorators import kernel
from pyccel import cuda
from pyccel import cuda

@kernel
def say_hello(its_morning : bool):
Expand Down
15 changes: 15 additions & 0 deletions tests/pyccel/scripts/kernel/thread_idx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# pylint: disable=missing-function-docstring, missing-module-docstring
from pyccel.decorators import kernel
from pyccel import cuda

@kernel
def print_block():
    # Print the value returned by cuda.threadIdx for dimension 0.
    # NOTE(review): the function is named print_block although it prints the
    # thread index - presumably copied from block_idx.py; confirm before renaming.
    print(cuda.threadIdx(0)) # pylint: disable=no-member

def f():
    # Subscript the kernel with the launch configuration [5,5], call the
    # result without arguments, then call cuda.synchronize().
    print_block[5,5]()
    cuda.synchronize()

# Only run f() when this file is executed as a script.
if __name__ == '__main__':
    f()

24 changes: 24 additions & 0 deletions tests/pyccel/test_pyccel.py
Original file line number Diff line number Diff line change
Expand Up @@ -697,6 +697,8 @@ def test_elemental(language):
pyccel_test("scripts/decorators_elemental.py", language = language)

#------------------------------------------------------------------------------


@pytest.mark.cuda
def test_hello_kernel(gpu_available):
types = str
Expand All @@ -710,7 +712,29 @@ def test_kernel_collision(gpu_available):
language="cuda", execute_code=gpu_available)

#------------------------------------------------------------------------------
def test_block_idx():
    """
    Check the block indices printed when block_idx.py is run with Python.

    The script subscripts its kernel with ``[5,5]``, so each of the values
    0 to 4 must be printed exactly 5 times and nothing else may be printed.
    """
    test_file = get_abs_path("scripts/kernel/block_idx.py")
    cwd = get_abs_path(os.path.dirname(test_file))

    pyth_out = get_python_output(test_file, cwd)

    # Sorting keeps the check independent of the order in which the values
    # are printed.
    python_block_idx = sorted(map(int, pyth_out.split()))

    # Comparing against the full expected multiset also rejects missing,
    # additional or out-of-range values, which per-value counts alone
    # would not notice.
    expected_block_idx = [i for i in range(5) for _ in range(5)]
    assert python_block_idx == expected_block_idx
#------------------------------------------------------------------------------
def test_thread_idx():
    """
    Check the thread indices printed when thread_idx.py is run with Python.

    The script subscripts its kernel with ``[5,5]``, so each of the values
    0 to 4 must be printed exactly 5 times and nothing else may be printed.
    """
    test_file = get_abs_path("scripts/kernel/thread_idx.py")
    cwd = get_abs_path(os.path.dirname(test_file))

    pyth_out = get_python_output(test_file, cwd)

    # Sorting keeps the check independent of the order in which the values
    # are printed.
    python_idx = sorted(map(int, pyth_out.split()))

    # Comparing against the full expected multiset also rejects missing,
    # additional or out-of-range values, which per-value counts alone
    # would not notice.
    expected_idx = [i for i in range(5) for _ in range(5)]
    assert python_idx == expected_idx

#------------------------------------------------------------------------------
@pytest.mark.cuda
def test_device_call(gpu_available):
types = str
Expand Down

0 comments on commit 8bce3e1

Please sign in to comment.