[lang] Fix ndarray cuda dealloc when using preallocated memory (#3829)
* Identify preallocate case

* Add test for caching behavior

* Auto Format

* Add a note

* Auto Format

Co-authored-by: Taichi Gardener <[email protected]>
qiao-bo and taichi-gardener authored Dec 20, 2021
1 parent 420e6b6 commit 490677a
Showing 3 changed files with 26 additions and 1 deletion.
taichi/backends/cuda/cuda_device.cpp: 3 additions & 1 deletion
@@ -23,6 +23,7 @@ DeviceAllocation CudaDevice::allocate_memory(const AllocParams &params) {
  info.size = params.size;
  info.is_imported = false;
  info.use_cached = false;
  info.use_preallocated = false;

  DeviceAllocation alloc;
  alloc.alloc_id = allocations_.size();
@@ -48,6 +49,7 @@ DeviceAllocation CudaDevice::allocate_memory_runtime(
  info.size = taichi::iroundup(params.size, taichi_page_size);
  info.is_imported = false;
  info.use_cached = params.use_cached;
  info.use_preallocated = true;

  DeviceAllocation alloc;
  alloc.alloc_id = allocations_.size();
@@ -69,7 +71,7 @@ void CudaDevice::dealloc_memory(DeviceAllocation handle) {
TI_ERROR("the CudaCachingAllocator is not initialized");
}
caching_allocator_->release(info.size, (uint64_t *)info.ptr);
} else {
} else if (!info.use_preallocated) {
CUDADriver::get_instance().mem_free(info.ptr);
info.ptr = nullptr;
}
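To see the whole decision path in one place, here is a small standalone sketch of the fixed dealloc logic. AllocInfo, caching_release and driver_mem_free below are simplified stand-ins for Taichi's AllocInfo, CudaCachingAllocator::release and CUDADriver::mem_free, not the real API; only the branching mirrors the change to dealloc_memory above.

// Simplified stand-ins; names and signatures are illustrative only.
#include <cstddef>
#include <cstdint>
#include <cstdio>

struct AllocInfo {
  void *ptr{nullptr};
  std::size_t size{0};
  bool use_preallocated{false};
  bool use_cached{false};
};

void caching_release(std::size_t size, std::uint64_t *ptr) {
  std::printf("release %zu bytes back to the caching allocator\n", size);
}

void driver_mem_free(void *ptr) {
  std::printf("cuMemFree(%p)\n", ptr);
}

// Mirrors the fixed branching: cached blocks go back to the caching
// allocator, directly malloc'ed blocks are freed through the driver, and
// preallocated blocks are left alone because LlvmProgramImpl owns that pool.
void dealloc(AllocInfo &info) {
  if (info.use_cached) {
    caching_release(info.size, static_cast<std::uint64_t *>(info.ptr));
  } else if (!info.use_preallocated) {
    driver_mem_free(info.ptr);
    info.ptr = nullptr;
  }  // preallocated and not cached: nothing to free here
}

int main() {
  int dummy = 0;
  AllocInfo cached{&dummy, 4096, /*use_preallocated=*/true, /*use_cached=*/true};
  AllocInfo direct{&dummy, 4096, false, false};
  AllocInfo prealloc{&dummy, 4096, true, false};
  dealloc(cached);    // returned to the cache
  dealloc(direct);    // freed through the driver
  dealloc(prealloc);  // no-op; previously this case incorrectly called mem_free
  return 0;
}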
taichi/backends/cuda/cuda_device.h: 11 additions & 0 deletions
@@ -81,6 +81,17 @@ class CudaDevice : public Device {
    void *ptr{nullptr};
    size_t size{0};
    bool is_imported{false};
    /* Note: memory allocation on the CUDA device.
     * CudaDevice can either use its own CUDA malloc mechanism via
     * `allocate_memory`, or the preallocated memory managed by LlvmProgramImpl
     * via `allocate_memory_runtime`. `use_preallocated` tracks which of the
     * two paths was taken. For now both options are kept, and the preallocated
     * path is the default for the CUDA backend. `use_cached` enables or
     * disables the caching behavior in `allocate_memory_runtime`. It should
     * eventually always be enabled; for now both flags are kept so that
     * preallocated memory can be used with caching disabled.
     */
    bool use_preallocated{true};
    bool use_cached{false};
  };

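As a companion to the note above, this rough sketch shows how the two allocation entry points populate the flags. The signatures and the trimmed-down AllocInfo are placeholders rather than the real CudaDevice interface, but the flag values follow the diff in cuda_device.cpp.

#include <cstddef>

// Only the fields that matter for deallocation.
struct AllocInfo {
  std::size_t size{0};
  bool use_preallocated{false};
  bool use_cached{false};
};

// Direct path: memory comes from the device's own CUDA malloc, so it must
// later be freed through the driver.
AllocInfo allocate_memory(std::size_t size) {
  return {size, /*use_preallocated=*/false, /*use_cached=*/false};
}

// Runtime path: memory is carved out of the pool preallocated by
// LlvmProgramImpl, optionally through the caching allocator. The real code
// also rounds the size up to the CUDA page size.
AllocInfo allocate_memory_runtime(std::size_t size, bool use_cached) {
  return {size, /*use_preallocated=*/true, use_cached};
}

int main() {
  AllocInfo a = allocate_memory(1024);                // freed via the driver later
  AllocInfo b = allocate_memory_runtime(1024, true);  // returned to the cache later
  AllocInfo c = allocate_memory_runtime(1024, false); // left to the pool owner
  (void)a; (void)b; (void)c;
  return 0;
}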
tests/python/test_ndarray.py: 12 additions & 0 deletions
@@ -309,6 +309,18 @@ def _test_ndarray_deepcopy():
    assert y[4][1, 0] == 9


def test_ndarray_cuda_caching_allocator():
    ti.init(arch=ti.cuda,
            ndarray_use_torch=False,
            ndarray_use_cached_allocator=True)
    n = 8
    a = ti.ndarray(ti.i32, shape=(n))
    a.fill(2)
    a = 1
    b = ti.ndarray(ti.i32, shape=(n))
    b.fill(2)


@ti.test(arch=supported_archs_taichi_ndarray, ndarray_use_torch=False)
def test_ndarray_rw_cache():
    a = ti.Vector.ndarray(3, ti.f32, ())
