Add docs
pxl-th committed Dec 17, 2024
1 parent 40e5447 commit cd7a8da
Showing 2 changed files with 85 additions and 2 deletions.
2 changes: 2 additions & 0 deletions Project.toml
@@ -11,6 +11,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
ScopedValues = "7e506255-f358-4e82-b7e4-beb19740aa63"
Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"

@@ -23,6 +24,7 @@ LinearAlgebra = "1"
Printf = "1"
Random = "1"
Reexport = "1"
ScopedValues = "1"
Serialization = "1"
Statistics = "1"
julia = "1.10"
85 changes: 83 additions & 2 deletions src/host/allocations_cache.jl
@@ -1,4 +1,8 @@
using Base.ScopedValues
@static if VERSION < v"1.11"
    using ScopedValues
else
    using Base.ScopedValues
end

const CacheAllocatorName = ScopedValue(:none)
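The scoped value above is how the active allocator name is tracked: `@cache_scope` (defined further down) rebinds it for the dynamic extent of its expression. A minimal sketch of that mechanism, reusing the import above; `NAME` and `current_name` are placeholders rather than names from this file:

```julia
# Minimal sketch of scoped-value semantics (relies on the ScopedValues import above).
const NAME = ScopedValue(:none)

current_name() = NAME[]                 # reads the binding active in this dynamic scope

@assert current_name() == :none         # default value outside any scope
@with NAME => :loop begin
    @assert current_name() == :loop     # rebound for everything called from here
end
@assert current_name() == :none         # restored once the scope exits
```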

@@ -23,6 +27,19 @@ function get_pool!(cache::CacheAllocator{T}, pool::Symbol, uid::UInt64) where T
return uid_pool
end

"""
    alloc!(alloc_f, cache::CacheAllocator, ::Type{T}, dims::Dims{N}; skip_free::Bool) where {T, N}

Attempt to retrieve a cached allocation from `cache`, using the element type `T` and `dims`
as the lookup key.
If no such allocation is found, execute `alloc_f` to perform the actual allocation,
store it in the cache for future use, and return it.

`skip_free::Bool` is used together with `PerDeviceCacheAllocator.free_immediately`.
When `true`, arrays are bulk-freed instead of being stored in the cache.
In that case `alloc!` skips the "free" part of `cache`
and executes `alloc_f` immediately, storing the allocation for future bulk-freeing.
"""
function alloc!(alloc_f, cache::CacheAllocator, ::Type{T}, dims::Dims{N}; skip_free::Bool) where {T, N}
x = nothing
uid = hash((T, dims))
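For illustration only, a hedged sketch of how a caller might use `alloc!`. The `cache` and `kab` bindings and the use of `KernelAbstractions.zeros` as the fallback allocator are assumptions, and the zero-argument do-block form of `alloc_f` is inferred from the docstring rather than confirmed by this diff:

```julia
# Hypothetical usage sketch (not taken from this commit). `cache::CacheAllocator`
# and the KernelAbstractions backend `kab` are assumed to already exist.
buf = alloc!(cache, Float32, (1024, 1024); skip_free = false) do
    # Executed only on a cache miss; the result is stored for future reuse.
    KernelAbstractions.zeros(kab, Float32, 1024, 1024)
end
```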
@@ -55,7 +72,7 @@ function free_busy!(cache::CacheAllocator; free_immediately::Bool)
free_pool = get_pool!(cache, :free, uid)
Base.@lock cache.lock begin
if free_immediately
for p in busy_pool unsafe_free!(p) end
map(unsafe_free!, busy_pool)
else
append!(free_pool, busy_pool)
end
@@ -119,6 +136,11 @@ function Base.sizeof(pdcache::PerDeviceCacheAllocator, device, name::Symbol)
return sz
end

"""
    invalidate_cache_allocator!(kab::Backend, name::Symbol)

Free all memory held by the `name`d caching allocator for the given KernelAbstractions `backend`.
"""
invalidate_cache_allocator!(kab::Backend, name::Symbol) =
invalidate_cache_allocator!(cache_allocator(kab), device(kab), name)

@@ -149,6 +171,47 @@ function free_busy!(kab::Backend, name::Symbol)
free_busy!(named_cache_allocator!(pdcache, device(kab), name); pdcache.free_immediately)
end

"""
    @cache_scope backend name expr

Evaluate expression `expr` using the `name`d caching allocator
for the given KernelAbstractions `backend`.

When a GPU allocation is requested during execution of `expr`,
the allocator first tries to find such an allocation in the "free" part of its cache,
marking it as "busy" and returning it to the user.
If no suitable allocation is found in the "free" part, an actual allocation is performed,
marked as "busy", and returned to the user.
**After** the execution of `expr`, all "busy" allocations are marked as "free" again,
so they can be reused the next time the program enters this scope.

This is useful in a repeating block of code to avoid relying on
the GC to free GPU memory in time.

`name` is a `Symbol` that selects which allocator to use
(`:none` is reserved and means no allocator).

# Example

In the following example we apply the caching allocator at every iteration of the for-loop.
Every iteration requires 2 GiB of GPU memory; without the caching allocator
the GC wouldn't be able to free the arrays in time, resulting in higher memory usage.
With the caching allocator, memory usage stays at exactly 2 GiB.
After the loop, we free all cached memory, if there is any.
```julia
kab = CUDABackend()
n = 1024^3
for i in 1:1000
@cache_scope kab :loop begin
sin.(CUDA.rand(Float32, n))
end
end
invalidate_cache_allocator!(kab, :loop)
```
"""
macro cache_scope(backend, name, expr)
quote
res = @with $(esc(CacheAllocatorName)) => $(esc(name)) $(esc(expr))
@@ -157,6 +220,12 @@ end
end
end
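Written out by hand, the macro corresponds roughly to the following; the backend and array calls are placeholders, and the collapsed lines of the macro body are assumed to free the busy pool and return the result:

```julia
# Hedged, hand-expanded equivalent of `@cache_scope kab :loop begin ... end`.
kab = CUDABackend()                            # placeholder backend
res = @with CacheAllocatorName => :loop begin
    sin.(CUDA.rand(Float32, 1024^2))           # allocations go through the :loop cache
end
free_busy!(kab, :loop)                         # mark this scope's "busy" arrays as "free"
```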

"""
    @no_cache_scope expr

Evaluate expression `expr` without using the caching allocator.
This is useful to call from within `@cache_scope` to avoid caching particular arrays.
"""
macro no_cache_scope(expr)
quote
@with $(esc(CacheAllocatorName)) => :none $(esc(expr))
@@ -165,6 +234,18 @@ end
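A hedged sketch of combining the two macros: temporaries inside the loop come from the cache, while an array that must outlive the scope is allocated with the allocator disabled. The backend and array calls are placeholders, and the idea that cached arrays should not be kept across scopes follows from the docstrings above rather than from code shown here:

```julia
kab = CUDABackend()                             # placeholder backend
results = []
for epoch in 1:100
    @cache_scope kab :train begin
        tmp = sin.(CUDA.rand(Float32, 1024^2))  # reused from the cache on later epochs
        # Allocated outside the cache, so it is safe to keep after the scope ends.
        kept = @no_cache_scope copy(tmp)
        push!(results, kept)
    end
end
invalidate_cache_allocator!(kab, :train)        # finally release the cached memory
```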

# Interface API.

"""
    cache_allocator(::Backend)

Given a KernelAbstractions `backend`, return the corresponding `PerDeviceCacheAllocator` for it.
Each GPU backend must implement this.
"""
cache_allocator(::Backend) = error("Not implemented.")

"""
    device(::Backend)

Given a KernelAbstractions `backend`, return its current device.
Each GPU backend must implement this.
"""
device(::Backend) = error("Not implemented.")
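To make the contract concrete, a hedged sketch of what a backend package might define. `MyBackend` is hypothetical, the `PerDeviceCacheAllocator` constructor is not shown in this diff (hence the untyped placeholder global), and returning an integer device id is purely an assumption:

```julia
import KernelAbstractions as KA

# Hypothetical backend type, for illustration only.
struct MyBackend <: KA.Backend end

# The backend package is expected to keep one `PerDeviceCacheAllocator` around;
# its constructor is not visible in this diff, so a placeholder is used here.
const MY_PDCACHE = Ref{Any}(nothing)

cache_allocator(::MyBackend) = MY_PDCACHE[]     # hand back the per-device allocator state
device(::MyBackend) = 1                         # assumption: id of the currently active device
```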
