Skip to content

Commit

Permalink
Fix benchmarking CI and benchmark Shared and Private storage modes (#437
Browse files Browse the repository at this point in the history
)
  • Loading branch information
christiangnrd authored Sep 30, 2024
1 parent 3bbda32 commit b999285
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 90 deletions.
2 changes: 0 additions & 2 deletions .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,7 @@ steps:
build.message !~ /\[skip special\]/
timeout_in_minutes: 60

# we want to benchmark every commit on the master branch, even if it failed CI
- wait: ~
# continue_on_failure: true

- group: ":racehorse: Benchmarks"
steps:
Expand Down
4 changes: 3 additions & 1 deletion .github/workflows/Benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ on:
- main
paths:
- "src/**/*"
- "lib/**/*"
- "ext/**/*"
- "perf/**/*"
- ".buildkite/**/*"
Expand All @@ -21,8 +22,9 @@ on:
- main
paths:
- "src/**/*"
- "lib/**/*"
- "ext/**/*"
- "benchmarks/**/*"
- "perf/**/*"
- ".buildkite/**/*"
- "Project.toml"
- ".github/workflows/Benchmark.yml"
Expand Down
176 changes: 89 additions & 87 deletions perf/array.jl
Original file line number Diff line number Diff line change
@@ -1,110 +1,112 @@
group = addgroup!(SUITE, "array")

const m = 512
const n = 1000

# generate some arrays
cpu_mat = rand(rng, Float32, m, n)
gpu_mat = MtlArray{Float32}(undef, size(cpu_mat))
gpu_vec = reshape(gpu_mat, length(gpu_mat))
gpu_arr_3d = reshape(gpu_mat, (m, 40, 25))
gpu_arr_4d = reshape(gpu_mat, (m, 10, 10, 10))
gpu_mat_ints = MtlArray(rand(rng, Int, m, n))
gpu_vec_ints = reshape(gpu_mat_ints, length(gpu_mat_ints))
gpu_mat_bools = MtlArray(rand(rng, Bool, m, n))
gpu_vec_bools = reshape(gpu_mat_bools, length(gpu_mat_bools))

group["construct"] = @benchmarkable MtlArray{Int}(undef, 1)

group["copy"] = @async_benchmarkable copy($gpu_mat)

gpu_mat2 = copy(gpu_mat)
let group = addgroup!(group, "copyto!")
group["cpu_to_gpu"] = @async_benchmarkable copyto!($gpu_mat, $cpu_mat)
group["gpu_to_cpu"] = @async_benchmarkable copyto!($cpu_mat, $gpu_mat)
group["gpu_to_gpu"] = @async_benchmarkable copyto!($gpu_mat2, $gpu_mat)
end
for (S, smname) in [(Metal.PrivateStorage,"private"), (Metal.SharedStorage,"shared")]
group = addgroup!(SUITE, "$smname array")

# generate some arrays
cpu_mat = rand(rng, Float32, m, n)
gpu_mat = MtlMatrix{Float32,S}(undef, size(cpu_mat))
gpu_vec = reshape(gpu_mat, length(gpu_mat))
gpu_arr_3d = reshape(gpu_mat, (m, 40, 25))
gpu_arr_4d = reshape(gpu_mat, (m, 10, 10, 10))
gpu_mat_ints = MtlMatrix{Int,S}(rand(rng, Int, m, n))
gpu_vec_ints = reshape(gpu_mat_ints, length(gpu_mat_ints))
gpu_mat_bools = MtlMatrix{Bool,S}(rand(rng, Bool, m, n))
gpu_vec_bools = reshape(gpu_mat_bools, length(gpu_mat_bools))

group["construct"] = @benchmarkable MtlArray{Int,1,$S}(undef, 1)

group["copy"] = @benchmarkable Metal.@sync copy($gpu_mat)

gpu_mat2 = copy(gpu_mat)
let group = addgroup!(group, "copyto!")
group["cpu_to_gpu"] = @benchmarkable Metal.@sync copyto!($gpu_mat, $cpu_mat)
group["gpu_to_cpu"] = @benchmarkable Metal.@sync copyto!($cpu_mat, $gpu_mat)
group["gpu_to_gpu"] = @benchmarkable Metal.@sync copyto!($gpu_mat2, $gpu_mat)
end

let group = addgroup!(group, "iteration")
group["scalar"] = @benchmarkable Metal.@allowscalar [$gpu_vec[i] for i in 1:10]
let group = addgroup!(group, "iteration")
group["scalar"] = @benchmarkable Metal.@allowscalar [$gpu_vec[i] for i in 1:10]

group["logical"] = @benchmarkable $gpu_vec[$gpu_vec_bools]
group["logical"] = @benchmarkable $gpu_vec[$gpu_vec_bools]

let group = addgroup!(group, "findall")
group["bool"] = @benchmarkable findall($gpu_vec_bools)
group["int"] = @benchmarkable findall(isodd, $gpu_vec_ints)
end
let group = addgroup!(group, "findall")
group["bool"] = @benchmarkable findall($gpu_vec_bools)
group["int"] = @benchmarkable findall(isodd, $gpu_vec_ints)
end

let group = addgroup!(group, "findfirst")
group["bool"] = @benchmarkable findfirst($gpu_vec_bools)
group["int"] = @benchmarkable findfirst(isodd, $gpu_vec_ints)
end
let group = addgroup!(group, "findfirst")
group["bool"] = @benchmarkable findfirst($gpu_vec_bools)
group["int"] = @benchmarkable findfirst(isodd, $gpu_vec_ints)
end

let group = addgroup!(group, "findmin") # findmax
group["1d"] = @async_benchmarkable findmin($gpu_vec)
group["2d"] = @async_benchmarkable findmin($gpu_mat; dims=1)
let group = addgroup!(group, "findmin") # findmax
group["1d"] = @benchmarkable Metal.@sync findmin($gpu_vec)
group["2d"] = @benchmarkable Metal.@sync findmin($gpu_mat; dims=1)
end
end
end

# let group = addgroup!(group, "reverse")
# group["1d"] = @async_benchmarkable reverse($gpu_vec)
# group["2d"] = @async_benchmarkable reverse($gpu_mat; dims=1)
# group["1d_inplace"] = @async_benchmarkable reverse!($gpu_vec)
# group["2d_inplace"] = @async_benchmarkable reverse!($gpu_mat; dims=1)
# end

group["broadcast"] = @async_benchmarkable $gpu_mat .= 0f0
# let group = addgroup!(group, "reverse")
# group["1d"] = @benchmarkable Metal.@sync reverse($gpu_vec)
# group["2d"] = @benchmarkable Metal.@sync reverse($gpu_mat; dims=1)
# group["1d_inplace"] = @benchmarkable Metal.@sync reverse!($gpu_vec)
# group["2d_inplace"] = @benchmarkable Metal.@sync reverse!($gpu_mat; dims=1)
# end

# no need to test inplace version, which performs the same operation (but with an alloc)
let group = addgroup!(group, "accumulate")
group["1d"] = @async_benchmarkable accumulate(+, $gpu_vec)
group["2d"] = @async_benchmarkable accumulate(+, $gpu_mat; dims=1)
end
group["broadcast"] = @benchmarkable Metal.@sync $gpu_mat .= 0f0

let group = addgroup!(group, "reductions")
let group = addgroup!(group, "reduce")
group["1d"] = @async_benchmarkable reduce(+, $gpu_vec)
group["2d"] = @async_benchmarkable reduce(+, $gpu_mat; dims=1)
# no need to test inplace version, which performs the same operation (but with an alloc)
let group = addgroup!(group, "accumulate")
group["1d"] = @benchmarkable Metal.@sync accumulate(+, $gpu_vec)
group["2d"] = @benchmarkable Metal.@sync accumulate(+, $gpu_mat; dims=1)
end

let group = addgroup!(group, "mapreduce")
group["1d"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_vec)
group["2d"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_mat; dims=1)
end
let group = addgroup!(group, "reductions")
let group = addgroup!(group, "reduce")
group["1d"] = @benchmarkable Metal.@sync reduce(+, $gpu_vec)
group["2d"] = @benchmarkable Metal.@sync reduce(+, $gpu_mat; dims=1)
end

# used by sum, prod, minimum, maximum, all, any, count
end
let group = addgroup!(group, "mapreduce")
group["1d"] = @benchmarkable Metal.@sync mapreduce(x->x+1, +, $gpu_vec)
group["2d"] = @benchmarkable Metal.@sync mapreduce(x->x+1, +, $gpu_mat; dims=1)
end

let group = addgroup!(group, "random")
let group = addgroup!(group, "rand")
group["Float32"] = @async_benchmarkable Metal.rand(Float32, m*n)
group["Int64"] = @async_benchmarkable Metal.rand(Int64, m*n)
# used by sum, prod, minimum, maximum, all, any, count
end

let group = addgroup!(group, "rand!")
group["Float32"] = @async_benchmarkable Metal.rand!($gpu_vec)
group["Int64"] = @async_benchmarkable Metal.rand!($gpu_vec_ints)
let group = addgroup!(group, "random")
let group = addgroup!(group, "rand")
group["Float32"] = @benchmarkable Metal.@sync Metal.rand(Float32, m*n)
group["Int64"] = @benchmarkable Metal.@sync Metal.rand(Int64, m*n)
end

let group = addgroup!(group, "rand!")
group["Float32"] = @benchmarkable Metal.@sync Metal.rand!($gpu_vec)
group["Int64"] = @benchmarkable Metal.@sync Metal.rand!($gpu_vec_ints)
end

let group = addgroup!(group, "randn")
group["Float32"] = @benchmarkable Metal.@sync Metal.randn(Float32, m*n)
# group["Int64"] = @benchmarkable Metal.@sync Metal.randn(Int64, m*n)
end

let group = addgroup!(group, "randn!")
group["Float32"] = @benchmarkable Metal.@sync Metal.randn!($gpu_vec)
# group["Int64"] = @benchmarkable Metal.@sync Metal.randn!($gpu_vec_ints)
end
end

let group = addgroup!(group, "randn")
group["Float32"] = @async_benchmarkable Metal.randn(Float32, m*n)
# group["Int64"] = @async_benchmarkable Metal.randn(Int64, m*n)
end
# let group = addgroup!(group, "sorting")
# group["1d"] = @benchmarkable Metal.@sync sort($gpu_vec)
# group["2d"] = @benchmarkable Metal.@sync sort($gpu_mat; dims=1)
# group["by"] = @benchmarkable Metal.@sync sort($gpu_vec; by=sin)
# end

let group = addgroup!(group, "randn!")
group["Float32"] = @async_benchmarkable Metal.randn!($gpu_vec)
# group["Int64"] = @async_benchmarkable Metal.randn!($gpu_vec_ints)
let group = addgroup!(group, "permutedims")
group["2d"] = @benchmarkable Metal.@sync permutedims($gpu_mat, (2,1))
group["3d"] = @benchmarkable Metal.@sync permutedims($gpu_arr_3d, (3,1,2))
group["4d"] = @benchmarkable Metal.@sync permutedims($gpu_arr_4d, (2,1,4,3))
end
end

# let group = addgroup!(group, "sorting")
# group["1d"] = @async_benchmarkable sort($gpu_vec)
# group["2d"] = @async_benchmarkable sort($gpu_mat; dims=1)
# group["by"] = @async_benchmarkable sort($gpu_vec; by=sin)
# end

let group = addgroup!(group, "permutedims")
group["2d"] = @async_benchmarkable permutedims($gpu_mat, (2,1))
group["3d"] = @async_benchmarkable permutedims($gpu_arr_3d, (3,1,2))
group["4d"] = @async_benchmarkable permutedims($gpu_arr_4d, (2,1,4,3))
end

0 comments on commit b999285

Please sign in to comment.