Skip to content

Commit

Permalink
Update main_matrix_multiplication.cpp
Browse files Browse the repository at this point in the history
  • Loading branch information
Smart781 authored Oct 6, 2024
1 parent d161290 commit 77fc1be
Showing 1 changed file with 14 additions and 20 deletions.
34 changes: 14 additions & 20 deletions src/main_matrix_multiplication.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ const size_t gflops = ((size_t) M * K * N * 2) / (1000 * 1000 * 1000); // умн

std::vector<float> computeCPU(const float *as, const float *bs)
{
std::vector<float> cs(M*N, 0);
std::vector<float> cs(M * N, 0);

timer t;
for (int iter = 0; iter < benchmarkingItersCPU; ++iter) {
Expand Down Expand Up @@ -50,29 +50,26 @@ struct KernelConfig {

/// Builds the launch configuration for the "matrix_multiplication_naive" kernel.
///
/// @param tile_size  Passed as the first two gpu::WorkSize arguments and used as the
///                   divisor of M and N for the last two (presumably group size and
///                   work size per axis — confirm against gpu::WorkSize).
/// @return KernelConfig with the kernel name, work size, empty build defines and a
///         log prefix of the form "[naive, ts=<tile_size>]".
/// @throws std::runtime_error if tile_size is zero (it is used as a divisor).
KernelConfig makeNaiveConfig(unsigned int tile_size)
{
    if (tile_size == 0) {
        throw std::runtime_error("tile_size must be positive");
    }

    std::string kernel_name = "matrix_multiplication_naive";
    // NOTE(review): makeLocalWPTConfig passes extents that are not divided by the tile
    // size (M / wpt, N) in these positions, while this call passes M / tile_size and
    // N / tile_size — confirm whether gpu::WorkSize expects total work items or group counts.
    gpu::WorkSize work_size(tile_size, tile_size, M / tile_size, N / tile_size);
    std::string defines; // the naive kernel is compiled without extra defines
    std::string prefix = "[naive, ts=" + std::to_string(tile_size) + "]";
    return KernelConfig{kernel_name, work_size, defines, prefix};
}

/// Builds the launch configuration for the "matrix_multiplication_local" kernel.
///
/// @param tile_size  Passed as the first two gpu::WorkSize arguments, used as the
///                   divisor of M and N for the last two, and forwarded to the kernel
///                   build as -DTILE_SIZE=<tile_size>.
/// @return KernelConfig with the kernel name, work size, build defines and a log
///         prefix of the form "[local, ts=<tile_size>]".
/// @throws std::runtime_error if tile_size is zero (it is used as a divisor).
KernelConfig makeLocalConfig(unsigned int tile_size)
{
    if (tile_size == 0) {
        throw std::runtime_error("tile_size must be positive");
    }

    std::string kernel_name = "matrix_multiplication_local";
    // NOTE(review): makeLocalWPTConfig passes extents that are not divided by the tile
    // size (M / wpt, N) in these positions, while this call passes M / tile_size and
    // N / tile_size — confirm whether gpu::WorkSize expects total work items or group counts.
    gpu::WorkSize work_size(tile_size, tile_size, M / tile_size, N / tile_size);
    std::string defines = "-DTILE_SIZE=" + std::to_string(tile_size);
    std::string prefix = "[local, ts=" + std::to_string(tile_size) + "]";
    return KernelConfig{kernel_name, work_size, defines, prefix};
}

/// Builds the launch configuration for the "matrix_multiplication_local_wpt" kernel.
///
/// @param tile_size  Forwarded to the kernel build as -DTILE_SIZE=<tile_size>; also the
///                   second gpu::WorkSize argument, and divided by wpt for the first.
/// @param wpt        Forwarded as -DWORK_PER_THREAD=<wpt>; divides tile_size and M when
///                   computing the work size.
/// @return KernelConfig with the kernel name, work size, build defines and a log
///         prefix of the form "[local wpt, ts=<tile_size>, wpt=<wpt>]".
/// @throws std::runtime_error if wpt is zero (it is used as a divisor).
KernelConfig makeLocalWPTConfig(unsigned int tile_size, unsigned int wpt)
{
    if (wpt == 0) {
        throw std::runtime_error("wpt must be positive");
    }

    std::string kernel_name = "matrix_multiplication_local_wpt";
    // Integer division: tile_size / wpt and M / wpt truncate.
    // NOTE(review): presumably wpt is expected to divide tile_size and M evenly — confirm with callers.
    gpu::WorkSize work_size(tile_size / wpt, tile_size, M / wpt, N);
    std::string defines = "-DTILE_SIZE=" + std::to_string(tile_size) + " -DWORK_PER_THREAD=" + std::to_string(wpt);
    std::string prefix = "[local wpt, ts=" + std::to_string(tile_size) + ", wpt=" + std::to_string(wpt) + "]";
    return KernelConfig{kernel_name, work_size, defines, prefix};
}
Expand All @@ -81,12 +78,12 @@ KernelConfig makeLocalWPTConfig(unsigned int tile_size, unsigned int wpt)
void runTest(const KernelConfig &config, const float *as, const float *bs, const float *cs_cpu_reference)
{
gpu::gpu_mem_32f as_gpu, bs_gpu, cs_gpu;
as_gpu.resizeN(M*K);
bs_gpu.resizeN(K*N);
cs_gpu.resizeN(M*N);
as_gpu.resizeN(M * K);
bs_gpu.resizeN(K * N);
cs_gpu.resizeN(M * N);

as_gpu.writeN(as, M*K);
bs_gpu.writeN(bs, K*N);
as_gpu.writeN(as, M * K);
bs_gpu.writeN(bs, K * N);

ocl::Kernel matrix_multiplication_kernel(matrix_multiplication, matrix_multiplication_length, config.kernel_name, config.defines);
matrix_multiplication_kernel.compile();
Expand All @@ -101,8 +98,8 @@ void runTest(const KernelConfig &config, const float *as, const float *bs, const
std::cout << " GPU: " << t.lapAvg() << "+-" << t.lapStd() << " s" << std::endl;
std::cout << " GPU: " << gflops / t.lapAvg() << " GFlops" << std::endl;

std::vector<float> cs(M*N, 0);
cs_gpu.readN(cs.data(), M*N);
std::vector<float> cs(M * N, 0);
cs_gpu.readN(cs.data(), M * N);

// Проверяем корректность результатов
double diff_sum = 0;
Expand Down Expand Up @@ -130,9 +127,9 @@ int main(int argc, char **argv)
context.init(device.device_id_opencl);
context.activate();

std::vector<float> as(M*K, 0);
std::vector<float> bs(K*N, 0);
FastRandom r(M+K+N);
std::vector<float> as(M * K, 0);
std::vector<float> bs(K * N, 0);
FastRandom r(M + K + N);
for (unsigned int i = 0; i < as.size(); ++i) {
as[i] = r.nextf();
}
Expand All @@ -143,9 +140,6 @@ int main(int argc, char **argv)

const std::vector<float> cs_cpu_reference = computeCPU(as.data(), bs.data());

// TODO uncomment
return 0;

runTest(makeNaiveConfig(4), as.data(), bs.data(), cs_cpu_reference.data());
runTest(makeNaiveConfig(8), as.data(), bs.data(), cs_cpu_reference.data());
runTest(makeNaiveConfig(16), as.data(), bs.data(), cs_cpu_reference.data());
Expand Down

0 comments on commit 77fc1be

Please sign in to comment.