cuda_computation.cu
#include "cuda_computation.h"
#include "cuda_runtime.h"
#include <stdio.h>
__global__ void kernel(float *a, size_t n)
{
int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < n)
{
a[i] = a[i] + 1;
}
}
constexpr auto threads = 8;
void *get_device_buffer(size_t s)
{
void *device;
auto rc = cudaMalloc(&device, s);
if (rc == cudaSuccess)
{
return device;
}
return nullptr;
}
void free_device_buffer(void *device)
{
cudaFree(device);
}
bool cuda_computation1(float *host, size_t n)
{
auto s = n * sizeof(float);
float *device = (float *)get_device_buffer(s);
if (!device)
{
return false;
}
auto rc = cudaMemcpy(device, host, s, cudaMemcpyHostToDevice);
if (rc != cudaSuccess)
{
free_device_buffer(device);
return false;
}
const size_t blocks = (n + threads - 1) / threads;
kernel<<<blocks, threads>>>(device, n);
bool ret = cudaDeviceSynchronize() == cudaSuccess;
ret &= cudaMemcpy(host, device, s, cudaMemcpyDeviceToHost) == cudaSuccess;
free_device_buffer(device);
return ret;
}
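
A minimal usage sketch, not part of the repository: a hypothetical host-side driver that fills a small array, calls cuda_computation1, and checks that every element was incremented. It assumes cuda_computation.h declares cuda_computation1 as defined above; the file name example_main.cu is made up for illustration.

// example_main.cu (hypothetical driver, assuming the declaration in cuda_computation.h)
#include "cuda_computation.h"
#include <stdio.h>

int main()
{
    constexpr size_t n = 16;
    float host[n];
    for (size_t i = 0; i < n; ++i)
    {
        host[i] = static_cast<float>(i); // 0, 1, 2, ...
    }

    if (!cuda_computation1(host, n))
    {
        fprintf(stderr, "cuda_computation1 failed\n");
        return 1;
    }

    // After the kernel runs, host[i] should equal i + 1.
    for (size_t i = 0; i < n; ++i)
    {
        if (host[i] != static_cast<float>(i) + 1.0f)
        {
            fprintf(stderr, "unexpected value at %zu: %f\n", i, host[i]);
            return 1;
        }
    }
    printf("all %zu elements incremented\n", n);
    return 0;
}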