-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathheatTest.cu
89 lines (72 loc) · 2.01 KB
/
heatTest.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#include <cstdio>
#include <cstdlib>
#include "heatCPU.h"
#include "heatGPU.h"
#include "output.h"
#include "helper_cuda.h"
/*
 * Test driver: runs one heatGPU launch and one heatCPU call on the same
 * input buffer and compares the two results with computeError.
 *
 * Usage: heatTest [N]
 *   N  optional positive integer edge length; the domain is N x N x N
 *      floats. Defaults to 8 when no argument is given.
 *
 * Returns EXIT_SUCCESS when the reported error is within tolerance, and
 * EXIT_FAILURE on an invalid argument, a failed host allocation, or a
 * CPU/GPU mismatch. CUDA API failures are handled by checkCudaErrors.
 */
int main(int argc, char * argv[])
{
    const dim3 tBlock(8, 8, 8);
    const double tolerance = 1e-12;
    float dt = 1;
    dim3 n(8, 8, 8);

    if (argc >= 2)
    {
        // CUDA limits grid.y and grid.z to 65535 blocks, so the largest edge
        // this launch configuration can cover is 65535 blocks * block width.
        const long maxEdge = 65535L * static_cast<long>(tBlock.x);
        char * end = nullptr;
        const long parsed = strtol(argv[1], &end, 10);

        // Reject non-numeric text, trailing garbage, and zero/negative or
        // oversized values instead of letting them wrap into unsigned int.
        if (end == argv[1] || *end != '\0' || parsed <= 0 || parsed > maxEdge)
        {
            fprintf(stderr,
                    "usage: %s [N]  (N must be an integer in 1..%ld)\n",
                    argv[0], maxEdge);
            return EXIT_FAILURE;
        }

        const unsigned int nfix = static_cast<unsigned int>(parsed);
        n = dim3(nfix, nfix, nfix);
    }

    const size_t count = static_cast<size_t>(n.x) * n.y * n.z;
    const size_t size_tot = count * sizeof(float);

    // calloc rather than malloc: only putValOnBoundary touches these buffers
    // before uIn is uploaded, so any cell it does not write would otherwise
    // hold indeterminate values (putValOnBoundary is defined elsewhere).
    float * uIn    = static_cast<float *>(calloc(count, sizeof(float)));
    float * uOut   = static_cast<float *>(calloc(count, sizeof(float)));
    float * uCheck = static_cast<float *>(calloc(count, sizeof(float)));
    if (uIn == nullptr || uOut == nullptr || uCheck == nullptr)
    {
        fprintf(stderr, "host allocation of %zu bytes per buffer failed\n",
                size_tot);
        free(uIn);      // free(nullptr) is a no-op
        free(uOut);
        free(uCheck);
        return EXIT_FAILURE;
    }

    float * uInDevice  = nullptr;
    float * uOutDevice = nullptr;
    checkCudaErrors(cudaMalloc(&uInDevice, size_tot));
    checkCudaErrors(cudaMalloc(&uOutDevice, size_tot));

    putValOnBoundary(uIn, n, 1.0);
    putValOnBoundary(uOut, n, 1.0);
    putValOnBoundary(uCheck, n, 1.0);

    checkCudaErrors(cudaMemcpy(uInDevice, uIn, size_tot, cudaMemcpyHostToDevice));
    // Seed the device output with the same initial contents as the host
    // output, so cells the kernel might leave untouched still compare equal.
    // NOTE(review): heatGPU is not visible here; if it writes every cell this
    // copy is redundant but harmless.
    checkCudaErrors(cudaMemcpy(uOutDevice, uOut, size_tot, cudaMemcpyHostToDevice));

    // Ceil-divide so a partial block covers any remainder along each axis.
    dim3 grid;
    grid.x = (n.x + tBlock.x - 1) / tBlock.x;
    grid.y = (n.y + tBlock.y - 1) / tBlock.y;
    grid.z = (n.z + tBlock.z - 1) / tBlock.z;

    // dim3 members are unsigned int, hence %u.
    printf("n : %u %u %u\n", n.x, n.y, n.z);
    printf("grid : %u %u %u\n", grid.x, grid.y, grid.z);
    printf("block : %u %u %u\n", tBlock.x, tBlock.y, tBlock.z);

    heatGPU<<<grid, tBlock>>>(uInDevice, uOutDevice, n, dt);
    checkCudaErrors(cudaGetLastError());        // launch-configuration errors
    checkCudaErrors(cudaDeviceSynchronize());   // errors raised during execution
    checkCudaErrors(cudaMemcpy(uCheck, uOutDevice, size_tot, cudaMemcpyDeviceToHost));

    heatCPU(uIn, uOut, n, dt);

    const float err = computeError(uCheck, uOut, n);
    int status = EXIT_SUCCESS;

    // Written as !(err <= tol) so that a NaN error counts as a failure.
    if (!(err <= tolerance))
    {
        printf("******%s******\n", "Program failed");
        printf("err = %.3e\n", err);
        status = EXIT_FAILURE;
    }
    else
    {
        printf("err = %.3e\n", err);
        // Dump both tensors only for small problems.
        if (n.x <= 16)
        {
            printf("uCheck = \n");
            printTensor(uCheck, n);
            printf("\n uOut = \n");
            printTensor(uOut, n);
        }
    }

    free(uIn);
    free(uOut);
    free(uCheck);
    checkCudaErrors(cudaFree(uInDevice));
    checkCudaErrors(cudaFree(uOutDevice));
    return status;
}