-
Notifications
You must be signed in to change notification settings - Fork 1
/
cufft.cu
79 lines (67 loc) · 1.87 KB
/
cufft.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#include "stdio.h"
#include "stdlib.h"
#include "cuda_runtime.h"
#include "cufft.h"
#include "device_launch_parameters.h"
//#define PRINT
#define REP_TIMES 100
/* Abort the benchmark with a diagnostic when a CUDA runtime call fails. */
static void checkCuda(cudaError_t status, const char *what) {
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error in %s: %s\n", what,
                cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/* Abort the benchmark with a diagnostic when a cuFFT call fails. */
static void checkCufft(cufftResult status, const char *what) {
    if (status != CUFFT_SUCCESS) {
        fprintf(stderr, "cuFFT error in %s: code %d\n", what, (int) status);
        exit(EXIT_FAILURE);
    }
}

/*
 * Times REP_TIMES consecutive inverse 2D complex-to-complex cuFFT transforms
 * of a dimx x dimy array.
 *
 * The host input is filled with real part (index % 1000) and zero imaginary
 * part, copied to the device, and transformed out-of-place. Only the
 * cufftExecC2C calls sit between the two timing events; allocation, plan
 * creation and the host<->device copies are excluded from the measurement.
 *
 * Parameters:
 *   dimx, dimy - transform dimensions, passed to cufftPlan2d in this order;
 *                both must be positive.
 *
 * Returns the total elapsed time for all REP_TIMES transforms, in
 * milliseconds as reported by cudaEventElapsedTime.
 *
 * Any allocation, CUDA or cuFFT failure prints a diagnostic to stderr and
 * terminates the process, since a timing taken after a failed call would be
 * meaningless.
 */
float testmoduleGPU(int dimx, int dimy) {
    if (dimx <= 0 || dimy <= 0) {
        fprintf(stderr, "testmoduleGPU: dimensions must be positive (got %d x %d)\n",
                dimx, dimy);
        exit(EXIT_FAILURE);
    }

    /* Compute sizes in size_t so large dimensions cannot overflow int. */
    const size_t count = (size_t) dimx * (size_t) dimy;
    const size_t bytes = count * sizeof(cufftComplex);

    cufftComplex *input = (cufftComplex*) malloc(bytes);
    cufftComplex *output = (cufftComplex*) malloc(bytes);
    if (input == NULL || output == NULL) {
        fprintf(stderr, "testmoduleGPU: host allocation of %zu bytes failed\n",
                bytes);
        free(input);
        free(output);
        exit(EXIT_FAILURE);
    }

    for (size_t idx = 0; idx < count; idx++) {
        input[idx].x = (float) (idx % 1000);
        input[idx].y = 0;
    }

    cufftComplex *d_inputData = NULL;
    cufftComplex *d_outData = NULL;
    checkCuda(cudaMalloc((void**) &d_inputData, bytes), "cudaMalloc(input)");
    checkCuda(cudaMalloc((void**) &d_outData, bytes), "cudaMalloc(output)");
    checkCuda(cudaMemcpy(d_inputData, input, bytes, cudaMemcpyHostToDevice),
              "cudaMemcpy(host to device)");

    cufftHandle plan;
    checkCufft(cufftPlan2d(&plan, dimx, dimy, CUFFT_C2C), "cufftPlan2d");

    cudaEvent_t start1;
    cudaEvent_t stop1;
    checkCuda(cudaEventCreate(&start1), "cudaEventCreate(start)");
    checkCuda(cudaEventCreate(&stop1), "cudaEventCreate(stop)");

    /* Events are recorded on stream 0; no stream is set on the plan, so the
       transforms are issued on that same stream and are bracketed by them. */
    checkCuda(cudaEventRecord(start1, 0), "cudaEventRecord(start)");
    for (int rep = 0; rep < REP_TIMES; rep++) {
        //cufftExecC2C(plan, d_inputData, d_outData, CUFFT_FORWARD);
        checkCufft(cufftExecC2C(plan, d_inputData, d_outData, CUFFT_INVERSE),
                   "cufftExecC2C");
    }
    checkCuda(cudaEventRecord(stop1, 0), "cudaEventRecord(stop)");
    checkCuda(cudaEventSynchronize(stop1), "cudaEventSynchronize");

    float msecTotal1 = 0.0f;
    checkCuda(cudaEventElapsedTime(&msecTotal1, start1, stop1),
              "cudaEventElapsedTime");

    checkCuda(cudaMemcpy(output, d_outData, bytes, cudaMemcpyDeviceToHost),
              "cudaMemcpy(device to host)");

#ifdef PRINT
    /* Optional debug dump of the first few output samples. */
    for (size_t idx = 0; idx < 10 && idx < count; idx++) {
        printf("%f %f \n", output[idx].x, output[idx].y);
    }
#endif

    /* Release everything created in this call, including the timing events,
       which would otherwise leak on every invocation. */
    checkCuda(cudaEventDestroy(start1), "cudaEventDestroy(start)");
    checkCuda(cudaEventDestroy(stop1), "cudaEventDestroy(stop)");
    checkCufft(cufftDestroy(plan), "cufftDestroy");
    free(input);
    free(output);
    checkCuda(cudaFree(d_inputData), "cudaFree(input)");
    checkCuda(cudaFree(d_outData), "cudaFree(output)");
    return msecTotal1;
}
/*
 * Benchmark driver: for each square size 2^7 .. 2^13 per side, calls
 * testmoduleGPU ten times, prints each returned time after "ITER", and then
 * prints the mean of the ten values after "AVER".
 */
int main() {
    const int kRuns = 10;
    // 128 = 2^7 ; 8192 = 2^13
    for (int exponent = 7; exponent <= 13; exponent++) {
        /* Integer shift gives the exact power of two; no floating-point pow
           and no double-to-int narrowing. */
        const int side = 1 << exponent;
        double totalMs = 0;
        for (int run = 0; run < kRuns; run++) {
            const double elapsedMs = testmoduleGPU(side, side);
            printf("ITER %f ", elapsedMs);
            totalMs += elapsedMs;
        }
        printf("\n AVER %f \n", totalMs / kRuns);
    }
    return 0;
}