-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlyap_calculate.cu
119 lines (87 loc) · 2.66 KB
/
lyap_calculate.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
// includes, cuda
#include <driver_functions.h>
#include <cuda_runtime.h>
// CUDA utilities and system includes from the CUDA SDK samples
#include <helper_cuda.h>
#include <helper_functions.h>
#include "kernel.hpp"
#include "scene.hpp"
#include "params.hpp"
// Image and grid parameters
// Edge length of the volume in samples. Height and depth below are defined
// from it, so the volume is a cube.
const unsigned int volumeWidth = 512;
const unsigned int volumeHeight = volumeWidth;
const unsigned int volumeDepth = volumeWidth;
// Threads per block along each axis (blockSize^3 threads per block).
const unsigned int blockSize = 8;
// Launch configuration used by render(). Grid x block spans
// volumeWidth x volumeHeight x volumeDepth threads in total; the integer
// divisions are exact only while every dimension is a multiple of blockSize.
const dim3 blocks(volumeWidth / blockSize, volumeHeight / blockSize, volumeDepth / blockSize);
const dim3 threads(blockSize, blockSize, blockSize);
// Pointers to the prm and cam objects, which are declared outside this file
// (presumably by one of the project headers included above - TODO confirm).
// None of curP, curC or curL is referenced in the visible part of this file.
LyapParams *curP = &prm;
LyapCam *curC = &cam;
unsigned int curL = 0;
// LMIN / LMAX are chosen at compile time by USE_LMINMAX: either the
// lMin / lMax members reached through `prm->`, or the constants 0.0 and 4.0.
// NOTE(review): the macros use `prm->` while this file takes `&prm`, so at the
// point of expansion `prm` would have to be a pointer - confirm where these
// are meant to be used. Neither macro is expanded in the visible part of this
// file.
#if USE_LMINMAX
#define LMIN prm->lMin
#define LMAX prm->lMax
#else
#define LMIN 0.0
#define LMAX 4.0
#endif
// Device array of lyapunov exponents
// Allocated in render() and released in cleanup().
float *cudaExps = 0;
// Device sequence array
// Allocated and filled by cuda_load_sequence(), released in cleanup().
Int *cudaSeq;
/*
 * Convert a sequence string and upload the result to the device.
 *
 * seqStr is handed to scene_convert_sequence(), which fills in a host array
 * and returns a length that is used here as an element count. A device buffer
 * of that many Int elements is allocated into the global cudaSeq and the host
 * array is copied into it. The host array is then released with free().
 *
 * CUDA failures are handled by checkCudaErrors.
 */
void cuda_load_sequence(unsigned char *seqStr)
{
    Int *hostSeq;
    const size_t count = scene_convert_sequence(&hostSeq, seqStr);
    const size_t bytes = count * sizeof(Int);

    checkCudaErrors(cudaMalloc(&cudaSeq, bytes));
    checkCudaErrors(cudaMemcpy(cudaSeq, hostSeq, bytes, cudaMemcpyHostToDevice));

    // The host-side copy is no longer needed once it lives on the device.
    free(hostSeq);
}
/*
 * Compute the exponent volume on the GPU and dump it to "exps.raw".
 *
 * Steps:
 *   1. Initialise parameters and upload the sequence (params_init,
 *      cuda_load_sequence).
 *   2. Allocate the global device buffer cudaExps, one float per element of
 *      the volumeWidth x volumeHeight x volumeDepth volume.
 *   3. Launch kernel_calc_volume with the file-level blocks/threads
 *      configuration.
 *   4. Copy the result to a temporary host buffer and write it verbatim
 *      (raw floats, no header) to "exps.raw" in the current directory.
 *
 * cudaExps stays allocated on return; cleanup() releases it.
 *
 * Any failure (CUDA error, host allocation failure, file open/write/close
 * failure) prints a diagnostic and terminates the process with EXIT_FAILURE,
 * matching what checkCudaErrors does for CUDA errors.
 */
void render()
{
    params_init();
    cuda_load_sequence(sequence);

    const size_t expsSize = sizeof(float) * volumeWidth * volumeHeight * volumeDepth;

    // Device buffer that receives the kernel output.
    checkCudaErrors(cudaMalloc(&cudaExps, expsSize));

    kernel_calc_volume<<<blocks, threads>>>(cudaExps, prm, cudaSeq);
    // A kernel launch returns no status; bad launch configurations show up here.
    getLastCudaError("kernel failed");
    // Faults raised while the kernel runs only surface at a synchronising
    // call. Sync now so they are reported against the kernel rather than
    // against the copy below.
    checkCudaErrors(cudaDeviceSynchronize());

    // size_t must be printed with %zu; %ld is wrong wherever long is narrower.
    printf("Points size = %zu\n", expsSize);

    float *myExps = (float *)malloc(expsSize);
    printf("malloc'ed %p.\n", (void *)myExps);
    if (myExps == NULL) {
        fprintf(stderr, "render: cannot allocate %zu bytes of host memory\n", expsSize);
        exit(EXIT_FAILURE);
    }

    checkCudaErrors(cudaMemcpy(myExps, cudaExps, expsSize, cudaMemcpyDeviceToHost));

    FILE *fp = fopen("exps.raw", "wb");
    if (fp == NULL) {
        perror("exps.raw");
        free(myExps);
        exit(EXIT_FAILURE);
    }

    const size_t written = fwrite(myExps, 1, expsSize, fp);
    // fclose flushes buffered data, so its result is part of the write's success.
    const int closeStatus = fclose(fp);
    free(myExps);

    if (written != expsSize || closeStatus != 0) {
        fprintf(stderr, "render: failed to write exps.raw (%zu of %zu bytes)\n",
                written, expsSize);
        exit(EXIT_FAILURE);
    }

    getLastCudaError("dump failed");
}
/*
 * Release the device buffers owned by this file (cudaExps and cudaSeq).
 *
 * Each pointer is reset to NULL after it is freed. cudaFree(NULL) is a no-op,
 * so calling cleanup() more than once, or before anything was allocated, is
 * harmless. Without the reset a second call would pass an already-freed
 * pointer to cudaFree, which reports an error and aborts through
 * checkCudaErrors.
 */
void cleanup()
{
    checkCudaErrors(cudaFree(cudaExps));
    cudaExps = NULL;

    checkCudaErrors(cudaFree(cudaSeq));
    cudaSeq = NULL;
}
/*
 * Select the CUDA device to run on.
 *
 * argc, argv  the program's command line, forwarded unchanged to
 *             findCudaDevice().
 * use_gl      accepted for interface compatibility; not consulted here.
 *
 * Returns whatever findCudaDevice() returns.
 */
int choose_cuda_device(int argc, char **argv, bool use_gl)
{
    (void)use_gl;

    const char **args = (const char **)argv;
    return findCudaDevice(argc, args);
}
/*
 * Program entry point: pick a device, compute and dump the volume, then
 * release the device buffers.
 */
int main(int argc, char **argv)
{
    // A device named on the command line wins; otherwise the device with the
    // highest Gflops/s is used.
    choose_cuda_device(argc, argv, true);

    render();
    cleanup();

    return EXIT_SUCCESS;
}