-
Notifications
You must be signed in to change notification settings - Fork 1
/
testCuda.cu
61 lines (46 loc) · 2.31 KB
/
testCuda.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#include <cuda.h>

#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <vector>
/*
 * MY_CUDA_CHECK(flag)
 *
 * Evaluates `flag` exactly once; it must yield a cudaError_t (typically a
 * CUDA runtime API call). If the result is not cudaSuccess, throws
 * std::runtime_error whose message contains the source file, the line of
 * the macro invocation, and the text from cudaGetErrorString().
 *
 * The do { ... } while (0) wrapper makes the macro behave as a single
 * statement, so it is safe inside un-braced if/else bodies.
 */
#define MY_CUDA_CHECK(flag) \
    do { \
        const cudaError_t _tmpVal = (flag); \
        if ( _tmpVal != cudaSuccess ) { \
            std::ostringstream ostr; \
            ostr << "CUDA Function Failed (" << __FILE__ << "," << __LINE__ << ") " << cudaGetErrorString(_tmpVal); \
            throw std::runtime_error(ostr.str()); \
        } \
    } \
    while (0)
// Draws one value from rand(), divides it by RAND_MAX in double precision,
// and returns the quotient converted to T. The generator is not seeded here;
// the sequence follows whatever srand() state the caller has set up.
template <typename T>
static T FRAND(void)
{
    const double numerator = rand();
    const double denominator = static_cast<double>(RAND_MAX);
    const double ratio = numerator / denominator;
    return static_cast<T>(ratio);
}
// Maps one FRAND<T>() sample onto the interval spanned by A and B:
// the sample is multiplied by (B - A) and then offset by A.
template <typename T>
static T RAN_GEN(T A, T B)
{
    const T sample = FRAND<T>();
    const T width = B - A;
    return (sample * width) + A;
}
int main()
{
size_t szInData = 2000*64*sizeof(float);
size_t szOutData = 2000*sizeof(float);
void *inDevData;
void *outDevData;
MY_CUDA_CHECK( cudaMalloc(&inDevData, szInData) );
MY_CUDA_CHECK( cudaMalloc(&outDevData, szOutData) );
std::vector<float> inHostData;
std::vector<float> outHostData;
inHostData.resize( szInData/ sizeof(float) );
outHostData.resize( szOutData/ sizeof(float) );
for (auto& inVal : inHostData)
inVal = RAN_GEN<float>(0.0f, 1.0f);
std::fill(outHostData.begin(), outHostData.end(), 0.0f);
MY_CUDA_CHECK( cudaMemcpy(inDevData, inHostData.data(), szInData, cudaMemcpyHostToDevice) );
MY_CUDA_CHECK( cudaMemcpy(outDevData, outHostData.data(), szOutData, cudaMemcpyHostToDevice) );
cudaDeviceSynchronize();
};