-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.cpp
153 lines (127 loc) · 4.49 KB
/
main.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <vector>
#include <cuda_runtime.h>
#if defined(__APPLE__) || defined(__MACOSX)
# include <OpenCL/cl.h>
#else
# include <CL/cl.h>
#endif
// CUDA overload: reports whether a CUDA runtime status equals cudaSuccess.
constexpr bool callSuccessful(cudaError_t status) {
    return cudaSuccess == status;
}
// OpenCL overload: reports whether an OpenCL status equals CL_SUCCESS.
constexpr bool callSuccessful(cl_int status) {
    return CL_SUCCESS == status;
}
// Evaluates the GPU API expression (or status value) `x` exactly once and
// passes the result to the matching callSuccessful() overload. On failure it
// prints the enclosing function name, the line number, the stringified
// expression and the numeric status code to std::cerr, then terminates the
// process with std::exit(-1).
//
// The final line deliberately carries no trailing backslash: a line
// continuation there would splice whatever source line follows the macro
// into the macro body.
#define CHECK_GPU_CALL(x)                                                      \
    do                                                                         \
    {                                                                          \
        const auto gpu_call_status_ = (x);                                     \
        if (!callSuccessful(gpu_call_status_))                                 \
        {                                                                      \
            std::cerr << __func__ << ':' << __LINE__ << " Error in: " << #x    \
                      << " (status " << static_cast<int>(gpu_call_status_)     \
                      << ")\n";                                                \
            std::exit(-1);                                                     \
        }                                                                      \
    }                                                                          \
    while (false)
int main(int, char**)
{
const std::size_t N = 1024;
std::unique_ptr<int> h_ptr(new int[1024]);
std::fill_n(h_ptr.get(), N, 0);
// OpenCL
cl_uint ocl_platform_count = 0;
CHECK_GPU_CALL(
clGetPlatformIDs(0,
nullptr,
&ocl_platform_count));
std::vector<cl_platform_id> ocl_platform_ids(ocl_platform_count);
CHECK_GPU_CALL(
clGetPlatformIDs(ocl_platform_count,
ocl_platform_ids.data(),
nullptr));
cl_uint ocl_device_count = 0;
CHECK_GPU_CALL(
clGetDeviceIDs(ocl_platform_ids[0],
CL_DEVICE_TYPE_GPU,
0,
nullptr,
&ocl_device_count));
std::vector<cl_device_id> ocl_device_ids(ocl_device_count);
CHECK_GPU_CALL(
clGetDeviceIDs(ocl_platform_ids[0],
CL_DEVICE_TYPE_GPU,
ocl_device_count,
ocl_device_ids.data(),
nullptr));
const cl_context_properties ocl_context_props [] =
{
CL_CONTEXT_PLATFORM,
reinterpret_cast<cl_context_properties>(ocl_platform_ids[0]),
0, 0
};
cl_int ocl_error = CL_SUCCESS;
cl_context ocl_context = clCreateContext(ocl_context_props,
ocl_device_count,
ocl_device_ids.data(),
nullptr,
nullptr,
&ocl_error);
CHECK_GPU_CALL(ocl_error);
cl_device_id ocl_device_id = ocl_device_ids[0];
cl_command_queue ocl_queue = clCreateCommandQueueWithProperties(ocl_context,
ocl_device_id,
0,
&ocl_error);
CHECK_GPU_CALL(ocl_error);
cl_mem ocl_buffer = clCreateBuffer(ocl_context,
CL_MEM_USE_HOST_PTR,
N * sizeof(int),
h_ptr.get(),
&ocl_error);
CHECK_GPU_CALL(ocl_error);
// init CUDA
CHECK_GPU_CALL(cudaHostRegister(h_ptr.get(), N * sizeof(*h_ptr), 0));
// initialize using OpenCL
int* mapped_buffer = static_cast<int*>(clEnqueueMapBuffer(ocl_queue,
ocl_buffer,
CL_TRUE,
CL_MAP_WRITE,
0,
N * sizeof(int),
0,
nullptr,
nullptr,
&ocl_error));
CHECK_GPU_CALL(ocl_error);
std::fill_n(h_ptr.get(), N, 42);
CHECK_GPU_CALL(
clEnqueueUnmapMemObject(ocl_queue,
ocl_buffer,
mapped_buffer,
0,
nullptr,
nullptr));
// retrieve using CUDA
std::vector<int> v(N);
CHECK_GPU_CALL(
cudaMemcpy(v.data(),
h_ptr.get(),
N * sizeof(*h_ptr),
cudaMemcpyDeviceToHost));
if (!std::all_of(v.begin(), v.end(), [](int i) { return i == 42; }))
{
std::cerr << "Did not retrieve correct data.\n";
std::exit(-1);
}
// free OpenCL
CHECK_GPU_CALL(clReleaseMemObject(ocl_buffer));
CHECK_GPU_CALL(clReleaseCommandQueue(ocl_queue));
CHECK_GPU_CALL(clReleaseContext(ocl_context));
// free CUDA
CHECK_GPU_CALL(cudaHostUnregister(h_ptr.get()));
return 0;
}