1 #include <clpeak.h>
2 #include <cstdlib>
3
runTransferBandwidthTest(cl::CommandQueue & queue,cl::Program & prog,device_info_t & devInfo)4 int clPeak::runTransferBandwidthTest(cl::CommandQueue &queue, cl::Program &prog, device_info_t &devInfo)
5 {
6 if (!isTransferBW)
7 return 0;
8
9 float timed, gbps;
10 cl::NDRange globalSize, localSize;
11 cl::Context ctx = queue.getInfo<CL_QUEUE_CONTEXT>();
12 uint iters = devInfo.transferBWIters;
13 Timer timer;
14 float *arr = NULL;
15
16 uint64_t maxItems = devInfo.maxAllocSize / sizeof(float) / 2;
17 uint64_t numItems = roundToMultipleOf(maxItems, devInfo.maxWGSize, devInfo.transferBWMaxSize);
18
19 try
20 {
21 arr = static_cast<float *>(aligned_alloc(64, numItems * sizeof(float)));
22 memset(arr, 0, numItems * sizeof(float));
23 cl::Buffer clBuffer = cl::Buffer(ctx, (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR), (numItems * sizeof(float)));
24
25 log->print(NEWLINE TAB TAB "Transfer bandwidth (GBPS)" NEWLINE);
26 log->xmlOpenTag("transfer_bandwidth");
27 log->xmlAppendAttribs("unit", "gbps");
28
29 ///////////////////////////////////////////////////////////////////////////
30 // enqueueWriteBuffer
31 log->print(TAB TAB TAB "enqueueWriteBuffer : ");
32
33 // Dummy warm-up
34 queue.enqueueWriteBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr);
35 queue.finish();
36
37 timed = 0;
38
39 if (useEventTimer)
40 {
41 for (uint i = 0; i < iters; i++)
42 {
43 cl::Event timeEvent;
44 queue.enqueueWriteBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr, NULL, &timeEvent);
45 queue.finish();
46 timed += timeInUS(timeEvent);
47 }
48 }
49 else
50 {
51 Timer timer;
52
53 timer.start();
54 for (uint i = 0; i < iters; i++)
55 {
56 queue.enqueueWriteBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr);
57 }
58 queue.finish();
59 timed = timer.stopAndTime();
60 }
61 timed /= static_cast<float>(iters);
62
63 gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
64 log->print(gbps);
65 log->print(NEWLINE);
66 log->xmlRecord("enqueuewritebuffer", gbps);
67 ///////////////////////////////////////////////////////////////////////////
68 // enqueueReadBuffer
69 log->print(TAB TAB TAB "enqueueReadBuffer : ");
70
71 // Dummy warm-up
72 queue.enqueueReadBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr);
73 queue.finish();
74
75 timed = 0;
76 if (useEventTimer)
77 {
78 for (uint i = 0; i < iters; i++)
79 {
80 cl::Event timeEvent;
81 queue.enqueueReadBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr, NULL, &timeEvent);
82 queue.finish();
83 timed += timeInUS(timeEvent);
84 }
85 }
86 else
87 {
88 Timer timer;
89
90 timer.start();
91 for (uint i = 0; i < iters; i++)
92 {
93 queue.enqueueReadBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr);
94 }
95 queue.finish();
96 timed = timer.stopAndTime();
97 }
98 timed /= static_cast<float>(iters);
99
100 gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
101 log->print(gbps);
102 log->print(NEWLINE);
103 log->xmlRecord("enqueuereadbuffer", gbps);
104 ///////////////////////////////////////////////////////////////////////////
105 // enqueueWriteBuffer non-blocking
106 log->print(TAB TAB TAB "enqueueWriteBuffer non-blocking : ");
107
108 // Dummy warm-up
109 queue.enqueueWriteBuffer(clBuffer, CL_FALSE, 0, (numItems * sizeof(float)), arr);
110 queue.finish();
111
112 timed = 0;
113
114 if (useEventTimer)
115 {
116 for (uint i = 0; i < iters; i++)
117 {
118 cl::Event timeEvent;
119 queue.enqueueWriteBuffer(clBuffer, CL_FALSE, 0, (numItems * sizeof(float)), arr, NULL, &timeEvent);
120 queue.finish();
121 timed += timeInUS(timeEvent);
122 }
123 }
124 else
125 {
126 Timer timer;
127
128 timer.start();
129 for (uint i = 0; i < iters; i++)
130 {
131 queue.enqueueWriteBuffer(clBuffer, CL_FALSE, 0, (numItems * sizeof(float)), arr);
132 }
133 queue.finish();
134 timed = timer.stopAndTime();
135 }
136 timed /= static_cast<float>(iters);
137
138 gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
139 log->print(gbps);
140 log->print(NEWLINE);
141 log->xmlRecord("enqueuewritebuffer_nonblocking", gbps);
142 ///////////////////////////////////////////////////////////////////////////
143 // enqueueReadBuffer non-blocking
144 log->print(TAB TAB TAB "enqueueReadBuffer non-blocking : ");
145
146 // Dummy warm-up
147 queue.enqueueReadBuffer(clBuffer, CL_FALSE, 0, (numItems * sizeof(float)), arr);
148 queue.finish();
149
150 timed = 0;
151 if (useEventTimer)
152 {
153 for (uint i = 0; i < iters; i++)
154 {
155 cl::Event timeEvent;
156 queue.enqueueReadBuffer(clBuffer, CL_FALSE, 0, (numItems * sizeof(float)), arr, NULL, &timeEvent);
157 queue.finish();
158 timed += timeInUS(timeEvent);
159 }
160 }
161 else
162 {
163 Timer timer;
164
165 timer.start();
166 for (uint i = 0; i < iters; i++)
167 {
168 queue.enqueueReadBuffer(clBuffer, CL_FALSE, 0, (numItems * sizeof(float)), arr);
169 }
170 queue.finish();
171 timed = timer.stopAndTime();
172 }
173 timed /= static_cast<float>(iters);
174
175 gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
176 log->print(gbps);
177 log->print(NEWLINE);
178 log->xmlRecord("enqueuereadbuffer_nonblocking", gbps);
179 ///////////////////////////////////////////////////////////////////////////
180 // enqueueMapBuffer
181 log->print(TAB TAB TAB "enqueueMapBuffer(for read) : ");
182
183 queue.finish();
184
185 timed = 0;
186 if (useEventTimer)
187 {
188 for (uint i = 0; i < iters; i++)
189 {
190 cl::Event timeEvent;
191 void *mapPtr;
192
193 mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_READ, 0, (numItems * sizeof(float)), NULL, &timeEvent);
194 queue.finish();
195 queue.enqueueUnmapMemObject(clBuffer, mapPtr);
196 queue.finish();
197 timed += timeInUS(timeEvent);
198 }
199 }
200 else
201 {
202 for (uint i = 0; i < iters; i++)
203 {
204 Timer timer;
205 void *mapPtr;
206
207 timer.start();
208 mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_READ, 0, (numItems * sizeof(float)));
209 queue.finish();
210 timed += timer.stopAndTime();
211
212 queue.enqueueUnmapMemObject(clBuffer, mapPtr);
213 queue.finish();
214 }
215 }
216 timed /= static_cast<float>(iters);
217
218 gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
219 log->print(gbps);
220 log->print(NEWLINE);
221 log->xmlRecord("enqueuemapbuffer", gbps);
222 ///////////////////////////////////////////////////////////////////////////
223
224 // memcpy from mapped ptr
225 log->print(TAB TAB TAB TAB "memcpy from mapped ptr : ");
226 queue.finish();
227
228 timed = 0;
229 for (uint i = 0; i < iters; i++)
230 {
231 cl::Event timeEvent;
232 void *mapPtr;
233
234 mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_READ, 0, (numItems * sizeof(float)));
235 queue.finish();
236
237 timer.start();
238 memcpy(arr, mapPtr, (numItems * sizeof(float)));
239 timed += timer.stopAndTime();
240
241 queue.enqueueUnmapMemObject(clBuffer, mapPtr);
242 queue.finish();
243 }
244 timed /= static_cast<float>(iters);
245
246 gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
247 log->print(gbps);
248 log->print(NEWLINE);
249 log->xmlRecord("memcpy_from_mapped_ptr", gbps);
250
251 ///////////////////////////////////////////////////////////////////////////
252
253 // enqueueUnmap
254 log->print(TAB TAB TAB "enqueueUnmap(after write) : ");
255
256 queue.finish();
257
258 timed = 0;
259 if (useEventTimer)
260 {
261 for (uint i = 0; i < iters; i++)
262 {
263 cl::Event timeEvent;
264 void *mapPtr;
265
266 mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_WRITE, 0, (numItems * sizeof(float)));
267 queue.finish();
268 queue.enqueueUnmapMemObject(clBuffer, mapPtr, NULL, &timeEvent);
269 queue.finish();
270 timed += timeInUS(timeEvent);
271 }
272 }
273 else
274 {
275 for (uint i = 0; i < iters; i++)
276 {
277 Timer timer;
278 void *mapPtr;
279
280 mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_WRITE, 0, (numItems * sizeof(float)));
281 queue.finish();
282
283 timer.start();
284 queue.enqueueUnmapMemObject(clBuffer, mapPtr);
285 queue.finish();
286 timed += timer.stopAndTime();
287 }
288 }
289 timed /= static_cast<float>(iters);
290 gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
291
292 log->print(gbps);
293 log->print(NEWLINE);
294 log->xmlRecord("enqueueunmap", gbps);
295 ///////////////////////////////////////////////////////////////////////////
296
297 // memcpy to mapped ptr
298 log->print(TAB TAB TAB TAB "memcpy to mapped ptr : ");
299 queue.finish();
300
301 timed = 0;
302 for (uint i = 0; i < iters; i++)
303 {
304 cl::Event timeEvent;
305 void *mapPtr;
306
307 mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_WRITE, 0, (numItems * sizeof(float)));
308 queue.finish();
309
310 timer.start();
311 memcpy(mapPtr, arr, (numItems * sizeof(float)));
312 timed += timer.stopAndTime();
313
314 queue.enqueueUnmapMemObject(clBuffer, mapPtr);
315 queue.finish();
316 }
317 timed /= static_cast<float>(iters);
318
319 gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
320 log->print(gbps);
321 log->print(NEWLINE);
322 log->xmlRecord("memcpy_to_mapped_ptr", gbps);
323
324 ///////////////////////////////////////////////////////////////////////////
325 log->xmlCloseTag(); // transfer_bandwidth
326
327 if (arr)
328 std::free(arr);
329 }
330 catch (cl::Error &error)
331 {
332 stringstream ss;
333 ss << error.what() << " (" << error.err() << ")" NEWLINE
334 << TAB TAB TAB "Tests skipped" NEWLINE;
335 log->print(ss.str());
336
337 if (arr)
338 {
339 std::free(arr);
340 }
341 return -1;
342 }
343
344 return 0;
345 }
346