• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include <clpeak.h>
2 #include <cstdlib>
3 
runTransferBandwidthTest(cl::CommandQueue & queue,cl::Program & prog,device_info_t & devInfo)4 int clPeak::runTransferBandwidthTest(cl::CommandQueue &queue, cl::Program &prog, device_info_t &devInfo)
5 {
6   if (!isTransferBW)
7     return 0;
8 
9   float timed, gbps;
10   cl::NDRange globalSize, localSize;
11   cl::Context ctx = queue.getInfo<CL_QUEUE_CONTEXT>();
12   uint iters = devInfo.transferBWIters;
13   Timer timer;
14   float *arr = NULL;
15 
16   uint64_t maxItems = devInfo.maxAllocSize / sizeof(float) / 2;
17   uint64_t numItems = roundToMultipleOf(maxItems, devInfo.maxWGSize, devInfo.transferBWMaxSize);
18 
19   try
20   {
21     arr = static_cast<float *>(aligned_alloc(64, numItems * sizeof(float)));
22     memset(arr, 0, numItems * sizeof(float));
23     cl::Buffer clBuffer = cl::Buffer(ctx, (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR), (numItems * sizeof(float)));
24 
25     log->print(NEWLINE TAB TAB "Transfer bandwidth (GBPS)" NEWLINE);
26     log->xmlOpenTag("transfer_bandwidth");
27     log->xmlAppendAttribs("unit", "gbps");
28 
29     ///////////////////////////////////////////////////////////////////////////
30     // enqueueWriteBuffer
31     log->print(TAB TAB TAB "enqueueWriteBuffer              : ");
32 
33     // Dummy warm-up
34     queue.enqueueWriteBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr);
35     queue.finish();
36 
37     timed = 0;
38 
39     if (useEventTimer)
40     {
41       for (uint i = 0; i < iters; i++)
42       {
43         cl::Event timeEvent;
44         queue.enqueueWriteBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr, NULL, &timeEvent);
45         queue.finish();
46         timed += timeInUS(timeEvent);
47       }
48     }
49     else
50     {
51       Timer timer;
52 
53       timer.start();
54       for (uint i = 0; i < iters; i++)
55       {
56         queue.enqueueWriteBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr);
57       }
58       queue.finish();
59       timed = timer.stopAndTime();
60     }
61     timed /= static_cast<float>(iters);
62 
63     gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
64     log->print(gbps);
65     log->print(NEWLINE);
66     log->xmlRecord("enqueuewritebuffer", gbps);
67     ///////////////////////////////////////////////////////////////////////////
68     // enqueueReadBuffer
69     log->print(TAB TAB TAB "enqueueReadBuffer               : ");
70 
71     // Dummy warm-up
72     queue.enqueueReadBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr);
73     queue.finish();
74 
75     timed = 0;
76     if (useEventTimer)
77     {
78       for (uint i = 0; i < iters; i++)
79       {
80         cl::Event timeEvent;
81         queue.enqueueReadBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr, NULL, &timeEvent);
82         queue.finish();
83         timed += timeInUS(timeEvent);
84       }
85     }
86     else
87     {
88       Timer timer;
89 
90       timer.start();
91       for (uint i = 0; i < iters; i++)
92       {
93         queue.enqueueReadBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr);
94       }
95       queue.finish();
96       timed = timer.stopAndTime();
97     }
98     timed /= static_cast<float>(iters);
99 
100     gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
101     log->print(gbps);
102     log->print(NEWLINE);
103     log->xmlRecord("enqueuereadbuffer", gbps);
104     ///////////////////////////////////////////////////////////////////////////
105     // enqueueWriteBuffer non-blocking
106     log->print(TAB TAB TAB "enqueueWriteBuffer non-blocking : ");
107 
108     // Dummy warm-up
109     queue.enqueueWriteBuffer(clBuffer, CL_FALSE, 0, (numItems * sizeof(float)), arr);
110     queue.finish();
111 
112     timed = 0;
113 
114     if (useEventTimer)
115     {
116       for (uint i = 0; i < iters; i++)
117       {
118         cl::Event timeEvent;
119         queue.enqueueWriteBuffer(clBuffer, CL_FALSE, 0, (numItems * sizeof(float)), arr, NULL, &timeEvent);
120         queue.finish();
121         timed += timeInUS(timeEvent);
122       }
123     }
124     else
125     {
126       Timer timer;
127 
128       timer.start();
129       for (uint i = 0; i < iters; i++)
130       {
131         queue.enqueueWriteBuffer(clBuffer, CL_FALSE, 0, (numItems * sizeof(float)), arr);
132       }
133       queue.finish();
134       timed = timer.stopAndTime();
135     }
136     timed /= static_cast<float>(iters);
137 
138     gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
139     log->print(gbps);
140     log->print(NEWLINE);
141     log->xmlRecord("enqueuewritebuffer_nonblocking", gbps);
142     ///////////////////////////////////////////////////////////////////////////
143     // enqueueReadBuffer non-blocking
144     log->print(TAB TAB TAB "enqueueReadBuffer non-blocking  : ");
145 
146     // Dummy warm-up
147     queue.enqueueReadBuffer(clBuffer, CL_FALSE, 0, (numItems * sizeof(float)), arr);
148     queue.finish();
149 
150     timed = 0;
151     if (useEventTimer)
152     {
153       for (uint i = 0; i < iters; i++)
154       {
155         cl::Event timeEvent;
156         queue.enqueueReadBuffer(clBuffer, CL_FALSE, 0, (numItems * sizeof(float)), arr, NULL, &timeEvent);
157         queue.finish();
158         timed += timeInUS(timeEvent);
159       }
160     }
161     else
162     {
163       Timer timer;
164 
165       timer.start();
166       for (uint i = 0; i < iters; i++)
167       {
168         queue.enqueueReadBuffer(clBuffer, CL_FALSE, 0, (numItems * sizeof(float)), arr);
169       }
170       queue.finish();
171       timed = timer.stopAndTime();
172     }
173     timed /= static_cast<float>(iters);
174 
175     gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
176     log->print(gbps);
177     log->print(NEWLINE);
178     log->xmlRecord("enqueuereadbuffer_nonblocking", gbps);
179     ///////////////////////////////////////////////////////////////////////////
180     // enqueueMapBuffer
181     log->print(TAB TAB TAB "enqueueMapBuffer(for read)      : ");
182 
183     queue.finish();
184 
185     timed = 0;
186     if (useEventTimer)
187     {
188       for (uint i = 0; i < iters; i++)
189       {
190         cl::Event timeEvent;
191         void *mapPtr;
192 
193         mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_READ, 0, (numItems * sizeof(float)), NULL, &timeEvent);
194         queue.finish();
195         queue.enqueueUnmapMemObject(clBuffer, mapPtr);
196         queue.finish();
197         timed += timeInUS(timeEvent);
198       }
199     }
200     else
201     {
202       for (uint i = 0; i < iters; i++)
203       {
204         Timer timer;
205         void *mapPtr;
206 
207         timer.start();
208         mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_READ, 0, (numItems * sizeof(float)));
209         queue.finish();
210         timed += timer.stopAndTime();
211 
212         queue.enqueueUnmapMemObject(clBuffer, mapPtr);
213         queue.finish();
214       }
215     }
216     timed /= static_cast<float>(iters);
217 
218     gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
219     log->print(gbps);
220     log->print(NEWLINE);
221     log->xmlRecord("enqueuemapbuffer", gbps);
222     ///////////////////////////////////////////////////////////////////////////
223 
224     // memcpy from mapped ptr
225     log->print(TAB TAB TAB TAB "memcpy from mapped ptr        : ");
226     queue.finish();
227 
228     timed = 0;
229     for (uint i = 0; i < iters; i++)
230     {
231       cl::Event timeEvent;
232       void *mapPtr;
233 
234       mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_READ, 0, (numItems * sizeof(float)));
235       queue.finish();
236 
237       timer.start();
238       memcpy(arr, mapPtr, (numItems * sizeof(float)));
239       timed += timer.stopAndTime();
240 
241       queue.enqueueUnmapMemObject(clBuffer, mapPtr);
242       queue.finish();
243     }
244     timed /= static_cast<float>(iters);
245 
246     gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
247     log->print(gbps);
248     log->print(NEWLINE);
249     log->xmlRecord("memcpy_from_mapped_ptr", gbps);
250 
251     ///////////////////////////////////////////////////////////////////////////
252 
253     // enqueueUnmap
254     log->print(TAB TAB TAB "enqueueUnmap(after write)       : ");
255 
256     queue.finish();
257 
258     timed = 0;
259     if (useEventTimer)
260     {
261       for (uint i = 0; i < iters; i++)
262       {
263         cl::Event timeEvent;
264         void *mapPtr;
265 
266         mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_WRITE, 0, (numItems * sizeof(float)));
267         queue.finish();
268         queue.enqueueUnmapMemObject(clBuffer, mapPtr, NULL, &timeEvent);
269         queue.finish();
270         timed += timeInUS(timeEvent);
271       }
272     }
273     else
274     {
275       for (uint i = 0; i < iters; i++)
276       {
277         Timer timer;
278         void *mapPtr;
279 
280         mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_WRITE, 0, (numItems * sizeof(float)));
281         queue.finish();
282 
283         timer.start();
284         queue.enqueueUnmapMemObject(clBuffer, mapPtr);
285         queue.finish();
286         timed += timer.stopAndTime();
287       }
288     }
289     timed /= static_cast<float>(iters);
290     gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
291 
292     log->print(gbps);
293     log->print(NEWLINE);
294     log->xmlRecord("enqueueunmap", gbps);
295     ///////////////////////////////////////////////////////////////////////////
296 
297     // memcpy to mapped ptr
298     log->print(TAB TAB TAB TAB "memcpy to mapped ptr          : ");
299     queue.finish();
300 
301     timed = 0;
302     for (uint i = 0; i < iters; i++)
303     {
304       cl::Event timeEvent;
305       void *mapPtr;
306 
307       mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_WRITE, 0, (numItems * sizeof(float)));
308       queue.finish();
309 
310       timer.start();
311       memcpy(mapPtr, arr, (numItems * sizeof(float)));
312       timed += timer.stopAndTime();
313 
314       queue.enqueueUnmapMemObject(clBuffer, mapPtr);
315       queue.finish();
316     }
317     timed /= static_cast<float>(iters);
318 
319     gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
320     log->print(gbps);
321     log->print(NEWLINE);
322     log->xmlRecord("memcpy_to_mapped_ptr", gbps);
323 
324     ///////////////////////////////////////////////////////////////////////////
325     log->xmlCloseTag(); // transfer_bandwidth
326 
327     if (arr)
328       std::free(arr);
329   }
330   catch (cl::Error &error)
331   {
332     stringstream ss;
333     ss << error.what() << " (" << error.err() << ")" NEWLINE
334        << TAB TAB TAB "Tests skipped" NEWLINE;
335     log->print(ss.str());
336 
337     if (arr)
338     {
339       std::free(arr);
340     }
341     return -1;
342   }
343 
344   return 0;
345 }
346