//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "procs.h"

// Design:
// To test sub-buffers, we first create one main buffer. We then create several sub-buffers
// and queue Actions on each one. Each Action is encapsulated in a class so it can keep track
// of the results it expects, and so we can scale the number of Actions and the number of
// sub-buffers independently.
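// Each Action also mirrors its effect into a host-side shadow copy of the parent buffer
// (the parentBufferState argument), so the final device contents can be validated against a
// known reference once all queues have finished.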

class SubBufferWrapper : public clMemWrapper
{
public:
    cl_mem mParentBuffer;
    size_t mOrigin;
    size_t mSize;

    cl_int Allocate( cl_mem parent, cl_mem_flags flags, size_t origin, size_t size )
    {
        mParentBuffer = parent;
        mOrigin = origin;
        mSize = size;

        cl_buffer_region region;
        region.origin = mOrigin;
        region.size = mSize;

        cl_int error;
        mMem = clCreateSubBuffer( mParentBuffer, flags, CL_BUFFER_CREATE_TYPE_REGION, &region, &error );
        return error;
    }
};
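// Illustrative use of SubBufferWrapper above (not executed as-is; parentBuffer is a placeholder):
// carve an aligned region out of an existing parent buffer. clCreateSubBuffer can reject origins
// that are not aligned to the device's CL_DEVICE_MEM_BASE_ADDR_ALIGN (reported in bits, hence the
// /8 conversions later in this file) with CL_MISALIGNED_SUB_BUFFER_OFFSET, which is why the tests
// below round every offset and size to that alignment.
//
//     SubBufferWrapper sub;
//     cl_int err = sub.Allocate( parentBuffer, CL_MEM_READ_WRITE, 0 /* origin */, 1024 /* size */ );
//     test_error( err, "Unable to allocate sub buffer" );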

class Action
{
public:
    virtual ~Action() {}
    virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState ) = 0;
    virtual const char * GetName( void ) const = 0;

    static MTdata d;
    static MTdata GetRandSeed( void )
    {
        if ( d == 0 )
            d = init_genrand( gRandomSeed );
        return d;
    }
    static void FreeRandSeed()
    {
        if ( d != 0 )
        {
            free_mtdata( d );
            d = 0;
        }
    }
};

MTdata Action::d = 0;

class ReadWriteAction : public Action
{
public:
    virtual ~ReadWriteAction() {}
    virtual const char * GetName( void ) const { return "ReadWrite"; }

    virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState )
    {
        cl_char *tempBuffer = (cl_char *)malloc( buffer1.mSize );
        if ( !tempBuffer )
        {
            log_error( "Out of memory\n" );
            return -1;
        }
        cl_int error = clEnqueueReadBuffer( queue, buffer1, CL_TRUE, 0, buffer1.mSize, tempBuffer, 0, NULL, NULL );
        test_error( error, "Unable to enqueue buffer read" );

        size_t start = get_random_size_t( 0, buffer1.mSize / 2, GetRandSeed() );
        size_t end = get_random_size_t( start, buffer1.mSize, GetRandSeed() );

        for ( size_t i = start; i < end; i++ )
        {
            tempBuffer[ i ] |= tag;
            parentBufferState[ i + buffer1.mOrigin ] |= tag;
        }

        error = clEnqueueWriteBuffer( queue, buffer1, CL_TRUE, 0, buffer1.mSize, tempBuffer, 0, NULL, NULL );
        test_error( error, "Unable to enqueue buffer write" );
        free( tempBuffer );
        return CL_SUCCESS;
    }
};

#ifndef MAX
#define MAX( _a, _b ) ( (_a) > (_b) ? (_a) : (_b) )
#endif
#ifndef MIN
#define MIN( _a, _b ) ( (_a) < (_b) ? (_a) : (_b) )
#endif

class CopyAction : public Action
{
public:
    virtual ~CopyAction() {}
    virtual const char * GetName( void ) const { return "Copy"; }

    virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState )
    {
        // Copy from sub-buffer 1 to sub-buffer 2
        size_t size = get_random_size_t( 0, MIN( buffer1.mSize, buffer2.mSize ), GetRandSeed() );

        size_t startOffset = get_random_size_t( 0, buffer1.mSize - size, GetRandSeed() );
        size_t endOffset = get_random_size_t( 0, buffer2.mSize - size, GetRandSeed() );

        cl_int error = clEnqueueCopyBuffer( queue, buffer1, buffer2, startOffset, endOffset, size, 0, NULL, NULL );
        test_error( error, "Unable to enqueue buffer copy" );

        memcpy( parentBufferState + buffer2.mOrigin + endOffset, parentBufferState + buffer1.mOrigin + startOffset, size );

        return CL_SUCCESS;
    }
};

class MapAction : public Action
{
public:
    virtual ~MapAction() {}
    virtual const char * GetName( void ) const { return "Map"; }

    virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState )
    {
        size_t size = get_random_size_t( 0, buffer1.mSize, GetRandSeed() );
        size_t start = get_random_size_t( 0, buffer1.mSize - size, GetRandSeed() );

        cl_int error;
        void * mappedPtr = clEnqueueMapBuffer( queue, buffer1, CL_TRUE, (cl_map_flags)( CL_MAP_READ | CL_MAP_WRITE ),
                                               start, size, 0, NULL, NULL, &error );
        test_error( error, "Unable to map buffer" );

        cl_char *cPtr = (cl_char *)mappedPtr;
        for ( size_t i = 0; i < size; i++ )
        {
            cPtr[ i ] |= tag;
            parentBufferState[ i + start + buffer1.mOrigin ] |= tag;
        }

        error = clEnqueueUnmapMemObject( queue, buffer1, mappedPtr, 0, NULL, NULL );
        test_error( error, "Unable to unmap buffer" );

        return CL_SUCCESS;
    }
};
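// Note on MapAction above: the map is blocking (CL_TRUE), so the returned host pointer is valid as
// soon as clEnqueueMapBuffer returns; the modified region is handed back to the device with
// clEnqueueUnmapMemObject, and the tests below call clFinish on the queues before validating, so
// the unmapped writes have completed by the time the parent buffer is read back.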

class KernelReadWriteAction : public Action
{
public:
    virtual ~KernelReadWriteAction() {}
    virtual const char * GetName( void ) const { return "KernelReadWrite"; }

    virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState )
    {
        const char *kernelCode[] = {
            "__kernel void readTest( __global char *inBuffer, char tag )\n"
            "{\n"
            "    int tid = get_global_id(0);\n"
            "    inBuffer[ tid ] |= tag;\n"
            "}\n" };

        clProgramWrapper program;
        clKernelWrapper kernel;
        cl_int error;

        if ( create_single_kernel_helper( context, &program, &kernel, 1, kernelCode, "readTest" ) )
        {
            return -1;
        }

        size_t threads[1] = { buffer1.mSize };

        error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &buffer1 );
        test_error( error, "Unable to set kernel argument" );
        error = clSetKernelArg( kernel, 1, sizeof( tag ), &tag );
        test_error( error, "Unable to set kernel argument" );

        error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
        test_error( error, "Unable to queue kernel" );

        for ( size_t i = 0; i < buffer1.mSize; i++ )
            parentBufferState[ i + buffer1.mOrigin ] |= tag;

        return CL_SUCCESS;
    }
};
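// Note on KernelReadWriteAction above: the kernel is enqueued without waiting for completion; the
// host-side shadow state is updated optimistically, and correctness is only checked after both
// queues have been finished in test_sub_buffers_read_write_core below.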

cl_int get_reasonable_buffer_size( cl_device_id device, size_t &outSize )
{
    cl_ulong maxAllocSize;
    cl_int error;

    // Get the largest possible buffer we could allocate
    error = clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
    test_error( error, "Unable to get max alloc size" );

    // Don't create a buffer quite that big, just so we have some space left over for other work
    outSize = (size_t)( maxAllocSize / 5 );

    // Cap at 32M so tests complete in a reasonable amount of time.
    if ( outSize > 32 << 20 )
        outSize = 32 << 20;

    return CL_SUCCESS;
}

size_t find_subbuffer_by_index( SubBufferWrapper * subBuffers, size_t numSubBuffers, size_t index )
{
    for ( size_t i = 0; i < numSubBuffers; i++ )
    {
        if ( subBuffers[ i ].mOrigin > index )
            return numSubBuffers;
        if ( ( subBuffers[ i ].mOrigin <= index ) && ( ( subBuffers[ i ].mOrigin + subBuffers[ i ].mSize ) > index ) )
            return i;
    }
    return numSubBuffers;
}

// This tests the read/write capabilities of sub-buffers (since the sub-buffers are read/write,
// they must not overlap)
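// (Concurrent commands that write to overlapping regions without explicit synchronization produce
// undefined contents under the OpenCL memory model, which is why the regions created below are
// laid out sequentially and never overlap.)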
int test_sub_buffers_read_write_core( cl_context context, cl_command_queue queueA, cl_command_queue queueB, size_t mainSize, size_t addressAlign )
{
    clMemWrapper mainBuffer;
    SubBufferWrapper subBuffers[ 8 ];
    size_t numSubBuffers;
    cl_int error;
    size_t i;
    MTdata m = init_genrand( 22 );

    cl_char * mainBufferContents = (cl_char *)calloc( 1, mainSize );
    cl_char * actualResults = (cl_char *)calloc( 1, mainSize );

    for ( i = 0; i < mainSize / 4; i++ )
        ((cl_uint *) mainBufferContents)[ i ] = genrand_int32( m );

    free_mtdata( m );

    // Create the main buffer to test against
    mainBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, mainSize, mainBufferContents, &error );
    test_error( error, "Unable to create test main buffer" );

    // Create some sub-buffers to use
    size_t toStartFrom = 0;
    for ( numSubBuffers = 0; numSubBuffers < 8; numSubBuffers++ )
    {
        size_t endRange = toStartFrom + ( mainSize / 4 );
        if ( endRange > mainSize )
            endRange = mainSize;

        size_t offset = get_random_size_t( toStartFrom / addressAlign, endRange / addressAlign, Action::GetRandSeed() ) * addressAlign;
        size_t size = get_random_size_t( 1, ( MIN( mainSize / 8, mainSize - offset ) ) / addressAlign, Action::GetRandSeed() ) * addressAlign;
        error = subBuffers[ numSubBuffers ].Allocate( mainBuffer, CL_MEM_READ_WRITE, offset, size );
        test_error( error, "Unable to allocate sub buffer" );

        toStartFrom = offset + size;
        if ( toStartFrom > ( mainSize - ( addressAlign * 256 ) ) )
            break;
    }
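    // At this point the sub-buffers are non-overlapping and sorted by ascending origin, and both
    // their origins and sizes are multiples of addressAlign. find_subbuffer_by_index() above
    // relies on that ordering when mapping a failing byte back to the sub-buffer that covers it.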

    ReadWriteAction rwAction;
    MapAction mapAction;
    CopyAction copyAction;
    KernelReadWriteAction kernelAction;

    Action * actions[] = { &rwAction, &mapAction, &copyAction, &kernelAction };
    int numErrors = 0;

    // Do the following steps twice, to make sure the parent gets updated *and* we can
    // still work on the sub-buffers
    cl_command_queue prev_queue = queueA;
    for ( int time = 0; time < 2; time++ )
    {
        // Randomly apply actions to the set of sub-buffers
        size_t i;
        for ( i = 0; i < 64; i++ )
        {
            int which = random_in_range( 0, 3, Action::GetRandSeed() );
            int whichQueue = random_in_range( 0, 1, Action::GetRandSeed() );
            int whichBufferA = random_in_range( 0, (int)numSubBuffers - 1, Action::GetRandSeed() );
            int whichBufferB;
            do
            {
                whichBufferB = random_in_range( 0, (int)numSubBuffers - 1, Action::GetRandSeed() );
            } while ( whichBufferB == whichBufferA );

            cl_command_queue queue = ( whichQueue == 1 ) ? queueB : queueA;
            if ( queue != prev_queue )
            {
                error = clFinish( prev_queue );
                test_error( error, "Error finishing other queue." );

                prev_queue = queue;
            }

            error = actions[ which ]->Execute( context, queue, (cl_int)i, subBuffers[ whichBufferA ], subBuffers[ whichBufferB ], mainBufferContents );
            test_error( error, "Unable to execute action against sub buffers" );
        }

        error = clFinish( queueA );
        test_error( error, "Error finishing queueA." );

        error = clFinish( queueB );
        test_error( error, "Error finishing queueB." );

        // Validate by reading the final contents of the main buffer and
        // comparing against the reference copy we generated
        error = clEnqueueReadBuffer( queueA, mainBuffer, CL_TRUE, 0, mainSize, actualResults, 0, NULL, NULL );
        test_error( error, "Unable to enqueue buffer read" );

        for ( i = 0; i < mainSize; i += 65536 )
        {
            size_t left = 65536;
            if ( ( i + left ) > mainSize )
                left = mainSize - i;

            if ( memcmp( actualResults + i, mainBufferContents + i, left ) == 0 )
                continue;

            // The fast compare failed, so we need to determine where exactly the failure is

            for ( size_t j = 0; j < left; j++ )
            {
                if ( actualResults[ i + j ] != mainBufferContents[ i + j ] )
                {
                    // Hit a failure; report the sub-buffer at this address as having failed
                    size_t sbThatFailed = find_subbuffer_by_index( subBuffers, numSubBuffers, i + j );
                    if ( sbThatFailed == numSubBuffers )
                    {
                        log_error( "ERROR: Validation failure outside of a sub-buffer! (Shouldn't be possible, but it happened at index %ld out of %ld...)\n", i + j, mainSize );
                        // Since this is nonsensical, don't bother continuing to check
                        // (we will, however, print our map of sub-buffers for comparison)
                        for ( size_t k = 0; k < numSubBuffers; k++ )
                        {
                            log_error( "\tBuffer %ld: %ld to %ld (length %ld)\n", k, subBuffers[ k ].mOrigin, subBuffers[ k ].mOrigin + subBuffers[ k ].mSize, subBuffers[ k ].mSize );
                        }
                        return -1;
                    }
                    log_error( "ERROR: Validation failure on sub-buffer %ld (start: %ld, length: %ld)\n", sbThatFailed, subBuffers[ sbThatFailed ].mOrigin, subBuffers[ sbThatFailed ].mSize );
                    size_t newPos = subBuffers[ sbThatFailed ].mOrigin + subBuffers[ sbThatFailed ].mSize - 1;
                    i = newPos & ~65535;
                    j = newPos - i;
                    numErrors++;
                }
            }
        }
    }

    free( mainBufferContents );
    free( actualResults );
    Action::FreeRandSeed();

    return numErrors;
}

int test_sub_buffers_read_write( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
    cl_int error;
    size_t mainSize;
    cl_uint addressAlignBits;

    // Get the size of the main buffer to use
    error = get_reasonable_buffer_size( deviceID, mainSize );
    test_error( error, "Unable to get reasonable buffer size" );

    // Determine the alignment of the device so we can make sure sub buffers are valid
    error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlignBits ), &addressAlignBits, NULL );
    test_error( error, "Unable to get device's address alignment" );

    size_t addressAlign = addressAlignBits / 8;

    return test_sub_buffers_read_write_core( context, queue, queue, mainSize, addressAlign );
}

// This test performs the same basic operations as sub_buffers_read_write, but instead of a single
// device, it creates a context and buffer shared between two devices, then executes commands
// on queues for each device to ensure that everything still operates as expected.
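// A single cl_mem can only be shared by devices that belong to the same cl_context, which is why
// this test builds its own two-device context and queues below rather than reusing the ones passed in.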
int test_sub_buffers_read_write_dual_devices( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
    cl_int error;

    // First obtain the second device
    cl_device_id otherDevice = GetOpposingDevice( deviceID );
    if ( otherDevice == NULL )
    {
        log_error( "ERROR: Unable to obtain a second device for sub-buffer dual-device test.\n" );
        return -1;
    }
    if ( otherDevice == deviceID )
    {
        log_info( "Note: Unable to run dual-device sub-buffer test (only one device available). Skipping test (implicitly passing).\n" );
        return 0;
    }

    // Get the name of the second device, for logging
    size_t param_size;
    error = clGetDeviceInfo( otherDevice, CL_DEVICE_NAME, 0, NULL, &param_size );
    test_error( error, "Error obtaining device name" );

#if !(defined(_WIN32) && defined(_MSC_VER))
    char device_name[param_size];
#else
    char* device_name = (char*)_malloca(param_size);
#endif
    error = clGetDeviceInfo( otherDevice, CL_DEVICE_NAME, param_size, &device_name[0], NULL );
    test_error( error, "Error obtaining device name" );

    log_info( "\tOther device obtained for dual device test is type %s\n", device_name );

    // Create a shared context for these two devices
    cl_device_id devices[ 2 ] = { deviceID, otherDevice };
    clContextWrapper testingContext = clCreateContext( NULL, 2, devices, NULL, NULL, &error );
    test_error( error, "Unable to create shared context" );

    // Create two queues (can't use the existing one, because it's on the wrong context)
    clCommandQueueWrapper queue1 = clCreateCommandQueue( testingContext, deviceID, 0, &error );
    test_error( error, "Unable to create command queue on main device" );

    clCommandQueueWrapper queue2 = clCreateCommandQueue( testingContext, otherDevice, 0, &error );
    test_error( error, "Unable to create command queue on secondary device" );

    // Determine the reasonable buffer size and address alignment that applies to BOTH devices
    size_t maxBuffer1, maxBuffer2;
    error = get_reasonable_buffer_size( deviceID, maxBuffer1 );
    test_error( error, "Unable to get buffer size for main device" );

    error = get_reasonable_buffer_size( otherDevice, maxBuffer2 );
    test_error( error, "Unable to get buffer size for secondary device" );
    maxBuffer1 = MIN( maxBuffer1, maxBuffer2 );

    cl_uint addressAlign1Bits, addressAlign2Bits;
    error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign1Bits ), &addressAlign1Bits, NULL );
    test_error( error, "Unable to get main device's address alignment" );

    error = clGetDeviceInfo( otherDevice, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign2Bits ), &addressAlign2Bits, NULL );
    test_error( error, "Unable to get secondary device's address alignment" );

    cl_uint addressAlign1 = MAX( addressAlign1Bits, addressAlign2Bits ) / 8;

    // Finally time to run!
    return test_sub_buffers_read_write_core( testingContext, queue1, queue2, maxBuffer1, addressAlign1 );
}

cl_int read_buffer_via_kernel( cl_context context, cl_command_queue queue, cl_mem buffer, size_t length, cl_char *outResults )
{
    const char *kernelCode[] = {
        "__kernel void readTest( __global char *inBuffer, __global char *outBuffer )\n"
        "{\n"
        "    int tid = get_global_id(0);\n"
        "    outBuffer[ tid ] = inBuffer[ tid ];\n"
        "}\n" };

    clProgramWrapper program;
    clKernelWrapper kernel;
    cl_int error;

    if ( create_single_kernel_helper( context, &program, &kernel, 1, kernelCode, "readTest" ) )
    {
        return -1;
    }

    size_t threads[1] = { length };

    clMemWrapper outStream = clCreateBuffer( context, CL_MEM_READ_WRITE, length, NULL, &error );
    test_error( error, "Unable to create output stream" );

    error = clSetKernelArg( kernel, 0, sizeof( buffer ), &buffer );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 1, sizeof( outStream ), &outStream );
    test_error( error, "Unable to set kernel argument" );

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
    test_error( error, "Unable to queue kernel" );

    error = clEnqueueReadBuffer( queue, outStream, CL_TRUE, 0, length, outResults, 0, NULL, NULL );
    test_error( error, "Unable to read results from kernel" );

    return CL_SUCCESS;
}

int test_sub_buffers_overlapping( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
    cl_int error;
    size_t mainSize;
    cl_uint addressAlign;

    clMemWrapper mainBuffer;
    SubBufferWrapper subBuffers[ 16 ];

    // Create the main buffer to test against
    error = get_reasonable_buffer_size( deviceID, mainSize );
    test_error( error, "Unable to get reasonable buffer size" );

    mainBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE, mainSize, NULL, &error );
    test_error( error, "Unable to create test main buffer" );

    // Determine the alignment of the device so we can make sure sub buffers are valid
    error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign ), &addressAlign, NULL );
    test_error( error, "Unable to get device's address alignment" );

    // Create some sub-buffers to use. Note: they are allowed to overlap (in fact, we *want* them to overlap)
    for ( size_t i = 0; i < 16; i++ )
    {
        size_t offset = get_random_size_t( 0, mainSize / addressAlign, Action::GetRandSeed() ) * addressAlign;
        size_t size = get_random_size_t( 1, ( mainSize - offset ) / addressAlign, Action::GetRandSeed() ) * addressAlign;

        error = subBuffers[ i ].Allocate( mainBuffer, CL_MEM_READ_ONLY, offset, size );
        test_error( error, "Unable to allocate sub buffer" );
    }

    // For logging, we determine the amount of overlap we just generated
    // Build a fast in-out map to help with generating the stats
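    // The map holds the 32 interval end-points (16 starts + 16 ends) sorted by position: an entry
    // with value i >= 0 marks the start of sub-buffer i, while a value of -(i+1) marks its end.
    // Sweeping the finished map left to right, the running overlap count rises at starts and falls
    // at ends, and 'delta' below accumulates the number of bytes covered by more than one
    // sub-buffer at a time.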
    int sbMap[ 32 ], mapSize = 0;
    for ( int i = 0; i < 16; i++ )
    {
        int j;
        for ( j = 0; j < mapSize; j++ )
        {
            size_t pt = ( sbMap[ j ] < 0 ) ? ( subBuffers[ -sbMap[ j ] - 1 ].mOrigin + subBuffers[ -sbMap[ j ] - 1 ].mSize )
                                           : subBuffers[ sbMap[ j ] ].mOrigin;
            if ( subBuffers[ i ].mOrigin < pt )
            {
                // Origin is before this part of the map, so move the map forward so we can insert
                memmove( &sbMap[ j + 1 ], &sbMap[ j ], sizeof( int ) * ( mapSize - j ) );
                sbMap[ j ] = i;
                mapSize++;
                break;
            }
        }
        if ( j == mapSize )
        {
            sbMap[ j ] = i;
            mapSize++;
        }

        size_t endPt = subBuffers[ i ].mOrigin + subBuffers[ i ].mSize;
        for ( j = 0; j < mapSize; j++ )
        {
            size_t pt = ( sbMap[ j ] < 0 ) ? ( subBuffers[ -sbMap[ j ] - 1 ].mOrigin + subBuffers[ -sbMap[ j ] - 1 ].mSize )
                                           : subBuffers[ sbMap[ j ] ].mOrigin;
            if ( endPt < pt )
            {
                // End point is before this part of the map, so move the map forward so we can insert
                memmove( &sbMap[ j + 1 ], &sbMap[ j ], sizeof( int ) * ( mapSize - j ) );
                sbMap[ j ] = -( i + 1 );
                mapSize++;
                break;
            }
        }
        if ( j == mapSize )
        {
            sbMap[ j ] = -( i + 1 );
            mapSize++;
        }
    }
    long long delta = 0;
    size_t maxOverlap = 1, overlap = 0;
    for ( int i = 0; i < 32; i++ )
    {
        if ( sbMap[ i ] >= 0 )
        {
            overlap++;
            if ( overlap > 1 )
                delta -= (long long)( subBuffers[ sbMap[ i ] ].mOrigin );
            if ( overlap > maxOverlap )
                maxOverlap = overlap;
        }
        else
        {
            if ( overlap > 1 )
                delta += (long long)( subBuffers[ -sbMap[ i ] - 1 ].mOrigin + subBuffers[ -sbMap[ i ] - 1 ].mSize );
            overlap--;
        }
    }

    log_info( "\tTesting %d sub-buffers with %lld overlapping Kbytes (%d%%; as many as %ld buffers overlapping at once)\n",
              16, ( delta / 1024LL ), (int)( delta * 100LL / (long long)mainSize ), maxOverlap );

    // Write some random contents to the main buffer
    cl_char * contents = new cl_char[ mainSize ];
    generate_random_data( kChar, mainSize, Action::GetRandSeed(), contents );

    error = clEnqueueWriteBuffer( queue, mainBuffer, CL_TRUE, 0, mainSize, contents, 0, NULL, NULL );
    test_error( error, "Unable to write to main buffer" );

    // Now read from each sub-buffer and check to make sure that they make sense w.r.t. the main contents
    cl_char * tempBuffer = new cl_char[ mainSize ];

    int numErrors = 0;
    for ( size_t i = 0; i < 16; i++ )
    {
        // Read from this buffer
        int which = random_in_range( 0, 1, Action::GetRandSeed() );
        if ( which )
            error = clEnqueueReadBuffer( queue, subBuffers[ i ], CL_TRUE, 0, subBuffers[ i ].mSize, tempBuffer, 0, NULL, NULL );
        else
            error = read_buffer_via_kernel( context, queue, subBuffers[ i ], subBuffers[ i ].mSize, tempBuffer );
        test_error( error, "Unable to read sub buffer contents" );

        if ( memcmp( tempBuffer, contents + subBuffers[ i ].mOrigin, subBuffers[ i ].mSize ) != 0 )
        {
            log_error( "ERROR: Validation for sub-buffer %ld failed!\n", i );
            numErrors++;
        }
    }

    delete [] contents;
    delete [] tempBuffer;
    Action::FreeRandSeed();

    return numErrors;
}