//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "procs.h"

// Design:
// To test sub-buffers, we first create one main buffer, then create several sub-buffers of it
// and queue Actions on each one. Each Action is encapsulated in a class so it can keep track of
// the results it expects, and so we can scale the number of Actions and the number of
// sub-buffers independently.

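// RAII wrapper for a sub-buffer: Allocate() carves a region out of a parent buffer with
// clCreateSubBuffer( ..., CL_BUFFER_CREATE_TYPE_REGION, ... ) and remembers the origin and size
// so the Actions below can mirror their device-side writes into a host-side shadow copy of the
// parent. The region origin must honor the device's CL_DEVICE_MEM_BASE_ADDR_ALIGN requirement,
// which is why the callers round their offsets to the device alignment.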
class SubBufferWrapper : public clMemWrapper
{
public:
    cl_mem mParentBuffer;
    size_t mOrigin;
    size_t mSize;

    cl_int Allocate( cl_mem parent, cl_mem_flags flags, size_t origin, size_t size )
    {
        mParentBuffer = parent;
        mOrigin = origin;
        mSize = size;

        cl_buffer_region region;
        region.origin = mOrigin;
        region.size = mSize;

        cl_int error;
        mMem = clCreateSubBuffer( mParentBuffer, flags, CL_BUFFER_CREATE_TYPE_REGION, &region, &error );
        return error;
    }
};

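// Base class for the operations we randomly apply to the sub-buffers. Each Execute() call
// performs some operation on one or two sub-buffers on the device and applies the equivalent
// update to parentBufferState, the host-side shadow copy used later to validate the parent
// buffer. All actions share one MTdata random seed, released via FreeRandSeed() at the end of
// each test.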
class Action
{
public:
    virtual ~Action() {}
    virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState ) = 0;
    virtual const char * GetName( void ) const = 0;

    static MTdata d;
    static MTdata GetRandSeed( void )
    {
        if ( d == 0 )
            d = init_genrand( gRandomSeed );
        return d;
    }
    static void FreeRandSeed() {
        if ( d != 0 ) {
            free_mtdata(d);
            d = 0;
        }
    }
};

MTdata Action::d = 0;

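// Reads the whole sub-buffer to the host, ORs the tag into a random sub-range, and writes the
// result back with clEnqueueWriteBuffer.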
class ReadWriteAction : public Action
{
public:
    virtual ~ReadWriteAction() {}
    virtual const char * GetName( void ) const { return "ReadWrite";}

    virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState )
    {
        cl_char *tempBuffer = (cl_char*)malloc(buffer1.mSize);
        if (!tempBuffer) {
            log_error("Out of memory\n");
            return -1;
        }
        cl_int error = clEnqueueReadBuffer( queue, buffer1, CL_TRUE, 0, buffer1.mSize, tempBuffer, 0, NULL, NULL );
        test_error( error, "Unable to enqueue buffer read" );

        size_t start = get_random_size_t( 0, buffer1.mSize / 2, GetRandSeed() );
        size_t end = get_random_size_t( start, buffer1.mSize, GetRandSeed() );

        for ( size_t i = start; i < end; i++ )
        {
            tempBuffer[ i ] |= tag;
            parentBufferState[ i + buffer1.mOrigin ] |= tag;
        }

        error = clEnqueueWriteBuffer( queue, buffer1, CL_TRUE, 0, buffer1.mSize, tempBuffer, 0, NULL, NULL );
        test_error( error, "Unable to enqueue buffer write" );
        free(tempBuffer);
        return CL_SUCCESS;
    }
};

#ifndef MAX
#define MAX( _a, _b )   ( (_a) > (_b) ? (_a) : (_b) )
#endif
#ifndef MIN
#define MIN( _a, _b )   ( (_a) < (_b) ? (_a) : (_b) )
#endif

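// Copies a random-sized range from one sub-buffer to another with clEnqueueCopyBuffer and
// mirrors the copy in the host-side shadow state.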
class CopyAction : public Action
{
public:
    virtual ~CopyAction() {}
    virtual const char * GetName( void ) const { return "Copy";}

    virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState )
    {
        // Copy from sub-buffer 1 to sub-buffer 2
        size_t size = get_random_size_t( 0, MIN( buffer1.mSize, buffer2.mSize ), GetRandSeed() );

        size_t startOffset = get_random_size_t( 0, buffer1.mSize - size, GetRandSeed() );
        size_t endOffset = get_random_size_t( 0, buffer2.mSize - size, GetRandSeed() );

        cl_int error = clEnqueueCopyBuffer( queue, buffer1, buffer2, startOffset, endOffset, size, 0, NULL, NULL );
        test_error( error, "Unable to enqueue buffer copy" );

        memcpy( parentBufferState + buffer2.mOrigin + endOffset, parentBufferState + buffer1.mOrigin + startOffset, size );

        return CL_SUCCESS;
    }
};

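// Maps a random range of the sub-buffer for read/write access, ORs the tag into it through the
// mapped pointer, then unmaps it.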
class MapAction : public Action
{
public:
    virtual ~MapAction() {}
    virtual const char * GetName( void ) const { return "Map";}

    virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState )
    {
        size_t size = get_random_size_t( 0, buffer1.mSize, GetRandSeed() );
        size_t start = get_random_size_t( 0, buffer1.mSize - size, GetRandSeed() );

        cl_int error;
        void * mappedPtr = clEnqueueMapBuffer( queue, buffer1, CL_TRUE, (cl_map_flags)( CL_MAP_READ | CL_MAP_WRITE ),
                                               start, size, 0, NULL, NULL, &error );
        test_error( error, "Unable to map buffer" );

        cl_char *cPtr = (cl_char *)mappedPtr;
        for ( size_t i = 0; i < size; i++ )
        {
            cPtr[ i ] |= tag;
            parentBufferState[ i + start + buffer1.mOrigin ] |= tag;
        }

        error = clEnqueueUnmapMemObject( queue, buffer1, mappedPtr, 0, NULL, NULL );
        test_error( error, "Unable to unmap buffer" );

        return CL_SUCCESS;
    }
};

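// ORs the tag into every byte of the sub-buffer from inside a kernel, one work-item per byte.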
class KernelReadWriteAction : public Action
{
public:
    virtual ~KernelReadWriteAction() {}
    virtual const char * GetName( void ) const { return "KernelReadWrite";}

    virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState )
    {
        const char *kernelCode[] = {
            "__kernel void readTest( __global char *inBuffer, char tag )\n"
            "{\n"
            "    int tid = get_global_id(0);\n"
            "    inBuffer[ tid ] |= tag;\n"
            "}\n" };

        clProgramWrapper program;
        clKernelWrapper kernel;
        cl_int error;

        if ( create_single_kernel_helper( context, &program, &kernel, 1, kernelCode, "readTest" ) )
        {
            return -1;
        }

        size_t threads[1] = { buffer1.mSize };

        error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &buffer1 );
        test_error( error, "Unable to set kernel argument" );
        error = clSetKernelArg( kernel, 1, sizeof( tag ), &tag );
        test_error( error, "Unable to set kernel argument" );

        error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
        test_error( error, "Unable to queue kernel" );

        for ( size_t i = 0; i < buffer1.mSize; i++ )
            parentBufferState[ i + buffer1.mOrigin ] |= tag;

        return CL_SUCCESS;
    }
};

cl_int get_reasonable_buffer_size( cl_device_id device, size_t &outSize )
{
    cl_ulong maxAllocSize;
    cl_int error;

    // Get the largest possible buffer we could allocate
    error = clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
    test_error( error, "Unable to get max alloc size" );

    // Don't create a buffer quite that big, just so we have some space left over for other work
    outSize = (size_t)( maxAllocSize / 5 );

    // Cap at 32M so tests complete in a reasonable amount of time.
    if ( outSize > 32 << 20 )
        outSize = 32 << 20;

    return CL_SUCCESS;
}

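// Returns the index of the sub-buffer that contains the given parent-buffer offset, or
// numSubBuffers if no sub-buffer covers it. Assumes the sub-buffers are sorted by ascending,
// non-overlapping origin (as created by test_sub_buffers_read_write_core), so it can stop at
// the first sub-buffer whose origin lies past the offset.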
size_t find_subbuffer_by_index( SubBufferWrapper * subBuffers, size_t numSubBuffers, size_t index )
{
    for ( size_t i = 0; i < numSubBuffers; i++ )
    {
        if ( subBuffers[ i ].mOrigin > index )
            return numSubBuffers;
        if ( ( subBuffers[ i ].mOrigin <= index ) && ( ( subBuffers[ i ].mOrigin + subBuffers[ i ].mSize ) > index ) )
            return i;
    }
    return numSubBuffers;
}

// This tests the read/write capabilities of sub-buffers. Since the sub-buffers are created
// read/write, they must not overlap with each other.
int test_sub_buffers_read_write_core( cl_context context, cl_command_queue queueA, cl_command_queue queueB, size_t mainSize, size_t addressAlign )
{
    clMemWrapper mainBuffer;
    SubBufferWrapper subBuffers[ 8 ];
    size_t numSubBuffers;
    cl_int error;
    size_t i;
    MTdata m = init_genrand( 22 );


    cl_char * mainBufferContents = (cl_char*)calloc(1,mainSize);
    cl_char * actualResults      = (cl_char*)calloc(1,mainSize);

    for ( i = 0; i < mainSize / 4; i++ )
        ((cl_uint*) mainBufferContents)[i] = genrand_int32(m);

    free_mtdata( m );

    // Create the main buffer to test against
    mainBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, mainSize, mainBufferContents, &error );
    test_error( error, "Unable to create test main buffer" );

    // Create some sub-buffers to use
    size_t toStartFrom = 0;
    for ( numSubBuffers = 0; numSubBuffers < 8; numSubBuffers++ )
    {
        size_t endRange = toStartFrom + ( mainSize / 4 );
        if ( endRange > mainSize )
            endRange = mainSize;

        size_t offset = get_random_size_t( toStartFrom / addressAlign, endRange / addressAlign, Action::GetRandSeed() ) * addressAlign;
        size_t size = get_random_size_t( 1, ( MIN( mainSize / 8, mainSize - offset ) ) / addressAlign, Action::GetRandSeed() ) * addressAlign;
        error = subBuffers[ numSubBuffers ].Allocate( mainBuffer, CL_MEM_READ_WRITE, offset, size );
        test_error( error, "Unable to allocate sub buffer" );

        toStartFrom = offset + size;
        if ( toStartFrom > ( mainSize - ( addressAlign * 256 ) ) )
            break;
    }

    ReadWriteAction rwAction;
    MapAction mapAction;
    CopyAction copyAction;
    KernelReadWriteAction kernelAction;

    Action * actions[] = { &rwAction, &mapAction, &copyAction, &kernelAction };
    int numErrors = 0;

    // Do the following steps twice, to make sure the parent gets updated *and* we can
    // still work on the sub-buffers
    cl_command_queue prev_queue = queueA;
    for ( int time = 0; time < 2; time++ )
    {
        // Randomly apply actions to the set of sub buffers
        size_t i;
        for (  i = 0; i < 64; i++ )
        {
            int which = random_in_range( 0, 3, Action::GetRandSeed() );
            int whichQueue = random_in_range( 0, 1, Action::GetRandSeed() );
            int whichBufferA = random_in_range( 0, (int)numSubBuffers - 1, Action::GetRandSeed() );
            int whichBufferB;
            do
            {
                whichBufferB = random_in_range( 0, (int)numSubBuffers - 1, Action::GetRandSeed() );
            } while ( whichBufferB == whichBufferA );

            cl_command_queue queue = ( whichQueue == 1 ) ? queueB : queueA;
            if (queue != prev_queue) {
                error = clFinish( prev_queue );
                test_error( error, "Error finishing other queue." );

                prev_queue = queue;
            }

            error = actions[ which ]->Execute( context, queue, (cl_int)i, subBuffers[ whichBufferA ], subBuffers[ whichBufferB ], mainBufferContents );
            test_error( error, "Unable to execute action against sub buffers" );
        }

        error = clFinish( queueA );
        test_error( error, "Error finishing queueA." );

        error = clFinish( queueB );
        test_error( error, "Error finishing queueB." );

        // Validate by reading the final contents of the main buffer and
        // validating against our ref copy we generated
        error = clEnqueueReadBuffer( queueA, mainBuffer, CL_TRUE, 0, mainSize, actualResults, 0, NULL, NULL );
        test_error( error, "Unable to enqueue buffer read" );

        for ( i = 0; i < mainSize; i += 65536 )
        {
            size_t left = 65536;
            if ( ( i + left ) > mainSize )
                left = mainSize - i;

            if ( memcmp( actualResults + i, mainBufferContents + i, left ) == 0 )
                continue;

            // The fast compare failed, so we need to determine where exactly the failure is

            for ( size_t j = 0; j < left; j++ )
            {
                if ( actualResults[ i + j ] != mainBufferContents[ i + j ] )
                {
                    // Hit a failure; report the subbuffer at this address as having failed
                    size_t sbThatFailed = find_subbuffer_by_index( subBuffers, numSubBuffers, i + j );
                    if ( sbThatFailed == numSubBuffers )
                    {
                        log_error( "ERROR: Validation failure outside of a sub-buffer! (Shouldn't be possible, but it happened at index %ld out of %ld...)\n", i + j, mainSize );
                        // Since this is nonsensical, don't bother continuing to check
                        // (we will, however, print our map of sub-buffers for comparison)
                        for ( size_t k = 0; k < numSubBuffers; k++ )
                        {
                            log_error( "\tBuffer %ld: %ld to %ld (length %ld)\n", k, subBuffers[ k ].mOrigin, subBuffers[ k ].mOrigin + subBuffers[ k ].mSize, subBuffers[ k ].mSize );
                        }
                        return -1;
                    }
                    log_error( "ERROR: Validation failure on sub-buffer %ld (start: %ld, length: %ld)\n", sbThatFailed, subBuffers[ sbThatFailed ].mOrigin, subBuffers[ sbThatFailed ].mSize );
                    size_t newPos = subBuffers[ sbThatFailed ].mOrigin + subBuffers[ sbThatFailed ].mSize - 1;
                    i = newPos & ~65535;
                    j = newPos - i;
                    numErrors++;
                }
            }
        }
    }

    free(mainBufferContents);
    free(actualResults);
    Action::FreeRandSeed();

    return numErrors;
}

int test_sub_buffers_read_write( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
    cl_int error;
    size_t mainSize;
    cl_uint addressAlignBits;

    // Get the size of the main buffer to use
    error = get_reasonable_buffer_size( deviceID, mainSize );
    test_error( error, "Unable to get reasonable buffer size" );

    // Determine the alignment of the device so we can make sure sub buffers are valid
    error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlignBits ), &addressAlignBits, NULL );
    test_error( error, "Unable to get device's address alignment" );

    size_t addressAlign = addressAlignBits/8;

    return test_sub_buffers_read_write_core( context, queue, queue, mainSize, addressAlign );
}

// This test performs the same basic operations as sub_buffers_read_write, but instead of a single
// device, it creates a context and buffer shared between two devices, then executes commands
// on queues for each device to ensure that everything still operates as expected.
int test_sub_buffers_read_write_dual_devices( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
    cl_int error;


    // First obtain the second device
    cl_device_id otherDevice = GetOpposingDevice( deviceID );
    if ( otherDevice == NULL )
    {
        log_error( "ERROR: Unable to obtain a second device for sub-buffer dual-device test.\n" );
        return -1;
    }
    if ( otherDevice == deviceID )
    {
        log_info( "Note: Unable to run dual-device sub-buffer test (only one device available). Skipping test (implicitly passing).\n" );
        return 0;
    }

    // Determine the device name.
    size_t param_size;
    error = clGetDeviceInfo(otherDevice, CL_DEVICE_NAME, 0, NULL, &param_size );
    test_error( error, "Error obtaining device name" );

#if !(defined(_WIN32) && defined(_MSC_VER))
    char device_name[param_size];
#else
    char* device_name = (char*)_malloca(param_size);
#endif
    error = clGetDeviceInfo(otherDevice, CL_DEVICE_NAME, param_size, &device_name[0], NULL );
    test_error( error, "Error obtaining device name" );

    log_info( "\tOther device obtained for dual device test is type %s\n", device_name );

    // Create a shared context for these two devices
    cl_device_id devices[ 2 ] = { deviceID, otherDevice };
    clContextWrapper testingContext = clCreateContext( NULL, 2, devices, NULL, NULL, &error );
    test_error( error, "Unable to create shared context" );

    // Create two queues (can't use the existing one, because it's on the wrong context)
    clCommandQueueWrapper queue1 = clCreateCommandQueue( testingContext, deviceID, 0, &error );
    test_error( error, "Unable to create command queue on main device" );

    clCommandQueueWrapper queue2 = clCreateCommandQueue( testingContext, otherDevice, 0, &error );
    test_error( error, "Unable to create command queue on secondary device" );

    // Determine the reasonable buffer size and address alignment that applies to BOTH devices
    size_t maxBuffer1, maxBuffer2;
    error = get_reasonable_buffer_size( deviceID, maxBuffer1 );
    test_error( error, "Unable to get buffer size for main device" );

    error = get_reasonable_buffer_size( otherDevice, maxBuffer2 );
    test_error( error, "Unable to get buffer size for secondary device" );
    maxBuffer1 = MIN( maxBuffer1, maxBuffer2 );

    cl_uint addressAlign1Bits, addressAlign2Bits;
    error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign1Bits ), &addressAlign1Bits, NULL );
    test_error( error, "Unable to get main device's address alignment" );

    error = clGetDeviceInfo( otherDevice, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign2Bits ), &addressAlign2Bits, NULL );
    test_error( error, "Unable to get secondary device's address alignment" );

    cl_uint addressAlign1 = MAX( addressAlign1Bits, addressAlign2Bits ) / 8;

    // Finally time to run!
    return test_sub_buffers_read_write_core( testingContext, queue1, queue2, maxBuffer1, addressAlign1 );
}

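// Reads 'length' bytes from 'buffer' by running a copy kernel into a temporary buffer and then
// reading that temporary buffer back, so the sub-buffer's contents travel through a kernel
// rather than a direct clEnqueueReadBuffer.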
cl_int read_buffer_via_kernel( cl_context context, cl_command_queue queue, cl_mem buffer, size_t length, cl_char *outResults )
{
    const char *kernelCode[] = {
        "__kernel void readTest( __global char *inBuffer, __global char *outBuffer )\n"
        "{\n"
        "    int tid = get_global_id(0);\n"
        "    outBuffer[ tid ] = inBuffer[ tid ];\n"
        "}\n" };

    clProgramWrapper program;
    clKernelWrapper kernel;
    cl_int error;

    if ( create_single_kernel_helper( context, &program, &kernel, 1, kernelCode, "readTest" ) )
    {
        return -1;
    }

    size_t threads[1] = { length };

    clMemWrapper outStream = clCreateBuffer( context, CL_MEM_READ_WRITE, length, NULL, &error );
    test_error( error, "Unable to create output stream" );

    error = clSetKernelArg( kernel, 0, sizeof( buffer ), &buffer );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 1, sizeof( outStream ), &outStream );
    test_error( error, "Unable to set kernel argument" );

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
    test_error( error, "Unable to queue kernel" );

    error = clEnqueueReadBuffer( queue, outStream, CL_TRUE, 0, length, outResults, 0, NULL, NULL );
    test_error( error, "Unable to read results from kernel" );

    return CL_SUCCESS;
}


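// This test creates 16 read-only sub-buffers that are allowed (and expected) to overlap, fills
// the parent buffer with random data, and then verifies that reading each sub-buffer (either
// directly or via a kernel) returns the matching slice of the parent contents.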
int test_sub_buffers_overlapping( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
    cl_int error;
    size_t mainSize;
    cl_uint addressAlign;

    clMemWrapper mainBuffer;
    SubBufferWrapper subBuffers[ 16 ];


    // Create the main buffer to test against
    error = get_reasonable_buffer_size( deviceID, mainSize );
    test_error( error, "Unable to get reasonable buffer size" );

    mainBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE, mainSize, NULL, &error );
    test_error( error, "Unable to create test main buffer" );

    // Determine the alignment of the device so we can make sure sub buffers are valid
    error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign ), &addressAlign, NULL );
    test_error( error, "Unable to get device's address alignment" );

    // Create some sub-buffers to use. Note: they are allowed to overlap (in fact, we *want* them to overlap)
    for ( size_t i = 0; i < 16; i++ )
    {
        size_t offset = get_random_size_t( 0, mainSize / addressAlign, Action::GetRandSeed() ) * addressAlign;
        size_t size = get_random_size_t( 1, ( mainSize - offset ) / addressAlign, Action::GetRandSeed() ) * addressAlign;

        error = subBuffers[ i ].Allocate( mainBuffer, CL_MEM_READ_ONLY, offset, size );
        test_error( error, "Unable to allocate sub buffer" );
    }

    // For logging, we determine the amount of overlap we just generated
    // Build a fast in-out map to help with generating the stats
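    // sbMap stores interval endpoints in ascending position order: a non-negative entry i marks
    // the start of sub-buffer i, and a negative entry -( i + 1 ) marks its end. Walking the map
    // while counting open intervals yields the overlap statistics logged below.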
    int sbMap[ 32 ], mapSize = 0;
    for ( int i = 0; i < 16; i++ )
    {
        int j;
        for ( j = 0; j < mapSize; j++ )
        {
            size_t pt = ( sbMap[ j ] < 0 ) ? ( subBuffers[ -sbMap[ j ] - 1 ].mOrigin + subBuffers[ -sbMap[ j ] - 1 ].mSize )
                        : subBuffers[ sbMap[ j ] ].mOrigin;
            if ( subBuffers[ i ].mOrigin < pt )
            {
                // Origin is before this part of the map, so move map forward so we can insert
                memmove( &sbMap[ j + 1 ], &sbMap[ j ], sizeof( int ) * ( mapSize - j ) );
                sbMap[ j ] = i;
                mapSize++;
                break;
            }
        }
        if ( j == mapSize )
        {
            sbMap[ j ] = i;
            mapSize++;
        }

        size_t endPt = subBuffers[ i ].mOrigin + subBuffers[ i ].mSize;
        for ( j = 0; j < mapSize; j++ )
        {
            size_t pt = ( sbMap[ j ] < 0 ) ? ( subBuffers[ -sbMap[ j ] - 1 ].mOrigin + subBuffers[ -sbMap[ j ] - 1 ].mSize )
                        : subBuffers[ sbMap[ j ] ].mOrigin;
            if ( endPt < pt )
            {
                // End point is before this part of the map, so move map forward so we can insert
                memmove( &sbMap[ j + 1 ], &sbMap[ j ], sizeof( int ) * ( mapSize - j ) );
                sbMap[ j ] = -( i + 1 );
                mapSize++;
                break;
            }
        }
        if ( j == mapSize )
        {
            sbMap[ j ] = -( i + 1 );
            mapSize++;
        }
    }
    long long delta = 0;
    size_t maxOverlap = 1, overlap = 0;
    for ( int i = 0; i < 32; i++ )
    {
        if ( sbMap[ i ] >= 0 )
        {
            overlap++;
            if ( overlap > 1 )
                delta -= (long long)( subBuffers[ sbMap[ i ] ].mOrigin );
            if ( overlap > maxOverlap )
                maxOverlap = overlap;
        }
        else
        {
            if ( overlap > 1 )
                delta += (long long)( subBuffers[ -sbMap[ i ] - 1 ].mOrigin + subBuffers[ -sbMap[ i ] - 1 ].mSize );
            overlap--;
        }
    }

    log_info( "\tTesting %d sub-buffers with %lld overlapping Kbytes (%d%%; as many as %ld buffers overlapping at once)\n",
              16, ( delta / 1024LL ), (int)( delta * 100LL / (long long)mainSize ), maxOverlap );

    // Write some random contents to the main buffer
    cl_char * contents = new cl_char[ mainSize ];
    generate_random_data( kChar, mainSize, Action::GetRandSeed(), contents );

    error = clEnqueueWriteBuffer( queue, mainBuffer, CL_TRUE, 0, mainSize, contents, 0, NULL, NULL );
    test_error( error, "Unable to write to main buffer" );

    // Now read from each sub-buffer and check to make sure that they make sense w.r.t. the main contents
    cl_char * tempBuffer = new cl_char[ mainSize ];

    int numErrors = 0;
    for ( size_t i = 0; i < 16; i++ )
    {
        // Read from this buffer
        int which = random_in_range( 0, 1, Action::GetRandSeed() );
        if ( which )
            error = clEnqueueReadBuffer( queue, subBuffers[ i ], CL_TRUE, 0, subBuffers[ i ].mSize, tempBuffer, 0, NULL, NULL );
        else
            error = read_buffer_via_kernel( context, queue, subBuffers[ i ], subBuffers[ i ].mSize, tempBuffer );
        test_error( error, "Unable to read sub buffer contents" );

        if ( memcmp( tempBuffer, contents + subBuffers[ i ].mOrigin, subBuffers[ i ].mSize ) != 0 )
        {
            log_error( "ERROR: Validation for sub-buffer %ld failed!\n", i );
            numErrors++;
        }
    }

    delete [] contents;
    delete [] tempBuffer;
    Action::FreeRandSeed();

    return numErrors;
}
