1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "testBase.h"
17 #include "harness/typeWrappers.h"
18 #include "harness/testHarness.h"
19 #include "harness/conversions.h"
20
21 #include <vector>
22
23 typedef long long int lld;
24 typedef long long unsigned llu;
25
26 const char *test_kernels[] = {
27 "__kernel void kernelA(__global int *dst)\n"
28 "{\n"
29 "\n"
30 " dst[get_global_id(0)]*=3;\n"
31 "\n"
32 "}\n"
33 "__kernel void kernelB(__global int *dst)\n"
34 "{\n"
35 "\n"
36 " dst[get_global_id(0)]++;\n"
37 "\n"
38 "}\n"
39 };
40
41 #define TEST_SIZE 512
42 #define MAX_QUEUES 1000
43
printPartition(cl_device_partition_property partition)44 const char *printPartition(cl_device_partition_property partition)
45 {
46 switch (partition) {
47 case (0): return "<NONE>";
48 case (CL_DEVICE_PARTITION_EQUALLY): return "CL_DEVICE_PARTITION_EQUALLY";
49 case (CL_DEVICE_PARTITION_BY_COUNTS): return "CL_DEVICE_PARTITION_BY_COUNTS";
50 case (CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN): return "CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN";
51 default: return "<unknown>";
52 } // switch
53 }
54
printAffinity(cl_device_affinity_domain affinity)55 const char *printAffinity(cl_device_affinity_domain affinity)
56 {
57 switch (affinity) {
58 case (0): return "<NONE>";
59 case (CL_DEVICE_AFFINITY_DOMAIN_NUMA): return "CL_DEVICE_AFFINITY_DOMAIN_NUMA";
60 case (CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE): return "CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE";
61 case (CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE): return "CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE";
62 case (CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE): return "CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE";
63 case (CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE): return "CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE";
64 case (CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE): return "CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE";
65 default: return "<unknown>";
66 } // switch
67 }
create_single_kernel_helper(cl_context context,cl_program * outProgram,cl_kernel * outKernel,unsigned int numKernelLines,const char ** kernelProgram,const char * kernelName,const cl_device_id * parentDevice)68 int create_single_kernel_helper( cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines, const char **kernelProgram, const char *kernelName, const cl_device_id *parentDevice )
69 {
70 int error = CL_SUCCESS;
71
72 /* Create the program object from source */
73 error = create_single_kernel_helper_create_program(context, outProgram, numKernelLines, kernelProgram);
74 if( *outProgram == NULL || error != CL_SUCCESS)
75 {
76 print_error( error, "clCreateProgramWithSource failed" );
77 return error;
78 }
79
80 /* Compile the program */
81 int buildProgramFailed = 0;
82 int printedSource = 0;
83 error = clBuildProgram( *outProgram, ((parentDevice == NULL) ? 0 : 1), parentDevice, NULL, NULL, NULL );
84 if (error != CL_SUCCESS)
85 {
86 unsigned int i;
87 print_error(error, "clBuildProgram failed");
88 buildProgramFailed = 1;
89 printedSource = 1;
90 log_error( "Original source is: ------------\n" );
91 for( i = 0; i < numKernelLines; i++ )
92 log_error( "%s", kernelProgram[ i ] );
93 }
94
95 // Verify the build status on all devices
96 cl_uint deviceCount = 0;
97 error = clGetProgramInfo( *outProgram, CL_PROGRAM_NUM_DEVICES, sizeof( deviceCount ), &deviceCount, NULL );
98 if (error != CL_SUCCESS) {
99 print_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed");
100 return error;
101 }
102
103 if (deviceCount == 0) {
104 log_error("No devices found for program.\n");
105 return -1;
106 }
107
108 cl_device_id *devices = (cl_device_id*) malloc( deviceCount * sizeof( cl_device_id ) );
109 if( NULL == devices )
110 return -1;
111 memset( devices, 0, deviceCount * sizeof( cl_device_id ));
112 error = clGetProgramInfo( *outProgram, CL_PROGRAM_DEVICES, sizeof( cl_device_id ) * deviceCount, devices, NULL );
113 if (error != CL_SUCCESS) {
114 print_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed");
115 free( devices );
116 return error;
117 }
118
119 cl_uint z;
120 for( z = 0; z < deviceCount; z++ )
121 {
122 char deviceName[4096] = "";
123 error = clGetDeviceInfo(devices[z], CL_DEVICE_NAME, sizeof( deviceName), deviceName, NULL);
124 if (error != CL_SUCCESS || deviceName[0] == '\0') {
125 log_error("Device \"%d\" failed to return a name\n", z);
126 print_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed");
127 }
128
129 cl_build_status buildStatus;
130 error = clGetProgramBuildInfo(*outProgram, devices[z], CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL);
131 if (error != CL_SUCCESS) {
132 print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
133 free( devices );
134 return error;
135 }
136
137 if (buildStatus != CL_BUILD_SUCCESS || buildProgramFailed) {
138 char log[10240] = "";
139 if (buildStatus == CL_BUILD_SUCCESS && buildProgramFailed) log_error("clBuildProgram returned an error, but buildStatus is marked as CL_BUILD_SUCCESS.\n");
140
141 char statusString[64] = "";
142 if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS)
143 sprintf(statusString, "CL_BUILD_SUCCESS");
144 else if (buildStatus == (cl_build_status)CL_BUILD_NONE)
145 sprintf(statusString, "CL_BUILD_NONE");
146 else if (buildStatus == (cl_build_status)CL_BUILD_ERROR)
147 sprintf(statusString, "CL_BUILD_ERROR");
148 else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS)
149 sprintf(statusString, "CL_BUILD_IN_PROGRESS");
150 else
151 sprintf(statusString, "UNKNOWN (%d)", buildStatus);
152
153 if (buildStatus != CL_BUILD_SUCCESS) log_error("Build not successful for device \"%s\", status: %s\n", deviceName, statusString);
154 error = clGetProgramBuildInfo( *outProgram, devices[z], CL_PROGRAM_BUILD_LOG, sizeof(log), log, NULL );
155 if (error != CL_SUCCESS || log[0]=='\0'){
156 log_error("Device %d (%s) failed to return a build log\n", z, deviceName);
157 if (error) {
158 print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
159 free( devices );
160 return error;
161 } else {
162 log_error("clGetProgramBuildInfo returned an empty log.\n");
163 free( devices );
164 return -1;
165 }
166 }
167 // In this case we've already printed out the code above.
168 if (!printedSource)
169 {
170 unsigned int i;
171 log_error( "Original source is: ------------\n" );
172 for( i = 0; i < numKernelLines; i++ )
173 log_error( "%s", kernelProgram[ i ] );
174 printedSource = 1;
175 }
176 log_error( "Build log for device \"%s\" is: ------------\n", deviceName );
177 log_error( "%s\n", log );
178 log_error( "\n----------\n" );
179 free( devices );
180 return -1;
181 }
182 }
183
184 /* And create a kernel from it */
185 *outKernel = clCreateKernel( *outProgram, kernelName, &error );
186 if( *outKernel == NULL || error != CL_SUCCESS)
187 {
188 print_error( error, "Unable to create kernel" );
189 free( devices );
190 return error;
191 }
192
193 free( devices );
194 return 0;
195 }
196
197 template<class T>
198 class AutoDestructArray
199 {
200 public:
AutoDestructArray(T * arr)201 AutoDestructArray(T* arr) : m_arr(arr) {}
~AutoDestructArray()202 ~AutoDestructArray() { if (m_arr) delete [] m_arr; }
203
204 private:
205 T* m_arr;
206 };
207
test_device_set(size_t deviceCount,size_t queueCount,cl_device_id * devices,int num_elements,cl_device_id * parentDevice=NULL)208 int test_device_set(size_t deviceCount, size_t queueCount, cl_device_id *devices, int num_elements, cl_device_id *parentDevice = NULL)
209 {
210 int error;
211 clContextWrapper context;
212 clProgramWrapper program;
213 clKernelWrapper kernels[2];
214 clMemWrapper stream;
215 clCommandQueueWrapper queues[MAX_QUEUES];
216 size_t threads[1], localThreads[1];
217 int data[TEST_SIZE];
218 int outputData[TEST_SIZE];
219 int expectedResults[TEST_SIZE];
220 int *expectedResultsOneDeviceArray = new int[deviceCount * TEST_SIZE];
221 int **expectedResultsOneDevice = (int**)alloca(sizeof(int**) * deviceCount);
222 size_t i;
223 AutoDestructArray<int> autoDestruct(expectedResultsOneDeviceArray);
224
225 for (i=0; i<deviceCount; i++) {
226 expectedResultsOneDevice[i] = expectedResultsOneDeviceArray + (i * TEST_SIZE);
227 }
228
229 memset(queues, 0, sizeof(queues));
230
231 RandomSeed seed( gRandomSeed );
232
233 if (queueCount > MAX_QUEUES) {
234 log_error("Number of queues (%ld) is greater than the number for which the test was written (%d).", queueCount, MAX_QUEUES);
235 return -1;
236 }
237
238 log_info("Testing with %ld queues on %ld devices, %ld kernel executions.\n", queueCount, deviceCount, queueCount*num_elements/TEST_SIZE);
239
240 for (i=0; i<deviceCount; i++) {
241 size_t deviceNameSize;
242 error = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 0, NULL, &deviceNameSize);
243 test_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed");
244 char *deviceName = (char *)alloca(deviceNameSize * (sizeof(char)));
245 error = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, deviceNameSize, deviceName, NULL);
246 test_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed");
247 log_info("Device %ld is \"%s\".\n", i, deviceName);
248 }
249
250 /* Create a context */
251 context = clCreateContext( NULL, (cl_uint)deviceCount, devices, notify_callback, NULL, &error );
252 test_error( error, "Unable to create testing context" );
253
254 /* Create our kernels (they all have the same arguments so we don't need multiple ones for each device) */
255 if( create_single_kernel_helper( context, &program, &kernels[0], 1, test_kernels, "kernelA", parentDevice ) != 0 )
256 {
257 return -1;
258 }
259
260 kernels[1] = clCreateKernel(program, "kernelB", &error);
261 test_error(error, "clCreateKernel failed");
262
263
264 /* Now create I/O streams */
265 for( i = 0; i < TEST_SIZE; i++ )
266 data[i] = genrand_int32(seed);
267
268 stream = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
269 sizeof(cl_int) * TEST_SIZE, data, &error);
270 test_error( error, "Unable to create test array" );
271
272 // Update the expected results
273 for( i = 0; i < TEST_SIZE; i++ ) {
274 expectedResults[i] = data[i];
275 for (size_t j=0; j<deviceCount; j++)
276 expectedResultsOneDevice[j][i] = data[i];
277 }
278
279
280 // Set the arguments
281 error = clSetKernelArg( kernels[0], 0, sizeof( stream ), &stream);
282 test_error( error, "Unable to set kernel arguments" );
283 error = clSetKernelArg( kernels[1], 0, sizeof( stream ), &stream);
284 test_error( error, "Unable to set kernel arguments" );
285
286 /* Run the test */
287 threads[0] = (size_t)TEST_SIZE;
288
289 error = get_max_common_work_group_size( context, kernels[0], threads[0], &localThreads[ 0 ] );
290 test_error( error, "Unable to calc work group size" );
291
292 /* Create work queues */
293 for( i = 0; i < queueCount; i++ )
294 {
295 queues[i] = clCreateCommandQueueWithProperties( context, devices[ i % deviceCount ], 0, &error );
296 if (error != CL_SUCCESS || queues[i] == NULL) {
297 log_info("Could not create queue[%d].\n", (int)i);
298 queueCount = i;
299 break;
300 }
301 }
302 log_info("Testing with %d queues.\n", (int)queueCount);
303
304 /* Enqueue executions */
305 for( int z = 0; z<num_elements/TEST_SIZE; z++) {
306 for( i = 0; i < queueCount; i++ )
307 {
308 // Randomly choose a kernel to execute.
309 int kernel_selection = (int)get_random_float(0, 2, seed);
310 error = clEnqueueNDRangeKernel( queues[ i ], kernels[ kernel_selection ], 1, NULL, threads, localThreads, 0, NULL, NULL );
311 test_error( error, "Kernel execution failed" );
312
313 // Update the expected results
314 for( int j = 0; j < TEST_SIZE; j++ ) {
315 expectedResults[j] = (kernel_selection) ? expectedResults[j]+1 : expectedResults[j]*3;
316 expectedResultsOneDevice[i % deviceCount][j] = (kernel_selection) ? expectedResultsOneDevice[i % deviceCount][j]+1 : expectedResultsOneDevice[i % deviceCount][j]*3;
317 }
318
319 // Force the queue to finish so the next one will be in sync
320 error = clFinish(queues[i]);
321 test_error( error, "clFinish failed");
322 }
323 }
324
325 /* Read results */
326 int errors = 0;
327 for (int q = 0; q<(int)queueCount; q++) {
328 error = clEnqueueReadBuffer( queues[ q ], stream, CL_TRUE, 0, sizeof(cl_int)*TEST_SIZE, (char *)outputData, 0, NULL, NULL );
329 test_error( error, "Unable to get result data set" );
330
331 int errorsThisTime = 0;
332 /* Verify all of the data now */
333 for( i = 0; i < TEST_SIZE; i++ )
334 {
335 if( expectedResults[ i ] != outputData[ i ] )
336 {
337 log_error( "ERROR: Sample data did not verify for queue %d on device %ld (sample %d, expected %d, got %d)\n",
338 q, q % deviceCount, (int)i, expectedResults[ i ], outputData[ i ] );
339 for (size_t j=0; j<deviceCount; j++) {
340 if (expectedResultsOneDevice[j][i] == outputData[i])
341 log_info("Sample consistent with only device %ld having modified the data.\n", j);
342 }
343 errorsThisTime++;
344 break;
345 }
346 }
347 if (errorsThisTime)
348 errors++;
349 }
350
351 /* All done now! */
352 if (errors)
353 return -1;
354 return 0;
355 }
356
357
init_device_partition_test(cl_device_id parentDevice,cl_uint & maxComputeUnits,cl_uint & maxSubDevices)358 int init_device_partition_test(cl_device_id parentDevice, cl_uint &maxComputeUnits, cl_uint &maxSubDevices)
359 {
360 int err = clGetDeviceInfo(parentDevice, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(maxComputeUnits), &maxComputeUnits, NULL);
361 test_error( err, "Unable to get maximal number of compute units" );
362 err = clGetDeviceInfo(parentDevice, CL_DEVICE_PARTITION_MAX_SUB_DEVICES, sizeof(maxSubDevices), &maxSubDevices, NULL);
363 test_error( err, "Unable to get maximal number of sub-devices" );
364
365 log_info("Maximal number of sub-devices on device %p is %d.\n", parentDevice, maxSubDevices );
366 return 0;
367 }
368
test_device_partition_type_support(cl_device_id parentDevice,const cl_device_partition_property partitionType,const cl_device_affinity_domain affinityDomain)369 int test_device_partition_type_support(cl_device_id parentDevice, const cl_device_partition_property partitionType, const cl_device_affinity_domain affinityDomain)
370 {
371 typedef std::vector< cl_device_partition_property > properties_t;
372 properties_t supportedProps( 3 ); // only 3 types defined in the spec (but implementation can define more)
373 size_t const propSize = sizeof( cl_device_partition_property ); // Size of one property in bytes.
374 size_t size; // size of all properties in bytes.
375 cl_int err;
376 size = 0;
377 err = clGetDeviceInfo( parentDevice, CL_DEVICE_PARTITION_PROPERTIES, 0, NULL, & size );
378 if ( err == CL_SUCCESS ) {
379 if ( size % propSize != 0 ) {
380 log_error( "ERROR: clGetDeviceInfo: Bad size of returned partition properties (%llu), it must me a multiply of partition property size (%llu)\n", llu( size ), llu( propSize ) );
381 return -1;
382 }
383 supportedProps.resize( size / propSize );
384 size = 0;
385 err = clGetDeviceInfo( parentDevice, CL_DEVICE_PARTITION_PROPERTIES, supportedProps.size() * propSize, & supportedProps.front(), & size );
386 test_error_ret( err, "Unable to get device partition properties (2)", -1 );
387 } else if ( err == CL_INVALID_VALUE ) {
388 log_error( "ERROR: clGetDeviceInfo: CL_DEVICE_PARTITION_PROPERTIES is not supported.\n" );
389 return -1;
390 } else {
391 test_error_ret( err, "Unable to get device partition properties (1)", -1 );
392 };
393 for ( int i = 0; i < supportedProps.size(); i++)
394 {
395 if (supportedProps[i] == partitionType)
396 {
397 if (partitionType == CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN)
398 {
399 cl_device_affinity_domain supportedAffinityDomain;
400 err = clGetDeviceInfo(parentDevice, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, sizeof(supportedAffinityDomain), &supportedAffinityDomain, NULL);
401 test_error( err, "Unable to get supported affinity domains" );
402 if (supportedAffinityDomain & affinityDomain)
403 return 0;
404 }
405 else
406 return 0;
407 }
408 }
409
410 return -1;
411 }
412
test_partition_of_device(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,cl_device_partition_property * partition_type,cl_uint starting_property,cl_uint ending_property)413 int test_partition_of_device(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, cl_device_partition_property *partition_type,
414 cl_uint starting_property, cl_uint ending_property)
415 {
416 cl_uint maxComputeUnits;
417 cl_uint maxSubDevices; // maximal number of sub-devices that can be created in one call to clCreateSubDevices
418 int err = 0;
419
420 if (init_device_partition_test(deviceID, maxComputeUnits, maxSubDevices) != 0)
421 return -1;
422
423 if (maxComputeUnits <= 1)
424 return 0;
425 // confirm that this devices reports how it was partitioned
426 if (partition_type != NULL)
427 { // if we're not the root device
428 size_t psize;
429 err = clGetDeviceInfo(deviceID, CL_DEVICE_PARTITION_TYPE, 0, NULL, &psize);
430 test_error( err, "Unable to get CL_DEVICE_PARTITION_TYPE" );
431 cl_device_partition_property *properties_returned = (cl_device_partition_property *)alloca(psize);
432 err = clGetDeviceInfo(deviceID, CL_DEVICE_PARTITION_TYPE, psize, (void *) properties_returned, NULL);
433 test_error( err, "Unable to get CL_DEVICE_PARTITION_TYPE" );
434
435 // test returned type
436 for (cl_uint i = 0;i < psize / sizeof(cl_device_partition_property);i++) {
437 if (properties_returned[i] != partition_type[i]) {
438 if (!(partition_type[0] == CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN &&
439 i == 1 && partition_type[1] == CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE &&
440 (properties_returned[1] == CL_DEVICE_AFFINITY_DOMAIN_NUMA ||
441 properties_returned[1] == CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE ||
442 properties_returned[1] == CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE ||
443 properties_returned[1] == CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE ||
444 properties_returned[1] == CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE))) {
445 log_error("properties_returned[%d] 0x%x != 0x%x partition_type[%d].", i, properties_returned[i], partition_type[i], i);
446 return -1;
447 }
448 }
449 } // for
450 }
451
452 #define PROPERTY_TYPES 8
453 cl_device_partition_property partitionProp[PROPERTY_TYPES][5] = {
454 { CL_DEVICE_PARTITION_EQUALLY, maxComputeUnits / 2, 0, 0, 0 } ,
455 { CL_DEVICE_PARTITION_BY_COUNTS, 1, maxComputeUnits - 1, CL_DEVICE_PARTITION_BY_COUNTS_LIST_END, 0 } ,
456 { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_NUMA, 0, 0, 0 } ,
457 { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE, 0, 0, 0 } ,
458 { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE, 0, 0, 0 } ,
459 { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE, 0, 0, 0 } ,
460 { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE, 0, 0, 0 } ,
461 { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE, 0, 0, 0 }
462 };
463
464 // loop thru each type, creating sub-devices for each type
465 for (cl_uint i = starting_property;i < ending_property;i++) {
466
467 if (test_device_partition_type_support(deviceID, partitionProp[i][0], partitionProp[i][1]) != 0)
468 {
469 if (partitionProp[i][0] == CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN)
470 {
471 log_info( "Device partition type \"%s\" \"%s\" is not supported on device %p. Skipping test...\n",
472 printPartition(partitionProp[i][0]),
473 printAffinity(partitionProp[i][1]), deviceID);
474 }
475 else
476 {
477 log_info( "Device partition type \"%s\" is not supported on device %p. Skipping test...\n",
478 printPartition(partitionProp[i][0]), deviceID);
479 }
480 continue;
481 }
482
483 if (partitionProp[i][0] == CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN)
484 {
485 log_info("Testing on device %p partition type \"%s\" \"%s\"\n", deviceID, printPartition(partitionProp[i][0]),
486 printAffinity(partitionProp[i][1]));
487 }
488 else
489 {
490 log_info("Testing on device %p partition type \"%s\" (%d,%d)\n", deviceID, printPartition(partitionProp[i][0]),
491 partitionProp[i][1], partitionProp[i][2]);
492 }
493
494 cl_uint deviceCount;
495
496 // how many sub-devices can we create?
497 err = clCreateSubDevices(deviceID, partitionProp[i], 0, NULL, &deviceCount);
498 if ( err == CL_DEVICE_PARTITION_FAILED ) {
499 log_info( "The device %p could not be further partitioned.\n", deviceID );
500 continue;
501 }
502 test_error( err, "Failed to get number of sub-devices" );
503
504 // get the list of subDevices
505 // create room for 1 more device_id, so that we can put the parent device in there.
506 cl_device_id *subDevices = (cl_device_id*)alloca(sizeof(cl_device_id) * (deviceCount + 1));
507 err = clCreateSubDevices(deviceID, partitionProp[i], deviceCount, subDevices, &deviceCount);
508 test_error( err, "Actual creation of sub-devices failed" );
509
510 log_info("Testing on all devices in context\n");
511 err = test_device_set(deviceCount, deviceCount, subDevices, num_elements);
512 if (err == 0)
513 {
514 log_info("Testing on a parent device for context\n");
515
516 // add the parent device
517 subDevices[deviceCount] = deviceID;
518 err = test_device_set(deviceCount + 1, deviceCount, subDevices, num_elements, &deviceID);
519 }
520 if (err != 0)
521 {
522 printf("error! returning %d\n",err);
523 return err;
524 }
525
526 // now, recurse and test the FIRST of these sub-devices, to make sure it can be further partitioned
527 err = test_partition_of_device(subDevices[0], context, queue, num_elements, partitionProp[i], starting_property, ending_property);
528 if (err != 0)
529 {
530 printf("error! returning %d\n",err);
531 return err;
532 }
533
534 for (cl_uint j=0;j < deviceCount;j++)
535 {
536 err = clReleaseDevice(subDevices[j]);
537 test_error( err, "\n Releasing sub-device failed \n" );
538 }
539
540 } // for
541
542 log_info("Testing on all device %p finished\n", deviceID);
543 return 0;
544 }
545
546
test_partition_equally(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)547 int test_partition_equally(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
548 {
549 return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 0, 1);
550 }
551
test_partition_by_counts(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)552 int test_partition_by_counts(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
553 {
554 return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 1, 2);
555 }
556
test_partition_by_affinity_domain_numa(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)557 int test_partition_by_affinity_domain_numa(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
558 {
559 return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 2, 3);
560 }
561
test_partition_by_affinity_domain_l4_cache(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)562 int test_partition_by_affinity_domain_l4_cache(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
563 {
564 return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 3, 4);
565 }
566
test_partition_by_affinity_domain_l3_cache(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)567 int test_partition_by_affinity_domain_l3_cache(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
568 {
569 return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 4, 5);
570 }
571
test_partition_by_affinity_domain_l2_cache(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)572 int test_partition_by_affinity_domain_l2_cache(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
573 {
574 return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 5, 6);
575 }
576
test_partition_by_affinity_domain_l1_cache(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)577 int test_partition_by_affinity_domain_l1_cache(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
578 {
579 return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 6, 7);
580 }
581
test_partition_by_affinity_domain_next_partitionable(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)582 int test_partition_by_affinity_domain_next_partitionable(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
583 {
584 return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 7, 8);
585 }
586
test_partition_all(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)587 int test_partition_all(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
588 {
589 return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 0, 8);
590 }
591