//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include <stdio.h>
#include <stdlib.h>

#include "procs.h"
#include "harness/errorHelpers.h"
#include "harness/testHarness.h"

#define MAX_SUB_DEVICES        16        // Limit the sub-devices to ensure no out of resource errors.
#define BUFFER_SIZE        1024

// Kernel source code
static const char *buffer_migrate_kernel_code =
"__kernel void test_buffer_migrate(__global uint *dst, __global uint *src1, __global uint *src2, uint x)\n"
"{\n"
"  int tid = get_global_id(0);\n"
"  dst[tid] = src1[tid] ^ src2[tid] ^ x;\n"
"}\n";
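// Each work item XORs the two inputs with the constant x, so the host can recompute the
// expected output regardless of where the buffers have been migrated.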

enum migrations { MIGRATE_PREFERRED,         // migrate to the preferred sub-device
  MIGRATE_NON_PREFERRED,     // migrate to a randomly chosen non-preferred sub-device
  MIGRATE_RANDOM,        // migrate to a randomly chosen sub-device with randomly chosen flags
  NUMBER_OF_MIGRATIONS };

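// Write the host data into the buffer (blocking) if the buffer was created successfully,
// and return the buffer so creation and initialization can be combined in one expression.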
static cl_mem init_buffer(cl_command_queue cmd_q, cl_mem buffer, cl_uint *data)
{
  cl_int err;

  if (buffer) {
    if ((err = clEnqueueWriteBuffer(cmd_q, buffer, CL_TRUE, 0, sizeof(cl_uint)*BUFFER_SIZE, data, 0, NULL, NULL)) != CL_SUCCESS) {
      print_error(err, "Failed on enqueue write of buffer data.");
    }
  }
  return buffer;
}

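// Migrate each buffer to one sub-device's queue according to the requested migration strategy,
// recording the flags used for each buffer so that content migrated with
// CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED can be restored later.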
static cl_int migrateMemObject(enum migrations migrate, cl_command_queue *queues, cl_mem *mem_objects, cl_uint num_devices, cl_mem_migration_flags *flags, MTdata d)
{
  cl_uint i, j;
  cl_int  err = CL_SUCCESS;

  for (i=0; i<num_devices; i++) {
    j = genrand_int32(d) % num_devices;
    flags[i] = 0;
    switch (migrate) {
      case MIGRATE_PREFERRED:
        // Force the device to be preferred
        j = i;
        break;
      case MIGRATE_NON_PREFERRED:
        // Coerce the device to be non-preferred
        if ((j == i) && (num_devices > 1)) j = (j+1) % num_devices;
        break;
      case MIGRATE_RANDOM:
        // Choose a random set of flags
        flags[i] = (cl_mem_migration_flags)(genrand_int32(d) & (CL_MIGRATE_MEM_OBJECT_HOST | CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED));
        break;
      default: log_error("Unhandled migration type: %d\n", migrate); return -1;
    }
    if ((err = clEnqueueMigrateMemObjects(queues[j], 1, (const cl_mem *)(&mem_objects[i]), flags[i], 0, NULL, NULL)) != CL_SUCCESS) {
      print_error(err, "Failed migrating memory object.");
    }
  }
  return err;
}

static cl_int restoreBuffer(cl_command_queue *queues, cl_mem *buffers, cl_uint num_devices, cl_mem_migration_flags *flags, cl_uint *buffer)
{
  cl_uint i, j;
  cl_int  err;

  // If the buffer was previously migrated with undefined content, reload the content.

  for (i=0; i<num_devices; i++) {
    if (flags[i] & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED) {
      if ((err = clEnqueueWriteBuffer(queues[i], buffers[i], CL_TRUE, 0, sizeof(cl_uint)*BUFFER_SIZE, buffer, 0, NULL, NULL)) != CL_SUCCESS) {
        print_error(err, "Failed on restoration enqueue write of buffer data.");
        return err;
      }
    }
  }
  return CL_SUCCESS;
}

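// Exercise clEnqueueMigrateMemObjects: partition the device by each supported affinity domain,
// migrate the kernel's two input buffers and one output buffer between the resulting sub-devices
// using every combination of migration strategies, run the kernel on each sub-device, and verify
// the results whenever the inputs are still defined.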
int test_buffer_migrate(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  int failed = 0;
  cl_uint i, j;
  cl_int err;
  cl_uint max_sub_devices = 0;
  cl_uint num_devices, num_devices_limited;
  cl_uint A[BUFFER_SIZE], B[BUFFER_SIZE], C[BUFFER_SIZE];
  cl_uint test_number = 1;
  cl_device_affinity_domain domain, domains;
  cl_device_id *devices;
  cl_command_queue *queues;
  cl_mem_migration_flags *flagsA, *flagsB, *flagsC;
  cl_device_partition_property property[] = {CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, 0, 0};
  cl_mem *bufferA, *bufferB, *bufferC;
  cl_program program = NULL;
  cl_kernel kernel = NULL;
  cl_context ctx = NULL;    // context for all sub-devices
  enum migrations migrateA, migrateB, migrateC;
  MTdata d = init_genrand(gRandomSeed);
  const size_t wgs[1] = {BUFFER_SIZE};

  /* Allocate arrays whose size varies according to the maximum number of sub-devices */
  if ((err = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(max_sub_devices), &max_sub_devices, NULL)) != CL_SUCCESS) {
    print_error(err, "clGetDeviceInfo(CL_DEVICE_MAX_COMPUTE_UNITS) failed");
    return -1;
  }
  if (max_sub_devices < 1) {
    log_error("ERROR: Invalid number of compute units returned.\n");
    return -1;
  }
  devices = (cl_device_id *)malloc(max_sub_devices * sizeof(cl_device_id));
  queues = (cl_command_queue *)malloc(max_sub_devices * sizeof(cl_command_queue));
  flagsA = (cl_mem_migration_flags *)malloc(max_sub_devices * sizeof(cl_mem_migration_flags));
  flagsB = (cl_mem_migration_flags *)malloc(max_sub_devices * sizeof(cl_mem_migration_flags));
  flagsC = (cl_mem_migration_flags *)malloc(max_sub_devices * sizeof(cl_mem_migration_flags));
  bufferA = (cl_mem *)malloc(max_sub_devices * sizeof(cl_mem));
  bufferB = (cl_mem *)malloc(max_sub_devices * sizeof(cl_mem));
  bufferC = (cl_mem *)malloc(max_sub_devices * sizeof(cl_mem));

  if ((devices == NULL) || (queues  == NULL) ||
      (flagsA  == NULL) || (flagsB  == NULL) || (flagsC  == NULL) ||
      (bufferA == NULL) || (bufferB == NULL) || (bufferC == NULL)) {
    log_error("ERROR: Failed to successfully allocate required local buffers.\n");
    failed = -1;
    goto cleanup_allocations;
  }

  for (i=0; i<max_sub_devices; i++) {
    devices[i] = NULL;
    queues [i] = NULL;
    bufferA[i] = bufferB[i] = bufferC[i] = NULL;
  }

  for (i=0; i<BUFFER_SIZE; i++) {
    A[i] = genrand_int32(d);
    B[i] = genrand_int32(d);
  }

  // Attempt to partition the device along each of the allowed affinity domains.
  if ((err = clGetDeviceInfo(deviceID, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, sizeof(domains), &domains, NULL)) != CL_SUCCESS) {
    print_error(err, "clGetDeviceInfo(CL_DEVICE_PARTITION_AFFINITY_DOMAIN) failed");
    failed = -1;
    goto cleanup_allocations;
  }

  domains &= (CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE | CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE |
              CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE | CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE | CL_DEVICE_AFFINITY_DOMAIN_NUMA);

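  // Test one affinity domain per pass of the loop below; if the device supports none of the
  // allowed domains, a single pass exercises the APIs on the unpartitioned device.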
  do {
    if (domains) {
      for (domain = 1; (domain & domains) == 0; domain <<= 1) {};
      domains &= ~domain;
    } else {
      domain = 0;
    }

    // Determine the number of partitions for the device given the specific domain.
    if (domain) {
      property[1] = domain;
      err = clCreateSubDevices(deviceID, (const cl_device_partition_property *)property, -1, NULL, &num_devices);
      if ((err != CL_SUCCESS) || (num_devices == 0)) {
        print_error(err, "Obtaining the number of partitions by affinity failed.");
        failed = 1;
        goto cleanup;
      }
    } else {
      num_devices = 1;
    }

    if (num_devices > 1) {
      // Create each of the sub-devices and a corresponding context.
      if ((err = clCreateSubDevices(deviceID, (const cl_device_partition_property *)property, num_devices, devices, &num_devices)) != CL_SUCCESS) {
        print_error(err, "Failed creating sub devices.");
        failed = 1;
        goto cleanup;
      }

      // Create a context containing all the sub-devices
      ctx = clCreateContext(NULL, num_devices, devices, notify_callback, NULL, &err);
      if (ctx == NULL) {
        print_error(err, "Failed creating context containing the sub-devices.");
        failed = 1;
        goto cleanup;
      }

      // Create a command queue for each sub-device
      for (i=0; i<num_devices; i++) {
        if (devices[i]) {
          if ((queues[i] = clCreateCommandQueue(ctx, devices[i], 0, &err)) == NULL) {
            print_error(err, "Failed creating command queues.");
            failed = 1;
            goto cleanup;
          }
        }
      }
    } else {
      // No partitioning available. Just exercise the APIs on a single device.
      devices[0] = deviceID;
      queues[0] = queue;
      ctx = context;
    }

    // Build the kernel program.
    if ((err = create_single_kernel_helper(ctx, &program, &kernel, 1,
                                           &buffer_migrate_kernel_code,
                                           "test_buffer_migrate")))
    {
        print_error(err, "Failed creating kernel.");
        failed = 1;
        goto cleanup;
    }

    num_devices_limited = num_devices;

    // Allocate memory buffers. 3 buffers (2 input, 1 output) for each sub-device.
    // If we run out of memory, then restrict the number of sub-devices to be tested.
    for (i=0; i<num_devices; i++) {
      bufferA[i] = init_buffer(queues[i], clCreateBuffer(ctx, (CL_MEM_READ_ONLY  | CL_MEM_ALLOC_HOST_PTR), sizeof(cl_uint) * BUFFER_SIZE, NULL, &err), A);
      bufferB[i] = init_buffer(queues[i], clCreateBuffer(ctx, (CL_MEM_READ_ONLY  | CL_MEM_ALLOC_HOST_PTR), sizeof(cl_uint) * BUFFER_SIZE, NULL, &err), B);
      bufferC[i] = clCreateBuffer(ctx, (CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR), sizeof(cl_uint) * BUFFER_SIZE, NULL, &err);

      if ((bufferA[i] == NULL) || (bufferB[i] == NULL) || (bufferC[i] == NULL)) {
        if (i == 0) {
          log_error("Failed to allocate even 1 set of buffers.\n");
          failed = 1;
          goto cleanup;
        }
        num_devices_limited = i;
        break;
      }
    }

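    // The three nested loops below cover every combination of migration strategies: buffer A
    // is migrated in the outer loop, buffer C in the middle loop, and buffer B in the inner loop.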
    // For each partition, we will execute the test kernel with each of the 3 buffers migrated to one of the migrate options
    for (migrateA=(enum migrations)(0); migrateA<NUMBER_OF_MIGRATIONS; migrateA = (enum migrations)((int)migrateA + 1)) {
      if (migrateMemObject(migrateA, queues, bufferA, num_devices_limited, flagsA, d) != CL_SUCCESS) {
        failed = 1;
        goto cleanup;
      }
      for (migrateC=(enum migrations)(0); migrateC<NUMBER_OF_MIGRATIONS; migrateC = (enum migrations)((int)migrateC + 1)) {
        if (migrateMemObject(migrateC, queues, bufferC, num_devices_limited, flagsC, d) != CL_SUCCESS) {
          failed = 1;
          goto cleanup;
        }
        for (migrateB=(enum migrations)(0); migrateB<NUMBER_OF_MIGRATIONS; migrateB = (enum migrations)((int)migrateB + 1)) {
          if (migrateMemObject(migrateB, queues, bufferB, num_devices_limited, flagsB, d) != CL_SUCCESS) {
            failed = 1;
            goto cleanup;
          }
          // Run the test on each of the partitions.
          for (i=0; i<num_devices_limited; i++) {
            cl_uint x;

            x = i + test_number;

            if ((err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (const void *)&bufferC[i])) != CL_SUCCESS) {
              print_error(err, "Failed to set kernel argument 0.");
              failed = 1;
              goto cleanup;
            }

            if ((err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (const void *)&bufferA[i])) != CL_SUCCESS) {
              print_error(err, "Failed to set kernel argument 1.");
              failed = 1;
              goto cleanup;
            }

            if ((err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (const void *)&bufferB[i])) != CL_SUCCESS) {
              print_error(err, "Failed to set kernel argument 2.");
              failed = 1;
              goto cleanup;
            }

            if ((err = clSetKernelArg(kernel, 3, sizeof(cl_uint), (const void *)&x)) != CL_SUCCESS) {
              print_error(err, "Failed to set kernel argument 3.");
              failed = 1;
              goto cleanup;
            }

            if ((err = clEnqueueNDRangeKernel(queues[i], kernel, 1, NULL, wgs, NULL, 0, NULL, NULL)) != CL_SUCCESS) {
              print_error(err, "Failed enqueuing the NDRange kernel.");
              failed = 1;
              goto cleanup;
            }
          }
          // Verify the results, but only when neither input buffer was migrated with undefined content.
          for (i=0; i<num_devices_limited; i++, test_number++) {
            if (((flagsA[i] | flagsB[i]) & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED) == 0) {
              if ((err = clEnqueueReadBuffer(queues[i], bufferC[i], CL_TRUE, 0, sizeof(cl_uint)*BUFFER_SIZE, C, 0, NULL, NULL)) != CL_SUCCESS) {
                print_error(err, "Failed reading output buffer.");
                failed = 1;
                goto cleanup;
              }
              for (j=0; j<BUFFER_SIZE; j++) {
                cl_uint expected;

                expected = A[j] ^ B[j] ^ test_number;
                if (C[j] != expected) {
                  log_error("Failed on device %d,  work item %4d,  expected 0x%08x got 0x%08x (0x%08x ^ 0x%08x ^ 0x%08x)\n", i, j, expected, C[j], A[j], B[j], test_number);
                  failed = 1;
                }
              }
              if (failed) goto cleanup;
            }
          }

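          // Restore buffer B's contents if its last migration left them undefined, before the next combination.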
          if (restoreBuffer(queues, bufferB, num_devices_limited, flagsB, B) != CL_SUCCESS) {
            failed = 1;
            goto cleanup;
          }
        }
      }
      if (restoreBuffer(queues, bufferA, num_devices_limited, flagsA, A) != CL_SUCCESS) {
        failed = 1;
        goto cleanup;
      }
    }

  cleanup:
    // Clean up all the allocated resources created by the test. This includes sub-devices,
    // command queues, and memory buffers. Handles are cleared after release so a later
    // cleanup pass does not release them again.

    for (i=0; i<max_sub_devices; i++) {
      // Memory buffer cleanup
      if (bufferA[i]) {
        if ((err = clReleaseMemObject(bufferA[i])) != CL_SUCCESS) {
          print_error(err, "Failed releasing memory object.");
          failed = 1;
        }
        bufferA[i] = NULL;
      }
      if (bufferB[i]) {
        if ((err = clReleaseMemObject(bufferB[i])) != CL_SUCCESS) {
          print_error(err, "Failed releasing memory object.");
          failed = 1;
        }
        bufferB[i] = NULL;
      }
      if (bufferC[i]) {
        if ((err = clReleaseMemObject(bufferC[i])) != CL_SUCCESS) {
          print_error(err, "Failed releasing memory object.");
          failed = 1;
        }
        bufferC[i] = NULL;
      }

      if (num_devices > 1) {
        // Command queue cleanup
        if (queues[i]) {
          if ((err = clReleaseCommandQueue(queues[i])) != CL_SUCCESS) {
            print_error(err, "Failed releasing command queue.");
            failed = 1;
          }
          queues[i] = NULL;
        }

        // Sub-device cleanup
        if (devices[i]) {
          if ((err = clReleaseDevice(devices[i])) != CL_SUCCESS) {
            print_error(err, "Failed releasing sub device.");
            failed = 1;
          }
        }
        devices[i] = NULL;
      }
    }


    // Context, program, and kernel cleanup
    if (program) {
      if ((err = clReleaseProgram(program)) != CL_SUCCESS) {
        print_error(err, "Failed releasing program.");
        failed = 1;
      }
      program = NULL;
    }

    if (kernel) {
      if ((err = clReleaseKernel(kernel)) != CL_SUCCESS) {
        print_error(err, "Failed releasing kernel.");
        failed = 1;
      }
      kernel = NULL;
    }

    if (ctx && (ctx != context)) {
      if ((err = clReleaseContext(ctx)) != CL_SUCCESS) {
        print_error(err, "Failed releasing context.");
        failed = 1;
      }
    }
    ctx = NULL;

    if (failed) goto cleanup_allocations;
  } while (domains);

cleanup_allocations:
  if (devices) free(devices);
  if (queues)  free(queues);
  if (flagsA)  free(flagsA);
  if (flagsB)  free(flagsB);
  if (flagsC)  free(flagsC);
  if (bufferA) free(bufferA);
  if (bufferB) free(bufferB);
  if (bufferC) free(bufferC);

  // Release the random-number generator state.
  free_mtdata(d);

  return ((failed) ? -1 : 0);
}