// // Copyright (c) 2017 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // #include "common.h" const char *byte_manipulation_kernels[] = { // Each device will write it's id into the bytes that it "owns", ownership is based on round robin (global_id % num_id) // num_id is equal to number of SVM devices in the system plus one (for the host code). // id is the index (id) of the device that this kernel is executing on. // For example, if there are 2 SVM devices and the host; the buffer should look like this after each device and the host write their id's: // 0, 1, 2, 0, 1, 2, 0, 1, 2... "__kernel void write_owned_locations(__global char* a, uint num_id, uint id)\n" "{\n" " size_t i = get_global_id(0);\n" " int owner = i % num_id;\n" " if(id == owner) \n" " a[i] = id;\n" // modify location if it belongs to this device, write id "}\n" // Verify that a device can see the byte sized updates from the other devices, sum up the device id's and see if they match expected value. // Note: this must be called with a reduced NDRange so that neighbor acesses don't go past end of buffer. // For example if there are two SVM devices and the host (3 total devices) the buffer should look like this: // 0,1,2,0,1,2... // and the expected sum at each point is 0+1+2 = 3. "__kernel void sum_neighbor_locations(__global char* a, uint num_devices, volatile __global uint* error_count)\n" "{\n" " size_t i = get_global_id(0);\n" " uint expected_sum = (num_devices * (num_devices - 1))/2;\n" " uint sum = 0;\n" " for(uint j=0; j 0) failed = true; } cl_uint expected = (num_devices_plus_host * (num_devices_plus_host - 1))/2; // check that host can see the byte writes made by the devices. for(cl_uint i = 0; i < num_elements - num_devices_plus_host; i++) { int sum = 0; for(cl_uint j=0; j < num_devices_plus_host; j++) sum += pA[i+j]; if(sum != expected) failed = true; } clSVMFree(context, pA); for(cl_uint i=0; i < num_devices; i++) clSVMFree(context, error_counts[i]); if(failed) return -1; return 0; }