// // Copyright (c) 2017 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // #include "harness/compat.h" #include #include #include #include "harness/testHarness.h" #include "harness/kernelHelpers.h" #include "common.h" // SVM Atomic wrappers. // Platforms that support SVM atomics (atomics that work across the host and devices) need to implement these host side functions correctly. // Platforms that do not support SVM atomics can simpy implement these functions as empty stubs since the functions will not be called. // For now only Windows x86 is implemented, add support for other platforms as needed. cl_int AtomicLoadExplicit(volatile cl_int * pValue, cl_memory_order order) { #if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) || (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) return *pValue; // provided the value is aligned x86 doesn't need anything more than this for seq_cst. #elif defined(__GNUC__) return __sync_add_and_fetch(pValue, 0); #else log_error("ERROR: AtomicLoadExplicit function not implemented\n"); return -1; #endif } // all the x86 atomics are seq_cst, so don't need to do anything with the memory order parameter. cl_int AtomicFetchAddExplicit(volatile cl_int *object, cl_int operand, cl_memory_order o) { #if (defined(_WIN32) || defined(_WIN64)) && defined(_MSC_VER) return InterlockedExchangeAdd( (volatile LONG*) object, operand); #elif defined(__GNUC__) return __sync_fetch_and_add(object, operand); #else log_error("ERROR: AtomicFetchAddExplicit function not implemented\n"); return -1; #endif } cl_int AtomicExchangeExplicit(volatile cl_int *object, cl_int desired, cl_memory_order mo) { #if (defined(_WIN32) || defined(_WIN64)) && defined(_MSC_VER) return InterlockedExchange( (volatile LONG*) object, desired); #elif defined(__GNUC__) return __sync_lock_test_and_set(object, desired); #else log_error("ERROR: AtomicExchangeExplicit function not implemented\n"); return -1; #endif } const char *linked_list_create_and_verify_kernels[] = { "typedef struct Node {\n" " int global_id;\n" " int position_in_list;\n" " __global struct Node* pNext;\n" "} Node;\n" "\n" // The allocation_index parameter must be initialized on the host to N work-items // The first N nodes in pNodes will be the heads of the lists. "__kernel void create_linked_lists(__global Node* pNodes, volatile __attribute__((nosvm)) __global int* allocation_index, int list_length)\n" "{\n" " size_t i = get_global_id(0);\n" " __global Node *pNode = &pNodes[i];\n" "\n" " pNode->global_id = i;\n" " pNode->position_in_list = 0;\n" "\n" " __global Node *pNew;\n" " for(int j=1; j < list_length; j++)\n" " {\n" " pNew = &pNodes[ atomic_inc(allocation_index) ];// allocate a new node\n" " pNew->global_id = i;\n" " pNew->position_in_list = j;\n" " pNode->pNext = pNew; // link new node onto end of list\n" " pNode = pNew; // move to end of list\n" " }\n" "}\n" "__kernel void verify_linked_lists(__global Node* pNodes, volatile __global uint* num_correct, int list_length)\n" "{\n" " size_t i = get_global_id(0);\n" " __global Node *pNode = &pNodes[i];\n" "\n" " for(int j=0; j < list_length; j++)\n" " {\n" " if( pNode->global_id == i && pNode->position_in_list == j)\n" " {\n" " atomic_inc(num_correct);\n" " } \n" " else {\n" " break;\n" " }\n" " pNode = pNode->pNext;\n" " }\n" "}\n" }; // The first N nodes in pNodes will be the heads of the lists. void create_linked_lists(Node* pNodes, size_t num_lists, int list_length) { size_t allocation_index = num_lists; // heads of lists are in first num_lists nodes. for(cl_uint i = 0; i < num_lists; i++) { Node *pNode = &pNodes[i]; pNode->global_id = i; pNode->position_in_list = 0; Node *pNew; for(int j=1; j < list_length; j++) { pNew = &pNodes[ allocation_index++ ];// allocate a new node pNew->global_id = i; pNew->position_in_list = j; pNode->pNext = pNew; // link new node onto end of list pNode = pNew; // move to end of list } } } cl_int verify_linked_lists(Node* pNodes, size_t num_lists, int list_length) { cl_int error = CL_SUCCESS; int numCorrect = 0; log_info(" and verifying on host "); for(cl_uint i=0; i < num_lists; i++) { Node *pNode = &pNodes[i]; for(int j=0; j < list_length; j++) { if( pNode->global_id == i && pNode->position_in_list == j) { numCorrect++; } else { break; } pNode = pNode->pNext; } } if(numCorrect != list_length * (cl_uint)num_lists) { error = -1; log_info("Failed\n"); } else log_info("Passed\n"); return error; } // Note that we don't use the context provided by the test harness since it doesn't support multiple devices, // so we create are own context here that has all devices, we use the same platform that the harness used. cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeString, cl_context* context, cl_program *program, cl_command_queue *queues, cl_uint *num_devices, cl_device_svm_capabilities required_svm_caps, std::vector extensions_list) { cl_int error; cl_platform_id platform_id; // find out what platform the harness is using. error = clGetDeviceInfo(device_from_harness, CL_DEVICE_PLATFORM,sizeof(cl_platform_id),&platform_id,NULL); test_error(error,"clGetDeviceInfo failed"); error = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, 0, NULL, num_devices ); test_error(error, "clGetDeviceIDs failed"); std::vector devicesTmp(*num_devices), devices, capable_devices; error = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, *num_devices, &devicesTmp[0], NULL ); test_error(error, "clGetDeviceIDs failed"); devices.push_back(device_from_harness); for (size_t i = 0; i < devicesTmp.size(); ++i) { if (device_from_harness != devicesTmp[i]) devices.push_back(devicesTmp[i]); } // Select only the devices that support the SVM level needed for the test. // Note that if requested SVM capabilities are not supported by any device then the test still passes (even though it does not execute). cl_device_svm_capabilities caps; cl_uint num_capable_devices = 0; for(cl_uint i = 0; i < *num_devices; i++) { Version version = get_device_cl_version(devices[i]); if(device_from_harness != devices[i] && version < Version(2,0)) { continue; } error = clGetDeviceInfo(devices[i], CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &caps, NULL); test_error(error,"clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES"); if(caps & (~(CL_DEVICE_SVM_COARSE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM | CL_DEVICE_SVM_ATOMICS))) { log_error("clGetDeviceInfo returned an invalid cl_device_svm_capabilities value"); return -1; } bool extensions_supported = true; for (auto extension : extensions_list) { if (!is_extension_available(devices[i], extension.c_str())) { log_error("Required extension not found - device id %d - %s\n", i, extension.c_str()); extensions_supported = false; break; } } if((caps & required_svm_caps) == required_svm_caps && extensions_supported) { capable_devices.push_back(devices[i]); ++num_capable_devices; } } devices = capable_devices; // the only devices we care about from here on are the ones capable of supporting the requested SVM level. *num_devices = num_capable_devices; if(num_capable_devices == 0) // if(svm_level > CL_DEVICE_COARSE_SVM && 0 == num_capable_devices) { log_info("Requested SVM level or required extensions not supported by any device on this platform, test not executed.\n"); return 1; // 1 indicates do not execute, but counts as passing. } cl_context_properties context_properties[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, 0 }; *context = clCreateContext(context_properties, *num_devices, &devices[0], NULL, NULL, &error); test_error(error, "Unable to create context" ); // *queues = (cl_command_queue *) malloc( *num_devices * sizeof( cl_command_queue ) ); for(cl_uint i = 0; i < *num_devices; i++) { queues[i] = clCreateCommandQueueWithProperties(*context, devices[i], 0, &error); test_error(error, "clCreateCommandQueue failed"); } if (ppCodeString) { error = create_single_kernel_helper(*context, program, 0, 1, ppCodeString, 0); test_error(error, "failed to create program"); } return 0; } test_definition test_list[] = { ADD_TEST( svm_byte_granularity), ADD_TEST( svm_set_kernel_exec_info_svm_ptrs ), ADD_TEST( svm_fine_grain_memory_consistency ), ADD_TEST( svm_fine_grain_sync_buffers ), ADD_TEST( svm_shared_address_space_fine_grain ), ADD_TEST( svm_shared_sub_buffers ), ADD_TEST( svm_shared_address_space_fine_grain_buffers ), ADD_TEST( svm_allocate_shared_buffer ), ADD_TEST( svm_shared_address_space_coarse_grain_old_api ), ADD_TEST( svm_shared_address_space_coarse_grain_new_api ), ADD_TEST( svm_cross_buffer_pointers_coarse_grain ), ADD_TEST( svm_pointer_passing ), ADD_TEST( svm_enqueue_api ), ADD_TEST_VERSION( svm_migrate, Version(2, 1)), }; const int test_num = ARRAY_SIZE( test_list ); test_status InitCL(cl_device_id device) { auto version = get_device_cl_version(device); auto expected_min_version = Version(2, 0); if (version < expected_min_version) { version_expected_info("Test", "OpenCL", expected_min_version.to_string().c_str(), version.to_string().c_str()); return TEST_SKIP; } int error; cl_device_svm_capabilities svm_caps; error = clGetDeviceInfo(device, CL_DEVICE_SVM_CAPABILITIES, sizeof(svm_caps), &svm_caps, NULL); if (error != CL_SUCCESS) { print_error(error, "Unable to get svm capabilities"); return TEST_FAIL; } if ((svm_caps == 0) && (version >= Version(3, 0))) { return TEST_SKIP; } return TEST_PASS; } int main(int argc, const char *argv[]) { return runTestHarnessWithCheck(argc, argv, test_num, test_list, true, 0, InitCL); }