• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <vector>
20 #include <sstream>
21 #include "harness/testHarness.h"
22 #include "harness/kernelHelpers.h"
23 
24 #include "common.h"
25 
26 // SVM Atomic wrappers.
27 // Platforms that support SVM atomics (atomics that work across the host and devices) need to implement these host side functions correctly.
28 // Platforms that do not support SVM atomics can simpy implement these functions as empty stubs since the functions will not be called.
29 // For now only Windows x86 is implemented, add support for other platforms as needed.
AtomicLoadExplicit(volatile cl_int * pValue,cl_memory_order order)30 cl_int AtomicLoadExplicit(volatile cl_int * pValue, cl_memory_order order)
31 {
32 #if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) || (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)))
33   return *pValue;  // provided the value is aligned x86 doesn't need anything more than this for seq_cst.
34 #elif defined(__GNUC__)
35 	return __sync_add_and_fetch(pValue, 0);
36 #else
37   log_error("ERROR: AtomicLoadExplicit function not implemented\n");
38   return -1;
39 #endif
40 }
41 // all the x86 atomics are seq_cst, so don't need to do anything with the memory order parameter.
AtomicFetchAddExplicit(volatile cl_int * object,cl_int operand,cl_memory_order o)42 cl_int AtomicFetchAddExplicit(volatile cl_int *object, cl_int operand, cl_memory_order o)
43 {
44 #if (defined(_WIN32) || defined(_WIN64)) && defined(_MSC_VER)
45   return InterlockedExchangeAdd( (volatile LONG*) object, operand);
46 #elif defined(__GNUC__)
47   return __sync_fetch_and_add(object, operand);
48 #else
49   log_error("ERROR: AtomicFetchAddExplicit function not implemented\n");
50   return -1;
51 #endif
52 }
53 
AtomicExchangeExplicit(volatile cl_int * object,cl_int desired,cl_memory_order mo)54 cl_int AtomicExchangeExplicit(volatile cl_int *object, cl_int desired, cl_memory_order mo)
55 {
56 #if (defined(_WIN32) || defined(_WIN64)) && defined(_MSC_VER)
57   return InterlockedExchange( (volatile LONG*) object, desired);
58 #elif defined(__GNUC__)
59   return __sync_lock_test_and_set(object, desired);
60 #else
61   log_error("ERROR: AtomicExchangeExplicit function not implemented\n");
62   return -1;
63 #endif
64 }
65 
66 
67 const char *linked_list_create_and_verify_kernels[] = {
68   "typedef struct Node {\n"
69   "    int global_id;\n"
70   "    int position_in_list;\n"
71   "    __global struct Node* pNext;\n"
72   "} Node;\n"
73   "\n"
74   // The allocation_index parameter must be initialized on the host to N work-items
75   // The first N nodes in pNodes will be the heads of the lists.
76   "__kernel void create_linked_lists(__global Node* pNodes, volatile __attribute__((nosvm)) __global int* allocation_index, int list_length)\n"
77   "{\n"
78   "    size_t i = get_global_id(0);\n"
79   "    __global Node *pNode = &pNodes[i];\n"
80   "\n"
81   "    pNode->global_id = i;\n"
82   "    pNode->position_in_list = 0;\n"
83   "\n"
84   "    __global Node *pNew;\n"
85   "    for(int j=1; j < list_length; j++)\n"
86   "    {\n"
87   "        pNew = &pNodes[ atomic_inc(allocation_index) ];// allocate a new node\n"
88   "        pNew->global_id = i;\n"
89   "        pNew->position_in_list = j;\n"
90   "        pNode->pNext = pNew;  // link new node onto end of list\n"
91   "        pNode = pNew;   // move to end of list\n"
92   "    }\n"
93   "}\n"
94 
95   "__kernel void verify_linked_lists(__global Node* pNodes, volatile __global uint* num_correct, int list_length)\n"
96   "{\n"
97   "    size_t i = get_global_id(0);\n"
98   "    __global Node *pNode = &pNodes[i];\n"
99   "\n"
100   "    for(int j=0; j < list_length; j++)\n"
101   "    {\n"
102   "        if( pNode->global_id == i && pNode->position_in_list == j)\n"
103   "        {\n"
104   "            atomic_inc(num_correct);\n"
105   "        } \n"
106   "        else {\n"
107   "            break;\n"
108   "        }\n"
109   "        pNode = pNode->pNext;\n"
110   "    }\n"
111   "}\n"
112 };
113 
114 
115 // The first N nodes in pNodes will be the heads of the lists.
create_linked_lists(Node * pNodes,size_t num_lists,int list_length)116 void create_linked_lists(Node* pNodes, size_t num_lists, int list_length)
117 {
118   size_t allocation_index = num_lists;  // heads of lists are in first num_lists nodes.
119 
120   for(cl_uint i = 0; i < num_lists; i++)
121   {
122     Node *pNode = &pNodes[i];
123     pNode->global_id = i;
124     pNode->position_in_list = 0;
125     Node *pNew;
126     for(int j=1; j < list_length; j++)
127     {
128       pNew = &pNodes[ allocation_index++ ];// allocate a new node
129       pNew->global_id = i;
130       pNew->position_in_list = j;
131       pNode->pNext = pNew;  // link new node onto end of list
132       pNode = pNew;   // move to end of list
133     }
134   }
135 }
136 
verify_linked_lists(Node * pNodes,size_t num_lists,int list_length)137 cl_int verify_linked_lists(Node* pNodes, size_t num_lists, int list_length)
138 {
139   cl_int error = CL_SUCCESS;
140   int numCorrect = 0;
141 
142   log_info(" and verifying on host ");
143   for(cl_uint i=0; i < num_lists; i++)
144   {
145     Node *pNode = &pNodes[i];
146     for(int j=0; j < list_length; j++)
147     {
148       if( pNode->global_id == i && pNode->position_in_list == j)
149       {
150         numCorrect++;
151       }
152       else {
153         break;
154       }
155       pNode = pNode->pNext;
156     }
157   }
158   if(numCorrect != list_length * (cl_uint)num_lists)
159   {
160     error = -1;
161     log_info("Failed\n");
162   }
163   else
164     log_info("Passed\n");
165 
166   return error;
167 }
168 
169 // Note that we don't use the context provided by the test harness since it doesn't support multiple devices,
170 // so we create are own context here that has all devices, we use the same platform that the harness used.
create_cl_objects(cl_device_id device_from_harness,const char ** ppCodeString,cl_context * context,cl_program * program,cl_command_queue * queues,cl_uint * num_devices,cl_device_svm_capabilities required_svm_caps,std::vector<std::string> extensions_list)171 cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeString, cl_context* context, cl_program *program, cl_command_queue *queues, cl_uint *num_devices, cl_device_svm_capabilities required_svm_caps, std::vector<std::string> extensions_list)
172 {
173   cl_int error;
174 
175   cl_platform_id platform_id;
176   // find out what platform the harness is using.
177   error = clGetDeviceInfo(device_from_harness, CL_DEVICE_PLATFORM,sizeof(cl_platform_id),&platform_id,NULL);
178   test_error(error,"clGetDeviceInfo failed");
179 
180   error = clGetDeviceIDs(platform_id,  CL_DEVICE_TYPE_ALL, 0, NULL, num_devices );
181   test_error(error, "clGetDeviceIDs failed");
182 
183   std::vector<cl_device_id> devicesTmp(*num_devices), devices, capable_devices;
184 
185   error = clGetDeviceIDs(platform_id,  CL_DEVICE_TYPE_ALL, *num_devices, &devicesTmp[0], NULL );
186   test_error(error, "clGetDeviceIDs failed");
187 
188   devices.push_back(device_from_harness);
189   for (size_t i = 0; i < devicesTmp.size(); ++i)
190   {
191     if (device_from_harness != devicesTmp[i])
192       devices.push_back(devicesTmp[i]);
193   }
194 
195   // Select only the devices that support the SVM level needed for the test.
196   // Note that if requested SVM capabilities are not supported by any device then the test still passes (even though it does not execute).
197   cl_device_svm_capabilities caps;
198   cl_uint num_capable_devices = 0;
199   for(cl_uint i = 0; i < *num_devices; i++)
200   {
201     Version version = get_device_cl_version(devices[i]);
202 
203     if(device_from_harness != devices[i] && version < Version(2,0))
204     {
205       continue;
206     }
207 
208     error = clGetDeviceInfo(devices[i], CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &caps, NULL);
209     test_error(error,"clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES");
210     if(caps & (~(CL_DEVICE_SVM_COARSE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_BUFFER |  CL_DEVICE_SVM_FINE_GRAIN_SYSTEM | CL_DEVICE_SVM_ATOMICS)))
211     {
212       log_error("clGetDeviceInfo returned an invalid cl_device_svm_capabilities value");
213       return -1;
214     }
215     bool extensions_supported = true;
216     for (auto extension : extensions_list)
217     {
218       if (!is_extension_available(devices[i], extension.c_str()))
219       {
220         log_error("Required extension not found - device id %d - %s\n", i, extension.c_str());
221         extensions_supported = false;
222         break;
223       }
224     }
225     if((caps & required_svm_caps) == required_svm_caps && extensions_supported)
226     {
227       capable_devices.push_back(devices[i]);
228       ++num_capable_devices;
229     }
230   }
231   devices = capable_devices;  // the only devices we care about from here on are the ones capable of supporting the requested SVM level.
232   *num_devices = num_capable_devices;
233   if(num_capable_devices == 0)
234     //    if(svm_level > CL_DEVICE_COARSE_SVM && 0 == num_capable_devices)
235   {
236     log_info("Requested SVM level or required extensions not supported by any device on this platform, test not executed.\n");
237     return 1; // 1 indicates do not execute, but counts as passing.
238   }
239 
240   cl_context_properties context_properties[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, 0 };
241   *context = clCreateContext(context_properties, *num_devices, &devices[0], NULL, NULL, &error);
242   test_error(error, "Unable to create context" );
243 
244   //    *queues = (cl_command_queue *) malloc( *num_devices * sizeof( cl_command_queue ) );
245 
246   for(cl_uint i = 0; i < *num_devices; i++)
247   {
248     queues[i] = clCreateCommandQueueWithProperties(*context, devices[i], 0, &error);
249     test_error(error, "clCreateCommandQueue failed");
250   }
251 
252   if(ppCodeString)
253   {
254     error = create_single_kernel_helper(*context, program, 0, 1, ppCodeString, 0, "-cl-std=CL2.0");
255     test_error( error, "failed to create program" );
256   }
257 
258   return 0;
259 }
260 
261 test_definition test_list[] = {
262     ADD_TEST( svm_byte_granularity),
263     ADD_TEST( svm_set_kernel_exec_info_svm_ptrs ),
264     ADD_TEST( svm_fine_grain_memory_consistency ),
265     ADD_TEST( svm_fine_grain_sync_buffers ),
266     ADD_TEST( svm_shared_address_space_fine_grain ),
267     ADD_TEST( svm_shared_sub_buffers ),
268     ADD_TEST( svm_shared_address_space_fine_grain_buffers ),
269     ADD_TEST( svm_allocate_shared_buffer ),
270     ADD_TEST( svm_shared_address_space_coarse_grain_old_api ),
271     ADD_TEST( svm_shared_address_space_coarse_grain_new_api ),
272     ADD_TEST( svm_cross_buffer_pointers_coarse_grain ),
273     ADD_TEST( svm_pointer_passing ),
274     ADD_TEST( svm_enqueue_api ),
275     ADD_TEST_VERSION( svm_migrate, Version(2, 1)),
276 };
277 
278 const int test_num = ARRAY_SIZE( test_list );
279 
InitCL(cl_device_id device)280 test_status InitCL(cl_device_id device) {
281   auto version = get_device_cl_version(device);
282   auto expected_min_version = Version(2, 0);
283   if (version < expected_min_version) {
284     version_expected_info("Test", expected_min_version.to_string().c_str(), version.to_string().c_str());
285     return TEST_SKIP;
286   }
287 
288   int error;
289   cl_device_svm_capabilities svm_caps;
290   error = clGetDeviceInfo(device, CL_DEVICE_SVM_CAPABILITIES,
291                           sizeof(svm_caps), &svm_caps, NULL);
292   if (error != CL_SUCCESS) {
293     print_error(error, "Unable to get svm capabilities");
294     return TEST_FAIL;
295   }
296 
297   if ((svm_caps == 0) && (version >= Version(3, 0)))
298   {
299       return TEST_SKIP;
300   }
301 
302   return TEST_PASS;
303 }
304 
main(int argc,const char * argv[])305 int main(int argc, const char *argv[])
306 {
307   return runTestHarnessWithCheck(argc, argv, test_num, test_list, true, 0, InitCL);
308 }
309 
310