//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/testHarness.h"
#include "harness/kernelHelpers.h"
#include "harness/typeWrappers.h"

#include "common.h"
#include "host_atomics.h"

#include <sstream>
#include <vector>

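// Each CBasicTest* subclass below supplies the OpenCL kernel body
// (ProgramCore), a host-side reference implementation (HostFunction) and the
// validation logic for one atomic builtin.
//
// atomic_store / atomic_store_explicit: every work-item stores its own id
// into destMemory[tid], so each result slot is written by exactly one thread
// and must equal that thread's id afterwards. Configurations using
// memory_order_acquire or memory_order_acq_rel are skipped, since those
// orders are not valid for a store. With an explicit order and scope the
// generated kernel core is a single line, e.g. (assuming a seq_cst/device
// configuration):
//   atomic_store_explicit(&destMemory[tid], tid, memory_order_seq_cst, memory_scope_device);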
template<typename HostAtomicType, typename HostDataType>
class CBasicTestStore : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
public:
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
  using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
  CBasicTestStore(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
  {
    OldValueCheck(false);
  }
  virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
  {
    return threadCount;
  }
  virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue)
  {
    if(MemoryOrder() == MEMORY_ORDER_ACQUIRE ||
      MemoryOrder() == MEMORY_ORDER_ACQ_REL)
      return 0; // skip test - not applicable

    if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF)
        return 0; // skip test - not applicable

    return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue);
  }
  virtual std::string ProgramCore()
  {
    std::string memoryOrderScope = MemoryOrderScopeStr();
    std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
    return
      "  atomic_store"+postfix+"(&destMemory[tid], tid"+memoryOrderScope+");\n";
  }
  virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
  {
    host_atomic_store(&destMemory[tid], (HostDataType)tid, MemoryOrder());
  }
  virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
  {
    expected = (HostDataType)whichDestValue;
    return true;
  }
};

int test_atomic_store_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
  int error = 0;
  CBasicTestStore<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
  EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
  CBasicTestStore<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
  EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
  CBasicTestStore<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
  EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
  CBasicTestStore<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
  EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
  CBasicTestStore<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT, useSVM);
  EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements));
  CBasicTestStore<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(TYPE_ATOMIC_DOUBLE, useSVM);
  EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements));
  if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
  {
    CBasicTestStore<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestStore<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestStore<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestStore<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  else
  {
    CBasicTestStore<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestStore<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestStore<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestStore<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  return error;
}

int test_atomic_store(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_store_generic(deviceID, context, queue, num_elements, false);
}

int test_svm_atomic_store(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_store_generic(deviceID, context, queue, num_elements, true);
}

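// atomic_init: like the store test, each work-item initializes its own
// destMemory[tid] with its id. atomic_init has no _explicit variant and takes
// no memory order or scope arguments, which is why this test derives from
// CBasicTest directly rather than from CBasicTestMemOrderScope.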
template<typename HostAtomicType, typename HostDataType>
class CBasicTestInit : public CBasicTest<HostAtomicType, HostDataType>
{
public:
  using CBasicTest<HostAtomicType, HostDataType>::OldValueCheck;
  CBasicTestInit(TExplicitAtomicType dataType, bool useSVM) : CBasicTest<HostAtomicType, HostDataType>(dataType, useSVM)
  {
    OldValueCheck(false);
  }
  virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
  {
    return threadCount;
  }
  virtual std::string ProgramCore()
  {
    return
      "  atomic_init(&destMemory[tid], tid);\n";
  }
  virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
  {
    host_atomic_init(&destMemory[tid], (HostDataType)tid);
  }
  virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
  {
    expected = (HostDataType)whichDestValue;
    return true;
  }
};

int test_atomic_init_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
  int error = 0;
  CBasicTestInit<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
  EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
  CBasicTestInit<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
  EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
  CBasicTestInit<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
  EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
  CBasicTestInit<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
  EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
  CBasicTestInit<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT, useSVM);
  EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements));
  CBasicTestInit<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(TYPE_ATOMIC_DOUBLE, useSVM);
  EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements));
  if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
  {
    CBasicTestInit<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestInit<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestInit<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestInit<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  else
  {
    CBasicTestInit<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestInit<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestInit<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestInit<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  return error;
}

int test_atomic_init(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_init_generic(deviceID, context, queue, num_elements, false);
}

int test_svm_atomic_init(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_init_generic(deviceID, context, queue, num_elements, true);
}

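// atomic_load / atomic_load_explicit: each work-item stores its id and loads
// it back into oldValues[tid]; VerifyRefs then checks that every thread read
// back its own value. Configurations using memory_order_release or
// memory_order_acq_rel are skipped, since those orders are not valid for a
// load.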
template<typename HostAtomicType, typename HostDataType>
class CBasicTestLoad : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
public:
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
  using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
  CBasicTestLoad(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
  {
    OldValueCheck(false);
  }
  virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
  {
    return threadCount;
  }
  virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue)
  {
    if(MemoryOrder() == MEMORY_ORDER_RELEASE ||
      MemoryOrder() == MEMORY_ORDER_ACQ_REL)
      return 0; // skip test - not applicable

    if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF)
        return 0; // skip test - not applicable

    return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue);
  }
  virtual std::string ProgramCore()
  {
      // When this test is run with MEMORY_ORDER_ACQUIRE, the store
      // should use MEMORY_ORDER_RELEASE
      std::string memoryOrderScopeLoad = MemoryOrderScopeStr();
      std::string memoryOrderScopeStore =
          (MemoryOrder() == MEMORY_ORDER_ACQUIRE)
          ? (", memory_order_release" + MemoryScopeStr())
          : memoryOrderScopeLoad;
      std::string postfix(memoryOrderScopeLoad.empty() ? "" : "_explicit");
      return "  atomic_store" + postfix + "(&destMemory[tid], tid"
          + memoryOrderScopeStore
          + ");\n"
            "  oldValues[tid] = atomic_load"
          + postfix + "(&destMemory[tid]" + memoryOrderScopeLoad + ");\n";
  }
  virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
  {
    host_atomic_store(&destMemory[tid], (HostDataType)tid, MEMORY_ORDER_SEQ_CST);
    oldValues[tid] = host_atomic_load<HostAtomicType, HostDataType>(&destMemory[tid], MemoryOrder());
  }
  virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
  {
    expected = (HostDataType)whichDestValue;
    return true;
  }
  virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues)
  {
    correct = true;
    for(cl_uint i = 0; i < threadCount; i++ )
    {
      if(refValues[i] != (HostDataType)i)
      {
        log_error("Invalid value for thread %u\n", (cl_uint)i);
        correct = false;
        return true;
      }
    }
    return true;
  }
};

int test_atomic_load_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
  int error = 0;
  CBasicTestLoad<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
  EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
  CBasicTestLoad<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
  EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
  CBasicTestLoad<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
  EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
  CBasicTestLoad<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
  EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
  CBasicTestLoad<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT, useSVM);
  EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements));
  CBasicTestLoad<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(TYPE_ATOMIC_DOUBLE, useSVM);
  EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements));
  if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
  {
    CBasicTestLoad<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestLoad<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestLoad<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestLoad<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  else
  {
    CBasicTestLoad<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestLoad<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestLoad<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestLoad<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  return error;
}

int test_atomic_load(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_load_generic(deviceID, context, queue, num_elements, false);
}

int test_svm_atomic_load(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_load_generic(deviceID, context, queue, num_elements, true);
}

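// atomic_exchange / atomic_exchange_explicit: all work-items repeatedly swap
// their value through the single atomic destMemory[0], which starts at the
// sentinel value 123456. Afterwards the thread ids 0..threadCount-1 plus the
// sentinel must each occur exactly once across oldValues[] and the final
// atomic value; any duplicate indicates a lost exchange. As noted in
// VerifyRefs, OldValueCheck is only meaningful for an even iteration count.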
template<typename HostAtomicType, typename HostDataType>
class CBasicTestExchange : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
public:
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::Iterations;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::IterationsStr;
  CBasicTestExchange(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
  {
    StartValue(123456);
  }
  virtual std::string ProgramCore()
  {
    std::string memoryOrderScope = MemoryOrderScopeStr();
    std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
    return
      "  oldValues[tid] = atomic_exchange"+postfix+"(&destMemory[0], tid"+memoryOrderScope+");\n"
      "  for(int i = 0; i < "+IterationsStr()+"; i++)\n"
      "    oldValues[tid] = atomic_exchange"+postfix+"(&destMemory[0], oldValues[tid]"+memoryOrderScope+");\n";
  }

  virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
  {
    oldValues[tid] = host_atomic_exchange(&destMemory[0], (HostDataType)tid, MemoryOrder());
    for(int i = 0; i < Iterations(); i++)
      oldValues[tid] = host_atomic_exchange(&destMemory[0], oldValues[tid], MemoryOrder());
  }
  virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues)
  {
    OldValueCheck(Iterations()%2 == 0); // check is valid for an even number of iterations only
    correct = true;
    /* We are expecting values from 0 to size-1 and the initial value from the atomic variable */
    /* These values must be distributed across the refValues array and the atomic variable finalValues[0] */
    /* Any repeated value is treated as an error */
    std::vector<bool> tidFound(threadCount);
    bool startValueFound = false;
    cl_uint i;

    for(i = 0; i <= threadCount; i++)
    {
      cl_uint value;
      if(i == threadCount)
        value = (cl_uint)finalValues[0]; // additional value from the atomic variable (last written)
      else
        value = (cl_uint)refValues[i];
      if(value == (cl_uint)StartValue())
      {
        // Special initial value
        if(startValueFound)
        {
          log_error("ERROR: Starting reference value (%u) occurred more than once\n", (cl_uint)StartValue());
          correct = false;
          return true;
        }
        startValueFound = true;
        continue;
      }
      if(value >= threadCount)
      {
        log_error("ERROR: Reference value for thread %u is outside of the valid range! (%u)\n", i, value);
        correct = false;
        return true;
      }
      if(tidFound[value])
      {
        log_error("ERROR: Value (%u) occurred more than once\n", value);
        correct = false;
        return true;
      }
      tidFound[value] = true;
    }
    return true;
  }
};

int test_atomic_exchange_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
  int error = 0;
  CBasicTestExchange<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
  EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
  CBasicTestExchange<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
  EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
  CBasicTestExchange<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
  EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
  CBasicTestExchange<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
  EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
  CBasicTestExchange<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT, useSVM);
  EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements));
  CBasicTestExchange<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(TYPE_ATOMIC_DOUBLE, useSVM);
  EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements));
  if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
  {
    CBasicTestExchange<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestExchange<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestExchange<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestExchange<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  else
  {
    CBasicTestExchange<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestExchange<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestExchange<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestExchange<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  return error;
}

int test_atomic_exchange(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_exchange_generic(deviceID, context, queue, num_elements, false);
}

int test_svm_atomic_exchange(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_exchange_generic(deviceID, context, queue, num_elements, true);
}

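// atomic_compare_exchange_strong: each work-item loops trying to publish its
// id into destMemory[0] via CAS, keeping whatever value it displaced. Two
// ordering constraints are enforced before running: the 'failure' order may
// not be memory_order_release or memory_order_acq_rel, and it shall be no
// stronger than the 'success' order. The strong variant must never fail
// spuriously (fail while leaving 'expected' unchanged), so the kernel marks
// such a failure with the out-of-range value threadCount, which VerifyRefs
// reports as an error.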
template<typename HostAtomicType, typename HostDataType>
class CBasicTestCompareStrong : public CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>
{
public:
  using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::StartValue;
  using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::OldValueCheck;
  using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryOrder;
  using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryOrder2;
  using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryOrderScope;
  using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryScope;
  using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::DataType;
  using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::Iterations;
  using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::IterationsStr;
  using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
  CBasicTestCompareStrong(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>(dataType, useSVM)
  {
    StartValue(123456);
    OldValueCheck(false);
  }
  virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue)
  {
    if(MemoryOrder2() == MEMORY_ORDER_RELEASE ||
      MemoryOrder2() == MEMORY_ORDER_ACQ_REL)
      return 0; // not allowed as 'failure' argument
    if((MemoryOrder() == MEMORY_ORDER_RELAXED && MemoryOrder2() != MEMORY_ORDER_RELAXED) ||
      (MemoryOrder() != MEMORY_ORDER_SEQ_CST && MemoryOrder2() == MEMORY_ORDER_SEQ_CST))
      return 0; // failure argument shall be no stronger than the success argument

    if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF)
        return 0; // skip test - not applicable

    if (CheckCapabilities(MemoryScope(), MemoryOrder2()) == TEST_SKIPPED_ITSELF)
        return 0; // skip test - not applicable

    return CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue);
  }
  virtual std::string ProgramCore()
  {
    std::string memoryOrderScope = MemoryOrderScope();
    std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
    return
      std::string("  ")+DataType().RegularTypeName()+" expected, previous;\n"
      "  int successCount = 0;\n"
      "  oldValues[tid] = tid;\n"
      "  expected = tid;  // force failure at the beginning\n"
      "  if(atomic_compare_exchange_strong"+postfix+"(&destMemory[0], &expected, oldValues[tid]"+memoryOrderScope+") || expected == tid)\n"
      "    oldValues[tid] = threadCount+1; // mark unexpected success with invalid value\n"
      "  else\n"
      "  {\n"
      "    for(int i = 0; i < "+IterationsStr()+" || successCount == 0; i++)\n"
      "    {\n"
      "      previous = expected;\n"
      "      if(atomic_compare_exchange_strong"+postfix+"(&destMemory[0], &expected, oldValues[tid]"+memoryOrderScope+"))\n"
      "      {\n"
      "        oldValues[tid] = expected;\n"
      "        successCount++;\n"
      "      }\n"
      "      else\n"
      "      {\n"
      "        if(previous == expected) // spurious failure - shouldn't occur for 'strong'\n"
      "        {\n"
      "          oldValues[tid] = threadCount; // mark fail with invalid value\n"
      "          break;\n"
      "        }\n"
      "      }\n"
      "    }\n"
      "  }\n";
  }
  virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
  {
    HostDataType expected = (HostDataType)StartValue(), previous;
    oldValues[tid] = (HostDataType)tid;
    for(int i = 0; i < Iterations(); i++)
    {
      previous = expected;
      if(host_atomic_compare_exchange(&destMemory[0], &expected, oldValues[tid], MemoryOrder(), MemoryOrder2()))
        oldValues[tid] = expected;
      else
      {
        if(previous == expected) // shouldn't occur for 'strong'
        {
          oldValues[tid] = threadCount; // mark fail with invalid value
        }
      }
    }
  }
  virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues)
  {
    correct = true;
    /* We are expecting values from 0 to size-1 and the initial value from the atomic variable */
    /* These values must be distributed across the refValues array and the atomic variable finalValues[0] */
    /* Any repeated value is treated as an error */
    std::vector<bool> tidFound(threadCount);
    bool startValueFound = false;
    cl_uint i;

    for(i = 0; i <= threadCount; i++)
    {
      cl_uint value;
      if(i == threadCount)
        value = (cl_uint)finalValues[0]; // additional value from the atomic variable (last written)
      else
        value = (cl_uint)refValues[i];
      if(value == (cl_uint)StartValue())
      {
        // Special initial value
        if(startValueFound)
        {
          log_error("ERROR: Starting reference value (%u) occurred more than once\n", (cl_uint)StartValue());
          correct = false;
          return true;
        }
        startValueFound = true;
        continue;
      }
      if(value >= threadCount)
      {
        if(value == threadCount)
          log_error("ERROR: Spurious failure detected for atomic_compare_exchange_strong\n");
        log_error("ERROR: Reference value for thread %u is outside of the valid range! (%u)\n", i, value);
        correct = false;
        return true;
      }
      if(tidFound[value])
      {
        log_error("ERROR: Value (%u) occurred more than once\n", value);
        correct = false;
        return true;
      }
      tidFound[value] = true;
    }
    return true;
  }
};

int test_atomic_compare_exchange_strong_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
  int error = 0;
  CBasicTestCompareStrong<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
  EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
  CBasicTestCompareStrong<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
  EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
  CBasicTestCompareStrong<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
  EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
  CBasicTestCompareStrong<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
  EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
  if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
  {
    CBasicTestCompareStrong<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareStrong<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareStrong<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareStrong<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  else
  {
    CBasicTestCompareStrong<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareStrong<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareStrong<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareStrong<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  return error;
}

int test_atomic_compare_exchange_strong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_compare_exchange_strong_generic(deviceID, context, queue, num_elements, false);
}

int test_svm_atomic_compare_exchange_strong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_compare_exchange_strong_generic(deviceID, context, queue, num_elements, true);
}

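// atomic_compare_exchange_weak: same protocol as the strong test, but the
// weak variant is permitted to fail spuriously, so the kernel simply retries
// until a CAS succeeds instead of treating 'previous == expected' on failure
// as an error.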
template<typename HostAtomicType, typename HostDataType>
class CBasicTestCompareWeak : public CBasicTestCompareStrong<HostAtomicType, HostDataType>
{
public:
  using CBasicTestCompareStrong<HostAtomicType, HostDataType>::StartValue;
  using CBasicTestCompareStrong<HostAtomicType, HostDataType>::MemoryOrderScope;
  using CBasicTestCompareStrong<HostAtomicType, HostDataType>::DataType;
  using CBasicTestCompareStrong<HostAtomicType, HostDataType>::Iterations;
  using CBasicTestCompareStrong<HostAtomicType, HostDataType>::IterationsStr;
  CBasicTestCompareWeak(TExplicitAtomicType dataType, bool useSVM) : CBasicTestCompareStrong<HostAtomicType, HostDataType>(dataType, useSVM)
  {
  }
  virtual std::string ProgramCore()
  {
    std::string memoryOrderScope = MemoryOrderScope();
    std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
    return
      std::string("  ")+DataType().RegularTypeName()+" expected, previous;\n"
      "  int successCount = 0;\n"
      "  oldValues[tid] = tid;\n"
      "  expected = tid;  // force failure at the beginning\n"
      "  if(atomic_compare_exchange_weak"+postfix+"(&destMemory[0], &expected, oldValues[tid]"+memoryOrderScope+") || expected == tid)\n"
      "    oldValues[tid] = threadCount+1; // mark unexpected success with invalid value\n"
      "  else\n"
      "  {\n"
      "    for(int i = 0; i < "+IterationsStr()+" || successCount == 0; i++)\n"
      "    {\n"
      "      previous = expected;\n"
      "      if(atomic_compare_exchange_weak"+postfix+"(&destMemory[0], &expected, oldValues[tid]"+memoryOrderScope+"))\n"
      "      {\n"
      "        oldValues[tid] = expected;\n"
      "        successCount++;\n"
      "      }\n"
      "    }\n"
      "  }\n";
  }
};

int test_atomic_compare_exchange_weak_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
  int error = 0;
  CBasicTestCompareWeak<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
  EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
  CBasicTestCompareWeak<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
  EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
  CBasicTestCompareWeak<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
  EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
  CBasicTestCompareWeak<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
  EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
  if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
  {
    CBasicTestCompareWeak<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareWeak<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareWeak<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareWeak<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  else
  {
    CBasicTestCompareWeak<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareWeak<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareWeak<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareWeak<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  return error;
}

int test_atomic_compare_exchange_weak(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_compare_exchange_weak_generic(deviceID, context, queue, num_elements, false);
}

int test_svm_atomic_compare_exchange_weak(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_compare_exchange_weak_generic(deviceID, context, queue, num_elements, true);
}

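// atomic_fetch_add / atomic_fetch_add_explicit: each work-item adds (tid + 3)
// to destMemory[0] three times, plus the same quantity shifted into the most
// significant byte (<< (sizeof(T)-1)*8), exercising carries across the full
// width of the type. ExpectedValue accumulates the identical sum on the host.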
template<typename HostAtomicType, typename HostDataType>
class CBasicTestFetchAdd : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
public:
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
  CBasicTestFetchAdd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
  {
  }
  virtual std::string ProgramCore()
  {
    std::string memoryOrderScope = MemoryOrderScopeStr();
    std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
    return
      "  oldValues[tid] = atomic_fetch_add"+postfix+"(&destMemory[0], ("+DataType().AddSubOperandTypeName()+")tid + 3"+memoryOrderScope+");\n"+
      "  atomic_fetch_add"+postfix+"(&destMemory[0], ("+DataType().AddSubOperandTypeName()+")tid + 3"+memoryOrderScope+");\n"
      "  atomic_fetch_add"+postfix+"(&destMemory[0], ("+DataType().AddSubOperandTypeName()+")tid + 3"+memoryOrderScope+");\n"
      "  atomic_fetch_add"+postfix+"(&destMemory[0], (("+DataType().AddSubOperandTypeName()+")tid + 3) << (sizeof("+DataType().AddSubOperandTypeName()+")-1)*8"+memoryOrderScope+");\n";
  }
  virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
  {
    oldValues[tid] = host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, MemoryOrder());
    host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, MemoryOrder());
    host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, MemoryOrder());
    host_atomic_fetch_add(&destMemory[0], ((HostDataType)tid + 3) << (sizeof(HostDataType)-1)*8, MemoryOrder());
  }
  virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
  {
    expected = StartValue();
    for(cl_uint i = 0; i < threadCount; i++)
      expected += ((HostDataType)i+3)*3+(((HostDataType)i + 3) << (sizeof(HostDataType)-1)*8);
    return true;
  }
};

int test_atomic_fetch_add_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
  int error = 0;
  CBasicTestFetchAdd<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
  EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
  CBasicTestFetchAdd<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
  EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
  CBasicTestFetchAdd<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
  EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
  CBasicTestFetchAdd<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
  EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
  if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
  {
    CBasicTestFetchAdd<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchAdd<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchAdd<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchAdd<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  else
  {
    CBasicTestFetchAdd<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchAdd<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchAdd<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchAdd<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  return error;
}

int test_atomic_fetch_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_fetch_add_generic(deviceID, context, queue, num_elements, false);
}

int test_svm_atomic_fetch_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_fetch_add_generic(deviceID, context, queue, num_elements, true);
}

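// atomic_fetch_sub / atomic_fetch_sub_explicit: the mirror of the fetch_add
// test; each work-item subtracts (tid + 3) plus the same quantity shifted
// into the most significant byte from destMemory[0], and ExpectedValue
// accumulates the matching subtractions.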
template<typename HostAtomicType, typename HostDataType>
class CBasicTestFetchSub : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
public:
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
  CBasicTestFetchSub(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
  {
  }
  virtual std::string ProgramCore()
  {
    std::string memoryOrderScope = MemoryOrderScopeStr();
    std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
    return
      "  oldValues[tid] = atomic_fetch_sub"+postfix+"(&destMemory[0], tid + 3 +((("+DataType().AddSubOperandTypeName()+")tid + 3) << (sizeof("+DataType().AddSubOperandTypeName()+")-1)*8)"+memoryOrderScope+");\n";
  }
  virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
  {
    oldValues[tid] = host_atomic_fetch_sub(&destMemory[0], (HostDataType)tid + 3+(((HostDataType)tid + 3) << (sizeof(HostDataType)-1)*8), MemoryOrder());
  }
  virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
  {
    expected = StartValue();
    for(cl_uint i = 0; i < threadCount; i++)
      expected -= (HostDataType)i + 3 +(((HostDataType)i + 3) << (sizeof(HostDataType)-1)*8);
    return true;
  }
};

int test_atomic_fetch_sub_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
  int error = 0;
  CBasicTestFetchSub<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
  EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
  CBasicTestFetchSub<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
  EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
  CBasicTestFetchSub<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
  EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
  CBasicTestFetchSub<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
  EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
  if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
  {
    CBasicTestFetchSub<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchSub<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchSub<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchSub<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  else
  {
    CBasicTestFetchSub<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchSub<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchSub<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchSub<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  return error;
}

int test_atomic_fetch_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_fetch_sub_generic(deviceID, context, queue, num_elements, false);
}

int test_svm_atomic_fetch_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_fetch_sub_generic(deviceID, context, queue, num_elements, true);
}

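// atomic_fetch_or / atomic_fetch_or_explicit: work-items cooperatively build
// an all-ones bit pattern, with thread tid setting bit tid%numBits of
// destMemory[tid/numBits]. Every result word should end up as ~0 except
// possibly the last, which receives fewer bits and is checked against an
// explicitly constructed mask.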
875 template<typename HostAtomicType, typename HostDataType>
876 class CBasicTestFetchOr : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
877 {
878 public:
879   using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
880   using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
881   using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
882   using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
CBasicTestFetchOr(TExplicitAtomicType dataType,bool useSVM)883   CBasicTestFetchOr(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
884   {
885     StartValue(0);
886   }
NumResults(cl_uint threadCount,cl_device_id deviceID)887   virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
888   {
889     cl_uint numBits = DataType().Size(deviceID) * 8;
890 
891     return (threadCount + numBits - 1) / numBits;
892   }
ProgramCore()893   virtual std::string ProgramCore()
894   {
895     std::string memoryOrderScope = MemoryOrderScopeStr();
896     std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
897     return
898       std::string("    size_t numBits = sizeof(")+DataType().RegularTypeName()+") * 8;\n"
899       "    int whichResult = tid / numBits;\n"
900       "    int bitIndex = tid - (whichResult * numBits);\n"
901       "\n"
902       "    oldValues[tid] = atomic_fetch_or"+postfix+"(&destMemory[whichResult], (("+DataType().RegularTypeName()+")1 << bitIndex) "+memoryOrderScope+");\n";
903   }
  virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
  {
    size_t numBits = sizeof(HostDataType) * 8;
    size_t whichResult = tid / numBits;
    size_t bitIndex = tid - (whichResult * numBits);

    oldValues[tid] = host_atomic_fetch_or(&destMemory[whichResult], ((HostDataType)1 << bitIndex), MemoryOrder());
  }
  virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
  {
    cl_uint numValues = (threadCount + (sizeof(HostDataType)*8-1)) / (sizeof(HostDataType)*8);
    if(whichDestValue < numValues - 1)
    {
      expected = ~(HostDataType)0;
      return true;
    }
    // Last item doesn't get OR'ed on every bit, so we have to mask away
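    // e.g. with 32-bit types and threadCount == 40 the last destination only
    // collects bits 0..7 (40 - 32 == 8 threads), so expected ends up 0xFF.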
    cl_uint numBits = threadCount - whichDestValue * (sizeof(HostDataType)*8);
    expected = StartValue();
    for(cl_uint i = 0; i < numBits; i++)
      expected |= ((HostDataType)1 << i);
    return true;
  }
};

int test_atomic_fetch_or_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
  int error = 0;
  CBasicTestFetchOr<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
  EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
  CBasicTestFetchOr<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
  EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
  CBasicTestFetchOr<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
  EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
  CBasicTestFetchOr<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
  EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
  if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
  {
    CBasicTestFetchOr<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchOr<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchOr<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchOr<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  else
  {
    CBasicTestFetchOr<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchOr<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchOr<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchOr<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  return error;
}

int test_atomic_fetch_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_fetch_or_generic(deviceID, context, queue, num_elements, false);
}

int test_svm_atomic_fetch_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_fetch_or_generic(deviceID, context, queue, num_elements, true);
}

template<typename HostAtomicType, typename HostDataType>
class CBasicTestFetchXor : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
public:
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
  CBasicTestFetchXor(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
  {
    StartValue((HostDataType)0x2f08ab418ba0541LL);
  }
  virtual std::string ProgramCore()
  {
    std::string memoryOrderScope = MemoryOrderScopeStr();
    std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
    return
      std::string("  int numBits = sizeof(")+DataType().RegularTypeName()+") * 8;\n"
      "  int bitIndex = (numBits-1)*(tid+1)/threadCount;\n"
      "\n"
      "  oldValues[tid] = atomic_fetch_xor"+postfix+"(&destMemory[0], (("+DataType().RegularTypeName()+")1 << bitIndex) "+memoryOrderScope+");\n";
  }
  virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
  {
    int numBits = sizeof(HostDataType) * 8;
    int bitIndex = (numBits-1)*(tid+1)/threadCount;

    oldValues[tid] = host_atomic_fetch_xor(&destMemory[0], ((HostDataType)1 << bitIndex), MemoryOrder());
  }
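  // The bit index is spread across threads: with numBits == 32 and
  // threadCount == 64, tid 0 XORs bit (31*1)/64 == 0 and tid 63 XORs bit
  // (31*64)/64 == 31. A bit may be flipped by several threads; ExpectedValue()
  // below replays the identical sequence, so the final parity still matches.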
  virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
  {
    int numBits = sizeof(HostDataType)*8;
    expected = StartValue();
    for(cl_uint i = 0; i < threadCount; i++)
    {
      int bitIndex = (numBits-1)*(i+1)/threadCount;
      expected ^= ((HostDataType)1 << bitIndex);
    }
    return true;
  }
};

int test_atomic_fetch_xor_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
  int error = 0;
  CBasicTestFetchXor<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
  EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
  CBasicTestFetchXor<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
  EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
  CBasicTestFetchXor<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
  EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
  CBasicTestFetchXor<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
  EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
  if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
  {
    CBasicTestFetchXor<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchXor<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchXor<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchXor<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  else
  {
    CBasicTestFetchXor<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchXor<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchXor<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchXor<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  return error;
}

int test_atomic_fetch_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_fetch_xor_generic(deviceID, context, queue, num_elements, false);
}

int test_svm_atomic_fetch_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_fetch_xor_generic(deviceID, context, queue, num_elements, true);
}

template<typename HostAtomicType, typename HostDataType>
class CBasicTestFetchAnd : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
public:
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
  CBasicTestFetchAnd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
  {
    StartValue(~(HostDataType)0);
  }
  virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
  {
    cl_uint numBits = DataType().Size(deviceID) * 8;

    return (threadCount + numBits - 1) / numBits;
  }
  virtual std::string ProgramCore()
  {
    std::string memoryOrderScope = MemoryOrderScopeStr();
    std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
    return
      std::string("  size_t numBits = sizeof(")+DataType().RegularTypeName()+") * 8;\n"
      "  int whichResult = tid / numBits;\n"
      "  int bitIndex = tid - (whichResult * numBits);\n"
      "\n"
      "  oldValues[tid] = atomic_fetch_and"+postfix+"(&destMemory[whichResult], ~(("+DataType().RegularTypeName()+")1 << bitIndex) "+memoryOrderScope+");\n";
  }
  virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
  {
    size_t numBits = sizeof(HostDataType) * 8;
    size_t whichResult = tid / numBits;
    size_t bitIndex = tid - (whichResult * numBits);

    oldValues[tid] = host_atomic_fetch_and(&destMemory[whichResult], ~((HostDataType)1 << bitIndex), MemoryOrder());
  }
  virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
  {
    cl_uint numValues = (threadCount + (sizeof(HostDataType)*8-1)) / (sizeof(HostDataType)*8);
    if(whichDestValue < numValues - 1)
    {
      expected = 0;
      return true;
    }
    // Last item doesn't get AND'ed on every bit, so we have to mask away
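    // Mirror of the fetch_or case: with 32-bit types and threadCount == 40
    // the last destination only has bits 0..7 cleared, so bits 8..31 remain
    // set from the all-ones start value.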
    size_t numBits = threadCount - whichDestValue * (sizeof(HostDataType)*8);
    expected = StartValue();
    for(size_t i = 0; i < numBits; i++)
      expected &= ~((HostDataType)1 << i);
    return true;
  }
};

int test_atomic_fetch_and_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
  int error = 0;
  CBasicTestFetchAnd<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
  EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
  CBasicTestFetchAnd<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
  EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
  CBasicTestFetchAnd<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
  EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
  CBasicTestFetchAnd<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
  EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
  if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
  {
    CBasicTestFetchAnd<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchAnd<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchAnd<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchAnd<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  else
  {
    CBasicTestFetchAnd<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchAnd<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchAnd<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchAnd<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  return error;
}

int test_atomic_fetch_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_fetch_and_generic(deviceID, context, queue, num_elements, false);
}

int test_svm_atomic_fetch_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_fetch_and_generic(deviceID, context, queue, num_elements, true);
}

template<typename HostAtomicType, typename HostDataType>
class CBasicTestFetchOrAnd : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
public:
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::Iterations;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::IterationsStr;
  CBasicTestFetchOrAnd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
  {
    StartValue(0);
  }
  virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
  {
    return 1+(threadCount-1)/(DataType().Size(deviceID)*8);
  }
  // Each thread modifies (with OR and AND operations) and verifies only one
  // bit of the atomic variable; other bits are modified concurrently by other
  // threads, but that must not affect the current thread's operations.
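  // Worked example of the index math used below (32-bit type): tid == 37
  // gives valueInd == 37/32 == 1 and bitMask == 1 << (37 % 32) == 1 << 5.
  // Each iteration ORs the bit in (the old value must have it clear) and then
  // ANDs it back out (the old value must have it set); any violation is
  // counted in oldValues[tid] and reported by VerifyRefs().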
  virtual std::string ProgramCore()
  {
    std::string memoryOrderScope = MemoryOrderScopeStr();
    std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
    return
      std::string("  int bits = sizeof(")+DataType().RegularTypeName()+")*8;\n"+
      "  size_t valueInd = tid/bits;\n"
      "  "+DataType().RegularTypeName()+" value, bitMask = ("+DataType().RegularTypeName()+")1 << tid%bits;\n"
      "  oldValues[tid] = 0;\n"
      "  for(int i = 0; i < "+IterationsStr()+"; i++)\n"
      "  {\n"
      "    value = atomic_fetch_or"+postfix+"(destMemory+valueInd, bitMask"+memoryOrderScope+");\n"
      "    if(value & bitMask) // bit should be set to 0\n"
      "      oldValues[tid]++;\n"
      "    value = atomic_fetch_and"+postfix+"(destMemory+valueInd, ~bitMask"+memoryOrderScope+");\n"
      "    if(!(value & bitMask)) // bit should be set to 1\n"
      "      oldValues[tid]++;\n"
      "  }\n";
  }
  virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
  {
    int bits = sizeof(HostDataType)*8;
    size_t valueInd = tid/bits;
    HostDataType value, bitMask = (HostDataType)1 << tid%bits;
    oldValues[tid] = 0;
    for(int i = 0; i < Iterations(); i++)
    {
      value = host_atomic_fetch_or(destMemory+valueInd, bitMask, MemoryOrder());
      if(value & bitMask) // bit should be set to 0
        oldValues[tid]++;
      value = host_atomic_fetch_and(destMemory+valueInd, ~bitMask, MemoryOrder());
      if(!(value & bitMask)) // bit should be set to 1
        oldValues[tid]++;
    }
  }
  virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
  {
    expected = 0;
    return true;
  }
  virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues)
  {
    correct = true;
    for(cl_uint i = 0; i < threadCount; i++)
    {
      if(refValues[i] > 0)
      {
        log_error("Thread %u found %u mismatch(es)\n", i, (cl_uint)refValues[i]);
        correct = false;
      }
    }
    return true;
  }
};

int test_atomic_fetch_orand_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
  int error = 0;
  CBasicTestFetchOrAnd<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
  EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
  CBasicTestFetchOrAnd<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
  EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
  CBasicTestFetchOrAnd<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
  EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
  CBasicTestFetchOrAnd<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
  EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
  if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
  {
    CBasicTestFetchOrAnd<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchOrAnd<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchOrAnd<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchOrAnd<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  else
  {
    CBasicTestFetchOrAnd<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchOrAnd<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchOrAnd<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchOrAnd<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  return error;
}

int test_atomic_fetch_orand(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_fetch_orand_generic(deviceID, context, queue, num_elements, false);
}

int test_svm_atomic_fetch_orand(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_fetch_orand_generic(deviceID, context, queue, num_elements, true);
}

template<typename HostAtomicType, typename HostDataType>
class CBasicTestFetchXor2 : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
public:
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::Iterations;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::IterationsStr;
  CBasicTestFetchXor2(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
  {
    StartValue(0);
  }
  virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
  {
    return 1+(threadCount-1)/(DataType().Size(deviceID)*8);
  }
  // Each thread modifies (with XOR operations) and verifies only one bit of
  // the atomic variable; other bits are modified concurrently by other
  // threads, but that must not affect the current thread's operations.
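  // Same single-bit scheme as CBasicTestFetchOrAnd, but both halves of the
  // round trip use XOR: the first XOR must observe the bit clear, the second
  // must observe it set, regardless of what other threads do to other bits.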
  virtual std::string ProgramCore()
  {
    std::string memoryOrderScope = MemoryOrderScopeStr();
    std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
    return
      std::string("  int bits = sizeof(")+DataType().RegularTypeName()+")*8;\n"+
      "  size_t valueInd = tid/bits;\n"
      "  "+DataType().RegularTypeName()+" value, bitMask = ("+DataType().RegularTypeName()+")1 << tid%bits;\n"
      "  oldValues[tid] = 0;\n"
      "  for(int i = 0; i < "+IterationsStr()+"; i++)\n"
      "  {\n"
      "    value = atomic_fetch_xor"+postfix+"(destMemory+valueInd, bitMask"+memoryOrderScope+");\n"
      "    if(value & bitMask) // bit should be set to 0\n"
      "      oldValues[tid]++;\n"
      "    value = atomic_fetch_xor"+postfix+"(destMemory+valueInd, bitMask"+memoryOrderScope+");\n"
      "    if(!(value & bitMask)) // bit should be set to 1\n"
      "      oldValues[tid]++;\n"
      "  }\n";
  }
  virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
  {
    int bits = sizeof(HostDataType)*8;
    size_t valueInd = tid/bits;
    HostDataType value, bitMask = (HostDataType)1 << tid%bits;
    oldValues[tid] = 0;
    for(int i = 0; i < Iterations(); i++)
    {
      value = host_atomic_fetch_xor(destMemory+valueInd, bitMask, MemoryOrder());
      if(value & bitMask) // bit should be set to 0
        oldValues[tid]++;
      value = host_atomic_fetch_xor(destMemory+valueInd, bitMask, MemoryOrder());
      if(!(value & bitMask)) // bit should be set to 1
        oldValues[tid]++;
    }
  }
  virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
  {
    expected = 0;
    return true;
  }
  virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues)
  {
    correct = true;
    for(cl_uint i = 0; i < threadCount; i++)
    {
      if(refValues[i] > 0)
      {
        log_error("Thread %u found %u mismatches\n", i, (cl_uint)refValues[i]);
        correct = false;
      }
    }
    return true;
  }
};

int test_atomic_fetch_xor2_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
  int error = 0;
  CBasicTestFetchXor2<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
  EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
  CBasicTestFetchXor2<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
  EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
  CBasicTestFetchXor2<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
  EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
  CBasicTestFetchXor2<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
  EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
  if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
  {
    CBasicTestFetchXor2<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchXor2<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchXor2<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchXor2<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  else
  {
    CBasicTestFetchXor2<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchXor2<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchXor2<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchXor2<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  return error;
}

int test_atomic_fetch_xor2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_fetch_xor2_generic(deviceID, context, queue, num_elements, false);
}

int test_svm_atomic_fetch_xor2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_fetch_xor2_generic(deviceID, context, queue, num_elements, true);
}

template<typename HostAtomicType, typename HostDataType>
class CBasicTestFetchMin : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
public:
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
  CBasicTestFetchMin(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
  {
    StartValue(DataType().MaxValue());
  }
  virtual std::string ProgramCore()
  {
    std::string memoryOrderScope = MemoryOrderScopeStr();
    std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
    return
      "  oldValues[tid] = atomic_fetch_min"+postfix+"(&destMemory[0], oldValues[tid] "+memoryOrderScope+");\n";
  }
  virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
  {
    oldValues[tid] = host_atomic_fetch_min(&destMemory[0], oldValues[tid], MemoryOrder());
  }
  virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d)
  {
    for(cl_uint i = 0; i < threadCount; i++)
    {
      startRefValues[i] = genrand_int32(d);
      if(sizeof(HostDataType) >= 8)
        startRefValues[i] |= (HostDataType)genrand_int32(d) << 16;
    }
    return true;
  }
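  // Note: for 64-bit types the second genrand_int32() is shifted left by 16,
  // so the reference values span about 48 bits (the top 16 bits stay zero),
  // presumably enough to exercise comparisons above the 32-bit boundary.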
  virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
  {
    expected = StartValue();
    for(cl_uint i = 0; i < threadCount; i++)
    {
      if(startRefValues[i] < expected)
        expected = startRefValues[i];
    }
    return true;
  }
};

int test_atomic_fetch_min_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
  int error = 0;
  CBasicTestFetchMin<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
  EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
  CBasicTestFetchMin<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
  EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
  CBasicTestFetchMin<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
  EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
  CBasicTestFetchMin<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
  EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
  if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
  {
    CBasicTestFetchMin<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchMin<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchMin<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchMin<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  else
  {
    CBasicTestFetchMin<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchMin<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchMin<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchMin<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  return error;
}

int test_atomic_fetch_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_fetch_min_generic(deviceID, context, queue, num_elements, false);
}

int test_svm_atomic_fetch_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_fetch_min_generic(deviceID, context, queue, num_elements, true);
}

template<typename HostAtomicType, typename HostDataType>
class CBasicTestFetchMax : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
public:
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
  CBasicTestFetchMax(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
  {
    StartValue(DataType().MinValue());
  }
  virtual std::string ProgramCore()
  {
    std::string memoryOrderScope = MemoryOrderScopeStr();
    std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
    return
      "  oldValues[tid] = atomic_fetch_max"+postfix+"(&destMemory[0], oldValues[tid] "+memoryOrderScope+");\n";
  }
  virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
  {
    oldValues[tid] = host_atomic_fetch_max(&destMemory[0], oldValues[tid], MemoryOrder());
  }
  virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d)
  {
    for(cl_uint i = 0; i < threadCount; i++)
    {
      startRefValues[i] = genrand_int32(d);
      if(sizeof(HostDataType) >= 8)
        startRefValues[i] |= (HostDataType)genrand_int32(d) << 16;
    }
    return true;
  }
  virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
  {
    expected = StartValue();
    for(cl_uint i = 0; i < threadCount; i++)
    {
      if(startRefValues[i] > expected)
        expected = startRefValues[i];
    }
    return true;
  }
};

int test_atomic_fetch_max_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
  int error = 0;
  CBasicTestFetchMax<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
  EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
  CBasicTestFetchMax<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
  EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
  CBasicTestFetchMax<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
  EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
  CBasicTestFetchMax<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
  EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
  if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
  {
    CBasicTestFetchMax<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchMax<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchMax<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchMax<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  else
  {
    CBasicTestFetchMax<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchMax<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchMax<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchMax<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  return error;
}

int test_atomic_fetch_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_fetch_max_generic(deviceID, context, queue, num_elements, false);
}

int test_svm_atomic_fetch_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_fetch_max_generic(deviceID, context, queue, num_elements, true);
}

template<typename HostAtomicType, typename HostDataType>
class CBasicTestFlag : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
  static const HostDataType CRITICAL_SECTION_NOT_VISITED = 1000000000;
public:
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::UseSVM;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
  CBasicTestFlag(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
  {
    StartValue(0);
    OldValueCheck(false);
  }
  virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
  {
    return threadCount;
  }
  TExplicitMemoryOrderType MemoryOrderForClear()
  {
    // Memory ordering for the atomic_flag_clear function
    // ("shall not be memory_order_acquire nor memory_order_acq_rel")
    if(MemoryOrder() == MEMORY_ORDER_ACQUIRE)
      return MEMORY_ORDER_RELAXED;
    if (MemoryOrder() == MEMORY_ORDER_ACQ_REL)
      return MEMORY_ORDER_RELEASE;
    return MemoryOrder();
  }
  std::string MemoryOrderScopeStrForClear()
  {
    std::string orderStr;
    if (MemoryOrder() != MEMORY_ORDER_EMPTY)
      orderStr = std::string(", ") + get_memory_order_type_name(MemoryOrderForClear());
    return orderStr + MemoryScopeStr();
  }

  virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue)
  {
      // This test assumes support for the memory_scope_device scope in the
      // case that LocalMemory() == false. Therefore we should skip this test
      // in that configuration on a 3.0 driver, since support for the
      // memory_scope_device scope is optional.
      if (get_device_cl_version(deviceID) >= Version{ 3, 0 })
      {
          if (!LocalMemory()
              && !(gAtomicFenceCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE))
          {
              log_info(
                  "Skipping atomic_flag test due to use of atomic_scope_device, "
                  "which this device is not required to support\n");
              return 0; // skip test - not applicable
          }
      }
      return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue);
  }
  virtual std::string ProgramCore()
  {
    std::string memoryOrderScope = MemoryOrderScopeStr();
    std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
    std::string program =
      "  uint cnt, stop = 0;\n"
      "  for(cnt = 0; !stop && cnt < threadCount; cnt++) // each thread must find a critical section where it is the first visitor\n"
      "  {\n"
      "    bool set = atomic_flag_test_and_set" + postfix + "(&destMemory[cnt]" + memoryOrderScope + ");\n";
    if (MemoryOrder() == MEMORY_ORDER_RELAXED || MemoryOrder() == MEMORY_ORDER_RELEASE)
      program += "    atomic_work_item_fence(" +
                 std::string(LocalMemory() ? "CLK_LOCAL_MEM_FENCE, " : "CLK_GLOBAL_MEM_FENCE, ") +
                 "memory_order_acquire," +
                 std::string(LocalMemory() ? "memory_scope_work_group" : (UseSVM() ? "memory_scope_all_svm_devices" : "memory_scope_device") ) +
                 ");\n";

    program +=
      "    if (!set)\n"
      "    {\n";

    if (LocalMemory())
      program += "      uint csIndex = get_enqueued_local_size(0)*get_group_id(0)+cnt;\n";
    else
      program += "      uint csIndex = cnt;\n";

    std::ostringstream csNotVisited;
    csNotVisited << CRITICAL_SECTION_NOT_VISITED;
    program +=
      "      // verify that thread is the first visitor\n"
      "      if(oldValues[csIndex] == "+csNotVisited.str()+")\n"
      "      {\n"
      "        oldValues[csIndex] = tid; // set the winner id for this critical section\n"
      "        stop = 1;\n"
      "      }\n";

    if (MemoryOrder() == MEMORY_ORDER_ACQUIRE || MemoryOrder() == MEMORY_ORDER_RELAXED)
      program += "      atomic_work_item_fence(" +
                 std::string(LocalMemory() ? "CLK_LOCAL_MEM_FENCE, " : "CLK_GLOBAL_MEM_FENCE, ") +
                 "memory_order_release," +
                 std::string(LocalMemory() ? "memory_scope_work_group" : (UseSVM() ? "memory_scope_all_svm_devices" : "memory_scope_device") ) +
                 ");\n";

    program +=
      "      atomic_flag_clear" + postfix + "(&destMemory[cnt]" + MemoryOrderScopeStrForClear() + ");\n"
      "    }\n"
      "  }\n";
    return program;
  }
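  // The generated kernel is a simple lock-based handshake: each thread walks
  // the flag array, atomic_flag_test_and_set() acts as a per-section lock,
  // and the first thread to enter an unvisited section records its tid in
  // oldValues[] and stops. The extra atomic_work_item_fence() calls add the
  // acquire (after the set) or release (before the clear) semantics that the
  // relaxed/release or relaxed/acquire flag orders lack, so the oldValues[]
  // accesses stay properly synchronized.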
  virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
  {
    cl_uint cnt, stop = 0;
    for (cnt = 0; !stop && cnt < threadCount; cnt++) // each thread must find a critical section where it is the first visitor
    {
      if (!host_atomic_flag_test_and_set(&destMemory[cnt], MemoryOrder()))
      {
        cl_uint csIndex = cnt;
        // verify that thread is the first visitor
        if (oldValues[csIndex] == CRITICAL_SECTION_NOT_VISITED)
        {
          oldValues[csIndex] = tid; // set the winner id for this critical section
          stop = 1;
        }
        host_atomic_flag_clear(&destMemory[cnt], MemoryOrderForClear());
      }
    }
  }
  virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
  {
    expected = StartValue();
    return true;
  }
  virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d)
  {
    for(cl_uint i = 0; i < threadCount; i++)
      startRefValues[i] = CRITICAL_SECTION_NOT_VISITED;
    return true;
  }
  virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues)
  {
    correct = true;
    /* We are expecting unique values from 0 to threadCount-1 (each critical section must be visited) */
    /* These values must be distributed across the refValues array */
    std::vector<bool> tidFound(threadCount);
    cl_uint i;

    for (i = 0; i < threadCount; i++)
    {
      cl_uint value = (cl_uint)refValues[i];
      if (value == CRITICAL_SECTION_NOT_VISITED)
      {
        // Special initial value
        log_error("ERROR: Critical section %u not visited\n", i);
        correct = false;
        return true;
      }
      if (value >= threadCount)
      {
        log_error("ERROR: Reference value %u outside of valid range! (%u)\n", i, value);
        correct = false;
        return true;
      }
      if (tidFound[value])
      {
        log_error("ERROR: Value (%u) occurred more than once\n", value);
        correct = false;
        return true;
      }
      tidFound[value] = true;
    }
    return true;
  }
};

int test_atomic_flag_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
  int error = 0;
  CBasicTestFlag<HOST_ATOMIC_FLAG, HOST_FLAG> test_flag(TYPE_ATOMIC_FLAG, useSVM);
  EXECUTE_TEST(error, test_flag.Execute(deviceID, context, queue, num_elements));
  return error;
}

int test_atomic_flag(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_flag_generic(deviceID, context, queue, num_elements, false);
}

int test_svm_atomic_flag(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_flag_generic(deviceID, context, queue, num_elements, true);
}

template<typename HostAtomicType, typename HostDataType>
class CBasicTestFence : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
  struct TestDefinition {
    bool op1IsFence;
    TExplicitMemoryOrderType op1MemOrder;
    bool op2IsFence;
    TExplicitMemoryOrderType op2MemOrder;
  };
public:
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DeclaredInProgram;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::UsedInFunction;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::CurrentGroupSize;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::UseSVM;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalRefValues;
  CBasicTestFence(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
  {
    StartValue(0);
    OldValueCheck(false);
  }
  virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
  {
    return threadCount;
  }
  virtual cl_uint NumNonAtomicVariablesPerThread()
  {
    if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
      return 1;
    if (LocalMemory())
    {
      if (gIsEmbedded)
      {
        if (CurrentGroupSize() > 1024)
          CurrentGroupSize(1024);
        return 1; // 1KB of local memory required by spec; clamp the group size to 1K and allow 1 variable per thread
      }
      else
        return 32 * 1024 / 8 / CurrentGroupSize() - 1; // 32KB of local memory required by spec
    }
    return 256;
  }
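  // Worked example for the non-embedded local-memory path above: with a group
  // size of 256 work-items, 32 * 1024 / 8 / 256 - 1 == 15 variables per
  // thread (the divide by 8 presumably budgets for the largest, 8-byte, data
  // type; the final -1 leaves room for the atomic itself).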
SingleTestName()1830   virtual std::string SingleTestName()
1831   {
1832     std::string testName;
1833     if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
1834       testName += "seq_cst fence, ";
1835     else
1836       testName += std::string(get_memory_order_type_name(_subCase.op1MemOrder)).substr(sizeof("memory_order"))
1837         + (_subCase.op1IsFence ? " fence" : " atomic") + " synchronizes-with "
1838         + std::string(get_memory_order_type_name(_subCase.op2MemOrder)).substr(sizeof("memory_order"))
1839         + (_subCase.op2IsFence ? " fence" : " atomic") + ", ";
1840     testName += CBasicTest<HostAtomicType, HostDataType>::SingleTestName();
1841     testName += std::string(", ") + std::string(get_memory_scope_type_name(MemoryScope())).substr(sizeof("memory"));
1842     return testName;
1843   }
SVMDataBufferAllSVMConsistent()1844   virtual bool SVMDataBufferAllSVMConsistent()
1845   {
1846       // Although memory_scope_all_devices doesn't mention SVM it is just an
1847       // alias for memory_scope_all_svm_devices.  So both scopes interact with
1848       // SVM allocations, on devices that support those, just the same.
1849       return MemoryScope() == MEMORY_SCOPE_ALL_DEVICES
1850           || MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES;
1851   }
  virtual int ExecuteForEachParameterSet(cl_device_id deviceID, cl_context context, cl_command_queue queue)
  {
    int error = 0;
    // execute up to 3 sub-cases for each memory order; ids beyond the table
    // for the current order are skipped in ExecuteSingleTest
    for (_subCaseId = 0; _subCaseId < 3; _subCaseId++)
    {
      EXECUTE_TEST(error, (CBasicTestMemOrderScope<HostAtomicType, HostDataType>::ExecuteForEachParameterSet(deviceID, context, queue)));
    }
    return error;
  }
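  // Every pairing in the tables below places a fence on at least one side of
  // the release/acquire handshake; the pure atomic-atomic combinations are
  // outside the scope of this fence test.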
  virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue)
  {
    if(DeclaredInProgram() || UsedInFunction())
      return 0; //skip test - not applicable - no overloaded fence functions for different address spaces
    if(MemoryOrder() == MEMORY_ORDER_EMPTY ||
      MemoryScope() == MEMORY_SCOPE_EMPTY) // empty 'scope' not required since opencl20-openclc-rev15
      return 0; //skip test - not applicable
    if((UseSVM() || gHost)
      && LocalMemory())
      return 0; // skip test - not applicable for SVM and local memory
    struct TestDefinition acqTests[] = {
      // {op1IsFence, op1MemOrder, op2IsFence, op2MemOrder}
      { false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE },
      { true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE },
      { true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQUIRE }
    };
    struct TestDefinition relTests[] = {
      { true, MEMORY_ORDER_RELEASE, false, MEMORY_ORDER_ACQUIRE },
      { true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL }
    };
    struct TestDefinition arTests[] = {
      { false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL },
      { true, MEMORY_ORDER_ACQ_REL, false, MEMORY_ORDER_ACQUIRE },
      { true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQ_REL }
    };
    switch (MemoryOrder())
    {
    case MEMORY_ORDER_ACQUIRE:
      if (_subCaseId >= sizeof(acqTests) / sizeof(struct TestDefinition))
        return 0;
      _subCase = acqTests[_subCaseId];
      break;
    case MEMORY_ORDER_RELEASE:
      if (_subCaseId >= sizeof(relTests) / sizeof(struct TestDefinition))
        return 0;
      _subCase = relTests[_subCaseId];
      break;
    case MEMORY_ORDER_ACQ_REL:
      if (_subCaseId >= sizeof(arTests) / sizeof(struct TestDefinition))
        return 0;
      _subCase = arTests[_subCaseId];
      break;
    case MEMORY_ORDER_SEQ_CST:
      if (_subCaseId != 0) // one special case only
        return 0;
      break;
    default:
      return 0;
    }
    LocalRefValues(LocalMemory());
    return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue);
  }
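  // Under the pre-OpenCL-2.0 API (gOldAPI) there is no atomic_work_item_fence;
  // map it onto the legacy mem_fence built-in, dropping the order (and scope)
  // arguments.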
  virtual std::string ProgramHeader(cl_uint maxNumDestItems)
  {
    std::string header;
    if(gOldAPI)
    {
      if(MemoryScope() == MEMORY_SCOPE_EMPTY)
      {
        header += "#define atomic_work_item_fence(x,y)                        mem_fence(x)\n";
      }
      else
      {
        header += "#define atomic_work_item_fence(x,y,z)                      mem_fence(x)\n";
      }
    }
    return header+CBasicTestMemOrderScope<HostAtomicType, HostDataType>::ProgramHeader(maxNumDestItems);
  }
  virtual std::string ProgramCore()
  {
    std::ostringstream naValues;
    naValues << NumNonAtomicVariablesPerThread();
    std::string program, fenceType, nonAtomic;
    if (LocalMemory())
    {
      program = "  size_t myId = get_local_id(0), hisId = get_local_size(0)-1-myId;\n";
      fenceType = "CLK_LOCAL_MEM_FENCE";
      nonAtomic = "localValues";
    }
    else
    {
      program = "  size_t myId = tid, hisId = threadCount-1-tid;\n";
      fenceType = "CLK_GLOBAL_MEM_FENCE";
      nonAtomic = "oldValues";
    }
    if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
    {
      // All threads are divided into pairs.
      // Each thread has its own atomic variable and performs the following actions:
      // - increments its own variable
      // - performs a fence operation to propagate its value and to see the value from the other thread
      // - reads the value from the other thread's variable
      // - repeats the above steps while both values are the same (and less than 1000000)
      // - stores the last value read from the other thread (in an additional variable)
      // At the end of execution at least one thread should know the last value from the other thread
      program += std::string("") +
        "  " + DataType().RegularTypeName() + " myValue = 0, hisValue; \n"
        "  do {\n"
        "    myValue++;\n"
        "    atomic_store_explicit(&destMemory[myId], myValue, memory_order_relaxed" + MemoryScopeStr() + ");\n"
        "    atomic_work_item_fence(" + fenceType + ", memory_order_seq_cst" + MemoryScopeStr() + "); \n"
        "    hisValue = atomic_load_explicit(&destMemory[hisId], memory_order_relaxed" + MemoryScopeStr() + ");\n"
        "  } while(myValue == hisValue && myValue < 1000000);\n"
        "  " + nonAtomic + "[myId] = hisValue; \n";
    }
    else
    {
      // Each thread modifies one of its non-atomic variables, increments the value of its atomic variable
      // and reads values from another thread in a typical synchronizes-with scenario with:
      // - non-atomic variable (at index A) modification (value change from 0 to A)
      // - release operation (additional fence or within the atomic) + atomic variable modification (value A)
      // - atomic variable read (value B) + acquire operation (additional fence or within the atomic)
      // - non-atomic variable (at index B) read (value C)
      // Each thread verifies the dependency between the atomic and non-atomic values read from another thread.
      // The following condition must be true: B == C
      program += std::string("") +
        "  " + DataType().RegularTypeName() + " myValue = 0, hisAtomicValue, hisValue; \n"
        "  do {\n"
        "    myValue++;\n"
        "    " + nonAtomic + "[myId*" + naValues.str() + "+myValue] = myValue;\n";
      if (_subCase.op1IsFence)
        program += std::string("") +
        "    atomic_work_item_fence(" + fenceType + ", " + get_memory_order_type_name(_subCase.op1MemOrder) + MemoryScopeStr() + "); \n"
        "    atomic_store_explicit(&destMemory[myId], myValue, memory_order_relaxed" + MemoryScopeStr() + ");\n";
      else
        program += std::string("") +
        "    atomic_store_explicit(&destMemory[myId], myValue, " + get_memory_order_type_name(_subCase.op1MemOrder) + MemoryScopeStr() + ");\n";
      if (_subCase.op2IsFence)
        program += std::string("") +
        "    hisAtomicValue = atomic_load_explicit(&destMemory[hisId], memory_order_relaxed" + MemoryScopeStr() + ");\n"
        "    atomic_work_item_fence(" + fenceType + ", " + get_memory_order_type_name(_subCase.op2MemOrder) + MemoryScopeStr() + "); \n";
      else
        program += std::string("") +
        "    hisAtomicValue = atomic_load_explicit(&destMemory[hisId], " + get_memory_order_type_name(_subCase.op2MemOrder) + MemoryScopeStr() + ");\n";
      program +=
        "    hisValue = " + nonAtomic + "[hisId*" + naValues.str() + "+hisAtomicValue]; \n";
      if (LocalMemory())
        program += "    hisId = (hisId+1)%get_local_size(0);\n";
      else
        program += "    hisId = (hisId+1)%threadCount;\n";
      program +=
        "  } while(hisAtomicValue == hisValue && myValue < " + naValues.str() + "-1);\n"
        "  if(hisAtomicValue != hisValue)\n"
        "  { // fail\n"
        "    atomic_store(&destMemory[myId], myValue-1);\n";
      if (LocalMemory())
        program += "    hisId = (hisId+get_local_size(0)-1)%get_local_size(0);\n";
      else
        program += "    hisId = (hisId+threadCount-1)%threadCount;\n";
      program +=
        "    if(myValue+1 < " + naValues.str() + ")\n"
        "      " + nonAtomic + "[myId*" + naValues.str() + "+myValue+1] = hisId;\n"
        "    if(myValue+2 < " + naValues.str() + ")\n"
        "      " + nonAtomic + "[myId*" + naValues.str() + "+myValue+2] = hisAtomicValue;\n"
        "    if(myValue+3 < " + naValues.str() + ")\n"
        "      " + nonAtomic + "[myId*" + naValues.str() + "+myValue+3] = hisValue;\n";
      if (gDebug)
      {
        program +=
          "    printf(\"WI %d: atomic value (%d) at index %d is different than non-atomic value (%d)\\n\", tid, hisAtomicValue, hisId, hisValue);\n";
      }
      program +=
        "  }\n";
    }
    return program;
  }
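  // For reference, in the seq_cst case with global memory the core generated
  // above expands to roughly the following OpenCL C (illustrative only; the
  // exact text depends on the data type and on what MemoryScopeStr() returns,
  // here assumed to be ", memory_scope_device"):
  //
  //   size_t myId = tid, hisId = threadCount-1-tid;
  //   int myValue = 0, hisValue;
  //   do {
  //     myValue++;
  //     atomic_store_explicit(&destMemory[myId], myValue, memory_order_relaxed, memory_scope_device);
  //     atomic_work_item_fence(CLK_GLOBAL_MEM_FENCE, memory_order_seq_cst, memory_scope_device);
  //     hisValue = atomic_load_explicit(&destMemory[hisId], memory_order_relaxed, memory_scope_device);
  //   } while(myValue == hisValue && myValue < 1000000);
  //   oldValues[myId] = hisValue;
  //
  // HostFunction below runs the same protocol on the host so a CPU thread can
  // participate against device work-items.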
  virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
  {
    size_t myId = tid, hisId = threadCount - 1 - tid;
    if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
    {
      HostDataType myValue = 0, hisValue;
      // CPU thread typically starts faster - wait for the GPU thread
      myValue++;
      host_atomic_store<HostAtomicType, HostDataType>(&destMemory[myId], myValue, MEMORY_ORDER_SEQ_CST);
      while (host_atomic_load<HostAtomicType, HostDataType>(&destMemory[hisId], MEMORY_ORDER_SEQ_CST) == 0);
      do {
        myValue++;
        host_atomic_store<HostAtomicType, HostDataType>(&destMemory[myId], myValue, MEMORY_ORDER_RELAXED);
        host_atomic_thread_fence(MemoryOrder());
        hisValue = host_atomic_load<HostAtomicType, HostDataType>(&destMemory[hisId], MEMORY_ORDER_RELAXED);
      } while (myValue == hisValue && hisValue < 1000000);
      oldValues[tid] = hisValue;
    }
    else
    {
      HostDataType myValue = 0, hisAtomicValue, hisValue;
      do {
        myValue++;
        oldValues[myId*NumNonAtomicVariablesPerThread()+myValue] = myValue;
        if (_subCase.op1IsFence)
        {
          host_atomic_thread_fence(_subCase.op1MemOrder);
          host_atomic_store<HostAtomicType, HostDataType>(&destMemory[myId], myValue, MEMORY_ORDER_RELAXED);
        }
        else
          host_atomic_store<HostAtomicType, HostDataType>(&destMemory[myId], myValue, _subCase.op1MemOrder);
        if (_subCase.op2IsFence)
        {
          hisAtomicValue = host_atomic_load<HostAtomicType, HostDataType>(&destMemory[hisId], MEMORY_ORDER_RELAXED);
          host_atomic_thread_fence(_subCase.op2MemOrder);
        }
        else
          hisAtomicValue = host_atomic_load<HostAtomicType, HostDataType>(&destMemory[hisId], _subCase.op2MemOrder);
        hisValue = oldValues[hisId*NumNonAtomicVariablesPerThread() + hisAtomicValue];
        hisId = (hisId + 1) % threadCount;
      } while(hisAtomicValue == hisValue && myValue < (HostDataType)NumNonAtomicVariablesPerThread()-1);
      if(hisAtomicValue != hisValue)
      { // fail
        host_atomic_store<HostAtomicType, HostDataType>(&destMemory[myId], myValue-1, MEMORY_ORDER_SEQ_CST);
        if (gDebug)
        {
          hisId = (hisId + threadCount - 1) % threadCount;
          printf("WI %d: atomic value (%d) at index %d is different than non-atomic value (%d)\n", tid, hisAtomicValue, hisId, hisValue);
        }
      }
    }
  }
  virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d)
  {
    for(cl_uint i = 0; i < threadCount*NumNonAtomicVariablesPerThread(); i++)
      startRefValues[i] = 0;
    return true;
  }
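  // Verification strategy:
  // - seq_cst: for each pair (i, workSize-1-i) either both counters raced all
  //   the way to 1000000 (a draw), or the thread that kept incrementing longer
  //   must have observed the other thread's final counter value.
  // - release/acquire sub-cases: every work-item must have walked through all
  //   of its non-atomic variables without ever seeing an atomic/non-atomic
  //   mismatch, i.e. its final atomic value equals NumNonAtomicVariablesPerThread()-1.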
  virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues)
  {
    correct = true;
    cl_uint workSize = LocalMemory() ? CurrentGroupSize() : threadCount;
    for(cl_uint workOffset = 0; workOffset < threadCount; workOffset += workSize)
    {
      if(workOffset+workSize > threadCount)
        // last workgroup (host threads)
        workSize = threadCount-workOffset;
      for(cl_uint i = 0; i < workSize && workOffset+i < threadCount; i++)
      {
        HostAtomicType myValue = finalValues[workOffset + i];
        if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
        {
          HostDataType hisValue = refValues[workOffset + i];
          if (myValue == hisValue)
          {
            // a draw - both threads should reach the final value 1000000
            if (myValue != 1000000)
            {
              log_error("ERROR: Invalid reference value #%u (%d instead of 1000000)\n", workOffset + i, myValue);
              correct = false;
              return true;
            }
          }
          else
          {
            // the slower thread (in the total order of seq_cst operations) must know the last value written by the faster thread
            HostAtomicType hisRealValue = finalValues[workOffset + workSize - 1 - i];
            HostDataType myValueReadByHim = refValues[workOffset + workSize - 1 - i];

            // who is the winner? - the thread with the lower private counter value
            if (myValue == hisRealValue) // forbidden result - the fence doesn't work
            {
              log_error("ERROR: Atomic counter values #%u and #%u are the same (%u)\n", workOffset + i, workOffset + workSize - 1 - i, myValue);
              log_error("ERROR: Both threads have outdated values read from another thread (%u and %u)\n", hisValue, myValueReadByHim);
              correct = false;
              return true;
            }
            if (myValue > hisRealValue) // I'm slower
            {
              if (hisRealValue != hisValue)
              {
                log_error("ERROR: Invalid reference value #%u (%d instead of %d)\n", workOffset + i, hisValue, hisRealValue);
                log_error("ERROR: Slower thread #%u should know value written by faster thread #%u\n", workOffset + i, workOffset + workSize - 1 - i);
                correct = false;
                return true;
              }
            }
            else // I'm faster
            {
              if (myValueReadByHim != myValue)
              {
                log_error("ERROR: Invalid reference value #%u (%d instead of %d)\n", workOffset + workSize - 1 - i, myValueReadByHim, myValue);
                log_error("ERROR: Slower thread #%u should know value written by faster thread #%u\n", workOffset + workSize - 1 - i, workOffset + i);
                correct = false;
                return true;
              }
            }
          }
        }
        else
        {
          if (myValue != NumNonAtomicVariablesPerThread()-1)
          {
            log_error("ERROR: Invalid atomic value #%u (%d instead of %d)\n", workOffset + i, myValue, NumNonAtomicVariablesPerThread()-1);
            log_error("ERROR: Thread #%u observed invalid values in other thread's variables\n", workOffset + i);
            correct = false;
            return true;
          }
        }
      }
    }
    return true;
  }
private:
  int _subCaseId;
  struct TestDefinition _subCase;
};

int test_atomic_fence_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
  int error = 0;
  CBasicTestFence<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
  EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
  CBasicTestFence<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
  EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
  CBasicTestFence<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
  EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
  CBasicTestFence<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
  EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
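  // The pointer-sized types map to 32- or 64-bit host mirror types depending
  // on the device's size_t width.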
  if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
  {
    CBasicTestFence<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFence<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFence<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFence<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  else
  {
    CBasicTestFence<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFence<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFence<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFence<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  return error;
}

int test_atomic_fence(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_fence_generic(deviceID, context, queue, num_elements, false);
}

int test_svm_atomic_fence(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_fence_generic(deviceID, context, queue, num_elements, true);
}