//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/testHarness.h"
#include "harness/kernelHelpers.h"
#include "harness/typeWrappers.h"

#include "common.h"
#include "host_atomics.h"

#include <sstream>
#include <vector>

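// Test of atomic_store / atomic_store_explicit: every work-item stores its own
// tid into destMemory[tid]. The acquire and acq_rel memory orders are skipped
// because they are not valid for store operations.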
template<typename HostAtomicType, typename HostDataType>
class CBasicTestStore : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
    using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
    CBasicTestStore(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
    {
        OldValueCheck(false);
    }
    virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
    {
        return threadCount;
    }
    virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue)
    {
        if(MemoryOrder() == MEMORY_ORDER_ACQUIRE ||
           MemoryOrder() == MEMORY_ORDER_ACQ_REL)
            return 0; //skip test - not applicable

        if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF)
            return 0; // skip test - not applicable

        return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue);
    }
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return
            " atomic_store"+postfix+"(&destMemory[tid], tid"+memoryOrderScope+");\n";
    }
    virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
    {
        host_atomic_store(&destMemory[tid], (HostDataType)tid, MemoryOrder());
    }
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
    {
        expected = (HostDataType)whichDestValue;
        return true;
    }
};

int test_atomic_store_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
    int error = 0;
    CBasicTestStore<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
    EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestStore<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
    EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestStore<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
    EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestStore<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
    CBasicTestStore<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT, useSVM);
    EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements));
    CBasicTestStore<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(TYPE_ATOMIC_DOUBLE, useSVM);
    EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements));
    if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestStore<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestStore<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestStore<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestStore<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestStore<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestStore<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestStore<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestStore<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

int test_atomic_store(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    return test_atomic_store_generic(deviceID, context, queue, num_elements, false);
}

int test_svm_atomic_store(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    return test_atomic_store_generic(deviceID, context, queue, num_elements, true);
}

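// Test of atomic_init: every work-item initializes destMemory[tid] with its own
// tid. atomic_init has no memory order or scope variants, so this test derives
// from the plain CBasicTest base rather than CBasicTestMemOrderScope.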
template<typename HostAtomicType, typename HostDataType>
class CBasicTestInit : public CBasicTest<HostAtomicType, HostDataType>
{
public:
    using CBasicTest<HostAtomicType, HostDataType>::OldValueCheck;
    CBasicTestInit(TExplicitAtomicType dataType, bool useSVM) : CBasicTest<HostAtomicType, HostDataType>(dataType, useSVM)
    {
        OldValueCheck(false);
    }
    virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
    {
        return threadCount;
    }
    virtual std::string ProgramCore()
    {
        return
            " atomic_init(&destMemory[tid], tid);\n";
    }
    virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
    {
        host_atomic_init(&destMemory[tid], (HostDataType)tid);
    }
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
    {
        expected = (HostDataType)whichDestValue;
        return true;
    }
};

int test_atomic_init_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
    int error = 0;
    CBasicTestInit<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
    EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestInit<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
    EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestInit<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
    EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestInit<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
    CBasicTestInit<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT, useSVM);
    EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements));
    CBasicTestInit<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(TYPE_ATOMIC_DOUBLE, useSVM);
    EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements));
    if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestInit<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestInit<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestInit<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestInit<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestInit<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestInit<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestInit<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestInit<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

int test_atomic_init(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    return test_atomic_init_generic(deviceID, context, queue, num_elements, false);
}

int test_svm_atomic_init(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    return test_atomic_init_generic(deviceID, context, queue, num_elements, true);
}

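// Test of atomic_load / atomic_load_explicit: each work-item stores its tid and
// immediately loads it back from the same location. The release and acq_rel
// orders are skipped because they are not valid for loads; when the test runs
// with memory_order_acquire, the paired store uses memory_order_release.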
template<typename HostAtomicType, typename HostDataType>
class CBasicTestLoad : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
    using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
    CBasicTestLoad(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
    {
        OldValueCheck(false);
    }
    virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
    {
        return threadCount;
    }
    virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue)
    {
        if(MemoryOrder() == MEMORY_ORDER_RELEASE ||
           MemoryOrder() == MEMORY_ORDER_ACQ_REL)
            return 0; //skip test - not applicable

        if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF)
            return 0; // skip test - not applicable

        return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue);
    }
    virtual std::string ProgramCore()
    {
        // In the case this test is run with MEMORY_ORDER_ACQUIRE, the store
        // should be MEMORY_ORDER_RELEASE
        std::string memoryOrderScopeLoad = MemoryOrderScopeStr();
        std::string memoryOrderScopeStore =
            (MemoryOrder() == MEMORY_ORDER_ACQUIRE)
            ? (", memory_order_release" + MemoryScopeStr())
            : memoryOrderScopeLoad;
        std::string postfix(memoryOrderScopeLoad.empty() ? "" : "_explicit");
        return " atomic_store" + postfix + "(&destMemory[tid], tid"
            + memoryOrderScopeStore
            + ");\n"
              " oldValues[tid] = atomic_load"
            + postfix + "(&destMemory[tid]" + memoryOrderScopeLoad + ");\n";
    }
    virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
    {
        host_atomic_store(&destMemory[tid], (HostDataType)tid, MEMORY_ORDER_SEQ_CST);
        oldValues[tid] = host_atomic_load<HostAtomicType, HostDataType>(&destMemory[tid], MemoryOrder());
    }
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
    {
        expected = (HostDataType)whichDestValue;
        return true;
    }
    virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues)
    {
        correct = true;
        for(cl_uint i = 0; i < threadCount; i++)
        {
            if(refValues[i] != (HostDataType)i)
            {
                log_error("Invalid value for thread %u\n", (cl_uint)i);
                correct = false;
                return true;
            }
        }
        return true;
    }
};

int test_atomic_load_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
    int error = 0;
    CBasicTestLoad<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
    EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestLoad<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
    EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestLoad<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
    EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestLoad<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
    CBasicTestLoad<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT, useSVM);
    EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements));
    CBasicTestLoad<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(TYPE_ATOMIC_DOUBLE, useSVM);
    EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements));
    if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestLoad<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestLoad<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestLoad<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestLoad<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestLoad<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestLoad<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestLoad<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestLoad<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

int test_atomic_load(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    return test_atomic_load_generic(deviceID, context, queue, num_elements, false);
}

int test_svm_atomic_load(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    return test_atomic_load_generic(deviceID, context, queue, num_elements, true);
}

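// Test of atomic_exchange / atomic_exchange_explicit: all work-items repeatedly
// exchange values through a single atomic (destMemory[0]). Verification checks
// that the tids 0..threadCount-1 plus the start value each occur exactly once
// across the oldValues array and the final atomic value.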
template<typename HostAtomicType, typename HostDataType>
class CBasicTestExchange : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::Iterations;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::IterationsStr;
    CBasicTestExchange(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
    {
        StartValue(123456);
    }
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return
            " oldValues[tid] = atomic_exchange"+postfix+"(&destMemory[0], tid"+memoryOrderScope+");\n"
            " for(int i = 0; i < "+IterationsStr()+"; i++)\n"
            " oldValues[tid] = atomic_exchange"+postfix+"(&destMemory[0], oldValues[tid]"+memoryOrderScope+");\n";
    }

    virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
    {
        oldValues[tid] = host_atomic_exchange(&destMemory[0], (HostDataType)tid, MemoryOrder());
        for(int i = 0; i < Iterations(); i++)
            oldValues[tid] = host_atomic_exchange(&destMemory[0], oldValues[tid], MemoryOrder());
    }
    virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues)
    {
        OldValueCheck(Iterations()%2 == 0); //check is valid for even number of iterations only
        correct = true;
        /* We are expecting values from 0 to size-1 and the initial value of the atomic variable */
        /* These values must be distributed across the refValues array and the atomic variable finalValues[0] */
        /* Any repeated value is treated as an error */
        std::vector<bool> tidFound(threadCount);
        bool startValueFound = false;
        cl_uint i;

        for(i = 0; i <= threadCount; i++)
        {
            cl_uint value;
            if(i == threadCount)
                value = (cl_uint)finalValues[0]; //additional value from atomic variable (last written)
            else
                value = (cl_uint)refValues[i];
            if(value == (cl_uint)StartValue())
            {
                // Special initial value
                if(startValueFound)
                {
                    log_error("ERROR: Starting reference value (%u) occurred more than once\n", (cl_uint)StartValue());
                    correct = false;
                    return true;
                }
                startValueFound = true;
                continue;
            }
            if(value >= threadCount)
            {
                log_error("ERROR: Reference value %u outside of valid range! (%u)\n", i, value);
                correct = false;
                return true;
            }
            if(tidFound[value])
            {
                log_error("ERROR: Value (%u) occurred more than once\n", value);
                correct = false;
                return true;
            }
            tidFound[value] = true;
        }
        return true;
    }
};

int test_atomic_exchange_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
    int error = 0;
    CBasicTestExchange<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
    EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestExchange<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
    EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestExchange<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
    EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestExchange<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
    CBasicTestExchange<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT, useSVM);
    EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements));
    CBasicTestExchange<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(TYPE_ATOMIC_DOUBLE, useSVM);
    EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements));
    if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestExchange<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestExchange<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestExchange<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestExchange<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestExchange<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestExchange<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestExchange<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestExchange<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

int test_atomic_exchange(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    return test_atomic_exchange_generic(deviceID, context, queue, num_elements, false);
}

int test_svm_atomic_exchange(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    return test_atomic_exchange_generic(deviceID, context, queue, num_elements, true);
}

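// Test of atomic_compare_exchange_strong (and _explicit): work-items contend on
// destMemory[0], retrying until each manages to swap its own value in. Memory
// order combinations that are invalid for the failure argument are skipped, and
// a spurious failure (which must not happen for the strong variant) is flagged
// by writing an out-of-range marker value.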
template<typename HostAtomicType, typename HostDataType>
class CBasicTestCompareStrong : public CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>
{
public:
    using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::OldValueCheck;
    using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryOrder2;
    using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryOrderScope;
    using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryScope;
    using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::DataType;
    using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::Iterations;
    using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::IterationsStr;
    using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
    CBasicTestCompareStrong(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>(dataType, useSVM)
    {
        StartValue(123456);
        OldValueCheck(false);
    }
    virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue)
    {
        if(MemoryOrder2() == MEMORY_ORDER_RELEASE ||
           MemoryOrder2() == MEMORY_ORDER_ACQ_REL)
            return 0; // not allowed as 'failure' argument
        if((MemoryOrder() == MEMORY_ORDER_RELAXED && MemoryOrder2() != MEMORY_ORDER_RELAXED) ||
           (MemoryOrder() != MEMORY_ORDER_SEQ_CST && MemoryOrder2() == MEMORY_ORDER_SEQ_CST))
            return 0; // failure argument shall be no stronger than the success

        if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF)
            return 0; // skip test - not applicable

        if (CheckCapabilities(MemoryScope(), MemoryOrder2()) == TEST_SKIPPED_ITSELF)
            return 0; // skip test - not applicable

        return CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue);
    }
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScope();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return
            std::string(" ")+DataType().RegularTypeName()+" expected, previous;\n"
            " int successCount = 0;\n"
            " oldValues[tid] = tid;\n"
            " expected = tid; // force failure at the beginning\n"
            " if(atomic_compare_exchange_strong"+postfix+"(&destMemory[0], &expected, oldValues[tid]"+memoryOrderScope+") || expected == tid)\n"
            " oldValues[tid] = threadCount+1; //mark unexpected success with invalid value\n"
            " else\n"
            " {\n"
            " for(int i = 0; i < "+IterationsStr()+" || successCount == 0; i++)\n"
            " {\n"
            " previous = expected;\n"
            " if(atomic_compare_exchange_strong"+postfix+"(&destMemory[0], &expected, oldValues[tid]"+memoryOrderScope+"))\n"
            " {\n"
            " oldValues[tid] = expected;\n"
            " successCount++;\n"
            " }\n"
            " else\n"
            " {\n"
            " if(previous == expected) // spurious failure - shouldn't occur for 'strong'\n"
            " {\n"
            " oldValues[tid] = threadCount; //mark fail with invalid value\n"
            " break;\n"
            " }\n"
            " }\n"
            " }\n"
            " }\n";
    }
    virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
    {
        HostDataType expected = (HostDataType)StartValue(), previous;
        oldValues[tid] = (HostDataType)tid;
        for(int i = 0; i < Iterations(); i++)
        {
            previous = expected;
            if(host_atomic_compare_exchange(&destMemory[0], &expected, oldValues[tid], MemoryOrder(), MemoryOrder2()))
                oldValues[tid] = expected;
            else
            {
                if(previous == expected) // shouldn't occur for 'strong'
                {
                    oldValues[tid] = threadCount; //mark fail with invalid value
                }
            }
        }
    }
    virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues)
    {
        correct = true;
        /* We are expecting values from 0 to size-1 and the initial value of the atomic variable */
        /* These values must be distributed across the refValues array and the atomic variable finalValues[0] */
        /* Any repeated value is treated as an error */
        std::vector<bool> tidFound(threadCount);
        bool startValueFound = false;
        cl_uint i;

        for(i = 0; i <= threadCount; i++)
        {
            cl_uint value;
            if(i == threadCount)
                value = (cl_uint)finalValues[0]; //additional value from atomic variable (last written)
            else
                value = (cl_uint)refValues[i];
            if(value == (cl_uint)StartValue())
            {
                // Special initial value
                if(startValueFound)
                {
                    log_error("ERROR: Starting reference value (%u) occurred more than once\n", (cl_uint)StartValue());
                    correct = false;
                    return true;
                }
                startValueFound = true;
                continue;
            }
            if(value >= threadCount)
            {
                if(value == threadCount)
                    log_error("ERROR: Spurious failure detected for atomic_compare_exchange_strong\n");
                log_error("ERROR: Reference value %u outside of valid range! (%u)\n", i, value);
                correct = false;
                return true;
            }
            if(tidFound[value])
            {
                log_error("ERROR: Value (%u) occurred more than once\n", value);
                correct = false;
                return true;
            }
            tidFound[value] = true;
        }
        return true;
    }
};

int test_atomic_compare_exchange_strong_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
    int error = 0;
    CBasicTestCompareStrong<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
    EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareStrong<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
    EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareStrong<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
    EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareStrong<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
    if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestCompareStrong<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareStrong<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareStrong<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareStrong<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestCompareStrong<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareStrong<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareStrong<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareStrong<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

int test_atomic_compare_exchange_strong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    return test_atomic_compare_exchange_strong_generic(deviceID, context, queue, num_elements, false);
}

int test_svm_atomic_compare_exchange_strong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    return test_atomic_compare_exchange_strong_generic(deviceID, context, queue, num_elements, true);
}

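// Test of atomic_compare_exchange_weak (and _explicit): same scheme as the
// strong test, but spurious failures are allowed for the weak variant, so the
// kernel simply retries instead of flagging them.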
template<typename HostAtomicType, typename HostDataType>
class CBasicTestCompareWeak : public CBasicTestCompareStrong<HostAtomicType, HostDataType>
{
public:
    using CBasicTestCompareStrong<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestCompareStrong<HostAtomicType, HostDataType>::MemoryOrderScope;
    using CBasicTestCompareStrong<HostAtomicType, HostDataType>::DataType;
    using CBasicTestCompareStrong<HostAtomicType, HostDataType>::Iterations;
    using CBasicTestCompareStrong<HostAtomicType, HostDataType>::IterationsStr;
    CBasicTestCompareWeak(TExplicitAtomicType dataType, bool useSVM) : CBasicTestCompareStrong<HostAtomicType, HostDataType>(dataType, useSVM)
    {
    }
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScope();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return
            std::string(" ")+DataType().RegularTypeName()+" expected, previous;\n"
            " int successCount = 0;\n"
            " oldValues[tid] = tid;\n"
            " expected = tid; // force failure at the beginning\n"
            " if(atomic_compare_exchange_weak"+postfix+"(&destMemory[0], &expected, oldValues[tid]"+memoryOrderScope+") || expected == tid)\n"
            " oldValues[tid] = threadCount+1; //mark unexpected success with invalid value\n"
            " else\n"
            " {\n"
            " for(int i = 0; i < "+IterationsStr()+" || successCount == 0; i++)\n"
            " {\n"
            " previous = expected;\n"
            " if(atomic_compare_exchange_weak"+postfix+"(&destMemory[0], &expected, oldValues[tid]"+memoryOrderScope+"))\n"
            " {\n"
            " oldValues[tid] = expected;\n"
            " successCount++;\n"
            " }\n"
            " }\n"
            " }\n";
    }
};

int test_atomic_compare_exchange_weak_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
    int error = 0;
    CBasicTestCompareWeak<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
    EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareWeak<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
    EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareWeak<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
    EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareWeak<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
    if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestCompareWeak<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareWeak<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareWeak<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareWeak<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestCompareWeak<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareWeak<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareWeak<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareWeak<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

int test_atomic_compare_exchange_weak(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    return test_atomic_compare_exchange_weak_generic(deviceID, context, queue, num_elements, false);
}

int test_svm_atomic_compare_exchange_weak(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    return test_atomic_compare_exchange_weak_generic(deviceID, context, queue, num_elements, true);
}

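// Test of atomic_fetch_add (and _explicit): every work-item adds (tid + 3) to a
// single atomic three times, plus once with the same value shifted into the
// topmost byte, so both the low and high bits of the result are exercised.
// ExpectedValue accumulates the same sums on the host.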
template<typename HostAtomicType, typename HostDataType>
class CBasicTestFetchAdd : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    CBasicTestFetchAdd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
    {
    }
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return
            " oldValues[tid] = atomic_fetch_add"+postfix+"(&destMemory[0], ("+DataType().AddSubOperandTypeName()+")tid + 3"+memoryOrderScope+");\n"+
            " atomic_fetch_add"+postfix+"(&destMemory[0], ("+DataType().AddSubOperandTypeName()+")tid + 3"+memoryOrderScope+");\n"
            " atomic_fetch_add"+postfix+"(&destMemory[0], ("+DataType().AddSubOperandTypeName()+")tid + 3"+memoryOrderScope+");\n"
            " atomic_fetch_add"+postfix+"(&destMemory[0], (("+DataType().AddSubOperandTypeName()+")tid + 3) << (sizeof("+DataType().AddSubOperandTypeName()+")-1)*8"+memoryOrderScope+");\n";
    }
    virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
    {
        oldValues[tid] = host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, MemoryOrder());
        host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, MemoryOrder());
        host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, MemoryOrder());
        host_atomic_fetch_add(&destMemory[0], ((HostDataType)tid + 3) << (sizeof(HostDataType)-1)*8, MemoryOrder());
    }
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
    {
        expected = StartValue();
        for(cl_uint i = 0; i < threadCount; i++)
            expected += ((HostDataType)i+3)*3+(((HostDataType)i + 3) << (sizeof(HostDataType)-1)*8);
        return true;
    }
};

int test_atomic_fetch_add_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
    int error = 0;
    CBasicTestFetchAdd<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
    EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchAdd<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
    EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchAdd<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
    EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchAdd<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
    if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestFetchAdd<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAdd<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAdd<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAdd<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestFetchAdd<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAdd<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAdd<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAdd<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

int test_atomic_fetch_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    return test_atomic_fetch_add_generic(deviceID, context, queue, num_elements, false);
}

int test_svm_atomic_fetch_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    return test_atomic_fetch_add_generic(deviceID, context, queue, num_elements, true);
}

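// Test of atomic_fetch_sub (and _explicit): the mirror image of the fetch_add
// test. Each work-item subtracts (tid + 3) combined with the same value shifted
// into the topmost byte, and the host computes the matching expected result.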
template<typename HostAtomicType, typename HostDataType>
class CBasicTestFetchSub : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    CBasicTestFetchSub(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
    {
    }
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return
            " oldValues[tid] = atomic_fetch_sub"+postfix+"(&destMemory[0], tid + 3 +((("+DataType().AddSubOperandTypeName()+")tid + 3) << (sizeof("+DataType().AddSubOperandTypeName()+")-1)*8)"+memoryOrderScope+");\n";
    }
    virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
    {
        oldValues[tid] = host_atomic_fetch_sub(&destMemory[0], (HostDataType)tid + 3+(((HostDataType)tid + 3) << (sizeof(HostDataType)-1)*8), MemoryOrder());
    }
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
    {
        expected = StartValue();
        for(cl_uint i = 0; i < threadCount; i++)
            expected -= (HostDataType)i + 3 +(((HostDataType)i + 3) << (sizeof(HostDataType)-1)*8);
        return true;
    }
};

int test_atomic_fetch_sub_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
    int error = 0;
    CBasicTestFetchSub<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
    EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchSub<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
    EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchSub<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
    EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchSub<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
    if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestFetchSub<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchSub<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchSub<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchSub<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestFetchSub<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchSub<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchSub<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchSub<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

int test_atomic_fetch_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    return test_atomic_fetch_sub_generic(deviceID, context, queue, num_elements, false);
}

int test_svm_atomic_fetch_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    return test_atomic_fetch_sub_generic(deviceID, context, queue, num_elements, true);
}

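// Test of atomic_fetch_or (and _explicit): work-items are grouped one result
// word per sizeof(type)*8 threads, and each ORs a single bit (1 << bitIndex)
// into its word, so every fully covered word must end up with all bits set; the
// last, partially covered word is checked against a mask built on the host.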
template<typename HostAtomicType, typename HostDataType>
class CBasicTestFetchOr : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
    CBasicTestFetchOr(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
    {
        StartValue(0);
    }
    virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
    {
        cl_uint numBits = DataType().Size(deviceID) * 8;

        return (threadCount + numBits - 1) / numBits;
    }
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return
            std::string(" size_t numBits = sizeof(")+DataType().RegularTypeName()+") * 8;\n"
            " int whichResult = tid / numBits;\n"
            " int bitIndex = tid - (whichResult * numBits);\n"
            "\n"
            " oldValues[tid] = atomic_fetch_or"+postfix+"(&destMemory[whichResult], (("+DataType().RegularTypeName()+")1 << bitIndex) "+memoryOrderScope+");\n";
    }
    virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
    {
        size_t numBits = sizeof(HostDataType) * 8;
        size_t whichResult = tid / numBits;
        size_t bitIndex = tid - (whichResult * numBits);

        oldValues[tid] = host_atomic_fetch_or(&destMemory[whichResult], ((HostDataType)1 << bitIndex), MemoryOrder());
    }
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
    {
        cl_uint numValues = (threadCount + (sizeof(HostDataType)*8-1)) / (sizeof(HostDataType)*8);
        if(whichDestValue < numValues - 1)
        {
            expected = ~(HostDataType)0;
            return true;
        }
        // Last item doesn't get or'ed on every bit, so we have to mask away
        cl_uint numBits = threadCount - whichDestValue * (sizeof(HostDataType)*8);
        expected = StartValue();
        for(cl_uint i = 0; i < numBits; i++)
            expected |= ((HostDataType)1 << i);
        return true;
    }
};
928
test_atomic_fetch_or_generic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,bool useSVM)929 int test_atomic_fetch_or_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
930 {
931 int error = 0;
932 CBasicTestFetchOr<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
933 EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
934 CBasicTestFetchOr<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
935 EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
936 CBasicTestFetchOr<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
937 EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
938 CBasicTestFetchOr<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
939 EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
940 if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
941 {
942 CBasicTestFetchOr<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
943 EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
944 CBasicTestFetchOr<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
945 EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
946 CBasicTestFetchOr<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
947 EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
948 CBasicTestFetchOr<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
949 EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
950 }
951 else
952 {
953 CBasicTestFetchOr<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
954 EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
955 CBasicTestFetchOr<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
956 EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
957 CBasicTestFetchOr<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
958 EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
959 CBasicTestFetchOr<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
960 EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
961 }
962 return error;
963 }
964
965 int test_atomic_fetch_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
966 {
967 return test_atomic_fetch_or_generic(deviceID, context, queue, num_elements, false);
968 }
969
970 int test_svm_atomic_fetch_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
971 {
972 return test_atomic_fetch_or_generic(deviceID, context, queue, num_elements, true);
973 }
974
975 template<typename HostAtomicType, typename HostDataType>
976 class CBasicTestFetchXor : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
977 {
978 public:
979 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
980 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
981 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
982 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
983 CBasicTestFetchXor(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
984 {
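// Arbitrary nonzero start pattern; the expected final value is this pattern
// XORed with every thread's single-bit mask (bits may repeat across threads).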
985 StartValue((HostDataType)0x2f08ab418ba0541LL);
986 }
987 virtual std::string ProgramCore()
988 {
989 std::string memoryOrderScope = MemoryOrderScopeStr();
990 std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
991 return
992 std::string(" int numBits = sizeof(")+DataType().RegularTypeName()+") * 8;\n"
993 " int bitIndex = (numBits-1)*(tid+1)/threadCount;\n"
994 "\n"
995 " oldValues[tid] = atomic_fetch_xor"+postfix+"(&destMemory[0], (("+DataType().RegularTypeName()+")1 << bitIndex) "+memoryOrderScope+");\n";
996 }
997 virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
998 {
999 int numBits = sizeof(HostDataType) * 8;
1000 int bitIndex = (numBits-1)*(tid+1)/threadCount;
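// bitIndex maps threads onto bits 0..numBits-1; when threadCount exceeds numBits
// several threads toggle the same bit, so only the parity of toggles matters.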
1001
1002 oldValues[tid] = host_atomic_fetch_xor(&destMemory[0], ((HostDataType)1 << bitIndex), MemoryOrder());
1003 }
1004 virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
1005 {
1006 int numBits = sizeof(HostDataType)*8;
1007 expected = StartValue();
1008 for(cl_uint i = 0; i < threadCount; i++)
1009 {
1010 int bitIndex = (numBits-1)*(i+1)/threadCount;
1011 expected ^= ((HostDataType)1 << bitIndex);
1012 }
1013 return true;
1014 }
1015 };
1016
1017 int test_atomic_fetch_xor_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
1018 {
1019 int error = 0;
1020 CBasicTestFetchXor<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
1021 EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
1022 CBasicTestFetchXor<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
1023 EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
1024 CBasicTestFetchXor<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
1025 EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
1026 CBasicTestFetchXor<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
1027 EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
1028 if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
1029 {
1030 CBasicTestFetchXor<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
1031 EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
1032 CBasicTestFetchXor<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
1033 EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
1034 CBasicTestFetchXor<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
1035 EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
1036 CBasicTestFetchXor<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
1037 EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
1038 }
1039 else
1040 {
1041 CBasicTestFetchXor<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
1042 EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
1043 CBasicTestFetchXor<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
1044 EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
1045 CBasicTestFetchXor<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
1046 EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
1047 CBasicTestFetchXor<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
1048 EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
1049 }
1050 return error;
1051 }
1052
1053 int test_atomic_fetch_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1054 {
1055 return test_atomic_fetch_xor_generic(deviceID, context, queue, num_elements, false);
1056 }
1057
1058 int test_svm_atomic_fetch_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1059 {
1060 return test_atomic_fetch_xor_generic(deviceID, context, queue, num_elements, true);
1061 }
1062
1063 template<typename HostAtomicType, typename HostDataType>
1064 class CBasicTestFetchAnd : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
1065 {
1066 public:
1067 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
1068 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
1069 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
1070 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
1071 CBasicTestFetchAnd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
1072 {
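// Start with all bits set; each thread clears one distinct bit, so every fully
// covered word ends up as 0 (see ExpectedValue below).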
1073 StartValue(~(HostDataType)0);
1074 }
1075 virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
1076 {
1077 cl_uint numBits = DataType().Size(deviceID) * 8;
1078
1079 return (threadCount + numBits - 1) / numBits;
1080 }
1081 virtual std::string ProgramCore()
1082 {
1083 std::string memoryOrderScope = MemoryOrderScopeStr();
1084 std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
1085 return
1086 std::string(" size_t numBits = sizeof(")+DataType().RegularTypeName()+") * 8;\n"
1087 " int whichResult = tid / numBits;\n"
1088 " int bitIndex = tid - (whichResult * numBits);\n"
1089 "\n"
1090 " oldValues[tid] = atomic_fetch_and"+postfix+"(&destMemory[whichResult], ~(("+DataType().RegularTypeName()+")1 << bitIndex) "+memoryOrderScope+");\n";
1091 }
1092 virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
1093 {
1094 size_t numBits = sizeof(HostDataType) * 8;
1095 size_t whichResult = tid / numBits;
1096 size_t bitIndex = tid - (whichResult * numBits);
1097
1098 oldValues[tid] = host_atomic_fetch_and(&destMemory[whichResult], ~((HostDataType)1 << bitIndex), MemoryOrder());
1099 }
1100 virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
1101 {
1102 cl_uint numValues = (threadCount + (sizeof(HostDataType)*8-1)) / (sizeof(HostDataType)*8);
1103 if(whichDestValue < numValues - 1)
1104 {
1105 expected = 0;
1106 return true;
1107 }
1108 // Last item doesn't get and'ed on every bit, so we have to mask away
1109 size_t numBits = threadCount - whichDestValue * (sizeof(HostDataType)*8);
1110 expected = StartValue();
1111 for(size_t i = 0; i < numBits; i++)
1112 expected &= ~((HostDataType)1 << i);
1113 return true;
1114 }
1115 };
1116
1117 int test_atomic_fetch_and_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
1118 {
1119 int error = 0;
1120 CBasicTestFetchAnd<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
1121 EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
1122 CBasicTestFetchAnd<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
1123 EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
1124 CBasicTestFetchAnd<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
1125 EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
1126 CBasicTestFetchAnd<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
1127 EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
1128 if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
1129 {
1130 CBasicTestFetchAnd<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
1131 EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
1132 CBasicTestFetchAnd<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
1133 EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
1134 CBasicTestFetchAnd<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
1135 EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
1136 CBasicTestFetchAnd<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
1137 EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
1138 }
1139 else
1140 {
1141 CBasicTestFetchAnd<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
1142 EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
1143 CBasicTestFetchAnd<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
1144 EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
1145 CBasicTestFetchAnd<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
1146 EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
1147 CBasicTestFetchAnd<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
1148 EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
1149 }
1150 return error;
1151 }
1152
1153 int test_atomic_fetch_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1154 {
1155 return test_atomic_fetch_and_generic(deviceID, context, queue, num_elements, false);
1156 }
1157
1158 int test_svm_atomic_fetch_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1159 {
1160 return test_atomic_fetch_and_generic(deviceID, context, queue, num_elements, true);
1161 }
1162
1163 template<typename HostAtomicType, typename HostDataType>
1164 class CBasicTestFetchOrAnd : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
1165 {
1166 public:
1167 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
1168 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
1169 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
1170 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
1171 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::Iterations;
1172 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::IterationsStr;
1173 CBasicTestFetchOrAnd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
1174 {
1175 StartValue(0);
1176 }
1177 virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
1178 {
1179 return 1+(threadCount-1)/(DataType().Size(deviceID)*8);
1180 }
1181 // each thread modifies (with OR and AND operations) and verifies
1182 // only one bit of the atomic variable;
1183 // other bits are modified by other threads, which must not affect the current thread's operation
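// For example, with a 32-bit type, tid 37 repeatedly sets and then clears bit 5
// of destMemory[1]; any unexpected bit state observed is counted in oldValues[tid].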
1184 virtual std::string ProgramCore()
1185 {
1186 std::string memoryOrderScope = MemoryOrderScopeStr();
1187 std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
1188 return
1189 std::string(" int bits = sizeof(")+DataType().RegularTypeName()+")*8;\n"+
1190 " size_t valueInd = tid/bits;\n"
1191 " "+DataType().RegularTypeName()+" value, bitMask = ("+DataType().RegularTypeName()+")1 << tid%bits;\n"
1192 " oldValues[tid] = 0;\n"
1193 " for(int i = 0; i < "+IterationsStr()+"; i++)\n"
1194 " {\n"
1195 " value = atomic_fetch_or"+postfix+"(destMemory+valueInd, bitMask"+memoryOrderScope+");\n"
1196 " if(value & bitMask) // bit should be set to 0\n"
1197 " oldValues[tid]++;\n"
1198 " value = atomic_fetch_and"+postfix+"(destMemory+valueInd, ~bitMask"+memoryOrderScope+");\n"
1199 " if(!(value & bitMask)) // bit should be set to 1\n"
1200 " oldValues[tid]++;\n"
1201 " }\n";
1202 }
1203 virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
1204 {
1205 int bits = sizeof(HostDataType)*8;
1206 size_t valueInd = tid/bits;
1207 HostDataType value, bitMask = (HostDataType)1 << tid%bits;
1208 oldValues[tid] = 0;
1209 for(int i = 0; i < Iterations(); i++)
1210 {
1211 value = host_atomic_fetch_or(destMemory+valueInd, bitMask, MemoryOrder());
1212 if(value & bitMask) // bit should be set to 0
1213 oldValues[tid]++;
1214 value = host_atomic_fetch_and(destMemory+valueInd, ~bitMask, MemoryOrder());
1215 if(!(value & bitMask)) // bit should be set to 1
1216 oldValues[tid]++;
1217 }
1218 }
1219 virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
1220 {
1221 expected = 0;
1222 return true;
1223 }
1224 virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues)
1225 {
1226 correct = true;
1227 for(cl_uint i = 0; i < threadCount; i++)
1228 {
1229 if(refValues[i] > 0)
1230 {
1231 log_error("Thread %d found %d mismatch(es)\n", i, (cl_uint)refValues[i]);
1232 correct = false;
1233 }
1234 }
1235 return true;
1236 }
1237 };
1238
1239 int test_atomic_fetch_orand_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
1240 {
1241 int error = 0;
1242 CBasicTestFetchOrAnd<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
1243 EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
1244 CBasicTestFetchOrAnd<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
1245 EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
1246 CBasicTestFetchOrAnd<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
1247 EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
1248 CBasicTestFetchOrAnd<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
1249 EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
1250 if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
1251 {
1252 CBasicTestFetchOrAnd<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
1253 EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
1254 CBasicTestFetchOrAnd<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
1255 EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
1256 CBasicTestFetchOrAnd<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
1257 EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
1258 CBasicTestFetchOrAnd<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
1259 EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
1260 }
1261 else
1262 {
1263 CBasicTestFetchOrAnd<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
1264 EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
1265 CBasicTestFetchOrAnd<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
1266 EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
1267 CBasicTestFetchOrAnd<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
1268 EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
1269 CBasicTestFetchOrAnd<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
1270 EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
1271 }
1272 return error;
1273 }
1274
1275 int test_atomic_fetch_orand(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1276 {
1277 return test_atomic_fetch_orand_generic(deviceID, context, queue, num_elements, false);
1278 }
1279
1280 int test_svm_atomic_fetch_orand(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1281 {
1282 return test_atomic_fetch_orand_generic(deviceID, context, queue, num_elements, true);
1283 }
1284
1285 template<typename HostAtomicType, typename HostDataType>
1286 class CBasicTestFetchXor2 : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
1287 {
1288 public:
1289 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
1290 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
1291 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
1292 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
1293 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::Iterations;
1294 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::IterationsStr;
1295 CBasicTestFetchXor2(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
1296 {
1297 StartValue(0);
1298 }
1299 virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
1300 {
1301 return 1+(threadCount-1)/(DataType().Size(deviceID)*8);
1302 }
1303 // each thread modifies (with an XOR operation) and verifies
1304 // only one bit of the atomic variable;
1305 // other bits are modified by other threads, which must not affect the current thread's operation
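// XORing the same mask twice restores the bit, so each iteration should observe
// the bit cleared before the first XOR and set before the second one.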
1306 virtual std::string ProgramCore()
1307 {
1308 std::string memoryOrderScope = MemoryOrderScopeStr();
1309 std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
1310 return
1311 std::string(" int bits = sizeof(")+DataType().RegularTypeName()+")*8;\n"+
1312 " size_t valueInd = tid/bits;\n"
1313 " "+DataType().RegularTypeName()+" value, bitMask = ("+DataType().RegularTypeName()+")1 << tid%bits;\n"
1314 " oldValues[tid] = 0;\n"
1315 " for(int i = 0; i < "+IterationsStr()+"; i++)\n"
1316 " {\n"
1317 " value = atomic_fetch_xor"+postfix+"(destMemory+valueInd, bitMask"+memoryOrderScope+");\n"
1318 " if(value & bitMask) // bit should be set to 0\n"
1319 " oldValues[tid]++;\n"
1320 " value = atomic_fetch_xor"+postfix+"(destMemory+valueInd, bitMask"+memoryOrderScope+");\n"
1321 " if(!(value & bitMask)) // bit should be set to 1\n"
1322 " oldValues[tid]++;\n"
1323 " }\n";
1324 }
1325 virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
1326 {
1327 int bits = sizeof(HostDataType)*8;
1328 size_t valueInd = tid/bits;
1329 HostDataType value, bitMask = (HostDataType)1 << tid%bits;
1330 oldValues[tid] = 0;
1331 for(int i = 0; i < Iterations(); i++)
1332 {
1333 value = host_atomic_fetch_xor(destMemory+valueInd, bitMask, MemoryOrder());
1334 if(value & bitMask) // bit should be set to 0
1335 oldValues[tid]++;
1336 value = host_atomic_fetch_xor(destMemory+valueInd, bitMask, MemoryOrder());
1337 if(!(value & bitMask)) // bit should be set to 1
1338 oldValues[tid]++;
1339 }
1340 }
1341 virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
1342 {
1343 expected = 0;
1344 return true;
1345 }
1346 virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues)
1347 {
1348 correct = true;
1349 for(cl_uint i = 0; i < threadCount; i++)
1350 {
1351 if(refValues[i] > 0)
1352 {
1353 log_error("Thread %d found %d mismatches\n", i, (cl_uint)refValues[i]);
1354 correct = false;
1355 }
1356 }
1357 return true;
1358 }
1359 };
1360
1361 int test_atomic_fetch_xor2_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
1362 {
1363 int error = 0;
1364 CBasicTestFetchXor2<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
1365 EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
1366 CBasicTestFetchXor2<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
1367 EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
1368 CBasicTestFetchXor2<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
1369 EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
1370 CBasicTestFetchXor2<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
1371 EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
1372 if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
1373 {
1374 CBasicTestFetchXor2<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
1375 EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
1376 CBasicTestFetchXor2<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
1377 EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
1378 CBasicTestFetchXor2<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
1379 EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
1380 CBasicTestFetchXor2<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
1381 EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
1382 }
1383 else
1384 {
1385 CBasicTestFetchXor2<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
1386 EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
1387 CBasicTestFetchXor2<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
1388 EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
1389 CBasicTestFetchXor2<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
1390 EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
1391 CBasicTestFetchXor2<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
1392 EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
1393 }
1394 return error;
1395 }
1396
1397 int test_atomic_fetch_xor2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1398 {
1399 return test_atomic_fetch_xor2_generic(deviceID, context, queue, num_elements, false);
1400 }
1401
1402 int test_svm_atomic_fetch_xor2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1403 {
1404 return test_atomic_fetch_xor2_generic(deviceID, context, queue, num_elements, true);
1405 }
1406
1407 template<typename HostAtomicType, typename HostDataType>
1408 class CBasicTestFetchMin : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
1409 {
1410 public:
1411 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
1412 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
1413 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
1414 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
1415 CBasicTestFetchMin(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
1416 {
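// Start at the type's maximum so any generated value can only lower it; the
// expected result is the minimum of all start reference values.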
1417 StartValue(DataType().MaxValue());
1418 }
1419 virtual std::string ProgramCore()
1420 {
1421 std::string memoryOrderScope = MemoryOrderScopeStr();
1422 std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
1423 return
1424 " oldValues[tid] = atomic_fetch_min"+postfix+"(&destMemory[0], oldValues[tid] "+memoryOrderScope+");\n";
1425 }
1426 virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
1427 {
1428 oldValues[tid] = host_atomic_fetch_min(&destMemory[0], oldValues[tid], MemoryOrder());
1429 }
1430 virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d)
1431 {
1432 for(cl_uint i = 0; i < threadCount; i++)
1433 {
1434 startRefValues[i] = genrand_int32(d);
1435 if(sizeof(HostDataType) >= 8)
1436 startRefValues[i] |= (HostDataType)genrand_int32(d) << 16;
1437 }
1438 return true;
1439 }
1440 virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
1441 {
1442 expected = StartValue();
1443 for(cl_uint i = 0; i < threadCount; i++)
1444 {
1445 if(startRefValues[ i ] < expected)
1446 expected = startRefValues[ i ];
1447 }
1448 return true;
1449 }
1450 };
1451
1452 int test_atomic_fetch_min_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
1453 {
1454 int error = 0;
1455 CBasicTestFetchMin<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
1456 EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
1457 CBasicTestFetchMin<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
1458 EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
1459 CBasicTestFetchMin<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
1460 EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
1461 CBasicTestFetchMin<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
1462 EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
1463 if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
1464 {
1465 CBasicTestFetchMin<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
1466 EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
1467 CBasicTestFetchMin<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
1468 EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
1469 CBasicTestFetchMin<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
1470 EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
1471 CBasicTestFetchMin<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
1472 EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
1473 }
1474 else
1475 {
1476 CBasicTestFetchMin<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
1477 EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
1478 CBasicTestFetchMin<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
1479 EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
1480 CBasicTestFetchMin<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
1481 EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
1482 CBasicTestFetchMin<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
1483 EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
1484 }
1485 return error;
1486 }
1487
1488 int test_atomic_fetch_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1489 {
1490 return test_atomic_fetch_min_generic(deviceID, context, queue, num_elements, false);
1491 }
1492
1493 int test_svm_atomic_fetch_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1494 {
1495 return test_atomic_fetch_min_generic(deviceID, context, queue, num_elements, true);
1496 }
1497
1498 template<typename HostAtomicType, typename HostDataType>
1499 class CBasicTestFetchMax : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
1500 {
1501 public:
1502 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
1503 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
1504 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
1505 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
1506 CBasicTestFetchMax(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
1507 {
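// Start at the type's minimum so any generated value can only raise it; the
// expected result is the maximum of all start reference values.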
1508 StartValue(DataType().MinValue());
1509 }
1510 virtual std::string ProgramCore()
1511 {
1512 std::string memoryOrderScope = MemoryOrderScopeStr();
1513 std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
1514 return
1515 " oldValues[tid] = atomic_fetch_max"+postfix+"(&destMemory[0], oldValues[tid] "+memoryOrderScope+");\n";
1516 }
1517 virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
1518 {
1519 oldValues[tid] = host_atomic_fetch_max(&destMemory[0], oldValues[tid], MemoryOrder());
1520 }
1521 virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d)
1522 {
1523 for(cl_uint i = 0; i < threadCount; i++)
1524 {
1525 startRefValues[i] = genrand_int32(d);
1526 if(sizeof(HostDataType) >= 8)
1527 startRefValues[i] |= (HostDataType)genrand_int32(d) << 16;
1528 }
1529 return true;
1530 }
1531 virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
1532 {
1533 expected = StartValue();
1534 for(cl_uint i = 0; i < threadCount; i++)
1535 {
1536 if(startRefValues[ i ] > expected)
1537 expected = startRefValues[ i ];
1538 }
1539 return true;
1540 }
1541 };
1542
1543 int test_atomic_fetch_max_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
1544 {
1545 int error = 0;
1546 CBasicTestFetchMax<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
1547 EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
1548 CBasicTestFetchMax<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
1549 EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
1550 CBasicTestFetchMax<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
1551 EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
1552 CBasicTestFetchMax<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
1553 EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
1554 if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
1555 {
1556 CBasicTestFetchMax<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
1557 EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
1558 CBasicTestFetchMax<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
1559 EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
1560 CBasicTestFetchMax<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
1561 EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
1562 CBasicTestFetchMax<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
1563 EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
1564 }
1565 else
1566 {
1567 CBasicTestFetchMax<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
1568 EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
1569 CBasicTestFetchMax<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
1570 EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
1571 CBasicTestFetchMax<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
1572 EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
1573 CBasicTestFetchMax<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
1574 EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
1575 }
1576 return error;
1577 }
1578
1579 int test_atomic_fetch_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1580 {
1581 return test_atomic_fetch_max_generic(deviceID, context, queue, num_elements, false);
1582 }
1583
1584 int test_svm_atomic_fetch_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1585 {
1586 return test_atomic_fetch_max_generic(deviceID, context, queue, num_elements, true);
1587 }
1588
1589 template<typename HostAtomicType, typename HostDataType>
1590 class CBasicTestFlag : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
1591 {
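// Sentinel stored in every critical-section slot until its first visitor
// overwrites it with that thread's id (see GenerateRefs/VerifyRefs).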
1592 static const HostDataType CRITICAL_SECTION_NOT_VISITED = 1000000000;
1593 public:
1594 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
1595 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
1596 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
1597 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
1598 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
1599 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::UseSVM;
1600 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
1601 CBasicTestFlag(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
1602 {
1603 StartValue(0);
1604 OldValueCheck(false);
1605 }
1606 virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
1607 {
1608 return threadCount;
1609 }
1610 TExplicitMemoryOrderType MemoryOrderForClear()
1611 {
1612 // Memory ordering for atomic_flag_clear function
1613 // ("shall not be memory_order_acquire nor memory_order_acq_rel")
1614 if(MemoryOrder() == MEMORY_ORDER_ACQUIRE)
1615 return MEMORY_ORDER_RELAXED;
1616 if (MemoryOrder() == MEMORY_ORDER_ACQ_REL)
1617 return MEMORY_ORDER_RELEASE;
1618 return MemoryOrder();
1619 }
1620 std::string MemoryOrderScopeStrForClear()
1621 {
1622 std::string orderStr;
1623 if (MemoryOrder() != MEMORY_ORDER_EMPTY)
1624 orderStr = std::string(", ") + get_memory_order_type_name(MemoryOrderForClear());
1625 return orderStr + MemoryScopeStr();
1626 }
1627
1628 virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
1629 cl_command_queue queue)
1630 {
1631 // This test assumes support for the memory_scope_device scope in the case
1632 // that LocalMemory() == false. Therefore we should skip this test in that
1633 // configuration on a 3.0 driver since supporting the memory_scope_device
1634 // scope is optional.
1635 if (get_device_cl_version(deviceID) >= Version{ 3, 0 })
1636 {
1637 if (!LocalMemory()
1638 && !(gAtomicFenceCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE))
1639 {
1640 log_info(
1641 "Skipping atomic_flag test due to use of atomic_scope_device "
1642 "which is optional and not supported on this device\n");
1643 return 0; // skip test - not applicable
1644 }
1645 }
1646 return CBasicTestMemOrderScope<HostAtomicType,
1647 HostDataType>::ExecuteSingleTest(deviceID,
1648 context,
1649 queue);
1650 }
1651 virtual std::string ProgramCore()
1652 {
1653 std::string memoryOrderScope = MemoryOrderScopeStr();
1654 std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
1655 std::string program =
1656 " uint cnt, stop = 0;\n"
1657 " for(cnt = 0; !stop && cnt < threadCount; cnt++) // each thread must find critical section where it is the first visitor\n"
1658 " {\n"
1659 " bool set = atomic_flag_test_and_set" + postfix + "(&destMemory[cnt]" + memoryOrderScope + ");\n";
1660 if (MemoryOrder() == MEMORY_ORDER_RELAXED || MemoryOrder() == MEMORY_ORDER_RELEASE)
1661 program += " atomic_work_item_fence(" +
1662 std::string(LocalMemory() ? "CLK_LOCAL_MEM_FENCE, " : "CLK_GLOBAL_MEM_FENCE, ") +
1663 "memory_order_acquire," +
1664 std::string(LocalMemory() ? "memory_scope_work_group" : (UseSVM() ? "memory_scope_all_svm_devices" : "memory_scope_device") ) +
1665 ");\n";
1666
1667 program +=
1668 " if (!set)\n"
1669 " {\n";
1670
1671 if (LocalMemory())
1672 program += " uint csIndex = get_enqueued_local_size(0)*get_group_id(0)+cnt;\n";
1673 else
1674 program += " uint csIndex = cnt;\n";
1675
1676 std::ostringstream csNotVisited;
1677 csNotVisited << CRITICAL_SECTION_NOT_VISITED;
1678 program +=
1679 " // verify that thread is the first visitor\n"
1680 " if(oldValues[csIndex] == "+csNotVisited.str()+")\n"
1681 " {\n"
1682 " oldValues[csIndex] = tid; // set the winner id for this critical section\n"
1683 " stop = 1;\n"
1684 " }\n";
1685
1686 if (MemoryOrder() == MEMORY_ORDER_ACQUIRE || MemoryOrder() == MEMORY_ORDER_RELAXED)
1687 program += " atomic_work_item_fence(" +
1688 std::string(LocalMemory() ? "CLK_LOCAL_MEM_FENCE, " : "CLK_GLOBAL_MEM_FENCE, ") +
1689 "memory_order_release," +
1690 std::string(LocalMemory() ? "memory_scope_work_group" : (UseSVM() ? "memory_scope_all_svm_devices" : "memory_scope_device") ) +
1691 ");\n";
1692
1693 program +=
1694 " atomic_flag_clear" + postfix + "(&destMemory[cnt]" + MemoryOrderScopeStrForClear() + ");\n"
1695 " }\n"
1696 " }\n";
1697 return program;
1698 }
1699 virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
1700 {
1701 cl_uint cnt, stop = 0;
1702 for (cnt = 0; !stop && cnt < threadCount; cnt++) // each thread must find a critical section where it is the first visitor
1703 {
1704 if (!host_atomic_flag_test_and_set(&destMemory[cnt], MemoryOrder()))
1705 {
1706 cl_uint csIndex = cnt;
1707 // verify that thread is the first visitor
1708 if (oldValues[csIndex] == CRITICAL_SECTION_NOT_VISITED)
1709 {
1710 oldValues[csIndex] = tid; // set the winner id for this critical section
1711 stop = 1;
1712 }
1713 host_atomic_flag_clear(&destMemory[cnt], MemoryOrderForClear());
1714 }
1715 }
1716 }
1717 virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
1718 {
1719 expected = StartValue();
1720 return true;
1721 }
1722 virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d)
1723 {
1724 for(cl_uint i = 0 ; i < threadCount; i++)
1725 startRefValues[i] = CRITICAL_SECTION_NOT_VISITED;
1726 return true;
1727 }
1728 virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues)
1729 {
1730 correct = true;
1731 /* We are expecting unique values from 0 to threadCount-1 (each critical section must be visited) */
1732 /* These values must be distributed across refValues array */
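/* tidFound[v] marks that winner id v has already been seen; a duplicate winner indicates a synchronization failure */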
1733 std::vector<bool> tidFound(threadCount);
1734 cl_uint i;
1735
1736 for (i = 0; i < threadCount; i++)
1737 {
1738 cl_uint value = (cl_uint)refValues[i];
1739 if (value == CRITICAL_SECTION_NOT_VISITED)
1740 {
1741 // Special initial value
1742 log_error("ERROR: Critical section %u not visited\n", i);
1743 correct = false;
1744 return true;
1745 }
1746 if (value >= threadCount)
1747 {
1748 log_error("ERROR: Reference value %u outside of valid range! (%u)\n", i, value);
1749 correct = false;
1750 return true;
1751 }
1752 if (tidFound[value])
1753 {
1754 log_error("ERROR: Value (%u) occurred more than once\n", value);
1755 correct = false;
1756 return true;
1757 }
1758 tidFound[value] = true;
1759 }
1760 return true;
1761 }
1762 };
1763
1764 int test_atomic_flag_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
1765 {
1766 int error = 0;
1767 CBasicTestFlag<HOST_ATOMIC_FLAG, HOST_FLAG> test_flag(TYPE_ATOMIC_FLAG, useSVM);
1768 EXECUTE_TEST(error, test_flag.Execute(deviceID, context, queue, num_elements));
1769 return error;
1770 }
1771
1772 int test_atomic_flag(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1773 {
1774 return test_atomic_flag_generic(deviceID, context, queue, num_elements, false);
1775 }
1776
1777 int test_svm_atomic_flag(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1778 {
1779 return test_atomic_flag_generic(deviceID, context, queue, num_elements, true);
1780 }
1781
1782 template<typename HostAtomicType, typename HostDataType>
1783 class CBasicTestFence : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
1784 {
1785 struct TestDefinition {
1786 bool op1IsFence;
1787 TExplicitMemoryOrderType op1MemOrder;
1788 bool op2IsFence;
1789 TExplicitMemoryOrderType op2MemOrder;
1790 };
1791 public:
1792 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
1793 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
1794 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
1795 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
1796 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
1797 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DeclaredInProgram;
1798 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::UsedInFunction;
1799 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
1800 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::CurrentGroupSize;
1801 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::UseSVM;
1802 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
1803 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalRefValues;
1804 CBasicTestFence(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
1805 {
1806 StartValue(0);
1807 OldValueCheck(false);
1808 }
1809 virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
1810 {
1811 return threadCount;
1812 }
1813 virtual cl_uint NumNonAtomicVariablesPerThread()
1814 {
1815 if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
1816 return 1;
1817 if (LocalMemory())
1818 {
1819 if (gIsEmbedded)
1820 {
1821 if (CurrentGroupSize() > 1024)
1822 CurrentGroupSize(1024);
1823 return 1; //1KB of local memory required by spec. Clamp group size to 1k and allow 1 variable per thread
1824 }
1825 else
1826 return 32 * 1024 / 8 / CurrentGroupSize() - 1; //32KB of local memory required by spec
1827 }
1828 return 256;
1829 }
1830 virtual std::string SingleTestName()
1831 {
1832 std::string testName;
1833 if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
1834 testName += "seq_cst fence, ";
1835 else
1836 testName += std::string(get_memory_order_type_name(_subCase.op1MemOrder)).substr(sizeof("memory_order"))
1837 + (_subCase.op1IsFence ? " fence" : " atomic") + " synchronizes-with "
1838 + std::string(get_memory_order_type_name(_subCase.op2MemOrder)).substr(sizeof("memory_order"))
1839 + (_subCase.op2IsFence ? " fence" : " atomic") + ", ";
1840 testName += CBasicTest<HostAtomicType, HostDataType>::SingleTestName();
1841 testName += std::string(", ") + std::string(get_memory_scope_type_name(MemoryScope())).substr(sizeof("memory"));
1842 return testName;
1843 }
1844 virtual bool SVMDataBufferAllSVMConsistent()
1845 {
1846 // Although memory_scope_all_devices doesn't mention SVM, it is just an
1847 // alias for memory_scope_all_svm_devices, so on devices that support SVM
1848 // both scopes interact with SVM allocations in the same way.
1849 return MemoryScope() == MEMORY_SCOPE_ALL_DEVICES
1850 || MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES;
1851 }
1852 virtual int ExecuteForEachParameterSet(cl_device_id deviceID, cl_context context, cl_command_queue queue)
1853 {
1854 int error = 0;
1855 // execute up to 3 sub-cases for each memory order
1856 for (_subCaseId = 0; _subCaseId < 3; _subCaseId++)
1857 {
1858 EXECUTE_TEST(error, (CBasicTestMemOrderScope<HostAtomicType, HostDataType>::ExecuteForEachParameterSet(deviceID, context, queue)));
1859 }
1860 return error;
1861 }
1862 virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue)
1863 {
1864 if(DeclaredInProgram() || UsedInFunction())
1865 return 0; //skip test - not applicable - no overloaded fence functions for different address spaces
1866 if(MemoryOrder() == MEMORY_ORDER_EMPTY ||
1867 MemoryScope() == MEMORY_SCOPE_EMPTY) // empty 'scope' not required since opencl20-openclc-rev15
1868 return 0; //skip test - not applicable
1869 if((UseSVM() || gHost)
1870 && LocalMemory())
1871 return 0; // skip test - not applicable for SVM and local memory
1872 struct TestDefinition acqTests[] = {
1873 // {op1IsFence, op1MemOrder, op2IsFence, op2MemOrder}
1874 { false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE },
1875 { true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE },
1876 { true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQUIRE }
1877 };
1878 struct TestDefinition relTests[] = {
1879 { true, MEMORY_ORDER_RELEASE, false, MEMORY_ORDER_ACQUIRE },
1880 { true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL }
1881 };
1882 struct TestDefinition arTests[] = {
1883 { false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL },
1884 { true, MEMORY_ORDER_ACQ_REL, false, MEMORY_ORDER_ACQUIRE },
1885 { true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQ_REL }
1886 };
1887 switch (MemoryOrder())
1888 {
1889 case MEMORY_ORDER_ACQUIRE:
1890 if (_subCaseId >= sizeof(acqTests) / sizeof(struct TestDefinition))
1891 return 0;
1892 _subCase = acqTests[_subCaseId];
1893 break;
1894 case MEMORY_ORDER_RELEASE:
1895 if (_subCaseId >= sizeof(relTests) / sizeof(struct TestDefinition))
1896 return 0;
1897 _subCase = relTests[_subCaseId];
1898 break;
1899 case MEMORY_ORDER_ACQ_REL:
1900 if (_subCaseId >= sizeof(arTests) / sizeof(struct TestDefinition))
1901 return 0;
1902 _subCase = arTests[_subCaseId];
1903 break;
1904 case MEMORY_ORDER_SEQ_CST:
1905 if (_subCaseId != 0) // one special case only
1906 return 0;
1907 break;
1908 default:
1909 return 0;
1910 }
1911 LocalRefValues(LocalMemory());
1912 return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue);
1913 }
1914 virtual std::string ProgramHeader(cl_uint maxNumDestItems)
1915 {
1916 std::string header;
1917 if(gOldAPI)
1918 {
1919 if(MemoryScope() == MEMORY_SCOPE_EMPTY)
1920 {
1921 header += "#define atomic_work_item_fence(x,y) mem_fence(x)\n";
1922 }
1923 else
1924 {
1925 header += "#define atomic_work_item_fence(x,y,z) mem_fence(x)\n";
1926 }
1927 }
1928 return header+CBasicTestMemOrderScope<HostAtomicType, HostDataType>::ProgramHeader(maxNumDestItems);
1929 }
1930 virtual std::string ProgramCore()
1931 {
1932 std::ostringstream naValues;
1933 naValues << NumNonAtomicVariablesPerThread();
1934 std::string program, fenceType, nonAtomic;
1935 if (LocalMemory())
1936 {
1937 program = " size_t myId = get_local_id(0), hisId = get_local_size(0)-1-myId;\n";
1938 fenceType = "CLK_LOCAL_MEM_FENCE";
1939 nonAtomic = "localValues";
1940 }
1941 else
1942 {
1943 program = " size_t myId = tid, hisId = threadCount-1-tid;\n";
1944 fenceType = "CLK_GLOBAL_MEM_FENCE";
1945 nonAtomic = "oldValues";
1946 }
1947 if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
1948 {
1949 // All threads are divided into pairs.
1950 // Each thread has its own atomic variable and performs the following actions:
1951 // - increments its own variable
1952 // - performs fence operation to propagate its value and to see value from other thread
1953 // - reads value from other thread's variable
1954 // - repeats the above steps when both values are the same (and less than 1000000)
1955 // - stores the last value read from other thread (in additional variable)
1956 // At the end of execution at least one thread should know the last value from other thread
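// The loop exits as soon as the two counters are observed to differ, or at the
// 1000000 safety cap; the value read from the partner is kept for verification.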
1957 program += std::string("") +
1958 " " + DataType().RegularTypeName() + " myValue = 0, hisValue; \n"
1959 " do {\n"
1960 " myValue++;\n"
1961 " atomic_store_explicit(&destMemory[myId], myValue, memory_order_relaxed" + MemoryScopeStr() + ");\n"
1962 " atomic_work_item_fence(" + fenceType + ", memory_order_seq_cst" + MemoryScopeStr() + "); \n"
1963 " hisValue = atomic_load_explicit(&destMemory[hisId], memory_order_relaxed" + MemoryScopeStr() + ");\n"
1964 " } while(myValue == hisValue && myValue < 1000000);\n"
1965 " " + nonAtomic + "[myId] = hisValue; \n";
1966 }
1967 else
1968 {
1969 // Each thread modifies one of its non-atomic variables, increments value of its atomic variable
1970 // and reads values from another thread in typical synchronizes-with scenario with:
1971 // - non-atomic variable (at index A) modification (value change from 0 to A)
1972 // - release operation (additional fence or within atomic) + atomic variable modification (value A)
1973 // - atomic variable read (value B) + acquire operation (additional fence or within atomic)
1974 // - non-atomic variable (at index B) read (value C)
1975 // Each thread verifies dependency between atomic and non-atomic value read from another thread
1976 // The following condition must be true: B == C
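// With a correctly paired release/acquire (fence or atomic), observing atomic
// value B from the partner guarantees its non-atomic write at index B is also
// visible, so C must equal B; a mismatch therefore indicates a missing
// synchronizes-with edge.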
      program += std::string("") +
        " " + DataType().RegularTypeName() + " myValue = 0, hisAtomicValue, hisValue; \n"
        " do {\n"
        " myValue++;\n"
        " " + nonAtomic + "[myId*" + naValues.str() + "+myValue] = myValue;\n";
      if (_subCase.op1IsFence)
        program += std::string("") +
          " atomic_work_item_fence(" + fenceType + ", " + get_memory_order_type_name(_subCase.op1MemOrder) + MemoryScopeStr() + "); \n"
          " atomic_store_explicit(&destMemory[myId], myValue, memory_order_relaxed" + MemoryScopeStr() + ");\n";
      else
        program += std::string("") +
          " atomic_store_explicit(&destMemory[myId], myValue, " + get_memory_order_type_name(_subCase.op1MemOrder) + MemoryScopeStr() + ");\n";
      if (_subCase.op2IsFence)
        program += std::string("") +
          " hisAtomicValue = atomic_load_explicit(&destMemory[hisId], memory_order_relaxed" + MemoryScopeStr() + ");\n"
          " atomic_work_item_fence(" + fenceType + ", " + get_memory_order_type_name(_subCase.op2MemOrder) + MemoryScopeStr() + "); \n";
      else
        program += std::string("") +
          " hisAtomicValue = atomic_load_explicit(&destMemory[hisId], " + get_memory_order_type_name(_subCase.op2MemOrder) + MemoryScopeStr() + ");\n";
      program +=
        " hisValue = " + nonAtomic + "[hisId*" + naValues.str() + "+hisAtomicValue]; \n";
      if (LocalMemory())
        program += " hisId = (hisId+1)%get_local_size(0);\n";
      else
        program += " hisId = (hisId+1)%threadCount;\n";
      program +=
        " } while(hisAtomicValue == hisValue && myValue < " + naValues.str() + "-1);\n"
        " if(hisAtomicValue != hisValue)\n"
        " { // fail\n"
        " atomic_store(&destMemory[myId], myValue-1);\n";
      if (LocalMemory())
        program += " hisId = (hisId+get_local_size(0)-1)%get_local_size(0);\n";
      else
        program += " hisId = (hisId+threadCount-1)%threadCount;\n";
      program +=
        " if(myValue+1 < " + naValues.str() + ")\n"
        " " + nonAtomic + "[myId*" + naValues.str() + "+myValue+1] = hisId;\n"
        " if(myValue+2 < " + naValues.str() + ")\n"
        " " + nonAtomic + "[myId*" + naValues.str() + "+myValue+2] = hisAtomicValue;\n"
        " if(myValue+3 < " + naValues.str() + ")\n"
        " " + nonAtomic + "[myId*" + naValues.str() + "+myValue+3] = hisValue;\n";
      if (gDebug)
      {
        program +=
          " printf(\"WI %d: atomic value (%d) at index %d is different than non-atomic value (%d)\\n\", tid, hisAtomicValue, hisId, hisValue);\n";
      }
      program +=
        " }\n";
    }
    return program;
  }
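  // Host-thread counterpart of the generated kernel core: the same ping-pong
  // algorithm, executed by CPU threads that share the atomic buffer with the device.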
  virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
  {
    size_t myId = tid, hisId = threadCount - 1 - tid;
    if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
    {
      HostDataType myValue = 0, hisValue;
      // CPU thread typically starts faster - wait for GPU thread
      myValue++;
      host_atomic_store<HostAtomicType, HostDataType>(&destMemory[myId], myValue, MEMORY_ORDER_SEQ_CST);
      while (host_atomic_load<HostAtomicType, HostDataType>(&destMemory[hisId], MEMORY_ORDER_SEQ_CST) == 0);
      do {
        myValue++;
        host_atomic_store<HostAtomicType, HostDataType>(&destMemory[myId], myValue, MEMORY_ORDER_RELAXED);
        host_atomic_thread_fence(MemoryOrder());
        hisValue = host_atomic_load<HostAtomicType, HostDataType>(&destMemory[hisId], MEMORY_ORDER_RELAXED);
      } while (myValue == hisValue && hisValue < 1000000);
      oldValues[tid] = hisValue;
    }
    else
    {
      HostDataType myValue = 0, hisAtomicValue, hisValue;
      do {
        myValue++;
        oldValues[myId*NumNonAtomicVariablesPerThread()+myValue] = myValue;
        if (_subCase.op1IsFence)
        {
          host_atomic_thread_fence(_subCase.op1MemOrder);
          host_atomic_store<HostAtomicType, HostDataType>(&destMemory[myId], myValue, MEMORY_ORDER_RELAXED);
        }
        else
          host_atomic_store<HostAtomicType, HostDataType>(&destMemory[myId], myValue, _subCase.op1MemOrder);
        if (_subCase.op2IsFence)
        {
          hisAtomicValue = host_atomic_load<HostAtomicType, HostDataType>(&destMemory[hisId], MEMORY_ORDER_RELAXED);
          host_atomic_thread_fence(_subCase.op2MemOrder);
        }
        else
          hisAtomicValue = host_atomic_load<HostAtomicType, HostDataType>(&destMemory[hisId], _subCase.op2MemOrder);
        hisValue = oldValues[hisId*NumNonAtomicVariablesPerThread() + hisAtomicValue];
        hisId = (hisId + 1) % threadCount;
      } while (hisAtomicValue == hisValue && myValue < (HostDataType)NumNonAtomicVariablesPerThread()-1);
      if (hisAtomicValue != hisValue)
      { // fail
        host_atomic_store<HostAtomicType, HostDataType>(&destMemory[myId], myValue-1, MEMORY_ORDER_SEQ_CST);
        if (gDebug)
        {
          hisId = (hisId + threadCount - 1) % threadCount;
          printf("WI %d: atomic value (%d) at index %d is different than non-atomic value (%d)\n", tid, hisAtomicValue, hisId, hisValue);
        }
      }
    }
  }
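  // Start every non-atomic variable at zero so that only values written during the
  // test can be observed by the other thread.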
  virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d)
  {
    for (cl_uint i = 0; i < threadCount*NumNonAtomicVariablesPerThread(); i++)
      startRefValues[i] = 0;
    return true;
  }
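  // Verify results one work-group at a time; the trailing, possibly smaller, group
  // corresponds to the host threads.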
  virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues)
  {
    correct = true;
    cl_uint workSize = LocalMemory() ? CurrentGroupSize() : threadCount;
    for (cl_uint workOffset = 0; workOffset < threadCount; workOffset += workSize)
    {
      if (workOffset+workSize > threadCount)
        // last workgroup (host threads)
        workSize = threadCount-workOffset;
      for (cl_uint i = 0; i < workSize && workOffset+i < threadCount; i++)
      {
        HostAtomicType myValue = finalValues[workOffset + i];
        if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
        {
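          // finalValues[i] is thread i's own final counter; refValues[i] is the last
          // value thread i read from its partner (threads are paired i <-> workSize-1-i)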
          HostDataType hisValue = refValues[workOffset + i];
          if (myValue == hisValue)
          {
            // a draw - both threads should reach the final value 1000000
            if (myValue != 1000000)
            {
              log_error("ERROR: Invalid reference value #%u (%d instead of 1000000)\n", workOffset + i, myValue);
              correct = false;
              return true;
            }
          }
          else
          {
            // the slower thread (in the total order of seq_cst operations) must know the last value written by the faster thread
            HostAtomicType hisRealValue = finalValues[workOffset + workSize - 1 - i];
            HostDataType myValueReadByHim = refValues[workOffset + workSize - 1 - i];

            // who is the winner? - the thread with the lower private counter value
            if (myValue == hisRealValue) // forbidden result - the fence doesn't work
            {
              log_error("ERROR: Atomic counter values #%u and #%u are the same (%u)\n", workOffset + i, workOffset + workSize - 1 - i, myValue);
              log_error("ERROR: Both threads have outdated values read from another thread (%u and %u)\n", hisValue, myValueReadByHim);
              correct = false;
              return true;
            }
            if (myValue > hisRealValue) // I'm slower
            {
              if (hisRealValue != hisValue)
              {
                log_error("ERROR: Invalid reference value #%u (%d instead of %d)\n", workOffset + i, hisValue, hisRealValue);
                log_error("ERROR: Slower thread #%u should know value written by faster thread #%u\n", workOffset + i, workOffset + workSize - 1 - i);
                correct = false;
                return true;
              }
            }
            else // I'm faster
            {
              if (myValueReadByHim != myValue)
              {
                log_error("ERROR: Invalid reference value #%u (%d instead of %d)\n", workOffset + workSize - 1 - i, myValueReadByHim, myValue);
                log_error("ERROR: Slower thread #%u should know value written by faster thread #%u\n", workOffset + workSize - 1 - i, workOffset + i);
                correct = false;
                return true;
              }
            }
          }
        }
        else
        {
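          // Release/acquire sub-cases: a thread stops early (storing a lower counter)
          // only when it observed a mismatch, so on success the final atomic value
          // must equal NumNonAtomicVariablesPerThread()-1.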
          if (myValue != NumNonAtomicVariablesPerThread()-1)
          {
            log_error("ERROR: Invalid atomic value #%u (%d instead of %d)\n", workOffset + i, myValue, NumNonAtomicVariablesPerThread()-1);
            log_error("ERROR: Thread #%u observed invalid values in other thread's variables\n", workOffset + i);
            correct = false;
            return true;
          }
        }
      }
    }
    return true;
  }
private:
  int _subCaseId;
  struct TestDefinition _subCase;
};

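// Instantiates the fence test for every required atomic type; pointer-sized types
// use the 32- or 64-bit host representation depending on the device's size_t width.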
int test_atomic_fence_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
  int error = 0;
  CBasicTestFence<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
  EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
  CBasicTestFence<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
  EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
  CBasicTestFence<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
  EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
  CBasicTestFence<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
  EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
  if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
  {
    CBasicTestFence<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFence<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFence<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFence<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  else
  {
    CBasicTestFence<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
    EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFence<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
    EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFence<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
    EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
    CBasicTestFence<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
    EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
  }
  return error;
}

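// Harness entry points: the regular buffer-based variant and the SVM variant.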
int test_atomic_fence(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_fence_generic(deviceID, context, queue, num_elements, false);
}

int test_svm_atomic_fence(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
  return test_atomic_fence_generic(deviceID, context, queue, num_elements, true);
}