//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/testHarness.h"
#include "harness/kernelHelpers.h"
#include "harness/typeWrappers.h"

#include "common.h"
#include "host_atomics.h"

#include <string>
#include <vector>

template <typename HostAtomicType, typename HostDataType>
class CBasicTestStore
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
    CBasicTestStore(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {
        OldValueCheck(false);
    }
    virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
    {
        return threadCount;
    }
    virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                                  cl_command_queue queue)
    {
        if (MemoryOrder() == MEMORY_ORDER_ACQUIRE
            || MemoryOrder() == MEMORY_ORDER_ACQ_REL)
            return 0; // skip test - not applicable

        if (CheckCapabilities(MemoryScope(), MemoryOrder())
            == TEST_SKIPPED_ITSELF)
            return 0; // skip test - not applicable

        return CBasicTestMemOrderScope<
            HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
                                                             queue);
    }
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ?
"" : "_explicit"); return " atomic_store" + postfix + "(&destMemory[tid], tid" + memoryOrderScope + ");\n"; } virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) { host_atomic_store(&destMemory[tid], (HostDataType)tid, MemoryOrder()); } virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) { expected = (HostDataType)whichDestValue; return true; } }; int test_atomic_store_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestStore test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestStore test_uint(TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestStore test_long(TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestStore test_ulong(TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); CBasicTestStore test_float(TYPE_ATOMIC_FLOAT, useSVM); EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements)); CBasicTestStore test_double( TYPE_ATOMIC_DOUBLE, useSVM); EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements)); if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestStore test_intptr_t( TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestStore test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestStore test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestStore test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestStore test_intptr_t( TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestStore test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestStore test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestStore test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } return error; } int test_atomic_store(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_atomic_store_generic(deviceID, context, queue, num_elements, false); } int test_svm_atomic_store(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_atomic_store_generic(deviceID, context, queue, num_elements, true); } template class CBasicTestInit : public CBasicTest { public: using CBasicTest::OldValueCheck; CBasicTestInit(TExplicitAtomicType dataType, bool useSVM) : CBasicTest(dataType, useSVM) { OldValueCheck(false); } virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) { return threadCount; } virtual std::string ProgramCore() { return " atomic_init(&destMemory[tid], tid);\n"; } virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile 
HostAtomicType *destMemory, HostDataType *oldValues) { host_atomic_init(&destMemory[tid], (HostDataType)tid); } virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) { expected = (HostDataType)whichDestValue; return true; } }; int test_atomic_init_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestInit test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestInit test_uint(TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestInit test_long(TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestInit test_ulong(TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); CBasicTestInit test_float(TYPE_ATOMIC_FLOAT, useSVM); EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements)); CBasicTestInit test_double( TYPE_ATOMIC_DOUBLE, useSVM); EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements)); if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestInit test_intptr_t( TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestInit test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestInit test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestInit test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestInit test_intptr_t( TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestInit test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestInit test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestInit test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } return error; } int test_atomic_init(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_atomic_init_generic(deviceID, context, queue, num_elements, false); } int test_svm_atomic_init(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_atomic_init_generic(deviceID, context, queue, num_elements, true); } template class CBasicTestLoad : public CBasicTestMemOrderScope { public: using CBasicTestMemOrderScope::OldValueCheck; using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryScope; using CBasicTestMemOrderScope::MemoryOrderScopeStr; using CBasicTestMemOrderScope::MemoryScopeStr; using CBasicTest::CheckCapabilities; CBasicTestLoad(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) { OldValueCheck(false); } virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) { return threadCount; } virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) { if 
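/*
 * Note on the init test above: atomic_init has no _explicit form and takes
 * no memory order or scope, because it performs non-atomic initialization
 * of an atomic object; hence this test has no order/scope matrix at all.
 * A minimal host-side analogue of the kernel line
 * "atomic_init(&destMemory[tid], tid)" (an illustrative sketch, not the
 * actual host_atomic_init implementation):
 *
 *     #include <atomic>
 *     void init_slot(std::atomic<unsigned> *dest, unsigned tid)
 *     {
 *         std::atomic_init(&dest[tid], tid); // plain init, no ordering
 *     }
 */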
(MemoryOrder() == MEMORY_ORDER_RELEASE || MemoryOrder() == MEMORY_ORDER_ACQ_REL) return 0; // skip test - not applicable if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF) return 0; // skip test - not applicable return CBasicTestMemOrderScope< HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue); } virtual std::string ProgramCore() { // In the case this test is run with MEMORY_ORDER_ACQUIRE, the store // should be MEMORY_ORDER_RELEASE std::string memoryOrderScopeLoad = MemoryOrderScopeStr(); std::string memoryOrderScopeStore = (MemoryOrder() == MEMORY_ORDER_ACQUIRE) ? (", memory_order_release" + MemoryScopeStr()) : memoryOrderScopeLoad; std::string postfix(memoryOrderScopeLoad.empty() ? "" : "_explicit"); return " atomic_store" + postfix + "(&destMemory[tid], tid" + memoryOrderScopeStore + ");\n" " oldValues[tid] = atomic_load" + postfix + "(&destMemory[tid]" + memoryOrderScopeLoad + ");\n"; } virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) { host_atomic_store(&destMemory[tid], (HostDataType)tid, MEMORY_ORDER_SEQ_CST); oldValues[tid] = host_atomic_load( &destMemory[tid], MemoryOrder()); } virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) { expected = (HostDataType)whichDestValue; return true; } virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) { correct = true; for (cl_uint i = 0; i < threadCount; i++) { if (refValues[i] != (HostDataType)i) { log_error("Invalid value for thread %u\n", (cl_uint)i); correct = false; return true; } } return true; } }; int test_atomic_load_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestLoad test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestLoad test_uint(TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestLoad test_long(TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestLoad test_ulong(TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); CBasicTestLoad test_float(TYPE_ATOMIC_FLOAT, useSVM); EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements)); CBasicTestLoad test_double( TYPE_ATOMIC_DOUBLE, useSVM); EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements)); if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestLoad test_intptr_t( TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestLoad test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestLoad test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestLoad test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestLoad test_intptr_t( TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestLoad test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( 
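/*
 * The release/acquire pairing chosen in the load test's ProgramCore() above
 * mirrors the standard C++11 idiom: an acquire load only synchronizes with
 * a release (or stronger) store. A minimal host sketch of the same pairing
 * (illustrative, using std::atomic rather than the test's host_atomic_*
 * helpers):
 *
 *     #include <atomic>
 *     std::atomic<int> flag{0};
 *     int payload = 0;
 *
 *     void writer()
 *     {
 *         payload = 42;                             // plain write
 *         flag.store(1, std::memory_order_release); // publish
 *     }
 *     void reader()
 *     {
 *         while (flag.load(std::memory_order_acquire) != 1) { }
 *         // payload is guaranteed to read 42 here
 *     }
 */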
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestLoad test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error,
            test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestLoad test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

int test_atomic_load(cl_device_id deviceID, cl_context context,
                     cl_command_queue queue, int num_elements)
{
    return test_atomic_load_generic(deviceID, context, queue, num_elements,
                                    false);
}

int test_svm_atomic_load(cl_device_id deviceID, cl_context context,
                         cl_command_queue queue, int num_elements)
{
    return test_atomic_load_generic(deviceID, context, queue, num_elements,
                                    true);
}

template <typename HostAtomicType, typename HostDataType>
class CBasicTestExchange
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::Iterations;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::IterationsStr;
    CBasicTestExchange(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {
        StartValue(123456);
    }
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return "  oldValues[tid] = atomic_exchange" + postfix
            + "(&destMemory[0], tid" + memoryOrderScope
            + ");\n"
              "  for(int i = 0; i < "
            + IterationsStr()
            + "; i++)\n"
              "    oldValues[tid] = atomic_exchange"
            + postfix + "(&destMemory[0], oldValues[tid]" + memoryOrderScope
            + ");\n";
    }
    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        oldValues[tid] = host_atomic_exchange(&destMemory[0],
                                              (HostDataType)tid, MemoryOrder());
        for (int i = 0; i < Iterations(); i++)
            oldValues[tid] = host_atomic_exchange(
                &destMemory[0], oldValues[tid], MemoryOrder());
    }
    virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
                            HostDataType *refValues,
                            HostAtomicType *finalValues)
    {
        OldValueCheck(
            Iterations() % 2
            == 0); // check is valid for even number of iterations only
        correct = true;
        /* We are expecting values from 0 to size-1 and initial value from
         * atomic variable */
        /* These values must be distributed across refValues array and atomic
         * variable finalValue[0] */
        /* Any repeated value is treated as an error */
        std::vector<bool> tidFound(threadCount);
        bool startValueFound = false;
        cl_uint i;
        for (i = 0; i <= threadCount; i++)
        {
            cl_uint value;
            if (i == threadCount)
                value = (cl_uint)finalValues[0]; // additional value from atomic
                                                 // variable (last written)
            else
                value = (cl_uint)refValues[i];
            if (value == (cl_uint)StartValue())
            {
                // Special initial value
                if (startValueFound)
                {
                    log_error("ERROR: Starting reference value (%u) occurred "
                              "more than once\n",
                              (cl_uint)StartValue());
                    correct = false;
                    return true;
                }
                startValueFound = true;
                continue;
            }
            if (value >= threadCount)
            {
                log_error(
                    "ERROR: Reference value %u outside of valid range! (%u)\n",
                    i, value);
                correct = false;
                return true;
            }
            if (tidFound[value])
            {
                log_error("ERROR: Value (%u) occurred more than once\n",
                          value);
                correct = false;
                return true;
            }
            tidFound[value] = true;
        }
        return true;
    }
};

int test_atomic_exchange_generic(cl_device_id deviceID, cl_context context,
                                 cl_command_queue queue, int num_elements,
                                 bool useSVM)
{
    int error = 0;
    CBasicTestExchange test_int(TYPE_ATOMIC_INT, useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestExchange test_uint(TYPE_ATOMIC_UINT, useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestExchange test_long(TYPE_ATOMIC_LONG, useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestExchange test_ulong(TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    CBasicTestExchange test_float(TYPE_ATOMIC_FLOAT, useSVM);
    EXECUTE_TEST(error,
                 test_float.Execute(deviceID, context, queue, num_elements));
    CBasicTestExchange test_double(TYPE_ATOMIC_DOUBLE, useSVM);
    EXECUTE_TEST(error,
                 test_double.Execute(deviceID, context, queue, num_elements));
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestExchange test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestExchange test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestExchange test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error,
            test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestExchange test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestExchange test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestExchange test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestExchange test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error,
            test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestExchange test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

int test_atomic_exchange(cl_device_id deviceID, cl_context context,
                         cl_command_queue queue, int num_elements)
{
    return test_atomic_exchange_generic(deviceID, context, queue, num_elements,
                                        false);
}

int test_svm_atomic_exchange(cl_device_id deviceID, cl_context context,
                             cl_command_queue queue, int num_elements)
{
    return test_atomic_exchange_generic(deviceID, context, queue, num_elements,
                                        true);
}

template <typename HostAtomicType, typename HostDataType>
class CBasicTestCompareStrong
    : public CBasicTestMemOrder2Scope<HostAtomicType, HostDataType> {
public:
    using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrder2Scope<HostAtomicType,
                                   HostDataType>::OldValueCheck;
    using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryOrder2;
    using CBasicTestMemOrder2Scope<HostAtomicType,
                                   HostDataType>::MemoryOrderScope;
    using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryScope;
    using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::DataType;
    using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::Iterations;
    using CBasicTestMemOrder2Scope<HostAtomicType,
                                   HostDataType>::IterationsStr;
    using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
    CBasicTestCompareStrong(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>(dataType,
                                                                 useSVM)
    {
        StartValue(123456);
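/*
 * The exchange test's VerifyRefs() above relies on atomic_exchange acting
 * as a hand-off: every value the atomic ever held (the start value plus
 * each tid) is returned by exactly one exchange or survives as the final
 * value, so the combined set must be a permutation with no repeats. A small
 * host sketch of that invariant (illustrative, std::atomic instead of the
 * OpenCL kernel):
 *
 *     #include <atomic>
 *     #include <thread>
 *     #include <vector>
 *
 *     int main()
 *     {
 *         const int N = 8, START = 123456;
 *         std::atomic<int> dest{START};
 *         std::vector<int> old(N);
 *         std::vector<std::thread> t;
 *         for (int tid = 0; tid < N; tid++)
 *             t.emplace_back([&, tid] { old[tid] = dest.exchange(tid); });
 *         for (auto &th : t) th.join();
 *         // {old[0..N-1], dest} is a permutation of {START, 0..N-1}
 *     }
 */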
OldValueCheck(false); } virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) { if (MemoryOrder2() == MEMORY_ORDER_RELEASE || MemoryOrder2() == MEMORY_ORDER_ACQ_REL) return 0; // not allowed as 'failure' argument if ((MemoryOrder() == MEMORY_ORDER_RELAXED && MemoryOrder2() != MEMORY_ORDER_RELAXED) || (MemoryOrder() != MEMORY_ORDER_SEQ_CST && MemoryOrder2() == MEMORY_ORDER_SEQ_CST)) return 0; // failure argument shall be no stronger than the success if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF) return 0; // skip test - not applicable if (CheckCapabilities(MemoryScope(), MemoryOrder2()) == TEST_SKIPPED_ITSELF) return 0; // skip test - not applicable return CBasicTestMemOrder2Scope< HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue); } virtual std::string ProgramCore() { std::string memoryOrderScope = MemoryOrderScope(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); return std::string(" ") + DataType().RegularTypeName() + " expected, previous;\n" " int successCount = 0;\n" " oldValues[tid] = tid;\n" " expected = tid; // force failure at the beginning\n" " if(atomic_compare_exchange_strong" + postfix + "(&destMemory[0], &expected, oldValues[tid]" + memoryOrderScope + ") || expected == tid)\n" " oldValues[tid] = threadCount+1; //mark unexpected success " "with invalid value\n" " else\n" " {\n" " for(int i = 0; i < " + IterationsStr() + " || successCount == 0; i++)\n" " {\n" " previous = expected;\n" " if(atomic_compare_exchange_strong" + postfix + "(&destMemory[0], &expected, oldValues[tid]" + memoryOrderScope + "))\n" " {\n" " oldValues[tid] = expected;\n" " successCount++;\n" " }\n" " else\n" " {\n" " if(previous == expected) // spurious failure - " "shouldn't occur for 'strong'\n" " {\n" " oldValues[tid] = threadCount; //mark fail with " "invalid value\n" " break;\n" " }\n" " }\n" " }\n" " }\n"; } virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) { HostDataType expected = (HostDataType)StartValue(), previous; oldValues[tid] = (HostDataType)tid; for (int i = 0; i < Iterations(); i++) { previous = expected; if (host_atomic_compare_exchange(&destMemory[0], &expected, oldValues[tid], MemoryOrder(), MemoryOrder2())) oldValues[tid] = expected; else { if (previous == expected) // shouldn't occur for 'strong' { oldValues[tid] = threadCount; // mark fail with invalid // value } } } } virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) { correct = true; /* We are expecting values from 0 to size-1 and initial value from * atomic variable */ /* These values must be distributed across refValues array and atomic * variable finalVaue[0] */ /* Any repeated value is treated as an error */ std::vector tidFound(threadCount); bool startValueFound = false; cl_uint i; for (i = 0; i <= threadCount; i++) { cl_uint value; if (i == threadCount) value = (cl_uint)finalValues[0]; // additional value from atomic // variable (last written) else value = (cl_uint)refValues[i]; if (value == (cl_uint)StartValue()) { // Special initial value if (startValueFound) { log_error("ERROR: Starting reference value (%u) occurred " "more thane once\n", (cl_uint)StartValue()); correct = false; return true; } startValueFound = true; continue; } if (value >= threadCount) { if (value == threadCount) log_error("ERROR: Spurious failure detected for " "atomic_compare_exchange_strong\n"); 
log_error( "ERROR: Reference value %u outside of valid range! (%u)\n", i, value); correct = false; return true; } if (tidFound[value]) { log_error("ERROR: Value (%u) occurred more thane once\n", value); correct = false; return true; } tidFound[value] = true; } return true; } }; int test_atomic_compare_exchange_strong_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestCompareStrong test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareStrong test_uint( TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareStrong test_long( TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareStrong test_ulong( TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestCompareStrong test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareStrong test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareStrong test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareStrong test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestCompareStrong test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareStrong test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareStrong test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareStrong test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } return error; } int test_atomic_compare_exchange_strong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_atomic_compare_exchange_strong_generic(deviceID, context, queue, num_elements, false); } int test_svm_atomic_compare_exchange_strong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_atomic_compare_exchange_strong_generic(deviceID, context, queue, num_elements, true); } template class CBasicTestCompareWeak : public CBasicTestCompareStrong { public: using CBasicTestCompareStrong::StartValue; using CBasicTestCompareStrong::MemoryOrderScope; using CBasicTestCompareStrong::DataType; using CBasicTestCompareStrong::Iterations; using CBasicTestCompareStrong::IterationsStr; CBasicTestCompareWeak(TExplicitAtomicType dataType, bool useSVM) : CBasicTestCompareStrong(dataType, useSVM) {} virtual std::string ProgramCore() { std::string memoryOrderScope = MemoryOrderScope(); std::string postfix(memoryOrderScope.empty() ? 
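/*
 * The strong-CAS kernel above exercises the defining property of
 * atomic_compare_exchange_strong: it fails only when the comparison fails,
 * and on failure it writes the value it actually saw back into 'expected'.
 * The canonical retry loop looks like this host sketch (illustrative,
 * std::atomic):
 *
 *     #include <atomic>
 *     void add_one(std::atomic<int> &a)
 *     {
 *         int expected = a.load(std::memory_order_relaxed);
 *         while (!a.compare_exchange_strong(expected, expected + 1))
 *         {
 *             // 'expected' now holds the current value; just retry
 *         }
 *     }
 *
 * A false return while 'expected' is left unchanged is exactly the spurious
 * failure the test reports as an error for the strong variant.
 */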
"" : "_explicit"); return std::string(" ") + DataType().RegularTypeName() + " expected , previous;\n" " int successCount = 0;\n" " oldValues[tid] = tid;\n" " expected = tid; // force failure at the beginning\n" " if(atomic_compare_exchange_weak" + postfix + "(&destMemory[0], &expected, oldValues[tid]" + memoryOrderScope + ") || expected == tid)\n" " oldValues[tid] = threadCount+1; //mark unexpected success " "with invalid value\n" " else\n" " {\n" " for(int i = 0; i < " + IterationsStr() + " || successCount == 0; i++)\n" " {\n" " previous = expected;\n" " if(atomic_compare_exchange_weak" + postfix + "(&destMemory[0], &expected, oldValues[tid]" + memoryOrderScope + "))\n" " {\n" " oldValues[tid] = expected;\n" " successCount++;\n" " }\n" " }\n" " }\n"; } }; int test_atomic_compare_exchange_weak_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestCompareWeak test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareWeak test_uint( TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareWeak test_long( TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareWeak test_ulong( TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestCompareWeak test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareWeak test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareWeak test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareWeak test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestCompareWeak test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareWeak test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareWeak test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareWeak test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } return error; } int test_atomic_compare_exchange_weak(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_atomic_compare_exchange_weak_generic(deviceID, context, queue, num_elements, false); } int test_svm_atomic_compare_exchange_weak(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_atomic_compare_exchange_weak_generic(deviceID, context, queue, num_elements, true); } template class CBasicTestFetchAdd : public CBasicTestMemOrderScope { public: using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::DataType; CBasicTestFetchAdd(TExplicitAtomicType dataType, 
bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) {} virtual std::string ProgramCore() { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); return " oldValues[tid] = atomic_fetch_add" + postfix + "(&destMemory[0], (" + DataType().AddSubOperandTypeName() + ")tid + 3" + memoryOrderScope + ");\n" + " atomic_fetch_add" + postfix + "(&destMemory[0], (" + DataType().AddSubOperandTypeName() + ")tid + 3" + memoryOrderScope + ");\n" " atomic_fetch_add" + postfix + "(&destMemory[0], (" + DataType().AddSubOperandTypeName() + ")tid + 3" + memoryOrderScope + ");\n" " atomic_fetch_add" + postfix + "(&destMemory[0], ((" + DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof(" + DataType().AddSubOperandTypeName() + ")-1)*8" + memoryOrderScope + ");\n"; } virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) { oldValues[tid] = host_atomic_fetch_add( &destMemory[0], (HostDataType)tid + 3, MemoryOrder()); host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, MemoryOrder()); host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, MemoryOrder()); host_atomic_fetch_add(&destMemory[0], ((HostDataType)tid + 3) << (sizeof(HostDataType) - 1) * 8, MemoryOrder()); } virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) { expected = StartValue(); for (cl_uint i = 0; i < threadCount; i++) expected += ((HostDataType)i + 3) * 3 + (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8); return true; } }; int test_atomic_fetch_add_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestFetchAdd test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAdd test_uint(TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAdd test_long(TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAdd test_ulong( TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestFetchAdd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAdd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAdd test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAdd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestFetchAdd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAdd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAdd test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAdd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, 
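/*
 * Because addition is commutative and each atomic_fetch_add is indivisible,
 * the fetch_add test's final value is independent of thread interleaving:
 * thread tid contributes ((tid + 3) * 3) + ((tid + 3) << (sizeof(T) - 1) * 8),
 * where the shifted term lands in the most significant byte to exercise the
 * full width of the type, and ExpectedValue() just sums the contributions.
 * A simplified host sketch of the same bookkeeping (three plain adds only):
 *
 *     #include <atomic>
 *     #include <thread>
 *     #include <vector>
 *
 *     int main()
 *     {
 *         const unsigned N = 8;
 *         std::atomic<unsigned> dest{0};
 *         std::vector<std::thread> t;
 *         unsigned expected = 0;
 *         for (unsigned tid = 0; tid < N; tid++)
 *         {
 *             expected += (tid + 3) * 3u;
 *             t.emplace_back([&, tid] {
 *                 for (int k = 0; k < 3; k++) dest.fetch_add(tid + 3);
 *             });
 *         }
 *         for (auto &th : t) th.join();
 *         // dest.load() == expected, regardless of scheduling
 *     }
 *
 * The fetch_sub test that follows mirrors this with a single subtraction
 * combining the low and high-byte components.
 */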
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } return error; } int test_atomic_fetch_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_atomic_fetch_add_generic(deviceID, context, queue, num_elements, false); } int test_svm_atomic_fetch_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_atomic_fetch_add_generic(deviceID, context, queue, num_elements, true); } template class CBasicTestFetchSub : public CBasicTestMemOrderScope { public: using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::DataType; CBasicTestFetchSub(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) {} virtual std::string ProgramCore() { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); return " oldValues[tid] = atomic_fetch_sub" + postfix + "(&destMemory[0], tid + 3 +(((" + DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof(" + DataType().AddSubOperandTypeName() + ")-1)*8)" + memoryOrderScope + ");\n"; } virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) { oldValues[tid] = host_atomic_fetch_sub( &destMemory[0], (HostDataType)tid + 3 + (((HostDataType)tid + 3) << (sizeof(HostDataType) - 1) * 8), MemoryOrder()); } virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) { expected = StartValue(); for (cl_uint i = 0; i < threadCount; i++) expected -= (HostDataType)i + 3 + (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8); return true; } }; int test_atomic_fetch_sub_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestFetchSub test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchSub test_uint(TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchSub test_long(TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchSub test_ulong( TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestFetchSub test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchSub test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchSub test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchSub test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestFetchSub test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchSub test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchSub test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); 
EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchSub test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } return error; } int test_atomic_fetch_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_atomic_fetch_sub_generic(deviceID, context, queue, num_elements, false); } int test_svm_atomic_fetch_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_atomic_fetch_sub_generic(deviceID, context, queue, num_elements, true); } template class CBasicTestFetchOr : public CBasicTestMemOrderScope { public: using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::DataType; using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; CBasicTestFetchOr(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) { StartValue(0); } virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) { cl_uint numBits = DataType().Size(deviceID) * 8; return (threadCount + numBits - 1) / numBits; } virtual std::string ProgramCore() { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); return std::string(" size_t numBits = sizeof(") + DataType().RegularTypeName() + ") * 8;\n" " int whichResult = tid / numBits;\n" " int bitIndex = tid - (whichResult * numBits);\n" "\n" " oldValues[tid] = atomic_fetch_or" + postfix + "(&destMemory[whichResult], ((" + DataType().RegularTypeName() + ")1 << bitIndex) " + memoryOrderScope + ");\n"; } virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) { size_t numBits = sizeof(HostDataType) * 8; size_t whichResult = tid / numBits; size_t bitIndex = tid - (whichResult * numBits); oldValues[tid] = host_atomic_fetch_or(&destMemory[whichResult], ((HostDataType)1 << bitIndex), MemoryOrder()); } virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) { cl_uint numValues = (threadCount + (sizeof(HostDataType) * 8 - 1)) / (sizeof(HostDataType) * 8); if (whichDestValue < numValues - 1) { expected = ~(HostDataType)0; return true; } // Last item doesn't get or'ed on every bit, so we have to mask away cl_uint numBits = threadCount - whichDestValue * (sizeof(HostDataType) * 8); expected = StartValue(); for (cl_uint i = 0; i < numBits; i++) expected |= ((HostDataType)1 << i); return true; } }; int test_atomic_fetch_or_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestFetchOr test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOr test_uint(TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOr test_long(TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOr test_ulong( TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestFetchOr test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); 
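/*
 * The fetch_or test gives every thread exclusive ownership of one bit:
 * thread tid sets bit (tid % numBits) of word (tid / numBits), so every
 * word except possibly the last must end up all-ones. Host sketch of the
 * indexing and the expected mask (illustrative):
 *
 *     #include <atomic>
 *     #include <cstdint>
 *
 *     void set_my_bit(std::atomic<uint32_t> *words, unsigned tid)
 *     {
 *         const unsigned numBits = 32;
 *         unsigned word = tid / numBits;
 *         unsigned bit = tid % numBits;
 *         words[word].fetch_or(uint32_t(1) << bit);
 *     }
 *     // expected: every full word reads 0xFFFFFFFF; the last word keeps
 *     // only its threadCount - word * 32 low bits set
 *
 * The fetch_and test further below is the mirror image: it starts from
 * all-ones and each thread clears its own bit, expecting zeros.
 */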
CBasicTestFetchOr test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOr test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOr test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestFetchOr test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOr test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOr test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOr test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } return error; } int test_atomic_fetch_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_atomic_fetch_or_generic(deviceID, context, queue, num_elements, false); } int test_svm_atomic_fetch_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_atomic_fetch_or_generic(deviceID, context, queue, num_elements, true); } template class CBasicTestFetchXor : public CBasicTestMemOrderScope { public: using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; using CBasicTestMemOrderScope::DataType; CBasicTestFetchXor(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) { StartValue((HostDataType)0x2f08ab418ba0541LL); } virtual std::string ProgramCore() { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); return std::string(" int numBits = sizeof(") + DataType().RegularTypeName() + ") * 8;\n" " int bitIndex = (numBits-1)*(tid+1)/threadCount;\n" "\n" " oldValues[tid] = atomic_fetch_xor" + postfix + "(&destMemory[0], ((" + DataType().RegularTypeName() + ")1 << bitIndex) " + memoryOrderScope + ");\n"; } virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) { int numBits = sizeof(HostDataType) * 8; int bitIndex = (numBits - 1) * (tid + 1) / threadCount; oldValues[tid] = host_atomic_fetch_xor( &destMemory[0], ((HostDataType)1 << bitIndex), MemoryOrder()); } virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) { int numBits = sizeof(HostDataType) * 8; expected = StartValue(); for (cl_uint i = 0; i < threadCount; i++) { int bitIndex = (numBits - 1) * (i + 1) / threadCount; expected ^= ((HostDataType)1 << bitIndex); } return true; } }; int test_atomic_fetch_xor_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestFetchXor test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor test_uint(TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor test_long(TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor test_ulong( TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestFetchXor test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestFetchXor test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } return error; } int test_atomic_fetch_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_atomic_fetch_xor_generic(deviceID, context, queue, num_elements, false); } int test_svm_atomic_fetch_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_atomic_fetch_xor_generic(deviceID, context, queue, num_elements, true); } template class CBasicTestFetchAnd : public CBasicTestMemOrderScope { public: using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::DataType; using 
CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; CBasicTestFetchAnd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) { StartValue(~(HostDataType)0); } virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) { cl_uint numBits = DataType().Size(deviceID) * 8; return (threadCount + numBits - 1) / numBits; } virtual std::string ProgramCore() { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); return std::string(" size_t numBits = sizeof(") + DataType().RegularTypeName() + ") * 8;\n" " int whichResult = tid / numBits;\n" " int bitIndex = tid - (whichResult * numBits);\n" "\n" " oldValues[tid] = atomic_fetch_and" + postfix + "(&destMemory[whichResult], ~((" + DataType().RegularTypeName() + ")1 << bitIndex) " + memoryOrderScope + ");\n"; } virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) { size_t numBits = sizeof(HostDataType) * 8; size_t whichResult = tid / numBits; size_t bitIndex = tid - (whichResult * numBits); oldValues[tid] = host_atomic_fetch_and(&destMemory[whichResult], ~((HostDataType)1 << bitIndex), MemoryOrder()); } virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) { cl_uint numValues = (threadCount + (sizeof(HostDataType) * 8 - 1)) / (sizeof(HostDataType) * 8); if (whichDestValue < numValues - 1) { expected = 0; return true; } // Last item doesn't get and'ed on every bit, so we have to mask away size_t numBits = threadCount - whichDestValue * (sizeof(HostDataType) * 8); expected = StartValue(); for (size_t i = 0; i < numBits; i++) expected &= ~((HostDataType)1 << i); return true; } }; int test_atomic_fetch_and_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestFetchAnd test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAnd test_uint(TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAnd test_long(TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAnd test_ulong( TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestFetchAnd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAnd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAnd test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAnd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestFetchAnd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAnd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAnd test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); 
EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAnd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } return error; } int test_atomic_fetch_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_atomic_fetch_and_generic(deviceID, context, queue, num_elements, false); } int test_svm_atomic_fetch_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_atomic_fetch_and_generic(deviceID, context, queue, num_elements, true); } template class CBasicTestFetchOrAnd : public CBasicTestMemOrderScope { public: using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::DataType; using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; using CBasicTestMemOrderScope::Iterations; using CBasicTestMemOrderScope::IterationsStr; CBasicTestFetchOrAnd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) { StartValue(0); } virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) { return 1 + (threadCount - 1) / (DataType().Size(deviceID) * 8); } // each thread modifies (with OR and AND operations) and verifies // only one bit in atomic variable // other bits are modified by other threads but it must not affect current // thread operation virtual std::string ProgramCore() { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); return std::string(" int bits = sizeof(") + DataType().RegularTypeName() + ")*8;\n" + " size_t valueInd = tid/bits;\n" " " + DataType().RegularTypeName() + " value, bitMask = (" + DataType().RegularTypeName() + ")1 << tid%bits;\n" " oldValues[tid] = 0;\n" " for(int i = 0; i < " + IterationsStr() + "; i++)\n" " {\n" " value = atomic_fetch_or" + postfix + "(destMemory+valueInd, bitMask" + memoryOrderScope + ");\n" " if(value & bitMask) // bit should be set to 0\n" " oldValues[tid]++;\n" " value = atomic_fetch_and" + postfix + "(destMemory+valueInd, ~bitMask" + memoryOrderScope + ");\n" " if(!(value & bitMask)) // bit should be set to 1\n" " oldValues[tid]++;\n" " }\n"; } virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) { int bits = sizeof(HostDataType) * 8; size_t valueInd = tid / bits; HostDataType value, bitMask = (HostDataType)1 << tid % bits; oldValues[tid] = 0; for (int i = 0; i < Iterations(); i++) { value = host_atomic_fetch_or(destMemory + valueInd, bitMask, MemoryOrder()); if (value & bitMask) // bit should be set to 0 oldValues[tid]++; value = host_atomic_fetch_and(destMemory + valueInd, ~bitMask, MemoryOrder()); if (!(value & bitMask)) // bit should be set to 1 oldValues[tid]++; } } virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) { expected = 0; return true; } virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) { correct = true; for (cl_uint i = 0; i < threadCount; i++) { if (refValues[i] > 0) { log_error("Thread %d found %d mismatch(es)\n", i, (cl_uint)refValues[i]); correct = false; } } return true; } }; int test_atomic_fetch_orand_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestFetchOrAnd 
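/*
 * The OR/AND round trip above is a per-bit ownership test: only thread tid
 * ever touches bit (tid % bits) of its word, so the value returned by
 * fetch_or must show that bit clear and the value returned by fetch_and
 * must show it set, regardless of what other threads do to the remaining
 * bits of the same word. Host sketch of one iteration (illustrative):
 *
 *     #include <atomic>
 *     #include <cstdint>
 *
 *     int check_toggle(std::atomic<uint32_t> &word, uint32_t bitMask)
 *     {
 *         int mismatches = 0;
 *         uint32_t v = word.fetch_or(bitMask);   // set my bit
 *         if (v & bitMask) mismatches++;         // it must have been 0
 *         v = word.fetch_and(~bitMask);          // clear my bit
 *         if (!(v & bitMask)) mismatches++;      // it must have been 1
 *         return mismatches;                     // expected: 0
 *     }
 */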
test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOrAnd test_uint( TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOrAnd test_long( TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOrAnd test_ulong( TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestFetchOrAnd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOrAnd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOrAnd test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOrAnd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestFetchOrAnd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOrAnd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOrAnd test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOrAnd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } return error; } int test_atomic_fetch_orand(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_atomic_fetch_orand_generic(deviceID, context, queue, num_elements, false); } int test_svm_atomic_fetch_orand(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_atomic_fetch_orand_generic(deviceID, context, queue, num_elements, true); } template class CBasicTestFetchXor2 : public CBasicTestMemOrderScope { public: using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::DataType; using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; using CBasicTestMemOrderScope::Iterations; using CBasicTestMemOrderScope::IterationsStr; CBasicTestFetchXor2(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) { StartValue(0); } virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) { return 1 + (threadCount - 1) / (DataType().Size(deviceID) * 8); } // each thread modifies (with XOR operation) and verifies // only one bit in atomic variable // other bits are modified by other threads but it must not affect current // thread operation virtual std::string ProgramCore() { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); return std::string(" int bits = sizeof(") + DataType().RegularTypeName() + ")*8;\n" + " size_t valueInd = tid/bits;\n" " " + DataType().RegularTypeName() + " value, bitMask = (" + DataType().RegularTypeName() + ")1 << tid%bits;\n" " oldValues[tid] = 0;\n" " for(int i = 0; i < " + IterationsStr() + "; i++)\n" " {\n" " value = atomic_fetch_xor" + postfix + "(destMemory+valueInd, bitMask" + memoryOrderScope + ");\n" " if(value & bitMask) // bit should be set to 0\n" " oldValues[tid]++;\n" " value = atomic_fetch_xor" + postfix + "(destMemory+valueInd, bitMask" + memoryOrderScope + ");\n" " if(!(value & bitMask)) // bit should be set to 1\n" " oldValues[tid]++;\n" " }\n"; } virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) { int bits = sizeof(HostDataType) * 8; size_t valueInd = tid / bits; HostDataType value, bitMask = (HostDataType)1 << tid % bits; oldValues[tid] = 0; for (int i = 0; i < Iterations(); i++) { value = host_atomic_fetch_xor(destMemory + valueInd, bitMask, MemoryOrder()); if (value & bitMask) // bit should be set to 0 oldValues[tid]++; value = host_atomic_fetch_xor(destMemory + valueInd, bitMask, MemoryOrder()); if (!(value & bitMask)) // bit should be set to 1 oldValues[tid]++; } } virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) { expected = 0; return true; } virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) { correct = true; for (cl_uint i = 0; i < threadCount; i++) { if (refValues[i] > 0) { log_error("Thread %d found %d mismatches\n", i, (cl_uint)refValues[i]); correct = false; } } return true; } }; int test_atomic_fetch_xor2_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestFetchXor2 test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor2 test_uint(TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor2 test_long(TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor2 test_ulong( TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestFetchXor2 test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor2 test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor2 test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor2 test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestFetchXor2 test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor2 test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor2 test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, 
int test_atomic_fetch_xor2_generic(cl_device_id deviceID, cl_context context,
                                   cl_command_queue queue, int num_elements,
                                   bool useSVM)
{
    int error = 0;
    CBasicTestFetchXor2<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                            useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchXor2<HOST_ATOMIC_UINT, HOST_UINT> test_uint(
        TYPE_ATOMIC_UINT, useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchXor2<HOST_ATOMIC_LONG, HOST_LONG> test_long(
        TYPE_ATOMIC_LONG, useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchXor2<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
        TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestFetchXor2<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchXor2<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchXor2<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error,
            test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchXor2<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestFetchXor2<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchXor2<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchXor2<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error,
            test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchXor2<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

int test_atomic_fetch_xor2(cl_device_id deviceID, cl_context context,
                           cl_command_queue queue, int num_elements)
{
    return test_atomic_fetch_xor2_generic(deviceID, context, queue,
                                          num_elements, false);
}

int test_svm_atomic_fetch_xor2(cl_device_id deviceID, cl_context context,
                               cl_command_queue queue, int num_elements)
{
    return test_atomic_fetch_xor2_generic(deviceID, context, queue,
                                          num_elements, true);
}

template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchMin
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    CBasicTestFetchMin(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {
        StartValue(DataType().MaxValue());
    }
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return "  oldValues[tid] = atomic_fetch_min" + postfix
            + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n";
    }
    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        oldValues[tid] = host_atomic_fetch_min(&destMemory[0], oldValues[tid],
                                               MemoryOrder());
    }
    virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                              MTdata d)
    {
        for (cl_uint i = 0; i < threadCount; i++)
        {
            startRefValues[i] = genrand_int32(d);
            if (sizeof(HostDataType) >= 8)
                // for 8-byte types, spread random bits beyond 32 bits
                startRefValues[i] |= (HostDataType)genrand_int32(d) << 16;
        }
        return true;
    }
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                               HostDataType *startRefValues,
                               cl_uint whichDestValue)
    {
        expected = StartValue();
        for (cl_uint i = 0; i < threadCount; i++)
        {
            if (startRefValues[i] < expected) expected = startRefValues[i];
        }
        return true;
    }
};
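// Illustrative sketch (not part of the conformance test): host_atomics.h
// supplies host_atomic_fetch_min for the reference computation above. On a
// host compiler without a native atomic fetch-min, the usual emulation is a
// compare-exchange loop like the hypothetical helper below; the expected
// final value is then simply the minimum over all start values and the
// initial StartValue() (the type's maximum). The fetch-max test that follows
// is the mirror image, starting from the type's minimum.
#if 0 // example only, kept out of the build
#include <atomic>

template <typename T> static T fetch_min_emulated(std::atomic<T> &obj, T arg)
{
    T old = obj.load(std::memory_order_relaxed);
    // retry until we either install the smaller value or observe one
    while (arg < old
           && !obj.compare_exchange_weak(old, arg, std::memory_order_relaxed))
    {
    }
    return old; // previous value, like atomic_fetch_min in OpenCL C
}
#endif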
int test_atomic_fetch_min_generic(cl_device_id deviceID, cl_context context,
                                  cl_command_queue queue, int num_elements,
                                  bool useSVM)
{
    int error = 0;
    CBasicTestFetchMin<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                           useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchMin<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchMin<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchMin<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
        TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestFetchMin<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMin<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMin<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error,
            test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMin<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestFetchMin<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMin<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMin<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error,
            test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMin<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

int test_atomic_fetch_min(cl_device_id deviceID, cl_context context,
                          cl_command_queue queue, int num_elements)
{
    return test_atomic_fetch_min_generic(deviceID, context, queue,
                                         num_elements, false);
}

int test_svm_atomic_fetch_min(cl_device_id deviceID, cl_context context,
                              cl_command_queue queue, int num_elements)
{
    return test_atomic_fetch_min_generic(deviceID, context, queue,
                                         num_elements, true);
}

template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchMax
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    CBasicTestFetchMax(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {
        StartValue(DataType().MinValue());
    }
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return "  oldValues[tid] = atomic_fetch_max" + postfix
            + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n";
    }
    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        oldValues[tid] = host_atomic_fetch_max(&destMemory[0], oldValues[tid],
                                               MemoryOrder());
    }
    virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                              MTdata d)
    {
        for (cl_uint i = 0; i < threadCount; i++)
        {
            startRefValues[i] = genrand_int32(d);
            if (sizeof(HostDataType) >= 8)
                // for 8-byte types, spread random bits beyond 32 bits
                startRefValues[i] |= (HostDataType)genrand_int32(d) << 16;
        }
        return true;
    }
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                               HostDataType *startRefValues,
                               cl_uint whichDestValue)
    {
        expected = StartValue();
        for (cl_uint i = 0; i < threadCount; i++)
        {
            if (startRefValues[i] > expected) expected = startRefValues[i];
        }
        return true;
    }
};
int test_atomic_fetch_max_generic(cl_device_id deviceID, cl_context context,
                                  cl_command_queue queue, int num_elements,
                                  bool useSVM)
{
    int error = 0;
    CBasicTestFetchMax<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                           useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchMax<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchMax<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchMax<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
        TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestFetchMax<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMax<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMax<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error,
            test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMax<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestFetchMax<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMax<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMax<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error,
            test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMax<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

int test_atomic_fetch_max(cl_device_id deviceID, cl_context context,
                          cl_command_queue queue, int num_elements)
{
    return test_atomic_fetch_max_generic(deviceID, context, queue,
                                         num_elements, false);
}

int test_svm_atomic_fetch_max(cl_device_id deviceID, cl_context context,
                              cl_command_queue queue, int num_elements)
{
    return test_atomic_fetch_max_generic(deviceID, context, queue,
                                         num_elements, true);
}

template <typename HostAtomicType, typename HostDataType>
class CBasicTestFlag
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
    static const HostDataType CRITICAL_SECTION_NOT_VISITED = 1000000000;

public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::UseSVM;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
    CBasicTestFlag(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {
        StartValue(0);
        OldValueCheck(false);
    }
    virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
    {
        return threadCount;
    }
    TExplicitMemoryOrderType MemoryOrderForClear()
    {
        // Memory ordering for the atomic_flag_clear function
        // ("shall not be memory_order_acquire nor memory_order_acq_rel")
        if (MemoryOrder() == MEMORY_ORDER_ACQUIRE) return MEMORY_ORDER_RELAXED;
        if (MemoryOrder() == MEMORY_ORDER_ACQ_REL) return MEMORY_ORDER_RELEASE;
        return MemoryOrder();
    }
    std::string MemoryOrderScopeStrForClear()
    {
        std::string orderStr;
        if (MemoryOrder() != MEMORY_ORDER_EMPTY)
            orderStr = std::string(", ")
                + get_memory_order_type_name(MemoryOrderForClear());
        return orderStr + MemoryScopeStr();
    }
    virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                                  cl_command_queue queue)
    {
        // This test assumes support for the memory_scope_device scope in the
        // case that LocalMemory() == false. Therefore we should skip this
        // test in that configuration on a 3.0 driver, since support for the
        // memory_scope_device scope is optional.
        if (get_device_cl_version(deviceID) >= Version{ 3, 0 })
        {
            if (!LocalMemory()
                && !(gAtomicFenceCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE))
            {
                log_info("Skipping atomic_flag test due to use of "
                         "memory_scope_device, which is optional and not "
                         "supported on this device\n");
                return 0; // skip test - not applicable
            }
        }
        return CBasicTestMemOrderScope<
            HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
                                                             queue);
    }
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
"" : "_explicit"); std::string program = " uint cnt, stop = 0;\n" " for(cnt = 0; !stop && cnt < threadCount; cnt++) // each thread " "must find critical section where it is the first visitor\n" " {\n" " bool set = atomic_flag_test_and_set" + postfix + "(&destMemory[cnt]" + memoryOrderScope + ");\n"; if (MemoryOrder() == MEMORY_ORDER_RELAXED || MemoryOrder() == MEMORY_ORDER_RELEASE || LocalMemory()) program += " atomic_work_item_fence(" + std::string( LocalMemory() ? "CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, " : "CLK_GLOBAL_MEM_FENCE, ") + "memory_order_acquire," + std::string(LocalMemory() ? "memory_scope_work_group" : (UseSVM() ? "memory_scope_all_svm_devices" : "memory_scope_device")) + ");\n"; program += " if (!set)\n" " {\n"; if (LocalMemory()) program += " uint csIndex = " "get_enqueued_local_size(0)*get_group_id(0)+cnt;\n"; else program += " uint csIndex = cnt;\n"; std::ostringstream csNotVisited; csNotVisited << CRITICAL_SECTION_NOT_VISITED; program += " // verify that thread is the first visitor\n" " if(oldValues[csIndex] == " + csNotVisited.str() + ")\n" " {\n" " oldValues[csIndex] = tid; // set the winner id for this " "critical section\n" " stop = 1;\n" " }\n"; if (MemoryOrder() == MEMORY_ORDER_ACQUIRE || MemoryOrder() == MEMORY_ORDER_RELAXED || LocalMemory()) program += " atomic_work_item_fence(" + std::string( LocalMemory() ? "CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, " : "CLK_GLOBAL_MEM_FENCE, ") + "memory_order_release," + std::string(LocalMemory() ? "memory_scope_work_group" : (UseSVM() ? "memory_scope_all_svm_devices" : "memory_scope_device")) + ");\n"; program += " atomic_flag_clear" + postfix + "(&destMemory[cnt]" + MemoryOrderScopeStrForClear() + ");\n" " }\n" " }\n"; return program; } virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) { cl_uint cnt, stop = 0; for (cnt = 0; !stop && cnt < threadCount; cnt++) // each thread must find critical section where it is the // first visitor\n" { if (!host_atomic_flag_test_and_set(&destMemory[cnt], MemoryOrder())) { cl_uint csIndex = cnt; // verify that thread is the first visitor\n" if (oldValues[csIndex] == CRITICAL_SECTION_NOT_VISITED) { oldValues[csIndex] = tid; // set the winner id for this critical section\n" stop = 1; } host_atomic_flag_clear(&destMemory[cnt], MemoryOrderForClear()); } } } virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) { expected = StartValue(); return true; } virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) { for (cl_uint i = 0; i < threadCount; i++) startRefValues[i] = CRITICAL_SECTION_NOT_VISITED; return true; } virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) { correct = true; /* We are expecting unique values from 0 to threadCount-1 (each critical * section must be visited) */ /* These values must be distributed across refValues array */ std::vector tidFound(threadCount); cl_uint i; for (i = 0; i < threadCount; i++) { cl_uint value = (cl_uint)refValues[i]; if (value == CRITICAL_SECTION_NOT_VISITED) { // Special initial value log_error("ERROR: Critical section %u not visited\n", i); correct = false; return true; } if (value >= threadCount) { log_error( "ERROR: Reference value %u outside of valid range! 
(%u)\n", i, value); correct = false; return true; } if (tidFound[value]) { log_error("ERROR: Value (%u) occurred more thane once\n", value); correct = false; return true; } tidFound[value] = true; } return true; } }; int test_atomic_flag_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestFlag test_flag(TYPE_ATOMIC_FLAG, useSVM); EXECUTE_TEST(error, test_flag.Execute(deviceID, context, queue, num_elements)); return error; } int test_atomic_flag(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_atomic_flag_generic(deviceID, context, queue, num_elements, false); } int test_svm_atomic_flag(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_atomic_flag_generic(deviceID, context, queue, num_elements, true); } template class CBasicTestFence : public CBasicTestMemOrderScope { struct TestDefinition { bool op1IsFence; TExplicitMemoryOrderType op1MemOrder; bool op2IsFence; TExplicitMemoryOrderType op2MemOrder; }; public: using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::OldValueCheck; using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryScope; using CBasicTestMemOrderScope::MemoryScopeStr; using CBasicTestMemOrderScope::DeclaredInProgram; using CBasicTestMemOrderScope::UsedInFunction; using CBasicTestMemOrderScope::DataType; using CBasicTestMemOrderScope::CurrentGroupSize; using CBasicTestMemOrderScope::UseSVM; using CBasicTestMemOrderScope::LocalMemory; using CBasicTestMemOrderScope::LocalRefValues; CBasicTestFence(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) { StartValue(0); OldValueCheck(false); } virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) { return threadCount; } virtual cl_uint NumNonAtomicVariablesPerThread() { if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) return 1; if (LocalMemory()) { if (gIsEmbedded) { if (CurrentGroupSize() > 512) CurrentGroupSize(512); return 2; // 1KB of local memory required by spec. Clamp group // size to 512 and allow 2 variables per thread } else return 32 * 1024 / 8 / CurrentGroupSize() - 1; // 32KB of local memory required by spec } return 256; } virtual std::string SingleTestName() { std::string testName; if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) testName += "seq_cst fence, "; else testName += std::string(get_memory_order_type_name(_subCase.op1MemOrder)) .substr(sizeof("memory_order")) + (_subCase.op1IsFence ? " fence" : " atomic") + " synchronizes-with " + std::string(get_memory_order_type_name(_subCase.op2MemOrder)) .substr(sizeof("memory_order")) + (_subCase.op2IsFence ? " fence" : " atomic") + ", "; testName += CBasicTest::SingleTestName(); testName += std::string(", ") + std::string(get_memory_scope_type_name(MemoryScope())) .substr(sizeof("memory")); return testName; } virtual bool SVMDataBufferAllSVMConsistent() { // Although memory_scope_all_devices doesn't mention SVM it is just an // alias for memory_scope_all_svm_devices. So both scopes interact with // SVM allocations, on devices that support those, just the same. 
int test_atomic_flag_generic(cl_device_id deviceID, cl_context context,
                             cl_command_queue queue, int num_elements,
                             bool useSVM)
{
    int error = 0;
    CBasicTestFlag<HOST_ATOMIC_FLAG, HOST_FLAG> test_flag(TYPE_ATOMIC_FLAG,
                                                          useSVM);
    EXECUTE_TEST(error,
                 test_flag.Execute(deviceID, context, queue, num_elements));
    return error;
}

int test_atomic_flag(cl_device_id deviceID, cl_context context,
                     cl_command_queue queue, int num_elements)
{
    return test_atomic_flag_generic(deviceID, context, queue, num_elements,
                                    false);
}

int test_svm_atomic_flag(cl_device_id deviceID, cl_context context,
                         cl_command_queue queue, int num_elements)
{
    return test_atomic_flag_generic(deviceID, context, queue, num_elements,
                                    true);
}

template <typename HostAtomicType, typename HostDataType>
class CBasicTestFence
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
    struct TestDefinition
    {
        bool op1IsFence;
        TExplicitMemoryOrderType op1MemOrder;
        bool op2IsFence;
        TExplicitMemoryOrderType op2MemOrder;
    };

public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::DeclaredInProgram;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::UsedInFunction;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::CurrentGroupSize;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::UseSVM;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalRefValues;
    CBasicTestFence(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {
        StartValue(0);
        OldValueCheck(false);
    }
    virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
    {
        return threadCount;
    }
    virtual cl_uint NumNonAtomicVariablesPerThread()
    {
        if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) return 1;
        if (LocalMemory())
        {
            if (gIsEmbedded)
            {
                if (CurrentGroupSize() > 512) CurrentGroupSize(512);
                return 2; // the spec requires 1KB of local memory; clamp the
                          // group size to 512 and allow 2 variables per thread
            }
            else
                return 32 * 1024 / 8 / CurrentGroupSize()
                    - 1; // the spec requires 32KB of local memory
        }
        return 256;
    }
    virtual std::string SingleTestName()
    {
        std::string testName;
        if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
            testName += "seq_cst fence, ";
        else
            testName +=
                std::string(get_memory_order_type_name(_subCase.op1MemOrder))
                    .substr(sizeof("memory_order"))
                + (_subCase.op1IsFence ? " fence" : " atomic")
                + " synchronizes-with "
                + std::string(get_memory_order_type_name(_subCase.op2MemOrder))
                      .substr(sizeof("memory_order"))
                + (_subCase.op2IsFence ? " fence" : " atomic") + ", ";
        testName += CBasicTest<HostAtomicType, HostDataType>::SingleTestName();
        testName += std::string(", ")
            + std::string(get_memory_scope_type_name(MemoryScope()))
                  .substr(sizeof("memory"));
        return testName;
    }
    virtual bool SVMDataBufferAllSVMConsistent()
    {
        // Although memory_scope_all_devices doesn't mention SVM, it is just
        // an alias for memory_scope_all_svm_devices. So both scopes interact
        // with SVM allocations, on devices that support them, just the same.
        return MemoryScope() == MEMORY_SCOPE_ALL_DEVICES
            || MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES;
    }
    virtual int ExecuteForEachParameterSet(cl_device_id deviceID,
                                           cl_context context,
                                           cl_command_queue queue)
    {
        int error = 0;
        // execute 3 (maximum) sub-cases for each memory order
        for (_subCaseId = 0; _subCaseId < 3; _subCaseId++)
        {
            EXECUTE_TEST(
                error,
                (CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
                     ExecuteForEachParameterSet(deviceID, context, queue)));
        }
        return error;
    }
    virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                                  cl_command_queue queue)
    {
        if (DeclaredInProgram() || UsedInFunction())
            return 0; // skip test - not applicable - no overloaded fence
                      // functions for different address spaces
        if (MemoryOrder() == MEMORY_ORDER_EMPTY
            || MemoryScope() == MEMORY_SCOPE_EMPTY) // the empty 'scope' is not
                                                    // required since
                                                    // opencl20-openclc-rev15
            return 0; // skip test - not applicable
        if ((UseSVM() || gHost) && LocalMemory())
            return 0; // skip test - not applicable for SVM and local memory
        struct TestDefinition acqTests[] = {
            // {op1IsFence, op1MemOrder, op2IsFence, op2MemOrder}
            { false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE },
            { true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE },
            { true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQUIRE }
        };
        struct TestDefinition relTests[] = {
            { true, MEMORY_ORDER_RELEASE, false, MEMORY_ORDER_ACQUIRE },
            { true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL }
        };
        struct TestDefinition arTests[] = {
            { false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL },
            { true, MEMORY_ORDER_ACQ_REL, false, MEMORY_ORDER_ACQUIRE },
            { true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQ_REL }
        };
        switch (MemoryOrder())
        {
            case MEMORY_ORDER_ACQUIRE:
                if (_subCaseId
                    >= sizeof(acqTests) / sizeof(struct TestDefinition))
                    return 0;
                _subCase = acqTests[_subCaseId];
                break;
            case MEMORY_ORDER_RELEASE:
                if (_subCaseId
                    >= sizeof(relTests) / sizeof(struct TestDefinition))
                    return 0;
                _subCase = relTests[_subCaseId];
                break;
            case MEMORY_ORDER_ACQ_REL:
                if (_subCaseId
                    >= sizeof(arTests) / sizeof(struct TestDefinition))
                    return 0;
                _subCase = arTests[_subCaseId];
                break;
            case MEMORY_ORDER_SEQ_CST:
                if (_subCaseId != 0) // one special case only
                    return 0;
                break;
            default: return 0;
        }
        LocalRefValues(LocalMemory());
        return CBasicTestMemOrderScope<
            HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
                                                             queue);
    }
    virtual std::string ProgramHeader(cl_uint maxNumDestItems)
    {
        std::string header;
        if (gOldAPI)
        {
            if (MemoryScope() == MEMORY_SCOPE_EMPTY)
            {
                header += "#define atomic_work_item_fence(x,y)        "
                          "mem_fence(x)\n";
            }
            else
            {
                header += "#define atomic_work_item_fence(x,y,z)      "
                          "mem_fence(x)\n";
            }
        }
        return header
            + CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
                ProgramHeader(maxNumDestItems);
    }
    virtual std::string ProgramCore()
    {
        std::ostringstream naValues;
        naValues << NumNonAtomicVariablesPerThread();
        std::string program, fenceType, nonAtomic;
        if (LocalMemory())
        {
            program = "  size_t myId = get_local_id(0), hisId = "
                      "get_local_size(0)-1-myId;\n";
            fenceType = "CLK_LOCAL_MEM_FENCE";
            nonAtomic = "localValues";
        }
        else
        {
            program = "  size_t myId = tid, hisId = threadCount-1-tid;\n";
            fenceType = "CLK_GLOBAL_MEM_FENCE";
            nonAtomic = "oldValues";
        }
        if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
        {
            // All threads are divided into pairs.
            // Each thread has its own atomic variable and performs the
            // following actions:
            // - increments its own variable
            // - performs a fence operation to propagate its value and to see
            //   the value from the other thread
            // - reads the value from the other thread's variable
            // - repeats the above steps while both values are the same (and
            //   less than 1000000)
            // - stores the last value read from the other thread (in an
            //   additional variable)
            // At the end of execution at least one thread should know the
            // last value from the other thread.
            program += std::string("") + "  " + DataType().RegularTypeName()
                + " myValue = 0, hisValue;\n"
                  "  do {\n"
                  "    myValue++;\n"
                  "    atomic_store_explicit(&destMemory[myId], myValue, "
                  "memory_order_relaxed"
                + MemoryScopeStr()
                + ");\n"
                  "    atomic_work_item_fence("
                + fenceType + ", memory_order_seq_cst" + MemoryScopeStr()
                + ");\n"
                  "    hisValue = atomic_load_explicit(&destMemory[hisId], "
                  "memory_order_relaxed"
                + MemoryScopeStr()
                + ");\n"
                  "  } while(myValue == hisValue && myValue < 1000000);\n"
                  "  "
                + nonAtomic + "[myId] = hisValue;\n";
        }
        else
        {
            // Each thread modifies one of its non-atomic variables,
            // increments the value of its atomic variable and reads values
            // from another thread, in a typical synchronizes-with scenario:
            // - non-atomic variable (at index A) modification (value change
            //   from 0 to A)
            // - release operation (an additional fence or within the atomic)
            //   + atomic variable modification (value A)
            // - atomic variable read (value B) + acquire operation (an
            //   additional fence or within the atomic)
            // - non-atomic variable (at index B) read (value C)
            // Each thread verifies the dependency between the atomic and
            // non-atomic values read from the other thread. The following
            // condition must be true: B == C.
            program += std::string("") + "  " + DataType().RegularTypeName()
                + " myValue = 0, hisAtomicValue, hisValue;\n"
                  "  do {\n"
                  "    myValue++;\n"
                  "    "
                + nonAtomic + "[myId*" + naValues.str()
                + "+myValue] = myValue;\n";
            if (_subCase.op1IsFence)
                program += std::string("") + "    atomic_work_item_fence("
                    + fenceType + ", "
                    + get_memory_order_type_name(_subCase.op1MemOrder)
                    + MemoryScopeStr()
                    + ");\n"
                      "    atomic_store_explicit(&destMemory[myId], myValue, "
                      "memory_order_relaxed"
                    + MemoryScopeStr() + ");\n";
            else
                program += std::string("")
                    + "    atomic_store_explicit(&destMemory[myId], myValue, "
                    + get_memory_order_type_name(_subCase.op1MemOrder)
                    + MemoryScopeStr() + ");\n";
            if (_subCase.op2IsFence)
                program += std::string("")
                    + "    hisAtomicValue = "
                      "atomic_load_explicit(&destMemory[hisId], "
                      "memory_order_relaxed"
                    + MemoryScopeStr()
                    + ");\n"
                      "    atomic_work_item_fence("
                    + fenceType + ", "
                    + get_memory_order_type_name(_subCase.op2MemOrder)
                    + MemoryScopeStr() + ");\n";
            else
                program += std::string("")
                    + "    hisAtomicValue = "
                      "atomic_load_explicit(&destMemory[hisId], "
                    + get_memory_order_type_name(_subCase.op2MemOrder)
                    + MemoryScopeStr() + ");\n";
            program += "    hisValue = " + nonAtomic + "[hisId*"
                + naValues.str() + "+hisAtomicValue];\n";
            if (LocalMemory())
                program += "    hisId = (hisId+1)%get_local_size(0);\n";
            else
                program += "    hisId = (hisId+1)%threadCount;\n";
            program += "  } while(hisAtomicValue == hisValue && myValue < "
                + naValues.str()
                + "-1);\n"
                  "  if(hisAtomicValue != hisValue)\n"
                  "  { // fail\n"
                  "    atomic_store_explicit(&destMemory[myId], myValue-1, "
                  "memory_order_relaxed, memory_scope_work_group);\n";
            if (LocalMemory())
                program += "    hisId = "
                           "(hisId+get_local_size(0)-1)%get_local_size(0);\n";
            else
                program += "    hisId = (hisId+threadCount-1)%threadCount;\n";
            program += "    if(myValue+1 < " + naValues.str()
                + ")\n"
                  "      "
                + nonAtomic +
"[myId*" + naValues.str() + "+myValue+1] = hisId;\n" " if(myValue+2 < " + naValues.str() + ")\n" " " + nonAtomic + "[myId*" + naValues.str() + "+myValue+2] = hisAtomicValue;\n" " if(myValue+3 < " + naValues.str() + ")\n" " " + nonAtomic + "[myId*" + naValues.str() + "+myValue+3] = hisValue;\n"; if (gDebug) { program += " printf(\"WI %d: atomic value (%d) at index %d " "is different than non-atomic value (%d)\\n\", tid, " "hisAtomicValue, hisId, hisValue);\n"; } program += " }\n"; } return program; } virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) { size_t myId = tid, hisId = threadCount - 1 - tid; if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) { HostDataType myValue = 0, hisValue; // CPU thread typically starts faster - wait for GPU thread myValue++; host_atomic_store( &destMemory[myId], myValue, MEMORY_ORDER_SEQ_CST); while (host_atomic_load( &destMemory[hisId], MEMORY_ORDER_SEQ_CST) == 0) ; do { myValue++; host_atomic_store( &destMemory[myId], myValue, MEMORY_ORDER_RELAXED); host_atomic_thread_fence(MemoryOrder()); hisValue = host_atomic_load( &destMemory[hisId], MEMORY_ORDER_RELAXED); } while (myValue == hisValue && hisValue < 1000000); oldValues[tid] = hisValue; } else { HostDataType myValue = 0, hisAtomicValue, hisValue; do { myValue++; oldValues[myId * NumNonAtomicVariablesPerThread() + myValue] = myValue; if (_subCase.op1IsFence) { host_atomic_thread_fence(_subCase.op1MemOrder); host_atomic_store( &destMemory[myId], myValue, MEMORY_ORDER_RELAXED); } else host_atomic_store( &destMemory[myId], myValue, _subCase.op1MemOrder); if (_subCase.op2IsFence) { hisAtomicValue = host_atomic_load( &destMemory[hisId], MEMORY_ORDER_RELAXED); host_atomic_thread_fence(_subCase.op2MemOrder); } else hisAtomicValue = host_atomic_load( &destMemory[hisId], _subCase.op2MemOrder); hisValue = oldValues[hisId * NumNonAtomicVariablesPerThread() + hisAtomicValue]; hisId = (hisId + 1) % threadCount; } while (hisAtomicValue == hisValue && myValue < (HostDataType)NumNonAtomicVariablesPerThread() - 1); if (hisAtomicValue != hisValue) { // fail host_atomic_store( &destMemory[myId], myValue - 1, MEMORY_ORDER_SEQ_CST); if (gDebug) { hisId = (hisId + threadCount - 1) % threadCount; printf("WI %d: atomic value (%d) at index %d is different " "than non-atomic value (%d)\n", tid, hisAtomicValue, hisId, hisValue); } } } } virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) { for (cl_uint i = 0; i < threadCount * NumNonAtomicVariablesPerThread(); i++) startRefValues[i] = 0; return true; } virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) { correct = true; cl_uint workSize = LocalMemory() ? 
        for (cl_uint workOffset = 0; workOffset < threadCount;
             workOffset += workSize)
        {
            if (workOffset + workSize > threadCount)
                // last workgroup (host threads)
                workSize = threadCount - workOffset;
            for (cl_uint i = 0; i < workSize && workOffset + i < threadCount;
                 i++)
            {
                HostAtomicType myValue = finalValues[workOffset + i];
                if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
                {
                    HostDataType hisValue = refValues[workOffset + i];
                    if (myValue == hisValue)
                    {
                        // a draw - both threads should reach the final value
                        // 1000000
                        if (myValue != 1000000)
                        {
                            log_error("ERROR: Invalid reference value #%u (%d "
                                      "instead of 1000000)\n",
                                      workOffset + i, myValue);
                            correct = false;
                            return true;
                        }
                    }
                    else
                    {
                        // the slower thread (in the total order of seq_cst
                        // operations) must know the last value written by the
                        // faster thread
                        HostAtomicType hisRealValue =
                            finalValues[workOffset + workSize - 1 - i];
                        HostDataType myValueReadByHim =
                            refValues[workOffset + workSize - 1 - i];
                        // who is the winner? - the thread with the lower
                        // private counter value
                        if (myValue == hisRealValue)
                        { // forbidden result - the fence doesn't work
                            log_error("ERROR: Atomic counter values #%u and "
                                      "#%u are the same (%u)\n",
                                      workOffset + i,
                                      workOffset + workSize - 1 - i, myValue);
                            log_error(
                                "ERROR: Both threads have outdated values "
                                "read from the other thread (%u and %u)\n",
                                hisValue, myValueReadByHim);
                            correct = false;
                            return true;
                        }
                        if (myValue > hisRealValue) // I'm slower
                        {
                            if (hisRealValue != hisValue)
                            {
                                log_error("ERROR: Invalid reference value #%u "
                                          "(%d instead of %d)\n",
                                          workOffset + i, hisValue,
                                          hisRealValue);
                                log_error(
                                    "ERROR: Slower thread #%u should know the "
                                    "value written by faster thread #%u\n",
                                    workOffset + i,
                                    workOffset + workSize - 1 - i);
                                correct = false;
                                return true;
                            }
                        }
                        else // I'm faster
                        {
                            if (myValueReadByHim != myValue)
                            {
                                log_error("ERROR: Invalid reference value #%u "
                                          "(%d instead of %d)\n",
                                          workOffset + workSize - 1 - i,
                                          myValueReadByHim, myValue);
                                log_error(
                                    "ERROR: Slower thread #%u should know the "
                                    "value written by faster thread #%u\n",
                                    workOffset + workSize - 1 - i,
                                    workOffset + i);
                                correct = false;
                                return true;
                            }
                        }
                    }
                }
                else
                {
                    if (myValue != NumNonAtomicVariablesPerThread() - 1)
                    {
                        log_error("ERROR: Invalid atomic value #%u (%d "
                                  "instead of %d)\n",
                                  workOffset + i, myValue,
                                  NumNonAtomicVariablesPerThread() - 1);
                        log_error("ERROR: Thread #%u observed invalid values "
                                  "in other threads' variables\n",
                                  workOffset + i);
                        correct = false;
                        return true;
                    }
                }
            }
        }
        return true;
    }

private:
    size_t _subCaseId;
    struct TestDefinition _subCase;
};
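// Illustrative sketch (not part of the conformance test): the non-seq_cst
// sub-cases above all instantiate the same message-passing pattern - a
// non-atomic payload write made visible by a release operation (either a
// release fence before a relaxed store, or a release store itself) and
// observed through an acquire operation (either an acquire fence after a
// relaxed load, or an acquire load). The seq_cst sub-case instead uses
// seq_cst fences so both threads agree on a single total order. The
// fence-on-both-sides variant in plain C++11, with hypothetical names:
#if 0 // example only, kept out of the build
#include <atomic>
#include <cassert>

static int payload; // non-atomic data
static std::atomic<int> ready(0);

static void producer()
{
    payload = 42; // non-atomic write
    std::atomic_thread_fence(std::memory_order_release); // op1 as a fence...
    ready.store(1, std::memory_order_relaxed); // ...before a relaxed store
}

static void consumer()
{
    if (ready.load(std::memory_order_relaxed) == 1) // relaxed load...
    {
        std::atomic_thread_fence(std::memory_order_acquire); // ...then op2
        // the release fence synchronizes-with the acquire fence, so the
        // payload write is guaranteed to be visible here
        assert(payload == 42);
    }
}
#endif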
int test_atomic_fence_generic(cl_device_id deviceID, cl_context context,
                              cl_command_queue queue, int num_elements,
                              bool useSVM)
{
    int error = 0;
    CBasicTestFence<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                        useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestFence<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
                                                           useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestFence<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
                                                           useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestFence<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
        TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestFence<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFence<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFence<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error,
            test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFence<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestFence<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFence<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFence<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error,
            test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFence<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

int test_atomic_fence(cl_device_id deviceID, cl_context context,
                      cl_command_queue queue, int num_elements)
{
    return test_atomic_fence_generic(deviceID, context, queue, num_elements,
                                     false);
}

int test_svm_atomic_fence(cl_device_id deviceID, cl_context context,
                          cl_command_queue queue, int num_elements)
{
    return test_atomic_fence_generic(deviceID, context, queue, num_elements,
                                     true);
}