<atomic> design
The compiler supplies all of the intrinsics described below. This list of intrinsics roughly parallels the requirements of the C and C++ atomics proposals. The C and C++ library implementations simply drop through to these intrinsics. For anything the platform does not support in hardware, the compiler arranges for a (compiler-rt) library call to be made that does the job with a mutex; in that case the memory ordering parameter is ignored (effectively implementing memory_order_seq_cst).
Ultimate efficiency is preferred over run-time error checking. Undefined behavior is acceptable when the inputs do not conform as defined below.
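To illustrate what "drop through" means here, the following is a hypothetical sketch of a library-level atomic forwarding to the intrinsics listed below; atomic_sketch and its members are illustrative names, not libc++'s actual implementation:

// Sketch only: a library atomic that forwards directly to the compiler
// intrinsics described below (5 == memory_order_seq_cst).
template <class T>
struct atomic_sketch
{
    T value_;

    T load(int mem_ord = 5) const volatile
    {
        // No error checking of mem_ord; the library just forwards.
        return __atomic_load(&value_, mem_ord);
    }

    void store(T desired, int mem_ord = 5) volatile
    {
        __atomic_store(&value_, desired, mem_ord);
    }
};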
// In every intrinsic signature below, type* atomic_obj may be a pointer to a
// volatile-qualified type.

// Memory ordering values map to the following meanings:
//   memory_order_relaxed == 0
//   memory_order_consume == 1
//   memory_order_acquire == 2
//   memory_order_release == 3
//   memory_order_acq_rel == 4
//   memory_order_seq_cst == 5

// type must be trivially copyable
// type represents a "type argument"
bool __atomic_is_lock_free(type);

// type must be trivially copyable
// Behavior is defined for mem_ord = 0, 1, 2, 5
type __atomic_load(const type* atomic_obj, int mem_ord);

// type must be trivially copyable
// Behavior is defined for mem_ord = 0, 3, 5
void __atomic_store(type* atomic_obj, type desired, int mem_ord);

// type must be trivially copyable
// Behavior is defined for mem_ord = [0 ... 5]
type __atomic_exchange(type* atomic_obj, type desired, int mem_ord);

// type must be trivially copyable
// Behavior is defined for mem_success = [0 ... 5],
//   mem_failure <= mem_success
//   mem_failure != 3
//   mem_failure != 4
bool __atomic_compare_exchange_strong(type* atomic_obj, type* expected,
                                      type desired,
                                      int mem_success, int mem_failure);

// type must be trivially copyable
// Behavior is defined for mem_success = [0 ... 5],
//   mem_failure <= mem_success
//   mem_failure != 3
//   mem_failure != 4
bool __atomic_compare_exchange_weak(type* atomic_obj, type* expected,
                                    type desired,
                                    int mem_success, int mem_failure);

// type is one of: char, signed char, unsigned char, short, unsigned short, int,
//   unsigned int, long, unsigned long, long long, unsigned long long,
//   char16_t, char32_t, wchar_t
// Behavior is defined for mem_ord = [0 ... 5]
type __atomic_fetch_add(type* atomic_obj, type operand, int mem_ord);

// type is one of: char, signed char, unsigned char, short, unsigned short, int,
//   unsigned int, long, unsigned long, long long, unsigned long long,
//   char16_t, char32_t, wchar_t
// Behavior is defined for mem_ord = [0 ... 5]
type __atomic_fetch_sub(type* atomic_obj, type operand, int mem_ord);

// type is one of: char, signed char, unsigned char, short, unsigned short, int,
//   unsigned int, long, unsigned long, long long, unsigned long long,
//   char16_t, char32_t, wchar_t
// Behavior is defined for mem_ord = [0 ... 5]
type __atomic_fetch_and(type* atomic_obj, type operand, int mem_ord);

// type is one of: char, signed char, unsigned char, short, unsigned short, int,
//   unsigned int, long, unsigned long, long long, unsigned long long,
//   char16_t, char32_t, wchar_t
// Behavior is defined for mem_ord = [0 ... 5]
type __atomic_fetch_or(type* atomic_obj, type operand, int mem_ord);

// type is one of: char, signed char, unsigned char, short, unsigned short, int,
//   unsigned int, long, unsigned long, long long, unsigned long long,
//   char16_t, char32_t, wchar_t
// Behavior is defined for mem_ord = [0 ... 5]
type __atomic_fetch_xor(type* atomic_obj, type operand, int mem_ord);

// Behavior is defined for mem_ord = [0 ... 5]
void* __atomic_fetch_add(void** atomic_obj, ptrdiff_t operand, int mem_ord);
void* __atomic_fetch_sub(void** atomic_obj, ptrdiff_t operand, int mem_ord);

// Behavior is defined for mem_ord = [0 ... 5]
void __atomic_thread_fence(int mem_ord);
void __atomic_signal_fence(int mem_ord);
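Operations not covered by this set can be composed from the compare-exchange intrinsics. As a hedged sketch (atomic_fetch_multiply is an illustrative name, not one of the intrinsics above), an atomic multiply built from __atomic_compare_exchange_weak might look like:

int atomic_fetch_multiply(int* atomic_obj, int operand)
{
    // Illustrative only. The weak form may fail spuriously, so it is used
    // in a retry loop; on failure the intrinsic reloads expected with the
    // current value of *atomic_obj and the product is recomputed.
    int expected = __atomic_load(atomic_obj, 5);
    while (!__atomic_compare_exchange_weak(atomic_obj, &expected,
                                           expected * operand, 5, 5))
        ;
    return expected;  // on success, expected still holds the prior value
}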
If desired, the intrinsics taking a single mem_ord parameter can default that argument to 5 (memory_order_seq_cst).
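Sketched as a declaration, using __atomic_exchange as the representative (a hypothetical form; "type" is the same placeholder used above):

// Hypothetical defaulted form; 5 == memory_order_seq_cst.
type __atomic_exchange(type* atomic_obj, type desired, int mem_ord = 5);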
If desired, the intrinsics taking two ordering parameters can default mem_success to 5 and mem_failure to translate_memory_order(mem_success); the translation drops any release component from the failure ordering, since a failed compare-exchange performs no store. translate_memory_order is defined as:
int translate_memory_order(int o)
{
    switch (o)
    {
    case 4:        // memory_order_acq_rel
        return 2;  // memory_order_acquire
    case 3:        // memory_order_release
        return 0;  // memory_order_relaxed
    }
    return o;      // 0, 1, 2, 5 map to themselves
}
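For concreteness, a hypothetical spot-check of this mapping, assuming the definition above is in scope (encodings per the table above):

#include <cassert>

int main()
{
    assert(translate_memory_order(5) == 5);  // seq_cst stays seq_cst
    assert(translate_memory_order(4) == 2);  // acq_rel -> acquire
    assert(translate_memory_order(3) == 0);  // release -> relaxed
    assert(translate_memory_order(2) == 2);  // acquire unchanged
    assert(translate_memory_order(1) == 1);  // consume unchanged
    assert(translate_memory_order(0) == 0);  // relaxed unchanged
}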
Below are representative C++ implementations of all of the operations. Their purpose is to document the desired semantics of each operation, assuming memory_order_seq_cst. This is essentially the code that will be called if the front end calls out to compiler-rt.
// The representative implementations assume a single global mutex that
// serializes every operation:
#include <cstddef>  // ptrdiff_t
#include <cstring>  // memcmp, memcpy
#include <mutex>    // mutex, unique_lock

using std::mutex;
using std::unique_lock;

static mutex some_mutex;

template <class T>
T __atomic_load(T const volatile* obj)
{
    unique_lock<mutex> _(some_mutex);
    return *obj;
}

template <class T>
void __atomic_store(T volatile* obj, T desr)
{
    unique_lock<mutex> _(some_mutex);
    *obj = desr;
}

template <class T>
T __atomic_exchange(T volatile* obj, T desr)
{
    unique_lock<mutex> _(some_mutex);
    T r = *obj;
    *obj = desr;
    return r;
}

template <class T>
bool __atomic_compare_exchange_strong(T volatile* obj, T* exp, T desr)
{
    unique_lock<mutex> _(some_mutex);
    if (std::memcmp(const_cast<T*>(obj), exp, sizeof(T)) == 0)  // if (*obj == *exp)
    {
        std::memcpy(const_cast<T*>(obj), &desr, sizeof(T));     // *obj = desr;
        return true;
    }
    std::memcpy(exp, const_cast<T*>(obj), sizeof(T));           // *exp = *obj;
    return false;
}

// May spuriously return false (even if *obj == *exp)
template <class T>
bool __atomic_compare_exchange_weak(T volatile* obj, T* exp, T desr)
{
    unique_lock<mutex> _(some_mutex);
    if (std::memcmp(const_cast<T*>(obj), exp, sizeof(T)) == 0)  // if (*obj == *exp)
    {
        std::memcpy(const_cast<T*>(obj), &desr, sizeof(T));     // *obj = desr;
        return true;
    }
    std::memcpy(exp, const_cast<T*>(obj), sizeof(T));           // *exp = *obj;
    return false;
}

template <class T>
T __atomic_fetch_add(T volatile* obj, T operand)
{
    unique_lock<mutex> _(some_mutex);
    T r = *obj;
    *obj += operand;
    return r;
}

template <class T>
T __atomic_fetch_sub(T volatile* obj, T operand)
{
    unique_lock<mutex> _(some_mutex);
    T r = *obj;
    *obj -= operand;
    return r;
}

template <class T>
T __atomic_fetch_and(T volatile* obj, T operand)
{
    unique_lock<mutex> _(some_mutex);
    T r = *obj;
    *obj &= operand;
    return r;
}

template <class T>
T __atomic_fetch_or(T volatile* obj, T operand)
{
    unique_lock<mutex> _(some_mutex);
    T r = *obj;
    *obj |= operand;
    return r;
}

template <class T>
T __atomic_fetch_xor(T volatile* obj, T operand)
{
    unique_lock<mutex> _(some_mutex);
    T r = *obj;
    *obj ^= operand;
    return r;
}

void* __atomic_fetch_add(void* volatile* obj, ptrdiff_t operand)
{
    unique_lock<mutex> _(some_mutex);
    void* r = *obj;
    (char*&)(*obj) += operand;  // advance the stored pointer by operand bytes
    return r;
}

void* __atomic_fetch_sub(void* volatile* obj, ptrdiff_t operand)
{
    unique_lock<mutex> _(some_mutex);
    void* r = *obj;
    (char*&)(*obj) -= operand;
    return r;
}

void __atomic_thread_fence()
{
    unique_lock<mutex> _(some_mutex);
}

void __atomic_signal_fence()
{
    unique_lock<mutex> _(some_mutex);
}
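A minimal driver exercising the lock-based versions, assuming the definitions above (including some_mutex) are compiled in the same translation unit; the values in the comments are what the seq_cst semantics require:

#include <cassert>

int main()
{
    int x = 5;
    int old = __atomic_fetch_add(&x, 3);  // returns the prior value: old == 5, x == 8
    int expected = 8;
    bool ok = __atomic_compare_exchange_strong(&x, &expected, 42);
    assert(old == 5);
    assert(ok && x == 42);                // comparison succeeded, 42 was stored
}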