// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file is an internal atomic implementation, use base/atomicops.h instead.

#ifndef BASE_ATOMICOPS_INTERNALS_X86_GCC_H_
#define BASE_ATOMICOPS_INTERNALS_X86_GCC_H_
#pragma once

// This struct is not part of the public API of this module; clients may not
// use it.
// Features of this x86. Values may not be correct before main() is run,
// but are set conservatively.
struct AtomicOps_x86CPUFeatureStruct {
  bool has_amd_lock_mb_bug;  // Processor has AMD memory-barrier bug; do lfence
                             // after acquire compare-and-swap.
  bool has_sse2;             // Processor has SSE2.
};
extern struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures;

#define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory")
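
// Note: ATOMICOPS_COMPILER_BARRIER() only prevents the *compiler* from
// reordering memory accesses across it; it emits no machine instruction and
// is not a hardware fence.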

namespace base {
namespace subtle {

// 32-bit low-level operations on any platform.

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev;
  __asm__ __volatile__("lock; cmpxchgl %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}
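
// Illustrative sketch (not part of this file's API): callers typically build
// arbitrary read-modify-write operations as a CAS loop on this primitive,
// e.g. an atomic bitwise-or:
//   Atomic32 old;
//   do {
//     old = NoBarrier_Load(ptr);
//   } while (NoBarrier_CompareAndSwap(ptr, old, old | mask) != old);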
39
NoBarrier_AtomicExchange(volatile Atomic32 * ptr,Atomic32 new_value)40 inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
41 Atomic32 new_value) {
42 __asm__ __volatile__("xchgl %1,%0" // The lock prefix is implicit for xchg.
43 : "=r" (new_value)
44 : "m" (*ptr), "0" (new_value)
45 : "memory");
46 return new_value; // Now it's the previous value.
47 }
48
NoBarrier_AtomicIncrement(volatile Atomic32 * ptr,Atomic32 increment)49 inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
50 Atomic32 increment) {
51 Atomic32 temp = increment;
52 __asm__ __volatile__("lock; xaddl %0,%1"
53 : "+r" (temp), "+m" (*ptr)
54 : : "memory");
55 // temp now holds the old value of *ptr
56 return temp + increment;
57 }
58
Barrier_AtomicIncrement(volatile Atomic32 * ptr,Atomic32 increment)59 inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
60 Atomic32 increment) {
61 Atomic32 temp = increment;
62 __asm__ __volatile__("lock; xaddl %0,%1"
63 : "+r" (temp), "+m" (*ptr)
64 : : "memory");
65 // temp now holds the old value of *ptr
66 if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
67 __asm__ __volatile__("lfence" : : : "memory");
68 }
69 return temp + increment;
70 }
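
// Illustrative sketch (not part of this file's API): the increment functions
// return the *new* value, so a hypothetical reference-count release could be
// written as:
//   if (Barrier_AtomicIncrement(&obj->refcount, -1) == 0)
//     delete obj;  // we dropped the last reference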

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}
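
// Illustrative sketch (not part of this file's API): a minimal spinlock built
// from these primitives, using a hypothetical Atomic32 lock word:
//   void Lock(volatile Atomic32* l) {
//     while (Acquire_CompareAndSwap(l, 0, 1) != 0) {}  // spin until acquired
//   }
//   void Unlock(volatile Atomic32* l) {
//     Release_Store(l, 0);
//   }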

#if defined(__x86_64__)

// 64-bit implementations of memory barrier can be simpler, because
// "mfence" is guaranteed to exist.
inline void MemoryBarrier() {
  __asm__ __volatile__("mfence" : : : "memory");
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
  MemoryBarrier();
}

#else

inline void MemoryBarrier() {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    __asm__ __volatile__("mfence" : : : "memory");
  } else {  // mfence is faster but not present on PIII
    Atomic32 x = 0;
    NoBarrier_AtomicExchange(&x, 0);  // acts as a barrier on PIII
  }
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    *ptr = value;
    __asm__ __volatile__("mfence" : : : "memory");
  } else {
    NoBarrier_AtomicExchange(ptr, value);
                          // acts as a barrier on PIII
  }
}
#endif
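
// Note: on x86, any locked read-modify-write instruction acts as a full
// memory barrier, which is why the implicitly locked "xchg" above can stand
// in for "mfence" on pre-SSE2 processors.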

inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
  ATOMICOPS_COMPILER_BARRIER();
  *ptr = value;  // An x86 store acts as a release barrier.
  // See comments in Atomic64 version of Release_Store(), below.
}

inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
  return *ptr;
}

inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr;  // An x86 load acts as an acquire barrier.
  // See comments in Atomic64 version of Release_Store(), below.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}
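
// Illustrative sketch (not part of this file's API): the canonical pairing of
// these primitives is a producer that publishes data with Release_Store() and
// a consumer that observes it with Acquire_Load(). With hypothetical names
// g_data, g_ready, and use():
//   // producer:
//   g_data = 42;                 // plain store
//   Release_Store(&g_ready, 1);  // ordered after the data store
//   // consumer:
//   if (Acquire_Load(&g_ready))
//     use(g_data);               // guaranteed to see g_data == 42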

#if defined(__x86_64__)

// 64-bit low-level operations on 64-bit platform.

inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
                                         Atomic64 old_value,
                                         Atomic64 new_value) {
  Atomic64 prev;
  __asm__ __volatile__("lock; cmpxchgq %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}

inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
                                         Atomic64 new_value) {
  __asm__ __volatile__("xchgq %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
                                          Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  return temp + increment;
}

inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
                                        Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}

inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
}

inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
  MemoryBarrier();
}

inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
  ATOMICOPS_COMPILER_BARRIER();

  *ptr = value;  // An x86 store acts as a release barrier
                 // for current AMD/Intel chips as of Jan 2008.
                 // See also Acquire_Load(), below.

  // When new chips come out, check:
  //  IA-32 Intel Architecture Software Developer's Manual, Volume 3:
  //  System Programming Guide, Chapter 7: Multiple-processor management,
  //  Section 7.2, Memory Ordering.
  // Last seen at:
  //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
  //
  // x86 stores/loads fail to act as barriers for a few instructions (clflush
  // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are
  // not generated by the compiler, and are rare. Users of these instructions
  // need to know about cache behaviour in any case since all of these involve
  // either flushing cache lines or non-temporal cache hints.
}

inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
  return *ptr;
}

inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
  Atomic64 value = *ptr;  // An x86 load acts as an acquire barrier,
                          // for current AMD/Intel chips as of Jan 2008.
                          // See also Release_Store(), above.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
  MemoryBarrier();
  return *ptr;
}

inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

#endif  // defined(__x86_64__)

}  // namespace base::subtle
}  // namespace base

#undef ATOMICOPS_COMPILER_BARRIER

#endif  // BASE_ATOMICOPS_INTERNALS_X86_GCC_H_