// Copyright 2010 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// This file is an internal atomic implementation, use atomicops.h instead.

#ifndef V8_ATOMICOPS_INTERNALS_X86_GCC_H_
#define V8_ATOMICOPS_INTERNALS_X86_GCC_H_

// This struct is not part of the public API of this module; clients may not
// use it.
// Features of this x86.  Values may not be correct before main() is run,
// but are set conservatively.
struct AtomicOps_x86CPUFeatureStruct {
  bool has_amd_lock_mb_bug;  // Processor has AMD memory-barrier bug; do lfence
                             // after acquire compare-and-swap.
  bool has_sse2;             // Processor has SSE2.
};
extern struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures;
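//
// This struct is expected to be populated by runtime CPU detection at
// startup (in the accompanying .cc file, not here). As an illustrative
// sketch only, SSE2 detection with GCC's <cpuid.h> looks roughly like the
// following; the function name is hypothetical:
//
//   #include <cpuid.h>
//   static void InitAtomicOpsFeatures() {
//     unsigned int eax, ebx, ecx, edx;
//     if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
//       // SSE2 is reported in CPUID leaf 1, EDX bit 26.
//       AtomicOps_Internalx86CPUFeatures.has_sse2 = (edx >> 26) & 1;
//     }
//   }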

#define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory")

namespace v8 {
namespace internal {

// 32-bit low-level operations on any platform.

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev;
  __asm__ __volatile__("lock; cmpxchgl %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}
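//
// Usage sketch (illustrative only, not part of the API): the return value
// is the word that was actually in *ptr, so a retry loop checks it against
// the expected value. The helper below is hypothetical:
//
//   inline Atomic32 NoBarrier_FetchMax(volatile Atomic32* ptr, Atomic32 v) {
//     Atomic32 old;
//     do {
//       old = *ptr;
//       if (old >= v) return old;  // already at least v; nothing to write
//     } while (NoBarrier_CompareAndSwap(ptr, old, v) != old);
//     return old;  // previous value; *ptr now holds max(old, v)
//   }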

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  __asm__ __volatile__("xchgl %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  Atomic32 temp = increment;
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  return temp + increment;
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  Atomic32 temp = increment;
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}
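//
// Usage sketch (illustrative): both increment functions return the value
// *after* the increment is applied, so a negative increment serves as a
// reference-count release. The helper name is hypothetical:
//
//   // Returns true when the last reference was dropped.
//   inline bool Unref(volatile Atomic32* refcount) {
//     return Barrier_AtomicIncrement(refcount, -1) == 0;
//   }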

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}
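//
// Usage sketch (illustrative): an acquire CAS plus a release store is all a
// minimal spinlock needs. The names are hypothetical; Release_Store is
// defined later in this file:
//
//   inline void SpinAcquire(volatile Atomic32* lock) {
//     while (Acquire_CompareAndSwap(lock, 0, 1) != 0) {
//       // Spin until the lock word goes from 0 (free) to 1 (held).
//     }
//   }
//   inline void SpinRelease(volatile Atomic32* lock) {
//     Release_Store(lock, 0);
//   }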

inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}

#if defined(__x86_64__)

// 64-bit implementations of memory barrier can be simpler, because the
// "mfence" instruction is guaranteed to exist.
inline void MemoryBarrier() {
  __asm__ __volatile__("mfence" : : : "memory");
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
  MemoryBarrier();
}

#else

inline void MemoryBarrier() {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    __asm__ __volatile__("mfence" : : : "memory");
  } else {  // mfence is faster but not present on PIII
    Atomic32 x = 0;
    NoBarrier_AtomicExchange(&x, 0);  // acts as a barrier on PIII
  }
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    *ptr = value;
    __asm__ __volatile__("mfence" : : : "memory");
  } else {
    NoBarrier_AtomicExchange(ptr, value);
    // acts as a barrier on PIII
  }
}
#endif

inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
  ATOMICOPS_COMPILER_BARRIER();
  *ptr = value;  // An x86 store acts as a release barrier.
  // See comments in Atomic64 version of Release_Store(), below.
}

inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
  return *ptr;
}

inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr;  // An x86 load acts as an acquire barrier.
  // See comments in Atomic64 version of Release_Store(), below.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}
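//
// Usage sketch (illustrative): the Release_Store/Acquire_Load pair gives the
// usual message-passing guarantee. All names below are hypothetical:
//
//   Atomic32 g_payload = 0;
//   Atomic32 g_ready = 0;
//
//   void Producer() {
//     NoBarrier_Store(&g_payload, 42);
//     Release_Store(&g_ready, 1);  // publishes g_payload
//   }
//
//   void Consumer() {
//     if (Acquire_Load(&g_ready) == 1) {
//       Atomic32 p = NoBarrier_Load(&g_payload);  // guaranteed to see 42
//     }
//   }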

#if defined(__x86_64__)

// 64-bit low-level operations on 64-bit platform.

inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
                                         Atomic64 old_value,
                                         Atomic64 new_value) {
  Atomic64 prev;
  __asm__ __volatile__("lock; cmpxchgq %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}

inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
                                         Atomic64 new_value) {
  __asm__ __volatile__("xchgq %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
                                          Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  return temp + increment;
}

inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
                                        Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}

inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
}

inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
  MemoryBarrier();
}

inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
  ATOMICOPS_COMPILER_BARRIER();

  *ptr = value;  // An x86 store acts as a release barrier
                 // for current AMD/Intel chips as of Jan 2008.
                 // See also Acquire_Load(), below.
  // When new chips come out, check:
  //   IA-32 Intel Architecture Software Developer's Manual, Volume 3:
  //   System Programming Guide, Chapter 7: Multiple-processor management,
  //   Section 7.2, Memory Ordering.
  // Last seen at:
  //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
  //
  // x86 stores/loads fail to act as barriers for a few instructions (clflush
  // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are
  // not generated by the compiler, and are rare.  Users of these instructions
  // need to know about cache behaviour in any case since all of these involve
  // either flushing cache lines or non-temporal cache hints.
}

inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
  return *ptr;
}

inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
  Atomic64 value = *ptr;  // An x86 load acts as an acquire barrier,
                          // for current AMD/Intel chips as of Jan 2008.
                          // See also Release_Store(), above.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
  MemoryBarrier();
  return *ptr;
}

inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

#endif  // defined(__x86_64__)

} }  // namespace v8::internal

#undef ATOMICOPS_COMPILER_BARRIER

#endif  // V8_ATOMICOPS_INTERNALS_X86_GCC_H_