• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // This file is an internal atomic implementation, use base/atomicops.h instead.
6 
7 #ifndef BASE_ATOMICOPS_INTERNALS_X86_GCC_H_
8 #define BASE_ATOMICOPS_INTERNALS_X86_GCC_H_
9 #pragma once
10 
// This struct is not part of the public API of this module; clients may not
// use it.
// Features of this x86.  Values may not be correct before main() is run,
// but are set conservatively.
// NOTE(review): the single instance below is defined elsewhere (presumably in
// the companion .cc, populated from cpuid at startup) — confirm against the
// implementation file.
struct AtomicOps_x86CPUFeatureStruct {
  bool has_amd_lock_mb_bug; // Processor has AMD memory-barrier bug; do lfence
                            // after acquire compare-and-swap.
  bool has_sse2;            // Processor has SSE2 (required for mfence).
};
extern struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures;
21 
22 #define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory")
23 
24 namespace base {
25 namespace subtle {
26 
27 // 32-bit low-level operations on any platform.
28 
// Atomically compare *ptr with old_value and, if equal, store new_value into
// *ptr.  Returns the value *ptr held before the operation; the swap happened
// exactly when that return value equals old_value.  "NoBarrier" means no
// ordering guarantees are promised to the caller (the "memory" clobber only
// stops the compiler from caching memory values across the asm).
inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev;
  // cmpxchgl compares EAX (old_value, tied in via "0") with *ptr; on match it
  // stores new_value, otherwise it loads *ptr into EAX.  Either way EAX ends
  // up holding the previous value, which "=a" writes to prev.
  __asm__ __volatile__("lock; cmpxchgl %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}
39 
// Atomically store new_value into *ptr and return the value previously held
// there.  No ordering guarantees are promised to the caller.
inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  // new_value is reused as the in/out register operand ("0" ties input and
  // output), so after the xchg it holds the old contents of *ptr.
  __asm__ __volatile__("xchgl %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}
48 
// Atomically add increment to *ptr and return the new (post-increment) value.
// No ordering guarantees are promised to the caller.
inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  Atomic32 temp = increment;
  // xadd swaps temp and *ptr and stores their sum into *ptr, so temp receives
  // the old value while *ptr receives old + increment.
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  return temp + increment;
}
58 
// Atomically add increment to *ptr and return the new value, with full
// barrier semantics.  The trailing lfence works around the AMD
// memory-barrier erratum (see has_amd_lock_mb_bug) on affected processors.
inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  Atomic32 temp = increment;
  // xadd: temp receives the old value of *ptr; *ptr receives old + increment.
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}
71 
// Compare-and-swap with acquire semantics: delegates to the plain CAS, then
// issues an lfence on processors exhibiting the AMD memory-barrier erratum
// (see has_amd_lock_mb_bug in the feature struct).
inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  const Atomic32 prev = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return prev;
}
81 
// Compare-and-swap with release semantics.  The plain locked CAS already
// suffices here, so this simply forwards and returns the previous value.
inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  const Atomic32 prev = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  return prev;
}
87 
// Plain store with no ordering guarantees.  Assumes *ptr is naturally
// aligned so that the 32-bit store is atomic on x86 — not checked here.
inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}
91 
92 #if defined(__x86_64__)
93 
// 64-bit implementations of memory barrier can be simpler, because
// "mfence" is guaranteed to exist on all x86-64 processors.
inline void MemoryBarrier() {
  __asm__ __volatile__("mfence" : : : "memory");
}
99 
// Store value into *ptr, then issue a full hardware barrier so the store is
// globally visible before any subsequent memory operation.
inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
  MemoryBarrier();
}
104 
105 #else
106 
// Full hardware memory barrier for 32-bit targets.  mfence requires SSE2;
// on pre-SSE2 processors (e.g. Pentium III) an atomic exchange on a dummy
// local is used instead, since the implicitly-locked xchg orders memory.
inline void MemoryBarrier() {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    __asm__ __volatile__("mfence" : : : "memory");
  } else { // mfence is faster but not present on PIII
    Atomic32 x = 0;
    NoBarrier_AtomicExchange(&x, 0);  // acts as a barrier on PIII
  }
}
115 
// Store value into *ptr followed by a full barrier.  Without SSE2 the store
// and barrier are fused into one atomic exchange, whose implicit lock
// provides the ordering (same trick as MemoryBarrier() above).
inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    *ptr = value;
    __asm__ __volatile__("mfence" : : : "memory");
  } else {
    NoBarrier_AtomicExchange(ptr, value);
                          // acts as a barrier on PIII
  }
}
125 #endif
126 
// Store with release semantics: the compiler barrier keeps the compiler from
// sinking earlier memory operations below the store, and the hardware store
// itself provides release ordering on x86.
inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
  ATOMICOPS_COMPILER_BARRIER();
  *ptr = value; // An x86 store acts as a release barrier.
  // See comments in Atomic64 version of Release_Store(), below.
}
132 
// Plain load with no ordering guarantees.  Assumes natural alignment so the
// 32-bit load is atomic on x86 — not checked here.
inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
  return *ptr;
}
136 
// Load with acquire semantics: the compiler barrier keeps later memory
// operations from being hoisted above the load; the hardware load itself
// provides acquire ordering on x86.
inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr; // An x86 load acts as an acquire barrier.
  // See comments in Atomic64 version of Release_Store(), below.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}
143 
// Full hardware barrier, then a plain load of *ptr.
inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}
148 
149 #if defined(__x86_64__)
150 
151 // 64-bit low-level operations on 64-bit platform.
152 
// 64-bit analogue of the 32-bit NoBarrier_CompareAndSwap: atomically compare
// *ptr with old_value and, if equal, store new_value.  Returns the previous
// value of *ptr; the swap happened exactly when it equals old_value.
inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
                                         Atomic64 old_value,
                                         Atomic64 new_value) {
  Atomic64 prev;
  // cmpxchgq compares RAX (old_value via "0") with *ptr; RAX always ends up
  // holding the previous value ("=a" -> prev).
  __asm__ __volatile__("lock; cmpxchgq %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}
163 
// Atomically store new_value into *ptr and return the previous 64-bit value.
// No ordering guarantees are promised to the caller.
inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
                                         Atomic64 new_value) {
  // new_value is reused as the in/out register ("0" ties input to output),
  // so after the xchg it holds the old contents of *ptr.
  __asm__ __volatile__("xchgq %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}
172 
// Atomically add increment to the 64-bit *ptr and return the new value.
// No ordering guarantees are promised to the caller.
inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
                                          Atomic64 increment) {
  Atomic64 temp = increment;
  // xadd: temp receives the old value; *ptr receives old + increment.
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  return temp + increment;
}
182 
// Atomically add increment to the 64-bit *ptr and return the new value, with
// barrier semantics.  The trailing lfence works around the AMD
// memory-barrier erratum (see has_amd_lock_mb_bug) on affected processors.
inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
                                        Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}
195 
// Plain 64-bit store with no ordering guarantees.  Assumes natural alignment
// so the store is atomic on x86-64 — not checked here.
inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
}
199 
// Store value into *ptr, then issue a full hardware barrier so the store is
// globally visible before any subsequent memory operation.
inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
  MemoryBarrier();
}
204 
// 64-bit store with release semantics: a compiler barrier followed by a
// plain store, relying on x86 hardware store ordering (details below).
inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
  ATOMICOPS_COMPILER_BARRIER();

  *ptr = value; // An x86 store acts as a release barrier
                // for current AMD/Intel chips as of Jan 2008.
                // See also Acquire_Load(), below.

  // When new chips come out, check:
  //  IA-32 Intel Architecture Software Developer's Manual, Volume 3:
  //  System Programming Guide, Chapter 7: Multiple-processor management,
  //  Section 7.2, Memory Ordering.
  // Last seen at:
  //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
  //
  // x86 stores/loads fail to act as barriers for a few instructions (clflush
  // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are
  // not generated by the compiler, and are rare.  Users of these instructions
  // need to know about cache behaviour in any case since all of these involve
  // either flushing cache lines or non-temporal cache hints.
}
225 
// Plain 64-bit load with no ordering guarantees.  Assumes natural alignment
// so the load is atomic on x86-64 — not checked here.
inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
  return *ptr;
}
229 
// 64-bit load with acquire semantics: the compiler barrier keeps later
// memory operations from being hoisted above the load.
inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
  Atomic64 value = *ptr; // An x86 load acts as an acquire barrier,
                         // for current AMD/Intel chips as of Jan 2008.
                         // See also Release_Store(), above.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}
237 
// Full hardware barrier, then a plain 64-bit load of *ptr.
inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
  MemoryBarrier();
  return *ptr;
}
242 
// 64-bit compare-and-swap with acquire semantics: delegates to the plain
// CAS, then issues an lfence on processors exhibiting the AMD memory-barrier
// erratum (see has_amd_lock_mb_bug in the feature struct).
inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  const Atomic64 prev = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return prev;
}
252 
// 64-bit compare-and-swap with release semantics.  The plain locked CAS
// already suffices, so this simply forwards and returns the previous value.
inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  const Atomic64 prev = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  return prev;
}
258 
259 #endif  // defined(__x86_64__)
260 
} // namespace subtle
262 } // namespace base
263 
264 #undef ATOMICOPS_COMPILER_BARRIER
265 
266 #endif  // BASE_ATOMICOPS_INTERNALS_X86_GCC_H_
267