// Copyright 2010 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file is an internal atomic implementation, use atomicops.h instead.
//
// LinuxKernelCmpxchg and Barrier_AtomicIncrement are from Google Gears.

#ifndef V8_BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_
#define V8_BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_

#if defined(__QNXNTO__)
#include <sys/cpuinline.h>
#endif

namespace v8 {
namespace base {

// Memory barriers on ARM are funky, but the kernel is here to help:
//
// * ARMv5 didn't support SMP; there is no memory barrier instruction at
//   all on this architecture, or when targeting its machine code.
//
// * Some ARMv6 CPUs support SMP. A full memory barrier can be produced by
//   writing a random value to a very specific coprocessor register.
//
// * On ARMv7, the "dmb" instruction is used to perform a full memory
//   barrier (though writing to the co-processor will still work).
//   However, on single-core devices (e.g. Nexus One, or Nexus S),
//   this instruction will take up to 200 ns, which is huge, even though
//   it's completely unneeded on these devices.
//
// * There is no easy way to determine at runtime if the device is
//   single- or multi-core. However, the kernel provides a useful helper
//   function at a fixed memory address (0xffff0fa0), which will always
//   perform a memory barrier in the most efficient way. I.e. on single-core
//   devices, this is an empty function that exits immediately.
//   On multi-core devices, it implements a full memory barrier.
//
// * This source could be compiled to ARMv5 machine code that runs on a
//   multi-core ARMv6 or ARMv7 device. In this case, memory barriers
//   are needed for correct execution. Always call the kernel helper, even
//   when targeting ARMv5TE.
//

inline void MemoryBarrier() {
#if defined(__ANDROID__)
  // Note: This is a function call, which is also an implicit compiler barrier.
  typedef void (*KernelMemoryBarrierFunc)();
  ((KernelMemoryBarrierFunc)0xffff0fa0)();
#elif defined(__QNXNTO__)
  __cpu_membarrier();
#else
  // Fall back to the GCC built-in full memory barrier.
  __sync_synchronize();
#endif
}

// An ARM toolchain would only define one of these depending on which
// variant of the target architecture is being used. This tests against
// any known ARMv6, ARMv7 or ARMv8 variant, where it is possible to
// directly use ldrex/strex instructions to implement fast atomic operations.
#if defined(__ARM_ARCH_8A__) || \
    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) ||  \
    defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || \
    defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) ||  \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__)

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev_value;
  int reloop;
  do {
    // The following is equivalent to:
    //
    //   prev_value = LDREX(ptr)
    //   reloop = 0
    //   if (prev_value == old_value)
    //      reloop = STREX(ptr, new_value)
    __asm__ __volatile__("    ldrex %0, [%3]\n"
                         "    mov %1, #0\n"
                         "    cmp %0, %4\n"
#ifdef __thumb2__
                         "    it eq\n"
#endif
                         "    strexeq %1, %5, [%3]\n"
                         : "=&r"(prev_value), "=&r"(reloop), "+m"(*ptr)
                         : "r"(ptr), "r"(old_value), "r"(new_value)
                         : "cc", "memory");
  } while (reloop != 0);
  return prev_value;
}
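
// Illustrative note (a hedged sketch, not part of the original header):
// callers can build other read-modify-write operations on top of this CAS,
// since it returns the previous value and only stores when that value equals
// |old_value|. For example, an "atomic OR" of some hypothetical |bits| could
// be written as:
//
//   Atomic32 old_value, new_value;
//   do {
//     old_value = NoBarrier_Load(ptr);
//     new_value = old_value | bits;
//   } while (NoBarrier_CompareAndSwap(ptr, old_value, new_value) != old_value);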

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 result = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  MemoryBarrier();
  return result;
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  MemoryBarrier();
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  Atomic32 value;
  int reloop;
  do {
    // Equivalent to:
    //
    //  value = LDREX(ptr)
    //  value += increment
    //  reloop = STREX(ptr, value)
    //
    __asm__ __volatile__("    ldrex %0, [%3]\n"
                         "    add %0, %0, %4\n"
                         "    strex %1, %0, [%3]\n"
                         : "=&r"(value), "=&r"(reloop), "+m"(*ptr)
                         : "r"(ptr), "r"(increment)
                         : "cc", "memory");
  } while (reloop);
  return value;
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  // TODO(digit): Investigate if it's possible to implement this with
  // a single MemoryBarrier() operation between the LDREX and STREX.
  // See http://crbug.com/246514
  MemoryBarrier();
  Atomic32 result = NoBarrier_AtomicIncrement(ptr, increment);
  MemoryBarrier();
  return result;
}
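
// A possible shape for the TODO above (an untested sketch, not part of the
// original header; assumes a build where "dmb" is available, i.e. ARMv7+, and
// relies on "dmb" not clearing the exclusive monitor set by ldrex). Whether a
// single barrier inside the loop provides the required ordering is exactly
// what the TODO asks to investigate:
//
//   do {
//     __asm__ __volatile__("    ldrex %0, [%3]\n"
//                          "    dmb ish\n"
//                          "    add %0, %0, %4\n"
//                          "    strex %1, %0, [%3]\n"
//                          : "=&r"(value), "=&r"(reloop), "+m"(*ptr)
//                          : "r"(ptr), "r"(increment)
//                          : "cc", "memory");
//   } while (reloop);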

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  Atomic32 old_value;
  int reloop;
  do {
    // old_value = LDREX(ptr)
    // reloop = STREX(ptr, new_value)
    __asm__ __volatile__("   ldrex %0, [%3]\n"
                         "   strex %1, %4, [%3]\n"
                         : "=&r"(old_value), "=&r"(reloop), "+m"(*ptr)
                         : "r"(ptr), "r"(new_value)
                         : "cc", "memory");
  } while (reloop != 0);
  return old_value;
}

// This tests against any known ARMv5 variant.
#elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) || \
      defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__)

// The kernel also provides a helper function to perform an atomic
// compare-and-swap operation at the hard-wired address 0xffff0fc0.
// On ARMv5, this is implemented by a special code path that the kernel
// detects and treats specially when thread pre-emption happens.
// On ARMv6 and higher, it uses LDREX/STREX instructions instead.
//
// Note that this always performs a full memory barrier, so there is no
// need to add calls to MemoryBarrier() before or after it. It also
// returns 0 on success and non-zero on failure.
//
// Available and reliable since Linux 2.6.24. Both Android and ChromeOS
// use newer kernel revisions, so this should not be a concern.
namespace {

inline int LinuxKernelCmpxchg(Atomic32 old_value,
                              Atomic32 new_value,
                              volatile Atomic32* ptr) {
  typedef int (*KernelCmpxchgFunc)(Atomic32, Atomic32, volatile Atomic32*);
  return ((KernelCmpxchgFunc)0xffff0fc0)(old_value, new_value, ptr);
}

}  // namespace

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev_value;
  for (;;) {
    prev_value = *ptr;
    if (prev_value != old_value)
      return prev_value;
    if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
      return old_value;
  }
}

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  Atomic32 old_value;
  do {
    old_value = *ptr;
  } while (LinuxKernelCmpxchg(old_value, new_value, ptr));
  return old_value;
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  return Barrier_AtomicIncrement(ptr, increment);
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  for (;;) {
    // Atomically exchange the old value with an incremented one.
    Atomic32 old_value = *ptr;
    Atomic32 new_value = old_value + increment;
    if (!LinuxKernelCmpxchg(old_value, new_value, ptr)) {
      // The exchange took place as expected.
      return new_value;
    }
    // Otherwise, *ptr changed mid-loop and we need to retry.
  }
}

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 prev_value;
  for (;;) {
    prev_value = *ptr;
    if (prev_value != old_value) {
      // Always ensure acquire semantics.
      MemoryBarrier();
      return prev_value;
    }
    if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
      return old_value;
  }
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  // This could be implemented as:
  //    MemoryBarrier();
  //    return NoBarrier_CompareAndSwap();
  //
  // But that would use 3 barriers per successful CAS. For better performance,
  // use Acquire_CompareAndSwap() instead. Its implementation guarantees that:
  // - A successful swap uses only 2 barriers (in the kernel helper).
  // - An early return due to (prev_value != old_value) performs
  //   a memory barrier with no store, which is equivalent to the
  //   generic implementation above.
  return Acquire_CompareAndSwap(ptr, old_value, new_value);
}

#else
#  error "Your CPU's ARM architecture is not supported yet"
#endif

// NOTE: Atomicity of the following load and store operations is only
// guaranteed in case of 32-bit alignment of |ptr| values.

inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
  MemoryBarrier();
}

inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
  MemoryBarrier();
  *ptr = value;
}

inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { return *ptr; }

inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr;
  MemoryBarrier();
  return value;
}

inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}
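
// Illustrative usage (a hedged sketch, not part of the original header): a
// typical publish/consume pattern built on the operations above, where a
// producer writes a payload and then releases a flag, and a consumer only
// reads the payload after acquiring the flag. |payload|, |flag|,
// ComputeValue() and UsePayload() are hypothetical names for illustration:
//
//   // Producer:                      // Consumer:
//   payload = ComputeValue();         if (Acquire_Load(&flag) == 1) {
//   Release_Store(&flag, 1);            UsePayload(payload);
//                                     }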

// Byte accessors.

inline void NoBarrier_Store(volatile Atomic8* ptr, Atomic8 value) {
  *ptr = value;
}

inline Atomic8 NoBarrier_Load(volatile const Atomic8* ptr) { return *ptr; }

}  // namespace base
}  // namespace v8

#endif  // V8_BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_