/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef BIONIC_ATOMIC_ARM_H
#define BIONIC_ATOMIC_ARM_H

#include <machine/cpu-features.h>

/* Some of the hardware instructions used below are not available in Thumb-1
 * mode (they are if you build in ARM or Thumb-2 mode though). To solve this
 * problem, we use the same technique as libatomic_ops: temporarily switch to
 * ARM mode, perform the operation, then switch back to Thumb-1.
 *
 * This results in two 'bx' jumps, just like a normal function call, but
 * everything is kept inlined, which avoids loading or computing the
 * function's address and also prevents a little I-cache thrashing.
 *
 * However, it is highly recommended to avoid compiling any C library source
 * file that uses these functions in Thumb-1 mode.
 *
 * Define three helper macros to implement this:
 */
#if defined(__thumb__) && !defined(__thumb2__)
# define __ATOMIC_SWITCH_TO_ARM \
            "adr r3, 5f\n" \
            "bx r3\n" \
            ".align\n" \
            ".arm\n" \
        "5:\n"
/* note: the leading \n below is intentional */
# define __ATOMIC_SWITCH_TO_THUMB \
            "\n" \
            "adr r3, 6f\n" \
            "bx r3\n" \
            ".thumb\n" \
        "6:\n"

# define __ATOMIC_CLOBBERS "r3", /* list of clobbered registers */

/* Warn the user that ARM mode should really be preferred! */
# warning Rebuilding this source file in ARM mode is highly recommended for performance!!

#else
# define __ATOMIC_SWITCH_TO_ARM   /* nothing */
# define __ATOMIC_SWITCH_TO_THUMB /* nothing */
# define __ATOMIC_CLOBBERS        /* nothing */
#endif
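
/* A minimal sketch of how these macros are meant to bracket inline assembly
 * that uses ARM-only instructions. This example is illustrative only and is
 * not part of this header (the function name __bionic_clz_example is made
 * up); the real users are the __bionic_* atomic functions further below.
 */
#if 0
__ATOMIC_INLINE__ int32_t
__bionic_clz_example(int32_t value)
{
    int32_t result;
    __asm__ __volatile__ (
        __ATOMIC_SWITCH_TO_ARM     /* jump to an ARM-mode island if built as Thumb-1 */
        "clz %0, %1"               /* CLZ (ARMv5T+) has no Thumb-1 encoding */
        __ATOMIC_SWITCH_TO_THUMB   /* jump back to Thumb-1 if needed */
        : "=&r" (result)
        : "r" (value)
        : __ATOMIC_CLOBBERS "cc"); /* r3 is clobbered only in Thumb-1 builds */
    return result;
}
#endif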


/* Define a full memory barrier. This is only needed if we build the
 * platform for a multi-core device. For the record, using a 'dmb'
 * instruction on a Nexus One device can take up to 180 ns even when
 * it is completely unnecessary on that device.
 *
 * NOTE: This is where the platform and NDK atomic headers are going to
 *       diverge. With the NDK, we don't know whether the generated code is
 *       going to run on a single-core or multi-core device, so we need to
 *       be cautious.
 *
 *       Fortunately, we can use the kernel helper function that is mapped
 *       at address 0xffff0fa0 in all user processes, and that provides a
 *       device-specific barrier operation.
 *
 *       I.e. on single-core devices the helper immediately returns, while
 *       on multi-core devices it uses "dmb" or any other means to perform
 *       a full memory barrier. (A sketch of such a call appears after the
 *       barrier definitions below.)
 *
 * There are three cases to consider for the platform:
 *
 *    - multi-core ARMv7-A       => use the 'dmb' hardware instruction
 *    - multi-core ARMv6         => use the coprocessor
 *    - single core ARMv5TE/6/7  => do not use any hardware barrier
 */
#if defined(ANDROID_SMP) && ANDROID_SMP == 1

/* Sanity check, multi-core is only supported starting from ARMv6 */
# if __ARM_ARCH__ < 6
#  error ANDROID_SMP should not be set to 1 for an ARM architecture less than 6
# endif

# ifdef __ARM_HAVE_DMB
/* For ARMv7-A, we can use the 'dmb' instruction directly */
__ATOMIC_INLINE__ void
__bionic_memory_barrier(void)
{
    /* Note: we always build in ARM or Thumb-2 on ARMv7-A, so don't
     * bother with __ATOMIC_SWITCH_TO_ARM */
    __asm__ __volatile__ ( "dmb" : : : "memory" );
}
# else /* !__ARM_HAVE_DMB */
/* Otherwise, i.e. for multi-core ARMv6, we need to use the coprocessor,
 * which requires a general-purpose register and is therefore slightly
 * less efficient.
 */
__ATOMIC_INLINE__ void
__bionic_memory_barrier(void)
{
    __asm__ __volatile__ (
        __ATOMIC_SWITCH_TO_ARM
        "mcr p15, 0, %0, c7, c10, 5"
        __ATOMIC_SWITCH_TO_THUMB
        : : "r" (0) : __ATOMIC_CLOBBERS "memory");
}
# endif /* !__ARM_HAVE_DMB */
#else /* !ANDROID_SMP */
__ATOMIC_INLINE__ void
__bionic_memory_barrier(void)
{
    /* A simple compiler barrier */
    __asm__ __volatile__ ( "" : : : "memory" );
}
#endif /* !ANDROID_SMP */
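
/* The sketch promised above: how the kernel barrier helper mapped at
 * 0xffff0fa0 could be invoked, mirroring the way the cmpxchg helper at
 * 0xffff0fc0 is called further below. This is illustrative only and is not
 * used by this header; the typedef and function name are made up, and a
 * void (*)(void) calling convention for the helper is assumed.
 */
#if 0
typedef void (kernel_dmb)(void);

__ATOMIC_INLINE__ void
__bionic_kernel_barrier_example(void)
{
    /* On single-core devices the helper returns immediately; on multi-core
     * devices it performs a full memory barrier by whatever means the
     * kernel chose for this CPU. */
    (*(kernel_dmb *)0xffff0fa0)();
}
#endif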

/* Compare-and-swap, without any explicit barriers. Note that this function
 * returns 0 on success and 1 on failure; the opposite convention is typically
 * used on other platforms. (A usage sketch follows this section.)
 *
 * There are two cases to consider:
 *
 *    - ARMv6+  => use LDREX/STREX instructions
 *    - < ARMv6 => use the kernel helper function mapped at 0xffff0fc0
 *
 * LDREX/STREX are only available starting from ARMv6.
 */
#ifdef __ARM_HAVE_LDREX_STREX
__ATOMIC_INLINE__ int
__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
{
    int32_t prev, status;
    do {
        __asm__ __volatile__ (
            __ATOMIC_SWITCH_TO_ARM
            "ldrex %0, [%3]\n"
            "mov %1, #0\n"
            "teq %0, %4\n"
#ifdef __thumb2__
            "it eq\n"
#endif
            "strexeq %1, %5, [%3]"
            __ATOMIC_SWITCH_TO_THUMB
            : "=&r" (prev), "=&r" (status), "+m"(*ptr)
            : "r" (ptr), "Ir" (old_value), "r" (new_value)
            : __ATOMIC_CLOBBERS "cc");
    } while (__builtin_expect(status != 0, 0));
    return prev != old_value;
}
#else /* !__ARM_HAVE_LDREX_STREX */

/* Use the handy kernel helper function mapped at 0xffff0fc0 */
typedef int (kernel_cmpxchg)(int32_t, int32_t, volatile int32_t *);

__ATOMIC_INLINE__ int
__kernel_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
{
    /* Note: the kernel function returns 0 on success too */
    return (*(kernel_cmpxchg *)0xffff0fc0)(old_value, new_value, ptr);
}

__ATOMIC_INLINE__ int
__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
{
    return __kernel_cmpxchg(old_value, new_value, ptr);
}
#endif /* !__ARM_HAVE_LDREX_STREX */
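
/* The usage sketch mentioned above: because __bionic_cmpxchg() returns 0 on
 * success, a read-modify-write operation is built as a retry loop that spins
 * while the return value is non-zero, exactly like the __kernel_cmpxchg()
 * based fallbacks further below. This example is illustrative only and the
 * name __bionic_atomic_or_example is made up.
 */
#if 0
__ATOMIC_INLINE__ int32_t
__bionic_atomic_or_example(int32_t bits, volatile int32_t* ptr)
{
    int32_t prev, status;
    do {
        prev = *ptr;
        /* 0 means *ptr still held 'prev' and was atomically updated */
        status = __bionic_cmpxchg(prev, prev | bits, ptr);
    } while (__builtin_expect(status != 0, 0));
    return prev;  /* like the other helpers, return the old value */
}
#endif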

/* Swap operation, without any explicit barriers.
 * There are again two similar cases to consider:
 *
 *    - ARMv6+  => use LDREX/STREX
 *    - < ARMv6 => use SWP instead
 */
#ifdef __ARM_HAVE_LDREX_STREX
__ATOMIC_INLINE__ int32_t
__bionic_swap(int32_t new_value, volatile int32_t* ptr)
{
    int32_t prev, status;
    do {
        __asm__ __volatile__ (
            __ATOMIC_SWITCH_TO_ARM
            "ldrex %0, [%3]\n"
            "strex %1, %4, [%3]"
            __ATOMIC_SWITCH_TO_THUMB
            : "=&r" (prev), "=&r" (status), "+m" (*ptr)
            : "r" (ptr), "r" (new_value)
            : __ATOMIC_CLOBBERS "cc");
    } while (__builtin_expect(status != 0, 0));
    return prev;
}
#else /* !__ARM_HAVE_LDREX_STREX */
__ATOMIC_INLINE__ int32_t
__bionic_swap(int32_t new_value, volatile int32_t* ptr)
{
    int32_t prev;
    /* NOTE: SWP only has an ARM encoding (there is no Thumb-1 form),
     * so switch to ARM mode around it if necessary. */
    __asm__ __volatile__ (
        __ATOMIC_SWITCH_TO_ARM
        "swp %0, %2, [%3]"
        __ATOMIC_SWITCH_TO_THUMB
        : "=&r" (prev), "+m" (*ptr)
        : "r" (new_value), "r" (ptr)
        : __ATOMIC_CLOBBERS "cc");
    return prev;
}
#endif /* !__ARM_HAVE_LDREX_STREX */
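
/* None of the helpers in this file imply a barrier, so callers that need
 * ordering combine them with __bionic_memory_barrier(). A sketch of an
 * acquire-style swap (illustrative only; the wrapper name is made up):
 */
#if 0
__ATOMIC_INLINE__ int32_t
__bionic_swap_acquire_example(int32_t new_value, volatile int32_t* ptr)
{
    int32_t prev = __bionic_swap(new_value, ptr);
    /* keep later memory accesses from being reordered before the swap */
    __bionic_memory_barrier();
    return prev;
}
#endif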

/* Atomic increment, without any explicit barriers.
 * This returns the old value.
 */
#ifdef __ARM_HAVE_LDREX_STREX
__ATOMIC_INLINE__ int32_t
__bionic_atomic_inc(volatile int32_t* ptr)
{
    int32_t prev, tmp, status;
    do {
        __asm__ __volatile__ (
            __ATOMIC_SWITCH_TO_ARM
            "ldrex %0, [%4]\n"
            "add %1, %0, #1\n"
            "strex %2, %1, [%4]"
            __ATOMIC_SWITCH_TO_THUMB
            : "=&r" (prev), "=&r" (tmp), "=&r" (status), "+m"(*ptr)
            : "r" (ptr)
            : __ATOMIC_CLOBBERS "cc");
    } while (__builtin_expect(status != 0, 0));
    return prev;
}
#else
__ATOMIC_INLINE__ int32_t
__bionic_atomic_inc(volatile int32_t* ptr)
{
    int32_t prev, status;
    do {
        prev = *ptr;
        status = __kernel_cmpxchg(prev, prev+1, ptr);
    } while (__builtin_expect(status != 0, 0));
    return prev;
}
#endif

/* Atomic decrement, without any explicit barriers.
 * This returns the old value.
 */
#ifdef __ARM_HAVE_LDREX_STREX
__ATOMIC_INLINE__ int32_t
__bionic_atomic_dec(volatile int32_t* ptr)
{
    int32_t prev, tmp, status;
    do {
        __asm__ __volatile__ (
            __ATOMIC_SWITCH_TO_ARM
            "ldrex %0, [%4]\n"
            "sub %1, %0, #1\n"
            "strex %2, %1, [%4]"
            __ATOMIC_SWITCH_TO_THUMB
            : "=&r" (prev), "=&r" (tmp), "=&r" (status), "+m"(*ptr)
            : "r" (ptr)
            : __ATOMIC_CLOBBERS "cc");
    } while (__builtin_expect(status != 0, 0));
    return prev;
}
#else
__ATOMIC_INLINE__ int32_t
__bionic_atomic_dec(volatile int32_t* ptr)
{
    int32_t prev, status;
    do {
        prev = *ptr;
        status = __kernel_cmpxchg(prev, prev-1, ptr);
    } while (__builtin_expect(status != 0, 0));
    return prev;
}
#endif
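
/* Since the increment/decrement helpers return the *old* value, a reference
 * count reaches zero exactly when __bionic_atomic_dec() returns 1. A sketch
 * (illustrative only; the function name is made up, and real reference
 * counting would also need explicit barriers, which these helpers omit):
 */
#if 0
__ATOMIC_INLINE__ int
__bionic_unref_example(volatile int32_t* ref_count)
{
    /* returns 1 if the caller dropped the last reference */
    return __bionic_atomic_dec(ref_count) == 1;
}
#endif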

#endif /* BIONIC_ATOMIC_ARM_H */