/* SPDX-License-Identifier: GPL-2.0-or-later
 * Copyright (c) 2016 Cyril Hrubis <chrubis@suse.cz>
 */

/* The LTP library has some of its own atomic synchronisation primitives
 * contained in this file. Generally speaking these should not be used
 * directly in tests for synchronisation; instead use tst_checkpoint.h,
 * tst_fuzzy_sync.h or the POSIX library.
 *
 * Notes on compile and runtime memory barriers and atomics.
 *
 * Within the LTP library we have three concerns when accessing variables
 * shared by multiple threads or processes:
 *
 * (1) Removal or reordering of accesses by the compiler.
 * (2) Atomicity of addition.
 * (3) LOAD-STORE ordering between threads.
 *
 * The first (1) is the most likely to cause an error if not properly
 * handled. We avoid it by using statements and variables which the compiler
 * will not remove or reorder during optimisation: the __atomic and __sync
 * intrinsics, volatile asm statements with a "memory" clobber, and variables
 * marked volatile.
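 *
 * For instance (illustrative only), without (1) a busy wait such as
 *
 *	while (!flag);
 *
 * on a plain, non-volatile flag may be compiled into a single LOAD followed
 * by an infinite loop, because the compiler is free to assume nothing else
 * changes the variable.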
 *
 * On any platform Linux is likely to run on, a LOAD (fetch) or STORE of a
 * 32-bit integer will be atomic. However, fetching and adding to a variable
 * quite likely is not, so for (2) we need to ensure we use atomic addition.
 *
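 * For example (illustrative only), a plain increment such as
 *
 *	(*v)++;
 *
 * compiles to separate LOAD, add and STORE steps, so two concurrent writers
 * may both read the same old value and one increment is lost, whereas
 *
 *	tst_atomic_inc(v);
 *
 * (defined below) performs the whole read-modify-write atomically.
 *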
 * Finally, for tst_fuzzy_sync at least, we need to ensure that LOADs and
 * STOREs of any shared variables (including non-atomics) that are made
 * between calls to tst_fzsync_wait are completed (globally visible) before
 * tst_fzsync_wait completes. For this, runtime memory and instruction
 * barriers are required in addition to compile-time barriers.
 *
 * We use full sequential ordering (__ATOMIC_SEQ_CST) for the sake of
 * simplicity. LTP tests tend to be syscall heavy, so any performance gain
 * from using a weaker memory model is unlikely to be noticeable, while a
 * weaker model would be a potent source of confusion.
 *
 * Likewise, for the fallback ASM, the simplest "definitely will work, always"
 * approach is preferred over anything more performant.
 *
 * Also see Documentation/memory-barriers.txt in the kernel tree and
 * https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html
 * Note that terminology may vary between sources.
 */
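
/* Illustrative sketch only, not part of this header: a counter placed in
 * MAP_SHARED anonymous memory and updated by a parent and a child process.
 * SAFE_MMAP() and tst_res() come from the wider LTP library.
 *
 *	static int *counter;
 *
 *	counter = SAFE_MMAP(NULL, sizeof(int), PROT_READ | PROT_WRITE,
 *			    MAP_SHARED | MAP_ANONYMOUS, -1, 0);
 *	tst_atomic_store(0, counter);
 *
 *	(each process then does)
 *	tst_atomic_inc(counter);
 *
 *	(and once both have finished)
 *	if (tst_atomic_load(counter) == 2)
 *		tst_res(TPASS, "both processes updated the counter");
 */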

#ifndef TST_ATOMIC_H__
#define TST_ATOMIC_H__

#include "config.h"

#if HAVE_ATOMIC_MEMORY_MODEL == 1
static inline int tst_atomic_add_return(int i, int *v)
{
	return __atomic_add_fetch(v, i, __ATOMIC_SEQ_CST);
}

static inline int tst_atomic_load(int *v)
{
	return __atomic_load_n(v, __ATOMIC_SEQ_CST);
}

static inline void tst_atomic_store(int i, int *v)
{
	__atomic_store_n(v, i, __ATOMIC_SEQ_CST);
}

#elif HAVE_SYNC_ADD_AND_FETCH == 1
static inline int tst_atomic_add_return(int i, int *v)
{
	return __sync_add_and_fetch(v, i);
}

static inline int tst_atomic_load(int *v)
{
	int ret;

	__sync_synchronize();
	ret = *v;
	__sync_synchronize();
	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	__sync_synchronize();
	*v = i;
	__sync_synchronize();
}

#elif defined(__i386__) || defined(__x86_64__)
# define LTP_USE_GENERIC_LOAD_STORE_ASM 1

static inline int tst_atomic_add_return(int i, int *v)
{
	int __ret = i;

	/*
	 * taken from arch/x86/include/asm/cmpxchg.h
	 */
	asm volatile ("lock; xaddl %0, %1\n"
		: "+r" (__ret), "+m" (*v) : : "memory", "cc");

	return i + __ret;
}

#elif defined(__powerpc__) || defined(__powerpc64__)
static inline int tst_atomic_add_return(int i, int *v)
{
	int t;

	/* taken from arch/powerpc/include/asm/atomic.h */
	asm volatile(
		" sync\n"
		"1: lwarx %0,0,%2 # atomic_add_return\n"
		" add %0,%1,%0\n"
		" stwcx. %0,0,%2 \n"
		" bne- 1b\n"
		" sync\n"
		: "=&r" (t)
		: "r" (i), "r" (v)
		: "cc", "memory");

	return t;
}

static inline int tst_atomic_load(int *v)
{
	int ret;

	asm volatile("sync\n" : : : "memory");
	ret = *v;
	asm volatile("sync\n" : : : "memory");

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	asm volatile("sync\n" : : : "memory");
	*v = i;
	asm volatile("sync\n" : : : "memory");
}

#elif defined(__s390__) || defined(__s390x__)
# define LTP_USE_GENERIC_LOAD_STORE_ASM 1

static inline int tst_atomic_add_return(int i, int *v)
{
	int old_val, new_val;

	/* taken from arch/s390/include/asm/atomic.h */
	asm volatile(
		" l %0,%2\n"
		"0: lr %1,%0\n"
		" ar %1,%3\n"
		" cs %0,%1,%2\n"
		" jl 0b"
		: "=&d" (old_val), "=&d" (new_val), "+Q" (*v)
		: "d" (i)
		: "cc", "memory");

	return old_val + i;
}

#elif defined(__arc__)

/* ARCv2 provides the dmb instruction used for the SMP barriers below; ARC700
 * does not, so it only gets a compiler barrier.
 */
#ifdef __ARC700__
#define smp_mb() asm volatile("" : : : "memory")
#else
#define smp_mb() asm volatile("dmb 3\n" : : : "memory")
#endif

static inline int tst_atomic_add_return(int i, int *v)
{
	unsigned int val;

	smp_mb();

	asm volatile(
		"1: llock %[val], [%[ctr]] \n"
		" add %[val], %[val], %[i] \n"
		" scond %[val], [%[ctr]] \n"
		" bnz 1b \n"
		: [val] "=&r" (val)
		: [ctr] "r" (v),
		  [i] "ir" (i)
		: "cc", "memory");

	smp_mb();

	return val;
}

static inline int tst_atomic_load(int *v)
{
	int ret;

	smp_mb();
	ret = *v;
	smp_mb();

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	smp_mb();
	*v = i;
	smp_mb();
}

#elif defined(__aarch64__)
static inline int tst_atomic_add_return(int i, int *v)
{
	unsigned long tmp;
	int result;

	__asm__ __volatile__(
		" prfm pstl1strm, %2 \n"
		"1: ldaxr %w0, %2 \n"
		" add %w0, %w0, %w3 \n"
		" stlxr %w1, %w0, %2 \n"
		" cbnz %w1, 1b \n"
		" dmb ish \n"
		: "=&r" (result), "=&r" (tmp), "+Q" (*v)
		: "Ir" (i)
		: "memory");

	return result;
}

/* We use load and store exclusive (ldaxr & stlxr) instructions to try to
 * prevent the tst_atomic_load and, more likely, tst_atomic_store functions
 * from interfering with tst_atomic_add_return, which takes advantage of
 * exclusivity. It is not clear whether this is a good idea, but it does mean
 * that all three functions are very similar.
 */
static inline int tst_atomic_load(int *v)
{
	int ret;
	unsigned long tmp;

	asm volatile("//atomic_load \n"
		" prfm pstl1strm, %[v] \n"
		"1: ldaxr %w[ret], %[v] \n"
		" stlxr %w[tmp], %w[ret], %[v] \n"
		" cbnz %w[tmp], 1b \n"
		" dmb ish \n"
		: [tmp] "=&r" (tmp), [ret] "=&r" (ret), [v] "+Q" (*v)
		: : "memory");

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	unsigned long tmp;

	asm volatile("//atomic_store \n"
		" prfm pstl1strm, %[v] \n"
		"1: ldaxr %w[tmp], %[v] \n"
		" stlxr %w[tmp], %w[i], %[v] \n"
		" cbnz %w[tmp], 1b \n"
		" dmb ish \n"
		: [tmp] "=&r" (tmp), [v] "+Q" (*v)
		: [i] "r" (i)
		: "memory");
}


#elif defined(__sparc__) && defined(__arch64__)
# define LTP_USE_GENERIC_LOAD_STORE_ASM 1
static inline int tst_atomic_add_return(int i, int *v)
{
	int ret, tmp;

	/* Based on arch/sparc/lib/atomic_64.S with the exponential backoff
	 * function removed because we are unlikely to have a large (>= 16?)
	 * number of cores continuously trying to update one variable.
	 */
	asm volatile("/*atomic_add_return*/ \n"
		"1: ldsw [%[v]], %[ret]; \n"
		" add %[ret], %[i], %[tmp]; \n"
		" cas [%[v]], %[ret], %[tmp]; \n"
		" cmp %[ret], %[tmp]; \n"
		" bne,pn %%icc, 1b; \n"
		" nop; \n"
		" add %[ret], %[i], %[ret]; \n"
		: [ret] "=&r" (ret), [tmp] "=&r" (tmp)
		: [i] "r" (i), [v] "r" (v)
		: "memory", "cc");

	return ret;
}

#else /* HAVE_SYNC_ADD_AND_FETCH == 1 */
# error Your compiler does not provide __atomic_add_fetch, __sync_add_and_fetch \
	and an LTP implementation is missing for your architecture.
#endif

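/* Fallback load and store for the architectures above that only provide a
 * custom tst_atomic_add_return(). These use an empty asm statement with a
 * "memory" clobber, i.e. a pure compiler barrier, which presumably relies on
 * the strongly ordered hardware memory model of the architectures selecting
 * LTP_USE_GENERIC_LOAD_STORE_ASM (x86, s390, sparc64).
 */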
#ifdef LTP_USE_GENERIC_LOAD_STORE_ASM
static inline int tst_atomic_load(int *v)
{
	int ret;

	asm volatile("" : : : "memory");
	ret = *v;
	asm volatile("" : : : "memory");

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	asm volatile("" : : : "memory");
	*v = i;
	asm volatile("" : : : "memory");
}
#endif

static inline int tst_atomic_inc(int *v)
{
	return tst_atomic_add_return(1, v);
}

static inline int tst_atomic_dec(int *v)
{
	return tst_atomic_add_return(-1, v);
}
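
/* Note that, like tst_atomic_add_return(), both helpers return the new value
 * of the counter. For example (illustrative only):
 *
 *	int c = 0;
 *
 *	tst_atomic_inc(&c);	returns 1, leaving c == 1
 *	tst_atomic_dec(&c);	returns 0, leaving c == 0
 */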

#endif /* TST_ATOMIC_H__ */