/*
 * Copyright (c) 2016 Cyril Hrubis <chrubis@suse.cz>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
/* The LTP library has some of its own atomic synchronisation primitives
 * contained in this file. Generally speaking these should not be used
 * directly in tests for synchronisation; instead use tst_checkpoint.h,
 * tst_fuzzy_sync.h or the POSIX library.
 *
 * Notes on compile-time and runtime memory barriers and atomics.
 *
 * Within the LTP library we have three concerns when accessing variables
 * shared by multiple threads or processes:
 *
 * (1) Removal or reordering of accesses by the compiler.
 * (2) Atomicity of addition.
 * (3) LOAD-STORE ordering between threads.
 *
 * The first (1) is the most likely to cause an error if not properly
 * handled. We avoid it by using statements and variables which the compiler
 * will not remove or reorder during optimisation: the __atomic and __sync
 * intrinsics, volatile asm statements with a "memory" clobber, and variables
 * marked volatile.
 *
 * On any platform Linux is likely to run on, a LOAD (fetch) or STORE of a
 * 32-bit integer will be atomic. However, fetching a variable and adding to
 * it quite likely is not, so for (2) we need to ensure we use atomic
 * addition.
 *
 * Finally, for tst_fuzzy_sync at least, we need to ensure that LOADs and
 * STOREs of any shared variables (including non-atomics) that are made
 * between calls to tst_fzsync_wait are completed (globally visible) before
 * tst_fzsync_wait completes. For this, runtime memory and instruction
 * barriers are required in addition to compile-time barriers.
 *
 * We use full sequential ordering (__ATOMIC_SEQ_CST) for the sake of
 * simplicity. LTP tests tend to be syscall heavy, so a weaker memory model
 * is unlikely to bring a noticeable performance improvement, while at the
 * same time being a potent source of confusion.
 *
 * Likewise, for the fallback ASM, the simplest "definitely will work, always"
 * approach is preferred over anything more performant.
 *
 * Also see Documentation/memory-barriers.txt in the kernel tree and
 * https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html;
 * terminology may vary between sources.
 */
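
/* As a rough illustration of (1)-(3), a minimal sketch of how these
 * primitives are meant to be used; the worker() function and the counter
 * variable are hypothetical and not part of the LTP API:
 *
 *	static int counter;
 *
 *	static void *worker(void *unused)
 *	{
 *		int i;
 *
 *		for (i = 0; i < 1000; i++)
 *			tst_atomic_inc(&counter);
 *
 *		return NULL;
 *	}
 *
 * With a plain "counter++" two threads may both read the same old value,
 * add one and write it back, losing an increment. tst_atomic_inc() performs
 * the read-modify-write as a single atomic operation and, being SEQ_CST,
 * also orders it against the surrounding loads and stores.
 */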

#ifndef TST_ATOMIC_H__
#define TST_ATOMIC_H__

#include "config.h"

#if HAVE_ATOMIC_MEMORY_MODEL == 1
static inline int tst_atomic_add_return(int i, int *v)
{
	return __atomic_add_fetch(v, i, __ATOMIC_SEQ_CST);
}

static inline int tst_atomic_load(int *v)
{
	return __atomic_load_n(v, __ATOMIC_SEQ_CST);
}

static inline void tst_atomic_store(int i, int *v)
{
	__atomic_store_n(v, i, __ATOMIC_SEQ_CST);
}

#elif HAVE_SYNC_ADD_AND_FETCH == 1
static inline int tst_atomic_add_return(int i, int *v)
{
	return __sync_add_and_fetch(v, i);
}

static inline int tst_atomic_load(int *v)
{
	int ret;

	__sync_synchronize();
	ret = *v;
	__sync_synchronize();
	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	__sync_synchronize();
	*v = i;
	__sync_synchronize();
}

#elif defined(__i386__) || defined(__x86_64__)
# define LTP_USE_GENERIC_LOAD_STORE_ASM 1

static inline int tst_atomic_add_return(int i, int *v)
{
	int __ret = i;

	/*
	 * taken from arch/x86/include/asm/cmpxchg.h
	 */
	asm volatile ("lock; xaddl %0, %1\n"
		      : "+r" (__ret), "+m" (*v) : : "memory", "cc");

	return i + __ret;
}

#elif defined(__powerpc__) || defined(__powerpc64__)
static inline int tst_atomic_add_return(int i, int *v)
{
	int t;

	/* taken from arch/powerpc/include/asm/atomic.h */
	asm volatile(
		"	sync\n"
		"1:	lwarx	%0,0,%2		# atomic_add_return\n"
		"	add	%0,%1,%0\n"
		"	stwcx.	%0,0,%2\n"
		"	bne-	1b\n"
		"	sync\n"
		: "=&r" (t)
		: "r" (i), "r" (v)
		: "cc", "memory");

	return t;
}

static inline int tst_atomic_load(int *v)
{
	int ret;

	asm volatile("sync\n" : : : "memory");
	ret = *v;
	asm volatile("sync\n" : : : "memory");

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	asm volatile("sync\n" : : : "memory");
	*v = i;
	asm volatile("sync\n" : : : "memory");
}

#elif defined(__s390__) || defined(__s390x__)
# define LTP_USE_GENERIC_LOAD_STORE_ASM 1

static inline int tst_atomic_add_return(int i, int *v)
{
	int old_val, new_val;

	/* taken from arch/s390/include/asm/atomic.h */
	asm volatile(
		"	l	%0,%2\n"
		"0:	lr	%1,%0\n"
		"	ar	%1,%3\n"
		"	cs	%0,%1,%2\n"
		"	jl	0b"
		: "=&d" (old_val), "=&d" (new_val), "+Q" (*v)
		: "d" (i)
		: "cc", "memory");

	return old_val + i;
}

#elif defined(__arc__)

/* ARCv2 defines the SMP barriers */
#ifdef __ARC700__
#define smp_mb()	asm volatile("" : : : "memory")
#else
#define smp_mb()	asm volatile("dmb 3\n" : : : "memory")
#endif

static inline int tst_atomic_add_return(int i, int *v)
{
	unsigned int val;

	smp_mb();

	asm volatile(
		"1:	llock	%[val], [%[ctr]]	\n"
		"	add	%[val], %[val], %[i]	\n"
		"	scond	%[val], [%[ctr]]	\n"
		"	bnz	1b			\n"
		: [val] "=&r" (val)
		: [ctr] "r" (v),
		  [i] "ir" (i)
		: "cc", "memory");

	smp_mb();

	return val;
}

static inline int tst_atomic_load(int *v)
{
	int ret;

	smp_mb();
	ret = *v;
	smp_mb();

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	smp_mb();
	*v = i;
	smp_mb();
}

#elif defined(__aarch64__)
static inline int tst_atomic_add_return(int i, int *v)
{
	unsigned long tmp;
	int result;

	__asm__ __volatile__(
		"	prfm	pstl1strm, %2	\n"
		"1:	ldaxr	%w0, %2		\n"
		"	add	%w0, %w0, %w3	\n"
		"	stlxr	%w1, %w0, %2	\n"
		"	cbnz	%w1, 1b		\n"
		"	dmb ish			\n"
		: "=&r" (result), "=&r" (tmp), "+Q" (*v)
		: "Ir" (i)
		: "memory");

	return result;
}

/* We are using load and store exclusive (ldaxr & stlxr) instructions to try
 * to prevent the tst_atomic_load and, more likely, tst_atomic_store
 * functions from interfering with tst_atomic_add_return, which takes
 * advantage of exclusivity. It is not clear whether this is a good idea,
 * but it does mean that all three functions are very similar.
 */
static inline int tst_atomic_load(int *v)
{
	int ret;
	unsigned long tmp;

	asm volatile("//atomic_load			\n"
		"	prfm	pstl1strm, %[v]		\n"
		"1:	ldaxr	%w[ret], %[v]		\n"
		"	stlxr	%w[tmp], %w[ret], %[v]	\n"
		"	cbnz	%w[tmp], 1b		\n"
		"	dmb ish				\n"
		: [tmp] "=&r" (tmp), [ret] "=&r" (ret), [v] "+Q" (*v)
		: : "memory");

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	unsigned long tmp;

	asm volatile("//atomic_store			\n"
		"	prfm	pstl1strm, %[v]		\n"
		"1:	ldaxr	%w[tmp], %[v]		\n"
		"	stlxr	%w[tmp], %w[i], %[v]	\n"
		"	cbnz	%w[tmp], 1b		\n"
		"	dmb ish				\n"
		: [tmp] "=&r" (tmp), [v] "+Q" (*v)
		: [i] "r" (i)
		: "memory");
}

#elif defined(__sparc__) && defined(__arch64__)
# define LTP_USE_GENERIC_LOAD_STORE_ASM 1
static inline int tst_atomic_add_return(int i, int *v)
{
	int ret, tmp;

	/* Based on arch/sparc/lib/atomic_64.S with the exponential backoff
	 * function removed because we are unlikely to have a large (>= 16?)
	 * number of cores continuously trying to update one variable.
	 */
	asm volatile("/*atomic_add_return*/		\n"
		"1:	ldsw	[%[v]], %[ret];		\n"
		"	add	%[ret], %[i], %[tmp];	\n"
		"	cas	[%[v]], %[ret], %[tmp];	\n"
		"	cmp	%[ret], %[tmp];		\n"
		"	bne,pn	%%icc, 1b;		\n"
		"	nop;				\n"
		"	add	%[ret], %[i], %[ret];	\n"
		: [ret] "=r&" (ret), [tmp] "=r&" (tmp)
		: [i] "r" (i), [v] "r" (v)
		: "memory", "cc");

	return ret;
}

#else /* HAVE_SYNC_ADD_AND_FETCH == 1 */
# error Your compiler does not provide __atomic_add_fetch, __sync_add_and_fetch \
        and an LTP implementation is missing for your architecture.
#endif

#ifdef LTP_USE_GENERIC_LOAD_STORE_ASM
static inline int tst_atomic_load(int *v)
{
	int ret;

	asm volatile("" : : : "memory");
	ret = *v;
	asm volatile("" : : : "memory");

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	asm volatile("" : : : "memory");
	*v = i;
	asm volatile("" : : : "memory");
}
#endif

static inline int tst_atomic_inc(int *v)
{
	return tst_atomic_add_return(1, v);
}

static inline int tst_atomic_dec(int *v)
{
	return tst_atomic_add_return(-1, v);
}
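
/* A second sketch, purely illustrative: publishing a value from one process
 * to another, assuming "shared" points at memory mapped into both processes
 * (e.g. with mmap() and MAP_SHARED). The struct and variable names are
 * hypothetical.
 *
 *	struct shared_data {
 *		int payload;
 *		int ready;
 *	} *shared;
 *
 *	// writer
 *	shared->payload = 42;
 *	tst_atomic_store(1, &shared->ready);
 *
 *	// reader
 *	while (!tst_atomic_load(&shared->ready))
 *		;
 *	// shared->payload now reads 42
 *
 * The SEQ_CST (or full barrier) semantics of tst_atomic_store() and
 * tst_atomic_load() order the plain store to payload before the store to
 * ready, and the load of ready before the load of payload, which is the
 * kind of ordering described in the header comment above.
 */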

#endif /* TST_ATOMIC_H__ */