/* SPDX-License-Identifier: GPL-2.0-or-later
 * Copyright (c) 2016 Cyril Hrubis <chrubis@suse.cz>
 */

/* The LTP library has some of its own atomic synchronisation primitives
 * contained in this file. Generally speaking these should not be used
 * directly in tests for synchronisation; instead use tst_checkpoint.h,
 * tst_fuzzy_sync.h or the POSIX library.
 *
 * Notes on compile and runtime memory barriers and atomics.
 *
 * Within the LTP library we have three concerns when accessing variables
 * shared by multiple threads or processes:
 *
 * (1) Removal or reordering of accesses by the compiler.
 * (2) Atomicity of addition.
 * (3) LOAD-STORE ordering between threads.
 *
 * The first (1) is the most likely to cause an error if not properly
 * handled. We avoid it by using statements which will not be removed or
 * reordered by the compiler during optimisation. This includes the __atomic
 * and __sync intrinsics, asm statements marked volatile with a "memory"
 * clobber, and variables marked volatile.
 *
 * On any platform Linux is likely to run on, a LOAD (fetch) or STORE of an
 * aligned 32-bit integer will be atomic. However, fetching and adding to a
 * variable is quite likely not; so for (2) we need to ensure we use atomic
 * addition.
 *
 * Finally, for tst_fuzzy_sync at least, we need to ensure that LOADs and
 * STOREs of any shared variables (including non-atomics) that are made
 * between calls to tst_fzsync_wait are completed (globally visible) before
 * tst_fzsync_wait completes. For this, runtime memory and instruction
 * barriers are required in addition to compile-time barriers.
 *
 * We use full sequential ordering (__ATOMIC_SEQ_CST) for the sake of
 * simplicity. LTP tests tend to be syscall heavy, so a weaker memory model
 * is unlikely to yield a noticeable performance improvement while being a
 * potent source of confusion.
 *
 * Likewise, for the fallback ASM, the simplest "definitely will work, always"
 * approach is preferred over anything more performant.
 *
 * Also see Documentation/memory-barriers.txt in the kernel tree and
 * https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html;
 * note that terminology may vary between sources.
 */
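
/* A minimal usage sketch of the API below (illustrative only; the placement
 * of the counter in memory shared between processes, e.g. via SAFE_MMAP(),
 * and the NCHILD constant are assumptions of the example, not something
 * this header provides):
 *
 *	static int *counter;
 *
 *	static void child_do_work(void)
 *	{
 *		tst_atomic_inc(counter);
 *	}
 *
 *	static void parent_wait(void)
 *	{
 *		while (tst_atomic_load(counter) < NCHILD)
 *			usleep(1000);
 *		tst_atomic_store(0, counter);
 *	}
 *
 * Note the argument order of tst_atomic_store(): the value first, then the
 * pointer.
 */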

#ifndef TST_ATOMIC_H__
#define TST_ATOMIC_H__

#include "config.h"

#if HAVE_ATOMIC_MEMORY_MODEL == 1
static inline int tst_atomic_add_return(int i, int *v)
{
	return __atomic_add_fetch(v, i, __ATOMIC_SEQ_CST);
}

static inline int tst_atomic_load(int *v)
{
	return __atomic_load_n(v, __ATOMIC_SEQ_CST);
}

static inline void tst_atomic_store(int i, int *v)
{
	__atomic_store_n(v, i, __ATOMIC_SEQ_CST);
}

#elif HAVE_SYNC_ADD_AND_FETCH == 1
/* Older __sync builtins; __sync_synchronize() issues a full memory barrier. */
static inline int tst_atomic_add_return(int i, int *v)
{
	return __sync_add_and_fetch(v, i);
}

static inline int tst_atomic_load(int *v)
{
	int ret;

	__sync_synchronize();
	ret = *v;
	__sync_synchronize();
	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	__sync_synchronize();
	*v = i;
	__sync_synchronize();
}

#elif defined(__i386__) || defined(__x86_64__)
# define LTP_USE_GENERIC_LOAD_STORE_ASM 1

static inline int tst_atomic_add_return(int i, int *v)
{
	int __ret = i;

	/*
	 * Taken from arch/x86/include/asm/cmpxchg.h: xaddl leaves the old
	 * value of *v in __ret, so i + __ret is the new value.
	 */
	asm volatile ("lock; xaddl %0, %1\n"
		: "+r" (__ret), "+m" (*v) : : "memory", "cc");

	return i + __ret;
}

#elif defined(__powerpc__) || defined(__powerpc64__)
static inline int tst_atomic_add_return(int i, int *v)
{
	int t;

	/* taken from arch/powerpc/include/asm/atomic.h */
	asm volatile(
		"	sync\n"
		"1:	lwarx	%0,0,%2		# atomic_add_return\n"
		"	add %0,%1,%0\n"
		"	stwcx.	%0,0,%2 \n"
		"	bne-	1b\n"
		"	sync\n"
		: "=&r" (t)
		: "r" (i), "r" (v)
		: "cc", "memory");

	return t;
}

static inline int tst_atomic_load(int *v)
{
	int ret;

	asm volatile("sync\n" : : : "memory");
	ret = *v;
	asm volatile("sync\n" : : : "memory");

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	asm volatile("sync\n" : : : "memory");
	*v = i;
	asm volatile("sync\n" : : : "memory");
}

#elif defined(__s390__) || defined(__s390x__)
# define LTP_USE_GENERIC_LOAD_STORE_ASM 1

static inline int tst_atomic_add_return(int i, int *v)
{
	int old_val, new_val;

	/* taken from arch/s390/include/asm/atomic.h */
	asm volatile(
		"	l	%0,%2\n"
		"0:	lr	%1,%0\n"
		"	ar	%1,%3\n"
		"	cs	%0,%1,%2\n"
		"	jl	0b"
		: "=&d" (old_val), "=&d" (new_val), "+Q" (*v)
		: "d" (i)
		: "cc", "memory");

	return old_val + i;
}

#elif defined(__arc__)

/* ARCv2 defines the SMP memory barrier (dmb); on ARC700 a plain compiler
 * barrier is used.
 */
#ifdef __ARC700__
#define smp_mb()	asm volatile("" : : : "memory")
#else
#define smp_mb()	asm volatile("dmb 3\n" : : : "memory")
#endif

static inline int tst_atomic_add_return(int i, int *v)
{
	unsigned int val;

	smp_mb();

	asm volatile(
		"1:	llock   %[val], [%[ctr]]	\n"
		"	add     %[val], %[val], %[i]	\n"
		"	scond   %[val], [%[ctr]]	\n"
		"	bnz     1b			\n"
		: [val]	"=&r"	(val)
		: [ctr]	"r"	(v),
		  [i]	"ir"	(i)
		: "cc", "memory");

	smp_mb();

	return val;
}

static inline int tst_atomic_load(int *v)
{
	int ret;

	smp_mb();
	ret = *v;
	smp_mb();

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	smp_mb();
	*v = i;
	smp_mb();
}

#elif defined (__aarch64__)
static inline int tst_atomic_add_return(int i, int *v)
{
	unsigned long tmp;
	int result;

	__asm__ __volatile__(
"       prfm    pstl1strm, %2	\n"
"1:     ldaxr	%w0, %2		\n"
"       add	%w0, %w0, %w3	\n"
"       stlxr	%w1, %w0, %2	\n"
"       cbnz	%w1, 1b		\n"
"       dmb ish			\n"
	: "=&r" (result), "=&r" (tmp), "+Q" (*v)
	: "Ir" (i)
	: "memory");

	return result;
}

/* We are using load and store exclusive (ldaxr & stlxr) instructions to try
 * to prevent the tst_atomic_load and, more likely, tst_atomic_store
 * functions from interfering with tst_atomic_add_return, which takes
 * advantage of exclusivity. It is not clear whether this is a good idea,
 * but it does mean that all three functions are very similar.
 */
static inline int tst_atomic_load(int *v)
{
	int ret;
	unsigned long tmp;

	asm volatile("//atomic_load			\n"
		"	prfm	pstl1strm,  %[v]	\n"
		"1:	ldaxr	%w[ret], %[v]		\n"
		"	stlxr   %w[tmp], %w[ret], %[v]  \n"
		"	cbnz    %w[tmp], 1b		\n"
		"	dmb ish				\n"
		: [tmp] "=&r" (tmp), [ret] "=&r" (ret), [v] "+Q" (*v)
		: : "memory");

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	unsigned long tmp;

	asm volatile("//atomic_store			\n"
		"	prfm	pstl1strm, %[v]		\n"
		"1:	ldaxr	%w[tmp], %[v]		\n"
		"	stlxr   %w[tmp], %w[i], %[v]	\n"
		"	cbnz    %w[tmp], 1b		\n"
		"	dmb ish				\n"
		: [tmp] "=&r" (tmp), [v] "+Q" (*v)
		: [i] "r" (i)
		: "memory");
}

#elif defined(__sparc__) && defined(__arch64__)
# define LTP_USE_GENERIC_LOAD_STORE_ASM 1
static inline int tst_atomic_add_return(int i, int *v)
{
	int ret, tmp;

	/* Based on arch/sparc/lib/atomic_64.S with the exponential backoff
	 * function removed because we are unlikely to have a large (>= 16?)
	 * number of cores continuously trying to update one variable.
	 */
	asm volatile("/*atomic_add_return*/		\n"
		"1:	ldsw	[%[v]], %[ret];		\n"
		"	add	%[ret], %[i], %[tmp];	\n"
		"	cas	[%[v]], %[ret], %[tmp];	\n"
		"	cmp	%[ret], %[tmp];		\n"
		"	bne,pn	%%icc, 1b;		\n"
		"	nop;				\n"
		"	add	%[ret], %[i], %[ret];	\n"
		: [ret] "=&r" (ret), [tmp] "=&r" (tmp)
		: [i] "r" (i), [v] "r" (v)
		: "memory", "cc");

	return ret;
}

#else /* HAVE_SYNC_ADD_AND_FETCH == 1 */
# error Your compiler does not provide __atomic_add_fetch or __sync_add_and_fetch \
        and an LTP implementation is missing for your architecture.
#endif

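/* The generic tst_atomic_load() and tst_atomic_store() below insert only
 * compiler barriers around a plain aligned int access; they emit no runtime
 * (CPU) barrier. They are used solely on the architectures above that define
 * LTP_USE_GENERIC_LOAD_STORE_ASM (x86, s390, 64-bit SPARC), presumably
 * because the comparatively strong hardware memory ordering of those
 * architectures makes this sufficient for our purposes.
 */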
#ifdef LTP_USE_GENERIC_LOAD_STORE_ASM
static inline int tst_atomic_load(int *v)
{
	int ret;

	asm volatile("" : : : "memory");
	ret = *v;
	asm volatile("" : : : "memory");

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	asm volatile("" : : : "memory");
	*v = i;
	asm volatile("" : : : "memory");
}
#endif

static inline int tst_atomic_inc(int *v)
{
	return tst_atomic_add_return(1, v);
}

static inline int tst_atomic_dec(int *v)
{
	return tst_atomic_add_return(-1, v);
}

#endif	/* TST_ATOMIC_H__ */