// RUN: %clang_cc1 %s -emit-llvm -o - -triple=armv5-unknown-freebsd -std=c11 | FileCheck %s

// Test that we are generating atomicrmw instructions, rather than
// compare-exchange loops for common atomic ops.  This makes a big difference
// on RISC platforms, where the compare-exchange loop becomes a ll/sc pair for
// the load and then another ll/sc in the loop, expanding to about 30
// instructions when it should be only 4.  It has a smaller, but still
// noticeable, impact on platforms like x86 and RISC-V, where there are atomic
// RMW instructions.
//
// We currently emit cmpxchg loops for most operations on _Bools, because
// they're sufficiently rare that it's not worth making sure that the semantics
// are correct.

// 16-byte vector of int, declared with the vector_size attribute.
typedef int __attribute__((vector_size(16))) vector;

// File-scope _Atomic objects, one per type exercised by this test file.
_Atomic(_Bool) b;
_Atomic(int) i;
_Atomic(long long) l;
_Atomic(short) s;
_Atomic(char*) p;
_Atomic(float) f;
_Atomic(vector) v;
25 // CHECK: testinc
testinc(void)26 void testinc(void)
27 {
28   // Special case for suffix bool++, sets to true and returns the old value.
29   // CHECK: atomicrmw xchg i8* @b, i8 1 seq_cst
30   b++;
31   // CHECK: atomicrmw add i32* @i, i32 1 seq_cst
32   i++;
33   // CHECK: atomicrmw add i64* @l, i64 1 seq_cst
34   l++;
35   // CHECK: atomicrmw add i16* @s, i16 1 seq_cst
36   s++;
37   // Prefix increment
38   // Special case for bool: set to true and return true
39   // CHECK: store atomic i8 1, i8* @b seq_cst, align 1
40   ++b;
41   // Currently, we have no variant of atomicrmw that returns the new value, so
42   // we have to generate an atomic add, which returns the old value, and then a
43   // non-atomic add.
44   // CHECK: atomicrmw add i32* @i, i32 1 seq_cst
45   // CHECK: add i32
46   ++i;
47   // CHECK: atomicrmw add i64* @l, i64 1 seq_cst
48   // CHECK: add i64
49   ++l;
50   // CHECK: atomicrmw add i16* @s, i16 1 seq_cst
51   // CHECK: add i16
52   ++s;
53 }
54 // CHECK: testdec
testdec(void)55 void testdec(void)
56 {
57   // CHECK: cmpxchg i8* @b
58   b--;
59   // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
60   i--;
61   // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
62   l--;
63   // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
64   s--;
65   // CHECK: cmpxchg i8* @b
66   --b;
67   // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
68   // CHECK: sub i32
69   --i;
70   // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
71   // CHECK: sub i64
72   --l;
73   // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
74   // CHECK: sub i16
75   --s;
76 }
77 // CHECK: testaddeq
testaddeq(void)78 void testaddeq(void)
79 {
80   // CHECK: cmpxchg i8* @b
81   // CHECK: atomicrmw add i32* @i, i32 42 seq_cst
82   // CHECK: atomicrmw add i64* @l, i64 42 seq_cst
83   // CHECK: atomicrmw add i16* @s, i16 42 seq_cst
84   b += 42;
85   i += 42;
86   l += 42;
87   s += 42;
88 }
89 // CHECK: testsubeq
testsubeq(void)90 void testsubeq(void)
91 {
92   // CHECK: cmpxchg i8* @b
93   // CHECK: atomicrmw sub i32* @i, i32 42 seq_cst
94   // CHECK: atomicrmw sub i64* @l, i64 42 seq_cst
95   // CHECK: atomicrmw sub i16* @s, i16 42 seq_cst
96   b -= 42;
97   i -= 42;
98   l -= 42;
99   s -= 42;
100 }
101 // CHECK: testxoreq
testxoreq(void)102 void testxoreq(void)
103 {
104   // CHECK: cmpxchg i8* @b
105   // CHECK: atomicrmw xor i32* @i, i32 42 seq_cst
106   // CHECK: atomicrmw xor i64* @l, i64 42 seq_cst
107   // CHECK: atomicrmw xor i16* @s, i16 42 seq_cst
108   b ^= 42;
109   i ^= 42;
110   l ^= 42;
111   s ^= 42;
112 }
113 // CHECK: testoreq
testoreq(void)114 void testoreq(void)
115 {
116   // CHECK: cmpxchg i8* @b
117   // CHECK: atomicrmw or i32* @i, i32 42 seq_cst
118   // CHECK: atomicrmw or i64* @l, i64 42 seq_cst
119   // CHECK: atomicrmw or i16* @s, i16 42 seq_cst
120   b |= 42;
121   i |= 42;
122   l |= 42;
123   s |= 42;
124 }
125 // CHECK: testandeq
testandeq(void)126 void testandeq(void)
127 {
128   // CHECK: cmpxchg i8* @b
129   // CHECK: atomicrmw and i32* @i, i32 42 seq_cst
130   // CHECK: atomicrmw and i64* @l, i64 42 seq_cst
131   // CHECK: atomicrmw and i16* @s, i16 42 seq_cst
132   b &= 42;
133   i &= 42;
134   l &= 42;
135   s &= 42;
136 }
137 
138 // CHECK-LABEL: define arm_aapcscc void @testFloat(float*
testFloat(_Atomic (float)* fp)139 void testFloat(_Atomic(float) *fp) {
140 // CHECK:      [[FP:%.*]] = alloca float*
141 // CHECK-NEXT: [[X:%.*]] = alloca float
142 // CHECK-NEXT: [[F:%.*]] = alloca float
143 // CHECK-NEXT: [[TMP0:%.*]] = alloca float
144 // CHECK-NEXT: [[TMP1:%.*]] = alloca float
145 // CHECK-NEXT: store float* {{%.*}}, float** [[FP]]
146 
147 // CHECK-NEXT: [[T0:%.*]] = load float** [[FP]]
148 // CHECK-NEXT: store float 1.000000e+00, float* [[T0]], align 4
149   __c11_atomic_init(fp, 1.0f);
150 
151 // CHECK-NEXT: store float 2.000000e+00, float* [[X]], align 4
152   _Atomic(float) x = 2.0f;
153 
154 // CHECK-NEXT: [[T0:%.*]] = load float** [[FP]]
155 // CHECK-NEXT: [[T1:%.*]] = bitcast float* [[T0]] to i8*
156 // CHECK-NEXT: [[T2:%.*]] = bitcast float* [[TMP0]] to i8*
157 // CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 4, i8* [[T1]], i8* [[T2]], i32 5)
158 // CHECK-NEXT: [[T3:%.*]] = load float* [[TMP0]], align 4
159 // CHECK-NEXT: store float [[T3]], float* [[F]]
160   float f = *fp;
161 
162 // CHECK-NEXT: [[T0:%.*]] = load float* [[F]], align 4
163 // CHECK-NEXT: [[T1:%.*]] = load float** [[FP]], align 4
164 // CHECK-NEXT: store float [[T0]], float* [[TMP1]], align 4
165 // CHECK-NEXT: [[T2:%.*]] = bitcast float* [[T1]] to i8*
166 // CHECK-NEXT: [[T3:%.*]] = bitcast float* [[TMP1]] to i8*
167 // CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 4, i8* [[T2]], i8* [[T3]], i32 5)
168   *fp = f;
169 
170 // CHECK-NEXT: ret void
171 }
172 
173 // CHECK: define arm_aapcscc void @testComplexFloat([[CF:{ float, float }]]*
testComplexFloat(_Atomic (_Complex float)* fp)174 void testComplexFloat(_Atomic(_Complex float) *fp) {
175 // CHECK:      [[FP:%.*]] = alloca [[CF]]*, align 4
176 // CHECK-NEXT: [[X:%.*]] = alloca [[CF]], align 8
177 // CHECK-NEXT: [[F:%.*]] = alloca [[CF]], align 4
178 // CHECK-NEXT: [[TMP0:%.*]] = alloca [[CF]], align 8
179 // CHECK-NEXT: [[TMP1:%.*]] = alloca [[CF]], align 8
180 // CHECK-NEXT: store [[CF]]*
181 
182 // CHECK-NEXT: [[P:%.*]] = load [[CF]]** [[FP]]
183 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[P]], i32 0, i32 0
184 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[P]], i32 0, i32 1
185 // CHECK-NEXT: store float 1.000000e+00, float* [[T0]]
186 // CHECK-NEXT: store float 0.000000e+00, float* [[T1]]
187   __c11_atomic_init(fp, 1.0f);
188 
189 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[X]], i32 0, i32 0
190 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[X]], i32 0, i32 1
191 // CHECK-NEXT: store float 2.000000e+00, float* [[T0]]
192 // CHECK-NEXT: store float 0.000000e+00, float* [[T1]]
193   _Atomic(_Complex float) x = 2.0f;
194 
195 // CHECK-NEXT: [[T0:%.*]] = load [[CF]]** [[FP]]
196 // CHECK-NEXT: [[T1:%.*]] = bitcast [[CF]]* [[T0]] to i8*
197 // CHECK-NEXT: [[T2:%.*]] = bitcast [[CF]]* [[TMP0]] to i8*
198 // CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5)
199 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP0]], i32 0, i32 0
200 // CHECK-NEXT: [[R:%.*]] = load float* [[T0]]
201 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP0]], i32 0, i32 1
202 // CHECK-NEXT: [[I:%.*]] = load float* [[T0]]
203 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 0
204 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 1
205 // CHECK-NEXT: store float [[R]], float* [[T0]]
206 // CHECK-NEXT: store float [[I]], float* [[T1]]
207   _Complex float f = *fp;
208 
209 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 0
210 // CHECK-NEXT: [[R:%.*]] = load float* [[T0]]
211 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 1
212 // CHECK-NEXT: [[I:%.*]] = load float* [[T0]]
213 // CHECK-NEXT: [[DEST:%.*]] = load [[CF]]** [[FP]], align 4
214 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP1]], i32 0, i32 0
215 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[TMP1]], i32 0, i32 1
216 // CHECK-NEXT: store float [[R]], float* [[T0]]
217 // CHECK-NEXT: store float [[I]], float* [[T1]]
218 // CHECK-NEXT: [[T0:%.*]] = bitcast [[CF]]* [[DEST]] to i8*
219 // CHECK-NEXT: [[T1:%.*]] = bitcast [[CF]]* [[TMP1]] to i8*
220 // CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T0]], i8* [[T1]], i32 5)
221   *fp = f;
222 
223 // CHECK-NEXT: ret void
224 }
225 
226 typedef struct { short x, y, z, w; } S;
227 // CHECK: define arm_aapcscc void @testStruct([[S:.*]]*
testStruct(_Atomic (S)* fp)228 void testStruct(_Atomic(S) *fp) {
229 // CHECK:      [[FP:%.*]] = alloca [[S]]*, align 4
230 // CHECK-NEXT: [[X:%.*]] = alloca [[S]], align 8
231 // CHECK-NEXT: [[F:%.*]] = alloca [[S:%.*]], align 2
232 // CHECK-NEXT: [[TMP0:%.*]] = alloca [[S]], align 8
233 // CHECK-NEXT: store [[S]]*
234 
235 // CHECK-NEXT: [[P:%.*]] = load [[S]]** [[FP]]
236 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 0
237 // CHECK-NEXT: store i16 1, i16* [[T0]], align 2
238 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 1
239 // CHECK-NEXT: store i16 2, i16* [[T0]], align 2
240 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 2
241 // CHECK-NEXT: store i16 3, i16* [[T0]], align 2
242 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 3
243 // CHECK-NEXT: store i16 4, i16* [[T0]], align 2
244   __c11_atomic_init(fp, (S){1,2,3,4});
245 
246 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 0
247 // CHECK-NEXT: store i16 1, i16* [[T0]], align 2
248 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 1
249 // CHECK-NEXT: store i16 2, i16* [[T0]], align 2
250 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 2
251 // CHECK-NEXT: store i16 3, i16* [[T0]], align 2
252 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 3
253 // CHECK-NEXT: store i16 4, i16* [[T0]], align 2
254   _Atomic(S) x = (S){1,2,3,4};
255 
256 // CHECK-NEXT: [[T0:%.*]] = load [[S]]** [[FP]]
257 // CHECK-NEXT: [[T1:%.*]] = bitcast [[S]]* [[T0]] to i8*
258 // CHECK-NEXT: [[T2:%.*]] = bitcast [[S]]* [[F]] to i8*
259 // CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5)
260   S f = *fp;
261 
262 // CHECK-NEXT: [[T0:%.*]] = load [[S]]** [[FP]]
263 // CHECK-NEXT: [[T1:%.*]] = bitcast [[S]]* [[TMP0]] to i8*
264 // CHECK-NEXT: [[T2:%.*]] = bitcast [[S]]* [[F]] to i8*
265 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 8, i32 2, i1 false)
266 // CHECK-NEXT: [[T3:%.*]] = bitcast [[S]]* [[T0]] to i8*
267 // CHECK-NEXT: [[T4:%.*]] = bitcast [[S]]* [[TMP0]] to i8*
268 // CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T3]], i8* [[T4]], i32 5)
269   *fp = f;
270 
271 // CHECK-NEXT: ret void
272 }
273 
274 typedef struct { short x, y, z; } PS;
275 // CHECK: define arm_aapcscc void @testPromotedStruct([[APS:.*]]*
testPromotedStruct(_Atomic (PS)* fp)276 void testPromotedStruct(_Atomic(PS) *fp) {
277 // CHECK:      [[FP:%.*]] = alloca [[APS]]*, align 4
278 // CHECK-NEXT: [[X:%.*]] = alloca [[APS]], align 8
279 // CHECK-NEXT: [[F:%.*]] = alloca [[PS:%.*]], align 2
280 // CHECK-NEXT: [[TMP0:%.*]] = alloca [[APS]], align 8
281 // CHECK-NEXT: [[TMP1:%.*]] = alloca [[APS]], align 8
282 // CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
283 // CHECK-NEXT: [[TMP2:%.*]] = alloca %struct.PS, align 2
284 // CHECK-NEXT: [[TMP3:%.*]] = alloca [[APS]], align 8
285 // CHECK-NEXT: store [[APS]]*
286 
287 // CHECK-NEXT: [[P:%.*]] = load [[APS]]** [[FP]]
288 // CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[P]] to i8*
289 // CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false)
290 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[P]], i32 0, i32 0
291 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 0
292 // CHECK-NEXT: store i16 1, i16* [[T1]], align 2
293 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 1
294 // CHECK-NEXT: store i16 2, i16* [[T1]], align 2
295 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 2
296 // CHECK-NEXT: store i16 3, i16* [[T1]], align 2
297   __c11_atomic_init(fp, (PS){1,2,3});
298 
299 // CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[X]] to i8*
300 // CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T0]], i8 0, i32 8, i32 8, i1 false)
301 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[X]], i32 0, i32 0
302 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 0
303 // CHECK-NEXT: store i16 1, i16* [[T1]], align 2
304 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 1
305 // CHECK-NEXT: store i16 2, i16* [[T1]], align 2
306 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 2
307 // CHECK-NEXT: store i16 3, i16* [[T1]], align 2
308   _Atomic(PS) x = (PS){1,2,3};
309 
310 // CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]]
311 // CHECK-NEXT: [[T1:%.*]] = bitcast [[APS]]* [[T0]] to i8*
312 // CHECK-NEXT: [[T2:%.*]] = bitcast [[APS]]* [[TMP0]] to i8*
313 // CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5)
314 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[TMP0]], i32 0, i32 0
315 // CHECK-NEXT: [[T1:%.*]] = bitcast [[PS]]* [[F]] to i8*
316 // CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T0]] to i8*
317 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 6, i32 2, i1 false)
318   PS f = *fp;
319 
320 // CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]]
321 // CHECK-NEXT: [[T1:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[TMP1]] to i8*
322 // CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T1]], i8 0, i32 8, i32 8, i1 false)
323 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[APS]]* [[TMP1]], i32 0, i32 0
324 // CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T1]] to i8*
325 // CHECK-NEXT: [[T3:%.*]] = bitcast [[PS]]* [[F]] to i8*
326 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T2]], i8* [[T3]], i32 6, i32 2, i1 false)
327 // CHECK-NEXT: [[T4:%.*]] = bitcast [[APS]]* [[T0]] to i8*
328 // CHECK-NEXT: [[T5:%.*]] = bitcast [[APS]]* [[TMP1]] to i8*
329 // CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T4]], i8* [[T5]], i32 5)
330   *fp = f;
331 
332 // CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]], align 4
333 // CHECK-NEXT: [[T1:%.*]] = bitcast [[APS]]* [[T0]] to i8*
334 // CHECK-NEXT: [[T2:%.*]] = bitcast [[APS]]* [[TMP3]] to i8*
335 // CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5)
336 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[TMP3]], i32 0, i32 0
337 // CHECK-NEXT: [[T1:%.*]] = bitcast %struct.PS* [[TMP2]] to i8*
338 // CHECK-NEXT: [[T2:%.*]] = bitcast %struct.PS* [[T0]] to i8*
339 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 6, i32 2, i1 false)
340 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds %struct.PS* [[TMP2]], i32 0, i32 0
341 // CHECK-NEXT: [[T1:%.*]] = load i16* [[T0]], align 2
342 // CHECK-NEXT: [[T2:%.*]] = sext i16 [[T1]] to i32
343 // CHECK-NEXT: store i32 [[T2]], i32* [[A]], align 4
344   int a = ((PS)*fp).x;
345 
346 // CHECK-NEXT: ret void
347 }
348 
349 // CHECK: define arm_aapcscc void @testPromotedStructOps([[APS:.*]]*
350 
351 // FIXME: none of these look right, but we can leave the "test" here
352 // to make sure they at least don't crash.
testPromotedStructOps(_Atomic (PS)* p)353 void testPromotedStructOps(_Atomic(PS) *p) {
354   PS a = __c11_atomic_load(p, 5);
355   __c11_atomic_store(p, a, 5);
356   PS b = __c11_atomic_exchange(p, a, 5);
357   _Bool v = __c11_atomic_compare_exchange_strong(p, &b, a, 5, 5);
358   v = __c11_atomic_compare_exchange_weak(p, &b, a, 5, 5);
359 }
360