1 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | \
2 // RUN: FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=NO-AVX512
3 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx | \
4 // RUN: FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=NO-AVX512
5 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx512f | \
6 // RUN: FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX512
7 #include <stdarg.h>
8
9 // CHECK-LABEL: define signext i8 @f0()
// Returns a zero char; the directive above expects a `signext i8` return.
char f0(void) {
  return 0;
}
13
14 // CHECK-LABEL: define signext i16 @f1()
// Returns a zero short; the directive above expects a `signext i16` return.
short f1(void) {
  return 0;
}
18
19 // CHECK-LABEL: define i32 @f2()
// Returns a zero int; the directive above expects a plain `i32` return.
int f2(void) {
  return 0;
}
23
24 // CHECK-LABEL: define float @f3()
// Returns a zero float; the directive above expects a `float` return.
float f3(void) {
  return 0;
}
28
29 // CHECK-LABEL: define double @f4()
// Returns a zero double; the directive above expects a `double` return.
double f4(void) {
  return 0;
}
33
34 // CHECK-LABEL: define x86_fp80 @f5()
// Returns a zero long double; the directive above expects an `x86_fp80` return.
long double f5(void) {
  return 0;
}
38
39 // CHECK-LABEL: define void @f6(i8 signext %a0, i16 signext %a1, i32 %a2, i64 %a3, i8* %a4)
// Empty body; only the signature is under test. The directive above expects
// `signext` on the char and short parameters and none on the others.
void f6(char a0, short a1, int a2, long long a3, void *a4) {
}
42
43 // CHECK-LABEL: define void @f7(i32 %a0)
// Three-value enum used as the parameter type of f7.
typedef enum { A, B, C } e7;
// Empty body; the directive above expects the enum parameter as `i32`.
void f7(e7 a0) {
}
47
48 // Test merging/passing of upper eightbyte with X87 class.
49 //
50 // CHECK-LABEL: define void @f8_1(%union.u8* noalias sret %agg.result)
51 // CHECK-LABEL: define void @f8_2(%union.u8* byval align 16 %a0)
// Union of a long double and an int, used by f8_1 and f8_2.
union u8 {
  long double a;
  int b;
};
// Never returns; the directives above expect the result through an sret
// pointer.
union u8 f8_1() { while (1) {} }
// Empty body; the directives above expect the argument byval with align 16.
void f8_2(union u8 a0) {}
58
59 // CHECK-LABEL: define i64 @f9()
// Declares struct s9 (two ints followed by a zero-width bit-field) inline as
// the return type and never returns; the directive above expects an `i64`
// return.
struct s9 { int a; int b; int : 0; } f9(void) { while (1) {} }
61
62 // CHECK-LABEL: define void @f10(i64 %a0.coerce)
// Two ints followed by a zero-width bit-field.
struct s10 { int a; int b; int : 0; };
// Empty body; the directive above expects the struct coerced to a single
// `i64` parameter.
void f10(struct s10 a0) {}
65
66 // CHECK-LABEL: define void @f11(%union.anon* noalias sret %agg.result)
// Returns an anonymous union of long double and float and never returns; the
// directive above expects the result through an sret pointer.
union { long double a; float b; } f11() { while (1) {} }
68
69 // CHECK-LABEL: define i32 @f12_0()
70 // CHECK-LABEL: define void @f12_1(i32 %a0.coerce)
// Single int member forced to 16-byte alignment.
struct s12 { int a __attribute__((aligned(16))); };
// Never returns; the directives above expect an `i32` return.
struct s12 f12_0(void) { while (1) {} }
// Empty body; the directives above expect the argument coerced to `i32`.
void f12_1(struct s12 a0) {}
74
75 // Check that sret parameter is accounted for when checking available integer
76 // registers.
77 // CHECK: define void @f13(%struct.s13_0* noalias sret %agg.result, i32 %a, i32 %b, i32 %c, i32 %d, {{.*}}* byval align 8 %e, i32 %f)
78
// Return type of f13: three long longs.
struct s13_0 { long long f0[3]; };
// Type of f13's fifth parameter: two long longs.
struct s13_1 { long long f0[2]; };
// Never returns. The directive above expects an sret pointer for the result
// and parameter `e` passed byval with align 8.
struct s13_0 f13(int a, int b, int c, int d,
                 struct s13_1 e, int f) { while (1) {} }
83
84 // CHECK: define void @f14({{.*}}, i8 signext %X)
// Empty body; six ints precede the char `X`, which the directive above
// expects as `i8 signext`.
void f14(int a, int b, int c, int d, int e, int f, char X) {}
86
87 // CHECK: define void @f15({{.*}}, i8* %X)
// Empty body; six ints precede the pointer `X`, which the directive above
// expects as `i8*`.
void f15(int a, int b, int c, int d, int e, int f, void *X) {}
89
90 // CHECK: define void @f16({{.*}}, float %X)
// Empty body; eight floats precede the float `X`, which the directive above
// expects as a plain `float` parameter.
void f16(float a, float b, float c, float d, float e, float f, float g, float h,
         float X) {}
93
94 // CHECK: define void @f17({{.*}}, x86_fp80 %X)
// Empty body; eight floats precede the long double `X`, which the directive
// above expects as `x86_fp80`.
void f17(float a, float b, float c, float d, float e, float f, float g, float h,
         long double X) {}
97
98 // Check for valid coercion. The struct should be passed/returned as i32, not
99 // as i64 for better code quality.
100 // rdar://8135035
101 // CHECK-LABEL: define void @f18(i32 %a, i32 %f18_arg1.coerce)
// Single-int struct used as f18's second parameter.
struct f18_s0 { int f0; };
// Never returns; the directive above expects the struct coerced to `i32`.
void f18(int a, struct f18_s0 f18_arg1) { while (1) {} }
104
105 // Check byval alignment.
106
107 // CHECK-LABEL: define void @f19(%struct.s19* byval align 16 %x)
// Struct wrapping a single long double.
struct s19 {
  long double a;
};
// Empty body; the directive above expects the argument byval with align 16.
void f19(struct s19 x) {}
112
113 // CHECK-LABEL: define void @f20(%struct.s20* byval align 32 %x)
// Two ints in a struct that is explicitly aligned to 32 bytes.
struct __attribute__((aligned(32))) s20 {
  int x;
  int y;
};
// Empty body; the directive above expects the argument byval with align 32.
void f20(struct s20 x) {}
119
// A long followed by a character pointer; passed by value to f21.
struct StringRef {
  long x;
  const char *Ptr;
};

// rdar://7375902
// CHECK-LABEL: define i8* @f21(i64 %S.coerce0, i8* %S.coerce1)
// Returns S.Ptr advanced by S.x characters.
const char *f21(struct StringRef S) { return S.x+S.Ptr; }
128
129 // PR7567
// 16-byte-aligned typedef of a struct holding two unsigned long longs.
typedef __attribute__ ((aligned(16))) struct f22s { unsigned long long x[2]; } L;
// Empty body; the directives below expect both parameter allocas to be
// aligned to 16.
void f22(L x, L y) { }
132 // CHECK: @f22
133 // CHECK: %x = alloca{{.*}}, align 16
134 // CHECK: %y = alloca{{.*}}, align 16
135
136
137
138 // PR7714
// short / unsigned / int struct shared by f23 (as a parameter) and f24 (as
// the return type).
struct f23S {
  short f0;
  unsigned f1;
  int f2;
};


// Empty body; only the lowered signature named in the directive is tested.
void f23(int A, struct f23S B) {
  // CHECK-LABEL: define void @f23(i32 %A, i64 %B.coerce0, i32 %B.coerce1)
}

// Pointee type of f24's second parameter; f24 never dereferences it.
struct f24s { long a; int b; };

// Returns a copy of *X; P2 is unused.
struct f23S f24(struct f23S *X, struct f24s *P2) {
  return *X;

  // CHECK: define { i64, i32 } @f24(%struct.f23S* %X, %struct.f24s* %P2)
}
157
158 // rdar://8248065
// 16-byte vector of four floats.
typedef float v4f32 __attribute__((__vector_size__(16)));
// Returns X added to itself element-wise. The directives inside the body
// allow exactly one alloca and one store before the return.
v4f32 f25(v4f32 X) {
  // CHECK-LABEL: define <4 x float> @f25(<4 x float> %X)
  // CHECK-NOT: alloca
  // CHECK: alloca <4 x float>
  // CHECK-NOT: alloca
  // CHECK: store <4 x float> %X, <4 x float>*
  // CHECK-NOT: store
  // CHECK: ret <4 x float>
  return X+X;
}
170
// Pair of pointers (int * and float *).
struct foo26 {
  int *X;
  float *Y;
};

// Returns a copy of *P; the directive inside expects an `{ i32*, float* }`
// return.
struct foo26 f26(struct foo26 *P) {
  // CHECK: define { i32*, float* } @f26(%struct.foo26* %P)
  return *P;
}
180
181
182 struct v4f32wrapper {
183 v4f32 v;
184 };
185
f27(struct v4f32wrapper X)186 struct v4f32wrapper f27(struct v4f32wrapper X) {
187 // CHECK-LABEL: define <4 x float> @f27(<4 x float> %X.coerce)
188 return X;
189 }
190
191 // PR22563 - We should unwrap simple structs and arrays to pass
192 // and return them in the appropriate vector registers if possible.
193
// 32-byte vector of eight floats.
typedef float v8f32 __attribute__((__vector_size__(32)));
// Struct wrapping a single v8f32.
struct v8f32wrapper {
  v8f32 v;
};

// Returns its argument unchanged; the directive inside (AVX prefix) expects a
// bare `<8 x float>` for both parameter and return.
struct v8f32wrapper f27a(struct v8f32wrapper X) {
  // AVX-LABEL: define <8 x float> @f27a(<8 x float> %X.coerce)
  return X;
}

// Struct wrapping a one-element array of v8f32.
struct v8f32wrapper_wrapper {
  v8f32 v[1];
};

// Returns its argument unchanged; the directive inside (AVX prefix) expects
// the one-element array to be unwrapped to `<8 x float>` as well.
struct v8f32wrapper_wrapper f27b(struct v8f32wrapper_wrapper X) {
  // AVX-LABEL: define <8 x float> @f27b(<8 x float> %X.coerce)
  return X;
}
212
213 // rdar://5711709
// A double followed by an int.
struct f28c {
  double x;
  int y;
};
// Empty body; the directive inside expects the struct split into a double
// and an i32 parameter.
void f28(struct f28c C) {
  // CHECK-LABEL: define void @f28(double %C.coerce0, i32 %C.coerce1)
}
221
// One-element array of a nested { double, int } struct.
struct f29a {
  struct c {
    double x;
    int y;
  } x[1];
};

// Empty body; the directive inside expects the same double + i32 split as for
// a flat { double, int } struct.
void f29a(struct f29a A) {
  // CHECK-LABEL: define void @f29a(double %A.coerce0, i32 %A.coerce1)
}
232
233 // rdar://8249586
// Eleven chars: an 8-char array followed by three single chars.
struct S0 { char f0[8]; char f2; char f3; char f4; };
// Empty body; the directive inside expects an i64 plus an i24 parameter.
void f30(struct S0 p_4) {
  // CHECK-LABEL: define void @f30(i64 %p_4.coerce0, i24 %p_4.coerce1)
}
238
239 // Pass the third element as a float when followed by tail padding.
240 // rdar://8251384
// Three consecutive floats.
struct f31foo { float a, b, c; };
// Returns the third member; the directive inside expects the struct passed as
// a `<2 x float>` followed by a lone `float`.
float f31(struct f31foo X) {
  // CHECK-LABEL: define float @f31(<2 x float> %X.coerce0, float %X.coerce1)
  return X.c;
}
246
// Returns the complex sum A + B; the directive inside expects each complex
// float to travel as a `<2 x float>`.
_Complex float f32(_Complex float A, _Complex float B) {
  // rdar://6379669
  // CHECK-LABEL: define <2 x float> @f32(<2 x float> %A.coerce, <2 x float> %B.coerce)
  return A+B;
}
252
253
254 // rdar://8357396
// A long followed by two floats.
struct f33s { long x; float c,d; };

// Reads one struct f33s from the va_list and discards the value.
void f33(va_list X) {
  va_arg(X, struct f33s);
}
260
// 8-byte vector holding one unsigned long long.
typedef unsigned long long v1i64 __attribute__((__vector_size__(8)));

// rdar://8359248
// CHECK-LABEL: define i64 @f34(i64 %arg.coerce)
// Returns its argument unchanged.
v1i64 f34(v1i64 arg) { return arg; }
266
267
268 // rdar://8358475
269 // CHECK-LABEL: define i64 @f35(i64 %arg.coerce)
// 8-byte vector holding one unsigned long.
typedef unsigned long v1i64_2 __attribute__((__vector_size__(8)));
// Returns arg added to itself; the directive above expects `i64` for both
// parameter and return.
v1i64_2 f35(v1i64_2 arg) { return arg+arg; }
272
273 // rdar://9122143
274 // CHECK: declare void @func(%struct._str* byval align 16)
// Struct whose only member is an anonymous union of long double and long.
typedef struct _str {
  union {
    long double a;
    long c;
  };
} str;

// Declared only; the directive above matches its `declare` line.
void func(str s);
// File-scope instance passed to func.
str ss;
// Passes the global `ss` by value to func.
void f9122143()
{
  func(ss);
}
288
289 // CHECK-LABEL: define double @f36(double %arg.coerce)
// 8-byte vector of two unsigned ints.
typedef unsigned v2i32 __attribute((__vector_size__(8)));
// Returns its argument unchanged; the directive above expects `double` for
// both parameter and return.
v2i32 f36(v2i32 arg) { return arg; }
292
293 // AVX: declare void @f38(<8 x float>)
294 // AVX: declare void @f37(<8 x float>)
295 // SSE: declare void @f38(%struct.s256* byval align 32)
296 // SSE: declare void @f37(<8 x float>* byval align 32)
// 32-byte vector of eight floats.
typedef float __m256 __attribute__ ((__vector_size__ (32)));
// Struct wrapping a single __m256.
typedef struct {
  __m256 m;
} s256;

// File-scope values passed to f38 and f37.
s256 x38;
__m256 x37;

// Declared only; the directives above match their `declare` lines, which
// differ between the SSE and AVX runs.
void f38(s256 x);
void f37(__m256 x);
// Calls f38 and f37 with the globals.
void f39() { f38(x38); f37(x37); }
308
// The next two tests make sure that the struct below is passed
// in the same way regardless of whether AVX is being used.
311
312 // CHECK: declare void @func40(%struct.t128* byval align 16)
// 16-byte vector of four floats.
typedef float __m128 __attribute__ ((__vector_size__ (16)));
// Struct of two __m128 members.
typedef struct t128 {
  __m128 m;
  __m128 n;
} two128;

// Declared only; the directive above expects its argument byval, align 16.
extern void func40(two128 s);
// Forwards its argument to func40.
void func41(two128 s) {
  func40(s);
}
323
324 // CHECK: declare void @func42(%struct.t128_2* byval align 16)
325 typedef struct xxx {
326 __m128 array[2];
327 } Atwo128;
328 typedef struct t128_2 {
329 Atwo128 x;
330 } SA;
331
332 extern void func42(SA s);
func43(SA s)333 void func43(SA s) {
334 func42(s);
335 }
336
337 // CHECK-LABEL: define i32 @f44
338 // CHECK: ptrtoint
339 // CHECK-NEXT: add i64 %{{[0-9]+}}, 31
340 // CHECK-NEXT: and i64 %{{[0-9]+}}, -32
341 // CHECK-NEXT: inttoptr
// 32-byte vector of ints.
typedef int T44 __attribute((vector_size(32)));
// A T44 followed by an int.
struct s44 { T44 x; int y; };
// Reads one struct s44 from the variadic arguments and returns its `y`
// member. The directives above expect the argument pointer to be rounded up
// to a multiple of 32 (add 31, then mask with -32).
int f44(int i, ...) {
  __builtin_va_list ap;
  __builtin_va_start(ap, i);
  struct s44 s = __builtin_va_arg(ap, struct s44);
  __builtin_va_end(ap);
  return s.y;
}
351
// Test that a vec3 argument is given the correct LLVM IR type.
353 // AVX-LABEL: define i32 @foo(<3 x i64> %X)
// Three-element extended vector of long.
typedef long long3 __attribute((ext_vector_type(3)));
// Ignores X and returns 0; only the parameter's lowered type is tested.
int foo(long3 X)
{
  return 0;
}
359
360 // Make sure we don't use a varargs convention for a function without a
361 // prototype where AVX types are involved.
362 // AVX: @test45
363 // AVX: call i32 bitcast (i32 (...)* @f45 to i32 (<8 x float>)*)
364 int f45();
365 __m256 x45;
test45()366 void test45() { f45(x45); }
367
368 // Make sure we use byval to pass 64-bit vectors in memory; the LLVM call
369 // lowering can't handle this case correctly because it runs after legalization.
370 // CHECK: @test46
371 // CHECK: call void @f46({{.*}}<2 x float>* byval align 8 {{.*}}, <2 x float>* byval align 8 {{.*}})
// 8-byte vector of two floats.
typedef float v46 __attribute((vector_size(8)));
// Declared only; takes ten v46 arguments.
void f46(v46,v46,v46,v46,v46,v46,v46,v46,v46,v46);
// Passes the same {1,2} vector as all ten arguments. The directive above
// expects the last two to be passed byval with align 8.
void test46() { v46 x = {1,2}; f46(x,x,x,x,x,x,x,x,x,x); }
375
376 // Check that we pass the struct below without using byval, which helps out
377 // codegen.
378 //
379 // CHECK: @test47
380 // CHECK: call void @f47(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
// Struct wrapping a single unsigned.
struct s47 { unsigned a; };
// Declared only; six ints followed by a struct s47.
void f47(int,int,int,int,int,int,struct s47);
// Passes `a` six times and then `b`; the directive above expects the struct
// to arrive as a seventh i32 rather than byval.
void test47(int a, struct s47 b) { f47(a, a, a, a, a, a, b); }
384
385 // rdar://12723368
386 // In the following example, there are holes in T4 at the 3rd byte and the 4th
387 // byte, however, T2 does not have those holes. T4 is chosen to be the
388 // representing type for union T1, but we can't use load or store of T4 since
389 // it will skip the 3rd byte and the 4th byte.
// In general, since we don't accurately represent the data fields of a union,
// do not use load or store of the representing llvm type for the union.
// Types for the union described in the comment above: T1 overlays a complex
// int (T2) with a struct (T4) of a complex char followed by a complex int.
typedef _Complex int T2;
typedef _Complex char T5;
typedef _Complex int T7;
typedef struct T4 { T5 field0; T7 field1; } T4;
typedef union T1 { T2 field0; T4 field1; } T1;
// Defined elsewhere; returned by value from test48.
extern T1 T1_retval;
// Returns a copy of the external union; the directives inside expect two
// memcpy calls in the generated code.
T1 test48(void) {
  // CHECK: @test48
  // CHECK: memcpy
  // CHECK: memcpy
  return T1_retval;
}
404
// Declared only; variadic after one named double.
void test49_helper(double, ...);
// Forwards both doubles to the variadic helper; the directives below expect
// two loads followed directly by the call.
void test49(double d, double e) {
  test49_helper(d, e);
}
409 // CHECK-LABEL: define void @test49(
410 // CHECK: [[T0:%.*]] = load double, double*
411 // CHECK-NEXT: [[T1:%.*]] = load double, double*
412 // CHECK-NEXT: call void (double, ...) @test49_helper(double [[T0]], double [[T1]])
413
// Declared without a prototype; the directives below expect the call to go
// through a bitcast of `void (...)*`.
void test50_helper();
// Forwards both doubles to the prototype-less helper.
void test50(double d, double e) {
  test50_helper(d, e);
}
418 // CHECK-LABEL: define void @test50(
419 // CHECK: [[T0:%.*]] = load double, double*
420 // CHECK-NEXT: [[T1:%.*]] = load double, double*
421 // CHECK-NEXT: call void (double, double, ...) bitcast (void (...)* @test50_helper to void (double, double, ...)*)(double [[T0]], double [[T1]])
422
// Struct wrapping a single 128-bit unsigned integer.
struct test51_s { __uint128_t intval; };
// Reads the next argument from argList as a struct test51_s and stores it
// in *s.
void test51(struct test51_s *s, __builtin_va_list argList) {
  *s = __builtin_va_arg(argList, struct test51_s);
}
427
428 // CHECK-LABEL: define void @test51
429 // CHECK: [[TMP_ADDR:%.*]] = alloca [[STRUCT_TEST51:%.*]], align 16
430 // CHECK: br i1
431 // CHECK: [[REG_SAVE_AREA_PTR:%.*]] = getelementptr inbounds {{.*}}, i32 0, i32 3
432 // CHECK-NEXT: [[REG_SAVE_AREA:%.*]] = load i8*, i8** [[REG_SAVE_AREA_PTR]]
433 // CHECK-NEXT: [[VALUE_ADDR:%.*]] = getelementptr i8, i8* [[REG_SAVE_AREA]], i32 {{.*}}
434 // CHECK-NEXT: [[CASTED_VALUE_ADDR:%.*]] = bitcast i8* [[VALUE_ADDR]] to [[STRUCT_TEST51]]
435 // CHECK-NEXT: [[CASTED_TMP_ADDR:%.*]] = bitcast [[STRUCT_TEST51]]* [[TMP_ADDR]] to i8*
436 // CHECK-NEXT: [[RECASTED_VALUE_ADDR:%.*]] = bitcast [[STRUCT_TEST51]]* [[CASTED_VALUE_ADDR]] to i8*
437 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[CASTED_TMP_ADDR]], i8* [[RECASTED_VALUE_ADDR]], i64 16, i32 8, i1 false)
438 // CHECK-NEXT: add i32 {{.*}}, 16
439 // CHECK-NEXT: store i32 {{.*}}, i32* {{.*}}
440 // CHECK-NEXT: br label
441
442 void test52_helper(int, ...);
443 __m256 x52;
test52()444 void test52() {
445 test52_helper(0, x52, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
446 }
447 // AVX: @test52_helper(i32 0, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
448
// Reads the next argument from argList as an __m256 (typedef declared
// earlier in this file) and stores it in *m. The directives below expect no
// conditional branch before the return.
void test53(__m256 *m, __builtin_va_list argList) {
  *m = __builtin_va_arg(argList, __m256);
}
452 // AVX-LABEL: define void @test53
453 // AVX-NOT: br i1
454 // AVX: ret void
455
456 void test54_helper(__m256, ...);
457 __m256 x54;
test54()458 void test54() {
459 test54_helper(x54, x54, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
460 test54_helper(x54, x54, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
461 }
462 // AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
463 // AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[a-zA-Z0-9]+}})
464
// 64-byte vector of sixteen floats.
typedef float __m512 __attribute__ ((__vector_size__ (64)));
// Struct wrapping a single __m512.
typedef struct {
  __m512 m;
} s512;

// File-scope values passed to f55 and f56.
s512 x55;
__m512 x56;

// Even on AVX512, aggregates of size larger than four eightbytes have class
// MEMORY (AVX512 draft 0.3 3.2.3p2 Rule 1).
//
// CHECK: declare void @f55(%struct.s512* byval align 64)
void f55(s512 x);

// However, __m512 has type SSE/SSEUP on AVX512.
//
// AVX512: declare void @f56(<16 x float>)
// NO-AVX512: declare void @f56(<16 x float>* byval align 64)
void f56(__m512 x);
// Calls f55 and f56 with the globals; the directives above match the
// resulting `declare` lines.
void f57() { f55(x55); f56(x56); }
485
486 // Like for __m128 on AVX, check that the struct below is passed
487 // in the same way regardless of AVX512 being used.
488 //
489 // CHECK: declare void @f58(%struct.t256* byval align 32)
490 typedef struct t256 {
491 __m256 m;
492 __m256 n;
493 } two256;
494
495 extern void f58(two256 s);
f59(two256 s)496 void f59(two256 s) {
497 f58(s);
498 }
499
500 // CHECK: declare void @f60(%struct.sat256* byval align 32)
501 typedef struct at256 {
502 __m256 array[2];
503 } Atwo256;
504 typedef struct sat256 {
505 Atwo256 x;
506 } SAtwo256;
507
508 extern void f60(SAtwo256 s);
f61(SAtwo256 s)509 void f61(SAtwo256 s) {
510 f60(s);
511 }
512
513 // AVX512: @f62_helper(i32 0, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
514 void f62_helper(int, ...);
515 __m512 x62;
f62()516 void f62() {
517 f62_helper(0, x62, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
518 }
519
520 // Like for __m256 on AVX, we always pass __m512 in memory, and don't
521 // need to use the register save area.
522 //
523 // AVX512-LABEL: define void @f63
524 // AVX512-NOT: br i1
525 // AVX512: ret void
// Reads the next argument from argList as an __m512 (typedef declared
// earlier in this file) and stores it in *m. The directives above expect no
// conditional branch before the return.
void f63(__m512 *m, __builtin_va_list argList) {
  *m = __builtin_va_arg(argList, __m512);
}
529
530 // AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
531 // AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[a-zA-Z0-9]+}})
532 void f64_helper(__m512, ...);
533 __m512 x64;
f64()534 void f64() {
535 f64_helper(x64, x64, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
536 f64_helper(x64, x64, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
537 }
538