; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f -O0 | FileCheck %s

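; At -O0, the sixteen scalar float arguments are spilled to the stack,
; reloaded, and assembled into a <16 x float> compound literal through a
; chain of insertelement instructions, which llc lowers to vinsertps /
; vinsertf128 / vinsertf64x4 sequences.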
define <16 x float> @makefloat(float %f1, float %f2, float %f3, float %f4, float %f5, float %f6, float %f7, float %f8, float %f9, float %f10, float %f11, float %f12, float %f13, float %f14, float %f15, float %f16) #0 {
; CHECK-LABEL: makefloat:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movq %rsp, %rbp
; CHECK-NEXT:    .cfi_def_cfa_register %rbp
; CHECK-NEXT:    andq $-64, %rsp
; CHECK-NEXT:    subq $256, %rsp # imm = 0x100
; CHECK-NEXT:    vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm1, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm2, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm3, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm4, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm5, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm6, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm7, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss {{.*#+}} xmm15 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm14 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm13 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm12 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm11 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm10 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm9 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm7 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm6 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss %xmm15, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm14, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm13, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm12, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm11, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm10, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm9, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm8, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm7, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm6, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm5, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm4, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm3, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm2, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm1, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; CHECK-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; CHECK-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm0[0,1,2],xmm1[0]
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm0[0,1,2],xmm2[0]
; CHECK-NEXT:    # implicit-def: $ymm0
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm0[0,1,2],xmm2[0]
; CHECK-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[2,3]
; CHECK-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm3[0],xmm0[3]
; CHECK-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm3 = xmm0[0,1,2],xmm3[0]
; CHECK-NEXT:    # implicit-def: $ymm0
; CHECK-NEXT:    vmovaps %xmm3, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm2
; CHECK-NEXT:    # implicit-def: $zmm0
; CHECK-NEXT:    vmovaps %ymm2, %ymm0
; CHECK-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; CHECK-NEXT:    vmovaps %zmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovaps {{[0-9]+}}(%rsp), %zmm0
; CHECK-NEXT:    movq %rbp, %rsp
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
; CHECK-NEXT:    retq
entry:
  %__A.addr.i = alloca float, align 4
  %__B.addr.i = alloca float, align 4
  %__C.addr.i = alloca float, align 4
  %__D.addr.i = alloca float, align 4
  %__E.addr.i = alloca float, align 4
  %__F.addr.i = alloca float, align 4
  %__G.addr.i = alloca float, align 4
  %__H.addr.i = alloca float, align 4
  %__I.addr.i = alloca float, align 4
  %__J.addr.i = alloca float, align 4
  %__K.addr.i = alloca float, align 4
  %__L.addr.i = alloca float, align 4
  %__M.addr.i = alloca float, align 4
  %__N.addr.i = alloca float, align 4
  %__O.addr.i = alloca float, align 4
  %__P.addr.i = alloca float, align 4
  %.compoundliteral.i = alloca <16 x float>, align 64
  %f1.addr = alloca float, align 4
  %f2.addr = alloca float, align 4
  %f3.addr = alloca float, align 4
  %f4.addr = alloca float, align 4
  %f5.addr = alloca float, align 4
  %f6.addr = alloca float, align 4
  %f7.addr = alloca float, align 4
  %f8.addr = alloca float, align 4
  %f9.addr = alloca float, align 4
  %f10.addr = alloca float, align 4
  %f11.addr = alloca float, align 4
  %f12.addr = alloca float, align 4
  %f13.addr = alloca float, align 4
  %f14.addr = alloca float, align 4
  %f15.addr = alloca float, align 4
  %f16.addr = alloca float, align 4
  store float %f1, float* %f1.addr, align 4
  store float %f2, float* %f2.addr, align 4
  store float %f3, float* %f3.addr, align 4
  store float %f4, float* %f4.addr, align 4
  store float %f5, float* %f5.addr, align 4
  store float %f6, float* %f6.addr, align 4
  store float %f7, float* %f7.addr, align 4
  store float %f8, float* %f8.addr, align 4
  store float %f9, float* %f9.addr, align 4
  store float %f10, float* %f10.addr, align 4
  store float %f11, float* %f11.addr, align 4
  store float %f12, float* %f12.addr, align 4
  store float %f13, float* %f13.addr, align 4
  store float %f14, float* %f14.addr, align 4
  store float %f15, float* %f15.addr, align 4
  store float %f16, float* %f16.addr, align 4
  %0 = load float, float* %f16.addr, align 4
  %1 = load float, float* %f15.addr, align 4
  %2 = load float, float* %f14.addr, align 4
  %3 = load float, float* %f13.addr, align 4
  %4 = load float, float* %f12.addr, align 4
  %5 = load float, float* %f11.addr, align 4
  %6 = load float, float* %f10.addr, align 4
  %7 = load float, float* %f9.addr, align 4
  %8 = load float, float* %f8.addr, align 4
  %9 = load float, float* %f7.addr, align 4
  %10 = load float, float* %f6.addr, align 4
  %11 = load float, float* %f5.addr, align 4
  %12 = load float, float* %f4.addr, align 4
  %13 = load float, float* %f3.addr, align 4
  %14 = load float, float* %f2.addr, align 4
  %15 = load float, float* %f1.addr, align 4
  store float %0, float* %__A.addr.i, align 4
  store float %1, float* %__B.addr.i, align 4
  store float %2, float* %__C.addr.i, align 4
  store float %3, float* %__D.addr.i, align 4
  store float %4, float* %__E.addr.i, align 4
  store float %5, float* %__F.addr.i, align 4
  store float %6, float* %__G.addr.i, align 4
  store float %7, float* %__H.addr.i, align 4
  store float %8, float* %__I.addr.i, align 4
  store float %9, float* %__J.addr.i, align 4
  store float %10, float* %__K.addr.i, align 4
  store float %11, float* %__L.addr.i, align 4
  store float %12, float* %__M.addr.i, align 4
  store float %13, float* %__N.addr.i, align 4
  store float %14, float* %__O.addr.i, align 4
  store float %15, float* %__P.addr.i, align 4
  %16 = load float, float* %__P.addr.i, align 4
  %vecinit.i = insertelement <16 x float> undef, float %16, i32 0
  %17 = load float, float* %__O.addr.i, align 4
  %vecinit1.i = insertelement <16 x float> %vecinit.i, float %17, i32 1
  %18 = load float, float* %__N.addr.i, align 4
  %vecinit2.i = insertelement <16 x float> %vecinit1.i, float %18, i32 2
  %19 = load float, float* %__M.addr.i, align 4
  %vecinit3.i = insertelement <16 x float> %vecinit2.i, float %19, i32 3
  %20 = load float, float* %__L.addr.i, align 4
  %vecinit4.i = insertelement <16 x float> %vecinit3.i, float %20, i32 4
  %21 = load float, float* %__K.addr.i, align 4
  %vecinit5.i = insertelement <16 x float> %vecinit4.i, float %21, i32 5
  %22 = load float, float* %__J.addr.i, align 4
  %vecinit6.i = insertelement <16 x float> %vecinit5.i, float %22, i32 6
  %23 = load float, float* %__I.addr.i, align 4
  %vecinit7.i = insertelement <16 x float> %vecinit6.i, float %23, i32 7
  %24 = load float, float* %__H.addr.i, align 4
  %vecinit8.i = insertelement <16 x float> %vecinit7.i, float %24, i32 8
  %25 = load float, float* %__G.addr.i, align 4
  %vecinit9.i = insertelement <16 x float> %vecinit8.i, float %25, i32 9
  %26 = load float, float* %__F.addr.i, align 4
  %vecinit10.i = insertelement <16 x float> %vecinit9.i, float %26, i32 10
  %27 = load float, float* %__E.addr.i, align 4
  %vecinit11.i = insertelement <16 x float> %vecinit10.i, float %27, i32 11
  %28 = load float, float* %__D.addr.i, align 4
  %vecinit12.i = insertelement <16 x float> %vecinit11.i, float %28, i32 12
  %29 = load float, float* %__C.addr.i, align 4
  %vecinit13.i = insertelement <16 x float> %vecinit12.i, float %29, i32 13
  %30 = load float, float* %__B.addr.i, align 4
  %vecinit14.i = insertelement <16 x float> %vecinit13.i, float %30, i32 14
  %31 = load float, float* %__A.addr.i, align 4
  %vecinit15.i = insertelement <16 x float> %vecinit14.i, float %31, i32 15
  store <16 x float> %vecinit15.i, <16 x float>* %.compoundliteral.i, align 64
  %32 = load <16 x float>, <16 x float>* %.compoundliteral.i, align 64
  ret <16 x float> %32
}