; (Code-browser navigation residue removed: Home / Line# / Scopes# / Navigate# / Raw / Download)
; Basic tests for the scalarizer pass. Both invocations below enable
; -scalarize-load-store, so vector loads and stores are expected to be split
; into per-element accesses, and both run dce afterwards. The same expected
; output is used for the individual-pass-flag form and the -passes= form.
; RUN: opt %s -scalarizer -scalarize-load-store -dce -S | FileCheck %s
; RUN: opt %s -passes='function(scalarizer,dce)' -scalarize-load-store -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

; External function taking and returning a whole vector; calls to it force
; scalarized values to be gathered back into a vector.
declare <4 x float> @ext(<4 x float>)
@g = global <4 x float> zeroinitializer

; Scalarize a loop that mixes a vector phi, a vector load/store, bitcasts to
; and from <2 x double>, shufflevectors, fadd, fcmp and select. The call to
; @ext takes and returns a whole vector, so its operand is expected to be
; rebuilt with insertelement and its result split again with extractelement.
define void @f1(<4 x float> %init, <4 x float> *%base, i32 %count) {
; CHECK-LABEL: @f1(
; CHECK: entry:
; CHECK:   %init.i0 = extractelement <4 x float> %init, i32 0
; CHECK:   %init.i1 = extractelement <4 x float> %init, i32 1
; CHECK:   %init.i2 = extractelement <4 x float> %init, i32 2
; CHECK:   %init.i3 = extractelement <4 x float> %init, i32 3
; CHECK:   br label %loop
; CHECK: loop:
; CHECK:   %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
; CHECK:   %acc.i0 = phi float [ %init.i0, %entry ], [ %sel.i0, %loop ]
; CHECK:   %acc.i1 = phi float [ %init.i1, %entry ], [ %sel.i1, %loop ]
; CHECK:   %acc.i2 = phi float [ %init.i2, %entry ], [ %sel.i2, %loop ]
; CHECK:   %acc.i3 = phi float [ %init.i3, %entry ], [ %sel.i3, %loop ]
; CHECK:   %nexti = sub i32 %i, 1
; CHECK:   %ptr = getelementptr <4 x float>, <4 x float>* %base, i32 %i
; CHECK:   %ptr.i0 = bitcast <4 x float>* %ptr to float*
; CHECK:   %val.i0 = load float, float* %ptr.i0, align 16
; CHECK:   %ptr.i1 = getelementptr float, float* %ptr.i0, i32 1
; CHECK:   %val.i1 = load float, float* %ptr.i1, align 4
; CHECK:   %ptr.i2 = getelementptr float, float* %ptr.i0, i32 2
; CHECK:   %val.i2 = load float, float* %ptr.i2, align 8
; CHECK:   %ptr.i3 = getelementptr float, float* %ptr.i0, i32 3
; CHECK:   %val.i3 = load float, float* %ptr.i3, align 4
; CHECK:   %add.i0 = fadd float %val.i0, %val.i2
; CHECK:   %add.i1 = fadd float %val.i1, %val.i3
; CHECK:   %add.i2 = fadd float %acc.i0, %acc.i2
; CHECK:   %add.i3 = fadd float %acc.i1, %acc.i3
; CHECK:   %add.upto0 = insertelement <4 x float> undef, float %add.i0, i32 0
; CHECK:   %add.upto1 = insertelement <4 x float> %add.upto0, float %add.i1, i32 1
; CHECK:   %add.upto2 = insertelement <4 x float> %add.upto1, float %add.i2, i32 2
; CHECK:   %add = insertelement <4 x float> %add.upto2, float %add.i3, i32 3
; CHECK:   %call = call <4 x float> @ext(<4 x float> %add)
; CHECK:   %call.i0 = extractelement <4 x float> %call, i32 0
; CHECK:   %cmp.i0 = fcmp ogt float %call.i0, 1.0
; CHECK:   %call.i1 = extractelement <4 x float> %call, i32 1
; CHECK:   %cmp.i1 = fcmp ogt float %call.i1, 2.0
; CHECK:   %call.i2 = extractelement <4 x float> %call, i32 2
; CHECK:   %cmp.i2 = fcmp ogt float %call.i2, 3.0
; CHECK:   %call.i3 = extractelement <4 x float> %call, i32 3
; CHECK:   %cmp.i3 = fcmp ogt float %call.i3, 4.0
; CHECK:   %sel.i0 = select i1 %cmp.i0, float %call.i0, float 5.0
; CHECK:   %sel.i1 = select i1 %cmp.i1, float %call.i1, float 6.0
; CHECK:   %sel.i2 = select i1 %cmp.i2, float %call.i2, float 7.0
; CHECK:   %sel.i3 = select i1 %cmp.i3, float %call.i3, float 8.0
; CHECK:   store float %sel.i0, float* %ptr.i0
; CHECK:   store float %sel.i1, float* %ptr.i1
; CHECK:   store float %sel.i2, float* %ptr.i2
; CHECK:   store float %sel.i3, float* %ptr.i3
; CHECK:   %test = icmp eq i32 %nexti, 0
; CHECK:   br i1 %test, label %loop, label %exit
; CHECK: exit:
; CHECK:   ret void
entry:
  br label %loop

loop:
  %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
  %acc = phi <4 x float> [ %init, %entry ], [ %sel, %loop ]
  %nexti = sub i32 %i, 1

  %ptr = getelementptr <4 x float>, <4 x float> *%base, i32 %i
  %val = load <4 x float> , <4 x float> *%ptr
  ; Reinterpret both vectors as <2 x double> so that each shuffle lane below
  ; corresponds to a pair of adjacent float lanes.
  %dval = bitcast <4 x float> %val to <2 x double>
  %dacc = bitcast <4 x float> %acc to <2 x double>
  %shuffle1 = shufflevector <2 x double> %dval, <2 x double> %dacc,
                            <2 x i32> <i32 0, i32 2>
  %shuffle2 = shufflevector <2 x double> %dval, <2 x double> %dacc,
                            <2 x i32> <i32 1, i32 3>
  %f1 = bitcast <2 x double> %shuffle1 to <4 x float>
  %f2 = bitcast <2 x double> %shuffle2 to <4 x float>
  %add = fadd <4 x float> %f1, %f2
  %call = call <4 x float> @ext(<4 x float> %add)
  %cmp = fcmp ogt <4 x float> %call,
                  <float 1.0, float 2.0, float 3.0, float 4.0>
  %sel = select <4 x i1> %cmp, <4 x float> %call,
                <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>
  store <4 x float> %sel, <4 x float> *%ptr

  %test = icmp eq i32 %nexti, 0
  br i1 %test, label %loop, label %exit

exit:
  ret void
}

; Scalarize integer operations (sext, add, icmp, select, trunc) around a
; <4 x i8> load/store inside a loop. The splat of %i built with
; insertelement + shufflevector is expected to disappear: every scalar select
; uses %i directly.
define void @f2(<4 x i32> %init, <4 x i8> *%base, i32 %count) {
; CHECK-LABEL: define void @f2(<4 x i32> %init, <4 x i8>* %base, i32 %count) {
; CHECK: entry:
; CHECK:   %init.i0 = extractelement <4 x i32> %init, i32 0
; CHECK:   %init.i1 = extractelement <4 x i32> %init, i32 1
; CHECK:   %init.i2 = extractelement <4 x i32> %init, i32 2
; CHECK:   %init.i3 = extractelement <4 x i32> %init, i32 3
; CHECK:   br label %loop
; CHECK: loop:
; CHECK:   %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
; CHECK:   %acc.i0 = phi i32 [ %init.i0, %entry ], [ %sel.i0, %loop ]
; CHECK:   %acc.i1 = phi i32 [ %init.i1, %entry ], [ %sel.i1, %loop ]
; CHECK:   %acc.i2 = phi i32 [ %init.i2, %entry ], [ %sel.i2, %loop ]
; CHECK:   %acc.i3 = phi i32 [ %init.i3, %entry ], [ %sel.i3, %loop ]
; CHECK:   %nexti = sub i32 %i, 1
; CHECK:   %ptr = getelementptr <4 x i8>, <4 x i8>* %base, i32 %i
; CHECK:   %ptr.i0 = bitcast <4 x i8>* %ptr to i8*
; CHECK:   %val.i0 = load i8, i8* %ptr.i0, align 4
; CHECK:   %ptr.i1 = getelementptr i8, i8* %ptr.i0, i32 1
; CHECK:   %val.i1 = load i8, i8* %ptr.i1, align 1
; CHECK:   %ptr.i2 = getelementptr i8, i8* %ptr.i0, i32 2
; CHECK:   %val.i2 = load i8, i8* %ptr.i2, align 2
; CHECK:   %ptr.i3 = getelementptr i8, i8* %ptr.i0, i32 3
; CHECK:   %val.i3 = load i8, i8* %ptr.i3, align 1
; CHECK:   %ext.i0 = sext i8 %val.i0 to i32
; CHECK:   %ext.i1 = sext i8 %val.i1 to i32
; CHECK:   %ext.i2 = sext i8 %val.i2 to i32
; CHECK:   %ext.i3 = sext i8 %val.i3 to i32
; CHECK:   %add.i0 = add i32 %ext.i0, %acc.i0
; CHECK:   %add.i1 = add i32 %ext.i1, %acc.i1
; CHECK:   %add.i2 = add i32 %ext.i2, %acc.i2
; CHECK:   %add.i3 = add i32 %ext.i3, %acc.i3
; CHECK:   %cmp.i0 = icmp slt i32 %add.i0, -10
; CHECK:   %cmp.i1 = icmp slt i32 %add.i1, -11
; CHECK:   %cmp.i2 = icmp slt i32 %add.i2, -12
; CHECK:   %cmp.i3 = icmp slt i32 %add.i3, -13
; CHECK:   %sel.i0 = select i1 %cmp.i0, i32 %add.i0, i32 %i
; CHECK:   %sel.i1 = select i1 %cmp.i1, i32 %add.i1, i32 %i
; CHECK:   %sel.i2 = select i1 %cmp.i2, i32 %add.i2, i32 %i
; CHECK:   %sel.i3 = select i1 %cmp.i3, i32 %add.i3, i32 %i
; CHECK:   %trunc.i0 = trunc i32 %sel.i0 to i8
; CHECK:   %trunc.i1 = trunc i32 %sel.i1 to i8
; CHECK:   %trunc.i2 = trunc i32 %sel.i2 to i8
; CHECK:   %trunc.i3 = trunc i32 %sel.i3 to i8
; CHECK:   store i8 %trunc.i0, i8* %ptr.i0, align 4
; CHECK:   store i8 %trunc.i1, i8* %ptr.i1, align 1
; CHECK:   store i8 %trunc.i2, i8* %ptr.i2, align 2
; CHECK:   store i8 %trunc.i3, i8* %ptr.i3, align 1
; CHECK:   %test = icmp eq i32 %nexti, 0
; CHECK:   br i1 %test, label %loop, label %exit
; CHECK: exit:
; CHECK:   ret void
entry:
  br label %loop

loop:
  %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
  %acc = phi <4 x i32> [ %init, %entry ], [ %sel, %loop ]
  %nexti = sub i32 %i, 1

  %ptr = getelementptr <4 x i8>, <4 x i8> *%base, i32 %i
  %val = load <4 x i8> , <4 x i8> *%ptr
  %ext = sext <4 x i8> %val to <4 x i32>
  %add = add <4 x i32> %ext, %acc
  %cmp = icmp slt <4 x i32> %add, <i32 -10, i32 -11, i32 -12, i32 -13>
  ; Broadcast %i to all four lanes (insert into lane 0, then shuffle with an
  ; all-zero mask).
  %single = insertelement <4 x i32> undef, i32 %i, i32 0
  %limit = shufflevector <4 x i32> %single, <4 x i32> undef,
                         <4 x i32> zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x i32> %add, <4 x i32> %limit
  %trunc = trunc <4 x i32> %sel to <4 x i8>
  store <4 x i8> %trunc, <4 x i8> *%ptr

  %test = icmp eq i32 %nexti, 0
  br i1 %test, label %loop, label %exit

exit:
  ret void
}

; Check that !tbaa information is preserved.
; The load is tagged with !1 and the store with !2, so TAG is captured once
; for the four scalar loads and captured again at the first scalar store.
define void @f3(<4 x i32> *%src, <4 x i32> *%dst) {
; CHECK-LABEL: @f3(
; CHECK: %val.i0 = load i32, i32* %src.i0, align 16, !tbaa ![[TAG:[0-9]*]]
; CHECK: %val.i1 = load i32, i32* %src.i1, align 4, !tbaa ![[TAG]]
; CHECK: %val.i2 = load i32, i32* %src.i2, align 8, !tbaa ![[TAG]]
; CHECK: %val.i3 = load i32, i32* %src.i3, align 4, !tbaa ![[TAG]]
; CHECK: store i32 %add.i0, i32* %dst.i0, align 16, !tbaa ![[TAG:[0-9]*]]
; CHECK: store i32 %add.i1, i32* %dst.i1, align 4, !tbaa ![[TAG]]
; CHECK: store i32 %add.i2, i32* %dst.i2, align 8, !tbaa ![[TAG]]
; CHECK: store i32 %add.i3, i32* %dst.i3, align 4, !tbaa ![[TAG]]
; CHECK: ret void
  %val = load <4 x i32> , <4 x i32> *%src, !tbaa !1
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, <4 x i32> *%dst, !tbaa !2
  ret void
}

; Check that !tbaa.struct information is preserved.
; Both the load and the store use !5, so every scalar access is expected to
; carry the same tag.
define void @f4(<4 x i32> *%src, <4 x i32> *%dst) {
; CHECK-LABEL: @f4(
; CHECK: %val.i0 = load i32, i32* %src.i0, align 16, !tbaa.struct ![[TAG:[0-9]*]]
; CHECK: %val.i1 = load i32, i32* %src.i1, align 4, !tbaa.struct ![[TAG]]
; CHECK: %val.i2 = load i32, i32* %src.i2, align 8, !tbaa.struct ![[TAG]]
; CHECK: %val.i3 = load i32, i32* %src.i3, align 4, !tbaa.struct ![[TAG]]
; CHECK: store i32 %add.i0, i32* %dst.i0, align 16, !tbaa.struct ![[TAG]]
; CHECK: store i32 %add.i1, i32* %dst.i1, align 4, !tbaa.struct ![[TAG]]
; CHECK: store i32 %add.i2, i32* %dst.i2, align 8, !tbaa.struct ![[TAG]]
; CHECK: store i32 %add.i3, i32* %dst.i3, align 4, !tbaa.struct ![[TAG]]
; CHECK: ret void
  %val = load <4 x i32> , <4 x i32> *%src, !tbaa.struct !5
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, <4 x i32> *%dst, !tbaa.struct !5
  ret void
}

; Check that llvm.access.group information is preserved.
; The vector load and store both belong to access group !13, which the loop
; metadata !3 names in llvm.loop.parallel_accesses; every scalar access is
; expected to keep that group.
define void @f5(i32 %count, <4 x i32> *%src, <4 x i32> *%dst) {
; CHECK-LABEL: @f5(
; CHECK: %val.i0 = load i32, i32* %this_src.i0, align 16, !llvm.access.group ![[TAG:[0-9]*]]
; CHECK: %val.i1 = load i32, i32* %this_src.i1, align 4, !llvm.access.group ![[TAG]]
; CHECK: %val.i2 = load i32, i32* %this_src.i2, align 8, !llvm.access.group ![[TAG]]
; CHECK: %val.i3 = load i32, i32* %this_src.i3, align 4, !llvm.access.group ![[TAG]]
; CHECK: store i32 %add.i0, i32* %this_dst.i0, align 16, !llvm.access.group ![[TAG]]
; CHECK: store i32 %add.i1, i32* %this_dst.i1, align 4, !llvm.access.group ![[TAG]]
; CHECK: store i32 %add.i2, i32* %this_dst.i2, align 8, !llvm.access.group ![[TAG]]
; CHECK: store i32 %add.i3, i32* %this_dst.i3, align 4, !llvm.access.group ![[TAG]]
; CHECK: ret void
entry:
  br label %loop

loop:
  %index = phi i32 [ 0, %entry ], [ %next_index, %loop ]
  %this_src = getelementptr <4 x i32>, <4 x i32> *%src, i32 %index
  %this_dst = getelementptr <4 x i32>, <4 x i32> *%dst, i32 %index
  %val = load <4 x i32> , <4 x i32> *%this_src, !llvm.access.group !13
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, <4 x i32> *%this_dst, !llvm.access.group !13
  %next_index = add i32 %index, -1
  %continue = icmp ne i32 %next_index, %count
  br i1 %continue, label %loop, label %end, !llvm.loop !3

end:
  ret void
}

; Check that fpmath information is preserved.
; Each scalar fadd is expected to carry the same !fpmath tag as the vector
; fadd. The constant operands are matched with a regex so that either a short
; or an exponent-style float spelling is accepted.
define <4 x float> @f6(<4 x float> %x) {
; CHECK-LABEL: @f6(
; CHECK: %x.i0 = extractelement <4 x float> %x, i32 0
; CHECK: %res.i0 = fadd float %x.i0, 1.0{{[e+0]*}}, !fpmath ![[TAG:[0-9]*]]
; CHECK: %x.i1 = extractelement <4 x float> %x, i32 1
; CHECK: %res.i1 = fadd float %x.i1, 2.0{{[e+0]*}}, !fpmath ![[TAG]]
; CHECK: %x.i2 = extractelement <4 x float> %x, i32 2
; CHECK: %res.i2 = fadd float %x.i2, 3.0{{[e+0]*}}, !fpmath ![[TAG]]
; CHECK: %x.i3 = extractelement <4 x float> %x, i32 3
; CHECK: %res.i3 = fadd float %x.i3, 4.0{{[e+0]*}}, !fpmath ![[TAG]]
; CHECK: %res.upto0 = insertelement <4 x float> undef, float %res.i0, i32 0
; CHECK: %res.upto1 = insertelement <4 x float> %res.upto0, float %res.i1, i32 1
; CHECK: %res.upto2 = insertelement <4 x float> %res.upto1, float %res.i2, i32 2
; CHECK: %res = insertelement <4 x float> %res.upto2, float %res.i3, i32 3
; CHECK: ret <4 x float> %res
  %res = fadd <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>,
    !fpmath !4
  ret <4 x float> %res
}

; Check that random metadata isn't kept.
; The custom !foo attachment on the vector load and store must not appear
; anywhere in the output for this function.
define void @f7(<4 x i32> *%src, <4 x i32> *%dst) {
; CHECK-LABEL: @f7(
; CHECK-NOT: !foo
; CHECK: ret void
  %val = load <4 x i32> , <4 x i32> *%src, !foo !5
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, <4 x i32> *%dst, !foo !5
  ret void
}

; Test GEP with vectors.
; The insertelement instructions that build the GEP operands are expected to
; be folded into the scalar GEPs: lanes 0 and 2 use the constant index 100,
; lane 1 uses %other as its base pointer, and only the lanes still taken from
; %ptr0 / %i0 are extracted.
define void @f8(<4 x float *> *%dest, <4 x float *> %ptr0, <4 x i32> %i0,
                float *%other) {
; CHECK-LABEL: @f8(
; CHECK: %dest.i0 = bitcast <4 x float*>* %dest to float**
; CHECK: %dest.i1 = getelementptr float*, float** %dest.i0, i32 1
; CHECK: %dest.i2 = getelementptr float*, float** %dest.i0, i32 2
; CHECK: %dest.i3 = getelementptr float*, float** %dest.i0, i32 3
; CHECK: %ptr0.i0 = extractelement <4 x float*> %ptr0, i32 0
; CHECK: %ptr0.i2 = extractelement <4 x float*> %ptr0, i32 2
; CHECK: %ptr0.i3 = extractelement <4 x float*> %ptr0, i32 3
; CHECK: %i0.i1 = extractelement <4 x i32> %i0, i32 1
; CHECK: %i0.i3 = extractelement <4 x i32> %i0, i32 3
; CHECK: %val.i0 = getelementptr float, float* %ptr0.i0, i32 100
; CHECK: %val.i1 = getelementptr float, float* %other, i32 %i0.i1
; CHECK: %val.i2 = getelementptr float, float* %ptr0.i2, i32 100
; CHECK: %val.i3 = getelementptr float, float* %ptr0.i3, i32 %i0.i3
; CHECK: store float* %val.i0, float** %dest.i0, align 32
; CHECK: store float* %val.i1, float** %dest.i1, align 8
; CHECK: store float* %val.i2, float** %dest.i2, align 16
; CHECK: store float* %val.i3, float** %dest.i3, align 8
; CHECK: ret void
  %i1 = insertelement <4 x i32> %i0, i32 100, i32 0
  %i2 = insertelement <4 x i32> %i1, i32 100, i32 2
  %ptr1 = insertelement <4 x float *> %ptr0, float *%other, i32 1
  %val = getelementptr float, <4 x float *> %ptr1, <4 x i32> %i2
  store <4 x float *> %val, <4 x float *> *%dest
  ret void
}

; Test the handling of unaligned loads.
; The vector load is only 4-byte aligned and the vector store only 8-byte
; aligned; the expected scalar accesses are align 4 for every load and
; align 8/4/8/4 for the stores.
define void @f9(<4 x float> *%dest, <4 x float> *%src) {
; CHECK-LABEL: @f9(
; CHECK: %dest.i0 = bitcast <4 x float>* %dest to float*
; CHECK: %dest.i1 = getelementptr float, float* %dest.i0, i32 1
; CHECK: %dest.i2 = getelementptr float, float* %dest.i0, i32 2
; CHECK: %dest.i3 = getelementptr float, float* %dest.i0, i32 3
; CHECK: %src.i0 = bitcast <4 x float>* %src to float*
; CHECK: %val.i0 = load float, float* %src.i0, align 4
; CHECK: %src.i1 = getelementptr float, float* %src.i0, i32 1
; CHECK: %val.i1 = load float, float* %src.i1, align 4
; CHECK: %src.i2 = getelementptr float, float* %src.i0, i32 2
; CHECK: %val.i2 = load float, float* %src.i2, align 4
; CHECK: %src.i3 = getelementptr float, float* %src.i0, i32 3
; CHECK: %val.i3 = load float, float* %src.i3, align 4
; CHECK: store float %val.i0, float* %dest.i0, align 8
; CHECK: store float %val.i1, float* %dest.i1, align 4
; CHECK: store float %val.i2, float* %dest.i2, align 8
; CHECK: store float %val.i3, float* %dest.i3, align 4
; CHECK: ret void
  %val = load <4 x float> , <4 x float> *%src, align 4
  store <4 x float> %val, <4 x float> *%dest, align 8
  ret void
}

; ...and again with subelement alignment.
; The vector accesses are aligned to less than one float (align 1 for the
; load, align 2 for the store); every scalar access is expected to keep that
; same small alignment.
define void @f10(<4 x float> *%dest, <4 x float> *%src) {
; CHECK-LABEL: @f10(
; CHECK: %dest.i0 = bitcast <4 x float>* %dest to float*
; CHECK: %dest.i1 = getelementptr float, float* %dest.i0, i32 1
; CHECK: %dest.i2 = getelementptr float, float* %dest.i0, i32 2
; CHECK: %dest.i3 = getelementptr float, float* %dest.i0, i32 3
; CHECK: %src.i0 = bitcast <4 x float>* %src to float*
; CHECK: %val.i0 = load float, float* %src.i0, align 1
; CHECK: %src.i1 = getelementptr float, float* %src.i0, i32 1
; CHECK: %val.i1 = load float, float* %src.i1, align 1
; CHECK: %src.i2 = getelementptr float, float* %src.i0, i32 2
; CHECK: %val.i2 = load float, float* %src.i2, align 1
; CHECK: %src.i3 = getelementptr float, float* %src.i0, i32 3
; CHECK: %val.i3 = load float, float* %src.i3, align 1
; CHECK: store float %val.i0, float* %dest.i0, align 2
; CHECK: store float %val.i1, float* %dest.i1, align 2
; CHECK: store float %val.i2, float* %dest.i2, align 2
; CHECK: store float %val.i3, float* %dest.i3, align 2
; CHECK: ret void
  %val = load <4 x float> , <4 x float> *%src, align 1
  store <4 x float> %val, <4 x float> *%dest, align 2
  ret void
}

; Test that sub-byte loads aren't scalarized.
; The <32 x i1> loads and the store are expected to stay as whole-vector
; memory accesses in the output.
define void @f11(<32 x i1> *%dest, <32 x i1> *%src0) {
; CHECK-LABEL: @f11(
; CHECK: %val0 = load <32 x i1>, <32 x i1>* %src0
; CHECK: %val1 = load <32 x i1>, <32 x i1>* %src1
; CHECK: store <32 x i1> %and, <32 x i1>* %dest
; CHECK: ret void
  %src1 = getelementptr <32 x i1>, <32 x i1> *%src0, i32 1
  %val0 = load <32 x i1> , <32 x i1> *%src0
  %val1 = load <32 x i1> , <32 x i1> *%src1
  %and = and <32 x i1> %val0, %val1
  store <32 x i1> %and, <32 x i1> *%dest
  ret void
}

; Test vector GEPs with more than one index.
; The first index is the constant vector <0, 1, 2, 3> and the second is %i;
; each scalar GEP is expected to use its lane's constant as the first index
; and the extracted lane of %i as the second, keeping the inbounds flag.
define void @f13(<4 x float *> *%dest, <4 x [4 x float] *> %ptr, <4 x i32> %i,
                 float *%other) {
; CHECK-LABEL: @f13(
; CHECK: %dest.i0 = bitcast <4 x float*>* %dest to float**
; CHECK: %dest.i1 = getelementptr float*, float** %dest.i0, i32 1
; CHECK: %dest.i2 = getelementptr float*, float** %dest.i0, i32 2
; CHECK: %dest.i3 = getelementptr float*, float** %dest.i0, i32 3
; CHECK: %i.i0 = extractelement <4 x i32> %i, i32 0
; CHECK: %ptr.i0 = extractelement <4 x [4 x float]*> %ptr, i32 0
; CHECK: %val.i0 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i0, i32 0, i32 %i.i0
; CHECK: %i.i1 = extractelement <4 x i32> %i, i32 1
; CHECK: %ptr.i1 = extractelement <4 x [4 x float]*> %ptr, i32 1
; CHECK: %val.i1 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i1, i32 1, i32 %i.i1
; CHECK: %i.i2 = extractelement <4 x i32> %i, i32 2
; CHECK: %ptr.i2 = extractelement <4 x [4 x float]*> %ptr, i32 2
; CHECK: %val.i2 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i2, i32 2, i32 %i.i2
; CHECK: %i.i3 = extractelement <4 x i32> %i, i32 3
; CHECK: %ptr.i3 = extractelement <4 x [4 x float]*> %ptr, i32 3
; CHECK: %val.i3 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i3, i32 3, i32 %i.i3
; CHECK: store float* %val.i0, float** %dest.i0, align 32
; CHECK: store float* %val.i1, float** %dest.i1, align 8
; CHECK: store float* %val.i2, float** %dest.i2, align 16
; CHECK: store float* %val.i3, float** %dest.i3, align 8
; CHECK: ret void
  %val = getelementptr inbounds [4 x float], <4 x [4 x float] *> %ptr,
                                <4 x i32> <i32 0, i32 1, i32 2, i32 3>,
                                <4 x i32> %i
  store <4 x float *> %val, <4 x float *> *%dest
  ret void
}

; Test combinations of vector and non-vector PHIs.
; The vector phi %this_acc is expected to become four scalar phis that sit
; before the scalar phi %this_count; the vector value is then rebuilt with
; insertelement after the phis because @ext needs it as a whole vector.
define <4 x float> @f14(<4 x float> %acc, i32 %count) {
; CHECK-LABEL: @f14(
; CHECK: %this_acc.i0 = phi float [ %acc.i0, %entry ], [ %next_acc.i0, %loop ]
; CHECK: %this_acc.i1 = phi float [ %acc.i1, %entry ], [ %next_acc.i1, %loop ]
; CHECK: %this_acc.i2 = phi float [ %acc.i2, %entry ], [ %next_acc.i2, %loop ]
; CHECK: %this_acc.i3 = phi float [ %acc.i3, %entry ], [ %next_acc.i3, %loop ]
; CHECK: %this_count = phi i32 [ %count, %entry ], [ %next_count, %loop ]
; CHECK: %this_acc.upto0 = insertelement <4 x float> undef, float %this_acc.i0, i32 0
; CHECK: %this_acc.upto1 = insertelement <4 x float> %this_acc.upto0, float %this_acc.i1, i32 1
; CHECK: %this_acc.upto2 = insertelement <4 x float> %this_acc.upto1, float %this_acc.i2, i32 2
; CHECK: %this_acc = insertelement <4 x float> %this_acc.upto2, float %this_acc.i3, i32 3
; CHECK: ret <4 x float> %next_acc
entry:
  br label %loop

loop:
  %this_acc = phi <4 x float> [ %acc, %entry ], [ %next_acc, %loop ]
  %this_count = phi i32 [ %count, %entry ], [ %next_count, %loop ]
  %foo = call <4 x float> @ext(<4 x float> %this_acc)
  %next_acc = fadd <4 x float> %this_acc, %foo
  %next_count = sub i32 %this_count, 1
  %cmp = icmp eq i32 %next_count, 0
  br i1 %cmp, label %loop, label %exit

exit:
  ret <4 x float> %next_acc
}

; Test unary operator scalarization.
; The vector fneg is expected to become four scalar fneg instructions; their
; results are gathered back into a vector for the call to @ext, and the rest
; of the loop body (fcmp, select, store) is scalarized per lane.
define void @f15(<4 x float> %init, <4 x float> *%base, i32 %count) {
; CHECK-LABEL: @f15(
; CHECK: %ptr = getelementptr <4 x float>, <4 x float>* %base, i32 %i
; CHECK: %ptr.i0 = bitcast <4 x float>* %ptr to float*
; CHECK: %val.i0 = load float, float* %ptr.i0, align 16
; CHECK: %ptr.i1 = getelementptr float, float* %ptr.i0, i32 1
; CHECK: %val.i1 = load float, float* %ptr.i1, align 4
; CHECK: %ptr.i2 = getelementptr float, float* %ptr.i0, i32 2
; CHECK: %val.i2 = load float, float* %ptr.i2, align 8
; CHECK: %ptr.i3 = getelementptr float, float* %ptr.i0, i32 3
; CHECK: %val.i3 = load float, float* %ptr.i3, align 4
; CHECK: %neg.i0 = fneg float %val.i0
; CHECK: %neg.i1 = fneg float %val.i1
; CHECK: %neg.i2 = fneg float %val.i2
; CHECK: %neg.i3 = fneg float %val.i3
; CHECK: %neg.upto0 = insertelement <4 x float> undef, float %neg.i0, i32 0
; CHECK: %neg.upto1 = insertelement <4 x float> %neg.upto0, float %neg.i1, i32 1
; CHECK: %neg.upto2 = insertelement <4 x float> %neg.upto1, float %neg.i2, i32 2
; CHECK: %neg = insertelement <4 x float> %neg.upto2, float %neg.i3, i32 3
; CHECK: %call = call <4 x float> @ext(<4 x float> %neg)
; CHECK: %call.i0 = extractelement <4 x float> %call, i32 0
; CHECK: %cmp.i0 = fcmp ogt float %call.i0, 1.000000e+00
; CHECK: %call.i1 = extractelement <4 x float> %call, i32 1
; CHECK: %cmp.i1 = fcmp ogt float %call.i1, 2.000000e+00
; CHECK: %call.i2 = extractelement <4 x float> %call, i32 2
; CHECK: %cmp.i2 = fcmp ogt float %call.i2, 3.000000e+00
; CHECK: %call.i3 = extractelement <4 x float> %call, i32 3
; CHECK: %cmp.i3 = fcmp ogt float %call.i3, 4.000000e+00
; CHECK: %sel.i0 = select i1 %cmp.i0, float %call.i0, float 5.000000e+00
; CHECK: %sel.i1 = select i1 %cmp.i1, float %call.i1, float 6.000000e+00
; CHECK: %sel.i2 = select i1 %cmp.i2, float %call.i2, float 7.000000e+00
; CHECK: %sel.i3 = select i1 %cmp.i3, float %call.i3, float 8.000000e+00
; CHECK: store float %sel.i0, float* %ptr.i0, align 16
; CHECK: store float %sel.i1, float* %ptr.i1, align 4
; CHECK: store float %sel.i2, float* %ptr.i2, align 8
; CHECK: store float %sel.i3, float* %ptr.i3, align 4
entry:
  br label %loop

loop:
  %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
  %acc = phi <4 x float> [ %init, %entry ], [ %sel, %loop ]
  %nexti = sub i32 %i, 1

  %ptr = getelementptr <4 x float>, <4 x float> *%base, i32 %i
  %val = load <4 x float> , <4 x float> *%ptr
  %neg = fneg <4 x float> %val
  %call = call <4 x float> @ext(<4 x float> %neg)
  %cmp = fcmp ogt <4 x float> %call,
                  <float 1.0, float 2.0, float 3.0, float 4.0>
  %sel = select <4 x i1> %cmp, <4 x float> %call,
                <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>
  store <4 x float> %sel, <4 x float> *%ptr

  %test = icmp eq i32 %nexti, 0
  br i1 %test, label %loop, label %exit

exit:
  ret void
}

; Check that IR flags are preserved.
; f16: the nuw and nsw flags on a vector add must appear on both scalar adds.
define <2 x i32> @f16(<2 x i32> %i, <2 x i32> %j) {
; CHECK-LABEL: @f16(
; CHECK: %res.i0 = add nuw nsw i32
; CHECK: %res.i1 = add nuw nsw i32
  %res = add nuw nsw <2 x i32> %i, %j
  ret <2 x i32> %res
}
; f17: the exact flag on a vector sdiv must appear on both scalar sdivs.
define <2 x i32> @f17(<2 x i32> %i, <2 x i32> %j) {
; CHECK-LABEL: @f17(
; CHECK: %res.i0 = sdiv exact i32
; CHECK: %res.i1 = sdiv exact i32
  %res = sdiv exact <2 x i32> %i, %j
  ret <2 x i32> %res
}
; f18: the fast flag on a vector fadd must appear on both scalar fadds.
define <2 x float> @f18(<2 x float> %x, <2 x float> %y) {
; CHECK-LABEL: @f18(
; CHECK: %res.i0 = fadd fast float
; CHECK: %res.i1 = fadd fast float
  %res = fadd fast <2 x float> %x, %y
  ret <2 x float> %res
}
; f19: the fast flag on a vector fneg must appear on both scalar fnegs.
define <2 x float> @f19(<2 x float> %x) {
; CHECK-LABEL: @f19(
; CHECK: %res.i0 = fneg fast float
; CHECK: %res.i1 = fneg fast float
  %res = fneg fast <2 x float> %x
  ret <2 x float> %res
}
; f20: the fast flag on a vector fcmp must appear on both scalar fcmps.
define <2 x i1> @f20(<2 x float> %x, <2 x float> %y) {
; CHECK-LABEL: @f20(
; CHECK: %res.i0 = fcmp fast ogt float
; CHECK: %res.i1 = fcmp fast ogt float
  %res = fcmp fast ogt <2 x float> %x, %y
  ret <2 x i1> %res
}
; f21: the fast flag on a vector llvm.sqrt call must appear on both scalar
; llvm.sqrt.f32 calls.
declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
define <2 x float> @f21(<2 x float> %x) {
; CHECK-LABEL: @f21(
; CHECK: %res.i0 = call fast float @llvm.sqrt.f32
; CHECK: %res.i1 = call fast float @llvm.sqrt.f32
  %res = call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
  ret <2 x float> %res
}
; f22: the fast flag on a vector llvm.fma call must appear on both scalar
; llvm.fma.f32 calls.
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
define <2 x float> @f22(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
; CHECK-LABEL: @f22(
; CHECK: %res.i0 = call fast float @llvm.fma.f32
; CHECK: %res.i1 = call fast float @llvm.fma.f32
  %res = call fast <2 x float> @llvm.fma.v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z)
  ret <2 x float> %res
}

; See https://reviews.llvm.org/D83101#2133062
; Regression test (going by the function name and the linked review): an
; extractelement feeding a chain of insertelement instructions. The expected
; output keeps the original extract and rebuilds the result as %t1.upto0
; followed by %t1.
define <2 x i32> @f23_crash(<2 x i32> %srcvec, i32 %v1) {
; CHECK-LABEL: @f23_crash(
; CHECK: %v0 = extractelement <2 x i32> %srcvec, i32 0
; CHECK: %t1.upto0 = insertelement <2 x i32> undef, i32 %v0, i32 0
; CHECK: %t1 = insertelement <2 x i32> %t1.upto0, i32 %v1, i32 1
; CHECK: ret <2 x i32> %t1
  %v0 = extractelement <2 x i32> %srcvec, i32 0
  %t0 = insertelement <2 x i32> undef, i32 %v0, i32 0
  %t1 = insertelement <2 x i32> %t0, i32 %v1, i32 1
  ret <2 x i32> %t1
}

; Metadata referenced by the functions above:
;   !0, !1, !2 - root node and the two tags used as !tbaa in @f3
;   !3         - loop metadata on the back edge in @f5; names !13 in
;                llvm.loop.parallel_accesses
;   !4         - !fpmath operand in @f6
;   !5         - used as !tbaa.struct in @f4 and as the custom !foo in @f7
;   !13        - access group attached to the load and store in @f5
!0 = !{ !"root" }
!1 = !{ !"set1", !0 }
!2 = !{ !"set2", !0 }
!3 = !{ !3, !{!"llvm.loop.parallel_accesses", !13} }
!4 = !{ float 4.0 }
!5 = !{ i64 0, i64 8, null }
!13 = distinct !{}
