• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s
3
4
5; Verify that each of the following test cases is folded into a single
6; instruction which performs a blend operation.
7
; OR of two zero-filled <2 x i64> shuffles with disjoint live lanes:
; lane 0 comes from %a and lane 1 comes from %b, i.e. { a[0], b[1] }.
define <2 x i64> @test1(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test1:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; CHECK-NEXT:    retq
  %a.lo = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
  %b.hi = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
  %blend = or <2 x i64> %a.lo, %b.hi
  ret <2 x i64> %blend
}
18
19
; OR of two zero-filled <4 x i32> shuffles with disjoint live lanes:
; lanes 2-3 come from %a and lanes 0-1 come from %b.
define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT:    retq
  %a.hi = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 4, i32 2, i32 3>
  %b.lo = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
  %blend = or <4 x i32> %a.hi, %b.lo
  ret <4 x i32> %blend
}
30
31
; <2 x i64> blend built from zero-filled shuffles:
; lane 1 comes from %a and lane 0 comes from %b, i.e. { b[0], a[1] }.
define <2 x i64> @test3(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test3:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT:    retq
  %a.hi = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
  %b.lo = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
  %blend = or <2 x i64> %a.hi, %b.lo
  ret <2 x i64> %blend
}
42
43
; <4 x i32> blend built from zero-filled shuffles:
; lane 0 comes from %a and lanes 1-3 come from %b.
define <4 x i32> @test4(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test4:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; CHECK-NEXT:    retq
  %a.elt0 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
  %b.rest = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  %blend = or <4 x i32> %a.elt0, %b.rest
  ret <4 x i32> %blend
}
54
55
; <4 x i32> blend built from zero-filled shuffles:
; lanes 1-3 come from %a and lane 0 comes from %b.
define <4 x i32> @test5(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test5:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
; CHECK-NEXT:    retq
  %a.rest = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  %b.elt0 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
  %blend = or <4 x i32> %a.rest, %b.elt0
  ret <4 x i32> %blend
}
66
67
; <4 x i32> blend built from zero-filled shuffles:
; lanes 0-1 come from %a and lanes 2-3 come from %b.
define <4 x i32> @test6(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test6:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; CHECK-NEXT:    retq
  %a.lo = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 4, i32 2, i32 3>
  %blend = or <4 x i32> %a.lo, %b.hi
  ret <4 x i32> %blend
}
78
79
; Mask-and-OR form of a <4 x i32> blend: all-ones/zero lane masks keep
; lanes 0-1 of %a and lanes 2-3 of %b before the two halves are OR'd.
define <4 x i32> @test7(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test7:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; CHECK-NEXT:    retq
  %a.lo = and <4 x i32> %a, <i32 -1, i32 -1, i32 0, i32 0>
  %b.hi = and <4 x i32> %b, <i32 0, i32 0, i32 -1, i32 -1>
  %blend = or <4 x i32> %a.lo, %b.hi
  ret <4 x i32> %blend
}
90
91
; Mask-and-OR form of a <2 x i64> blend: all-ones/zero lane masks keep
; lane 0 of %a and lane 1 of %b before the two halves are OR'd.
define <2 x i64> @test8(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test8:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; CHECK-NEXT:    retq
  %a.lo = and <2 x i64> %a, <i64 -1, i64 0>
  %b.hi = and <2 x i64> %b, <i64 0, i64 -1>
  %blend = or <2 x i64> %a.lo, %b.hi
  ret <2 x i64> %blend
}
102
103
; Mask-and-OR form of a <4 x i32> blend: all-ones/zero lane masks keep
; lanes 2-3 of %a and lanes 0-1 of %b before the two halves are OR'd.
define <4 x i32> @test9(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test9:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT:    retq
  %a.hi = and <4 x i32> %a, <i32 0, i32 0, i32 -1, i32 -1>
  %b.lo = and <4 x i32> %b, <i32 -1, i32 -1, i32 0, i32 0>
  %blend = or <4 x i32> %a.hi, %b.lo
  ret <4 x i32> %blend
}
114
115
; Mask-and-OR form of a <2 x i64> blend: all-ones/zero lane masks keep
; lane 1 of %a and lane 0 of %b before the two halves are OR'd.
define <2 x i64> @test10(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test10:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT:    retq
  %a.hi = and <2 x i64> %a, <i64 0, i64 -1>
  %b.lo = and <2 x i64> %b, <i64 -1, i64 0>
  %blend = or <2 x i64> %a.hi, %b.lo
  ret <2 x i64> %blend
}
126
127
; Mask-and-OR form of a <4 x i32> blend: all-ones/zero lane masks keep
; lane 0 of %a and lanes 1-3 of %b before the two parts are OR'd.
define <4 x i32> @test11(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test11:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; CHECK-NEXT:    retq
  %a.elt0 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 0>
  %b.rest = and <4 x i32> %b, <i32 0, i32 -1, i32 -1, i32 -1>
  %blend = or <4 x i32> %a.elt0, %b.rest
  ret <4 x i32> %blend
}
138
139
; Mask-and-OR form of a <4 x i32> blend: all-ones/zero lane masks keep
; lanes 1-3 of %a and lane 0 of %b before the two parts are OR'd.
define <4 x i32> @test12(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test12:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
; CHECK-NEXT:    retq
  %a.rest = and <4 x i32> %a, <i32 0, i32 -1, i32 -1, i32 -1>
  %b.elt0 = and <4 x i32> %b, <i32 -1, i32 0, i32 0, i32 0>
  %blend = or <4 x i32> %a.rest, %b.elt0
  ret <4 x i32> %blend
}
150
151
152; Verify that the following test cases are folded into single shuffles.
153
; The %a half is a permutation rather than an in-place select:
; the OR produces { a[1], a[1], b[2], b[3] }.
define <4 x i32> @test13(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test13:
; CHECK:       # BB#0:
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; CHECK-NEXT:    retq
  %a.splat1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 1, i32 4, i32 4>
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 4, i32 2, i32 3>
  %merged = or <4 x i32> %a.splat1, %b.hi
  ret <4 x i32> %merged
}
165
166
; Lane 0 of %a stays in lane 0 while lane 0 of %b is moved up to lane 1:
; the OR produces { a[0], b[0] }.
define <2 x i64> @test14(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test14:
; CHECK:       # BB#0:
; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    retq
  %a.lo = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
  %b.lo.up = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
  %merged = or <2 x i64> %a.lo, %b.lo.up
  ret <2 x i64> %merged
}
177
178
; Both halves are permutations with disjoint live lanes:
; the OR produces { b[2], b[1], a[2], a[1] }.
define <4 x i32> @test15(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test15:
; CHECK:       # BB#0:
; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,2,1]
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,1,2,3]
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; CHECK-NEXT:    retq
  %a.perm.hi = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 4, i32 2, i32 1>
  %b.perm.lo = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 1, i32 4, i32 4>
  %merged = or <4 x i32> %a.perm.hi, %b.perm.lo
  ret <4 x i32> %merged
}
191
192
; Lane 0 of %a is moved up to lane 1 while lane 0 of %b stays in lane 0:
; the OR produces { b[0], a[0] }.
define <2 x i64> @test16(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test16:
; CHECK:       # BB#0:
; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; CHECK-NEXT:    movdqa %xmm1, %xmm0
; CHECK-NEXT:    retq
  %a.lo.up = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
  %b.lo = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
  %merged = or <2 x i64> %a.lo.up, %b.lo
  ret <2 x i64> %merged
}
204
205
; Verify that the dag-combiner does not fold an OR of two shuffles into a single
; shuffle instruction when the shuffle indexes are not compatible.
208
; The live lanes of the two shuffles overlap: lane 1 receives a[0] | b[1].
; The expected assembly therefore keeps an explicit por.
define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test17:
; CHECK:       # BB#0:
; CHECK-NEXT:    psllq $32, %xmm0
; CHECK-NEXT:    movq {{.*#+}} xmm1 = xmm1[0],zero
; CHECK-NEXT:    por %xmm1, %xmm0
; CHECK-NEXT:    retq
  %a.odd = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 0, i32 4, i32 2>
  %b.lo = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
  %merged = or <4 x i32> %a.odd, %b.lo
  ret <4 x i32> %merged
}
221
222
; a[0] is moved to lane 1 and b[0] stays in lane 0; all other lanes are zero.
; The expected assembly keeps an explicit por.
define <4 x i32> @test18(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test18:
; CHECK:       # BB#0:
; CHECK-NEXT:    pxor %xmm2, %xmm2
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
; CHECK-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
; CHECK-NEXT:    por %xmm1, %xmm0
; CHECK-NEXT:    retq
  %a.elt0.up = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 0, i32 4, i32 4>
  %b.elt0 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
  %merged = or <4 x i32> %a.elt0.up, %b.elt0
  ret <4 x i32> %merged
}
237
238
; The live lanes of the two shuffles overlap: lane 3 receives a[3] | b[2].
; The expected assembly therefore keeps an explicit por.
define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test19:
; CHECK:       # BB#0:
; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,0,2,3]
; CHECK-NEXT:    pxor %xmm3, %xmm3
; CHECK-NEXT:    pblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,1,2,2]
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3],xmm0[4,5,6,7]
; CHECK-NEXT:    por %xmm2, %xmm0
; CHECK-NEXT:    retq
  %a.odd = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 0, i32 4, i32 3>
  %b.mix = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 2, i32 2>
  %merged = or <4 x i32> %a.odd, %b.mix
  ret <4 x i32> %merged
}
254
255
; Both shuffles use the same mask, so both keep lane 0 and zero lane 1:
; the OR produces { a[0] | b[0], 0 }.
define <2 x i64> @test20(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test20:
; CHECK:       # BB#0:
; CHECK-NEXT:    por %xmm1, %xmm0
; CHECK-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT:    retq
  %a.lo = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
  %b.lo = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
  %merged = or <2 x i64> %a.lo, %b.lo
  ret <2 x i64> %merged
}
267
268
; Both shuffles use the same mask, so both move lane 0 up to lane 1 and zero
; lane 0: the OR produces { 0, a[0] | b[0] }.
define <2 x i64> @test21(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test21:
; CHECK:       # BB#0:
; CHECK-NEXT:    por %xmm1, %xmm0
; CHECK-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; CHECK-NEXT:    retq
  %a.lo.up = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
  %b.lo.up = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
  %merged = or <2 x i64> %a.lo.up, %b.lo.up
  ret <2 x i64> %merged
}
280
281
; Verify that the dag-combiner keeps the correct execution domain when float/double
; vectors are bitcast to integer vectors in order to use the mask-or blend combine.
284
; <2 x double> inputs are bitcast to <2 x i64>, blended with the mask-and-OR
; pattern (lane 1 from %a0, lane 0 from %a1) and bitcast back.
define <2 x double> @test22(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test22:
; CHECK:       # BB#0:
; CHECK-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; CHECK-NEXT:    retq
  %a0.int = bitcast <2 x double> %a0 to <2 x i64>
  %a1.int = bitcast <2 x double> %a1 to <2 x i64>
  %a0.hi = and <2 x i64> %a0.int, <i64 0, i64 -1>
  %a1.lo = and <2 x i64> %a1.int, <i64 -1, i64 0>
  %blend.int = or <2 x i64> %a0.hi, %a1.lo
  %blend = bitcast <2 x i64> %blend.int to <2 x double>
  ret <2 x double> %blend
}
298
299
; <4 x float> inputs are bitcast to <4 x i32>, blended with the mask-and-OR
; pattern (lanes 1-2 from %a0, lanes 0 and 3 from %a1) and bitcast back.
define <4 x float> @test23(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test23:
; CHECK:       # BB#0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
; CHECK-NEXT:    retq
  %a0.int = bitcast <4 x float> %a0 to <4 x i32>
  %a1.int = bitcast <4 x float> %a1 to <4 x i32>
  %a0.inner = and <4 x i32> %a0.int, <i32 0, i32 -1, i32 -1, i32 0>
  %a1.outer = and <4 x i32> %a1.int, <i32 -1, i32 0, i32 0, i32 -1>
  %blend.int = or <4 x i32> %a0.inner, %a1.outer
  %blend = bitcast <4 x i32> %blend.int to <4 x float>
  ret <4 x float> %blend
}
313
314
; <4 x float> inputs are bitcast to <2 x i64>, blended at 64-bit granularity
; (upper i64 from %a0, lower i64 from %a1) and bitcast back to <4 x float>.
define <4 x float> @test24(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test24:
; CHECK:       # BB#0:
; CHECK-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; CHECK-NEXT:    retq
  %a0.int = bitcast <4 x float> %a0 to <2 x i64>
  %a1.int = bitcast <4 x float> %a1 to <2 x i64>
  %a0.hi = and <2 x i64> %a0.int, <i64 0, i64 -1>
  %a1.lo = and <2 x i64> %a1.int, <i64 -1, i64 0>
  %blend.int = or <2 x i64> %a0.hi, %a1.lo
  %blend = bitcast <2 x i64> %blend.int to <4 x float>
  ret <4 x float> %blend
}
328
329
; Same mask-and-OR blend as a two-input float case, but the second input is
; the constant vector of four 1.0 values: lanes 1-2 come from %a0 and lanes 0
; and 3 come from the constant (a memory operand in the expected assembly).
define <4 x float> @test25(<4 x float> %a0) {
; CHECK-LABEL: test25:
; CHECK:       # BB#0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = mem[0],xmm0[1,2],mem[3]
; CHECK-NEXT:    retq
  %a0.int = bitcast <4 x float> %a0 to <4 x i32>
  %ones.int = bitcast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0> to <4 x i32>
  %a0.inner = and <4 x i32> %a0.int, <i32 0, i32 -1, i32 -1, i32 0>
  %ones.outer = and <4 x i32> %ones.int, <i32 -1, i32 0, i32 0, i32 -1>
  %blend.int = or <4 x i32> %a0.inner, %ones.outer
  %blend = bitcast <4 x i32> %blend.int to <4 x float>
  ret <4 x float> %blend
}
343
344
345; Verify that the DAGCombiner doesn't crash in the attempt to check if a shuffle
346; with illegal type has a legal mask. Method 'isShuffleMaskLegal' only knows how to
347; handle legal vector value types.
define <4 x i8> @test_crash(<4 x i8> %a, <4 x i8> %b) {
; CHECK-LABEL: test_crash:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT:    retq
  ; Zero-filled <4 x i8> shuffles with disjoint live lanes:
  ; lanes 2-3 come from %a and lanes 0-1 come from %b.
  %a.hi = shufflevector <4 x i8> %a, <4 x i8> zeroinitializer, <4 x i32> <i32 4, i32 4, i32 2, i32 3>
  %b.lo = shufflevector <4 x i8> %b, <4 x i8> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
  %blend = or <4 x i8> %a.hi, %b.lo
  ret <4 x i8> %blend
}
358
359; Verify that we can fold regardless of which operand is the zeroinitializer
360
; The %a shuffle has the zero vector as its FIRST operand (so %a is indexed
; as 4-7); the %b shuffle has it as its second operand.
; The OR produces { b[0], b[1], a[2], a[3] }.
define <4 x i32> @test2b(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2b:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT:    retq
  %a.hi = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 6, i32 7>
  %b.lo = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
  %blend = or <4 x i32> %a.hi, %b.lo
  ret <4 x i32> %blend
}
371
; Both shuffles have the zero vector as their FIRST operand, so %a and %b are
; indexed as 4-7. The OR produces { b[0], b[1], a[2], a[3] }.
define <4 x i32> @test2c(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2c:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT:    retq
  %a.hi = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 6, i32 7>
  %b.lo = shufflevector <4 x i32> zeroinitializer, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 0>
  %blend = or <4 x i32> %a.hi, %b.lo
  ret <4 x i32> %blend
}
382
383
; The %a shuffle has the zero vector as its second operand; the %b shuffle has
; it as its FIRST operand (so %b is indexed as 4-7).
; The OR produces { b[0], b[1], a[2], a[3] }.
define <4 x i32> @test2d(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2d:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT:    retq
  %a.hi = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 4, i32 2, i32 3>
  %b.lo = shufflevector <4 x i32> zeroinitializer, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 0>
  %blend = or <4 x i32> %a.hi, %b.lo
  ret <4 x i32> %blend
}
394
; Make sure the fold still happens when a mask index that should point into the zero vector is undef instead
396
; The second shuffle operand is <0, undef, undef, undef> (only element 0 is a
; known zero), and lane 0 of the %a shuffle mask is undef rather than index 4.
; Lanes 2-3 come from %a and lanes 0-1 come from %b.
define <4 x i32> @test2e(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2e:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT:    retq
  %a.hi = shufflevector <4 x i32> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32> <i32 undef, i32 4, i32 2, i32 3>
  %b.lo = shufflevector <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
  %blend = or <4 x i32> %a.hi, %b.lo
  ret <4 x i32> %blend
}
407
; The second shuffle operand is <0, undef, undef, undef> (only element 0 is a
; known zero), and lane 0 of the %b shuffle mask is undef rather than index 0.
; Lanes 2-3 come from %a and lane 1 comes from %b.
define <4 x i32> @test2f(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2f:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT:    retq
  %a.hi = shufflevector <4 x i32> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32> <i32 4, i32 4, i32 2, i32 3>
  %b.lo = shufflevector <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32> <i32 undef, i32 1, i32 4, i32 4>
  %blend = or <4 x i32> %a.hi, %b.lo
  ret <4 x i32> %blend
}
418