; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512
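; The CHECK lines in this file are maintained by the update script named in
; the NOTE above, not written by hand. A minimal sketch of regenerating them
; after changing the RUN lines or the IR (assuming an LLVM checkout with llc
; built and on PATH, or passed via --llc-binary; the test path shown here is
; illustrative):
;   llvm/utils/update_llc_test_checks.py llvm/test/CodeGen/X86/vector-reduce-or.ll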

;
; vXi64
;

define i64 @test_v2i64(<2 x i64> %a0) {
; SSE-LABEL: test_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    movq %xmm1, %rax
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq
  %1 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %a0)
  ret i64 %1
}

define i64 @test_v4i64(<4 x i64> %a0) {
; SSE-LABEL: test_v4i64:
; SSE:       # %bb.0:
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    movq %xmm1, %rax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %a0)
  ret i64 %1
}

define i64 @test_v8i64(<8 x i64> %a0) {
; SSE-LABEL: test_v8i64:
; SSE:       # %bb.0:
; SSE-NEXT:    por %xmm3, %xmm1
; SSE-NEXT:    por %xmm2, %xmm1
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    movq %xmm0, %rax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v8i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v8i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v8i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> %a0)
  ret i64 %1
}

define i64 @test_v16i64(<16 x i64> %a0) {
; SSE-LABEL: test_v16i64:
; SSE:       # %bb.0:
; SSE-NEXT:    por %xmm6, %xmm2
; SSE-NEXT:    por %xmm7, %xmm3
; SSE-NEXT:    por %xmm5, %xmm3
; SSE-NEXT:    por %xmm1, %xmm3
; SSE-NEXT:    por %xmm4, %xmm2
; SSE-NEXT:    por %xmm3, %xmm2
; SSE-NEXT:    por %xmm0, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
; SSE-NEXT:    por %xmm2, %xmm0
; SSE-NEXT:    movq %xmm0, %rax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v16i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vorps %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vorps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v16i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpor %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v16i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> %a0)
  ret i64 %1
}

;
; vXi32
;

define i32 @test_v2i32(<2 x i32> %a0) {
; SSE-LABEL: test_v2i32:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    movd %xmm1, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    retq
  %1 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %a0)
  ret i32 %1
}

define i32 @test_v4i32(<4 x i32> %a0) {
; SSE-LABEL: test_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    movd %xmm0, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    retq
  %1 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %a0)
  ret i32 %1
}

define i32 @test_v8i32(<8 x i32> %a0) {
; SSE-LABEL: test_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    movd %xmm0, %eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %a0)
  ret i32 %1
}

define i32 @test_v16i32(<16 x i32> %a0) {
; SSE-LABEL: test_v16i32:
; SSE:       # %bb.0:
; SSE-NEXT:    por %xmm3, %xmm1
; SSE-NEXT:    por %xmm2, %xmm1
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    movd %xmm1, %eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v16i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v16i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v16i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpord %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %a0)
  ret i32 %1
}

define i32 @test_v32i32(<32 x i32> %a0) {
; SSE-LABEL: test_v32i32:
; SSE:       # %bb.0:
; SSE-NEXT:    por %xmm6, %xmm2
; SSE-NEXT:    por %xmm7, %xmm3
; SSE-NEXT:    por %xmm5, %xmm3
; SSE-NEXT:    por %xmm1, %xmm3
; SSE-NEXT:    por %xmm4, %xmm2
; SSE-NEXT:    por %xmm3, %xmm2
; SSE-NEXT:    por %xmm0, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
; SSE-NEXT:    por %xmm2, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    movd %xmm1, %eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v32i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vorps %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vorps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v32i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpor %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v32i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpord %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpord %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> %a0)
  ret i32 %1
}

;
; vXi16
;

define i16 @test_v2i16(<2 x i16> %a0) {
; SSE-LABEL: test_v2i16:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrld $16, %xmm1
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    movd %xmm1, %eax
; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX-NEXT:    retq
  %1 = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> %a0)
  ret i16 %1
}

define i16 @test_v4i16(<4 x i16> %a0) {
; SSE-LABEL: test_v4i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    psrld $16, %xmm0
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    movd %xmm0, %eax
; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX-NEXT:    retq
  %1 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> %a0)
  ret i16 %1
}

define i16 @test_v8i16(<8 x i16> %a0) {
; SSE-LABEL: test_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrld $16, %xmm1
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    movd %xmm1, %eax
; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX-NEXT:    retq
  %1 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %a0)
  ret i16 %1
}

define i16 @test_v16i16(<16 x i16> %a0) {
; SSE-LABEL: test_v16i16:
; SSE:       # %bb.0:
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrld $16, %xmm1
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    movd %xmm1, %eax
; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %a0)
  ret i16 %1
}

define i16 @test_v32i16(<32 x i16> %a0) {
; SSE-LABEL: test_v32i16:
; SSE:       # %bb.0:
; SSE-NEXT:    por %xmm3, %xmm1
; SSE-NEXT:    por %xmm2, %xmm1
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    psrld $16, %xmm0
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    movd %xmm0, %eax
; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v32i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v32i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v32i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> %a0)
  ret i16 %1
}

define i16 @test_v64i16(<64 x i16> %a0) {
; SSE-LABEL: test_v64i16:
; SSE:       # %bb.0:
; SSE-NEXT:    por %xmm6, %xmm2
; SSE-NEXT:    por %xmm7, %xmm3
; SSE-NEXT:    por %xmm5, %xmm3
; SSE-NEXT:    por %xmm1, %xmm3
; SSE-NEXT:    por %xmm4, %xmm2
; SSE-NEXT:    por %xmm3, %xmm2
; SSE-NEXT:    por %xmm0, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
; SSE-NEXT:    por %xmm2, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    psrld $16, %xmm0
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    movd %xmm0, %eax
; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v64i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vorps %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vorps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v64i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpor %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v64i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> %a0)
  ret i16 %1
}

;
; vXi8
;

define i8 @test_v2i8(<2 x i8> %a0) {
; SSE-LABEL: test_v2i8:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrlw $8, %xmm1
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    movd %xmm1, %eax
; SSE-NEXT:    # kill: def $al killed $al killed $eax
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    # kill: def $al killed $al killed $eax
; AVX-NEXT:    retq
  %1 = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> %a0)
  ret i8 %1
}

define i8 @test_v4i8(<4 x i8> %a0) {
; SSE-LABEL: test_v4i8:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrld $16, %xmm1
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    psrlw $8, %xmm0
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    movd %xmm0, %eax
; SSE-NEXT:    # kill: def $al killed $al killed $eax
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    # kill: def $al killed $al killed $eax
; AVX-NEXT:    retq
  %1 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> %a0)
  ret i8 %1
}

define i8 @test_v8i8(<8 x i8> %a0) {
; SSE-LABEL: test_v8i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    psrld $16, %xmm0
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrlw $8, %xmm1
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    movd %xmm1, %eax
; SSE-NEXT:    # kill: def $al killed $al killed $eax
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    # kill: def $al killed $al killed $eax
; AVX-NEXT:    retq
  %1 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %a0)
  ret i8 %1
}

define i8 @test_v16i8(<16 x i8> %a0) {
; SSE-LABEL: test_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrld $16, %xmm1
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    psrlw $8, %xmm0
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    movd %xmm0, %eax
; SSE-NEXT:    # kill: def $al killed $al killed $eax
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    # kill: def $al killed $al killed $eax
; AVX-NEXT:    retq
  %1 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %a0)
  ret i8 %1
}

define i8 @test_v32i8(<32 x i8> %a0) {
; SSE-LABEL: test_v32i8:
; SSE:       # %bb.0:
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrld $16, %xmm1
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    psrlw $8, %xmm0
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    movd %xmm0, %eax
; SSE-NEXT:    # kill: def $al killed $al killed $eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %a0)
  ret i8 %1
}

define i8 @test_v64i8(<64 x i8> %a0) {
; SSE-LABEL: test_v64i8:
; SSE:       # %bb.0:
; SSE-NEXT:    por %xmm3, %xmm1
; SSE-NEXT:    por %xmm2, %xmm1
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    psrld $16, %xmm0
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrlw $8, %xmm1
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    movd %xmm1, %eax
; SSE-NEXT:    # kill: def $al killed $al killed $eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v64i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v64i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v64i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> %a0)
  ret i8 %1
}

define i8 @test_v128i8(<128 x i8> %a0) {
; SSE-LABEL: test_v128i8:
; SSE:       # %bb.0:
; SSE-NEXT:    por %xmm6, %xmm2
; SSE-NEXT:    por %xmm7, %xmm3
; SSE-NEXT:    por %xmm5, %xmm3
; SSE-NEXT:    por %xmm1, %xmm3
; SSE-NEXT:    por %xmm4, %xmm2
; SSE-NEXT:    por %xmm3, %xmm2
; SSE-NEXT:    por %xmm0, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
; SSE-NEXT:    por %xmm2, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    psrld $16, %xmm0
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrlw $8, %xmm1
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    movd %xmm1, %eax
; SSE-NEXT:    # kill: def $al killed $al killed $eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v128i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vorps %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vorps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v128i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpor %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v128i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> %a0)
  ret i8 %1
}
1033
1034declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>)
1035declare i64 @llvm.vector.reduce.or.v4i64(<4 x i64>)
1036declare i64 @llvm.vector.reduce.or.v8i64(<8 x i64>)
1037declare i64 @llvm.vector.reduce.or.v16i64(<16 x i64>)
1038
1039declare i32 @llvm.vector.reduce.or.v2i32(<2 x i32>)
1040declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>)
1041declare i32 @llvm.vector.reduce.or.v8i32(<8 x i32>)
1042declare i32 @llvm.vector.reduce.or.v16i32(<16 x i32>)
1043declare i32 @llvm.vector.reduce.or.v32i32(<32 x i32>)
1044
1045declare i16 @llvm.vector.reduce.or.v2i16(<2 x i16>)
1046declare i16 @llvm.vector.reduce.or.v4i16(<4 x i16>)
1047declare i16 @llvm.vector.reduce.or.v8i16(<8 x i16>)
1048declare i16 @llvm.vector.reduce.or.v16i16(<16 x i16>)
1049declare i16 @llvm.vector.reduce.or.v32i16(<32 x i16>)
1050declare i16 @llvm.vector.reduce.or.v64i16(<64 x i16>)
1051
1052declare i8 @llvm.vector.reduce.or.v2i8(<2 x i8>)
1053declare i8 @llvm.vector.reduce.or.v4i8(<4 x i8>)
1054declare i8 @llvm.vector.reduce.or.v8i8(<8 x i8>)
1055declare i8 @llvm.vector.reduce.or.v16i8(<16 x i8>)
1056declare i8 @llvm.vector.reduce.or.v32i8(<32 x i8>)
1057declare i8 @llvm.vector.reduce.or.v64i8(<64 x i8>)
1058declare i8 @llvm.vector.reduce.or.v128i8(<128 x i8>)
1059
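
; For reference (paraphrasing the LangRef semantics; not part of the generated
; checks): @llvm.vector.reduce.or.* computes the bitwise OR across all lanes of
; its vector operand, so for <2 x i64> %v the scalar result is equivalent to
;   (extractelement <2 x i64> %v, 0) | (extractelement <2 x i64> %v, 1)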