; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -expand-reductions -S | FileCheck %s
; Tests without a target which should expand all reductions
declare i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64>)

declare float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
declare float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float, <4 x float>)

declare i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64>)

declare double @llvm.experimental.vector.reduce.fmax.f64.v2f64(<2 x double>)
declare double @llvm.experimental.vector.reduce.fmin.f64.v2f64(<2 x double>)


; Integer add reduction on <2 x i64>: expanded to one shuffle + vector add, then extract lane 0.
define i64 @add_i64(<2 x i64> %vec) {
; CHECK-LABEL: @add_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> %vec)
  ret i64 %r
}

; Integer mul reduction on <2 x i64>: expanded to one shuffle + vector mul, then extract lane 0.
define i64 @mul_i64(<2 x i64> %vec) {
; CHECK-LABEL: @mul_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = mul <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> %vec)
  ret i64 %r
}

; Integer and reduction on <2 x i64>: expanded to one shuffle + vector and, then extract lane 0.
define i64 @and_i64(<2 x i64> %vec) {
; CHECK-LABEL: @and_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = and <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> %vec)
  ret i64 %r
}

; Integer or reduction on <2 x i64>: expanded to one shuffle + vector or, then extract lane 0.
define i64 @or_i64(<2 x i64> %vec) {
; CHECK-LABEL: @or_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = or <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> %vec)
  ret i64 %r
}

; Integer xor reduction on <2 x i64>: expanded to one shuffle + vector xor, then extract lane 0.
define i64 @xor_i64(<2 x i64> %vec) {
; CHECK-LABEL: @xor_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = xor <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> %vec)
  ret i64 %r
}

; Fast fadd reduction (undef accumulator): expanded to log2(4) shuffle + fadd steps.
define float @fadd_f32(<4 x float> %vec) {
; CHECK-LABEL: @fadd_f32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT:    ret float [[TMP0]]
;
entry:
  %r = call fast float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %vec)
  ret float %r
}

; Fast fadd reduction with an accumulator: expansion ignores the accumulator under fast-math.
define float @fadd_f32_accum(float %accum, <4 x float> %vec) {
; CHECK-LABEL: @fadd_f32_accum(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT:    ret float [[TMP0]]
;
entry:
  %r = call fast float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float %accum, <4 x float> %vec)
  ret float %r
}

; Strict (non-fast) fadd reduction: expanded to an ordered scalar chain of extract + fadd.
define float @fadd_f32_strict(<4 x float> %vec) {
; CHECK-LABEL: @fadd_f32_strict(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd float undef, [[TMP0]]
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1
; CHECK-NEXT:    [[BIN_RDX1:%.*]] = fadd float [[BIN_RDX]], [[TMP1]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd float [[BIN_RDX1]], [[TMP2]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3
; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fadd float [[BIN_RDX2]], [[TMP3]]
; CHECK-NEXT:    ret float [[BIN_RDX3]]
;
entry:
  %r = call float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %vec)
  ret float %r
}

; Strict fadd reduction with an accumulator: the ordered scalar chain starts from %accum.
define float @fadd_f32_strict_accum(float %accum, <4 x float> %vec) {
; CHECK-LABEL: @fadd_f32_strict_accum(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd float [[ACCUM:%.*]], [[TMP0]]
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1
; CHECK-NEXT:    [[BIN_RDX1:%.*]] = fadd float [[BIN_RDX]], [[TMP1]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd float [[BIN_RDX1]], [[TMP2]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3
; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fadd float [[BIN_RDX2]], [[TMP3]]
; CHECK-NEXT:    ret float [[BIN_RDX3]]
;
entry:
  %r = call float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float %accum, <4 x float> %vec)
  ret float %r
}

; Fast fmul reduction (undef accumulator): expanded to log2(4) shuffle + fmul steps.
define float @fmul_f32(<4 x float> %vec) {
; CHECK-LABEL: @fmul_f32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fmul fast <4 x float> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fmul fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT:    ret float [[TMP0]]
;
entry:
  %r = call fast float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float undef, <4 x float> %vec)
  ret float %r
}

; Fast fmul reduction with an accumulator: expansion ignores the accumulator under fast-math.
define float @fmul_f32_accum(float %accum, <4 x float> %vec) {
; CHECK-LABEL: @fmul_f32_accum(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fmul fast <4 x float> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fmul fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT:    ret float [[TMP0]]
;
entry:
  %r = call fast float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float %accum, <4 x float> %vec)
  ret float %r
}

; Strict (non-fast) fmul reduction: expanded to an ordered scalar chain of extract + fmul.
define float @fmul_f32_strict(<4 x float> %vec) {
; CHECK-LABEL: @fmul_f32_strict(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fmul float undef, [[TMP0]]
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1
; CHECK-NEXT:    [[BIN_RDX1:%.*]] = fmul float [[BIN_RDX]], [[TMP1]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fmul float [[BIN_RDX1]], [[TMP2]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3
; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fmul float [[BIN_RDX2]], [[TMP3]]
; CHECK-NEXT:    ret float [[BIN_RDX3]]
;
entry:
  %r = call float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float undef, <4 x float> %vec)
  ret float %r
}

; Strict fmul reduction with an accumulator: the ordered scalar chain starts from %accum.
define float @fmul_f32_strict_accum(float %accum, <4 x float> %vec) {
; CHECK-LABEL: @fmul_f32_strict_accum(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fmul float [[ACCUM:%.*]], [[TMP0]]
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1
; CHECK-NEXT:    [[BIN_RDX1:%.*]] = fmul float [[BIN_RDX]], [[TMP1]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fmul float [[BIN_RDX1]], [[TMP2]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3
; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fmul float [[BIN_RDX2]], [[TMP3]]
; CHECK-NEXT:    ret float [[BIN_RDX3]]
;
entry:
  %r = call float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float %accum, <4 x float> %vec)
  ret float %r
}

; Signed max reduction: expanded to shuffle + icmp sgt + select, then extract lane 0.
define i64 @smax_i64(<2 x i64> %vec) {
; CHECK-LABEL: @smax_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> %vec)
  ret i64 %r
}

; Signed min reduction: expanded to shuffle + icmp slt + select, then extract lane 0.
define i64 @smin_i64(<2 x i64> %vec) {
; CHECK-LABEL: @smin_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp slt <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> %vec)
  ret i64 %r
}

; Unsigned max reduction: expanded to shuffle + icmp ugt + select, then extract lane 0.
define i64 @umax_i64(<2 x i64> %vec) {
; CHECK-LABEL: @umax_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ugt <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> %vec)
  ret i64 %r
}

; Unsigned min reduction: expanded to shuffle + icmp ult + select, then extract lane 0.
define i64 @umin_i64(<2 x i64> %vec) {
; CHECK-LABEL: @umin_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ult <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> %vec)
  ret i64 %r
}

; FP max reduction: expanded to shuffle + fcmp fast ogt + select, then extract lane 0.
define double @fmax_f64(<2 x double> %vec) {
; CHECK-LABEL: @fmax_f64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x double> [[VEC:%.*]], <2 x double> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <2 x double> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x double> [[VEC]], <2 x double> [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x double> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT:    ret double [[TMP0]]
;
entry:
  %r = call double @llvm.experimental.vector.reduce.fmax.f64.v2f64(<2 x double> %vec)
  ret double %r
}

; FP min reduction: expanded to shuffle + fcmp fast olt + select, then extract lane 0.
define double @fmin_f64(<2 x double> %vec) {
; CHECK-LABEL: @fmin_f64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x double> [[VEC:%.*]], <2 x double> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt <2 x double> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x double> [[VEC]], <2 x double> [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x double> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT:    ret double [[TMP0]]
;
entry:
  %r = call double @llvm.experimental.vector.reduce.fmin.f64.v2f64(<2 x double> %vec)
  ret double %r
}
