; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=fiji  -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
3
; Shuffle-based fast-math fadd reduction over a <4 x half> vector.
; GFX9 is expected to emit one packed add followed by an SDWA add that
; combines the low and high halves of that packed result. VI is expected to
; emit one SDWA add and two e32 adds.
; GCN-LABEL: {{^}}reduction_half4:
; GFX9:      v_pk_add_f16 [[ADD:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_add_f16_sdwa v{{[0-9]+}}, [[ADD]], [[ADD]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

; VI:      v_add_f16_sdwa
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
define half @reduction_half4(<4 x half> %vec4) {
entry:
  ; Step 1: bring lanes 2..3 down to lanes 0..1 and add.
  %hi.half = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %sum.step1 = fadd fast <4 x half> %vec4, %hi.half
  ; Step 2: bring lane 1 down to lane 0 and add.
  %lane1 = shufflevector <4 x half> %sum.step1, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %sum.step2 = fadd fast <4 x half> %sum.step1, %lane1
  ; Lane 0 now holds the reduced value.
  %scalar = extractelement <4 x half> %sum.step2, i32 0
  ret half %scalar
}
20
; Shuffle-based integer add reduction over a <4 x i16> vector.
; GFX9 is expected to emit one packed add followed by an SDWA add that
; combines the low and high halves of that packed result. VI is expected to
; emit one SDWA add and two e32 adds.
; GCN-LABEL: {{^}}reduction_v4i16:
; GFX9:      v_pk_add_u16 [[ADD:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_add_u16_sdwa v{{[0-9]+}}, [[ADD]], [[ADD]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

; VI:      v_add_u16_sdwa
; VI-NEXT: v_add_u16_e32
; VI-NEXT: v_add_u16_e32
define i16 @reduction_v4i16(<4 x i16> %vec4) {
entry:
  ; Step 1: bring lanes 2..3 down to lanes 0..1 and add.
  %hi.half = shufflevector <4 x i16> %vec4, <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %sum.step1 = add <4 x i16> %vec4, %hi.half
  ; Step 2: bring lane 1 down to lane 0 and add.
  %lane1 = shufflevector <4 x i16> %sum.step1, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %sum.step2 = add <4 x i16> %sum.step1, %lane1
  ; Lane 0 now holds the reduced value.
  %scalar = extractelement <4 x i16> %sum.step2, i32 0
  ret i16 %scalar
}
37
; Shuffle-based fast-math fadd reduction over an <8 x half> vector.
; GFX9 is expected to emit three packed adds (the third combining the results
; of the first two) and then an SDWA add of the two halves of the last result.
; VI is expected to emit two SDWA adds followed by five e32 adds.
; GCN-LABEL: {{^}}reduction_half8:
; GFX9:      v_pk_add_f16 [[ADD1:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_add_f16 [[ADD2:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_add_f16 [[ADD3:v[0-9]+]], [[ADD2]], [[ADD1]]{{$}}
; GFX9-NEXT: v_add_f16_sdwa v{{[0-9]+}}, [[ADD3]], [[ADD3]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

; VI:      v_add_f16_sdwa
; VI-NEXT: v_add_f16_sdwa
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32

define half @reduction_half8(<8 x half> %vec8) {
entry:
  ; Step 1: bring lanes 4..7 down to lanes 0..3 and add.
  %hi.quad = shufflevector <8 x half> %vec8, <8 x half> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %sum.step1 = fadd fast <8 x half> %vec8, %hi.quad
  ; Step 2: bring lanes 2..3 down to lanes 0..1 and add.
  %hi.pair = shufflevector <8 x half> %sum.step1, <8 x half> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %sum.step2 = fadd fast <8 x half> %sum.step1, %hi.pair
  ; Step 3: bring lane 1 down to lane 0 and add.
  %lane1 = shufflevector <8 x half> %sum.step2, <8 x half> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %sum.step3 = fadd fast <8 x half> %sum.step2, %lane1
  ; Lane 0 now holds the reduced value.
  %scalar = extractelement <8 x half> %sum.step3, i32 0
  ret half %scalar
}
63
; Shuffle-based integer add reduction over an <8 x i16> vector.
; GFX9 is expected to emit three packed adds (the third combining the results
; of the first two) and then an SDWA add of the two halves of the last result.
; VI is expected to emit two SDWA adds followed by five e32 adds.
;
; The ADD1/ADD2/ADD3 captures are defined by this function's own check lines,
; so the match does not depend on register numbers captured while checking an
; earlier function.
; GCN-LABEL: {{^}}reduction_v8i16:
; GFX9:      v_pk_add_u16 [[ADD1:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_add_u16 [[ADD2:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_add_u16 [[ADD3:v[0-9]+]], [[ADD2]], [[ADD1]]{{$}}
; GFX9-NEXT: v_add_u16_sdwa v{{[0-9]+}}, [[ADD3]], [[ADD3]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

; VI:      v_add_u16_sdwa
; VI-NEXT: v_add_u16_sdwa
; VI-NEXT: v_add_u16_e32
; VI-NEXT: v_add_u16_e32
; VI-NEXT: v_add_u16_e32
; VI-NEXT: v_add_u16_e32
; VI-NEXT: v_add_u16_e32

define i16 @reduction_v8i16(<8 x i16> %vec8) {
entry:
  ; Step 1: bring lanes 4..7 down to lanes 0..3 and add.
  %rdx.shuf = shufflevector <8 x i16> %vec8, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx = add <8 x i16> %vec8, %rdx.shuf
  ; Step 2: bring lanes 2..3 down to lanes 0..1 and add.
  %rdx.shuf1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx2 = add <8 x i16> %bin.rdx, %rdx.shuf1
  ; Step 3: bring lane 1 down to lane 0 and add.
  %rdx.shuf3 = shufflevector <8 x i16> %bin.rdx2, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx4 = add <8 x i16> %bin.rdx2, %rdx.shuf3
  ; Lane 0 now holds the reduced value.
  %res = extractelement <8 x i16> %bin.rdx4, i32 0
  ret i16 %res
}
89
; Shuffle-based fast-math fadd reduction over a <16 x half> vector.
; GFX9 is expected to emit seven packed adds and then an SDWA add of the two
; halves of the last packed result; only the final three packed adds are tied
; together by register captures. VI is expected to emit four SDWA adds
; followed by eleven e32 adds.
;
; The ADD1/ADD2/ADD3 captures are defined by this function's own check lines,
; so the match does not depend on register numbers captured while checking an
; earlier function.
; GCN-LABEL: {{^}}reduction_half16:
; GFX9:      v_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9:      v_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_add_f16 [[ADD1:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_add_f16 [[ADD2:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_add_f16 [[ADD3:v[0-9]+]], [[ADD2]], [[ADD1]]{{$}}
; GFX9-NEXT: v_add_f16_sdwa v{{[0-9]+}}, [[ADD3]], [[ADD3]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

; VI:      v_add_f16_sdwa
; VI-NEXT: v_add_f16_sdwa
; VI-NEXT: v_add_f16_sdwa
; VI-NEXT: v_add_f16_sdwa
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32

define half @reduction_half16(<16 x half> %vec16) {
entry:
  ; Step 1: bring lanes 8..15 down to lanes 0..7 and add.
  %rdx.shuf = shufflevector <16 x half> %vec16, <16 x half> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx = fadd fast <16 x half> %vec16, %rdx.shuf
  ; Step 2: bring lanes 4..7 down to lanes 0..3 and add.
  %rdx.shuf1 = shufflevector <16 x half> %bin.rdx, <16 x half> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx2 = fadd fast <16 x half> %bin.rdx, %rdx.shuf1
  ; Step 3: bring lanes 2..3 down to lanes 0..1 and add.
  %rdx.shuf3 = shufflevector <16 x half> %bin.rdx2, <16 x half> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx4 = fadd fast <16 x half> %bin.rdx2, %rdx.shuf3
  ; Step 4: bring lane 1 down to lane 0 and add.
  %rdx.shuf5 = shufflevector <16 x half> %bin.rdx4, <16 x half> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx6 = fadd fast <16 x half> %bin.rdx4, %rdx.shuf5
  ; Lane 0 now holds the reduced value.
  %res = extractelement <16 x half> %bin.rdx6, i32 0
  ret half %res
}
129
; Shuffle-based minimum reduction over a <4 x i16> vector, written as
; compare + select pairs.
; NOTE(review): the function name says only "min", but the compares are
; unsigned (ult) and the expected instructions are the u16 forms.
; GFX9 is expected to emit one packed min followed by an SDWA min of the two
; halves of that result. VI is expected to emit one SDWA min and two e32 mins.
; GCN-LABEL: {{^}}reduction_min_v4i16:
; GFX9:      v_pk_min_u16 [[MIN:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_min_u16_sdwa v{{[0-9]+}}, [[MIN]], [[MIN]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

; VI:      v_min_u16_sdwa
; VI-NEXT: v_min_u16_e32
; VI-NEXT: v_min_u16_e32
define i16 @reduction_min_v4i16(<4 x i16> %vec4) {
entry:
  ; Step 1: compare lanes 0..1 against lanes 2..3 and keep the smaller.
  %hi.half = shufflevector <4 x i16> %vec4, <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %keep.step1 = icmp ult <4 x i16> %vec4, %hi.half
  %best.step1 = select <4 x i1> %keep.step1, <4 x i16> %vec4, <4 x i16> %hi.half
  ; Step 2: compare lane 0 against lane 1 and keep the smaller.
  %lane1 = shufflevector <4 x i16> %best.step1, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %keep.step2 = icmp ult <4 x i16> %best.step1, %lane1
  %best.step2 = select <4 x i1> %keep.step2, <4 x i16> %best.step1, <4 x i16> %lane1
  ; Lane 0 now holds the reduced value.
  %scalar = extractelement <4 x i16> %best.step2, i32 0
  ret i16 %scalar
}
148
; Shuffle-based unsigned-minimum reduction over an <8 x i16> vector, written
; as compare (ult) + select pairs.
; GFX9 is expected to emit three packed mins (the third combining the results
; of the first two) and then an SDWA min of the two halves of the last result.
; VI is expected to emit two SDWA mins followed by five e32 mins.
; GCN-LABEL: {{^}}reduction_umin_v8i16:
; GFX9:      v_pk_min_u16 [[MIN1:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_min_u16 [[MIN2:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_min_u16 [[MIN3:v[0-9]+]], [[MIN2]], [[MIN1]]{{$}}
; GFX9-NEXT: v_min_u16_sdwa v{{[0-9]+}}, [[MIN3]], [[MIN3]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

; VI:      v_min_u16_sdwa
; VI-NEXT: v_min_u16_sdwa
; VI-NEXT: v_min_u16_e32
; VI-NEXT: v_min_u16_e32
; VI-NEXT: v_min_u16_e32
; VI-NEXT: v_min_u16_e32
; VI-NEXT: v_min_u16_e32
define i16 @reduction_umin_v8i16(<8 x i16> %vec8) {
entry:
  ; Step 1: compare lanes 0..3 against lanes 4..7 and keep the smaller.
  %hi.quad = shufflevector <8 x i16> %vec8, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %keep.step1 = icmp ult <8 x i16> %vec8, %hi.quad
  %best.step1 = select <8 x i1> %keep.step1, <8 x i16> %vec8, <8 x i16> %hi.quad
  ; Step 2: compare lanes 0..1 against lanes 2..3 and keep the smaller.
  %hi.pair = shufflevector <8 x i16> %best.step1, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %keep.step2 = icmp ult <8 x i16> %best.step1, %hi.pair
  %best.step2 = select <8 x i1> %keep.step2, <8 x i16> %best.step1, <8 x i16> %hi.pair
  ; Step 3: compare lane 0 against lane 1 and keep the smaller.
  %lane1 = shufflevector <8 x i16> %best.step2, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %keep.step3 = icmp ult <8 x i16> %best.step2, %lane1
  %best.step3 = select <8 x i1> %keep.step3, <8 x i16> %best.step2, <8 x i16> %lane1
  ; Lane 0 now holds the reduced value.
  %scalar = extractelement <8 x i16> %best.step3, i32 0
  ret i16 %scalar
}
176
; Scalar form of the <8 x i16> unsigned-minimum reduction, i.e. the IR as it
; looks without SLP vectorization. The expected GFX9 sequence (shifts, one
; SDWA min and several v_min3 ops) is longer than the packed sequence checked
; for reduction_umin_v8i16.
; GCN-LABEL: {{^}}reduction_umin_v8i16_woslp:
; GFX9:      v_lshrrev_b32_e32
; GFX9-NEXT: v_min_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT: v_lshrrev_b32_e32
; GFX9-NEXT: v_min3_u16
; GFX9-NEXT: v_lshrrev_b32_e32
; GFX9-NEXT: v_min3_u16
; GFX9-NEXT: v_min3_u16
define i16 @reduction_umin_v8i16_woslp(<8 x i16> %vec8) {
entry:
  ; Pull every lane out as a scalar.
  %e0 = extractelement <8 x i16> %vec8, i64 0
  %e1 = extractelement <8 x i16> %vec8, i64 1
  %e2 = extractelement <8 x i16> %vec8, i64 2
  %e3 = extractelement <8 x i16> %vec8, i64 3
  %e4 = extractelement <8 x i16> %vec8, i64 4
  %e5 = extractelement <8 x i16> %vec8, i64 5
  %e6 = extractelement <8 x i16> %vec8, i64 6
  %e7 = extractelement <8 x i16> %vec8, i64 7

  ; Linear chain: each step folds the next lane into the running minimum.
  %lt0 = icmp ult i16 %e1, %e0
  %m1 = select i1 %lt0, i16 %e1, i16 %e0
  %lt1 = icmp ult i16 %e2, %m1
  %m2 = select i1 %lt1, i16 %e2, i16 %m1
  %lt2 = icmp ult i16 %e3, %m2
  %m3 = select i1 %lt2, i16 %e3, i16 %m2

  %lt3 = icmp ult i16 %e4, %m3
  %m4 = select i1 %lt3, i16 %e4, i16 %m3
  %lt4 = icmp ult i16 %e5, %m4
  %m5 = select i1 %lt4, i16 %e5, i16 %m4

  %lt5 = icmp ult i16 %e6, %m5
  %m6 = select i1 %lt5, i16 %e6, i16 %m5
  %lt6 = icmp ult i16 %e7, %m6
  %m7 = select i1 %lt6, i16 %e7, i16 %m6

  ret i16 %m7
}
216
; Shuffle-based signed-minimum reduction over a <16 x i16> vector, written as
; compare (slt) + select pairs.
; GFX9 is expected to emit seven packed mins followed by one SDWA min.
; VI is expected to emit four SDWA mins followed by eleven e32 mins.
; GCN-LABEL: {{^}}reduction_smin_v16i16:
; GFX9:        v_pk_min_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT:   v_pk_min_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT:   v_pk_min_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT:   v_pk_min_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT:   v_pk_min_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT:   v_pk_min_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT:   v_pk_min_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT:   v_min_i16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

; VI:      v_min_i16_sdwa
; VI-NEXT: v_min_i16_sdwa
; VI-NEXT: v_min_i16_sdwa
; VI-NEXT: v_min_i16_sdwa
; VI-NEXT: v_min_i16_e32
; VI-NEXT: v_min_i16_e32
; VI-NEXT: v_min_i16_e32
; VI-NEXT: v_min_i16_e32
; VI-NEXT: v_min_i16_e32
; VI-NEXT: v_min_i16_e32
; VI-NEXT: v_min_i16_e32
; VI-NEXT: v_min_i16_e32
; VI-NEXT: v_min_i16_e32
; VI-NEXT: v_min_i16_e32
; VI-NEXT: v_min_i16_e32
define i16 @reduction_smin_v16i16(<16 x i16> %vec16) {
entry:
  ; Step 1: compare lanes 0..7 against lanes 8..15 and keep the smaller.
  %hi.oct = shufflevector <16 x i16> %vec16, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %keep.step1 = icmp slt <16 x i16> %vec16, %hi.oct
  %best.step1 = select <16 x i1> %keep.step1, <16 x i16> %vec16, <16 x i16> %hi.oct
  ; Step 2: compare lanes 0..3 against lanes 4..7 and keep the smaller.
  %hi.quad = shufflevector <16 x i16> %best.step1, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %keep.step2 = icmp slt <16 x i16> %best.step1, %hi.quad
  %best.step2 = select <16 x i1> %keep.step2, <16 x i16> %best.step1, <16 x i16> %hi.quad
  ; Step 3: compare lanes 0..1 against lanes 2..3 and keep the smaller.
  %hi.pair = shufflevector <16 x i16> %best.step2, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %keep.step3 = icmp slt <16 x i16> %best.step2, %hi.pair
  %best.step3 = select <16 x i1> %keep.step3, <16 x i16> %best.step2, <16 x i16> %hi.pair
  ; Step 4: compare lane 0 against lane 1 and keep the smaller.
  %lane1 = shufflevector <16 x i16> %best.step3, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %keep.step4 = icmp slt <16 x i16> %best.step3, %lane1
  %best.step4 = select <16 x i1> %keep.step4, <16 x i16> %best.step3, <16 x i16> %lane1
  ; Lane 0 now holds the reduced value.
  %scalar = extractelement <16 x i16> %best.step4, i32 0
  ret i16 %scalar
}
259
; Scalar form of the <16 x i16> signed-minimum reduction, i.e. the IR as it
; looks without SLP vectorization. The expected GFX9 sequence (shifts, one
; SDWA min and a run of v_min3 ops) is longer than the packed sequence checked
; for reduction_smin_v16i16.
; GCN-LABEL: {{^}}reduction_smin_v16i16_woslp:
; GFX9:      v_lshrrev_b32_e32
; GFX9-NEXT: v_min_i16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT: v_lshrrev_b32_e32
; GFX9-NEXT: v_min3_i16
; GFX9-NEXT: v_lshrrev_b32_e32
; GFX9-NEXT: v_min3_i16
; GFX9-NEXT: v_lshrrev_b32_e32
; GFX9-NEXT: v_min3_i16
; GFX9-NEXT: v_lshrrev_b32_e32
; GFX9-NEXT: v_min3_i16
; GFX9-NEXT: v_lshrrev_b32_e32
; GFX9-NEXT: v_min3_i16
; GFX9-NEXT: v_lshrrev_b32_e32
; GFX9-NEXT: v_min3_i16
; GFX9-NEXT: v_min3_i16
define i16 @reduction_smin_v16i16_woslp(<16 x i16> %vec16) {
entry:
  ; Pull every lane out as a scalar.
  %e0 = extractelement <16 x i16> %vec16, i64 0
  %e1 = extractelement <16 x i16> %vec16, i64 1
  %e2 = extractelement <16 x i16> %vec16, i64 2
  %e3 = extractelement <16 x i16> %vec16, i64 3
  %e4 = extractelement <16 x i16> %vec16, i64 4
  %e5 = extractelement <16 x i16> %vec16, i64 5
  %e6 = extractelement <16 x i16> %vec16, i64 6
  %e7 = extractelement <16 x i16> %vec16, i64 7

  %e8 = extractelement <16 x i16> %vec16, i64 8
  %e9 = extractelement <16 x i16> %vec16, i64 9
  %e10 = extractelement <16 x i16> %vec16, i64 10
  %e11 = extractelement <16 x i16> %vec16, i64 11
  %e12 = extractelement <16 x i16> %vec16, i64 12
  %e13 = extractelement <16 x i16> %vec16, i64 13
  %e14 = extractelement <16 x i16> %vec16, i64 14
  %e15 = extractelement <16 x i16> %vec16, i64 15

  ; Linear chain: each step folds the next lane into the running minimum.
  %lt0 = icmp slt i16 %e1, %e0
  %m1 = select i1 %lt0, i16 %e1, i16 %e0
  %lt1 = icmp slt i16 %e2, %m1
  %m2 = select i1 %lt1, i16 %e2, i16 %m1
  %lt2 = icmp slt i16 %e3, %m2
  %m3 = select i1 %lt2, i16 %e3, i16 %m2

  %lt3 = icmp slt i16 %e4, %m3
  %m4 = select i1 %lt3, i16 %e4, i16 %m3
  %lt4 = icmp slt i16 %e5, %m4
  %m5 = select i1 %lt4, i16 %e5, i16 %m4

  %lt5 = icmp slt i16 %e6, %m5
  %m6 = select i1 %lt5, i16 %e6, i16 %m5
  %lt6 = icmp slt i16 %e7, %m6
  %m7 = select i1 %lt6, i16 %e7, i16 %m6

  %lt7 = icmp slt i16 %e8, %m7
  %m8 = select i1 %lt7, i16 %e8, i16 %m7
  %lt8 = icmp slt i16 %e9, %m8
  %m9 = select i1 %lt8, i16 %e9, i16 %m8

  %lt9 = icmp slt i16 %e10, %m9
  %m10 = select i1 %lt9, i16 %e10, i16 %m9
  %lt10 = icmp slt i16 %e11, %m10
  %m11 = select i1 %lt10, i16 %e11, i16 %m10

  %lt11 = icmp slt i16 %e12, %m11
  %m12 = select i1 %lt11, i16 %e12, i16 %m11
  %lt12 = icmp slt i16 %e13, %m12
  %m13 = select i1 %lt12, i16 %e13, i16 %m12

  %lt13 = icmp slt i16 %e14, %m13
  %m14 = select i1 %lt13, i16 %e14, i16 %m13
  %lt14 = icmp slt i16 %e15, %m14
  %m15 = select i1 %lt14, i16 %e15, i16 %m14


  ret i16 %m15
}
337
; Shuffle-based unsigned-maximum reduction over a <4 x i16> vector, written
; as compare (ugt) + select pairs.
; GFX9 is expected to emit one packed max followed by an SDWA max of the two
; halves of that result. VI is expected to emit one SDWA max and two e32 maxes.
; GCN-LABEL: {{^}}reduction_umax_v4i16:
; GFX9:      v_pk_max_u16 [[MAX:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_max_u16_sdwa v{{[0-9]+}}, [[MAX]], [[MAX]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

; VI:      v_max_u16_sdwa
; VI-NEXT: v_max_u16_e32
; VI-NEXT: v_max_u16_e32
define i16 @reduction_umax_v4i16(<4 x i16> %vec4) {
entry:
  ; Step 1: compare lanes 0..1 against lanes 2..3 and keep the larger.
  %hi.half = shufflevector <4 x i16> %vec4, <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %keep.step1 = icmp ugt <4 x i16> %vec4, %hi.half
  %best.step1 = select <4 x i1> %keep.step1, <4 x i16> %vec4, <4 x i16> %hi.half
  ; Step 2: compare lane 0 against lane 1 and keep the larger.
  %lane1 = shufflevector <4 x i16> %best.step1, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %keep.step2 = icmp ugt <4 x i16> %best.step1, %lane1
  %best.step2 = select <4 x i1> %keep.step2, <4 x i16> %best.step1, <4 x i16> %lane1
  ; Lane 0 now holds the reduced value.
  %scalar = extractelement <4 x i16> %best.step2, i32 0
  ret i16 %scalar
}
356
; Shuffle-based signed-maximum reduction over a <4 x i16> vector, written as
; compare (sgt) + select pairs.
; GFX9 is expected to emit one packed max followed by an SDWA max of the two
; halves of that result. VI is expected to emit one SDWA max and two e32 maxes.
; NOTE(review): the function references attribute group #0, but no definition
; of that group is visible in this part of the file - confirm it exists.
; GCN-LABEL: {{^}}reduction_smax_v4i16:
; GFX9:      v_pk_max_i16 [[MAX:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_max_i16_sdwa v{{[0-9]+}}, [[MAX]], [[MAX]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

; VI:      v_max_i16_sdwa
; VI-NEXT: v_max_i16_e32
; VI-NEXT: v_max_i16_e32
define i16 @reduction_smax_v4i16(<4 x i16> %vec4) #0 {
entry:
  ; Step 1: compare lanes 0..1 against lanes 2..3 and keep the larger.
  %hi.half = shufflevector <4 x i16> %vec4, <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %keep.step1 = icmp sgt <4 x i16> %vec4, %hi.half
  %best.step1 = select <4 x i1> %keep.step1, <4 x i16> %vec4, <4 x i16> %hi.half
  ; Step 2: compare lane 0 against lane 1 and keep the larger.
  %lane1 = shufflevector <4 x i16> %best.step1, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %keep.step2 = icmp sgt <4 x i16> %best.step1, %lane1
  %best.step2 = select <4 x i1> %keep.step2, <4 x i16> %best.step1, <4 x i16> %lane1
  ; Lane 0 now holds the reduced value.
  %scalar = extractelement <4 x i16> %best.step2, i32 0
  ret i16 %scalar
}
375
; Shuffle-based floating-point maximum reduction over a <4 x half> vector,
; written as fast-math compare (ogt) + select pairs.
; GFX9 is expected to emit one packed max followed by an SDWA max of the two
; halves of that result. VI is expected to emit one SDWA max and two e32 maxes.
; GCN-LABEL: {{^}}reduction_fmax_v4half:
; GFX9:      v_pk_max_f16 [[MAX:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_max_f16_sdwa v{{[0-9]+}}, [[MAX]], [[MAX]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

; VI:      v_max_f16_sdwa
; VI-NEXT: v_max_f16_e32
; VI-NEXT: v_max_f16_e32
define half @reduction_fmax_v4half(<4 x half> %vec4) {
entry:
  ; Step 1: compare lanes 0..1 against lanes 2..3 and keep the larger.
  %hi.half = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %keep.step1 = fcmp fast ogt <4 x half> %vec4, %hi.half
  %best.step1 = select <4 x i1> %keep.step1, <4 x half> %vec4, <4 x half> %hi.half
  ; Step 2: compare lane 0 against lane 1 and keep the larger.
  %lane1 = shufflevector <4 x half> %best.step1, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %keep.step2 = fcmp fast ogt <4 x half> %best.step1, %lane1
  %best.step2 = select <4 x i1> %keep.step2, <4 x half> %best.step1, <4 x half> %lane1
  ; Lane 0 now holds the reduced value.
  %scalar = extractelement <4 x half> %best.step2, i32 0
  ret half %scalar
}
394
; Shuffle-based floating-point minimum reduction over a <4 x half> vector,
; written as fast-math compare (olt) + select pairs.
; GFX9 is expected to emit one packed min followed by an SDWA min of the two
; halves of that result. VI is expected to emit one SDWA min and two e32 mins.
; GCN-LABEL: {{^}}reduction_fmin_v4half:
; GFX9:      v_pk_min_f16 [[MIN:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_min_f16_sdwa v{{[0-9]+}}, [[MIN]], [[MIN]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

; VI:      v_min_f16_sdwa
; VI-NEXT: v_min_f16_e32
; VI-NEXT: v_min_f16_e32
define half @reduction_fmin_v4half(<4 x half> %vec4) {
entry:
  ; Step 1: compare lanes 0..1 against lanes 2..3 and keep the smaller.
  %hi.half = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %keep.step1 = fcmp fast olt <4 x half> %vec4, %hi.half
  %best.step1 = select <4 x i1> %keep.step1, <4 x half> %vec4, <4 x half> %hi.half
  ; Step 2: compare lane 0 against lane 1 and keep the smaller.
  %lane1 = shufflevector <4 x half> %best.step1, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %keep.step2 = fcmp fast olt <4 x half> %best.step1, %lane1
  %best.step2 = select <4 x i1> %keep.step2, <4 x half> %best.step1, <4 x half> %lane1
  ; Lane 0 now holds the reduced value.
  %scalar = extractelement <4 x half> %best.step2, i32 0
  ret half %scalar
}
413