; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: opt -S -mtriple=amdgcn-- -mcpu=tonga -amdgpu-codegenprepare %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; i3 add: untouched on SI; widened to i32 (zext operands, trunc result) on VI.
; GCN-LABEL: @add_i3(
; SI: %r = add i3 %a, %b
; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
; VI-NEXT: store volatile i3 %[[R_3]]
define amdgpu_kernel void @add_i3(i3 %a, i3 %b) {
  %r = add i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

; i3 add nsw: untouched on SI; widened to i32 on VI.
; GCN-LABEL: @add_nsw_i3(
; SI: %r = add nsw i3 %a, %b
; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
; VI-NEXT: store volatile i3 %[[R_3]]
define amdgpu_kernel void @add_nsw_i3(i3 %a, i3 %b) {
  %r = add nsw i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

; i3 add nuw: untouched on SI; widened to i32 on VI.
; GCN-LABEL: @add_nuw_i3(
; SI: %r = add nuw i3 %a, %b
; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
; VI-NEXT: store volatile i3 %[[R_3]]
define amdgpu_kernel void @add_nuw_i3(i3 %a, i3 %b) {
  %r = add nuw i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

; i3 add nuw nsw: untouched on SI; widened to i32 on VI.
; GCN-LABEL: @add_nuw_nsw_i3(
; SI: %r = add nuw nsw i3 %a, %b
; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
; VI-NEXT: store volatile i3 %[[R_3]]
define amdgpu_kernel void @add_nuw_nsw_i3(i3 %a, i3 %b) {
  %r = add nuw nsw i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

; i3 sub: untouched on SI; widened to i32 (gets nsw only) on VI.
; GCN-LABEL: @sub_i3(
; SI: %r = sub i3 %a, %b
; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
; VI-NEXT: store volatile i3 %[[R_3]]
define amdgpu_kernel void @sub_i3(i3 %a, i3 %b) {
  %r = sub i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

; i3 sub nsw: untouched on SI; widened to i32 on VI.
; GCN-LABEL: @sub_nsw_i3(
; SI: %r = sub nsw i3 %a, %b
; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
; VI-NEXT: store volatile i3 %[[R_3]]
define amdgpu_kernel void @sub_nsw_i3(i3 %a, i3 %b) {
  %r = sub nsw i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

; i3 sub nuw: untouched on SI; widened to i32 (nuw nsw) on VI.
; GCN-LABEL: @sub_nuw_i3(
; SI: %r = sub nuw i3 %a, %b
; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
; VI-NEXT: store volatile i3 %[[R_3]]
define amdgpu_kernel void @sub_nuw_i3(i3 %a, i3 %b) {
  %r = sub nuw i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

; i3 sub nuw nsw: untouched on SI; widened to i32 on VI.
; GCN-LABEL: @sub_nuw_nsw_i3(
; SI: %r = sub nuw nsw i3 %a, %b
; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
; VI-NEXT: store volatile i3 %[[R_3]]
define amdgpu_kernel void @sub_nuw_nsw_i3(i3 %a, i3 %b) {
  %r = sub nuw nsw i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

; i3 mul: untouched on SI; widened to i32 (gets nuw only) on VI.
; GCN-LABEL: @mul_i3(
; SI: %r = mul i3 %a, %b
; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
; VI-NEXT: store volatile i3 %[[R_3]]
define amdgpu_kernel void @mul_i3(i3 %a, i3 %b) {
  %r = mul i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

; i3 mul nsw: untouched on SI; widened to i32 on VI.
; GCN-LABEL: @mul_nsw_i3(
; SI: %r = mul nsw i3 %a, %b
; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
; VI-NEXT: store volatile i3 %[[R_3]]
define amdgpu_kernel void @mul_nsw_i3(i3 %a, i3 %b) {
  %r = mul nsw i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

; i3 mul nuw: untouched on SI; widened to i32 (nuw nsw) on VI.
; GCN-LABEL: @mul_nuw_i3(
; SI: %r = mul nuw i3 %a, %b
; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
; VI-NEXT: store volatile i3 %[[R_3]]
define amdgpu_kernel void @mul_nuw_i3(i3 %a, i3 %b) {
  %r = mul nuw i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

; i3 mul nuw nsw: untouched on SI; widened to i32 on VI.
; GCN-LABEL: @mul_nuw_nsw_i3(
; SI: %r = mul nuw nsw i3 %a, %b
; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
; VI-NEXT: store volatile i3 %[[R_3]]
define amdgpu_kernel void @mul_nuw_nsw_i3(i3 %a, i3 %b) {
  %r = mul nuw nsw i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

; i3 shl: untouched on SI; widened to i32 (nuw nsw) on VI.
; GCN-LABEL: @shl_i3(
; SI: %r = shl i3 %a, %b
; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
; VI-NEXT: store volatile i3 %[[R_3]]
define amdgpu_kernel void @shl_i3(i3 %a, i3 %b) {
  %r = shl i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

; i3 shl nsw: untouched on SI; widened to i32 on VI.
; GCN-LABEL: @shl_nsw_i3(
; SI: %r = shl nsw i3 %a, %b
; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
; VI-NEXT: store volatile i3 %[[R_3]]
define amdgpu_kernel void @shl_nsw_i3(i3 %a, i3 %b) {
  %r = shl nsw i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

; i3 shl nuw: untouched on SI; widened to i32 on VI.
; GCN-LABEL: @shl_nuw_i3(
; SI: %r = shl nuw i3 %a, %b
; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
; VI-NEXT: store volatile i3 %[[R_3]]
define amdgpu_kernel void @shl_nuw_i3(i3 %a, i3 %b) {
  %r = shl nuw i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

; i3 shl nuw nsw: untouched on SI; widened to i32 on VI.
; GCN-LABEL: @shl_nuw_nsw_i3(
; SI: %r = shl nuw nsw i3 %a, %b
; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
; VI-NEXT: store volatile i3 %[[R_3]]
define amdgpu_kernel void @shl_nuw_nsw_i3(i3 %a, i3 %b) {
  %r = shl nuw nsw i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

; i3 lshr: untouched on SI; widened to i32 (zext operands) on VI.
; GCN-LABEL: @lshr_i3(
; SI: %r = lshr i3 %a, %b
; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = lshr i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
; VI-NEXT: store volatile i3 %[[R_3]]
define amdgpu_kernel void @lshr_i3(i3 %a, i3 %b) {
  %r = lshr i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

; i3 lshr exact: the exact flag is preserved through widening on VI.
; GCN-LABEL: @lshr_exact_i3(
; SI: %r = lshr exact i3 %a, %b
; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = lshr exact i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
; VI-NEXT: store volatile i3 %[[R_3]]
define amdgpu_kernel void @lshr_exact_i3(i3 %a, i3 %b) {
  %r = lshr exact i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

; i3 ashr: signed op, so operands are sign-extended when widened on VI.
; GCN-LABEL: @ashr_i3(
; SI: %r = ashr i3 %a, %b
; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = sext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = sext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = ashr i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
; VI-NEXT: store volatile i3 %[[R_3]]
define amdgpu_kernel void @ashr_i3(i3 %a, i3 %b) {
  %r = ashr i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

; i3 ashr exact: sext widening on VI; exact flag preserved.
; GCN-LABEL: @ashr_exact_i3(
; SI: %r = ashr exact i3 %a, %b
; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = sext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = sext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = ashr exact i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
; VI-NEXT: store volatile i3 %[[R_3]]
define amdgpu_kernel void @ashr_exact_i3(i3 %a, i3 %b) {
  %r = ashr exact i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

; i3 and: untouched on SI; widened to i32 on VI.
; GCN-LABEL: @and_i3(
; SI: %r = and i3 %a, %b
; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = and i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
; VI-NEXT: store volatile i3 %[[R_3]]
define amdgpu_kernel void @and_i3(i3 %a, i3 %b) {
  %r = and i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

; i3 or: untouched on SI; widened to i32 on VI.
; GCN-LABEL: @or_i3(
; SI: %r = or i3 %a, %b
; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = or i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
; VI-NEXT: store volatile i3 %[[R_3]]
define amdgpu_kernel void @or_i3(i3 %a, i3 %b) {
  %r = or i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

; i3 xor: untouched on SI; widened to i32 on VI.
; GCN-LABEL: @xor_i3(
; SI: %r = xor i3 %a, %b
; SI-NEXT: store volatile i3 %r
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = xor i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3
; VI-NEXT: store volatile i3 %[[R_3]]
define amdgpu_kernel void @xor_i3(i3 %a, i3 %b) {
  %r = xor i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

; i3 icmp eq + select: on VI both compare and select are widened to i32 (zext).
; GCN-LABEL: @select_eq_i3(
; SI: %cmp = icmp eq i3 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b
; SI-NEXT: store volatile i3 %sel
; VI: %[[A_32_0:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp eq i32 %[[A_32_0]], %[[B_32_0]]
; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3
; VI-NEXT: store volatile i3 %[[SEL_3]]
define amdgpu_kernel void @select_eq_i3(i3 %a, i3 %b) {
  %cmp = icmp eq i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, i3 addrspace(1)* undef
  ret void
}

; i3 icmp ne + select: widened to i32 (zext) on VI.
; GCN-LABEL: @select_ne_i3(
; SI: %cmp = icmp ne i3 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b
; SI-NEXT: store volatile i3 %sel
; VI: %[[A_32_0:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp ne i32 %[[A_32_0]], %[[B_32_0]]
; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3
; VI-NEXT: store volatile i3 %[[SEL_3]]
define amdgpu_kernel void @select_ne_i3(i3 %a, i3 %b) {
  %cmp = icmp ne i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, i3 addrspace(1)* undef
  ret void
}

; i3 icmp ugt + select: unsigned, so zext widening on VI.
; GCN-LABEL: @select_ugt_i3(
; SI: %cmp = icmp ugt i3 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b
; SI-NEXT: store volatile i3 %sel
; VI: %[[A_32_0:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp ugt i32 %[[A_32_0]], %[[B_32_0]]
; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3
; VI-NEXT: store volatile i3 %[[SEL_3]]
define amdgpu_kernel void @select_ugt_i3(i3 %a, i3 %b) {
  %cmp = icmp ugt i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, i3 addrspace(1)* undef
  ret void
}

; i3 icmp uge + select: unsigned, so zext widening on VI.
; GCN-LABEL: @select_uge_i3(
; SI: %cmp = icmp uge i3 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b
; SI-NEXT: store volatile i3 %sel
; VI: %[[A_32_0:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp uge i32 %[[A_32_0]], %[[B_32_0]]
; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3
; VI-NEXT: store volatile i3 %[[SEL_3]]
define amdgpu_kernel void @select_uge_i3(i3 %a, i3 %b) {
  %cmp = icmp uge i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, i3 addrspace(1)* undef
  ret void
}

; i3 icmp ult + select: unsigned, so zext widening on VI.
; GCN-LABEL: @select_ult_i3(
; SI: %cmp = icmp ult i3 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b
; SI-NEXT: store volatile i3 %sel
; VI: %[[A_32_0:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp ult i32 %[[A_32_0]], %[[B_32_0]]
; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3
; VI-NEXT: store volatile i3 %[[SEL_3]]
define amdgpu_kernel void @select_ult_i3(i3 %a, i3 %b) {
  %cmp = icmp ult i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, i3 addrspace(1)* undef
  ret void
}

; i3 icmp ule + select: unsigned, so zext widening on VI.
; GCN-LABEL: @select_ule_i3(
; SI: %cmp = icmp ule i3 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b
; SI-NEXT: store volatile i3 %sel
; VI: %[[A_32_0:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp ule i32 %[[A_32_0]], %[[B_32_0]]
; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i3 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3
; VI-NEXT: store volatile i3 %[[SEL_3]]
define amdgpu_kernel void @select_ule_i3(i3 %a, i3 %b) {
  %cmp = icmp ule i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, i3 addrspace(1)* undef
  ret void
}

; i3 icmp sgt + select: signed, so sext widening on VI.
; GCN-LABEL: @select_sgt_i3(
; SI: %cmp = icmp sgt i3 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b
; SI-NEXT: store volatile i3 %sel
; VI: %[[A_32_0:[0-9]+]] = sext i3 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i3 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp sgt i32 %[[A_32_0]], %[[B_32_0]]
; VI-NEXT: %[[A_32_1:[0-9]+]] = sext i3 %a to i32
; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i3 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3
; VI-NEXT: store volatile i3 %[[SEL_3]]
define amdgpu_kernel void @select_sgt_i3(i3 %a, i3 %b) {
  %cmp = icmp sgt i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, i3 addrspace(1)* undef
  ret void
}

; i3 icmp sge + select: signed, so sext widening on VI.
; GCN-LABEL: @select_sge_i3(
; SI: %cmp = icmp sge i3 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b
; SI-NEXT: store volatile i3 %sel
; VI: %[[A_32_0:[0-9]+]] = sext i3 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i3 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp sge i32 %[[A_32_0]], %[[B_32_0]]
; VI-NEXT: %[[A_32_1:[0-9]+]] = sext i3 %a to i32
; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i3 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3
; VI-NEXT: store volatile i3 %[[SEL_3]]
define amdgpu_kernel void @select_sge_i3(i3 %a, i3 %b) {
  %cmp = icmp sge i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, i3 addrspace(1)* undef
  ret void
}

; i3 icmp slt + select: signed, so sext widening on VI.
; GCN-LABEL: @select_slt_i3(
; SI: %cmp = icmp slt i3 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b
; SI-NEXT: store volatile i3 %sel
; VI: %[[A_32_0:[0-9]+]] = sext i3 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i3 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp slt i32 %[[A_32_0]], %[[B_32_0]]
; VI-NEXT: %[[A_32_1:[0-9]+]] = sext i3 %a to i32
; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i3 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3
; VI-NEXT: store volatile i3 %[[SEL_3]]
define amdgpu_kernel void @select_slt_i3(i3 %a, i3 %b) {
  %cmp = icmp slt i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, i3 addrspace(1)* undef
  ret void
}

; i3 icmp sle + select: signed, so sext widening on VI.
; GCN-LABEL: @select_sle_i3(
; SI: %cmp = icmp sle i3 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b
; SI-NEXT: store volatile i3 %sel
; VI: %[[A_32_0:[0-9]+]] = sext i3 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i3 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp sle i32 %[[A_32_0]], %[[B_32_0]]
; VI-NEXT: %[[A_32_1:[0-9]+]] = sext i3 %a to i32
; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i3 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3
; VI-NEXT: store volatile i3 %[[SEL_3]]
define amdgpu_kernel void @select_sle_i3(i3 %a, i3 %b) {
  %cmp = icmp sle i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, i3 addrspace(1)* undef
  ret void
}

declare i3 @llvm.bitreverse.i3(i3)
; i3 bitreverse: on VI it becomes a 32-bit bitreverse, shifted right by 29
; (32 - 3) to move the reversed bits back into the low i3, then truncated.
; GCN-LABEL: @bitreverse_i3(
; SI: %brev = call i3 @llvm.bitreverse.i3(i3 %a)
; SI-NEXT: store volatile i3 %brev
; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32
; VI-NEXT: %[[R_32:[0-9]+]] = call i32 @llvm.bitreverse.i32(i32 %[[A_32]])
; VI-NEXT: %[[S_32:[0-9]+]] = lshr i32 %[[R_32]], 29
; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[S_32]] to i3
; VI-NEXT: store volatile i3 %[[R_3]]
define amdgpu_kernel void @bitreverse_i3(i3 %a) {
  %brev = call i3 @llvm.bitreverse.i3(i3 %a)
  store volatile i3 %brev, i3 addrspace(1)* undef
  ret void
}

; i16 add: untouched on SI; widened to i32 on VI.
; GCN-LABEL: @add_i16(
; SI: %r = add i16 %a, %b
; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI-NEXT: store volatile i16 %[[R_16]]
define amdgpu_kernel void @add_i16(i16 %a, i16 %b) {
  %r = add i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

; Constant operands: the add is folded away; only the folded store remains.
; GCN-LABEL: @constant_add_i16(
; VI: store volatile i16 3
define amdgpu_kernel void @constant_add_i16() {
  %r = add i16 1, 2
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

; Constant nsw add is folded to a store of the constant.
; GCN-LABEL: @constant_add_nsw_i16(
; VI: store volatile i16 3
define amdgpu_kernel void @constant_add_nsw_i16() {
  %r = add nsw i16 1, 2
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

; Constant nuw add is folded to a store of the constant.
; FIX(review): the body used `add nsw`, duplicating @constant_add_nsw_i16;
; the function name says nuw, so use the nuw flag (folded result unchanged).
; GCN-LABEL: @constant_add_nuw_i16(
; VI: store volatile i16 3
define amdgpu_kernel void @constant_add_nuw_i16() {
  %r = add nuw i16 1, 2
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

; i16 add nsw: untouched on SI; widened to i32 on VI.
; GCN-LABEL: @add_nsw_i16(
; SI: %r = add nsw i16 %a, %b
; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI-NEXT: store volatile i16 %[[R_16]]
define amdgpu_kernel void @add_nsw_i16(i16 %a, i16 %b) {
  %r = add nsw i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

; i16 add nuw: untouched on SI; widened to i32 on VI.
; GCN-LABEL: @add_nuw_i16(
; SI: %r = add nuw i16 %a, %b
; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI-NEXT: store volatile i16 %[[R_16]]
define amdgpu_kernel void @add_nuw_i16(i16 %a, i16 %b) {
  %r = add nuw i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

; i16 add nuw nsw: untouched on SI; widened to i32 on VI.
; GCN-LABEL: @add_nuw_nsw_i16(
; SI: %r = add nuw nsw i16 %a, %b
; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI-NEXT: store volatile i16 %[[R_16]]
define amdgpu_kernel void @add_nuw_nsw_i16(i16 %a, i16 %b) {
  %r = add nuw nsw i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

; i16 sub: untouched on SI; widened to i32 (gets nsw only) on VI.
; GCN-LABEL: @sub_i16(
; SI: %r = sub i16 %a, %b
; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI-NEXT: store volatile i16 %[[R_16]]
define amdgpu_kernel void @sub_i16(i16 %a, i16 %b) {
  %r = sub i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

; i16 sub nsw: untouched on SI; widened to i32 on VI.
; GCN-LABEL: @sub_nsw_i16(
; SI: %r = sub nsw i16 %a, %b
; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI-NEXT: store volatile i16 %[[R_16]]
define amdgpu_kernel void @sub_nsw_i16(i16 %a, i16 %b) {
  %r = sub nsw i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

; i16 sub nuw: untouched on SI; widened to i32 (nuw nsw) on VI.
; GCN-LABEL: @sub_nuw_i16(
; SI: %r = sub nuw i16 %a, %b
; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI-NEXT: store volatile i16 %[[R_16]]
define amdgpu_kernel void @sub_nuw_i16(i16 %a, i16 %b) {
  %r = sub nuw i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

; i16 sub nuw nsw: untouched on SI; widened to i32 on VI.
; GCN-LABEL: @sub_nuw_nsw_i16(
; SI: %r = sub nuw nsw i16 %a, %b
; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI-NEXT: store volatile i16 %[[R_16]]
define amdgpu_kernel void @sub_nuw_nsw_i16(i16 %a, i16 %b) {
  %r = sub nuw nsw i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

; i16 mul: untouched on SI; widened to i32 (gets nuw only) on VI.
; GCN-LABEL: @mul_i16(
; SI: %r = mul i16 %a, %b
; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI-NEXT: store volatile i16 %[[R_16]]
define amdgpu_kernel void @mul_i16(i16 %a, i16 %b) {
  %r = mul i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

; i16 mul nsw: untouched on SI; widened to i32 on VI.
; GCN-LABEL: @mul_nsw_i16(
; SI: %r = mul nsw i16 %a, %b
; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI-NEXT: store volatile i16 %[[R_16]]
define amdgpu_kernel void @mul_nsw_i16(i16 %a, i16 %b) {
  %r = mul nsw i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

; i16 mul nuw: untouched on SI; widened to i32 (nuw nsw) on VI.
; GCN-LABEL: @mul_nuw_i16(
; SI: %r = mul nuw i16 %a, %b
; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI-NEXT: store volatile i16 %[[R_16]]
define amdgpu_kernel void @mul_nuw_i16(i16 %a, i16 %b) {
  %r = mul nuw i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

; i16 mul nuw nsw: untouched on SI; widened to i32 on VI.
; GCN-LABEL: @mul_nuw_nsw_i16(
; SI: %r = mul nuw nsw i16 %a, %b
; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI-NEXT: store volatile i16 %[[R_16]]
define amdgpu_kernel void @mul_nuw_nsw_i16(i16 %a, i16 %b) {
  %r = mul nuw nsw i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

; i16 shl: untouched on SI; widened to i32 (nuw nsw) on VI.
; GCN-LABEL: @shl_i16(
; SI: %r = shl i16 %a, %b
; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI-NEXT: store volatile i16 %[[R_16]]
define amdgpu_kernel void @shl_i16(i16 %a, i16 %b) {
  %r = shl i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

; i16 shl nsw: untouched on SI; widened to i32 on VI.
; GCN-LABEL: @shl_nsw_i16(
; SI: %r = shl nsw i16 %a, %b
; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI-NEXT: store volatile i16 %[[R_16]]
define amdgpu_kernel void @shl_nsw_i16(i16 %a, i16 %b) {
  %r = shl nsw i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

; i16 shl nuw: untouched on SI; widened to i32 on VI.
; GCN-LABEL: @shl_nuw_i16(
; SI: %r = shl nuw i16 %a, %b
; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI-NEXT: store volatile i16 %[[R_16]]
define amdgpu_kernel void @shl_nuw_i16(i16 %a, i16 %b) {
  %r = shl nuw i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

; i16 shl nuw nsw: untouched on SI; widened to i32 on VI.
; GCN-LABEL: @shl_nuw_nsw_i16(
; SI: %r = shl nuw nsw i16 %a, %b
; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI-NEXT: store volatile i16 %[[R_16]]
define amdgpu_kernel void @shl_nuw_nsw_i16(i16 %a, i16 %b) {
  %r = shl nuw nsw i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

; i16 lshr: untouched on SI; widened to i32 (zext operands) on VI.
; GCN-LABEL: @lshr_i16(
; SI: %r = lshr i16 %a, %b
; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = lshr i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI-NEXT: store volatile i16 %[[R_16]]
define amdgpu_kernel void @lshr_i16(i16 %a, i16 %b) {
  %r = lshr i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

; i16 lshr exact: exact flag preserved through widening on VI.
; GCN-LABEL: @lshr_exact_i16(
; SI: %r = lshr exact i16 %a, %b
; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = lshr exact i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI-NEXT: store volatile i16 %[[R_16]]
define amdgpu_kernel void @lshr_exact_i16(i16 %a, i16 %b) {
  %r = lshr exact i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

; i16 ashr: signed, so sext widening on VI.
; GCN-LABEL: @ashr_i16(
; SI: %r = ashr i16 %a, %b
; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = sext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = sext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = ashr i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI-NEXT: store volatile i16 %[[R_16]]
define amdgpu_kernel void @ashr_i16(i16 %a, i16 %b) {
  %r = ashr i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

; i16 ashr exact: sext widening on VI; exact flag preserved.
; GCN-LABEL: @ashr_exact_i16(
; SI: %r = ashr exact i16 %a, %b
; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = sext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = sext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = ashr exact i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI-NEXT: store volatile i16 %[[R_16]]
define amdgpu_kernel void @ashr_exact_i16(i16 %a, i16 %b) {
  %r = ashr exact i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

835; GCN-LABEL: @constant_lshr_exact_i16(
836; VI: store volatile i16 2
837define amdgpu_kernel void @constant_lshr_exact_i16(i16 %a, i16 %b) {
838  %r = lshr exact i16 4, 1
839  store volatile i16 %r, i16 addrspace(1)* undef
840  ret void
841}
842
;; i16 bitwise tests.  When the pass widens and/or/xor to i32 it
;; zero-extends both operands and truncates the result; the bit pattern in
;; the low 16 bits is unaffected by the choice of extension.
; GCN-LABEL: @and_i16(
; SI: %r = and i16 %a, %b
; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = and i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI-NEXT: store volatile i16 %[[R_16]]
define amdgpu_kernel void @and_i16(i16 %a, i16 %b) {
  %r = and i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

; GCN-LABEL: @or_i16(
; SI: %r = or i16 %a, %b
; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = or i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI-NEXT: store volatile i16 %[[R_16]]
define amdgpu_kernel void @or_i16(i16 %a, i16 %b) {
  %r = or i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

; GCN-LABEL: @xor_i16(
; SI: %r = xor i16 %a, %b
; SI-NEXT: store volatile i16 %r
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[R_32:[0-9]+]] = xor i32 %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI-NEXT: store volatile i16 %[[R_16]]
define amdgpu_kernel void @xor_i16(i16 %a, i16 %b) {
  %r = xor i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}
884
;; i16 icmp + select tests, one per predicate.  When widened, the operands
;; are extended twice — once for the compare and once for the select — so
;; the checks capture two independent extension pairs.  Unsigned/equality
;; predicates use zext; signed predicates (sgt/sge/slt/sle) use sext so the
;; i32 comparison preserves the i16 signed ordering.
; GCN-LABEL: @select_eq_i16(
; SI: %cmp = icmp eq i16 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
; SI-NEXT: store volatile i16 %sel
; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp eq i32 %[[A_32_0]], %[[B_32_0]]
; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
; VI-NEXT: store volatile i16 %[[SEL_16]]
define amdgpu_kernel void @select_eq_i16(i16 %a, i16 %b) {
  %cmp = icmp eq i16 %a, %b
  %sel = select i1 %cmp, i16 %a, i16 %b
  store volatile i16 %sel, i16 addrspace(1)* undef
  ret void
}

; GCN-LABEL: @select_ne_i16(
; SI: %cmp = icmp ne i16 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
; SI-NEXT: store volatile i16 %sel
; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp ne i32 %[[A_32_0]], %[[B_32_0]]
; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
; VI-NEXT: store volatile i16 %[[SEL_16]]
define amdgpu_kernel void @select_ne_i16(i16 %a, i16 %b) {
  %cmp = icmp ne i16 %a, %b
  %sel = select i1 %cmp, i16 %a, i16 %b
  store volatile i16 %sel, i16 addrspace(1)* undef
  ret void
}

; GCN-LABEL: @select_ugt_i16(
; SI: %cmp = icmp ugt i16 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
; SI-NEXT: store volatile i16 %sel
; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp ugt i32 %[[A_32_0]], %[[B_32_0]]
; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
; VI-NEXT: store volatile i16 %[[SEL_16]]
define amdgpu_kernel void @select_ugt_i16(i16 %a, i16 %b) {
  %cmp = icmp ugt i16 %a, %b
  %sel = select i1 %cmp, i16 %a, i16 %b
  store volatile i16 %sel, i16 addrspace(1)* undef
  ret void
}

; GCN-LABEL: @select_uge_i16(
; SI: %cmp = icmp uge i16 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
; SI-NEXT: store volatile i16 %sel
; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp uge i32 %[[A_32_0]], %[[B_32_0]]
; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
; VI-NEXT: store volatile i16 %[[SEL_16]]
define amdgpu_kernel void @select_uge_i16(i16 %a, i16 %b) {
  %cmp = icmp uge i16 %a, %b
  %sel = select i1 %cmp, i16 %a, i16 %b
  store volatile i16 %sel, i16 addrspace(1)* undef
  ret void
}

; GCN-LABEL: @select_ult_i16(
; SI: %cmp = icmp ult i16 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
; SI-NEXT: store volatile i16 %sel
; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp ult i32 %[[A_32_0]], %[[B_32_0]]
; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
; VI-NEXT: store volatile i16 %[[SEL_16]]
define amdgpu_kernel void @select_ult_i16(i16 %a, i16 %b) {
  %cmp = icmp ult i16 %a, %b
  %sel = select i1 %cmp, i16 %a, i16 %b
  store volatile i16 %sel, i16 addrspace(1)* undef
  ret void
}

; GCN-LABEL: @select_ule_i16(
; SI: %cmp = icmp ule i16 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
; SI-NEXT: store volatile i16 %sel
; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp ule i32 %[[A_32_0]], %[[B_32_0]]
; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
; VI-NEXT: store volatile i16 %[[SEL_16]]
define amdgpu_kernel void @select_ule_i16(i16 %a, i16 %b) {
  %cmp = icmp ule i16 %a, %b
  %sel = select i1 %cmp, i16 %a, i16 %b
  store volatile i16 %sel, i16 addrspace(1)* undef
  ret void
}

; GCN-LABEL: @select_sgt_i16(
; SI: %cmp = icmp sgt i16 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
; SI-NEXT: store volatile i16 %sel
; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i16 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp sgt i32 %[[A_32_0]], %[[B_32_0]]
; VI-NEXT: %[[A_32_1:[0-9]+]] = sext i16 %a to i32
; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i16 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
; VI-NEXT: store volatile i16 %[[SEL_16]]
define amdgpu_kernel void @select_sgt_i16(i16 %a, i16 %b) {
  %cmp = icmp sgt i16 %a, %b
  %sel = select i1 %cmp, i16 %a, i16 %b
  store volatile i16 %sel, i16 addrspace(1)* undef
  ret void
}

; GCN-LABEL: @select_sge_i16(
; SI: %cmp = icmp sge i16 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
; SI-NEXT: store volatile i16 %sel
; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i16 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp sge i32 %[[A_32_0]], %[[B_32_0]]
; VI-NEXT: %[[A_32_1:[0-9]+]] = sext i16 %a to i32
; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i16 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
; VI-NEXT: store volatile i16 %[[SEL_16]]
define amdgpu_kernel void @select_sge_i16(i16 %a, i16 %b) {
  %cmp = icmp sge i16 %a, %b
  %sel = select i1 %cmp, i16 %a, i16 %b
  store volatile i16 %sel, i16 addrspace(1)* undef
  ret void
}

; GCN-LABEL: @select_slt_i16(
; SI: %cmp = icmp slt i16 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
; SI-NEXT: store volatile i16 %sel
; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i16 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp slt i32 %[[A_32_0]], %[[B_32_0]]
; VI-NEXT: %[[A_32_1:[0-9]+]] = sext i16 %a to i32
; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i16 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
; VI-NEXT: store volatile i16 %[[SEL_16]]
define amdgpu_kernel void @select_slt_i16(i16 %a, i16 %b) {
  %cmp = icmp slt i16 %a, %b
  %sel = select i1 %cmp, i16 %a, i16 %b
  store volatile i16 %sel, i16 addrspace(1)* undef
  ret void
}

; GCN-LABEL: @select_sle_i16(
; SI: %cmp = icmp sle i16 %a, %b
; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b
; SI-NEXT: store volatile i16 %sel
; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32
; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i16 %b to i32
; VI-NEXT: %[[CMP:[0-9]+]] = icmp sle i32 %[[A_32_0]], %[[B_32_0]]
; VI-NEXT: %[[A_32_1:[0-9]+]] = sext i16 %a to i32
; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i16 %b to i32
; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
; VI-NEXT: store volatile i16 %[[SEL_16]]
define amdgpu_kernel void @select_sle_i16(i16 %a, i16 %b) {
  %cmp = icmp sle i16 %a, %b
  %sel = select i1 %cmp, i16 %a, i16 %b
  store volatile i16 %sel, i16 addrspace(1)* undef
  ret void
}
1074
declare i16 @llvm.bitreverse.i16(i16)

;; Widening an intrinsic: the i16 bitreverse becomes an i32 bitreverse of
;; the zero-extended input, so the reversed i16 bits land in the high half
;; and a logical shift right by 16 is needed before truncating back.
; GCN-LABEL: @bitreverse_i16(
; SI: %brev = call i16 @llvm.bitreverse.i16(i16 %a)
; SI-NEXT: store volatile i16 %brev
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI-NEXT: %[[R_32:[0-9]+]] = call i32 @llvm.bitreverse.i32(i32 %[[A_32]])
; VI-NEXT: %[[S_32:[0-9]+]] = lshr i32 %[[R_32]], 16
; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[S_32]] to i16
; VI-NEXT: store volatile i16 %[[R_16]]
define amdgpu_kernel void @bitreverse_i16(i16 %a) {
  %brev = call i16 @llvm.bitreverse.i16(i16 %a)
  store volatile i16 %brev, i16 addrspace(1)* undef
  ret void
}
1090
;; Vector widening, add: each <3 x i15> operand is extended elementwise to
;; <3 x i32>.  The checks expect 'add nuw nsw' on the widened op for every
;; original flag combination (the sum of two zero-extended 15-bit values
;; cannot wrap in 32 bits).
; GCN-LABEL: @add_3xi15(
; SI: %r = add <3 x i15> %a, %b
; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> %[[R_15]]
define amdgpu_kernel void @add_3xi15(<3 x i15> %a, <3 x i15> %b) {
  %r = add <3 x i15> %a, %b
  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
  ret void
}

; GCN-LABEL: @add_nsw_3xi15(
; SI: %r = add nsw <3 x i15> %a, %b
; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> %[[R_15]]
define amdgpu_kernel void @add_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
  %r = add nsw <3 x i15> %a, %b
  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
  ret void
}

; GCN-LABEL: @add_nuw_3xi15(
; SI: %r = add nuw <3 x i15> %a, %b
; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> %[[R_15]]
define amdgpu_kernel void @add_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) {
  %r = add nuw <3 x i15> %a, %b
  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
  ret void
}

; GCN-LABEL: @add_nuw_nsw_3xi15(
; SI: %r = add nuw nsw <3 x i15> %a, %b
; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> %[[R_15]]
define amdgpu_kernel void @add_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
  %r = add nuw nsw <3 x i15> %a, %b
  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
  ret void
}
1146
;; Vector widening, sub: the widened op always carries 'nsw' (difference of
;; two 15-bit zero-extended values cannot overflow i32 signed range); 'nuw'
;; is expected only when the original sub already had it, since a plain sub
;; of unsigned values may go below zero.
; GCN-LABEL: @sub_3xi15(
; SI: %r = sub <3 x i15> %a, %b
; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> %[[R_15]]
define amdgpu_kernel void @sub_3xi15(<3 x i15> %a, <3 x i15> %b) {
  %r = sub <3 x i15> %a, %b
  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
  ret void
}

; GCN-LABEL: @sub_nsw_3xi15(
; SI: %r = sub nsw <3 x i15> %a, %b
; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> %[[R_15]]
define amdgpu_kernel void @sub_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
  %r = sub nsw <3 x i15> %a, %b
  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
  ret void
}

; GCN-LABEL: @sub_nuw_3xi15(
; SI: %r = sub nuw <3 x i15> %a, %b
; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> %[[R_15]]
define amdgpu_kernel void @sub_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) {
  %r = sub nuw <3 x i15> %a, %b
  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
  ret void
}

; GCN-LABEL: @sub_nuw_nsw_3xi15(
; SI: %r = sub nuw nsw <3 x i15> %a, %b
; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> %[[R_15]]
define amdgpu_kernel void @sub_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
  %r = sub nuw nsw <3 x i15> %a, %b
  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
  ret void
}
1202
;; Vector widening, mul: the widened op always carries 'nuw' (product of
;; two zero-extended 15-bit values fits in 32 bits unsigned); 'nsw' is
;; expected only when the original mul already carried 'nuw'.
; GCN-LABEL: @mul_3xi15(
; SI: %r = mul <3 x i15> %a, %b
; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> %[[R_15]]
define amdgpu_kernel void @mul_3xi15(<3 x i15> %a, <3 x i15> %b) {
  %r = mul <3 x i15> %a, %b
  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
  ret void
}

; GCN-LABEL: @mul_nsw_3xi15(
; SI: %r = mul nsw <3 x i15> %a, %b
; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> %[[R_15]]
define amdgpu_kernel void @mul_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
  %r = mul nsw <3 x i15> %a, %b
  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
  ret void
}

; GCN-LABEL: @mul_nuw_3xi15(
; SI: %r = mul nuw <3 x i15> %a, %b
; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> %[[R_15]]
define amdgpu_kernel void @mul_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) {
  %r = mul nuw <3 x i15> %a, %b
  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
  ret void
}

; GCN-LABEL: @mul_nuw_nsw_3xi15(
; SI: %r = mul nuw nsw <3 x i15> %a, %b
; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> %[[R_15]]
define amdgpu_kernel void @mul_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
  %r = mul nuw nsw <3 x i15> %a, %b
  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
  ret void
}
1258
;; Vector widening, shl: all flag variants are expected to widen to
;; 'shl nuw nsw' on <3 x i32> with zero-extended operands.
; GCN-LABEL: @shl_3xi15(
; SI: %r = shl <3 x i15> %a, %b
; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> %[[R_15]]
define amdgpu_kernel void @shl_3xi15(<3 x i15> %a, <3 x i15> %b) {
  %r = shl <3 x i15> %a, %b
  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
  ret void
}

; GCN-LABEL: @shl_nsw_3xi15(
; SI: %r = shl nsw <3 x i15> %a, %b
; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> %[[R_15]]
define amdgpu_kernel void @shl_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
  %r = shl nsw <3 x i15> %a, %b
  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
  ret void
}

; GCN-LABEL: @shl_nuw_3xi15(
; SI: %r = shl nuw <3 x i15> %a, %b
; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> %[[R_15]]
define amdgpu_kernel void @shl_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) {
  %r = shl nuw <3 x i15> %a, %b
  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
  ret void
}

; GCN-LABEL: @shl_nuw_nsw_3xi15(
; SI: %r = shl nuw nsw <3 x i15> %a, %b
; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> %[[R_15]]
define amdgpu_kernel void @shl_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
  %r = shl nuw nsw <3 x i15> %a, %b
  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
  ret void
}
1314
;; Vector widening, right shifts: logical shifts zero-extend the operands,
;; arithmetic shifts sign-extend them; the 'exact' flag is preserved on the
;; widened instruction.
; GCN-LABEL: @lshr_3xi15(
; SI: %r = lshr <3 x i15> %a, %b
; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = lshr <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> %[[R_15]]
define amdgpu_kernel void @lshr_3xi15(<3 x i15> %a, <3 x i15> %b) {
  %r = lshr <3 x i15> %a, %b
  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
  ret void
}

; GCN-LABEL: @lshr_exact_3xi15(
; SI: %r = lshr exact <3 x i15> %a, %b
; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = lshr exact <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> %[[R_15]]
define amdgpu_kernel void @lshr_exact_3xi15(<3 x i15> %a, <3 x i15> %b) {
  %r = lshr exact <3 x i15> %a, %b
  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
  ret void
}

; GCN-LABEL: @ashr_3xi15(
; SI: %r = ashr <3 x i15> %a, %b
; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = sext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = sext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = ashr <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> %[[R_15]]
define amdgpu_kernel void @ashr_3xi15(<3 x i15> %a, <3 x i15> %b) {
  %r = ashr <3 x i15> %a, %b
  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
  ret void
}

; GCN-LABEL: @ashr_exact_3xi15(
; SI: %r = ashr exact <3 x i15> %a, %b
; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = sext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = sext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = ashr exact <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> %[[R_15]]
define amdgpu_kernel void @ashr_exact_3xi15(<3 x i15> %a, <3 x i15> %b) {
  %r = ashr exact <3 x i15> %a, %b
  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
  ret void
}
1370
;; Vector widening, bitwise ops: zero-extended operands, no wrap flags —
;; the low 15 bits of the result are unaffected by the extension choice.
; GCN-LABEL: @and_3xi15(
; SI: %r = and <3 x i15> %a, %b
; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = and <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> %[[R_15]]
define amdgpu_kernel void @and_3xi15(<3 x i15> %a, <3 x i15> %b) {
  %r = and <3 x i15> %a, %b
  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
  ret void
}

; GCN-LABEL: @or_3xi15(
; SI: %r = or <3 x i15> %a, %b
; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = or <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> %[[R_15]]
define amdgpu_kernel void @or_3xi15(<3 x i15> %a, <3 x i15> %b) {
  %r = or <3 x i15> %a, %b
  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
  ret void
}

; GCN-LABEL: @xor_3xi15(
; SI: %r = xor <3 x i15> %a, %b
; SI-NEXT: store volatile <3 x i15> %r
; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
; VI-NEXT: %[[R_32:[0-9]+]] = xor <3 x i32> %[[A_32]], %[[B_32]]
; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
; VI-NEXT: store volatile <3 x i15> %[[R_15]]
define amdgpu_kernel void @xor_3xi15(<3 x i15> %a, <3 x i15> %b) {
  %r = xor <3 x i15> %a, %b
  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
  ret void
}
1412
1413; GCN-LABEL: @select_eq_3xi15(
1414; SI: %cmp = icmp eq <3 x i15> %a, %b
1415; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1416; SI-NEXT: store volatile <3 x i15> %sel
1417; VI: %[[A_32_0:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1418; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1419; VI-NEXT: %[[CMP:[0-9]+]] = icmp eq <3 x i32> %[[A_32_0]], %[[B_32_0]]
1420; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1421; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1422; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
1423; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15>
1424; VI-NEXT: store volatile <3 x i15> %[[SEL_15]]
1425define amdgpu_kernel void @select_eq_3xi15(<3 x i15> %a, <3 x i15> %b) {
1426  %cmp = icmp eq <3 x i15> %a, %b
1427  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1428  store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
1429  ret void
1430}
1431
1432; GCN-LABEL: @select_ne_3xi15(
1433; SI: %cmp = icmp ne <3 x i15> %a, %b
1434; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1435; SI-NEXT: store volatile <3 x i15> %sel
1436; VI: %[[A_32_0:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1437; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1438; VI-NEXT: %[[CMP:[0-9]+]] = icmp ne <3 x i32> %[[A_32_0]], %[[B_32_0]]
1439; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1440; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1441; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
1442; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15>
1443; VI-NEXT: store volatile <3 x i15> %[[SEL_15]]
1444define amdgpu_kernel void @select_ne_3xi15(<3 x i15> %a, <3 x i15> %b) {
1445  %cmp = icmp ne <3 x i15> %a, %b
1446  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1447  store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
1448  ret void
1449}
1450
1451; GCN-LABEL: @select_ugt_3xi15(
1452; SI: %cmp = icmp ugt <3 x i15> %a, %b
1453; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1454; SI-NEXT: store volatile <3 x i15> %sel
1455; VI: %[[A_32_0:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1456; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1457; VI-NEXT: %[[CMP:[0-9]+]] = icmp ugt <3 x i32> %[[A_32_0]], %[[B_32_0]]
1458; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1459; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1460; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
1461; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15>
1462; VI-NEXT: store volatile <3 x i15> %[[SEL_15]]
1463define amdgpu_kernel void @select_ugt_3xi15(<3 x i15> %a, <3 x i15> %b) {
1464  %cmp = icmp ugt <3 x i15> %a, %b
1465  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1466  store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
1467  ret void
1468}
1469
1470; GCN-LABEL: @select_uge_3xi15(
1471; SI: %cmp = icmp uge <3 x i15> %a, %b
1472; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1473; SI-NEXT: store volatile <3 x i15> %sel
1474; VI: %[[A_32_0:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1475; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1476; VI-NEXT: %[[CMP:[0-9]+]] = icmp uge <3 x i32> %[[A_32_0]], %[[B_32_0]]
1477; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1478; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1479; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
1480; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15>
1481; VI-NEXT: store volatile <3 x i15> %[[SEL_15]]
1482define amdgpu_kernel void @select_uge_3xi15(<3 x i15> %a, <3 x i15> %b) {
1483  %cmp = icmp uge <3 x i15> %a, %b
1484  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1485  store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
1486  ret void
1487}
1488
1489; GCN-LABEL: @select_ult_3xi15(
1490; SI: %cmp = icmp ult <3 x i15> %a, %b
1491; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1492; SI-NEXT: store volatile <3 x i15> %sel
1493; VI: %[[A_32_0:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1494; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1495; VI-NEXT: %[[CMP:[0-9]+]] = icmp ult <3 x i32> %[[A_32_0]], %[[B_32_0]]
1496; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1497; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1498; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
1499; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15>
1500; VI-NEXT: store volatile <3 x i15> %[[SEL_15]]
1501define amdgpu_kernel void @select_ult_3xi15(<3 x i15> %a, <3 x i15> %b) {
1502  %cmp = icmp ult <3 x i15> %a, %b
1503  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1504  store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
1505  ret void
1506}
1507
1508; GCN-LABEL: @select_ule_3xi15(
1509; SI: %cmp = icmp ule <3 x i15> %a, %b
1510; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1511; SI-NEXT: store volatile <3 x i15> %sel
1512; VI: %[[A_32_0:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1513; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1514; VI-NEXT: %[[CMP:[0-9]+]] = icmp ule <3 x i32> %[[A_32_0]], %[[B_32_0]]
1515; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1516; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
1517; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
1518; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15>
1519; VI-NEXT: store volatile <3 x i15> %[[SEL_15]]
; Same as the ult case: VI zexts the i15 operands to i32 for the unsigned
; ule compare and the select, then truncates; SI is unchanged.
define amdgpu_kernel void @select_ule_3xi15(<3 x i15> %a, <3 x i15> %b) {
  %cmp = icmp ule <3 x i15> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
  store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
  ret void
}
1526
1527; GCN-LABEL: @select_sgt_3xi15(
1528; SI: %cmp = icmp sgt <3 x i15> %a, %b
1529; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1530; SI-NEXT: store volatile <3 x i15> %sel
1531; VI: %[[A_32_0:[0-9]+]] = sext <3 x i15> %a to <3 x i32>
1532; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i15> %b to <3 x i32>
1533; VI-NEXT: %[[CMP:[0-9]+]] = icmp sgt <3 x i32> %[[A_32_0]], %[[B_32_0]]
1534; VI-NEXT: %[[A_32_1:[0-9]+]] = sext <3 x i15> %a to <3 x i32>
1535; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i15> %b to <3 x i32>
1536; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
1537; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15>
1538; VI-NEXT: store volatile <3 x i15> %[[SEL_15]]
; Signed predicate: VI must widen with sext (not zext) so the sgt compare
; sees the correct sign of the i15 values; result is truncated back to i15.
define amdgpu_kernel void @select_sgt_3xi15(<3 x i15> %a, <3 x i15> %b) {
  %cmp = icmp sgt <3 x i15> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
  store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
  ret void
}
1545
1546; GCN-LABEL: @select_sge_3xi15(
1547; SI: %cmp = icmp sge <3 x i15> %a, %b
1548; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1549; SI-NEXT: store volatile <3 x i15> %sel
1550; VI: %[[A_32_0:[0-9]+]] = sext <3 x i15> %a to <3 x i32>
1551; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i15> %b to <3 x i32>
1552; VI-NEXT: %[[CMP:[0-9]+]] = icmp sge <3 x i32> %[[A_32_0]], %[[B_32_0]]
1553; VI-NEXT: %[[A_32_1:[0-9]+]] = sext <3 x i15> %a to <3 x i32>
1554; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i15> %b to <3 x i32>
1555; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
1556; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15>
1557; VI-NEXT: store volatile <3 x i15> %[[SEL_15]]
; Signed sge variant: VI widens operands with sext for both the compare and
; the select, then truncates the i32 result back to <3 x i15>.
define amdgpu_kernel void @select_sge_3xi15(<3 x i15> %a, <3 x i15> %b) {
  %cmp = icmp sge <3 x i15> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
  store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
  ret void
}
1564
1565; GCN-LABEL: @select_slt_3xi15(
1566; SI: %cmp = icmp slt <3 x i15> %a, %b
1567; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1568; SI-NEXT: store volatile <3 x i15> %sel
1569; VI: %[[A_32_0:[0-9]+]] = sext <3 x i15> %a to <3 x i32>
1570; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i15> %b to <3 x i32>
1571; VI-NEXT: %[[CMP:[0-9]+]] = icmp slt <3 x i32> %[[A_32_0]], %[[B_32_0]]
1572; VI-NEXT: %[[A_32_1:[0-9]+]] = sext <3 x i15> %a to <3 x i32>
1573; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i15> %b to <3 x i32>
1574; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
1575; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15>
1576; VI-NEXT: store volatile <3 x i15> %[[SEL_15]]
; Signed slt variant: sext-widened on VI, untouched on SI.
define amdgpu_kernel void @select_slt_3xi15(<3 x i15> %a, <3 x i15> %b) {
  %cmp = icmp slt <3 x i15> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
  store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
  ret void
}
1583
1584; GCN-LABEL: @select_sle_3xi15(
1585; SI: %cmp = icmp sle <3 x i15> %a, %b
1586; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1587; SI-NEXT: store volatile <3 x i15> %sel
1588; VI: %[[A_32_0:[0-9]+]] = sext <3 x i15> %a to <3 x i32>
1589; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i15> %b to <3 x i32>
1590; VI-NEXT: %[[CMP:[0-9]+]] = icmp sle <3 x i32> %[[A_32_0]], %[[B_32_0]]
1591; VI-NEXT: %[[A_32_1:[0-9]+]] = sext <3 x i15> %a to <3 x i32>
1592; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i15> %b to <3 x i32>
1593; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
1594; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15>
1595; VI-NEXT: store volatile <3 x i15> %[[SEL_15]]
; Signed sle variant: sext-widened on VI, untouched on SI.
define amdgpu_kernel void @select_sle_3xi15(<3 x i15> %a, <3 x i15> %b) {
  %cmp = icmp sle <3 x i15> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
  store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
  ret void
}
1602
1603declare <3 x i15> @llvm.bitreverse.v3i15(<3 x i15>)
1604; GCN-LABEL: @bitreverse_3xi15(
1605; SI: %brev = call <3 x i15> @llvm.bitreverse.v3i15(<3 x i15> %a)
1606; SI-NEXT: store volatile <3 x i15> %brev
1607; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
1608; VI-NEXT: %[[R_32:[0-9]+]] = call <3 x i32> @llvm.bitreverse.v3i32(<3 x i32> %[[A_32]])
1609; VI-NEXT: %[[S_32:[0-9]+]] = lshr <3 x i32> %[[R_32]], <i32 17, i32 17, i32 17>
1610; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[S_32]] to <3 x i15>
1611; VI-NEXT: store volatile <3 x i15> %[[R_15]]
; VI promotes the i15 bitreverse to @llvm.bitreverse.v3i32 on zexted input;
; the reversed bits land in the top of the i32, so a lshr by 17 (= 32 - 15)
; shifts them down before truncating back to <3 x i15>.
define amdgpu_kernel void @bitreverse_3xi15(<3 x i15> %a) {
  %brev = call <3 x i15> @llvm.bitreverse.v3i15(<3 x i15> %a)
  store volatile <3 x i15> %brev, <3 x i15> addrspace(1)* undef
  ret void
}
1617
1618; GCN-LABEL: @add_3xi16(
1619; SI: %r = add <3 x i16> %a, %b
1620; SI-NEXT: store volatile <3 x i16> %r
1621; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1622; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1623; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1624; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1625; VI-NEXT: store volatile <3 x i16> %[[R_16]]
; VI widens the i16 add to i32 with zexted operands; the checks expect the
; widened add to gain both nuw and nsw. SI leaves the i16 op as-is.
define amdgpu_kernel void @add_3xi16(<3 x i16> %a, <3 x i16> %b) {
  %r = add <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}
1631
1632; GCN-LABEL: @add_nsw_3xi16(
1633; SI: %r = add nsw <3 x i16> %a, %b
1634; SI-NEXT: store volatile <3 x i16> %r
1635; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1636; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1637; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1638; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1639; VI-NEXT: store volatile <3 x i16> %[[R_16]]
; Same widening as @add_3xi16; the source-level nsw flag yields the same
; nuw nsw i32 add on VI.
define amdgpu_kernel void @add_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
  %r = add nsw <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}
1645
1646; GCN-LABEL: @add_nuw_3xi16(
1647; SI: %r = add nuw <3 x i16> %a, %b
1648; SI-NEXT: store volatile <3 x i16> %r
1649; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1650; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1651; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1652; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1653; VI-NEXT: store volatile <3 x i16> %[[R_16]]
; Same widening as @add_3xi16; the source-level nuw flag yields the same
; nuw nsw i32 add on VI.
define amdgpu_kernel void @add_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
  %r = add nuw <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}
1659
1660; GCN-LABEL: @add_nuw_nsw_3xi16(
1661; SI: %r = add nuw nsw <3 x i16> %a, %b
1662; SI-NEXT: store volatile <3 x i16> %r
1663; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1664; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1665; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1666; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1667; VI-NEXT: store volatile <3 x i16> %[[R_16]]
; Same widening as @add_3xi16 with both wrap flags already present.
define amdgpu_kernel void @add_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
  %r = add nuw nsw <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}
1673
1674; GCN-LABEL: @sub_3xi16(
1675; SI: %r = sub <3 x i16> %a, %b
1676; SI-NEXT: store volatile <3 x i16> %r
1677; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1678; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1679; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw <3 x i32> %[[A_32]], %[[B_32]]
1680; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1681; VI-NEXT: store volatile <3 x i16> %[[R_16]]
; VI widens the i16 sub to i32 on zexted operands; the checks expect only
; nsw on the widened sub (no nuw, unlike the widened add cases).
define amdgpu_kernel void @sub_3xi16(<3 x i16> %a, <3 x i16> %b) {
  %r = sub <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}
1687
1688; GCN-LABEL: @sub_nsw_3xi16(
1689; SI: %r = sub nsw <3 x i16> %a, %b
1690; SI-NEXT: store volatile <3 x i16> %r
1691; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1692; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1693; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw <3 x i32> %[[A_32]], %[[B_32]]
1694; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1695; VI-NEXT: store volatile <3 x i16> %[[R_16]]
; Same as @sub_3xi16; source nsw produces the same nsw-only widened sub.
define amdgpu_kernel void @sub_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
  %r = sub nsw <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}
1701
1702; GCN-LABEL: @sub_nuw_3xi16(
1703; SI: %r = sub nuw <3 x i16> %a, %b
1704; SI-NEXT: store volatile <3 x i16> %r
1705; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1706; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1707; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1708; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1709; VI-NEXT: store volatile <3 x i16> %[[R_16]]
; With source nuw the checks expect the widened sub to carry nuw nsw.
define amdgpu_kernel void @sub_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
  %r = sub nuw <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}
1715
1716; GCN-LABEL: @sub_nuw_nsw_3xi16(
1717; SI: %r = sub nuw nsw <3 x i16> %a, %b
1718; SI-NEXT: store volatile <3 x i16> %r
1719; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1720; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1721; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1722; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1723; VI-NEXT: store volatile <3 x i16> %[[R_16]]
; Both wrap flags present in the source; widened sub keeps nuw nsw on VI.
define amdgpu_kernel void @sub_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
  %r = sub nuw nsw <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}
1729
1730; GCN-LABEL: @mul_3xi16(
1731; SI: %r = mul <3 x i16> %a, %b
1732; SI-NEXT: store volatile <3 x i16> %r
1733; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1734; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1735; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw <3 x i32> %[[A_32]], %[[B_32]]
1736; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1737; VI-NEXT: store volatile <3 x i16> %[[R_16]]
; VI widens the i16 mul to i32 on zexted operands; checks expect nuw only.
define amdgpu_kernel void @mul_3xi16(<3 x i16> %a, <3 x i16> %b) {
  %r = mul <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}
1743
1744; GCN-LABEL: @mul_nsw_3xi16(
1745; SI: %r = mul nsw <3 x i16> %a, %b
1746; SI-NEXT: store volatile <3 x i16> %r
1747; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1748; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1749; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw <3 x i32> %[[A_32]], %[[B_32]]
1750; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1751; VI-NEXT: store volatile <3 x i16> %[[R_16]]
; Source nsw on mul: the widened i32 mul on VI is still expected nuw-only.
define amdgpu_kernel void @mul_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
  %r = mul nsw <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}
1757
1758; GCN-LABEL: @mul_nuw_3xi16(
1759; SI: %r = mul nuw <3 x i16> %a, %b
1760; SI-NEXT: store volatile <3 x i16> %r
1761; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1762; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1763; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1764; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1765; VI-NEXT: store volatile <3 x i16> %[[R_16]]
; Source nuw on mul: checks expect the widened mul to carry nuw nsw on VI.
define amdgpu_kernel void @mul_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
  %r = mul nuw <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}
1771
1772; GCN-LABEL: @mul_nuw_nsw_3xi16(
1773; SI: %r = mul nuw nsw <3 x i16> %a, %b
1774; SI-NEXT: store volatile <3 x i16> %r
1775; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1776; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1777; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1778; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1779; VI-NEXT: store volatile <3 x i16> %[[R_16]]
; Both wrap flags in the source; widened mul keeps nuw nsw on VI.
define amdgpu_kernel void @mul_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
  %r = mul nuw nsw <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}
1785
1786; GCN-LABEL: @shl_3xi16(
1787; SI: %r = shl <3 x i16> %a, %b
1788; SI-NEXT: store volatile <3 x i16> %r
1789; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1790; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1791; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1792; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1793; VI-NEXT: store volatile <3 x i16> %[[R_16]]
; VI widens the i16 shl to i32 on zexted operands; checks expect nuw nsw on
; the widened shift regardless of the source flags (see the variants below).
define amdgpu_kernel void @shl_3xi16(<3 x i16> %a, <3 x i16> %b) {
  %r = shl <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}
1799
1800; GCN-LABEL: @shl_nsw_3xi16(
1801; SI: %r = shl nsw <3 x i16> %a, %b
1802; SI-NEXT: store volatile <3 x i16> %r
1803; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1804; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1805; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1806; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1807; VI-NEXT: store volatile <3 x i16> %[[R_16]]
; nsw source flag; widened shl still expected as nuw nsw on VI.
define amdgpu_kernel void @shl_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
  %r = shl nsw <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}
1813
1814; GCN-LABEL: @shl_nuw_3xi16(
1815; SI: %r = shl nuw <3 x i16> %a, %b
1816; SI-NEXT: store volatile <3 x i16> %r
1817; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1818; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1819; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1820; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1821; VI-NEXT: store volatile <3 x i16> %[[R_16]]
; nuw source flag; widened shl still expected as nuw nsw on VI.
define amdgpu_kernel void @shl_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
  %r = shl nuw <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}
1827
1828; GCN-LABEL: @shl_nuw_nsw_3xi16(
1829; SI: %r = shl nuw nsw <3 x i16> %a, %b
1830; SI-NEXT: store volatile <3 x i16> %r
1831; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1832; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1833; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
1834; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1835; VI-NEXT: store volatile <3 x i16> %[[R_16]]
; Both wrap flags in the source; widened shl keeps nuw nsw on VI.
define amdgpu_kernel void @shl_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
  %r = shl nuw nsw <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}
1841
1842; GCN-LABEL: @lshr_3xi16(
1843; SI: %r = lshr <3 x i16> %a, %b
1844; SI-NEXT: store volatile <3 x i16> %r
1845; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1846; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1847; VI-NEXT: %[[R_32:[0-9]+]] = lshr <3 x i32> %[[A_32]], %[[B_32]]
1848; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1849; VI-NEXT: store volatile <3 x i16> %[[R_16]]
; Logical shift right: VI zexts both operands (zero high bits match lshr
; semantics) and the widened lshr carries no extra flags.
define amdgpu_kernel void @lshr_3xi16(<3 x i16> %a, <3 x i16> %b) {
  %r = lshr <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}
1855
1856; GCN-LABEL: @lshr_exact_3xi16(
1857; SI: %r = lshr exact <3 x i16> %a, %b
1858; SI-NEXT: store volatile <3 x i16> %r
1859; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1860; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1861; VI-NEXT: %[[R_32:[0-9]+]] = lshr exact <3 x i32> %[[A_32]], %[[B_32]]
1862; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1863; VI-NEXT: store volatile <3 x i16> %[[R_16]]
; The exact flag on the source lshr is preserved on the widened i32 lshr.
define amdgpu_kernel void @lshr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) {
  %r = lshr exact <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}
1869
1870; GCN-LABEL: @ashr_3xi16(
1871; SI: %r = ashr <3 x i16> %a, %b
1872; SI-NEXT: store volatile <3 x i16> %r
1873; VI: %[[A_32:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
1874; VI-NEXT: %[[B_32:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
1875; VI-NEXT: %[[R_32:[0-9]+]] = ashr <3 x i32> %[[A_32]], %[[B_32]]
1876; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1877; VI-NEXT: store volatile <3 x i16> %[[R_16]]
; Arithmetic shift right: VI must sext the operands so the widened ashr
; shifts in the correct sign bits.
define amdgpu_kernel void @ashr_3xi16(<3 x i16> %a, <3 x i16> %b) {
  %r = ashr <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}
1883
1884; GCN-LABEL: @ashr_exact_3xi16(
1885; SI: %r = ashr exact <3 x i16> %a, %b
1886; SI-NEXT: store volatile <3 x i16> %r
1887; VI: %[[A_32:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
1888; VI-NEXT: %[[B_32:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
1889; VI-NEXT: %[[R_32:[0-9]+]] = ashr exact <3 x i32> %[[A_32]], %[[B_32]]
1890; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1891; VI-NEXT: store volatile <3 x i16> %[[R_16]]
; As @ashr_3xi16 (sext widening), and the exact flag is preserved.
define amdgpu_kernel void @ashr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) {
  %r = ashr exact <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}
1897
1898; GCN-LABEL: @and_3xi16(
1899; SI: %r = and <3 x i16> %a, %b
1900; SI-NEXT: store volatile <3 x i16> %r
1901; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1902; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1903; VI-NEXT: %[[R_32:[0-9]+]] = and <3 x i32> %[[A_32]], %[[B_32]]
1904; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1905; VI-NEXT: store volatile <3 x i16> %[[R_16]]
; Bitwise and: widened to i32 on VI with zexted operands, no flags involved.
define amdgpu_kernel void @and_3xi16(<3 x i16> %a, <3 x i16> %b) {
  %r = and <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}
1911
1912; GCN-LABEL: @or_3xi16(
1913; SI: %r = or <3 x i16> %a, %b
1914; SI-NEXT: store volatile <3 x i16> %r
1915; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1916; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1917; VI-NEXT: %[[R_32:[0-9]+]] = or <3 x i32> %[[A_32]], %[[B_32]]
1918; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1919; VI-NEXT: store volatile <3 x i16> %[[R_16]]
; Bitwise or: widened to i32 on VI with zexted operands, no flags involved.
define amdgpu_kernel void @or_3xi16(<3 x i16> %a, <3 x i16> %b) {
  %r = or <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}
1925
1926; GCN-LABEL: @xor_3xi16(
1927; SI: %r = xor <3 x i16> %a, %b
1928; SI-NEXT: store volatile <3 x i16> %r
1929; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1930; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1931; VI-NEXT: %[[R_32:[0-9]+]] = xor <3 x i32> %[[A_32]], %[[B_32]]
1932; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
1933; VI-NEXT: store volatile <3 x i16> %[[R_16]]
; Bitwise xor: widened to i32 on VI with zexted operands, no flags involved.
define amdgpu_kernel void @xor_3xi16(<3 x i16> %a, <3 x i16> %b) {
  %r = xor <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}
1939
1940; GCN-LABEL: @select_eq_3xi16(
1941; SI: %cmp = icmp eq <3 x i16> %a, %b
1942; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
1943; SI-NEXT: store volatile <3 x i16> %sel
1944; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1945; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1946; VI-NEXT: %[[CMP:[0-9]+]] = icmp eq <3 x i32> %[[A_32_0]], %[[B_32_0]]
1947; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1948; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1949; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
1950; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
1951; VI-NEXT: store volatile <3 x i16> %[[SEL_16]]
; Equality compare: sign is irrelevant, so VI widens with zext for both the
; icmp and the select operands, then truncates the result back to i16.
define amdgpu_kernel void @select_eq_3xi16(<3 x i16> %a, <3 x i16> %b) {
  %cmp = icmp eq <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
  ret void
}
1958
1959; GCN-LABEL: @select_ne_3xi16(
1960; SI: %cmp = icmp ne <3 x i16> %a, %b
1961; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
1962; SI-NEXT: store volatile <3 x i16> %sel
1963; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1964; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1965; VI-NEXT: %[[CMP:[0-9]+]] = icmp ne <3 x i32> %[[A_32_0]], %[[B_32_0]]
1966; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1967; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1968; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
1969; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
1970; VI-NEXT: store volatile <3 x i16> %[[SEL_16]]
; Inequality compare: zext-widened on VI like the eq case.
define amdgpu_kernel void @select_ne_3xi16(<3 x i16> %a, <3 x i16> %b) {
  %cmp = icmp ne <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
  ret void
}
1977
1978; GCN-LABEL: @select_ugt_3xi16(
1979; SI: %cmp = icmp ugt <3 x i16> %a, %b
1980; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
1981; SI-NEXT: store volatile <3 x i16> %sel
1982; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1983; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1984; VI-NEXT: %[[CMP:[0-9]+]] = icmp ugt <3 x i32> %[[A_32_0]], %[[B_32_0]]
1985; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
1986; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
1987; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
1988; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
1989; VI-NEXT: store volatile <3 x i16> %[[SEL_16]]
; Unsigned ugt: zext-widened compare and select on VI; SI untouched.
define amdgpu_kernel void @select_ugt_3xi16(<3 x i16> %a, <3 x i16> %b) {
  %cmp = icmp ugt <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
  ret void
}
1996
1997; GCN-LABEL: @select_uge_3xi16(
1998; SI: %cmp = icmp uge <3 x i16> %a, %b
1999; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
2000; SI-NEXT: store volatile <3 x i16> %sel
2001; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
2002; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
2003; VI-NEXT: %[[CMP:[0-9]+]] = icmp uge <3 x i32> %[[A_32_0]], %[[B_32_0]]
2004; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
2005; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
2006; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
2007; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
2008; VI-NEXT: store volatile <3 x i16> %[[SEL_16]]
; Unsigned uge: zext-widened compare and select on VI; SI untouched.
define amdgpu_kernel void @select_uge_3xi16(<3 x i16> %a, <3 x i16> %b) {
  %cmp = icmp uge <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
  ret void
}
2015
2016; GCN-LABEL: @select_ult_3xi16(
2017; SI: %cmp = icmp ult <3 x i16> %a, %b
2018; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
2019; SI-NEXT: store volatile <3 x i16> %sel
2020; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
2021; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
2022; VI-NEXT: %[[CMP:[0-9]+]] = icmp ult <3 x i32> %[[A_32_0]], %[[B_32_0]]
2023; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
2024; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
2025; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
2026; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
2027; VI-NEXT: store volatile <3 x i16> %[[SEL_16]]
; Unsigned ult: zext-widened compare and select on VI; SI untouched.
define amdgpu_kernel void @select_ult_3xi16(<3 x i16> %a, <3 x i16> %b) {
  %cmp = icmp ult <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
  ret void
}
2034
2035; GCN-LABEL: @select_ule_3xi16(
2036; SI: %cmp = icmp ule <3 x i16> %a, %b
2037; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
2038; SI-NEXT: store volatile <3 x i16> %sel
2039; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
2040; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
2041; VI-NEXT: %[[CMP:[0-9]+]] = icmp ule <3 x i32> %[[A_32_0]], %[[B_32_0]]
2042; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
2043; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
2044; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
2045; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
2046; VI-NEXT: store volatile <3 x i16> %[[SEL_16]]
; Unsigned ule: zext-widened compare and select on VI; SI untouched.
define amdgpu_kernel void @select_ule_3xi16(<3 x i16> %a, <3 x i16> %b) {
  %cmp = icmp ule <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
  ret void
}
2053
2054; GCN-LABEL: @select_sgt_3xi16(
2055; SI: %cmp = icmp sgt <3 x i16> %a, %b
2056; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
2057; SI-NEXT: store volatile <3 x i16> %sel
2058; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
2059; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
2060; VI-NEXT: %[[CMP:[0-9]+]] = icmp sgt <3 x i32> %[[A_32_0]], %[[B_32_0]]
2061; VI-NEXT: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
2062; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
2063; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
2064; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
2065; VI-NEXT: store volatile <3 x i16> %[[SEL_16]]
; Signed sgt: VI widens with sext so the i32 compare preserves sign.
define amdgpu_kernel void @select_sgt_3xi16(<3 x i16> %a, <3 x i16> %b) {
  %cmp = icmp sgt <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
  ret void
}
2072
2073; GCN-LABEL: @select_sge_3xi16(
2074; SI: %cmp = icmp sge <3 x i16> %a, %b
2075; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
2076; SI-NEXT: store volatile <3 x i16> %sel
2077; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
2078; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
2079; VI-NEXT: %[[CMP:[0-9]+]] = icmp sge <3 x i32> %[[A_32_0]], %[[B_32_0]]
2080; VI-NEXT: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
2081; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
2082; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
2083; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
2084; VI-NEXT: store volatile <3 x i16> %[[SEL_16]]
; Signed sge: sext-widened on VI; SI untouched.
define amdgpu_kernel void @select_sge_3xi16(<3 x i16> %a, <3 x i16> %b) {
  %cmp = icmp sge <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
  ret void
}
2091
2092; GCN-LABEL: @select_slt_3xi16(
2093; SI: %cmp = icmp slt <3 x i16> %a, %b
2094; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
2095; SI-NEXT: store volatile <3 x i16> %sel
2096; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
2097; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
2098; VI-NEXT: %[[CMP:[0-9]+]] = icmp slt <3 x i32> %[[A_32_0]], %[[B_32_0]]
2099; VI-NEXT: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
2100; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
2101; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
2102; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
2103; VI-NEXT: store volatile <3 x i16> %[[SEL_16]]
; Signed slt: sext-widened on VI; SI untouched.
define amdgpu_kernel void @select_slt_3xi16(<3 x i16> %a, <3 x i16> %b) {
  %cmp = icmp slt <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
  ret void
}
2110
2111; GCN-LABEL: @select_sle_3xi16(
2112; SI: %cmp = icmp sle <3 x i16> %a, %b
2113; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
2114; SI-NEXT: store volatile <3 x i16> %sel
2115; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
2116; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
2117; VI-NEXT: %[[CMP:[0-9]+]] = icmp sle <3 x i32> %[[A_32_0]], %[[B_32_0]]
2118; VI-NEXT: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
2119; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
2120; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
2121; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
2122; VI-NEXT: store volatile <3 x i16> %[[SEL_16]]
; Signed sle: sext-widened on VI; SI untouched.
define amdgpu_kernel void @select_sle_3xi16(<3 x i16> %a, <3 x i16> %b) {
  %cmp = icmp sle <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
  ret void
}
2129
2130declare <3 x i16> @llvm.bitreverse.v3i16(<3 x i16>)
2131
2132; GCN-LABEL: @bitreverse_3xi16(
2133; SI: %brev = call <3 x i16> @llvm.bitreverse.v3i16(<3 x i16> %a)
2134; SI-NEXT: store volatile <3 x i16> %brev
2135; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
2136; VI-NEXT: %[[R_32:[0-9]+]] = call <3 x i32> @llvm.bitreverse.v3i32(<3 x i32> %[[A_32]])
2137; VI-NEXT: %[[S_32:[0-9]+]] = lshr <3 x i32> %[[R_32]], <i32 16, i32 16, i32 16>
2138; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[S_32]] to <3 x i16>
2139; VI-NEXT: store volatile <3 x i16> %[[R_16]]
; VI promotes the i16 bitreverse to @llvm.bitreverse.v3i32 on zexted input;
; the reversed bits sit in the top half of the i32, so a lshr by 16
; (= 32 - 16) realigns them before truncating back to <3 x i16>.
define amdgpu_kernel void @bitreverse_3xi16(<3 x i16> %a) {
  %brev = call <3 x i16> @llvm.bitreverse.v3i16(<3 x i16> %a)
  store volatile <3 x i16> %brev, <3 x i16> addrspace(1)* undef
  ret void
}
2145