; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s

define i8 @v_lshr_i8(i8 %value, i8 %amount) {
; GFX6-LABEL: v_lshr_i8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_movk_i32 s4, 0xff
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_lshr_i8:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b16_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_lshr_i8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshrrev_b16_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr i8 %value, %amount
  ret i8 %result
}

define i8 @v_lshr_i8_7(i8 %value) {
; GFX6-LABEL: v_lshr_i8_7:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_and_b32_e32 v0, 0xff, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 7, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_lshr_i8_7:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v1, 7
; GFX8-NEXT:    v_lshrrev_b16_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_lshr_i8_7:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v1, 7
; GFX9-NEXT:    v_lshrrev_b16_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr i8 %value, 7
  ret i8 %result
}

define amdgpu_ps i8 @s_lshr_i8(i8 inreg %value, i8 inreg %amount) {
; GFX6-LABEL: s_lshr_i8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_movk_i32 s2, 0xff
; GFX6-NEXT:    s_and_b32 s1, s1, s2
; GFX6-NEXT:    s_and_b32 s0, s0, s2
; GFX6-NEXT:    s_lshr_b32 s0, s0, s1
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_lshr_i8:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_movk_i32 s2, 0xff
; GFX8-NEXT:    s_and_b32 s0, s0, s2
; GFX8-NEXT:    s_and_b32 s1, s1, s2
; GFX8-NEXT:    s_lshr_b32 s0, s0, s1
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: s_lshr_i8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_movk_i32 s2, 0xff
; GFX9-NEXT:    s_and_b32 s0, s0, s2
; GFX9-NEXT:    s_and_b32 s1, s1, s2
; GFX9-NEXT:    s_lshr_b32 s0, s0, s1
; GFX9-NEXT:    ; return to shader part epilog
  %result = lshr i8 %value, %amount
  ret i8 %result
}

define amdgpu_ps i8 @s_lshr_i8_7(i8 inreg %value) {
; GCN-LABEL: s_lshr_i8_7:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_and_b32 s0, s0, 0xff
; GCN-NEXT:    s_lshr_b32 s0, s0, 7
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr i8 %value, 7
  ret i8 %result
}


define i24 @v_lshr_i24(i24 %value, i24 %amount) {
; GCN-LABEL: v_lshr_i24:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    s_mov_b32 s4, 0xffffff
; GCN-NEXT:    v_and_b32_e32 v1, s4, v1
; GCN-NEXT:    v_and_b32_e32 v0, s4, v0
; GCN-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr i24 %value, %amount
  ret i24 %result
}

define i24 @v_lshr_i24_7(i24 %value) {
; GCN-LABEL: v_lshr_i24_7:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
; GCN-NEXT:    v_lshrrev_b32_e32 v0, 7, v0
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr i24 %value, 7
  ret i24 %result
}

define amdgpu_ps i24 @s_lshr_i24(i24 inreg %value, i24 inreg %amount) {
; GCN-LABEL: s_lshr_i24:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_mov_b32 s2, 0xffffff
; GCN-NEXT:    s_and_b32 s1, s1, s2
; GCN-NEXT:    s_and_b32 s0, s0, s2
; GCN-NEXT:    s_lshr_b32 s0, s0, s1
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr i24 %value, %amount
  ret i24 %result
}

define amdgpu_ps i24 @s_lshr_i24_7(i24 inreg %value) {
; GCN-LABEL: s_lshr_i24_7:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_and_b32 s0, s0, 0xffffff
; GCN-NEXT:    s_lshr_b32 s0, s0, 7
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr i24 %value, 7
  ret i24 %result
}

define i32 @v_lshr_i32(i32 %value, i32 %amount) {
; GCN-LABEL: v_lshr_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr i32 %value, %amount
  ret i32 %result
}

define i32 @v_lshr_i32_31(i32 %value) {
; GCN-LABEL: v_lshr_i32_31:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshrrev_b32_e32 v0, 31, v0
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr i32 %value, 31
  ret i32 %result
}

define amdgpu_ps i32 @s_lshr_i32(i32 inreg %value, i32 inreg %amount) {
; GCN-LABEL: s_lshr_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshr_b32 s0, s0, s1
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr i32 %value, %amount
  ret i32 %result
}

define amdgpu_ps i32 @s_lshr_i32_31(i32 inreg %value) {
; GCN-LABEL: s_lshr_i32_31:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshr_b32 s0, s0, 31
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr i32 %value, 31
  ret i32 %result
}

define amdgpu_ps float @lshr_i32_sv(i32 inreg %value, i32 %amount) {
; GFX6-LABEL: lshr_i32_sv:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_lshr_b32_e32 v0, s0, v0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: lshr_i32_sv:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_lshrrev_b32_e64 v0, v0, s0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: lshr_i32_sv:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_lshrrev_b32_e64 v0, v0, s0
; GFX9-NEXT:    ; return to shader part epilog
  %result = lshr i32 %value, %amount
  %cast = bitcast i32 %result to float
  ret float %cast
}

define amdgpu_ps float @lshr_i32_vs(i32 %value, i32 inreg %amount) {
; GCN-LABEL: lshr_i32_vs:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr i32 %value, %amount
  %cast = bitcast i32 %result to float
  ret float %cast
}

define <2 x i32> @v_lshr_v2i32(<2 x i32> %value, <2 x i32> %amount) {
; GCN-LABEL: v_lshr_v2i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshrrev_b32_e32 v0, v2, v0
; GCN-NEXT:    v_lshrrev_b32_e32 v1, v3, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr <2 x i32> %value, %amount
  ret <2 x i32> %result
}

define <2 x i32> @v_lshr_v2i32_31(<2 x i32> %value) {
; GCN-LABEL: v_lshr_v2i32_31:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshrrev_b32_e32 v0, 31, v0
; GCN-NEXT:    v_lshrrev_b32_e32 v1, 31, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr <2 x i32> %value, <i32 31, i32 31>
  ret <2 x i32> %result
}

define amdgpu_ps <2 x i32> @s_lshr_v2i32(<2 x i32> inreg %value, <2 x i32> inreg %amount) {
; GCN-LABEL: s_lshr_v2i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshr_b32 s0, s0, s2
; GCN-NEXT:    s_lshr_b32 s1, s1, s3
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr <2 x i32> %value, %amount
  ret <2 x i32> %result
}

define <3 x i32> @v_lshr_v3i32(<3 x i32> %value, <3 x i32> %amount) {
; GCN-LABEL: v_lshr_v3i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshrrev_b32_e32 v0, v3, v0
; GCN-NEXT:    v_lshrrev_b32_e32 v1, v4, v1
; GCN-NEXT:    v_lshrrev_b32_e32 v2, v5, v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr <3 x i32> %value, %amount
  ret <3 x i32> %result
}

define amdgpu_ps <3 x i32> @s_lshr_v3i32(<3 x i32> inreg %value, <3 x i32> inreg %amount) {
; GCN-LABEL: s_lshr_v3i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshr_b32 s0, s0, s3
; GCN-NEXT:    s_lshr_b32 s1, s1, s4
; GCN-NEXT:    s_lshr_b32 s2, s2, s5
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr <3 x i32> %value, %amount
  ret <3 x i32> %result
}

define <4 x i32> @v_lshr_v4i32(<4 x i32> %value, <4 x i32> %amount) {
; GCN-LABEL: v_lshr_v4i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshrrev_b32_e32 v0, v4, v0
; GCN-NEXT:    v_lshrrev_b32_e32 v1, v5, v1
; GCN-NEXT:    v_lshrrev_b32_e32 v2, v6, v2
; GCN-NEXT:    v_lshrrev_b32_e32 v3, v7, v3
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr <4 x i32> %value, %amount
  ret <4 x i32> %result
}

define amdgpu_ps <4 x i32> @s_lshr_v4i32(<4 x i32> inreg %value, <4 x i32> inreg %amount) {
; GCN-LABEL: s_lshr_v4i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshr_b32 s0, s0, s4
; GCN-NEXT:    s_lshr_b32 s1, s1, s5
; GCN-NEXT:    s_lshr_b32 s2, s2, s6
; GCN-NEXT:    s_lshr_b32 s3, s3, s7
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr <4 x i32> %value, %amount
  ret <4 x i32> %result
}

define <5 x i32> @v_lshr_v5i32(<5 x i32> %value, <5 x i32> %amount) {
; GCN-LABEL: v_lshr_v5i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshrrev_b32_e32 v0, v5, v0
; GCN-NEXT:    v_lshrrev_b32_e32 v1, v6, v1
; GCN-NEXT:    v_lshrrev_b32_e32 v2, v7, v2
; GCN-NEXT:    v_lshrrev_b32_e32 v3, v8, v3
; GCN-NEXT:    v_lshrrev_b32_e32 v4, v9, v4
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr <5 x i32> %value, %amount
  ret <5 x i32> %result
}

define amdgpu_ps <5 x i32> @s_lshr_v5i32(<5 x i32> inreg %value, <5 x i32> inreg %amount) {
; GCN-LABEL: s_lshr_v5i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshr_b32 s0, s0, s5
; GCN-NEXT:    s_lshr_b32 s1, s1, s6
; GCN-NEXT:    s_lshr_b32 s2, s2, s7
; GCN-NEXT:    s_lshr_b32 s3, s3, s8
; GCN-NEXT:    s_lshr_b32 s4, s4, s9
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr <5 x i32> %value, %amount
  ret <5 x i32> %result
}

define <16 x i32> @v_lshr_v16i32(<16 x i32> %value, <16 x i32> %amount) {
; GCN-LABEL: v_lshr_v16i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshrrev_b32_e32 v0, v16, v0
; GCN-NEXT:    v_lshrrev_b32_e32 v1, v17, v1
; GCN-NEXT:    v_lshrrev_b32_e32 v2, v18, v2
; GCN-NEXT:    v_lshrrev_b32_e32 v3, v19, v3
; GCN-NEXT:    v_lshrrev_b32_e32 v4, v20, v4
; GCN-NEXT:    v_lshrrev_b32_e32 v5, v21, v5
; GCN-NEXT:    v_lshrrev_b32_e32 v6, v22, v6
; GCN-NEXT:    v_lshrrev_b32_e32 v7, v23, v7
; GCN-NEXT:    v_lshrrev_b32_e32 v8, v24, v8
; GCN-NEXT:    v_lshrrev_b32_e32 v9, v25, v9
; GCN-NEXT:    v_lshrrev_b32_e32 v10, v26, v10
; GCN-NEXT:    v_lshrrev_b32_e32 v11, v27, v11
; GCN-NEXT:    v_lshrrev_b32_e32 v12, v28, v12
; GCN-NEXT:    v_lshrrev_b32_e32 v13, v29, v13
; GCN-NEXT:    v_lshrrev_b32_e32 v14, v30, v14
; GCN-NEXT:    v_lshrrev_b32_e32 v15, v31, v15
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr <16 x i32> %value, %amount
  ret <16 x i32> %result
}

define amdgpu_ps <16 x i32> @s_lshr_v16i32(<16 x i32> inreg %value, <16 x i32> inreg %amount) {
; GCN-LABEL: s_lshr_v16i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshr_b32 s0, s0, s16
; GCN-NEXT:    s_lshr_b32 s1, s1, s17
; GCN-NEXT:    s_lshr_b32 s2, s2, s18
; GCN-NEXT:    s_lshr_b32 s3, s3, s19
; GCN-NEXT:    s_lshr_b32 s4, s4, s20
; GCN-NEXT:    s_lshr_b32 s5, s5, s21
; GCN-NEXT:    s_lshr_b32 s6, s6, s22
; GCN-NEXT:    s_lshr_b32 s7, s7, s23
; GCN-NEXT:    s_lshr_b32 s8, s8, s24
; GCN-NEXT:    s_lshr_b32 s9, s9, s25
; GCN-NEXT:    s_lshr_b32 s10, s10, s26
; GCN-NEXT:    s_lshr_b32 s11, s11, s27
; GCN-NEXT:    s_lshr_b32 s12, s12, s28
; GCN-NEXT:    s_lshr_b32 s13, s13, s29
; GCN-NEXT:    s_lshr_b32 s14, s14, s30
; GCN-NEXT:    s_lshr_b32 s15, s15, s31
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr <16 x i32> %value, %amount
  ret <16 x i32> %result
}

define i16 @v_lshr_i16(i16 %value, i16 %amount) {
; GFX6-LABEL: v_lshr_i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s4, 0xffff
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_lshr_i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b16_e32 v0, v1, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_lshr_i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshrrev_b16_e32 v0, v1, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr i16 %value, %amount
  ret i16 %result
}

define i16 @v_lshr_i16_31(i16 %value) {
; GCN-LABEL: v_lshr_i16_31:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr i16 %value, 31
  ret i16 %result
}

define amdgpu_ps i16 @s_lshr_i16(i16 inreg %value, i16 inreg %amount) {
; GFX6-LABEL: s_lshr_i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s2, 0xffff
; GFX6-NEXT:    s_and_b32 s1, s1, s2
; GFX6-NEXT:    s_and_b32 s0, s0, s2
; GFX6-NEXT:    s_lshr_b32 s0, s0, s1
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_lshr_i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s2, 0xffff
; GFX8-NEXT:    s_and_b32 s0, s0, s2
; GFX8-NEXT:    s_and_b32 s1, s1, s2
; GFX8-NEXT:    s_lshr_b32 s0, s0, s1
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: s_lshr_i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mov_b32 s2, 0xffff
; GFX9-NEXT:    s_and_b32 s0, s0, s2
; GFX9-NEXT:    s_and_b32 s1, s1, s2
; GFX9-NEXT:    s_lshr_b32 s0, s0, s1
; GFX9-NEXT:    ; return to shader part epilog
  %result = lshr i16 %value, %amount
  ret i16 %result
}

define amdgpu_ps i16 @s_lshr_i16_15(i16 inreg %value) {
; GCN-LABEL: s_lshr_i16_15:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_and_b32 s0, s0, 0xffff
; GCN-NEXT:    s_lshr_b32 s0, s0, 15
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr i16 %value, 15
  ret i16 %result
}

define amdgpu_ps half @lshr_i16_sv(i16 inreg %value, i16 %amount) {
; GFX6-LABEL: lshr_i16_sv:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s1, 0xffff
; GFX6-NEXT:    v_and_b32_e32 v0, s1, v0
; GFX6-NEXT:    s_and_b32 s0, s0, s1
; GFX6-NEXT:    v_lshr_b32_e32 v0, s0, v0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: lshr_i16_sv:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_lshrrev_b16_e64 v0, v0, s0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: lshr_i16_sv:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_lshrrev_b16_e64 v0, v0, s0
; GFX9-NEXT:    ; return to shader part epilog
  %result = lshr i16 %value, %amount
  %cast = bitcast i16 %result to half
  ret half %cast
}

define amdgpu_ps half @lshr_i16_vs(i16 %value, i16 inreg %amount) {
; GFX6-LABEL: lshr_i16_vs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s1, 0xffff
; GFX6-NEXT:    s_and_b32 s0, s0, s1
; GFX6-NEXT:    v_and_b32_e32 v0, s1, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: lshr_i16_vs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_lshrrev_b16_e32 v0, s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: lshr_i16_vs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_lshrrev_b16_e32 v0, s0, v0
; GFX9-NEXT:    ; return to shader part epilog
  %result = lshr i16 %value, %amount
  %cast = bitcast i16 %result to half
  ret half %cast
}

define <2 x i16> @v_lshr_v2i16(<2 x i16> %value, <2 x i16> %amount) {
; GFX6-LABEL: v_lshr_v2i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s4, 0xffff
; GFX6-NEXT:    v_and_b32_e32 v2, s4, v2
; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, v2, v0
; GFX6-NEXT:    v_and_b32_e32 v2, s4, v3
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_lshr_v2i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b16_e32 v2, v1, v0
; GFX8-NEXT:    v_lshrrev_b16_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_lshr_v2i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_lshrrev_b16 v0, v1, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr <2 x i16> %value, %amount
  ret <2 x i16> %result
}

define <2 x i16> @v_lshr_v2i16_15(<2 x i16> %value) {
; GFX6-LABEL: v_lshr_v2i16_15:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s4, 0xffff
; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 15, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 15, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_lshr_v2i16_15:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v2, 15
; GFX8-NEXT:    v_lshrrev_b16_e32 v1, 15, v0
; GFX8-NEXT:    v_lshrrev_b16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_lshr_v2i16_15:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_lshrrev_b16 v0, 15, v0 op_sel_hi:[0,1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr <2 x i16> %value, <i16 15, i16 15>
  ret <2 x i16> %result
}

define amdgpu_ps i32 @s_lshr_v2i16(<2 x i16> inreg %value, <2 x i16> inreg %amount) {
; GFX6-LABEL: s_lshr_v2i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s4, 0xffff
; GFX6-NEXT:    s_and_b32 s2, s2, s4
; GFX6-NEXT:    s_and_b32 s0, s0, s4
; GFX6-NEXT:    s_lshr_b32 s0, s0, s2
; GFX6-NEXT:    s_and_b32 s2, s3, s4
; GFX6-NEXT:    s_and_b32 s1, s1, s4
; GFX6-NEXT:    s_lshr_b32 s1, s1, s2
; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
; GFX6-NEXT:    s_or_b32 s0, s0, s1
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_lshr_v2i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s3, 0xffff
; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
; GFX8-NEXT:    s_lshr_b32 s4, s1, 16
; GFX8-NEXT:    s_and_b32 s0, s0, s3
; GFX8-NEXT:    s_and_b32 s1, s1, s3
; GFX8-NEXT:    s_lshr_b32 s0, s0, s1
; GFX8-NEXT:    s_lshr_b32 s1, s2, s4
; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
; GFX8-NEXT:    s_and_b32 s0, s0, s3
; GFX8-NEXT:    s_or_b32 s0, s1, s0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: s_lshr_v2i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
; GFX9-NEXT:    s_lshr_b32 s3, s1, 16
; GFX9-NEXT:    s_lshr_b32 s0, s0, s1
; GFX9-NEXT:    s_lshr_b32 s1, s2, s3
; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
; GFX9-NEXT:    ; return to shader part epilog
  %result = lshr <2 x i16> %value, %amount
  %cast = bitcast <2 x i16> %result to i32
  ret i32 %cast
}

define amdgpu_ps float @lshr_v2i16_sv(<2 x i16> inreg %value, <2 x i16> %amount) {
; GFX6-LABEL: lshr_v2i16_sv:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s2, 0xffff
; GFX6-NEXT:    v_and_b32_e32 v0, s2, v0
; GFX6-NEXT:    s_and_b32 s0, s0, s2
; GFX6-NEXT:    v_lshr_b32_e32 v0, s0, v0
; GFX6-NEXT:    v_and_b32_e32 v1, s2, v1
; GFX6-NEXT:    s_and_b32 s0, s1, s2
; GFX6-NEXT:    v_lshr_b32_e32 v1, s0, v1
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: lshr_v2i16_sv:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
; GFX8-NEXT:    v_mov_b32_e32 v2, s1
; GFX8-NEXT:    v_lshrrev_b16_e64 v1, v0, s0
; GFX8-NEXT:    v_lshrrev_b16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: lshr_v2i16_sv:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_pk_lshrrev_b16 v0, v0, s0
; GFX9-NEXT:    ; return to shader part epilog
  %result = lshr <2 x i16> %value, %amount
  %cast = bitcast <2 x i16> %result to float
  ret float %cast
}

define amdgpu_ps float @lshr_v2i16_vs(<2 x i16> %value, <2 x i16> inreg %amount) {
; GFX6-LABEL: lshr_v2i16_vs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s2, 0xffff
; GFX6-NEXT:    s_and_b32 s0, s0, s2
; GFX6-NEXT:    v_and_b32_e32 v0, s2, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
; GFX6-NEXT:    s_and_b32 s0, s1, s2
; GFX6-NEXT:    v_and_b32_e32 v1, s2, v1
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, s0, v1
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: lshr_v2i16_vs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
; GFX8-NEXT:    v_mov_b32_e32 v2, s1
; GFX8-NEXT:    v_lshrrev_b16_e32 v1, s0, v0
; GFX8-NEXT:    v_lshrrev_b16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: lshr_v2i16_vs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_pk_lshrrev_b16 v0, s0, v0
; GFX9-NEXT:    ; return to shader part epilog
  %result = lshr <2 x i16> %value, %amount
  %cast = bitcast <2 x i16> %result to float
  ret float %cast
}

; FIXME
; define <3 x i16> @v_lshr_v3i16(<3 x i16> %value, <3 x i16> %amount) {
;   %result = lshr <3 x i16> %value, %amount
;   ret <3 x i16> %result
; }

; define amdgpu_ps <3 x i16> @s_lshr_v3i16(<3 x i16> inreg %value, <3 x i16> inreg %amount) {
;   %result = lshr <3 x i16> %value, %amount
;   ret <3 x i16> %result
; }

define <2 x float> @v_lshr_v4i16(<4 x i16> %value, <4 x i16> %amount) {
; GFX6-LABEL: v_lshr_v4i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s4, 0xffff
; GFX6-NEXT:    v_and_b32_e32 v4, s4, v4
; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, v4, v0
; GFX6-NEXT:    v_and_b32_e32 v4, s4, v5
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v4, v1
; GFX6-NEXT:    v_and_b32_e32 v4, s4, v6
; GFX6-NEXT:    v_and_b32_e32 v2, s4, v2
; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
; GFX6-NEXT:    v_and_b32_e32 v4, s4, v7
; GFX6-NEXT:    v_and_b32_e32 v3, s4, v3
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_lshrrev_b32_e32 v3, v4, v3
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_lshr_v4i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b16_e32 v4, v2, v0
; GFX8-NEXT:    v_lshrrev_b16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_lshrrev_b16_e32 v2, v3, v1
; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v4, v0
; GFX8-NEXT:    v_or_b32_e32 v1, v2, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_lshr_v4i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_lshrrev_b16 v0, v2, v0
; GFX9-NEXT:    v_pk_lshrrev_b16 v1, v3, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr <4 x i16> %value, %amount
  %cast = bitcast <4 x i16> %result to <2 x float>
  ret <2 x float> %cast
}

define amdgpu_ps <2 x i32> @s_lshr_v4i16(<4 x i16> inreg %value, <4 x i16> inreg %amount) {
; GFX6-LABEL: s_lshr_v4i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s8, 0xffff
; GFX6-NEXT:    s_and_b32 s4, s4, s8
; GFX6-NEXT:    s_and_b32 s0, s0, s8
; GFX6-NEXT:    s_lshr_b32 s0, s0, s4
; GFX6-NEXT:    s_and_b32 s4, s5, s8
; GFX6-NEXT:    s_and_b32 s1, s1, s8
; GFX6-NEXT:    s_lshr_b32 s1, s1, s4
; GFX6-NEXT:    s_and_b32 s4, s6, s8
; GFX6-NEXT:    s_and_b32 s2, s2, s8
; GFX6-NEXT:    s_lshr_b32 s2, s2, s4
; GFX6-NEXT:    s_and_b32 s4, s7, s8
; GFX6-NEXT:    s_and_b32 s3, s3, s8
; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
; GFX6-NEXT:    s_lshr_b32 s3, s3, s4
; GFX6-NEXT:    s_or_b32 s0, s0, s1
; GFX6-NEXT:    s_lshl_b32 s1, s3, 16
; GFX6-NEXT:    s_or_b32 s1, s2, s1
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_lshr_v4i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s6, 0xffff
; GFX8-NEXT:    s_lshr_b32 s4, s0, 16
; GFX8-NEXT:    s_lshr_b32 s7, s2, 16
; GFX8-NEXT:    s_and_b32 s0, s0, s6
; GFX8-NEXT:    s_and_b32 s2, s2, s6
; GFX8-NEXT:    s_lshr_b32 s0, s0, s2
; GFX8-NEXT:    s_lshr_b32 s2, s4, s7
; GFX8-NEXT:    s_lshr_b32 s5, s1, 16
; GFX8-NEXT:    s_lshr_b32 s8, s3, 16
; GFX8-NEXT:    s_and_b32 s1, s1, s6
; GFX8-NEXT:    s_and_b32 s3, s3, s6
; GFX8-NEXT:    s_lshr_b32 s1, s1, s3
; GFX8-NEXT:    s_lshr_b32 s3, s5, s8
; GFX8-NEXT:    s_lshl_b32 s2, s2, 16
; GFX8-NEXT:    s_and_b32 s0, s0, s6
; GFX8-NEXT:    s_or_b32 s0, s2, s0
; GFX8-NEXT:    s_lshl_b32 s2, s3, 16
; GFX8-NEXT:    s_and_b32 s1, s1, s6
; GFX8-NEXT:    s_or_b32 s1, s2, s1
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: s_lshr_v4i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshr_b32 s4, s0, 16
; GFX9-NEXT:    s_lshr_b32 s5, s2, 16
; GFX9-NEXT:    s_lshr_b32 s0, s0, s2
; GFX9-NEXT:    s_lshr_b32 s2, s4, s5
; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
; GFX9-NEXT:    s_lshr_b32 s2, s1, 16
; GFX9-NEXT:    s_lshr_b32 s4, s3, 16
; GFX9-NEXT:    s_lshr_b32 s1, s1, s3
; GFX9-NEXT:    s_lshr_b32 s2, s2, s4
; GFX9-NEXT:    s_pack_ll_b32_b16 s1, s1, s2
; GFX9-NEXT:    ; return to shader part epilog
  %result = lshr <4 x i16> %value, %amount
  %cast = bitcast <4 x i16> %result to <2 x i32>
  ret <2 x i32> %cast
}

; FIXME
; define <5 x i16> @v_lshr_v5i16(<5 x i16> %value, <5 x i16> %amount) {
;   %result = lshr <5 x i16> %value, %amount
;   ret <5 x i16> %result
; }

; define amdgpu_ps <5 x i16> @s_lshr_v5i16(<5 x i16> inreg %value, <5 x i16> inreg %amount) {
;   %result = lshr <5 x i16> %value, %amount
;   ret <5 x i16> %result
; }

; define <3 x float> @v_lshr_v6i16(<6 x i16> %value, <6 x i16> %amount) {
;   %result = lshr <6 x i16> %value, %amount
;   %cast = bitcast <6 x i16> %result to <3 x float>
;   ret <3 x float> %cast
; }

; define amdgpu_ps <3 x i32> @s_lshr_v6i16(<6 x i16> inreg %value, <6 x i16> inreg %amount) {
;   %result = lshr <6 x i16> %value, %amount
;   %cast = bitcast <6 x i16> %result to <3 x i32>
;   ret <3 x i32> %cast
; }

define <4 x float> @v_lshr_v8i16(<8 x i16> %value, <8 x i16> %amount) {
; GFX6-LABEL: v_lshr_v8i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s4, 0xffff
; GFX6-NEXT:    v_and_b32_e32 v8, s4, v8
; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, v8, v0
; GFX6-NEXT:    v_and_b32_e32 v8, s4, v9
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v8, v1
; GFX6-NEXT:    v_and_b32_e32 v8, s4, v10
; GFX6-NEXT:    v_and_b32_e32 v2, s4, v2
; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v8, v2
; GFX6-NEXT:    v_and_b32_e32 v8, s4, v11
; GFX6-NEXT:    v_and_b32_e32 v3, s4, v3
; GFX6-NEXT:    v_lshrrev_b32_e32 v3, v8, v3
; GFX6-NEXT:    v_and_b32_e32 v8, s4, v12
; GFX6-NEXT:    v_and_b32_e32 v4, s4, v4
; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v8, v4
; GFX6-NEXT:    v_and_b32_e32 v8, s4, v13
; GFX6-NEXT:    v_and_b32_e32 v5, s4, v5
; GFX6-NEXT:    v_mov_b32_e32 v16, 0xffff
; GFX6-NEXT:    v_lshrrev_b32_e32 v5, v8, v5
; GFX6-NEXT:    v_and_b32_e32 v8, s4, v14
; GFX6-NEXT:    v_and_b32_e32 v6, s4, v6
; GFX6-NEXT:    v_lshrrev_b32_e32 v6, v8, v6
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_and_b32_e32 v8, v15, v16
; GFX6-NEXT:    v_and_b32_e32 v7, v7, v16
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
; GFX6-NEXT:    v_lshrrev_b32_e32 v7, v8, v7
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v7
; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v5
; GFX6-NEXT:    v_or_b32_e32 v2, v4, v2
; GFX6-NEXT:    v_or_b32_e32 v3, v6, v3
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_lshr_v8i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b16_e32 v8, v4, v0
; GFX8-NEXT:    v_lshrrev_b16_sdwa v0, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_lshrrev_b16_e32 v4, v5, v1
; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v1, v4, v1
; GFX8-NEXT:    v_lshrrev_b16_e32 v4, v6, v2
; GFX8-NEXT:    v_lshrrev_b16_sdwa v2, v6, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v2, v4, v2
; GFX8-NEXT:    v_lshrrev_b16_e32 v4, v7, v3
; GFX8-NEXT:    v_lshrrev_b16_sdwa v3, v7, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v8, v0
; GFX8-NEXT:    v_or_b32_e32 v3, v4, v3
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_lshr_v8i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_lshrrev_b16 v0, v4, v0
; GFX9-NEXT:    v_pk_lshrrev_b16 v1, v5, v1
; GFX9-NEXT:    v_pk_lshrrev_b16 v2, v6, v2
; GFX9-NEXT:    v_pk_lshrrev_b16 v3, v7, v3
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr <8 x i16> %value, %amount
  %cast = bitcast <8 x i16> %result to <4 x float>
  ret <4 x float> %cast
}

define amdgpu_ps <4 x i32> @s_lshr_v8i16(<8 x i16> inreg %value, <8 x i16> inreg %amount) {
; GFX6-LABEL: s_lshr_v8i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s16, 0xffff
; GFX6-NEXT:    s_and_b32 s8, s8, s16
; GFX6-NEXT:    s_and_b32 s0, s0, s16
; GFX6-NEXT:    s_lshr_b32 s0, s0, s8
; GFX6-NEXT:    s_and_b32 s8, s9, s16
; GFX6-NEXT:    s_and_b32 s1, s1, s16
; GFX6-NEXT:    s_lshr_b32 s1, s1, s8
; GFX6-NEXT:    s_and_b32 s8, s10, s16
; GFX6-NEXT:    s_and_b32 s2, s2, s16
; GFX6-NEXT:    s_lshr_b32 s2, s2, s8
; GFX6-NEXT:    s_and_b32 s8, s11, s16
; GFX6-NEXT:    s_and_b32 s3, s3, s16
; GFX6-NEXT:    s_lshr_b32 s3, s3, s8
; GFX6-NEXT:    s_and_b32 s8, s12, s16
; GFX6-NEXT:    s_and_b32 s4, s4, s16
; GFX6-NEXT:    s_lshr_b32 s4, s4, s8
; GFX6-NEXT:    s_and_b32 s8, s13, s16
; GFX6-NEXT:    s_and_b32 s5, s5, s16
; GFX6-NEXT:    s_lshr_b32 s5, s5, s8
; GFX6-NEXT:    s_and_b32 s8, s14, s16
; GFX6-NEXT:    s_and_b32 s6, s6, s16
; GFX6-NEXT:    s_lshr_b32 s6, s6, s8
; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
; GFX6-NEXT:    s_and_b32 s8, s15, s16
; GFX6-NEXT:    s_and_b32 s7, s7, s16
; GFX6-NEXT:    s_or_b32 s0, s0, s1
; GFX6-NEXT:    s_lshl_b32 s1, s3, 16
; GFX6-NEXT:    s_lshr_b32 s7, s7, s8
; GFX6-NEXT:    s_lshl_b32 s3, s7, 16
; GFX6-NEXT:    s_or_b32 s1, s2, s1
; GFX6-NEXT:    s_lshl_b32 s2, s5, 16
; GFX6-NEXT:    s_or_b32 s2, s4, s2
; GFX6-NEXT:    s_or_b32 s3, s6, s3
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_lshr_v8i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s12, 0xffff
; GFX8-NEXT:    s_lshr_b32 s8, s0, 16
; GFX8-NEXT:    s_lshr_b32 s13, s4, 16
; GFX8-NEXT:    s_and_b32 s0, s0, s12
; GFX8-NEXT:    s_and_b32 s4, s4, s12
; GFX8-NEXT:    s_lshr_b32 s0, s0, s4
; GFX8-NEXT:    s_lshr_b32 s4, s8, s13
; GFX8-NEXT:    s_lshr_b32 s9, s1, 16
; GFX8-NEXT:    s_lshr_b32 s14, s5, 16
; GFX8-NEXT:    s_and_b32 s1, s1, s12
; GFX8-NEXT:    s_and_b32 s5, s5, s12
; GFX8-NEXT:    s_lshr_b32 s1, s1, s5
; GFX8-NEXT:    s_lshr_b32 s10, s2, 16
; GFX8-NEXT:    s_lshr_b32 s15, s6, 16
; GFX8-NEXT:    s_and_b32 s2, s2, s12
; GFX8-NEXT:    s_and_b32 s6, s6, s12
; GFX8-NEXT:    s_lshr_b32 s5, s9, s14
; GFX8-NEXT:    s_lshl_b32 s4, s4, 16
; GFX8-NEXT:    s_and_b32 s0, s0, s12
; GFX8-NEXT:    s_lshr_b32 s2, s2, s6
; GFX8-NEXT:    s_lshr_b32 s11, s3, 16
; GFX8-NEXT:    s_lshr_b32 s16, s7, 16
; GFX8-NEXT:    s_or_b32 s0, s4, s0
; GFX8-NEXT:    s_and_b32 s3, s3, s12
; GFX8-NEXT:    s_and_b32 s7, s7, s12
; GFX8-NEXT:    s_lshr_b32 s6, s10, s15
; GFX8-NEXT:    s_lshl_b32 s4, s5, 16
; GFX8-NEXT:    s_and_b32 s1, s1, s12
; GFX8-NEXT:    s_lshr_b32 s3, s3, s7
; GFX8-NEXT:    s_or_b32 s1, s4, s1
; GFX8-NEXT:    s_lshr_b32 s7, s11, s16
; GFX8-NEXT:    s_lshl_b32 s4, s6, 16
; GFX8-NEXT:    s_and_b32 s2, s2, s12
; GFX8-NEXT:    s_or_b32 s2, s4, s2
; GFX8-NEXT:    s_lshl_b32 s4, s7, 16
; GFX8-NEXT:    s_and_b32 s3, s3, s12
; GFX8-NEXT:    s_or_b32 s3, s4, s3
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: s_lshr_v8i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshr_b32 s8, s0, 16
; GFX9-NEXT:    s_lshr_b32 s9, s4, 16
; GFX9-NEXT:    s_lshr_b32 s0, s0, s4
; GFX9-NEXT:    s_lshr_b32 s4, s8, s9
; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s4
; GFX9-NEXT:    s_lshr_b32 s4, s1, 16
; GFX9-NEXT:    s_lshr_b32 s8, s5, 16
; GFX9-NEXT:    s_lshr_b32 s1, s1, s5
; GFX9-NEXT:    s_lshr_b32 s4, s4, s8
; GFX9-NEXT:    s_pack_ll_b32_b16 s1, s1, s4
; GFX9-NEXT:    s_lshr_b32 s4, s2, 16
; GFX9-NEXT:    s_lshr_b32 s5, s6, 16
; GFX9-NEXT:    s_lshr_b32 s4, s4, s5
; GFX9-NEXT:    s_lshr_b32 s2, s2, s6
; GFX9-NEXT:    s_pack_ll_b32_b16 s2, s2, s4
; GFX9-NEXT:    s_lshr_b32 s4, s3, 16
; GFX9-NEXT:    s_lshr_b32 s5, s7, 16
; GFX9-NEXT:    s_lshr_b32 s3, s3, s7
; GFX9-NEXT:    s_lshr_b32 s4, s4, s5
; GFX9-NEXT:    s_pack_ll_b32_b16 s3, s3, s4
; GFX9-NEXT:    ; return to shader part epilog
  %result = lshr <8 x i16> %value, %amount
  %cast = bitcast <8 x i16> %result to <4 x i32>
  ret <4 x i32> %cast
}

define i64 @v_lshr_i64(i64 %value, i64 %amount) {
; GFX6-LABEL: v_lshr_i64:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_lshr_b64 v[0:1], v[0:1], v2
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_lshr_i64:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v2, v[0:1]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_lshr_i64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v2, v[0:1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr i64 %value, %amount
  ret i64 %result
}

define i64 @v_lshr_i64_63(i64 %value) {
; GCN-LABEL: v_lshr_i64_63:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshrrev_b32_e32 v0, 31, v1
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr i64 %value, 63
  ret i64 %result
}

define i64 @v_lshr_i64_33(i64 %value) {
; GCN-LABEL: v_lshr_i64_33:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshrrev_b32_e32 v0, 1, v1
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr i64 %value, 33
  ret i64 %result
}

define i64 @v_lshr_i64_32(i64 %value) {
; GCN-LABEL: v_lshr_i64_32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v0, v1
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr i64 %value, 32
  ret i64 %result
}

define i64 @v_lshr_i64_31(i64 %value) {
; GFX6-LABEL: v_lshr_i64_31:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_lshr_b64 v[0:1], v[0:1], 31
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_lshr_i64_31:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b64 v[0:1], 31, v[0:1]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_lshr_i64_31:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshrrev_b64 v[0:1], 31, v[0:1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr i64 %value, 31
  ret i64 %result
}

define amdgpu_ps i64 @s_lshr_i64(i64 inreg %value, i64 inreg %amount) {
; GCN-LABEL: s_lshr_i64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr i64 %value, %amount
  ret i64 %result
}

define amdgpu_ps i64 @s_lshr_i64_63(i64 inreg %value) {
; GCN-LABEL: s_lshr_i64_63:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshr_b32 s0, s1, 31
; GCN-NEXT:    s_mov_b32 s1, 0
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr i64 %value, 63
  ret i64 %result
}

define amdgpu_ps i64 @s_lshr_i64_33(i64 inreg %value) {
; GCN-LABEL: s_lshr_i64_33:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshr_b32 s0, s1, 1
; GCN-NEXT:    s_mov_b32 s1, 0
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr i64 %value, 33
  ret i64 %result
}

define amdgpu_ps i64 @s_lshr_i64_32(i64 inreg %value) {
; GCN-LABEL: s_lshr_i64_32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_mov_b32 s0, s1
; GCN-NEXT:    s_mov_b32 s1, 0
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr i64 %value, 32
  ret i64 %result
}

define amdgpu_ps i64 @s_lshr_i64_31(i64 inreg %value) {
; GCN-LABEL: s_lshr_i64_31:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshr_b64 s[0:1], s[0:1], 31
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr i64 %value, 31
  ret i64 %result
}

define amdgpu_ps <2 x float> @lshr_i64_sv(i64 inreg %value, i64 %amount) {
; GFX6-LABEL: lshr_i64_sv:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_lshr_b64 v[0:1], s[0:1], v0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: lshr_i64_sv:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v0, s[0:1]
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: lshr_i64_sv:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v0, s[0:1]
; GFX9-NEXT:    ; return to shader part epilog
  %result = lshr i64 %value, %amount
  %cast = bitcast i64 %result to <2 x float>
  ret <2 x float> %cast
}

define amdgpu_ps <2 x float> @lshr_i64_vs(i64 %value, i64 inreg %amount) {
; GFX6-LABEL: lshr_i64_vs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_lshr_b64 v[0:1], v[0:1], s0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: lshr_i64_vs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_lshrrev_b64 v[0:1], s0, v[0:1]
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: lshr_i64_vs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_lshrrev_b64 v[0:1], s0, v[0:1]
; GFX9-NEXT:    ; return to shader part epilog
  %result = lshr i64 %value, %amount
  %cast = bitcast i64 %result to <2 x float>
  ret <2 x float> %cast
}

define <2 x i64> @v_lshr_v2i64(<2 x i64> %value, <2 x i64> %amount) {
; GFX6-LABEL: v_lshr_v2i64:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_lshr_b64 v[0:1], v[0:1], v4
; GFX6-NEXT:    v_lshr_b64 v[2:3], v[2:3], v6
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_lshr_v2i64:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v4, v[0:1]
; GFX8-NEXT:    v_lshrrev_b64 v[2:3], v6, v[2:3]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_lshr_v2i64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v4, v[0:1]
; GFX9-NEXT:    v_lshrrev_b64 v[2:3], v6, v[2:3]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr <2 x i64> %value, %amount
  ret <2 x i64> %result
}

define <2 x i64> @v_lshr_v2i64_31(<2 x i64> %value) {
; GFX6-LABEL: v_lshr_v2i64_31:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_lshr_b64 v[0:1], v[0:1], 31
; GFX6-NEXT:    v_lshr_b64 v[2:3], v[2:3], 31
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_lshr_v2i64_31:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b64 v[0:1], 31, v[0:1]
; GFX8-NEXT:    v_lshrrev_b64 v[2:3], 31, v[2:3]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_lshr_v2i64_31:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshrrev_b64 v[0:1], 31, v[0:1]
; GFX9-NEXT:    v_lshrrev_b64 v[2:3], 31, v[2:3]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr <2 x i64> %value, <i64 31, i64 31>
  ret <2 x i64> %result
}

define amdgpu_ps <2 x i64> @s_lshr_v2i64(<2 x i64> inreg %value, <2 x i64> inreg %amount) {
; GCN-LABEL: s_lshr_v2i64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshr_b64 s[0:1], s[0:1], s4
; GCN-NEXT:    s_lshr_b64 s[2:3], s[2:3], s6
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr <2 x i64> %value, %amount
  ret <2 x i64> %result
}
