• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
3; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
5
; ashr of an i8 value by a variable i8 amount, both passed as plain (non-inreg)
; arguments. The GFX6 checks mask the amount to 8 bits and sign-extend the value
; before a 32-bit shift; the GFX8 and GFX9 checks use a 16-bit SDWA shift.
6define i8 @v_ashr_i8(i8 %value, i8 %amount) {
7; GFX6-LABEL: v_ashr_i8:
8; GFX6:       ; %bb.0:
9; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10; GFX6-NEXT:    v_and_b32_e32 v1, 0xff, v1
11; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 8
12; GFX6-NEXT:    v_ashrrev_i32_e32 v0, v1, v0
13; GFX6-NEXT:    s_setpc_b64 s[30:31]
14;
15; GFX8-LABEL: v_ashr_i8:
16; GFX8:       ; %bb.0:
17; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
19; GFX8-NEXT:    v_ashrrev_i16_sdwa v0, v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_1
20; GFX8-NEXT:    s_setpc_b64 s[30:31]
21;
22; GFX9-LABEL: v_ashr_i8:
23; GFX9:       ; %bb.0:
24; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
25; GFX9-NEXT:    v_ashrrev_i16_sdwa v0, v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
26; GFX9-NEXT:    s_setpc_b64 s[30:31]
27  %result = ashr i8 %value, %amount
28  ret i8 %result
29}
30
; ashr of an i8 value by the constant 7 (the largest in-range amount for i8).
; The GFX8 checks shift left by 8 and then arithmetic-right by 15 in 16 bits;
; the GFX9 checks materialize 7 in v1 and feed it to an SDWA shift.
31define i8 @v_ashr_i8_7(i8 %value) {
32; GFX6-LABEL: v_ashr_i8_7:
33; GFX6:       ; %bb.0:
34; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
35; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 8
36; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 7, v0
37; GFX6-NEXT:    s_setpc_b64 s[30:31]
38;
39; GFX8-LABEL: v_ashr_i8_7:
40; GFX8:       ; %bb.0:
41; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
42; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
43; GFX8-NEXT:    v_ashrrev_i16_e32 v0, 15, v0
44; GFX8-NEXT:    s_setpc_b64 s[30:31]
45;
46; GFX9-LABEL: v_ashr_i8_7:
47; GFX9:       ; %bb.0:
48; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
49; GFX9-NEXT:    v_mov_b32_e32 v1, 7
50; GFX9-NEXT:    v_ashrrev_i16_sdwa v0, v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
51; GFX9-NEXT:    s_setpc_b64 s[30:31]
52  %result = ashr i8 %value, 7
53  ret i8 %result
54}
55
; amdgpu_ps variant of the variable i8 shift with both operands marked inreg.
; Every check group ends in a scalar s_ashr_i32 on s0 and s1. The GFX6 checks
; mask the amount with 0xff, whereas the GFX8 and GFX9 checks sign-extend it.
56define amdgpu_ps i8 @s_ashr_i8(i8 inreg %value, i8 inreg %amount) {
57; GFX6-LABEL: s_ashr_i8:
58; GFX6:       ; %bb.0:
59; GFX6-NEXT:    s_and_b32 s1, s1, 0xff
60; GFX6-NEXT:    s_sext_i32_i8 s0, s0
61; GFX6-NEXT:    s_ashr_i32 s0, s0, s1
62; GFX6-NEXT:    ; return to shader part epilog
63;
64; GFX8-LABEL: s_ashr_i8:
65; GFX8:       ; %bb.0:
66; GFX8-NEXT:    s_sext_i32_i8 s0, s0
67; GFX8-NEXT:    s_sext_i32_i8 s1, s1
68; GFX8-NEXT:    s_ashr_i32 s0, s0, s1
69; GFX8-NEXT:    ; return to shader part epilog
70;
71; GFX9-LABEL: s_ashr_i8:
72; GFX9:       ; %bb.0:
73; GFX9-NEXT:    s_sext_i32_i8 s0, s0
74; GFX9-NEXT:    s_sext_i32_i8 s1, s1
75; GFX9-NEXT:    s_ashr_i32 s0, s0, s1
76; GFX9-NEXT:    ; return to shader part epilog
77  %result = ashr i8 %value, %amount
78  ret i8 %result
79}
80
; amdgpu_ps i8 shift by the constant 7 with an inreg value. All three targets
; share the GCN checks: sign-extend s0 from 8 bits, then s_ashr_i32 by 7.
81define amdgpu_ps i8 @s_ashr_i8_7(i8 inreg %value) {
82; GCN-LABEL: s_ashr_i8_7:
83; GCN:       ; %bb.0:
84; GCN-NEXT:    s_sext_i32_i8 s0, s0
85; GCN-NEXT:    s_ashr_i32 s0, s0, 7
86; GCN-NEXT:    ; return to shader part epilog
87  %result = ashr i8 %value, 7
88  ret i8 %result
89}
90
91
; ashr of an i24 value by a variable i24 amount (plain arguments). The shared
; GCN checks mask the amount with 0xffffff, sign-extend the value from 24 bits
; with v_bfe_i32, and then perform a 32-bit shift.
92define i24 @v_ashr_i24(i24 %value, i24 %amount) {
93; GCN-LABEL: v_ashr_i24:
94; GCN:       ; %bb.0:
95; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
96; GCN-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
97; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 24
98; GCN-NEXT:    v_ashrrev_i32_e32 v0, v1, v0
99; GCN-NEXT:    s_setpc_b64 s[30:31]
100  %result = ashr i24 %value, %amount
101  ret i24 %result
102}
103
; ashr of an i24 value by the constant 7. The shared GCN checks sign-extend the
; value from 24 bits and then shift by the immediate 7.
104define i24 @v_ashr_i24_7(i24 %value) {
105; GCN-LABEL: v_ashr_i24_7:
106; GCN:       ; %bb.0:
107; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
108; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 24
109; GCN-NEXT:    v_ashrrev_i32_e32 v0, 7, v0
110; GCN-NEXT:    s_setpc_b64 s[30:31]
111  %result = ashr i24 %value, 7
112  ret i24 %result
113}
114
; amdgpu_ps variant of the variable i24 shift with both operands inreg. The
; shared GCN checks mask the amount in s1 with 0xffffff, apply s_bfe_i32 to the
; value in s0, and finish with a scalar s_ashr_i32.
115define amdgpu_ps i24 @s_ashr_i24(i24 inreg %value, i24 inreg %amount) {
116; GCN-LABEL: s_ashr_i24:
117; GCN:       ; %bb.0:
118; GCN-NEXT:    s_and_b32 s1, s1, 0xffffff
119; GCN-NEXT:    s_bfe_i32 s0, s0, 0x180000
120; GCN-NEXT:    s_ashr_i32 s0, s0, s1
121; GCN-NEXT:    ; return to shader part epilog
122  %result = ashr i24 %value, %amount
123  ret i24 %result
124}
125
; amdgpu_ps i24 shift by the constant 7 with an inreg value. The shared GCN
; checks apply s_bfe_i32 to s0 and then s_ashr_i32 by the immediate 7.
126define amdgpu_ps i24 @s_ashr_i24_7(i24 inreg %value) {
127; GCN-LABEL: s_ashr_i24_7:
128; GCN:       ; %bb.0:
129; GCN-NEXT:    s_bfe_i32 s0, s0, 0x180000
130; GCN-NEXT:    s_ashr_i32 s0, s0, 7
131; GCN-NEXT:    ; return to shader part epilog
132  %result = ashr i24 %value, 7
133  ret i24 %result
134}
135
; Baseline i32 case with plain arguments: the shared GCN checks expect a single
; v_ashrrev_i32 with no masking or extension of either operand.
136define i32 @v_ashr_i32(i32 %value, i32 %amount) {
137; GCN-LABEL: v_ashr_i32:
138; GCN:       ; %bb.0:
139; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
140; GCN-NEXT:    v_ashrrev_i32_e32 v0, v1, v0
141; GCN-NEXT:    s_setpc_b64 s[30:31]
142  %result = ashr i32 %value, %amount
143  ret i32 %result
144}
145
; i32 shift by the constant 31 (the largest in-range amount for i32). The
; shared GCN checks expect the constant as an inline immediate operand.
146define i32 @v_ashr_i32_31(i32 %value) {
147; GCN-LABEL: v_ashr_i32_31:
148; GCN:       ; %bb.0:
149; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
150; GCN-NEXT:    v_ashrrev_i32_e32 v0, 31, v0
151; GCN-NEXT:    s_setpc_b64 s[30:31]
152  %result = ashr i32 %value, 31
153  ret i32 %result
154}
155
; amdgpu_ps i32 shift with both operands inreg: the shared GCN checks expect a
; single scalar s_ashr_i32 of s0 by s1.
156define amdgpu_ps i32 @s_ashr_i32(i32 inreg %value, i32 inreg %amount) {
157; GCN-LABEL: s_ashr_i32:
158; GCN:       ; %bb.0:
159; GCN-NEXT:    s_ashr_i32 s0, s0, s1
160; GCN-NEXT:    ; return to shader part epilog
161  %result = ashr i32 %value, %amount
162  ret i32 %result
163}
164
; amdgpu_ps i32 shift of an inreg value by the constant 31: the shared GCN
; checks expect a single s_ashr_i32 with the immediate 31.
165define amdgpu_ps i32 @s_ashr_i32_31(i32 inreg %value) {
166; GCN-LABEL: s_ashr_i32_31:
167; GCN:       ; %bb.0:
168; GCN-NEXT:    s_ashr_i32 s0, s0, 31
169; GCN-NEXT:    ; return to shader part epilog
170  %result = ashr i32 %value, 31
171  ret i32 %result
172}
173
; Mixed operands: the value is inreg while the amount is a plain argument, and
; the i32 result is bitcast to float for the return. The GFX6 checks use
; v_ashr_i32_e32 with s0 as the first source; the GFX8 and GFX9 checks use
; v_ashrrev_i32_e64 with s0 as the last source.
174define amdgpu_ps float @ashr_i32_sv(i32 inreg %value, i32 %amount) {
175; GFX6-LABEL: ashr_i32_sv:
176; GFX6:       ; %bb.0:
177; GFX6-NEXT:    v_ashr_i32_e32 v0, s0, v0
178; GFX6-NEXT:    ; return to shader part epilog
179;
180; GFX8-LABEL: ashr_i32_sv:
181; GFX8:       ; %bb.0:
182; GFX8-NEXT:    v_ashrrev_i32_e64 v0, v0, s0
183; GFX8-NEXT:    ; return to shader part epilog
184;
185; GFX9-LABEL: ashr_i32_sv:
186; GFX9:       ; %bb.0:
187; GFX9-NEXT:    v_ashrrev_i32_e64 v0, v0, s0
188; GFX9-NEXT:    ; return to shader part epilog
189  %result = ashr i32 %value, %amount
190  %cast = bitcast i32 %result to float
191  ret float %cast
192}
193
; Mixed operands, mirrored: the value is a plain argument and the amount is
; inreg; the i32 result is bitcast to float for the return. The shared GCN
; checks expect one v_ashrrev_i32_e32 taking the amount from s0.
194define amdgpu_ps float @ashr_i32_vs(i32 %value, i32 inreg %amount) {
195; GCN-LABEL: ashr_i32_vs:
196; GCN:       ; %bb.0:
197; GCN-NEXT:    v_ashrrev_i32_e32 v0, s0, v0
198; GCN-NEXT:    ; return to shader part epilog
199  %result = ashr i32 %value, %amount
200  %cast = bitcast i32 %result to float
201  ret float %cast
202}
203
; Element-wise ashr of <2 x i32> with plain arguments: the shared GCN checks
; expect one v_ashrrev_i32 per element (v0 by v2, v1 by v3).
204define <2 x i32> @v_ashr_v2i32(<2 x i32> %value, <2 x i32> %amount) {
205; GCN-LABEL: v_ashr_v2i32:
206; GCN:       ; %bb.0:
207; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
208; GCN-NEXT:    v_ashrrev_i32_e32 v0, v2, v0
209; GCN-NEXT:    v_ashrrev_i32_e32 v1, v3, v1
210; GCN-NEXT:    s_setpc_b64 s[30:31]
211  %result = ashr <2 x i32> %value, %amount
212  ret <2 x i32> %result
213}
214
; Element-wise ashr of <2 x i32> by a splat constant of 31: the shared GCN
; checks expect one immediate-operand shift per element.
215define <2 x i32> @v_ashr_v2i32_31(<2 x i32> %value) {
216; GCN-LABEL: v_ashr_v2i32_31:
217; GCN:       ; %bb.0:
218; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
219; GCN-NEXT:    v_ashrrev_i32_e32 v0, 31, v0
220; GCN-NEXT:    v_ashrrev_i32_e32 v1, 31, v1
221; GCN-NEXT:    s_setpc_b64 s[30:31]
222  %result = ashr <2 x i32> %value, <i32 31, i32 31>
223  ret <2 x i32> %result
224}
225
; amdgpu_ps element-wise ashr of <2 x i32> with both operands inreg: the shared
; GCN checks expect one s_ashr_i32 per element (s0 by s2, s1 by s3).
226define amdgpu_ps <2 x i32> @s_ashr_v2i32(<2 x i32> inreg %value, <2 x i32> inreg %amount) {
227; GCN-LABEL: s_ashr_v2i32:
228; GCN:       ; %bb.0:
229; GCN-NEXT:    s_ashr_i32 s0, s0, s2
230; GCN-NEXT:    s_ashr_i32 s1, s1, s3
231; GCN-NEXT:    ; return to shader part epilog
232  %result = ashr <2 x i32> %value, %amount
233  ret <2 x i32> %result
234}
235
; Element-wise ashr of <3 x i32> with plain arguments: the shared GCN checks
; expect three v_ashrrev_i32 instructions, values in v0-v2 and amounts in v3-v5.
236define <3 x i32> @v_ashr_v3i32(<3 x i32> %value, <3 x i32> %amount) {
237; GCN-LABEL: v_ashr_v3i32:
238; GCN:       ; %bb.0:
239; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
240; GCN-NEXT:    v_ashrrev_i32_e32 v0, v3, v0
241; GCN-NEXT:    v_ashrrev_i32_e32 v1, v4, v1
242; GCN-NEXT:    v_ashrrev_i32_e32 v2, v5, v2
243; GCN-NEXT:    s_setpc_b64 s[30:31]
244  %result = ashr <3 x i32> %value, %amount
245  ret <3 x i32> %result
246}
247
; amdgpu_ps element-wise ashr of <3 x i32> with both operands inreg: the shared
; GCN checks expect three s_ashr_i32, values in s0-s2 and amounts in s3-s5.
248define amdgpu_ps <3 x i32> @s_ashr_v3i32(<3 x i32> inreg %value, <3 x i32> inreg %amount) {
249; GCN-LABEL: s_ashr_v3i32:
250; GCN:       ; %bb.0:
251; GCN-NEXT:    s_ashr_i32 s0, s0, s3
252; GCN-NEXT:    s_ashr_i32 s1, s1, s4
253; GCN-NEXT:    s_ashr_i32 s2, s2, s5
254; GCN-NEXT:    ; return to shader part epilog
255  %result = ashr <3 x i32> %value, %amount
256  ret <3 x i32> %result
257}
258
; Element-wise ashr of <4 x i32> with plain arguments: the shared GCN checks
; expect four v_ashrrev_i32 instructions, values in v0-v3 and amounts in v4-v7.
259define <4 x i32> @v_ashr_v4i32(<4 x i32> %value, <4 x i32> %amount) {
260; GCN-LABEL: v_ashr_v4i32:
261; GCN:       ; %bb.0:
262; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
263; GCN-NEXT:    v_ashrrev_i32_e32 v0, v4, v0
264; GCN-NEXT:    v_ashrrev_i32_e32 v1, v5, v1
265; GCN-NEXT:    v_ashrrev_i32_e32 v2, v6, v2
266; GCN-NEXT:    v_ashrrev_i32_e32 v3, v7, v3
267; GCN-NEXT:    s_setpc_b64 s[30:31]
268  %result = ashr <4 x i32> %value, %amount
269  ret <4 x i32> %result
270}
271
; amdgpu_ps element-wise ashr of <4 x i32> with both operands inreg: the shared
; GCN checks expect four s_ashr_i32, values in s0-s3 and amounts in s4-s7.
272define amdgpu_ps <4 x i32> @s_ashr_v4i32(<4 x i32> inreg %value, <4 x i32> inreg %amount) {
273; GCN-LABEL: s_ashr_v4i32:
274; GCN:       ; %bb.0:
275; GCN-NEXT:    s_ashr_i32 s0, s0, s4
276; GCN-NEXT:    s_ashr_i32 s1, s1, s5
277; GCN-NEXT:    s_ashr_i32 s2, s2, s6
278; GCN-NEXT:    s_ashr_i32 s3, s3, s7
279; GCN-NEXT:    ; return to shader part epilog
280  %result = ashr <4 x i32> %value, %amount
281  ret <4 x i32> %result
282}
283
; Element-wise ashr of <5 x i32> (an odd element count) with plain arguments:
; the shared GCN checks expect five v_ashrrev_i32 instructions, values in v0-v4
; and amounts in v5-v9.
284define <5 x i32> @v_ashr_v5i32(<5 x i32> %value, <5 x i32> %amount) {
285; GCN-LABEL: v_ashr_v5i32:
286; GCN:       ; %bb.0:
287; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
288; GCN-NEXT:    v_ashrrev_i32_e32 v0, v5, v0
289; GCN-NEXT:    v_ashrrev_i32_e32 v1, v6, v1
290; GCN-NEXT:    v_ashrrev_i32_e32 v2, v7, v2
291; GCN-NEXT:    v_ashrrev_i32_e32 v3, v8, v3
292; GCN-NEXT:    v_ashrrev_i32_e32 v4, v9, v4
293; GCN-NEXT:    s_setpc_b64 s[30:31]
294  %result = ashr <5 x i32> %value, %amount
295  ret <5 x i32> %result
296}
297
; amdgpu_ps element-wise ashr of <5 x i32> with both operands inreg: the shared
; GCN checks expect five s_ashr_i32, values in s0-s4 and amounts in s5-s9.
298define amdgpu_ps <5 x i32> @s_ashr_v5i32(<5 x i32> inreg %value, <5 x i32> inreg %amount) {
299; GCN-LABEL: s_ashr_v5i32:
300; GCN:       ; %bb.0:
301; GCN-NEXT:    s_ashr_i32 s0, s0, s5
302; GCN-NEXT:    s_ashr_i32 s1, s1, s6
303; GCN-NEXT:    s_ashr_i32 s2, s2, s7
304; GCN-NEXT:    s_ashr_i32 s3, s3, s8
305; GCN-NEXT:    s_ashr_i32 s4, s4, s9
306; GCN-NEXT:    ; return to shader part epilog
307  %result = ashr <5 x i32> %value, %amount
308  ret <5 x i32> %result
309}
310
; Element-wise ashr of <16 x i32> with plain arguments: the shared GCN checks
; expect sixteen v_ashrrev_i32 instructions, values in v0-v15 and amounts in
; v16-v31.
311define <16 x i32> @v_ashr_v16i32(<16 x i32> %value, <16 x i32> %amount) {
312; GCN-LABEL: v_ashr_v16i32:
313; GCN:       ; %bb.0:
314; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
315; GCN-NEXT:    v_ashrrev_i32_e32 v0, v16, v0
316; GCN-NEXT:    v_ashrrev_i32_e32 v1, v17, v1
317; GCN-NEXT:    v_ashrrev_i32_e32 v2, v18, v2
318; GCN-NEXT:    v_ashrrev_i32_e32 v3, v19, v3
319; GCN-NEXT:    v_ashrrev_i32_e32 v4, v20, v4
320; GCN-NEXT:    v_ashrrev_i32_e32 v5, v21, v5
321; GCN-NEXT:    v_ashrrev_i32_e32 v6, v22, v6
322; GCN-NEXT:    v_ashrrev_i32_e32 v7, v23, v7
323; GCN-NEXT:    v_ashrrev_i32_e32 v8, v24, v8
324; GCN-NEXT:    v_ashrrev_i32_e32 v9, v25, v9
325; GCN-NEXT:    v_ashrrev_i32_e32 v10, v26, v10
326; GCN-NEXT:    v_ashrrev_i32_e32 v11, v27, v11
327; GCN-NEXT:    v_ashrrev_i32_e32 v12, v28, v12
328; GCN-NEXT:    v_ashrrev_i32_e32 v13, v29, v13
329; GCN-NEXT:    v_ashrrev_i32_e32 v14, v30, v14
330; GCN-NEXT:    v_ashrrev_i32_e32 v15, v31, v15
331; GCN-NEXT:    s_setpc_b64 s[30:31]
332  %result = ashr <16 x i32> %value, %amount
333  ret <16 x i32> %result
334}
335
; amdgpu_ps element-wise ashr of <16 x i32> with both operands inreg: the
; shared GCN checks expect sixteen s_ashr_i32, values in s0-s15 and amounts in
; s16-s31.
336define amdgpu_ps <16 x i32> @s_ashr_v16i32(<16 x i32> inreg %value, <16 x i32> inreg %amount) {
337; GCN-LABEL: s_ashr_v16i32:
338; GCN:       ; %bb.0:
339; GCN-NEXT:    s_ashr_i32 s0, s0, s16
340; GCN-NEXT:    s_ashr_i32 s1, s1, s17
341; GCN-NEXT:    s_ashr_i32 s2, s2, s18
342; GCN-NEXT:    s_ashr_i32 s3, s3, s19
343; GCN-NEXT:    s_ashr_i32 s4, s4, s20
344; GCN-NEXT:    s_ashr_i32 s5, s5, s21
345; GCN-NEXT:    s_ashr_i32 s6, s6, s22
346; GCN-NEXT:    s_ashr_i32 s7, s7, s23
347; GCN-NEXT:    s_ashr_i32 s8, s8, s24
348; GCN-NEXT:    s_ashr_i32 s9, s9, s25
349; GCN-NEXT:    s_ashr_i32 s10, s10, s26
350; GCN-NEXT:    s_ashr_i32 s11, s11, s27
351; GCN-NEXT:    s_ashr_i32 s12, s12, s28
352; GCN-NEXT:    s_ashr_i32 s13, s13, s29
353; GCN-NEXT:    s_ashr_i32 s14, s14, s30
354; GCN-NEXT:    s_ashr_i32 s15, s15, s31
355; GCN-NEXT:    ; return to shader part epilog
356  %result = ashr <16 x i32> %value, %amount
357  ret <16 x i32> %result
358}
359
; ashr of an i16 value by a variable i16 amount (plain arguments). The GFX6
; checks mask the amount with 0xffff and sign-extend the value before a 32-bit
; shift; the GFX8 and GFX9 checks use a single v_ashrrev_i16.
360define i16 @v_ashr_i16(i16 %value, i16 %amount) {
361; GFX6-LABEL: v_ashr_i16:
362; GFX6:       ; %bb.0:
363; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
364; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
365; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
366; GFX6-NEXT:    v_ashrrev_i32_e32 v0, v1, v0
367; GFX6-NEXT:    s_setpc_b64 s[30:31]
368;
369; GFX8-LABEL: v_ashr_i16:
370; GFX8:       ; %bb.0:
371; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
372; GFX8-NEXT:    v_ashrrev_i16_e32 v0, v1, v0
373; GFX8-NEXT:    s_setpc_b64 s[30:31]
374;
375; GFX9-LABEL: v_ashr_i16:
376; GFX9:       ; %bb.0:
377; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
378; GFX9-NEXT:    v_ashrrev_i16_e32 v0, v1, v0
379; GFX9-NEXT:    s_setpc_b64 s[30:31]
380  %result = ashr i16 %value, %amount
381  ret i16 %result
382}
383
; ashr of an i16 value by the constant 31. The shared GCN checks contain no
; shift instruction at all, only the wait and the return.
; REVIEW: 31 is not smaller than the 16-bit width, and the LLVM LangRef defines
; ashr with such an amount as producing poison, which would explain the empty
; body. The scalar sibling s_ashr_i16_15 uses 15; confirm whether 31 is
; intentional here or whether 15 was meant (changing it requires regenerating
; the checks).
384define i16 @v_ashr_i16_31(i16 %value) {
385; GCN-LABEL: v_ashr_i16_31:
386; GCN:       ; %bb.0:
387; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
388; GCN-NEXT:    s_setpc_b64 s[30:31]
389  %result = ashr i16 %value, 31
390  ret i16 %result
391}
392
; amdgpu_ps variant of the variable i16 shift with both operands inreg. Every
; check group ends in a scalar s_ashr_i32 on s0 and s1. The GFX6 checks mask the
; amount with 0xffff, whereas the GFX8 and GFX9 checks sign-extend it.
393define amdgpu_ps i16 @s_ashr_i16(i16 inreg %value, i16 inreg %amount) {
394; GFX6-LABEL: s_ashr_i16:
395; GFX6:       ; %bb.0:
396; GFX6-NEXT:    s_and_b32 s1, s1, 0xffff
397; GFX6-NEXT:    s_sext_i32_i16 s0, s0
398; GFX6-NEXT:    s_ashr_i32 s0, s0, s1
399; GFX6-NEXT:    ; return to shader part epilog
400;
401; GFX8-LABEL: s_ashr_i16:
402; GFX8:       ; %bb.0:
403; GFX8-NEXT:    s_sext_i32_i16 s0, s0
404; GFX8-NEXT:    s_sext_i32_i16 s1, s1
405; GFX8-NEXT:    s_ashr_i32 s0, s0, s1
406; GFX8-NEXT:    ; return to shader part epilog
407;
408; GFX9-LABEL: s_ashr_i16:
409; GFX9:       ; %bb.0:
410; GFX9-NEXT:    s_sext_i32_i16 s0, s0
411; GFX9-NEXT:    s_sext_i32_i16 s1, s1
412; GFX9-NEXT:    s_ashr_i32 s0, s0, s1
413; GFX9-NEXT:    ; return to shader part epilog
414  %result = ashr i16 %value, %amount
415  ret i16 %result
416}
417
; amdgpu_ps i16 shift of an inreg value by the constant 15 (the largest
; in-range amount for i16). The shared GCN checks sign-extend s0 from 16 bits
; and then apply s_ashr_i32 with the immediate 15.
418define amdgpu_ps i16 @s_ashr_i16_15(i16 inreg %value) {
419; GCN-LABEL: s_ashr_i16_15:
420; GCN:       ; %bb.0:
421; GCN-NEXT:    s_sext_i32_i16 s0, s0
422; GCN-NEXT:    s_ashr_i32 s0, s0, 15
423; GCN-NEXT:    ; return to shader part epilog
424  %result = ashr i16 %value, 15
425  ret i16 %result
426}
427
; Mixed operands for i16: the value is inreg while the amount is a plain
; argument, and the result is bitcast to half for the return. The GFX6 checks
; mask the amount in v0, sign-extend s0, and use v_ashr_i32_e32; the GFX8 and
; GFX9 checks use a single v_ashrrev_i16_e64.
428define amdgpu_ps half @ashr_i16_sv(i16 inreg %value, i16 %amount) {
429; GFX6-LABEL: ashr_i16_sv:
430; GFX6:       ; %bb.0:
431; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
432; GFX6-NEXT:    s_sext_i32_i16 s0, s0
433; GFX6-NEXT:    v_ashr_i32_e32 v0, s0, v0
434; GFX6-NEXT:    ; return to shader part epilog
435;
436; GFX8-LABEL: ashr_i16_sv:
437; GFX8:       ; %bb.0:
438; GFX8-NEXT:    v_ashrrev_i16_e64 v0, v0, s0
439; GFX8-NEXT:    ; return to shader part epilog
440;
441; GFX9-LABEL: ashr_i16_sv:
442; GFX9:       ; %bb.0:
443; GFX9-NEXT:    v_ashrrev_i16_e64 v0, v0, s0
444; GFX9-NEXT:    ; return to shader part epilog
445  %result = ashr i16 %value, %amount
446  %cast = bitcast i16 %result to half
447  ret half %cast
448}
449
; Mixed operands for i16, mirrored: the value is a plain argument and the
; amount is inreg; the result is bitcast to half for the return. The GFX6 checks
; mask the amount in s0 and sign-extend the value in v0 before a 32-bit shift;
; the GFX8 and GFX9 checks use a single v_ashrrev_i16_e32.
450define amdgpu_ps half @ashr_i16_vs(i16 %value, i16 inreg %amount) {
451; GFX6-LABEL: ashr_i16_vs:
452; GFX6:       ; %bb.0:
453; GFX6-NEXT:    s_and_b32 s0, s0, 0xffff
454; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
455; GFX6-NEXT:    v_ashrrev_i32_e32 v0, s0, v0
456; GFX6-NEXT:    ; return to shader part epilog
457;
458; GFX8-LABEL: ashr_i16_vs:
459; GFX8:       ; %bb.0:
460; GFX8-NEXT:    v_ashrrev_i16_e32 v0, s0, v0
461; GFX8-NEXT:    ; return to shader part epilog
462;
463; GFX9-LABEL: ashr_i16_vs:
464; GFX9:       ; %bb.0:
465; GFX9-NEXT:    v_ashrrev_i16_e32 v0, s0, v0
466; GFX9-NEXT:    ; return to shader part epilog
467  %result = ashr i16 %value, %amount
468  %cast = bitcast i16 %result to half
469  ret half %cast
470}
471
; Element-wise ashr of <2 x i16> with plain arguments. The GFX6 checks handle
; each element in its own register with a mask, sign-extend and 32-bit shift.
; The GFX8 checks shift the low half directly and the high half via SDWA, then
; OR the halves together. The GFX9 checks use a single v_pk_ashrrev_i16.
472define <2 x i16> @v_ashr_v2i16(<2 x i16> %value, <2 x i16> %amount) {
473; GFX6-LABEL: v_ashr_v2i16:
474; GFX6:       ; %bb.0:
475; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
476; GFX6-NEXT:    s_mov_b32 s4, 0xffff
477; GFX6-NEXT:    v_and_b32_e32 v2, s4, v2
478; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
479; GFX6-NEXT:    v_ashrrev_i32_e32 v0, v2, v0
480; GFX6-NEXT:    v_and_b32_e32 v2, s4, v3
481; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
482; GFX6-NEXT:    v_ashrrev_i32_e32 v1, v2, v1
483; GFX6-NEXT:    s_setpc_b64 s[30:31]
484;
485; GFX8-LABEL: v_ashr_v2i16:
486; GFX8:       ; %bb.0:
487; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
488; GFX8-NEXT:    v_ashrrev_i16_e32 v2, v1, v0
489; GFX8-NEXT:    v_ashrrev_i16_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
490; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
491; GFX8-NEXT:    s_setpc_b64 s[30:31]
492;
493; GFX9-LABEL: v_ashr_v2i16:
494; GFX9:       ; %bb.0:
495; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
496; GFX9-NEXT:    v_pk_ashrrev_i16 v0, v1, v0
497; GFX9-NEXT:    s_setpc_b64 s[30:31]
498  %result = ashr <2 x i16> %value, %amount
499  ret <2 x i16> %result
500}
501
; Element-wise ashr of <2 x i16> by a splat constant of 15. The GFX6 checks
; sign-extend each element and shift by the immediate. The GFX8 checks
; materialize 15 in v2 for the SDWA high-half shift. The GFX9 checks use one
; v_pk_ashrrev_i16 with an op_sel_hi modifier on the inline constant.
502define <2 x i16> @v_ashr_v2i16_15(<2 x i16> %value) {
503; GFX6-LABEL: v_ashr_v2i16_15:
504; GFX6:       ; %bb.0:
505; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
506; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
507; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
508; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 15, v0
509; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 15, v1
510; GFX6-NEXT:    s_setpc_b64 s[30:31]
511;
512; GFX8-LABEL: v_ashr_v2i16_15:
513; GFX8:       ; %bb.0:
514; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
515; GFX8-NEXT:    v_mov_b32_e32 v2, 15
516; GFX8-NEXT:    v_ashrrev_i16_e32 v1, 15, v0
517; GFX8-NEXT:    v_ashrrev_i16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
518; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
519; GFX8-NEXT:    s_setpc_b64 s[30:31]
520;
521; GFX9-LABEL: v_ashr_v2i16_15:
522; GFX9:       ; %bb.0:
523; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
524; GFX9-NEXT:    v_pk_ashrrev_i16 v0, 15, v0 op_sel_hi:[0,1]
525; GFX9-NEXT:    s_setpc_b64 s[30:31]
526  %result = ashr <2 x i16> %value, <i16 15, i16 15>
527  ret <2 x i16> %result
528}
529
; amdgpu_ps element-wise ashr of <2 x i16> with both operands inreg; the result
; is bitcast to i32 for the return. All check groups shift each half with a
; scalar s_ashr_i32 and then repack: GFX6 and GFX8 with mask, shift-left and OR,
; GFX9 with s_pack_ll_b32_b16.
530define amdgpu_ps i32 @s_ashr_v2i16(<2 x i16> inreg %value, <2 x i16> inreg %amount) {
531; GFX6-LABEL: s_ashr_v2i16:
532; GFX6:       ; %bb.0:
533; GFX6-NEXT:    s_mov_b32 s4, 0xffff
534; GFX6-NEXT:    s_and_b32 s2, s2, s4
535; GFX6-NEXT:    s_sext_i32_i16 s0, s0
536; GFX6-NEXT:    s_ashr_i32 s0, s0, s2
537; GFX6-NEXT:    s_and_b32 s2, s3, s4
538; GFX6-NEXT:    s_sext_i32_i16 s1, s1
539; GFX6-NEXT:    s_ashr_i32 s1, s1, s2
540; GFX6-NEXT:    s_and_b32 s1, s1, s4
541; GFX6-NEXT:    s_and_b32 s0, s0, s4
542; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
543; GFX6-NEXT:    s_or_b32 s0, s0, s1
544; GFX6-NEXT:    ; return to shader part epilog
545;
546; GFX8-LABEL: s_ashr_v2i16:
547; GFX8:       ; %bb.0:
548; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
549; GFX8-NEXT:    s_lshr_b32 s3, s1, 16
550; GFX8-NEXT:    s_sext_i32_i16 s0, s0
551; GFX8-NEXT:    s_sext_i32_i16 s1, s1
552; GFX8-NEXT:    s_sext_i32_i16 s2, s2
553; GFX8-NEXT:    s_sext_i32_i16 s3, s3
554; GFX8-NEXT:    s_ashr_i32 s0, s0, s1
555; GFX8-NEXT:    s_ashr_i32 s1, s2, s3
556; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
557; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
558; GFX8-NEXT:    s_or_b32 s0, s1, s0
559; GFX8-NEXT:    ; return to shader part epilog
560;
561; GFX9-LABEL: s_ashr_v2i16:
562; GFX9:       ; %bb.0:
563; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
564; GFX9-NEXT:    s_lshr_b32 s3, s1, 16
565; GFX9-NEXT:    s_ashr_i32 s0, s0, s1
566; GFX9-NEXT:    s_ashr_i32 s1, s2, s3
567; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
568; GFX9-NEXT:    ; return to shader part epilog
569  %result = ashr <2 x i16> %value, %amount
570  %cast = bitcast <2 x i16> %result to i32
571  ret i32 %cast
572}
573
; Mixed operands for <2 x i16>: the value is inreg while the amount is a plain
; argument; the result is bitcast to float for the return. The GFX6 checks
; shift each element separately and repack with mask, shift-left and OR. The
; GFX8 checks copy the high half of s0 into v2 for the SDWA shift. The GFX9
; checks use a single v_pk_ashrrev_i16.
574define amdgpu_ps float @ashr_v2i16_sv(<2 x i16> inreg %value, <2 x i16> %amount) {
575; GFX6-LABEL: ashr_v2i16_sv:
576; GFX6:       ; %bb.0:
577; GFX6-NEXT:    s_mov_b32 s2, 0xffff
578; GFX6-NEXT:    v_and_b32_e32 v0, s2, v0
579; GFX6-NEXT:    s_sext_i32_i16 s0, s0
580; GFX6-NEXT:    v_ashr_i32_e32 v0, s0, v0
581; GFX6-NEXT:    v_and_b32_e32 v1, s2, v1
582; GFX6-NEXT:    s_sext_i32_i16 s0, s1
583; GFX6-NEXT:    v_ashr_i32_e32 v1, s0, v1
584; GFX6-NEXT:    v_and_b32_e32 v1, s2, v1
585; GFX6-NEXT:    v_and_b32_e32 v0, s2, v0
586; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
587; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
588; GFX6-NEXT:    ; return to shader part epilog
589;
590; GFX8-LABEL: ashr_v2i16_sv:
591; GFX8:       ; %bb.0:
592; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
593; GFX8-NEXT:    v_mov_b32_e32 v2, s1
594; GFX8-NEXT:    v_ashrrev_i16_e64 v1, v0, s0
595; GFX8-NEXT:    v_ashrrev_i16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
596; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
597; GFX8-NEXT:    ; return to shader part epilog
598;
599; GFX9-LABEL: ashr_v2i16_sv:
600; GFX9:       ; %bb.0:
601; GFX9-NEXT:    v_pk_ashrrev_i16 v0, v0, s0
602; GFX9-NEXT:    ; return to shader part epilog
603  %result = ashr <2 x i16> %value, %amount
604  %cast = bitcast <2 x i16> %result to float
605  ret float %cast
606}
607
; Mixed operands for <2 x i16>, mirrored: the value is a plain argument and the
; amount is inreg; the result is bitcast to float for the return. The GFX6
; checks shift each element separately and repack with mask, shift-left and OR.
; The GFX8 checks copy the high half of the amount into v2 for the SDWA shift.
; The GFX9 checks use a single v_pk_ashrrev_i16.
608define amdgpu_ps float @ashr_v2i16_vs(<2 x i16> %value, <2 x i16> inreg %amount) {
609; GFX6-LABEL: ashr_v2i16_vs:
610; GFX6:       ; %bb.0:
611; GFX6-NEXT:    s_mov_b32 s2, 0xffff
612; GFX6-NEXT:    s_and_b32 s0, s0, s2
613; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
614; GFX6-NEXT:    v_ashrrev_i32_e32 v0, s0, v0
615; GFX6-NEXT:    s_and_b32 s0, s1, s2
616; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
617; GFX6-NEXT:    v_ashrrev_i32_e32 v1, s0, v1
618; GFX6-NEXT:    v_and_b32_e32 v1, s2, v1
619; GFX6-NEXT:    v_and_b32_e32 v0, s2, v0
620; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
621; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
622; GFX6-NEXT:    ; return to shader part epilog
623;
624; GFX8-LABEL: ashr_v2i16_vs:
625; GFX8:       ; %bb.0:
626; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
627; GFX8-NEXT:    v_mov_b32_e32 v2, s1
628; GFX8-NEXT:    v_ashrrev_i16_e32 v1, s0, v0
629; GFX8-NEXT:    v_ashrrev_i16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
630; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
631; GFX8-NEXT:    ; return to shader part epilog
632;
633; GFX9-LABEL: ashr_v2i16_vs:
634; GFX9:       ; %bb.0:
635; GFX9-NEXT:    v_pk_ashrrev_i16 v0, s0, v0
636; GFX9-NEXT:    ; return to shader part epilog
637  %result = ashr <2 x i16> %value, %amount
638  %cast = bitcast <2 x i16> %result to float
639  ret float %cast
640}
641
642; FIXME: the <3 x i16> test cases below are disabled (commented out).
643; define <3 x i16> @v_ashr_v3i16(<3 x i16> %value, <3 x i16> %amount) {
644;   %result = ashr <3 x i16> %value, %amount
645;   ret <3 x i16> %result
646; }
647
648; define amdgpu_ps <3 x i16> @s_ashr_v3i16(<3 x i16> inreg %value, <3 x i16> inreg %amount) {
649;   %result = ashr <3 x i16> %value, %amount
650;   ret <3 x i16> %result
651; }
652
; Element-wise ashr of <4 x i16> with plain arguments; the result is bitcast to
; <2 x float> for the return. The GFX6 checks shift the four elements one by one
; and repack them pairwise into v0 and v1. The GFX8 checks use a plain plus an
; SDWA shift per 32-bit register, ORed together. The GFX9 checks use two
; v_pk_ashrrev_i16 instructions.
653define <2 x float> @v_ashr_v4i16(<4 x i16> %value, <4 x i16> %amount) {
654; GFX6-LABEL: v_ashr_v4i16:
655; GFX6:       ; %bb.0:
656; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
657; GFX6-NEXT:    s_mov_b32 s4, 0xffff
658; GFX6-NEXT:    v_and_b32_e32 v4, s4, v4
659; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
660; GFX6-NEXT:    v_ashrrev_i32_e32 v0, v4, v0
661; GFX6-NEXT:    v_and_b32_e32 v4, s4, v5
662; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
663; GFX6-NEXT:    v_ashrrev_i32_e32 v1, v4, v1
664; GFX6-NEXT:    v_and_b32_e32 v4, s4, v6
665; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 16
666; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
667; GFX6-NEXT:    v_ashrrev_i32_e32 v2, v4, v2
668; GFX6-NEXT:    v_and_b32_e32 v4, s4, v7
669; GFX6-NEXT:    v_bfe_i32 v3, v3, 0, 16
670; GFX6-NEXT:    v_ashrrev_i32_e32 v3, v4, v3
671; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
672; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
673; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
674; GFX6-NEXT:    v_and_b32_e32 v1, s4, v2
675; GFX6-NEXT:    v_and_b32_e32 v2, s4, v3
676; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
677; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
678; GFX6-NEXT:    s_setpc_b64 s[30:31]
679;
680; GFX8-LABEL: v_ashr_v4i16:
681; GFX8:       ; %bb.0:
682; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
683; GFX8-NEXT:    v_ashrrev_i16_e32 v4, v2, v0
684; GFX8-NEXT:    v_ashrrev_i16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
685; GFX8-NEXT:    v_ashrrev_i16_e32 v2, v3, v1
686; GFX8-NEXT:    v_ashrrev_i16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
687; GFX8-NEXT:    v_or_b32_e32 v0, v4, v0
688; GFX8-NEXT:    v_or_b32_e32 v1, v2, v1
689; GFX8-NEXT:    s_setpc_b64 s[30:31]
690;
691; GFX9-LABEL: v_ashr_v4i16:
692; GFX9:       ; %bb.0:
693; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
694; GFX9-NEXT:    v_pk_ashrrev_i16 v0, v2, v0
695; GFX9-NEXT:    v_pk_ashrrev_i16 v1, v3, v1
696; GFX9-NEXT:    s_setpc_b64 s[30:31]
697  %result = ashr <4 x i16> %value, %amount
698  %cast = bitcast <4 x i16> %result to <2 x float>
699  ret <2 x float> %cast
700}
701
; amdgpu_ps element-wise ashr of <4 x i16> with both operands inreg; the result
; is bitcast to <2 x i32> for the return. All check groups shift each 16-bit
; element with a scalar s_ashr_i32 and repack pairs into s0 and s1: GFX6 and
; GFX8 with mask, shift-left and OR, GFX9 with s_pack_ll_b32_b16.
702define amdgpu_ps <2 x i32> @s_ashr_v4i16(<4 x i16> inreg %value, <4 x i16> inreg %amount) {
703; GFX6-LABEL: s_ashr_v4i16:
704; GFX6:       ; %bb.0:
705; GFX6-NEXT:    s_mov_b32 s8, 0xffff
706; GFX6-NEXT:    s_and_b32 s4, s4, s8
707; GFX6-NEXT:    s_sext_i32_i16 s0, s0
708; GFX6-NEXT:    s_ashr_i32 s0, s0, s4
709; GFX6-NEXT:    s_and_b32 s4, s5, s8
710; GFX6-NEXT:    s_sext_i32_i16 s1, s1
711; GFX6-NEXT:    s_ashr_i32 s1, s1, s4
712; GFX6-NEXT:    s_and_b32 s4, s6, s8
713; GFX6-NEXT:    s_sext_i32_i16 s2, s2
714; GFX6-NEXT:    s_and_b32 s1, s1, s8
715; GFX6-NEXT:    s_ashr_i32 s2, s2, s4
716; GFX6-NEXT:    s_and_b32 s4, s7, s8
717; GFX6-NEXT:    s_sext_i32_i16 s3, s3
718; GFX6-NEXT:    s_ashr_i32 s3, s3, s4
719; GFX6-NEXT:    s_and_b32 s0, s0, s8
720; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
721; GFX6-NEXT:    s_or_b32 s0, s0, s1
722; GFX6-NEXT:    s_and_b32 s1, s2, s8
723; GFX6-NEXT:    s_and_b32 s2, s3, s8
724; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
725; GFX6-NEXT:    s_or_b32 s1, s1, s2
726; GFX6-NEXT:    ; return to shader part epilog
727;
728; GFX8-LABEL: s_ashr_v4i16:
729; GFX8:       ; %bb.0:
730; GFX8-NEXT:    s_lshr_b32 s4, s0, 16
731; GFX8-NEXT:    s_lshr_b32 s6, s2, 16
732; GFX8-NEXT:    s_lshr_b32 s5, s1, 16
733; GFX8-NEXT:    s_lshr_b32 s7, s3, 16
734; GFX8-NEXT:    s_sext_i32_i16 s0, s0
735; GFX8-NEXT:    s_sext_i32_i16 s2, s2
736; GFX8-NEXT:    s_sext_i32_i16 s4, s4
737; GFX8-NEXT:    s_sext_i32_i16 s6, s6
738; GFX8-NEXT:    s_ashr_i32 s0, s0, s2
739; GFX8-NEXT:    s_ashr_i32 s2, s4, s6
740; GFX8-NEXT:    s_mov_b32 s4, 0xffff
741; GFX8-NEXT:    s_sext_i32_i16 s1, s1
742; GFX8-NEXT:    s_sext_i32_i16 s3, s3
743; GFX8-NEXT:    s_sext_i32_i16 s5, s5
744; GFX8-NEXT:    s_sext_i32_i16 s7, s7
745; GFX8-NEXT:    s_ashr_i32 s1, s1, s3
746; GFX8-NEXT:    s_ashr_i32 s3, s5, s7
747; GFX8-NEXT:    s_lshl_b32 s2, s2, 16
748; GFX8-NEXT:    s_and_b32 s0, s0, s4
749; GFX8-NEXT:    s_or_b32 s0, s2, s0
750; GFX8-NEXT:    s_lshl_b32 s2, s3, 16
751; GFX8-NEXT:    s_and_b32 s1, s1, s4
752; GFX8-NEXT:    s_or_b32 s1, s2, s1
753; GFX8-NEXT:    ; return to shader part epilog
754;
755; GFX9-LABEL: s_ashr_v4i16:
756; GFX9:       ; %bb.0:
757; GFX9-NEXT:    s_lshr_b32 s4, s0, 16
758; GFX9-NEXT:    s_lshr_b32 s5, s2, 16
759; GFX9-NEXT:    s_ashr_i32 s0, s0, s2
760; GFX9-NEXT:    s_ashr_i32 s2, s4, s5
761; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
762; GFX9-NEXT:    s_lshr_b32 s2, s1, 16
763; GFX9-NEXT:    s_lshr_b32 s4, s3, 16
764; GFX9-NEXT:    s_ashr_i32 s1, s1, s3
765; GFX9-NEXT:    s_ashr_i32 s2, s2, s4
766; GFX9-NEXT:    s_pack_ll_b32_b16 s1, s1, s2
767; GFX9-NEXT:    ; return to shader part epilog
768  %result = ashr <4 x i16> %value, %amount
769  %cast = bitcast <4 x i16> %result to <2 x i32>
770  ret <2 x i32> %cast
771}
772
773; FIXME: the <5 x i16> and <6 x i16> test cases below are disabled (commented out).
774; define <5 x i16> @v_ashr_v5i16(<5 x i16> %value, <5 x i16> %amount) {
775;   %result = ashr <5 x i16> %value, %amount
776;   ret <5 x i16> %result
777; }
778
779; define amdgpu_ps <5 x i16> @s_ashr_v5i16(<5 x i16> inreg %value, <5 x i16> inreg %amount) {
780;   %result = ashr <5 x i16> %value, %amount
781;   ret <5 x i16> %result
782; }
783
784; define <3 x float> @v_ashr_v6i16(<6 x i16> %value, <6 x i16> %amount) {
785;   %result = ashr <6 x i16> %value, %amount
786;   %cast = bitcast <6 x i16> %result to <3 x float>
787;   ret <3 x float> %cast
788; }
789
790; define amdgpu_ps <3 x i32> @s_ashr_v6i16(<6 x i16> inreg %value, <6 x i16> inreg %amount) {
791;   %result = ashr <6 x i16> %value, %amount
792;   %cast = bitcast <6 x i16> %result to <3 x i32>
793;   ret <3 x i32> %cast
794; }
795
; Element-wise ashr of <8 x i16> with plain arguments; the result is bitcast to
; <4 x float> for the return. The GFX6 checks shift the eight elements one by
; one (values in v0-v7, amounts in v8-v15) and repack them pairwise into v0-v3.
; The GFX8 checks use a plain plus an SDWA shift per 32-bit register, ORed
; together. The GFX9 checks use four v_pk_ashrrev_i16 instructions.
796define <4 x float> @v_ashr_v8i16(<8 x i16> %value, <8 x i16> %amount) {
797; GFX6-LABEL: v_ashr_v8i16:
798; GFX6:       ; %bb.0:
799; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
800; GFX6-NEXT:    s_mov_b32 s4, 0xffff
801; GFX6-NEXT:    v_and_b32_e32 v8, s4, v8
802; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
803; GFX6-NEXT:    v_ashrrev_i32_e32 v0, v8, v0
804; GFX6-NEXT:    v_and_b32_e32 v8, s4, v9
805; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
806; GFX6-NEXT:    v_ashrrev_i32_e32 v1, v8, v1
807; GFX6-NEXT:    v_and_b32_e32 v8, s4, v10
808; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 16
809; GFX6-NEXT:    v_ashrrev_i32_e32 v2, v8, v2
810; GFX6-NEXT:    v_and_b32_e32 v8, s4, v11
811; GFX6-NEXT:    v_bfe_i32 v3, v3, 0, 16
812; GFX6-NEXT:    v_mov_b32_e32 v16, 0xffff
813; GFX6-NEXT:    v_ashrrev_i32_e32 v3, v8, v3
814; GFX6-NEXT:    v_and_b32_e32 v8, s4, v12
815; GFX6-NEXT:    v_bfe_i32 v4, v4, 0, 16
816; GFX6-NEXT:    v_and_b32_e32 v1, v1, v16
817; GFX6-NEXT:    v_ashrrev_i32_e32 v4, v8, v4
818; GFX6-NEXT:    v_and_b32_e32 v8, s4, v13
819; GFX6-NEXT:    v_bfe_i32 v5, v5, 0, 16
820; GFX6-NEXT:    v_ashrrev_i32_e32 v5, v8, v5
821; GFX6-NEXT:    v_and_b32_e32 v8, s4, v14
822; GFX6-NEXT:    v_bfe_i32 v6, v6, 0, 16
823; GFX6-NEXT:    v_and_b32_e32 v0, v0, v16
824; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
825; GFX6-NEXT:    v_ashrrev_i32_e32 v6, v8, v6
826; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
827; GFX6-NEXT:    v_and_b32_e32 v1, v2, v16
828; GFX6-NEXT:    v_and_b32_e32 v2, v3, v16
829; GFX6-NEXT:    v_and_b32_e32 v8, v15, v16
830; GFX6-NEXT:    v_bfe_i32 v7, v7, 0, 16
831; GFX6-NEXT:    v_and_b32_e32 v3, v5, v16
832; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
833; GFX6-NEXT:    v_ashrrev_i32_e32 v7, v8, v7
834; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
835; GFX6-NEXT:    v_and_b32_e32 v2, v4, v16
836; GFX6-NEXT:    v_and_b32_e32 v4, v7, v16
837; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
838; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
839; GFX6-NEXT:    v_and_b32_e32 v3, v6, v16
840; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
841; GFX6-NEXT:    v_or_b32_e32 v3, v3, v4
842; GFX6-NEXT:    s_setpc_b64 s[30:31]
843;
844; GFX8-LABEL: v_ashr_v8i16:
845; GFX8:       ; %bb.0:
846; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
847; GFX8-NEXT:    v_ashrrev_i16_e32 v8, v4, v0
848; GFX8-NEXT:    v_ashrrev_i16_sdwa v0, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
849; GFX8-NEXT:    v_ashrrev_i16_e32 v4, v5, v1
850; GFX8-NEXT:    v_ashrrev_i16_sdwa v1, v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
851; GFX8-NEXT:    v_or_b32_e32 v1, v4, v1
852; GFX8-NEXT:    v_ashrrev_i16_e32 v4, v6, v2
853; GFX8-NEXT:    v_ashrrev_i16_sdwa v2, v6, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
854; GFX8-NEXT:    v_or_b32_e32 v2, v4, v2
855; GFX8-NEXT:    v_ashrrev_i16_e32 v4, v7, v3
856; GFX8-NEXT:    v_ashrrev_i16_sdwa v3, v7, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
857; GFX8-NEXT:    v_or_b32_e32 v0, v8, v0
858; GFX8-NEXT:    v_or_b32_e32 v3, v4, v3
859; GFX8-NEXT:    s_setpc_b64 s[30:31]
860;
861; GFX9-LABEL: v_ashr_v8i16:
862; GFX9:       ; %bb.0:
863; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
864; GFX9-NEXT:    v_pk_ashrrev_i16 v0, v4, v0
865; GFX9-NEXT:    v_pk_ashrrev_i16 v1, v5, v1
866; GFX9-NEXT:    v_pk_ashrrev_i16 v2, v6, v2
867; GFX9-NEXT:    v_pk_ashrrev_i16 v3, v7, v3
868; GFX9-NEXT:    s_setpc_b64 s[30:31]
869  %result = ashr <8 x i16> %value, %amount
870  %cast = bitcast <8 x i16> %result to <4 x float>
871  ret <4 x float> %cast
872}
873
; Arithmetic shift right of <8 x i16> where both the value and the per-lane
; amount are passed inreg to an amdgpu_ps function; the result is returned
; bitcast to <4 x i32>. All expected instructions are scalar (s_*).
;   - GFX6 checks: each amount is masked with 0xffff, each value lane is
;     sign-extended with s_sext_i32_i16, shifted with s_ashr_i32, and the
;     results are re-packed in pairs with and / lshl 16 / or.
;   - GFX8 checks: the high halves are extracted with s_lshr_b32 ..., 16, every
;     half is sign-extended, shifted, and re-packed with lshl 16 / and / or.
;   - GFX9 checks: the high halves are extracted with s_lshr_b32 ..., 16, the
;     halves are shifted with s_ashr_i32 and re-packed with s_pack_ll_b32_b16.
; NOTE(review): unlike the GFX6 and GFX8 sequences, the GFX9 sequence shows no
; s_sext_i32_i16 before s_ashr_i32 -- confirm this is the intended lowering
; against current llc output before relying on these assertions.
874define amdgpu_ps <4 x i32> @s_ashr_v8i16(<8 x i16> inreg %value, <8 x i16> inreg %amount) {
875; GFX6-LABEL: s_ashr_v8i16:
876; GFX6:       ; %bb.0:
877; GFX6-NEXT:    s_mov_b32 s16, 0xffff
878; GFX6-NEXT:    s_and_b32 s8, s8, s16
879; GFX6-NEXT:    s_sext_i32_i16 s0, s0
880; GFX6-NEXT:    s_ashr_i32 s0, s0, s8
881; GFX6-NEXT:    s_and_b32 s8, s9, s16
882; GFX6-NEXT:    s_sext_i32_i16 s1, s1
883; GFX6-NEXT:    s_ashr_i32 s1, s1, s8
884; GFX6-NEXT:    s_and_b32 s8, s10, s16
885; GFX6-NEXT:    s_sext_i32_i16 s2, s2
886; GFX6-NEXT:    s_ashr_i32 s2, s2, s8
887; GFX6-NEXT:    s_and_b32 s8, s11, s16
888; GFX6-NEXT:    s_sext_i32_i16 s3, s3
889; GFX6-NEXT:    s_ashr_i32 s3, s3, s8
890; GFX6-NEXT:    s_and_b32 s8, s12, s16
891; GFX6-NEXT:    s_sext_i32_i16 s4, s4
892; GFX6-NEXT:    s_and_b32 s1, s1, s16
893; GFX6-NEXT:    s_ashr_i32 s4, s4, s8
894; GFX6-NEXT:    s_and_b32 s8, s13, s16
895; GFX6-NEXT:    s_sext_i32_i16 s5, s5
896; GFX6-NEXT:    s_ashr_i32 s5, s5, s8
897; GFX6-NEXT:    s_and_b32 s8, s14, s16
898; GFX6-NEXT:    s_sext_i32_i16 s6, s6
899; GFX6-NEXT:    s_and_b32 s0, s0, s16
900; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
901; GFX6-NEXT:    s_ashr_i32 s6, s6, s8
902; GFX6-NEXT:    s_or_b32 s0, s0, s1
903; GFX6-NEXT:    s_and_b32 s1, s2, s16
904; GFX6-NEXT:    s_and_b32 s2, s3, s16
905; GFX6-NEXT:    s_and_b32 s8, s15, s16
906; GFX6-NEXT:    s_sext_i32_i16 s7, s7
907; GFX6-NEXT:    s_and_b32 s3, s5, s16
908; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
909; GFX6-NEXT:    s_ashr_i32 s7, s7, s8
910; GFX6-NEXT:    s_or_b32 s1, s1, s2
911; GFX6-NEXT:    s_and_b32 s2, s4, s16
912; GFX6-NEXT:    s_and_b32 s4, s7, s16
913; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
914; GFX6-NEXT:    s_or_b32 s2, s2, s3
915; GFX6-NEXT:    s_and_b32 s3, s6, s16
916; GFX6-NEXT:    s_lshl_b32 s4, s4, 16
917; GFX6-NEXT:    s_or_b32 s3, s3, s4
918; GFX6-NEXT:    ; return to shader part epilog
919;
920; GFX8-LABEL: s_ashr_v8i16:
921; GFX8:       ; %bb.0:
922; GFX8-NEXT:    s_lshr_b32 s8, s0, 16
923; GFX8-NEXT:    s_lshr_b32 s12, s4, 16
924; GFX8-NEXT:    s_lshr_b32 s9, s1, 16
925; GFX8-NEXT:    s_lshr_b32 s13, s5, 16
926; GFX8-NEXT:    s_sext_i32_i16 s0, s0
927; GFX8-NEXT:    s_sext_i32_i16 s4, s4
928; GFX8-NEXT:    s_sext_i32_i16 s8, s8
929; GFX8-NEXT:    s_sext_i32_i16 s12, s12
930; GFX8-NEXT:    s_lshr_b32 s10, s2, 16
931; GFX8-NEXT:    s_lshr_b32 s14, s6, 16
932; GFX8-NEXT:    s_ashr_i32 s0, s0, s4
933; GFX8-NEXT:    s_ashr_i32 s4, s8, s12
934; GFX8-NEXT:    s_mov_b32 s8, 0xffff
935; GFX8-NEXT:    s_sext_i32_i16 s1, s1
936; GFX8-NEXT:    s_sext_i32_i16 s5, s5
937; GFX8-NEXT:    s_sext_i32_i16 s9, s9
938; GFX8-NEXT:    s_sext_i32_i16 s13, s13
939; GFX8-NEXT:    s_lshr_b32 s11, s3, 16
940; GFX8-NEXT:    s_lshr_b32 s15, s7, 16
941; GFX8-NEXT:    s_ashr_i32 s1, s1, s5
942; GFX8-NEXT:    s_sext_i32_i16 s2, s2
943; GFX8-NEXT:    s_sext_i32_i16 s6, s6
944; GFX8-NEXT:    s_sext_i32_i16 s10, s10
945; GFX8-NEXT:    s_sext_i32_i16 s14, s14
946; GFX8-NEXT:    s_ashr_i32 s5, s9, s13
947; GFX8-NEXT:    s_lshl_b32 s4, s4, 16
948; GFX8-NEXT:    s_and_b32 s0, s0, s8
949; GFX8-NEXT:    s_ashr_i32 s2, s2, s6
950; GFX8-NEXT:    s_or_b32 s0, s4, s0
951; GFX8-NEXT:    s_sext_i32_i16 s3, s3
952; GFX8-NEXT:    s_sext_i32_i16 s7, s7
953; GFX8-NEXT:    s_sext_i32_i16 s11, s11
954; GFX8-NEXT:    s_sext_i32_i16 s15, s15
955; GFX8-NEXT:    s_ashr_i32 s6, s10, s14
956; GFX8-NEXT:    s_lshl_b32 s4, s5, 16
957; GFX8-NEXT:    s_and_b32 s1, s1, s8
958; GFX8-NEXT:    s_ashr_i32 s3, s3, s7
959; GFX8-NEXT:    s_or_b32 s1, s4, s1
960; GFX8-NEXT:    s_ashr_i32 s7, s11, s15
961; GFX8-NEXT:    s_lshl_b32 s4, s6, 16
962; GFX8-NEXT:    s_and_b32 s2, s2, s8
963; GFX8-NEXT:    s_or_b32 s2, s4, s2
964; GFX8-NEXT:    s_lshl_b32 s4, s7, 16
965; GFX8-NEXT:    s_and_b32 s3, s3, s8
966; GFX8-NEXT:    s_or_b32 s3, s4, s3
967; GFX8-NEXT:    ; return to shader part epilog
968;
969; GFX9-LABEL: s_ashr_v8i16:
970; GFX9:       ; %bb.0:
971; GFX9-NEXT:    s_lshr_b32 s8, s0, 16
972; GFX9-NEXT:    s_lshr_b32 s9, s4, 16
973; GFX9-NEXT:    s_ashr_i32 s0, s0, s4
974; GFX9-NEXT:    s_ashr_i32 s4, s8, s9
975; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s4
976; GFX9-NEXT:    s_lshr_b32 s4, s1, 16
977; GFX9-NEXT:    s_lshr_b32 s8, s5, 16
978; GFX9-NEXT:    s_ashr_i32 s1, s1, s5
979; GFX9-NEXT:    s_ashr_i32 s4, s4, s8
980; GFX9-NEXT:    s_pack_ll_b32_b16 s1, s1, s4
981; GFX9-NEXT:    s_lshr_b32 s4, s2, 16
982; GFX9-NEXT:    s_lshr_b32 s5, s6, 16
983; GFX9-NEXT:    s_ashr_i32 s4, s4, s5
984; GFX9-NEXT:    s_ashr_i32 s2, s2, s6
985; GFX9-NEXT:    s_pack_ll_b32_b16 s2, s2, s4
986; GFX9-NEXT:    s_lshr_b32 s4, s3, 16
987; GFX9-NEXT:    s_lshr_b32 s5, s7, 16
988; GFX9-NEXT:    s_ashr_i32 s3, s3, s7
989; GFX9-NEXT:    s_ashr_i32 s4, s4, s5
990; GFX9-NEXT:    s_pack_ll_b32_b16 s3, s3, s4
991; GFX9-NEXT:    ; return to shader part epilog
992  %result = ashr <8 x i16> %value, %amount
993  %cast = bitcast <8 x i16> %result to <4 x i32>
994  ret <4 x i32> %cast
995}
996
; Arithmetic shift right of an i64 by a variable i64 amount, default calling
; convention. The checks expect a single 64-bit vector shift on v[0:1] whose
; amount operand is v2 only: v_ashr_i64 on GFX6, and v_ashrrev_i64 (amount
; operand first) on GFX8 and GFX9.
997define i64 @v_ashr_i64(i64 %value, i64 %amount) {
998; GFX6-LABEL: v_ashr_i64:
999; GFX6:       ; %bb.0:
1000; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1001; GFX6-NEXT:    v_ashr_i64 v[0:1], v[0:1], v2
1002; GFX6-NEXT:    s_setpc_b64 s[30:31]
1003;
1004; GFX8-LABEL: v_ashr_i64:
1005; GFX8:       ; %bb.0:
1006; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1007; GFX8-NEXT:    v_ashrrev_i64 v[0:1], v2, v[0:1]
1008; GFX8-NEXT:    s_setpc_b64 s[30:31]
1009;
1010; GFX9-LABEL: v_ashr_i64:
1011; GFX9:       ; %bb.0:
1012; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1013; GFX9-NEXT:    v_ashrrev_i64 v[0:1], v2, v[0:1]
1014; GFX9-NEXT:    s_setpc_b64 s[30:31]
1015  %result = ashr i64 %value, %amount
1016  ret i64 %result
1017}
1018
; Arithmetic shift right of an i64 by the constant 63. All three run lines share
; one expectation: v1 is shifted right by 31 with a 32-bit shift into v0, and
; that result is then copied into v1, so no 64-bit shift is emitted.
1019define i64 @v_ashr_i64_63(i64 %value) {
1020; GCN-LABEL: v_ashr_i64_63:
1021; GCN:       ; %bb.0:
1022; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1023; GCN-NEXT:    v_ashrrev_i32_e32 v0, 31, v1
1024; GCN-NEXT:    v_mov_b32_e32 v1, v0
1025; GCN-NEXT:    s_setpc_b64 s[30:31]
1026  %result = ashr i64 %value, 63
1027  ret i64 %result
1028}
1029
; Arithmetic shift right of an i64 by the constant 33. All three run lines share
; one expectation built from 32-bit shifts of v1 only: v1 >> 31 is computed into
; v2, v1 >> 1 becomes the new v0, and v2 is then moved into v1.
1030define i64 @v_ashr_i64_33(i64 %value) {
1031; GCN-LABEL: v_ashr_i64_33:
1032; GCN:       ; %bb.0:
1033; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1034; GCN-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
1035; GCN-NEXT:    v_ashrrev_i32_e32 v0, 1, v1
1036; GCN-NEXT:    v_mov_b32_e32 v1, v2
1037; GCN-NEXT:    s_setpc_b64 s[30:31]
1038  %result = ashr i64 %value, 33
1039  ret i64 %result
1040}
1041
; Arithmetic shift right of an i64 by the constant 32. All three run lines share
; one expectation: v1 is copied into v0, then v1 is recomputed as that value
; shifted right by 31 with a 32-bit shift.
1042define i64 @v_ashr_i64_32(i64 %value) {
1043; GCN-LABEL: v_ashr_i64_32:
1044; GCN:       ; %bb.0:
1045; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1046; GCN-NEXT:    v_mov_b32_e32 v0, v1
1047; GCN-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
1048; GCN-NEXT:    s_setpc_b64 s[30:31]
1049  %result = ashr i64 %value, 32
1050  ret i64 %result
1051}
1052
; Arithmetic shift right of an i64 by the constant 31. Unlike the 32/33/63
; cases in this file, the checks expect a full 64-bit shift with the literal 31
; as the amount: v_ashr_i64 on GFX6, v_ashrrev_i64 on GFX8 and GFX9.
1053define i64 @v_ashr_i64_31(i64 %value) {
1054; GFX6-LABEL: v_ashr_i64_31:
1055; GFX6:       ; %bb.0:
1056; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1057; GFX6-NEXT:    v_ashr_i64 v[0:1], v[0:1], 31
1058; GFX6-NEXT:    s_setpc_b64 s[30:31]
1059;
1060; GFX8-LABEL: v_ashr_i64_31:
1061; GFX8:       ; %bb.0:
1062; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1063; GFX8-NEXT:    v_ashrrev_i64 v[0:1], 31, v[0:1]
1064; GFX8-NEXT:    s_setpc_b64 s[30:31]
1065;
1066; GFX9-LABEL: v_ashr_i64_31:
1067; GFX9:       ; %bb.0:
1068; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1069; GFX9-NEXT:    v_ashrrev_i64 v[0:1], 31, v[0:1]
1070; GFX9-NEXT:    s_setpc_b64 s[30:31]
1071  %result = ashr i64 %value, 31
1072  ret i64 %result
1073}
1074
; Arithmetic shift right of an i64 by a variable i64 amount, with both operands
; passed inreg to an amdgpu_ps function. All three run lines share one
; expectation: a single s_ashr_i64 on s[0:1] whose amount operand is s2 only.
1075define amdgpu_ps i64 @s_ashr_i64(i64 inreg %value, i64 inreg %amount) {
1076; GCN-LABEL: s_ashr_i64:
1077; GCN:       ; %bb.0:
1078; GCN-NEXT:    s_ashr_i64 s[0:1], s[0:1], s2
1079; GCN-NEXT:    ; return to shader part epilog
1080  %result = ashr i64 %value, %amount
1081  ret i64 %result
1082}
1083
; Arithmetic shift right of an inreg i64 by the constant 63 in an amdgpu_ps
; function. All three run lines share one expectation: s1 is shifted right by
; 31 with a 32-bit scalar shift into s0, and that result is copied into s1.
1084define amdgpu_ps i64 @s_ashr_i64_63(i64 inreg %value) {
1085; GCN-LABEL: s_ashr_i64_63:
1086; GCN:       ; %bb.0:
1087; GCN-NEXT:    s_ashr_i32 s0, s1, 31
1088; GCN-NEXT:    s_mov_b32 s1, s0
1089; GCN-NEXT:    ; return to shader part epilog
1090  %result = ashr i64 %value, 63
1091  ret i64 %result
1092}
1093
; Arithmetic shift right of an inreg i64 by the constant 33 in an amdgpu_ps
; function. All three run lines share one expectation built from 32-bit scalar
; shifts of s1 only: s1 >> 31 is computed into s2, s1 >> 1 becomes the new s0,
; and s2 is then moved into s1.
1094define amdgpu_ps i64 @s_ashr_i64_33(i64 inreg %value) {
1095; GCN-LABEL: s_ashr_i64_33:
1096; GCN:       ; %bb.0:
1097; GCN-NEXT:    s_ashr_i32 s2, s1, 31
1098; GCN-NEXT:    s_ashr_i32 s0, s1, 1
1099; GCN-NEXT:    s_mov_b32 s1, s2
1100; GCN-NEXT:    ; return to shader part epilog
1101  %result = ashr i64 %value, 33
1102  ret i64 %result
1103}
1104
; Arithmetic shift right of an inreg i64 by the constant 32 in an amdgpu_ps
; function. All three run lines share one expectation: s1 is copied into s0,
; then s1 is shifted right by 31 in place with a 32-bit scalar shift.
1105define amdgpu_ps i64 @s_ashr_i64_32(i64 inreg %value) {
1106; GCN-LABEL: s_ashr_i64_32:
1107; GCN:       ; %bb.0:
1108; GCN-NEXT:    s_mov_b32 s0, s1
1109; GCN-NEXT:    s_ashr_i32 s1, s1, 31
1110; GCN-NEXT:    ; return to shader part epilog
1111  %result = ashr i64 %value, 32
1112  ret i64 %result
1113}
1114
; Arithmetic shift right of an inreg i64 by the constant 31 in an amdgpu_ps
; function. Unlike the 32/33/63 scalar cases in this file, the shared
; expectation is a full s_ashr_i64 on s[0:1] with the literal 31 as the amount.
1115define amdgpu_ps i64 @s_ashr_i64_31(i64 inreg %value) {
1116; GCN-LABEL: s_ashr_i64_31:
1117; GCN:       ; %bb.0:
1118; GCN-NEXT:    s_ashr_i64 s[0:1], s[0:1], 31
1119; GCN-NEXT:    ; return to shader part epilog
1120  %result = ashr i64 %value, %amount
1121  ret i64 %result
1122}
1123
; Mixed-operand case: the i64 value is passed inreg and the i64 amount is not;
; the result is returned bitcast to <2 x float>. The checks expect one 64-bit
; vector shift that reads the value from s[0:1] and the amount from v0, writing
; v[0:1]: v_ashr_i64 on GFX6, v_ashrrev_i64 on GFX8 and GFX9.
1124define amdgpu_ps <2 x float> @ashr_i64_sv(i64 inreg %value, i64 %amount) {
1125; GFX6-LABEL: ashr_i64_sv:
1126; GFX6:       ; %bb.0:
1127; GFX6-NEXT:    v_ashr_i64 v[0:1], s[0:1], v0
1128; GFX6-NEXT:    ; return to shader part epilog
1129;
1130; GFX8-LABEL: ashr_i64_sv:
1131; GFX8:       ; %bb.0:
1132; GFX8-NEXT:    v_ashrrev_i64 v[0:1], v0, s[0:1]
1133; GFX8-NEXT:    ; return to shader part epilog
1134;
1135; GFX9-LABEL: ashr_i64_sv:
1136; GFX9:       ; %bb.0:
1137; GFX9-NEXT:    v_ashrrev_i64 v[0:1], v0, s[0:1]
1138; GFX9-NEXT:    ; return to shader part epilog
1139  %result = ashr i64 %value, %amount
1140  %cast = bitcast i64 %result to <2 x float>
1141  ret <2 x float> %cast
1142}
1143
; Mixed-operand case: the i64 amount is passed inreg and the i64 value is not;
; the result is returned bitcast to <2 x float>. The checks expect one 64-bit
; vector shift of v[0:1] with s0 as the amount operand: v_ashr_i64 on GFX6,
; v_ashrrev_i64 on GFX8 and GFX9.
1144define amdgpu_ps <2 x float> @ashr_i64_vs(i64 %value, i64 inreg %amount) {
1145; GFX6-LABEL: ashr_i64_vs:
1146; GFX6:       ; %bb.0:
1147; GFX6-NEXT:    v_ashr_i64 v[0:1], v[0:1], s0
1148; GFX6-NEXT:    ; return to shader part epilog
1149;
1150; GFX8-LABEL: ashr_i64_vs:
1151; GFX8:       ; %bb.0:
1152; GFX8-NEXT:    v_ashrrev_i64 v[0:1], s0, v[0:1]
1153; GFX8-NEXT:    ; return to shader part epilog
1154;
1155; GFX9-LABEL: ashr_i64_vs:
1156; GFX9:       ; %bb.0:
1157; GFX9-NEXT:    v_ashrrev_i64 v[0:1], s0, v[0:1]
1158; GFX9-NEXT:    ; return to shader part epilog
1159  %result = ashr i64 %value, %amount
1160  %cast = bitcast i64 %result to <2 x float>
1161  ret <2 x float> %cast
1162}
1163
; Arithmetic shift right of <2 x i64> by a variable <2 x i64> amount, default
; calling convention. The checks expect two independent 64-bit vector shifts:
; v[0:1] by v4 and v[2:3] by v6 (v_ashr_i64 on GFX6, v_ashrrev_i64 on GFX8 and
; GFX9). v5 and v7 do not appear in the expected output.
1164define <2 x i64> @v_ashr_v2i64(<2 x i64> %value, <2 x i64> %amount) {
1165; GFX6-LABEL: v_ashr_v2i64:
1166; GFX6:       ; %bb.0:
1167; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1168; GFX6-NEXT:    v_ashr_i64 v[0:1], v[0:1], v4
1169; GFX6-NEXT:    v_ashr_i64 v[2:3], v[2:3], v6
1170; GFX6-NEXT:    s_setpc_b64 s[30:31]
1171;
1172; GFX8-LABEL: v_ashr_v2i64:
1173; GFX8:       ; %bb.0:
1174; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1175; GFX8-NEXT:    v_ashrrev_i64 v[0:1], v4, v[0:1]
1176; GFX8-NEXT:    v_ashrrev_i64 v[2:3], v6, v[2:3]
1177; GFX8-NEXT:    s_setpc_b64 s[30:31]
1178;
1179; GFX9-LABEL: v_ashr_v2i64:
1180; GFX9:       ; %bb.0:
1181; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1182; GFX9-NEXT:    v_ashrrev_i64 v[0:1], v4, v[0:1]
1183; GFX9-NEXT:    v_ashrrev_i64 v[2:3], v6, v[2:3]
1184; GFX9-NEXT:    s_setpc_b64 s[30:31]
1185  %result = ashr <2 x i64> %value, %amount
1186  ret <2 x i64> %result
1187}
1188
; Arithmetic shift right of <2 x i64> by the constant vector <31, 31>, default
; calling convention. The checks expect two 64-bit vector shifts, on v[0:1] and
; v[2:3], each with the literal 31 as the amount (v_ashr_i64 on GFX6,
; v_ashrrev_i64 on GFX8 and GFX9).
1189define <2 x i64> @v_ashr_v2i64_31(<2 x i64> %value) {
1190; GFX6-LABEL: v_ashr_v2i64_31:
1191; GFX6:       ; %bb.0:
1192; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1193; GFX6-NEXT:    v_ashr_i64 v[0:1], v[0:1], 31
1194; GFX6-NEXT:    v_ashr_i64 v[2:3], v[2:3], 31
1195; GFX6-NEXT:    s_setpc_b64 s[30:31]
1196;
1197; GFX8-LABEL: v_ashr_v2i64_31:
1198; GFX8:       ; %bb.0:
1199; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1200; GFX8-NEXT:    v_ashrrev_i64 v[0:1], 31, v[0:1]
1201; GFX8-NEXT:    v_ashrrev_i64 v[2:3], 31, v[2:3]
1202; GFX8-NEXT:    s_setpc_b64 s[30:31]
1203;
1204; GFX9-LABEL: v_ashr_v2i64_31:
1205; GFX9:       ; %bb.0:
1206; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1207; GFX9-NEXT:    v_ashrrev_i64 v[0:1], 31, v[0:1]
1208; GFX9-NEXT:    v_ashrrev_i64 v[2:3], 31, v[2:3]
1209; GFX9-NEXT:    s_setpc_b64 s[30:31]
1210  %result = ashr <2 x i64> %value, <i64 31, i64 31>
1211  ret <2 x i64> %result
1212}
1213
; Arithmetic shift right of <2 x i64> by a variable <2 x i64> amount, with both
; operands passed inreg to an amdgpu_ps function. All three run lines share one
; expectation: two s_ashr_i64 instructions, s[0:1] by s4 and s[2:3] by s6.
; s5 and s7 do not appear in the expected output.
1214define amdgpu_ps <2 x i64> @s_ashr_v2i64(<2 x i64> inreg %value, <2 x i64> inreg %amount) {
1215; GCN-LABEL: s_ashr_v2i64:
1216; GCN:       ; %bb.0:
1217; GCN-NEXT:    s_ashr_i64 s[0:1], s[0:1], s4
1218; GCN-NEXT:    s_ashr_i64 s[2:3], s[2:3], s6
1219; GCN-NEXT:    ; return to shader part epilog
1220  %result = ashr <2 x i64> %value, %amount
1221  ret <2 x i64> %result
1222}
1223