• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - %s | FileCheck -check-prefix=GFX6 %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10NSA %s
4
; gather4_2d: pixel-shader test that calls llvm.amdgcn.image.gather4.2d with
; first operand i32 1 and two float coordinates (%s, %t), then returns the
; <4 x float> result.
; Expected output for both the tahiti and gfx1010 runs: the inreg descriptor
; registers are copied down by two (s2..s13 -> s0..s11), exec is saved,
; s_wqm is applied, exec is and-ed with the saved copy, and one image_gather4
; reads v[0:1] with dmask:0x1. The gfx1010 checks also expect
; dim:SQ_RSRC_IMG_2D on the instruction.
5define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
6; GFX6-LABEL: gather4_2d:
7; GFX6:       ; %bb.0: ; %main_body
8; GFX6-NEXT:    s_mov_b32 s0, s2
9; GFX6-NEXT:    s_mov_b32 s1, s3
10; GFX6-NEXT:    s_mov_b32 s2, s4
11; GFX6-NEXT:    s_mov_b32 s3, s5
12; GFX6-NEXT:    s_mov_b32 s4, s6
13; GFX6-NEXT:    s_mov_b32 s5, s7
14; GFX6-NEXT:    s_mov_b32 s6, s8
15; GFX6-NEXT:    s_mov_b32 s7, s9
16; GFX6-NEXT:    s_mov_b32 s8, s10
17; GFX6-NEXT:    s_mov_b32 s9, s11
18; GFX6-NEXT:    s_mov_b64 s[14:15], exec
19; GFX6-NEXT:    s_mov_b32 s10, s12
20; GFX6-NEXT:    s_mov_b32 s11, s13
21; GFX6-NEXT:    s_wqm_b64 exec, exec
22; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
23; GFX6-NEXT:    image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1
24; GFX6-NEXT:    s_waitcnt vmcnt(0)
25; GFX6-NEXT:    ; return to shader part epilog
26;
27; GFX10NSA-LABEL: gather4_2d:
28; GFX10NSA:       ; %bb.0: ; %main_body
29; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
30; GFX10NSA-NEXT:    s_mov_b32 s0, s2
31; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
32; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
33; GFX10NSA-NEXT:    s_mov_b32 s1, s3
34; GFX10NSA-NEXT:    s_mov_b32 s2, s4
35; GFX10NSA-NEXT:    s_mov_b32 s3, s5
36; GFX10NSA-NEXT:    s_mov_b32 s4, s6
37; GFX10NSA-NEXT:    s_mov_b32 s5, s7
38; GFX10NSA-NEXT:    s_mov_b32 s6, s8
39; GFX10NSA-NEXT:    s_mov_b32 s7, s9
40; GFX10NSA-NEXT:    s_mov_b32 s8, s10
41; GFX10NSA-NEXT:    s_mov_b32 s9, s11
42; GFX10NSA-NEXT:    s_mov_b32 s10, s12
43; GFX10NSA-NEXT:    s_mov_b32 s11, s13
44; GFX10NSA-NEXT:    image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
45; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
46; GFX10NSA-NEXT:    ; return to shader part epilog
47main_body:
48  %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
49  ret <4 x float> %v
50}
51
; gather4_cube: calls llvm.amdgcn.image.gather4.cube with first operand i32 1
; and three float address values (%s, %t, %face).
; Both runs expect the save-exec / s_wqm / and-exec sequence followed by one
; image_gather4 reading v[0:2] with dmask:0x1. The tahiti checks expect the
; "da" modifier on the instruction; the gfx1010 checks expect
; dim:SQ_RSRC_IMG_CUBE instead.
52define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) {
53; GFX6-LABEL: gather4_cube:
54; GFX6:       ; %bb.0: ; %main_body
55; GFX6-NEXT:    s_mov_b32 s0, s2
56; GFX6-NEXT:    s_mov_b32 s1, s3
57; GFX6-NEXT:    s_mov_b32 s2, s4
58; GFX6-NEXT:    s_mov_b32 s3, s5
59; GFX6-NEXT:    s_mov_b32 s4, s6
60; GFX6-NEXT:    s_mov_b32 s5, s7
61; GFX6-NEXT:    s_mov_b32 s6, s8
62; GFX6-NEXT:    s_mov_b32 s7, s9
63; GFX6-NEXT:    s_mov_b32 s8, s10
64; GFX6-NEXT:    s_mov_b32 s9, s11
65; GFX6-NEXT:    s_mov_b64 s[14:15], exec
66; GFX6-NEXT:    s_mov_b32 s10, s12
67; GFX6-NEXT:    s_mov_b32 s11, s13
68; GFX6-NEXT:    s_wqm_b64 exec, exec
69; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
70; GFX6-NEXT:    image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 da
71; GFX6-NEXT:    s_waitcnt vmcnt(0)
72; GFX6-NEXT:    ; return to shader part epilog
73;
74; GFX10NSA-LABEL: gather4_cube:
75; GFX10NSA:       ; %bb.0: ; %main_body
76; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
77; GFX10NSA-NEXT:    s_mov_b32 s0, s2
78; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
79; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
80; GFX10NSA-NEXT:    s_mov_b32 s1, s3
81; GFX10NSA-NEXT:    s_mov_b32 s2, s4
82; GFX10NSA-NEXT:    s_mov_b32 s3, s5
83; GFX10NSA-NEXT:    s_mov_b32 s4, s6
84; GFX10NSA-NEXT:    s_mov_b32 s5, s7
85; GFX10NSA-NEXT:    s_mov_b32 s6, s8
86; GFX10NSA-NEXT:    s_mov_b32 s7, s9
87; GFX10NSA-NEXT:    s_mov_b32 s8, s10
88; GFX10NSA-NEXT:    s_mov_b32 s9, s11
89; GFX10NSA-NEXT:    s_mov_b32 s10, s12
90; GFX10NSA-NEXT:    s_mov_b32 s11, s13
91; GFX10NSA-NEXT:    image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE
92; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
93; GFX10NSA-NEXT:    ; return to shader part epilog
94main_body:
95  %v = call <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f32(i32 1, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
96  ret <4 x float> %v
97}
98
; gather4_2darray: calls llvm.amdgcn.image.gather4.2darray with first operand
; i32 1 and three float address values (%s, %t, %slice).
; Both runs expect the save-exec / s_wqm / and-exec sequence followed by one
; image_gather4 reading v[0:2] with dmask:0x1. The tahiti checks expect the
; "da" modifier; the gfx1010 checks expect dim:SQ_RSRC_IMG_2D_ARRAY.
99define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) {
100; GFX6-LABEL: gather4_2darray:
101; GFX6:       ; %bb.0: ; %main_body
102; GFX6-NEXT:    s_mov_b32 s0, s2
103; GFX6-NEXT:    s_mov_b32 s1, s3
104; GFX6-NEXT:    s_mov_b32 s2, s4
105; GFX6-NEXT:    s_mov_b32 s3, s5
106; GFX6-NEXT:    s_mov_b32 s4, s6
107; GFX6-NEXT:    s_mov_b32 s5, s7
108; GFX6-NEXT:    s_mov_b32 s6, s8
109; GFX6-NEXT:    s_mov_b32 s7, s9
110; GFX6-NEXT:    s_mov_b32 s8, s10
111; GFX6-NEXT:    s_mov_b32 s9, s11
112; GFX6-NEXT:    s_mov_b64 s[14:15], exec
113; GFX6-NEXT:    s_mov_b32 s10, s12
114; GFX6-NEXT:    s_mov_b32 s11, s13
115; GFX6-NEXT:    s_wqm_b64 exec, exec
116; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
117; GFX6-NEXT:    image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 da
118; GFX6-NEXT:    s_waitcnt vmcnt(0)
119; GFX6-NEXT:    ; return to shader part epilog
120;
121; GFX10NSA-LABEL: gather4_2darray:
122; GFX10NSA:       ; %bb.0: ; %main_body
123; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
124; GFX10NSA-NEXT:    s_mov_b32 s0, s2
125; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
126; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
127; GFX10NSA-NEXT:    s_mov_b32 s1, s3
128; GFX10NSA-NEXT:    s_mov_b32 s2, s4
129; GFX10NSA-NEXT:    s_mov_b32 s3, s5
130; GFX10NSA-NEXT:    s_mov_b32 s4, s6
131; GFX10NSA-NEXT:    s_mov_b32 s5, s7
132; GFX10NSA-NEXT:    s_mov_b32 s6, s8
133; GFX10NSA-NEXT:    s_mov_b32 s7, s9
134; GFX10NSA-NEXT:    s_mov_b32 s8, s10
135; GFX10NSA-NEXT:    s_mov_b32 s9, s11
136; GFX10NSA-NEXT:    s_mov_b32 s10, s12
137; GFX10NSA-NEXT:    s_mov_b32 s11, s13
138; GFX10NSA-NEXT:    image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
139; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
140; GFX10NSA-NEXT:    ; return to shader part epilog
141main_body:
142  %v = call <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f32(i32 1, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
143  ret <4 x float> %v
144}
145
; gather4_c_2d: calls llvm.amdgcn.image.gather4.c.2d with first operand i32 1;
; %zcompare is passed ahead of the (%s, %t) coordinates.
; Both runs expect the save-exec / s_wqm / and-exec sequence followed by one
; image_gather4_c reading three address registers v[0:2] with dmask:0x1; the
; gfx1010 checks also expect dim:SQ_RSRC_IMG_2D.
146define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
147; GFX6-LABEL: gather4_c_2d:
148; GFX6:       ; %bb.0: ; %main_body
149; GFX6-NEXT:    s_mov_b32 s0, s2
150; GFX6-NEXT:    s_mov_b32 s1, s3
151; GFX6-NEXT:    s_mov_b32 s2, s4
152; GFX6-NEXT:    s_mov_b32 s3, s5
153; GFX6-NEXT:    s_mov_b32 s4, s6
154; GFX6-NEXT:    s_mov_b32 s5, s7
155; GFX6-NEXT:    s_mov_b32 s6, s8
156; GFX6-NEXT:    s_mov_b32 s7, s9
157; GFX6-NEXT:    s_mov_b32 s8, s10
158; GFX6-NEXT:    s_mov_b32 s9, s11
159; GFX6-NEXT:    s_mov_b64 s[14:15], exec
160; GFX6-NEXT:    s_mov_b32 s10, s12
161; GFX6-NEXT:    s_mov_b32 s11, s13
162; GFX6-NEXT:    s_wqm_b64 exec, exec
163; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
164; GFX6-NEXT:    image_gather4_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1
165; GFX6-NEXT:    s_waitcnt vmcnt(0)
166; GFX6-NEXT:    ; return to shader part epilog
167;
168; GFX10NSA-LABEL: gather4_c_2d:
169; GFX10NSA:       ; %bb.0: ; %main_body
170; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
171; GFX10NSA-NEXT:    s_mov_b32 s0, s2
172; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
173; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
174; GFX10NSA-NEXT:    s_mov_b32 s1, s3
175; GFX10NSA-NEXT:    s_mov_b32 s2, s4
176; GFX10NSA-NEXT:    s_mov_b32 s3, s5
177; GFX10NSA-NEXT:    s_mov_b32 s4, s6
178; GFX10NSA-NEXT:    s_mov_b32 s5, s7
179; GFX10NSA-NEXT:    s_mov_b32 s6, s8
180; GFX10NSA-NEXT:    s_mov_b32 s7, s9
181; GFX10NSA-NEXT:    s_mov_b32 s8, s10
182; GFX10NSA-NEXT:    s_mov_b32 s9, s11
183; GFX10NSA-NEXT:    s_mov_b32 s10, s12
184; GFX10NSA-NEXT:    s_mov_b32 s11, s13
185; GFX10NSA-NEXT:    image_gather4_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
186; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
187; GFX10NSA-NEXT:    ; return to shader part epilog
188main_body:
189  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
190  ret <4 x float> %v
191}
192
; gather4_cl_2d: calls llvm.amdgcn.image.gather4.cl.2d with first operand
; i32 1; %clamp is passed after the (%s, %t) coordinates.
; Both runs expect the save-exec / s_wqm / and-exec sequence followed by one
; image_gather4_cl reading three address registers v[0:2] with dmask:0x1; the
; gfx1010 checks also expect dim:SQ_RSRC_IMG_2D.
193define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) {
194; GFX6-LABEL: gather4_cl_2d:
195; GFX6:       ; %bb.0: ; %main_body
196; GFX6-NEXT:    s_mov_b32 s0, s2
197; GFX6-NEXT:    s_mov_b32 s1, s3
198; GFX6-NEXT:    s_mov_b32 s2, s4
199; GFX6-NEXT:    s_mov_b32 s3, s5
200; GFX6-NEXT:    s_mov_b32 s4, s6
201; GFX6-NEXT:    s_mov_b32 s5, s7
202; GFX6-NEXT:    s_mov_b32 s6, s8
203; GFX6-NEXT:    s_mov_b32 s7, s9
204; GFX6-NEXT:    s_mov_b32 s8, s10
205; GFX6-NEXT:    s_mov_b32 s9, s11
206; GFX6-NEXT:    s_mov_b64 s[14:15], exec
207; GFX6-NEXT:    s_mov_b32 s10, s12
208; GFX6-NEXT:    s_mov_b32 s11, s13
209; GFX6-NEXT:    s_wqm_b64 exec, exec
210; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
211; GFX6-NEXT:    image_gather4_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1
212; GFX6-NEXT:    s_waitcnt vmcnt(0)
213; GFX6-NEXT:    ; return to shader part epilog
214;
215; GFX10NSA-LABEL: gather4_cl_2d:
216; GFX10NSA:       ; %bb.0: ; %main_body
217; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
218; GFX10NSA-NEXT:    s_mov_b32 s0, s2
219; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
220; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
221; GFX10NSA-NEXT:    s_mov_b32 s1, s3
222; GFX10NSA-NEXT:    s_mov_b32 s2, s4
223; GFX10NSA-NEXT:    s_mov_b32 s3, s5
224; GFX10NSA-NEXT:    s_mov_b32 s4, s6
225; GFX10NSA-NEXT:    s_mov_b32 s5, s7
226; GFX10NSA-NEXT:    s_mov_b32 s6, s8
227; GFX10NSA-NEXT:    s_mov_b32 s7, s9
228; GFX10NSA-NEXT:    s_mov_b32 s8, s10
229; GFX10NSA-NEXT:    s_mov_b32 s9, s11
230; GFX10NSA-NEXT:    s_mov_b32 s10, s12
231; GFX10NSA-NEXT:    s_mov_b32 s11, s13
232; GFX10NSA-NEXT:    image_gather4_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
233; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
234; GFX10NSA-NEXT:    ; return to shader part epilog
235main_body:
236  %v = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32 1, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
237  ret <4 x float> %v
238}
239
; gather4_c_cl_2d: calls llvm.amdgcn.image.gather4.c.cl.2d with first operand
; i32 1 and four float address values in the order %zcompare, %s, %t, %clamp.
; Both runs expect the save-exec / s_wqm / and-exec sequence followed by one
; image_gather4_c_cl reading four address registers v[0:3] with dmask:0x1; the
; gfx1010 checks also expect dim:SQ_RSRC_IMG_2D.
240define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) {
241; GFX6-LABEL: gather4_c_cl_2d:
242; GFX6:       ; %bb.0: ; %main_body
243; GFX6-NEXT:    s_mov_b32 s0, s2
244; GFX6-NEXT:    s_mov_b32 s1, s3
245; GFX6-NEXT:    s_mov_b32 s2, s4
246; GFX6-NEXT:    s_mov_b32 s3, s5
247; GFX6-NEXT:    s_mov_b32 s4, s6
248; GFX6-NEXT:    s_mov_b32 s5, s7
249; GFX6-NEXT:    s_mov_b32 s6, s8
250; GFX6-NEXT:    s_mov_b32 s7, s9
251; GFX6-NEXT:    s_mov_b32 s8, s10
252; GFX6-NEXT:    s_mov_b32 s9, s11
253; GFX6-NEXT:    s_mov_b64 s[14:15], exec
254; GFX6-NEXT:    s_mov_b32 s10, s12
255; GFX6-NEXT:    s_mov_b32 s11, s13
256; GFX6-NEXT:    s_wqm_b64 exec, exec
257; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
258; GFX6-NEXT:    image_gather4_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
259; GFX6-NEXT:    s_waitcnt vmcnt(0)
260; GFX6-NEXT:    ; return to shader part epilog
261;
262; GFX10NSA-LABEL: gather4_c_cl_2d:
263; GFX10NSA:       ; %bb.0: ; %main_body
264; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
265; GFX10NSA-NEXT:    s_mov_b32 s0, s2
266; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
267; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
268; GFX10NSA-NEXT:    s_mov_b32 s1, s3
269; GFX10NSA-NEXT:    s_mov_b32 s2, s4
270; GFX10NSA-NEXT:    s_mov_b32 s3, s5
271; GFX10NSA-NEXT:    s_mov_b32 s4, s6
272; GFX10NSA-NEXT:    s_mov_b32 s5, s7
273; GFX10NSA-NEXT:    s_mov_b32 s6, s8
274; GFX10NSA-NEXT:    s_mov_b32 s7, s9
275; GFX10NSA-NEXT:    s_mov_b32 s8, s10
276; GFX10NSA-NEXT:    s_mov_b32 s9, s11
277; GFX10NSA-NEXT:    s_mov_b32 s10, s12
278; GFX10NSA-NEXT:    s_mov_b32 s11, s13
279; GFX10NSA-NEXT:    image_gather4_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
280; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
281; GFX10NSA-NEXT:    ; return to shader part epilog
282main_body:
283  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
284  ret <4 x float> %v
285}
286
; gather4_b_2d: calls llvm.amdgcn.image.gather4.b.2d with first operand i32 1;
; %bias is passed ahead of the (%s, %t) coordinates. The intrinsic name used
; here carries two trailing .f32 suffixes (v4f32.f32.f32).
; Both runs expect the save-exec / s_wqm / and-exec sequence followed by one
; image_gather4_b reading three address registers v[0:2] with dmask:0x1; the
; gfx1010 checks also expect dim:SQ_RSRC_IMG_2D.
287define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
288; GFX6-LABEL: gather4_b_2d:
289; GFX6:       ; %bb.0: ; %main_body
290; GFX6-NEXT:    s_mov_b32 s0, s2
291; GFX6-NEXT:    s_mov_b32 s1, s3
292; GFX6-NEXT:    s_mov_b32 s2, s4
293; GFX6-NEXT:    s_mov_b32 s3, s5
294; GFX6-NEXT:    s_mov_b32 s4, s6
295; GFX6-NEXT:    s_mov_b32 s5, s7
296; GFX6-NEXT:    s_mov_b32 s6, s8
297; GFX6-NEXT:    s_mov_b32 s7, s9
298; GFX6-NEXT:    s_mov_b32 s8, s10
299; GFX6-NEXT:    s_mov_b32 s9, s11
300; GFX6-NEXT:    s_mov_b64 s[14:15], exec
301; GFX6-NEXT:    s_mov_b32 s10, s12
302; GFX6-NEXT:    s_mov_b32 s11, s13
303; GFX6-NEXT:    s_wqm_b64 exec, exec
304; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
305; GFX6-NEXT:    image_gather4_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1
306; GFX6-NEXT:    s_waitcnt vmcnt(0)
307; GFX6-NEXT:    ; return to shader part epilog
308;
309; GFX10NSA-LABEL: gather4_b_2d:
310; GFX10NSA:       ; %bb.0: ; %main_body
311; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
312; GFX10NSA-NEXT:    s_mov_b32 s0, s2
313; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
314; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
315; GFX10NSA-NEXT:    s_mov_b32 s1, s3
316; GFX10NSA-NEXT:    s_mov_b32 s2, s4
317; GFX10NSA-NEXT:    s_mov_b32 s3, s5
318; GFX10NSA-NEXT:    s_mov_b32 s4, s6
319; GFX10NSA-NEXT:    s_mov_b32 s5, s7
320; GFX10NSA-NEXT:    s_mov_b32 s6, s8
321; GFX10NSA-NEXT:    s_mov_b32 s7, s9
322; GFX10NSA-NEXT:    s_mov_b32 s8, s10
323; GFX10NSA-NEXT:    s_mov_b32 s9, s11
324; GFX10NSA-NEXT:    s_mov_b32 s10, s12
325; GFX10NSA-NEXT:    s_mov_b32 s11, s13
326; GFX10NSA-NEXT:    image_gather4_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
327; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
328; GFX10NSA-NEXT:    ; return to shader part epilog
329main_body:
330  %v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f32(i32 1, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
331  ret <4 x float> %v
332}
333
; gather4_c_b_2d: calls llvm.amdgcn.image.gather4.c.b.2d with first operand
; i32 1 and four float address values in the order %bias, %zcompare, %s, %t.
; Both runs expect the save-exec / s_wqm / and-exec sequence followed by one
; image_gather4_c_b reading four address registers v[0:3] with dmask:0x1; the
; gfx1010 checks also expect dim:SQ_RSRC_IMG_2D.
334define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) {
335; GFX6-LABEL: gather4_c_b_2d:
336; GFX6:       ; %bb.0: ; %main_body
337; GFX6-NEXT:    s_mov_b32 s0, s2
338; GFX6-NEXT:    s_mov_b32 s1, s3
339; GFX6-NEXT:    s_mov_b32 s2, s4
340; GFX6-NEXT:    s_mov_b32 s3, s5
341; GFX6-NEXT:    s_mov_b32 s4, s6
342; GFX6-NEXT:    s_mov_b32 s5, s7
343; GFX6-NEXT:    s_mov_b32 s6, s8
344; GFX6-NEXT:    s_mov_b32 s7, s9
345; GFX6-NEXT:    s_mov_b32 s8, s10
346; GFX6-NEXT:    s_mov_b32 s9, s11
347; GFX6-NEXT:    s_mov_b64 s[14:15], exec
348; GFX6-NEXT:    s_mov_b32 s10, s12
349; GFX6-NEXT:    s_mov_b32 s11, s13
350; GFX6-NEXT:    s_wqm_b64 exec, exec
351; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
352; GFX6-NEXT:    image_gather4_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
353; GFX6-NEXT:    s_waitcnt vmcnt(0)
354; GFX6-NEXT:    ; return to shader part epilog
355;
356; GFX10NSA-LABEL: gather4_c_b_2d:
357; GFX10NSA:       ; %bb.0: ; %main_body
358; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
359; GFX10NSA-NEXT:    s_mov_b32 s0, s2
360; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
361; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
362; GFX10NSA-NEXT:    s_mov_b32 s1, s3
363; GFX10NSA-NEXT:    s_mov_b32 s2, s4
364; GFX10NSA-NEXT:    s_mov_b32 s3, s5
365; GFX10NSA-NEXT:    s_mov_b32 s4, s6
366; GFX10NSA-NEXT:    s_mov_b32 s5, s7
367; GFX10NSA-NEXT:    s_mov_b32 s6, s8
368; GFX10NSA-NEXT:    s_mov_b32 s7, s9
369; GFX10NSA-NEXT:    s_mov_b32 s8, s10
370; GFX10NSA-NEXT:    s_mov_b32 s9, s11
371; GFX10NSA-NEXT:    s_mov_b32 s10, s12
372; GFX10NSA-NEXT:    s_mov_b32 s11, s13
373; GFX10NSA-NEXT:    image_gather4_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
374; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
375; GFX10NSA-NEXT:    ; return to shader part epilog
376main_body:
377  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
378  ret <4 x float> %v
379}
380
; gather4_b_cl_2d: calls llvm.amdgcn.image.gather4.b.cl.2d with first operand
; i32 1 and four float address values in the order %bias, %s, %t, %clamp.
; Both runs expect the save-exec / s_wqm / and-exec sequence followed by one
; image_gather4_b_cl reading four address registers v[0:3] with dmask:0x1; the
; gfx1010 checks also expect dim:SQ_RSRC_IMG_2D.
381define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) {
382; GFX6-LABEL: gather4_b_cl_2d:
383; GFX6:       ; %bb.0: ; %main_body
384; GFX6-NEXT:    s_mov_b32 s0, s2
385; GFX6-NEXT:    s_mov_b32 s1, s3
386; GFX6-NEXT:    s_mov_b32 s2, s4
387; GFX6-NEXT:    s_mov_b32 s3, s5
388; GFX6-NEXT:    s_mov_b32 s4, s6
389; GFX6-NEXT:    s_mov_b32 s5, s7
390; GFX6-NEXT:    s_mov_b32 s6, s8
391; GFX6-NEXT:    s_mov_b32 s7, s9
392; GFX6-NEXT:    s_mov_b32 s8, s10
393; GFX6-NEXT:    s_mov_b32 s9, s11
394; GFX6-NEXT:    s_mov_b64 s[14:15], exec
395; GFX6-NEXT:    s_mov_b32 s10, s12
396; GFX6-NEXT:    s_mov_b32 s11, s13
397; GFX6-NEXT:    s_wqm_b64 exec, exec
398; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
399; GFX6-NEXT:    image_gather4_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
400; GFX6-NEXT:    s_waitcnt vmcnt(0)
401; GFX6-NEXT:    ; return to shader part epilog
402;
403; GFX10NSA-LABEL: gather4_b_cl_2d:
404; GFX10NSA:       ; %bb.0: ; %main_body
405; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
406; GFX10NSA-NEXT:    s_mov_b32 s0, s2
407; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
408; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
409; GFX10NSA-NEXT:    s_mov_b32 s1, s3
410; GFX10NSA-NEXT:    s_mov_b32 s2, s4
411; GFX10NSA-NEXT:    s_mov_b32 s3, s5
412; GFX10NSA-NEXT:    s_mov_b32 s4, s6
413; GFX10NSA-NEXT:    s_mov_b32 s5, s7
414; GFX10NSA-NEXT:    s_mov_b32 s6, s8
415; GFX10NSA-NEXT:    s_mov_b32 s7, s9
416; GFX10NSA-NEXT:    s_mov_b32 s8, s10
417; GFX10NSA-NEXT:    s_mov_b32 s9, s11
418; GFX10NSA-NEXT:    s_mov_b32 s10, s12
419; GFX10NSA-NEXT:    s_mov_b32 s11, s13
420; GFX10NSA-NEXT:    image_gather4_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
421; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
422; GFX10NSA-NEXT:    ; return to shader part epilog
423main_body:
424  %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
425  ret <4 x float> %v
426}
427
; gather4_c_b_cl_2d: calls llvm.amdgcn.image.gather4.c.b.cl.2d with first
; operand i32 1 and five float address values in the order %bias, %zcompare,
; %s, %t, %clamp.
; Both runs expect the save-exec / s_wqm / and-exec sequence followed by one
; image_gather4_c_b_cl with dmask:0x1; the gfx1010 checks also expect
; dim:SQ_RSRC_IMG_2D.
; NOTE(review): only five address values are passed, yet the checks expect an
; eight-register address operand v[0:7] - presumably the address tuple is
; widened to the next available register-tuple size; confirm against the
; backend before relying on this.
428define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) {
429; GFX6-LABEL: gather4_c_b_cl_2d:
430; GFX6:       ; %bb.0: ; %main_body
431; GFX6-NEXT:    s_mov_b32 s0, s2
432; GFX6-NEXT:    s_mov_b32 s1, s3
433; GFX6-NEXT:    s_mov_b32 s2, s4
434; GFX6-NEXT:    s_mov_b32 s3, s5
435; GFX6-NEXT:    s_mov_b32 s4, s6
436; GFX6-NEXT:    s_mov_b32 s5, s7
437; GFX6-NEXT:    s_mov_b32 s6, s8
438; GFX6-NEXT:    s_mov_b32 s7, s9
439; GFX6-NEXT:    s_mov_b32 s8, s10
440; GFX6-NEXT:    s_mov_b32 s9, s11
441; GFX6-NEXT:    s_mov_b64 s[14:15], exec
442; GFX6-NEXT:    s_mov_b32 s10, s12
443; GFX6-NEXT:    s_mov_b32 s11, s13
444; GFX6-NEXT:    s_wqm_b64 exec, exec
445; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
446; GFX6-NEXT:    image_gather4_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1
447; GFX6-NEXT:    s_waitcnt vmcnt(0)
448; GFX6-NEXT:    ; return to shader part epilog
449;
450; GFX10NSA-LABEL: gather4_c_b_cl_2d:
451; GFX10NSA:       ; %bb.0: ; %main_body
452; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
453; GFX10NSA-NEXT:    s_mov_b32 s0, s2
454; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
455; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
456; GFX10NSA-NEXT:    s_mov_b32 s1, s3
457; GFX10NSA-NEXT:    s_mov_b32 s2, s4
458; GFX10NSA-NEXT:    s_mov_b32 s3, s5
459; GFX10NSA-NEXT:    s_mov_b32 s4, s6
460; GFX10NSA-NEXT:    s_mov_b32 s5, s7
461; GFX10NSA-NEXT:    s_mov_b32 s6, s8
462; GFX10NSA-NEXT:    s_mov_b32 s7, s9
463; GFX10NSA-NEXT:    s_mov_b32 s8, s10
464; GFX10NSA-NEXT:    s_mov_b32 s9, s11
465; GFX10NSA-NEXT:    s_mov_b32 s10, s12
466; GFX10NSA-NEXT:    s_mov_b32 s11, s13
467; GFX10NSA-NEXT:    image_gather4_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
468; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
469; GFX10NSA-NEXT:    ; return to shader part epilog
470main_body:
471  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
472  ret <4 x float> %v
473}
474
; gather4_l_2d: calls llvm.amdgcn.image.gather4.l.2d with first operand i32 1;
; %lod is passed after the (%s, %t) coordinates.
; Unlike the non-lod gather4 tests in this file, the expected output for both
; runs contains no s_wqm or exec save/restore: only the descriptor register
; copies (s2..s13 -> s0..s11) and one image_gather4_l reading v[0:2] with
; dmask:0x1. The gfx1010 checks also expect dim:SQ_RSRC_IMG_2D.
475define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
476; GFX6-LABEL: gather4_l_2d:
477; GFX6:       ; %bb.0: ; %main_body
478; GFX6-NEXT:    s_mov_b32 s0, s2
479; GFX6-NEXT:    s_mov_b32 s1, s3
480; GFX6-NEXT:    s_mov_b32 s2, s4
481; GFX6-NEXT:    s_mov_b32 s3, s5
482; GFX6-NEXT:    s_mov_b32 s4, s6
483; GFX6-NEXT:    s_mov_b32 s5, s7
484; GFX6-NEXT:    s_mov_b32 s6, s8
485; GFX6-NEXT:    s_mov_b32 s7, s9
486; GFX6-NEXT:    s_mov_b32 s8, s10
487; GFX6-NEXT:    s_mov_b32 s9, s11
488; GFX6-NEXT:    s_mov_b32 s10, s12
489; GFX6-NEXT:    s_mov_b32 s11, s13
490; GFX6-NEXT:    image_gather4_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1
491; GFX6-NEXT:    s_waitcnt vmcnt(0)
492; GFX6-NEXT:    ; return to shader part epilog
493;
494; GFX10NSA-LABEL: gather4_l_2d:
495; GFX10NSA:       ; %bb.0: ; %main_body
496; GFX10NSA-NEXT:    s_mov_b32 s0, s2
497; GFX10NSA-NEXT:    s_mov_b32 s1, s3
498; GFX10NSA-NEXT:    s_mov_b32 s2, s4
499; GFX10NSA-NEXT:    s_mov_b32 s3, s5
500; GFX10NSA-NEXT:    s_mov_b32 s4, s6
501; GFX10NSA-NEXT:    s_mov_b32 s5, s7
502; GFX10NSA-NEXT:    s_mov_b32 s6, s8
503; GFX10NSA-NEXT:    s_mov_b32 s7, s9
504; GFX10NSA-NEXT:    s_mov_b32 s8, s10
505; GFX10NSA-NEXT:    s_mov_b32 s9, s11
506; GFX10NSA-NEXT:    s_mov_b32 s10, s12
507; GFX10NSA-NEXT:    s_mov_b32 s11, s13
508; GFX10NSA-NEXT:    image_gather4_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
509; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
510; GFX10NSA-NEXT:    ; return to shader part epilog
511main_body:
512  %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 1, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
513  ret <4 x float> %v
514}
515
; gather4_c_l_2d: calls llvm.amdgcn.image.gather4.c.l.2d with first operand
; i32 1 and four float address values in the order %zcompare, %s, %t, %lod.
; The expected output for both runs contains no s_wqm or exec save/restore:
; only the descriptor register copies and one image_gather4_c_l reading
; v[0:3] with dmask:0x1. The gfx1010 checks also expect dim:SQ_RSRC_IMG_2D.
516define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
517; GFX6-LABEL: gather4_c_l_2d:
518; GFX6:       ; %bb.0: ; %main_body
519; GFX6-NEXT:    s_mov_b32 s0, s2
520; GFX6-NEXT:    s_mov_b32 s1, s3
521; GFX6-NEXT:    s_mov_b32 s2, s4
522; GFX6-NEXT:    s_mov_b32 s3, s5
523; GFX6-NEXT:    s_mov_b32 s4, s6
524; GFX6-NEXT:    s_mov_b32 s5, s7
525; GFX6-NEXT:    s_mov_b32 s6, s8
526; GFX6-NEXT:    s_mov_b32 s7, s9
527; GFX6-NEXT:    s_mov_b32 s8, s10
528; GFX6-NEXT:    s_mov_b32 s9, s11
529; GFX6-NEXT:    s_mov_b32 s10, s12
530; GFX6-NEXT:    s_mov_b32 s11, s13
531; GFX6-NEXT:    image_gather4_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
532; GFX6-NEXT:    s_waitcnt vmcnt(0)
533; GFX6-NEXT:    ; return to shader part epilog
534;
535; GFX10NSA-LABEL: gather4_c_l_2d:
536; GFX10NSA:       ; %bb.0: ; %main_body
537; GFX10NSA-NEXT:    s_mov_b32 s0, s2
538; GFX10NSA-NEXT:    s_mov_b32 s1, s3
539; GFX10NSA-NEXT:    s_mov_b32 s2, s4
540; GFX10NSA-NEXT:    s_mov_b32 s3, s5
541; GFX10NSA-NEXT:    s_mov_b32 s4, s6
542; GFX10NSA-NEXT:    s_mov_b32 s5, s7
543; GFX10NSA-NEXT:    s_mov_b32 s6, s8
544; GFX10NSA-NEXT:    s_mov_b32 s7, s9
545; GFX10NSA-NEXT:    s_mov_b32 s8, s10
546; GFX10NSA-NEXT:    s_mov_b32 s9, s11
547; GFX10NSA-NEXT:    s_mov_b32 s10, s12
548; GFX10NSA-NEXT:    s_mov_b32 s11, s13
549; GFX10NSA-NEXT:    image_gather4_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
550; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
551; GFX10NSA-NEXT:    ; return to shader part epilog
552main_body:
553  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
554  ret <4 x float> %v
555}
556
; gather4_lz_2d: calls llvm.amdgcn.image.gather4.lz.2d with first operand
; i32 1 and two float coordinates (%s, %t).
; The expected output for both runs contains no s_wqm or exec save/restore:
; only the descriptor register copies and one image_gather4_lz reading
; v[0:1] with dmask:0x1. The gfx1010 checks also expect dim:SQ_RSRC_IMG_2D.
557define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
558; GFX6-LABEL: gather4_lz_2d:
559; GFX6:       ; %bb.0: ; %main_body
560; GFX6-NEXT:    s_mov_b32 s0, s2
561; GFX6-NEXT:    s_mov_b32 s1, s3
562; GFX6-NEXT:    s_mov_b32 s2, s4
563; GFX6-NEXT:    s_mov_b32 s3, s5
564; GFX6-NEXT:    s_mov_b32 s4, s6
565; GFX6-NEXT:    s_mov_b32 s5, s7
566; GFX6-NEXT:    s_mov_b32 s6, s8
567; GFX6-NEXT:    s_mov_b32 s7, s9
568; GFX6-NEXT:    s_mov_b32 s8, s10
569; GFX6-NEXT:    s_mov_b32 s9, s11
570; GFX6-NEXT:    s_mov_b32 s10, s12
571; GFX6-NEXT:    s_mov_b32 s11, s13
572; GFX6-NEXT:    image_gather4_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1
573; GFX6-NEXT:    s_waitcnt vmcnt(0)
574; GFX6-NEXT:    ; return to shader part epilog
575;
576; GFX10NSA-LABEL: gather4_lz_2d:
577; GFX10NSA:       ; %bb.0: ; %main_body
578; GFX10NSA-NEXT:    s_mov_b32 s0, s2
579; GFX10NSA-NEXT:    s_mov_b32 s1, s3
580; GFX10NSA-NEXT:    s_mov_b32 s2, s4
581; GFX10NSA-NEXT:    s_mov_b32 s3, s5
582; GFX10NSA-NEXT:    s_mov_b32 s4, s6
583; GFX10NSA-NEXT:    s_mov_b32 s5, s7
584; GFX10NSA-NEXT:    s_mov_b32 s6, s8
585; GFX10NSA-NEXT:    s_mov_b32 s7, s9
586; GFX10NSA-NEXT:    s_mov_b32 s8, s10
587; GFX10NSA-NEXT:    s_mov_b32 s9, s11
588; GFX10NSA-NEXT:    s_mov_b32 s10, s12
589; GFX10NSA-NEXT:    s_mov_b32 s11, s13
590; GFX10NSA-NEXT:    image_gather4_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
591; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
592; GFX10NSA-NEXT:    ; return to shader part epilog
593main_body:
594  %v = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
595  ret <4 x float> %v
596}
597
; gather4_c_lz_2d: calls llvm.amdgcn.image.gather4.c.lz.2d with first operand
; i32 1; %zcompare is passed ahead of the (%s, %t) coordinates.
; The expected output for both runs contains no s_wqm or exec save/restore:
; only the descriptor register copies and one image_gather4_c_lz reading
; v[0:2] with dmask:0x1. The gfx1010 checks also expect dim:SQ_RSRC_IMG_2D.
598define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
599; GFX6-LABEL: gather4_c_lz_2d:
600; GFX6:       ; %bb.0: ; %main_body
601; GFX6-NEXT:    s_mov_b32 s0, s2
602; GFX6-NEXT:    s_mov_b32 s1, s3
603; GFX6-NEXT:    s_mov_b32 s2, s4
604; GFX6-NEXT:    s_mov_b32 s3, s5
605; GFX6-NEXT:    s_mov_b32 s4, s6
606; GFX6-NEXT:    s_mov_b32 s5, s7
607; GFX6-NEXT:    s_mov_b32 s6, s8
608; GFX6-NEXT:    s_mov_b32 s7, s9
609; GFX6-NEXT:    s_mov_b32 s8, s10
610; GFX6-NEXT:    s_mov_b32 s9, s11
611; GFX6-NEXT:    s_mov_b32 s10, s12
612; GFX6-NEXT:    s_mov_b32 s11, s13
613; GFX6-NEXT:    image_gather4_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1
614; GFX6-NEXT:    s_waitcnt vmcnt(0)
615; GFX6-NEXT:    ; return to shader part epilog
616;
617; GFX10NSA-LABEL: gather4_c_lz_2d:
618; GFX10NSA:       ; %bb.0: ; %main_body
619; GFX10NSA-NEXT:    s_mov_b32 s0, s2
620; GFX10NSA-NEXT:    s_mov_b32 s1, s3
621; GFX10NSA-NEXT:    s_mov_b32 s2, s4
622; GFX10NSA-NEXT:    s_mov_b32 s3, s5
623; GFX10NSA-NEXT:    s_mov_b32 s4, s6
624; GFX10NSA-NEXT:    s_mov_b32 s5, s7
625; GFX10NSA-NEXT:    s_mov_b32 s6, s8
626; GFX10NSA-NEXT:    s_mov_b32 s7, s9
627; GFX10NSA-NEXT:    s_mov_b32 s8, s10
628; GFX10NSA-NEXT:    s_mov_b32 s9, s11
629; GFX10NSA-NEXT:    s_mov_b32 s10, s12
630; GFX10NSA-NEXT:    s_mov_b32 s11, s13
631; GFX10NSA-NEXT:    image_gather4_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
632; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
633; GFX10NSA-NEXT:    ; return to shader part epilog
634main_body:
635  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
636  ret <4 x float> %v
637}
638
; gather4_2d_dmask_2: same call shape as the plain 2D gather4 test, but the
; first intrinsic operand is i32 2 instead of i32 1.
; Both runs expect the save-exec / s_wqm / and-exec sequence followed by one
; image_gather4 reading v[0:1]; the value 2 is expected to appear as dmask:0x2
; on the instruction. The gfx1010 checks also expect dim:SQ_RSRC_IMG_2D.
639define amdgpu_ps <4 x float> @gather4_2d_dmask_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
640; GFX6-LABEL: gather4_2d_dmask_2:
641; GFX6:       ; %bb.0: ; %main_body
642; GFX6-NEXT:    s_mov_b32 s0, s2
643; GFX6-NEXT:    s_mov_b32 s1, s3
644; GFX6-NEXT:    s_mov_b32 s2, s4
645; GFX6-NEXT:    s_mov_b32 s3, s5
646; GFX6-NEXT:    s_mov_b32 s4, s6
647; GFX6-NEXT:    s_mov_b32 s5, s7
648; GFX6-NEXT:    s_mov_b32 s6, s8
649; GFX6-NEXT:    s_mov_b32 s7, s9
650; GFX6-NEXT:    s_mov_b32 s8, s10
651; GFX6-NEXT:    s_mov_b32 s9, s11
652; GFX6-NEXT:    s_mov_b64 s[14:15], exec
653; GFX6-NEXT:    s_mov_b32 s10, s12
654; GFX6-NEXT:    s_mov_b32 s11, s13
655; GFX6-NEXT:    s_wqm_b64 exec, exec
656; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
657; GFX6-NEXT:    image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x2
658; GFX6-NEXT:    s_waitcnt vmcnt(0)
659; GFX6-NEXT:    ; return to shader part epilog
660;
661; GFX10NSA-LABEL: gather4_2d_dmask_2:
662; GFX10NSA:       ; %bb.0: ; %main_body
663; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
664; GFX10NSA-NEXT:    s_mov_b32 s0, s2
665; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
666; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
667; GFX10NSA-NEXT:    s_mov_b32 s1, s3
668; GFX10NSA-NEXT:    s_mov_b32 s2, s4
669; GFX10NSA-NEXT:    s_mov_b32 s3, s5
670; GFX10NSA-NEXT:    s_mov_b32 s4, s6
671; GFX10NSA-NEXT:    s_mov_b32 s5, s7
672; GFX10NSA-NEXT:    s_mov_b32 s6, s8
673; GFX10NSA-NEXT:    s_mov_b32 s7, s9
674; GFX10NSA-NEXT:    s_mov_b32 s8, s10
675; GFX10NSA-NEXT:    s_mov_b32 s9, s11
676; GFX10NSA-NEXT:    s_mov_b32 s10, s12
677; GFX10NSA-NEXT:    s_mov_b32 s11, s13
678; GFX10NSA-NEXT:    image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x2 dim:SQ_RSRC_IMG_2D
679; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
680; GFX10NSA-NEXT:    ; return to shader part epilog
681main_body:
682  %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 2, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
683  ret <4 x float> %v
684}
685
; gather4_2d_dmask_4: same call shape as the plain 2D gather4 test, but the
; first intrinsic operand is i32 4 instead of i32 1.
; Both runs expect the save-exec / s_wqm / and-exec sequence followed by one
; image_gather4 reading v[0:1]; the value 4 is expected to appear as dmask:0x4
; on the instruction. The gfx1010 checks also expect dim:SQ_RSRC_IMG_2D.
686define amdgpu_ps <4 x float> @gather4_2d_dmask_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
687; GFX6-LABEL: gather4_2d_dmask_4:
688; GFX6:       ; %bb.0: ; %main_body
689; GFX6-NEXT:    s_mov_b32 s0, s2
690; GFX6-NEXT:    s_mov_b32 s1, s3
691; GFX6-NEXT:    s_mov_b32 s2, s4
692; GFX6-NEXT:    s_mov_b32 s3, s5
693; GFX6-NEXT:    s_mov_b32 s4, s6
694; GFX6-NEXT:    s_mov_b32 s5, s7
695; GFX6-NEXT:    s_mov_b32 s6, s8
696; GFX6-NEXT:    s_mov_b32 s7, s9
697; GFX6-NEXT:    s_mov_b32 s8, s10
698; GFX6-NEXT:    s_mov_b32 s9, s11
699; GFX6-NEXT:    s_mov_b64 s[14:15], exec
700; GFX6-NEXT:    s_mov_b32 s10, s12
701; GFX6-NEXT:    s_mov_b32 s11, s13
702; GFX6-NEXT:    s_wqm_b64 exec, exec
703; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
704; GFX6-NEXT:    image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x4
705; GFX6-NEXT:    s_waitcnt vmcnt(0)
706; GFX6-NEXT:    ; return to shader part epilog
707;
708; GFX10NSA-LABEL: gather4_2d_dmask_4:
709; GFX10NSA:       ; %bb.0: ; %main_body
710; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
711; GFX10NSA-NEXT:    s_mov_b32 s0, s2
712; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
713; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
714; GFX10NSA-NEXT:    s_mov_b32 s1, s3
715; GFX10NSA-NEXT:    s_mov_b32 s2, s4
716; GFX10NSA-NEXT:    s_mov_b32 s3, s5
717; GFX10NSA-NEXT:    s_mov_b32 s4, s6
718; GFX10NSA-NEXT:    s_mov_b32 s5, s7
719; GFX10NSA-NEXT:    s_mov_b32 s6, s8
720; GFX10NSA-NEXT:    s_mov_b32 s7, s9
721; GFX10NSA-NEXT:    s_mov_b32 s8, s10
722; GFX10NSA-NEXT:    s_mov_b32 s9, s11
723; GFX10NSA-NEXT:    s_mov_b32 s10, s12
724; GFX10NSA-NEXT:    s_mov_b32 s11, s13
725; GFX10NSA-NEXT:    image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D
726; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
727; GFX10NSA-NEXT:    ; return to shader part epilog
728main_body:
729  %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 4, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
730  ret <4 x float> %v
731}
732
; Test case: 2D gather4 whose leading immediate operand is 8.
; Under both RUN configurations (tahiti with the GFX6 prefix, gfx1010 with the
; GFX10NSA prefix) the emitted image_gather4 is expected to carry dmask:0x8.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
733define amdgpu_ps <4 x float> @gather4_2d_dmask_8(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
734; GFX6-LABEL: gather4_2d_dmask_8:
735; GFX6:       ; %bb.0: ; %main_body
736; GFX6-NEXT:    s_mov_b32 s0, s2
737; GFX6-NEXT:    s_mov_b32 s1, s3
738; GFX6-NEXT:    s_mov_b32 s2, s4
739; GFX6-NEXT:    s_mov_b32 s3, s5
740; GFX6-NEXT:    s_mov_b32 s4, s6
741; GFX6-NEXT:    s_mov_b32 s5, s7
742; GFX6-NEXT:    s_mov_b32 s6, s8
743; GFX6-NEXT:    s_mov_b32 s7, s9
744; GFX6-NEXT:    s_mov_b32 s8, s10
745; GFX6-NEXT:    s_mov_b32 s9, s11
746; GFX6-NEXT:    s_mov_b64 s[14:15], exec
747; GFX6-NEXT:    s_mov_b32 s10, s12
748; GFX6-NEXT:    s_mov_b32 s11, s13
749; GFX6-NEXT:    s_wqm_b64 exec, exec
750; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
751; GFX6-NEXT:    image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x8
752; GFX6-NEXT:    s_waitcnt vmcnt(0)
753; GFX6-NEXT:    ; return to shader part epilog
754;
755; GFX10NSA-LABEL: gather4_2d_dmask_8:
756; GFX10NSA:       ; %bb.0: ; %main_body
757; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
758; GFX10NSA-NEXT:    s_mov_b32 s0, s2
759; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
760; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
761; GFX10NSA-NEXT:    s_mov_b32 s1, s3
762; GFX10NSA-NEXT:    s_mov_b32 s2, s4
763; GFX10NSA-NEXT:    s_mov_b32 s3, s5
764; GFX10NSA-NEXT:    s_mov_b32 s4, s6
765; GFX10NSA-NEXT:    s_mov_b32 s5, s7
766; GFX10NSA-NEXT:    s_mov_b32 s6, s8
767; GFX10NSA-NEXT:    s_mov_b32 s7, s9
768; GFX10NSA-NEXT:    s_mov_b32 s8, s10
769; GFX10NSA-NEXT:    s_mov_b32 s9, s11
770; GFX10NSA-NEXT:    s_mov_b32 s10, s12
771; GFX10NSA-NEXT:    s_mov_b32 s11, s13
772; GFX10NSA-NEXT:    image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x8 dim:SQ_RSRC_IMG_2D
773; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
774; GFX10NSA-NEXT:    ; return to shader part epilog
775main_body:
  ; The first operand (declared immarg) is the value under test; it shows up
  ; as dmask:0x8 in the expected output. The trailing i1/i32/i32 immediates
  ; are passed as false/0/0.
776  %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 8, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
777  ret <4 x float> %v
778}
779
; Declarations of the llvm.amdgcn.image.gather4 intrinsic variants: the plain
; 2d, cube and 2darray forms, followed by the 2d forms with the c, cl, b, l and
; lz name components. Each returns <4 x float>, takes a leading i32 immarg,
; a variable number of float operands, an <8 x i32> and a <4 x i32> operand,
; and ends with i1/i32/i32 immargs. All are tagged with attribute group #0.
780declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
781declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
782declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
783declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
784declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
785declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
786declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
787declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
788declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
789declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
790declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
791declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
792declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
793declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
794
; Attribute group #0, referenced by every intrinsic declaration above.
795attributes #0 = { nounwind readonly }
796