• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GFX6 %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8 %s
4; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
5
; 1D image load of a single float with dmask=1 (the _x case).
; All three run configurations expect s2..s9 to be copied into s[0:7], a single
; image_load of v0 with dmask:0x1, and s_waitcnt vmcnt(0); the gfx1010 output
; additionally prints dim:SQ_RSRC_IMG_1D.
6define amdgpu_ps float @load_1d_f32_x(<8 x i32> inreg %rsrc, i32 %s) {
7; GFX6-LABEL: load_1d_f32_x:
8; GFX6:       ; %bb.0:
9; GFX6-NEXT:    s_mov_b32 s0, s2
10; GFX6-NEXT:    s_mov_b32 s1, s3
11; GFX6-NEXT:    s_mov_b32 s2, s4
12; GFX6-NEXT:    s_mov_b32 s3, s5
13; GFX6-NEXT:    s_mov_b32 s4, s6
14; GFX6-NEXT:    s_mov_b32 s5, s7
15; GFX6-NEXT:    s_mov_b32 s6, s8
16; GFX6-NEXT:    s_mov_b32 s7, s9
17; GFX6-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 unorm
18; GFX6-NEXT:    s_waitcnt vmcnt(0)
19; GFX6-NEXT:    ; return to shader part epilog
20;
21; GFX8-LABEL: load_1d_f32_x:
22; GFX8:       ; %bb.0:
23; GFX8-NEXT:    s_mov_b32 s0, s2
24; GFX8-NEXT:    s_mov_b32 s1, s3
25; GFX8-NEXT:    s_mov_b32 s2, s4
26; GFX8-NEXT:    s_mov_b32 s3, s5
27; GFX8-NEXT:    s_mov_b32 s4, s6
28; GFX8-NEXT:    s_mov_b32 s5, s7
29; GFX8-NEXT:    s_mov_b32 s6, s8
30; GFX8-NEXT:    s_mov_b32 s7, s9
31; GFX8-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 unorm
32; GFX8-NEXT:    s_waitcnt vmcnt(0)
33; GFX8-NEXT:    ; return to shader part epilog
34;
35; GFX10-LABEL: load_1d_f32_x:
36; GFX10:       ; %bb.0:
37; GFX10-NEXT:    s_mov_b32 s0, s2
38; GFX10-NEXT:    s_mov_b32 s1, s3
39; GFX10-NEXT:    s_mov_b32 s2, s4
40; GFX10-NEXT:    s_mov_b32 s3, s5
41; GFX10-NEXT:    s_mov_b32 s4, s6
42; GFX10-NEXT:    s_mov_b32 s5, s7
43; GFX10-NEXT:    s_mov_b32 s6, s8
44; GFX10-NEXT:    s_mov_b32 s7, s9
45; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
46; GFX10-NEXT:    s_waitcnt vmcnt(0)
47; GFX10-NEXT:    ; return to shader part epilog
48  %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
49  ret float %v
50}
51
; 1D image load of a single float with dmask=2 (the _y case).
; Expected code is the same as for the dmask=1 scalar load except that the
; image_load carries dmask:0x2; the result is still a single VGPR, v0.
52define amdgpu_ps float @load_1d_f32_y(<8 x i32> inreg %rsrc, i32 %s) {
53; GFX6-LABEL: load_1d_f32_y:
54; GFX6:       ; %bb.0:
55; GFX6-NEXT:    s_mov_b32 s0, s2
56; GFX6-NEXT:    s_mov_b32 s1, s3
57; GFX6-NEXT:    s_mov_b32 s2, s4
58; GFX6-NEXT:    s_mov_b32 s3, s5
59; GFX6-NEXT:    s_mov_b32 s4, s6
60; GFX6-NEXT:    s_mov_b32 s5, s7
61; GFX6-NEXT:    s_mov_b32 s6, s8
62; GFX6-NEXT:    s_mov_b32 s7, s9
63; GFX6-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 unorm
64; GFX6-NEXT:    s_waitcnt vmcnt(0)
65; GFX6-NEXT:    ; return to shader part epilog
66;
67; GFX8-LABEL: load_1d_f32_y:
68; GFX8:       ; %bb.0:
69; GFX8-NEXT:    s_mov_b32 s0, s2
70; GFX8-NEXT:    s_mov_b32 s1, s3
71; GFX8-NEXT:    s_mov_b32 s2, s4
72; GFX8-NEXT:    s_mov_b32 s3, s5
73; GFX8-NEXT:    s_mov_b32 s4, s6
74; GFX8-NEXT:    s_mov_b32 s5, s7
75; GFX8-NEXT:    s_mov_b32 s6, s8
76; GFX8-NEXT:    s_mov_b32 s7, s9
77; GFX8-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 unorm
78; GFX8-NEXT:    s_waitcnt vmcnt(0)
79; GFX8-NEXT:    ; return to shader part epilog
80;
81; GFX10-LABEL: load_1d_f32_y:
82; GFX10:       ; %bb.0:
83; GFX10-NEXT:    s_mov_b32 s0, s2
84; GFX10-NEXT:    s_mov_b32 s1, s3
85; GFX10-NEXT:    s_mov_b32 s2, s4
86; GFX10-NEXT:    s_mov_b32 s3, s5
87; GFX10-NEXT:    s_mov_b32 s4, s6
88; GFX10-NEXT:    s_mov_b32 s5, s7
89; GFX10-NEXT:    s_mov_b32 s6, s8
90; GFX10-NEXT:    s_mov_b32 s7, s9
91; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm
92; GFX10-NEXT:    s_waitcnt vmcnt(0)
93; GFX10-NEXT:    ; return to shader part epilog
94  %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 2, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
95  ret float %v
96}
97
; 1D image load of a single float with dmask=4 (the _z case).
; The checks expect one image_load of v0 with dmask:0x4 after the s2..s9 ->
; s[0:7] copies, followed by s_waitcnt vmcnt(0).
98define amdgpu_ps float @load_1d_f32_z(<8 x i32> inreg %rsrc, i32 %s) {
99; GFX6-LABEL: load_1d_f32_z:
100; GFX6:       ; %bb.0:
101; GFX6-NEXT:    s_mov_b32 s0, s2
102; GFX6-NEXT:    s_mov_b32 s1, s3
103; GFX6-NEXT:    s_mov_b32 s2, s4
104; GFX6-NEXT:    s_mov_b32 s3, s5
105; GFX6-NEXT:    s_mov_b32 s4, s6
106; GFX6-NEXT:    s_mov_b32 s5, s7
107; GFX6-NEXT:    s_mov_b32 s6, s8
108; GFX6-NEXT:    s_mov_b32 s7, s9
109; GFX6-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 unorm
110; GFX6-NEXT:    s_waitcnt vmcnt(0)
111; GFX6-NEXT:    ; return to shader part epilog
112;
113; GFX8-LABEL: load_1d_f32_z:
114; GFX8:       ; %bb.0:
115; GFX8-NEXT:    s_mov_b32 s0, s2
116; GFX8-NEXT:    s_mov_b32 s1, s3
117; GFX8-NEXT:    s_mov_b32 s2, s4
118; GFX8-NEXT:    s_mov_b32 s3, s5
119; GFX8-NEXT:    s_mov_b32 s4, s6
120; GFX8-NEXT:    s_mov_b32 s5, s7
121; GFX8-NEXT:    s_mov_b32 s6, s8
122; GFX8-NEXT:    s_mov_b32 s7, s9
123; GFX8-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 unorm
124; GFX8-NEXT:    s_waitcnt vmcnt(0)
125; GFX8-NEXT:    ; return to shader part epilog
126;
127; GFX10-LABEL: load_1d_f32_z:
128; GFX10:       ; %bb.0:
129; GFX10-NEXT:    s_mov_b32 s0, s2
130; GFX10-NEXT:    s_mov_b32 s1, s3
131; GFX10-NEXT:    s_mov_b32 s2, s4
132; GFX10-NEXT:    s_mov_b32 s3, s5
133; GFX10-NEXT:    s_mov_b32 s4, s6
134; GFX10-NEXT:    s_mov_b32 s5, s7
135; GFX10-NEXT:    s_mov_b32 s6, s8
136; GFX10-NEXT:    s_mov_b32 s7, s9
137; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_1D unorm
138; GFX10-NEXT:    s_waitcnt vmcnt(0)
139; GFX10-NEXT:    ; return to shader part epilog
140  %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 4, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
141  ret float %v
142}
143
; 1D image load of a single float with dmask=8 (the _w case).
; The checks expect one image_load of v0 with dmask:0x8 after the s2..s9 ->
; s[0:7] copies, followed by s_waitcnt vmcnt(0).
144define amdgpu_ps float @load_1d_f32_w(<8 x i32> inreg %rsrc, i32 %s) {
145; GFX6-LABEL: load_1d_f32_w:
146; GFX6:       ; %bb.0:
147; GFX6-NEXT:    s_mov_b32 s0, s2
148; GFX6-NEXT:    s_mov_b32 s1, s3
149; GFX6-NEXT:    s_mov_b32 s2, s4
150; GFX6-NEXT:    s_mov_b32 s3, s5
151; GFX6-NEXT:    s_mov_b32 s4, s6
152; GFX6-NEXT:    s_mov_b32 s5, s7
153; GFX6-NEXT:    s_mov_b32 s6, s8
154; GFX6-NEXT:    s_mov_b32 s7, s9
155; GFX6-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 unorm
156; GFX6-NEXT:    s_waitcnt vmcnt(0)
157; GFX6-NEXT:    ; return to shader part epilog
158;
159; GFX8-LABEL: load_1d_f32_w:
160; GFX8:       ; %bb.0:
161; GFX8-NEXT:    s_mov_b32 s0, s2
162; GFX8-NEXT:    s_mov_b32 s1, s3
163; GFX8-NEXT:    s_mov_b32 s2, s4
164; GFX8-NEXT:    s_mov_b32 s3, s5
165; GFX8-NEXT:    s_mov_b32 s4, s6
166; GFX8-NEXT:    s_mov_b32 s5, s7
167; GFX8-NEXT:    s_mov_b32 s6, s8
168; GFX8-NEXT:    s_mov_b32 s7, s9
169; GFX8-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 unorm
170; GFX8-NEXT:    s_waitcnt vmcnt(0)
171; GFX8-NEXT:    ; return to shader part epilog
172;
173; GFX10-LABEL: load_1d_f32_w:
174; GFX10:       ; %bb.0:
175; GFX10-NEXT:    s_mov_b32 s0, s2
176; GFX10-NEXT:    s_mov_b32 s1, s3
177; GFX10-NEXT:    s_mov_b32 s2, s4
178; GFX10-NEXT:    s_mov_b32 s3, s5
179; GFX10-NEXT:    s_mov_b32 s4, s6
180; GFX10-NEXT:    s_mov_b32 s5, s7
181; GFX10-NEXT:    s_mov_b32 s6, s8
182; GFX10-NEXT:    s_mov_b32 s7, s9
183; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm
184; GFX10-NEXT:    s_waitcnt vmcnt(0)
185; GFX10-NEXT:    ; return to shader part epilog
186  %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
187  ret float %v
188}
189
; 1D image load returning <2 x float> with dmask=3 (the _xy case).
; The checks expect a two-register destination, v[0:1], and dmask:0x3 on the
; image_load; the SGPR copies and the s_waitcnt match the scalar tests.
190define amdgpu_ps <2 x float> @load_1d_v2f32_xy(<8 x i32> inreg %rsrc, i32 %s) {
191; GFX6-LABEL: load_1d_v2f32_xy:
192; GFX6:       ; %bb.0:
193; GFX6-NEXT:    s_mov_b32 s0, s2
194; GFX6-NEXT:    s_mov_b32 s1, s3
195; GFX6-NEXT:    s_mov_b32 s2, s4
196; GFX6-NEXT:    s_mov_b32 s3, s5
197; GFX6-NEXT:    s_mov_b32 s4, s6
198; GFX6-NEXT:    s_mov_b32 s5, s7
199; GFX6-NEXT:    s_mov_b32 s6, s8
200; GFX6-NEXT:    s_mov_b32 s7, s9
201; GFX6-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x3 unorm
202; GFX6-NEXT:    s_waitcnt vmcnt(0)
203; GFX6-NEXT:    ; return to shader part epilog
204;
205; GFX8-LABEL: load_1d_v2f32_xy:
206; GFX8:       ; %bb.0:
207; GFX8-NEXT:    s_mov_b32 s0, s2
208; GFX8-NEXT:    s_mov_b32 s1, s3
209; GFX8-NEXT:    s_mov_b32 s2, s4
210; GFX8-NEXT:    s_mov_b32 s3, s5
211; GFX8-NEXT:    s_mov_b32 s4, s6
212; GFX8-NEXT:    s_mov_b32 s5, s7
213; GFX8-NEXT:    s_mov_b32 s6, s8
214; GFX8-NEXT:    s_mov_b32 s7, s9
215; GFX8-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x3 unorm
216; GFX8-NEXT:    s_waitcnt vmcnt(0)
217; GFX8-NEXT:    ; return to shader part epilog
218;
219; GFX10-LABEL: load_1d_v2f32_xy:
220; GFX10:       ; %bb.0:
221; GFX10-NEXT:    s_mov_b32 s0, s2
222; GFX10-NEXT:    s_mov_b32 s1, s3
223; GFX10-NEXT:    s_mov_b32 s2, s4
224; GFX10-NEXT:    s_mov_b32 s3, s5
225; GFX10-NEXT:    s_mov_b32 s4, s6
226; GFX10-NEXT:    s_mov_b32 s5, s7
227; GFX10-NEXT:    s_mov_b32 s6, s8
228; GFX10-NEXT:    s_mov_b32 s7, s9
229; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm
230; GFX10-NEXT:    s_waitcnt vmcnt(0)
231; GFX10-NEXT:    ; return to shader part epilog
232  %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
233  ret <2 x float> %v
234}
235
; 1D image load returning <2 x float> with a non-contiguous dmask=5 (the _xz
; case). The checks expect the two results packed into v[0:1] with dmask:0x5.
236define amdgpu_ps <2 x float> @load_1d_v2f32_xz(<8 x i32> inreg %rsrc, i32 %s) {
237; GFX6-LABEL: load_1d_v2f32_xz:
238; GFX6:       ; %bb.0:
239; GFX6-NEXT:    s_mov_b32 s0, s2
240; GFX6-NEXT:    s_mov_b32 s1, s3
241; GFX6-NEXT:    s_mov_b32 s2, s4
242; GFX6-NEXT:    s_mov_b32 s3, s5
243; GFX6-NEXT:    s_mov_b32 s4, s6
244; GFX6-NEXT:    s_mov_b32 s5, s7
245; GFX6-NEXT:    s_mov_b32 s6, s8
246; GFX6-NEXT:    s_mov_b32 s7, s9
247; GFX6-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x5 unorm
248; GFX6-NEXT:    s_waitcnt vmcnt(0)
249; GFX6-NEXT:    ; return to shader part epilog
250;
251; GFX8-LABEL: load_1d_v2f32_xz:
252; GFX8:       ; %bb.0:
253; GFX8-NEXT:    s_mov_b32 s0, s2
254; GFX8-NEXT:    s_mov_b32 s1, s3
255; GFX8-NEXT:    s_mov_b32 s2, s4
256; GFX8-NEXT:    s_mov_b32 s3, s5
257; GFX8-NEXT:    s_mov_b32 s4, s6
258; GFX8-NEXT:    s_mov_b32 s5, s7
259; GFX8-NEXT:    s_mov_b32 s6, s8
260; GFX8-NEXT:    s_mov_b32 s7, s9
261; GFX8-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x5 unorm
262; GFX8-NEXT:    s_waitcnt vmcnt(0)
263; GFX8-NEXT:    ; return to shader part epilog
264;
265; GFX10-LABEL: load_1d_v2f32_xz:
266; GFX10:       ; %bb.0:
267; GFX10-NEXT:    s_mov_b32 s0, s2
268; GFX10-NEXT:    s_mov_b32 s1, s3
269; GFX10-NEXT:    s_mov_b32 s2, s4
270; GFX10-NEXT:    s_mov_b32 s3, s5
271; GFX10-NEXT:    s_mov_b32 s4, s6
272; GFX10-NEXT:    s_mov_b32 s5, s7
273; GFX10-NEXT:    s_mov_b32 s6, s8
274; GFX10-NEXT:    s_mov_b32 s7, s9
275; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x5 dim:SQ_RSRC_IMG_1D unorm
276; GFX10-NEXT:    s_waitcnt vmcnt(0)
277; GFX10-NEXT:    ; return to shader part epilog
278  %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 5, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
279  ret <2 x float> %v
280}
281
; 1D image load returning <2 x float> with a non-contiguous dmask=9 (the _xw
; case). The checks expect the two results packed into v[0:1] with dmask:0x9.
282define amdgpu_ps <2 x float> @load_1d_v2f32_xw(<8 x i32> inreg %rsrc, i32 %s) {
283; GFX6-LABEL: load_1d_v2f32_xw:
284; GFX6:       ; %bb.0:
285; GFX6-NEXT:    s_mov_b32 s0, s2
286; GFX6-NEXT:    s_mov_b32 s1, s3
287; GFX6-NEXT:    s_mov_b32 s2, s4
288; GFX6-NEXT:    s_mov_b32 s3, s5
289; GFX6-NEXT:    s_mov_b32 s4, s6
290; GFX6-NEXT:    s_mov_b32 s5, s7
291; GFX6-NEXT:    s_mov_b32 s6, s8
292; GFX6-NEXT:    s_mov_b32 s7, s9
293; GFX6-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 unorm
294; GFX6-NEXT:    s_waitcnt vmcnt(0)
295; GFX6-NEXT:    ; return to shader part epilog
296;
297; GFX8-LABEL: load_1d_v2f32_xw:
298; GFX8:       ; %bb.0:
299; GFX8-NEXT:    s_mov_b32 s0, s2
300; GFX8-NEXT:    s_mov_b32 s1, s3
301; GFX8-NEXT:    s_mov_b32 s2, s4
302; GFX8-NEXT:    s_mov_b32 s3, s5
303; GFX8-NEXT:    s_mov_b32 s4, s6
304; GFX8-NEXT:    s_mov_b32 s5, s7
305; GFX8-NEXT:    s_mov_b32 s6, s8
306; GFX8-NEXT:    s_mov_b32 s7, s9
307; GFX8-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 unorm
308; GFX8-NEXT:    s_waitcnt vmcnt(0)
309; GFX8-NEXT:    ; return to shader part epilog
310;
311; GFX10-LABEL: load_1d_v2f32_xw:
312; GFX10:       ; %bb.0:
313; GFX10-NEXT:    s_mov_b32 s0, s2
314; GFX10-NEXT:    s_mov_b32 s1, s3
315; GFX10-NEXT:    s_mov_b32 s2, s4
316; GFX10-NEXT:    s_mov_b32 s3, s5
317; GFX10-NEXT:    s_mov_b32 s4, s6
318; GFX10-NEXT:    s_mov_b32 s5, s7
319; GFX10-NEXT:    s_mov_b32 s6, s8
320; GFX10-NEXT:    s_mov_b32 s7, s9
321; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D unorm
322; GFX10-NEXT:    s_waitcnt vmcnt(0)
323; GFX10-NEXT:    ; return to shader part epilog
324  %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 9, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
325  ret <2 x float> %v
326}
327
; 1D image load returning <2 x float> with dmask=6 (the _yz case), i.e. a mask
; whose lowest bit is clear. The checks expect v[0:1] and dmask:0x6.
328define amdgpu_ps <2 x float> @load_1d_v2f32_yz(<8 x i32> inreg %rsrc, i32 %s) {
329; GFX6-LABEL: load_1d_v2f32_yz:
330; GFX6:       ; %bb.0:
331; GFX6-NEXT:    s_mov_b32 s0, s2
332; GFX6-NEXT:    s_mov_b32 s1, s3
333; GFX6-NEXT:    s_mov_b32 s2, s4
334; GFX6-NEXT:    s_mov_b32 s3, s5
335; GFX6-NEXT:    s_mov_b32 s4, s6
336; GFX6-NEXT:    s_mov_b32 s5, s7
337; GFX6-NEXT:    s_mov_b32 s6, s8
338; GFX6-NEXT:    s_mov_b32 s7, s9
339; GFX6-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x6 unorm
340; GFX6-NEXT:    s_waitcnt vmcnt(0)
341; GFX6-NEXT:    ; return to shader part epilog
342;
343; GFX8-LABEL: load_1d_v2f32_yz:
344; GFX8:       ; %bb.0:
345; GFX8-NEXT:    s_mov_b32 s0, s2
346; GFX8-NEXT:    s_mov_b32 s1, s3
347; GFX8-NEXT:    s_mov_b32 s2, s4
348; GFX8-NEXT:    s_mov_b32 s3, s5
349; GFX8-NEXT:    s_mov_b32 s4, s6
350; GFX8-NEXT:    s_mov_b32 s5, s7
351; GFX8-NEXT:    s_mov_b32 s6, s8
352; GFX8-NEXT:    s_mov_b32 s7, s9
353; GFX8-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x6 unorm
354; GFX8-NEXT:    s_waitcnt vmcnt(0)
355; GFX8-NEXT:    ; return to shader part epilog
356;
357; GFX10-LABEL: load_1d_v2f32_yz:
358; GFX10:       ; %bb.0:
359; GFX10-NEXT:    s_mov_b32 s0, s2
360; GFX10-NEXT:    s_mov_b32 s1, s3
361; GFX10-NEXT:    s_mov_b32 s2, s4
362; GFX10-NEXT:    s_mov_b32 s3, s5
363; GFX10-NEXT:    s_mov_b32 s4, s6
364; GFX10-NEXT:    s_mov_b32 s5, s7
365; GFX10-NEXT:    s_mov_b32 s6, s8
366; GFX10-NEXT:    s_mov_b32 s7, s9
367; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D unorm
368; GFX10-NEXT:    s_waitcnt vmcnt(0)
369; GFX10-NEXT:    ; return to shader part epilog
370  %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 6, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
371  ret <2 x float> %v
372}
373
; 1D image load returning <3 x float> with dmask=7 (the _xyz case).
; The checks expect a three-register destination, v[0:2], and dmask:0x7.
374define amdgpu_ps <3 x float> @load_1d_v3f32_xyz(<8 x i32> inreg %rsrc, i32 %s) {
375; GFX6-LABEL: load_1d_v3f32_xyz:
376; GFX6:       ; %bb.0:
377; GFX6-NEXT:    s_mov_b32 s0, s2
378; GFX6-NEXT:    s_mov_b32 s1, s3
379; GFX6-NEXT:    s_mov_b32 s2, s4
380; GFX6-NEXT:    s_mov_b32 s3, s5
381; GFX6-NEXT:    s_mov_b32 s4, s6
382; GFX6-NEXT:    s_mov_b32 s5, s7
383; GFX6-NEXT:    s_mov_b32 s6, s8
384; GFX6-NEXT:    s_mov_b32 s7, s9
385; GFX6-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x7 unorm
386; GFX6-NEXT:    s_waitcnt vmcnt(0)
387; GFX6-NEXT:    ; return to shader part epilog
388;
389; GFX8-LABEL: load_1d_v3f32_xyz:
390; GFX8:       ; %bb.0:
391; GFX8-NEXT:    s_mov_b32 s0, s2
392; GFX8-NEXT:    s_mov_b32 s1, s3
393; GFX8-NEXT:    s_mov_b32 s2, s4
394; GFX8-NEXT:    s_mov_b32 s3, s5
395; GFX8-NEXT:    s_mov_b32 s4, s6
396; GFX8-NEXT:    s_mov_b32 s5, s7
397; GFX8-NEXT:    s_mov_b32 s6, s8
398; GFX8-NEXT:    s_mov_b32 s7, s9
399; GFX8-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x7 unorm
400; GFX8-NEXT:    s_waitcnt vmcnt(0)
401; GFX8-NEXT:    ; return to shader part epilog
402;
403; GFX10-LABEL: load_1d_v3f32_xyz:
404; GFX10:       ; %bb.0:
405; GFX10-NEXT:    s_mov_b32 s0, s2
406; GFX10-NEXT:    s_mov_b32 s1, s3
407; GFX10-NEXT:    s_mov_b32 s2, s4
408; GFX10-NEXT:    s_mov_b32 s3, s5
409; GFX10-NEXT:    s_mov_b32 s4, s6
410; GFX10-NEXT:    s_mov_b32 s5, s7
411; GFX10-NEXT:    s_mov_b32 s6, s8
412; GFX10-NEXT:    s_mov_b32 s7, s9
413; GFX10-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm
414; GFX10-NEXT:    s_waitcnt vmcnt(0)
415; GFX10-NEXT:    ; return to shader part epilog
416  %v = call <3 x float> @llvm.amdgcn.image.load.1d.v3f32.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
417  ret <3 x float> %v
418}
419
; 1D image load returning <4 x float> with dmask=15 (the _xyzw case).
; The checks expect a four-register destination, v[0:3], and dmask:0xf.
420define amdgpu_ps <4 x float> @load_1d_v4f32_xyzw(<8 x i32> inreg %rsrc, i32 %s) {
421; GFX6-LABEL: load_1d_v4f32_xyzw:
422; GFX6:       ; %bb.0:
423; GFX6-NEXT:    s_mov_b32 s0, s2
424; GFX6-NEXT:    s_mov_b32 s1, s3
425; GFX6-NEXT:    s_mov_b32 s2, s4
426; GFX6-NEXT:    s_mov_b32 s3, s5
427; GFX6-NEXT:    s_mov_b32 s4, s6
428; GFX6-NEXT:    s_mov_b32 s5, s7
429; GFX6-NEXT:    s_mov_b32 s6, s8
430; GFX6-NEXT:    s_mov_b32 s7, s9
431; GFX6-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm
432; GFX6-NEXT:    s_waitcnt vmcnt(0)
433; GFX6-NEXT:    ; return to shader part epilog
434;
435; GFX8-LABEL: load_1d_v4f32_xyzw:
436; GFX8:       ; %bb.0:
437; GFX8-NEXT:    s_mov_b32 s0, s2
438; GFX8-NEXT:    s_mov_b32 s1, s3
439; GFX8-NEXT:    s_mov_b32 s2, s4
440; GFX8-NEXT:    s_mov_b32 s3, s5
441; GFX8-NEXT:    s_mov_b32 s4, s6
442; GFX8-NEXT:    s_mov_b32 s5, s7
443; GFX8-NEXT:    s_mov_b32 s6, s8
444; GFX8-NEXT:    s_mov_b32 s7, s9
445; GFX8-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm
446; GFX8-NEXT:    s_waitcnt vmcnt(0)
447; GFX8-NEXT:    ; return to shader part epilog
448;
449; GFX10-LABEL: load_1d_v4f32_xyzw:
450; GFX10:       ; %bb.0:
451; GFX10-NEXT:    s_mov_b32 s0, s2
452; GFX10-NEXT:    s_mov_b32 s1, s3
453; GFX10-NEXT:    s_mov_b32 s2, s4
454; GFX10-NEXT:    s_mov_b32 s3, s5
455; GFX10-NEXT:    s_mov_b32 s4, s6
456; GFX10-NEXT:    s_mov_b32 s5, s7
457; GFX10-NEXT:    s_mov_b32 s6, s8
458; GFX10-NEXT:    s_mov_b32 s7, s9
459; GFX10-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm
460; GFX10-NEXT:    s_waitcnt vmcnt(0)
461; GFX10-NEXT:    ; return to shader part epilog
462  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
463  ret <4 x float> %v
464}
465
; TFE variant of the dmask=1 load: the fourth intrinsic argument is 1 and the
; call returns a { float, i32 } pair. Only the trailing i32 is returned (bitcast
; to float), so the checks expect a two-register image_load into v[0:1] with the
; tfe modifier, followed by a copy of v1 into v0.
466define amdgpu_ps float @load_1d_f32_tfe_dmask_x(<8 x i32> inreg %rsrc, i32 %s) {
467; GFX6-LABEL: load_1d_f32_tfe_dmask_x:
468; GFX6:       ; %bb.0:
469; GFX6-NEXT:    s_mov_b32 s0, s2
470; GFX6-NEXT:    s_mov_b32 s1, s3
471; GFX6-NEXT:    s_mov_b32 s2, s4
472; GFX6-NEXT:    s_mov_b32 s3, s5
473; GFX6-NEXT:    s_mov_b32 s4, s6
474; GFX6-NEXT:    s_mov_b32 s5, s7
475; GFX6-NEXT:    s_mov_b32 s6, s8
476; GFX6-NEXT:    s_mov_b32 s7, s9
477; GFX6-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe
478; GFX6-NEXT:    s_waitcnt vmcnt(0)
479; GFX6-NEXT:    v_mov_b32_e32 v0, v1
480; GFX6-NEXT:    ; return to shader part epilog
481;
482; GFX8-LABEL: load_1d_f32_tfe_dmask_x:
483; GFX8:       ; %bb.0:
484; GFX8-NEXT:    s_mov_b32 s0, s2
485; GFX8-NEXT:    s_mov_b32 s1, s3
486; GFX8-NEXT:    s_mov_b32 s2, s4
487; GFX8-NEXT:    s_mov_b32 s3, s5
488; GFX8-NEXT:    s_mov_b32 s4, s6
489; GFX8-NEXT:    s_mov_b32 s5, s7
490; GFX8-NEXT:    s_mov_b32 s6, s8
491; GFX8-NEXT:    s_mov_b32 s7, s9
492; GFX8-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe
493; GFX8-NEXT:    s_waitcnt vmcnt(0)
494; GFX8-NEXT:    v_mov_b32_e32 v0, v1
495; GFX8-NEXT:    ; return to shader part epilog
496;
497; GFX10-LABEL: load_1d_f32_tfe_dmask_x:
498; GFX10:       ; %bb.0:
499; GFX10-NEXT:    s_mov_b32 s0, s2
500; GFX10-NEXT:    s_mov_b32 s1, s3
501; GFX10-NEXT:    s_mov_b32 s2, s4
502; GFX10-NEXT:    s_mov_b32 s3, s5
503; GFX10-NEXT:    s_mov_b32 s4, s6
504; GFX10-NEXT:    s_mov_b32 s5, s7
505; GFX10-NEXT:    s_mov_b32 s6, s8
506; GFX10-NEXT:    s_mov_b32 s7, s9
507; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe
508; GFX10-NEXT:    s_waitcnt vmcnt(0)
509; GFX10-NEXT:    v_mov_b32_e32 v0, v1
510; GFX10-NEXT:    ; return to shader part epilog
511  %v = call { float, i32 } @llvm.amdgcn.image.load.1d.sl_f32i32s.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
512  %v.err = extractvalue { float, i32 } %v, 1
513  %vv = bitcast i32 %v.err to float
514  ret float %vv
515}
516
; TFE variant of the dmask=3 load: the call returns { <2 x float>, i32 } and
; only the trailing i32 is returned (bitcast to float). The checks expect a
; three-register image_load into v[0:2] with the tfe modifier, followed by a
; copy of v2 into v0.
517define amdgpu_ps float @load_1d_v2f32_tfe_dmask_xy(<8 x i32> inreg %rsrc, i32 %s) {
518; GFX6-LABEL: load_1d_v2f32_tfe_dmask_xy:
519; GFX6:       ; %bb.0:
520; GFX6-NEXT:    s_mov_b32 s0, s2
521; GFX6-NEXT:    s_mov_b32 s1, s3
522; GFX6-NEXT:    s_mov_b32 s2, s4
523; GFX6-NEXT:    s_mov_b32 s3, s5
524; GFX6-NEXT:    s_mov_b32 s4, s6
525; GFX6-NEXT:    s_mov_b32 s5, s7
526; GFX6-NEXT:    s_mov_b32 s6, s8
527; GFX6-NEXT:    s_mov_b32 s7, s9
528; GFX6-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x3 unorm tfe
529; GFX6-NEXT:    s_waitcnt vmcnt(0)
530; GFX6-NEXT:    v_mov_b32_e32 v0, v2
531; GFX6-NEXT:    ; return to shader part epilog
532;
533; GFX8-LABEL: load_1d_v2f32_tfe_dmask_xy:
534; GFX8:       ; %bb.0:
535; GFX8-NEXT:    s_mov_b32 s0, s2
536; GFX8-NEXT:    s_mov_b32 s1, s3
537; GFX8-NEXT:    s_mov_b32 s2, s4
538; GFX8-NEXT:    s_mov_b32 s3, s5
539; GFX8-NEXT:    s_mov_b32 s4, s6
540; GFX8-NEXT:    s_mov_b32 s5, s7
541; GFX8-NEXT:    s_mov_b32 s6, s8
542; GFX8-NEXT:    s_mov_b32 s7, s9
543; GFX8-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x3 unorm tfe
544; GFX8-NEXT:    s_waitcnt vmcnt(0)
545; GFX8-NEXT:    v_mov_b32_e32 v0, v2
546; GFX8-NEXT:    ; return to shader part epilog
547;
548; GFX10-LABEL: load_1d_v2f32_tfe_dmask_xy:
549; GFX10:       ; %bb.0:
550; GFX10-NEXT:    s_mov_b32 s0, s2
551; GFX10-NEXT:    s_mov_b32 s1, s3
552; GFX10-NEXT:    s_mov_b32 s2, s4
553; GFX10-NEXT:    s_mov_b32 s3, s5
554; GFX10-NEXT:    s_mov_b32 s4, s6
555; GFX10-NEXT:    s_mov_b32 s5, s7
556; GFX10-NEXT:    s_mov_b32 s6, s8
557; GFX10-NEXT:    s_mov_b32 s7, s9
558; GFX10-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm tfe
559; GFX10-NEXT:    s_waitcnt vmcnt(0)
560; GFX10-NEXT:    v_mov_b32_e32 v0, v2
561; GFX10-NEXT:    ; return to shader part epilog
562  %v = call { <2 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f32i32s.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
563  %v.err = extractvalue { <2 x float>, i32 } %v, 1
564  %vv = bitcast i32 %v.err to float
565  ret float %vv
566}
567
; TFE variant of the dmask=7 load: the call returns { <3 x float>, i32 } and
; only the trailing i32 is returned (bitcast to float). The checks expect a
; four-register image_load into v[0:3] with the tfe modifier, followed by a
; copy of v3 into v0.
568define amdgpu_ps float @load_1d_v3f32_tfe_dmask_xyz(<8 x i32> inreg %rsrc, i32 %s) {
569; GFX6-LABEL: load_1d_v3f32_tfe_dmask_xyz:
570; GFX6:       ; %bb.0:
571; GFX6-NEXT:    s_mov_b32 s0, s2
572; GFX6-NEXT:    s_mov_b32 s1, s3
573; GFX6-NEXT:    s_mov_b32 s2, s4
574; GFX6-NEXT:    s_mov_b32 s3, s5
575; GFX6-NEXT:    s_mov_b32 s4, s6
576; GFX6-NEXT:    s_mov_b32 s5, s7
577; GFX6-NEXT:    s_mov_b32 s6, s8
578; GFX6-NEXT:    s_mov_b32 s7, s9
579; GFX6-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0x7 unorm tfe
580; GFX6-NEXT:    s_waitcnt vmcnt(0)
581; GFX6-NEXT:    v_mov_b32_e32 v0, v3
582; GFX6-NEXT:    ; return to shader part epilog
583;
584; GFX8-LABEL: load_1d_v3f32_tfe_dmask_xyz:
585; GFX8:       ; %bb.0:
586; GFX8-NEXT:    s_mov_b32 s0, s2
587; GFX8-NEXT:    s_mov_b32 s1, s3
588; GFX8-NEXT:    s_mov_b32 s2, s4
589; GFX8-NEXT:    s_mov_b32 s3, s5
590; GFX8-NEXT:    s_mov_b32 s4, s6
591; GFX8-NEXT:    s_mov_b32 s5, s7
592; GFX8-NEXT:    s_mov_b32 s6, s8
593; GFX8-NEXT:    s_mov_b32 s7, s9
594; GFX8-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0x7 unorm tfe
595; GFX8-NEXT:    s_waitcnt vmcnt(0)
596; GFX8-NEXT:    v_mov_b32_e32 v0, v3
597; GFX8-NEXT:    ; return to shader part epilog
598;
599; GFX10-LABEL: load_1d_v3f32_tfe_dmask_xyz:
600; GFX10:       ; %bb.0:
601; GFX10-NEXT:    s_mov_b32 s0, s2
602; GFX10-NEXT:    s_mov_b32 s1, s3
603; GFX10-NEXT:    s_mov_b32 s2, s4
604; GFX10-NEXT:    s_mov_b32 s3, s5
605; GFX10-NEXT:    s_mov_b32 s4, s6
606; GFX10-NEXT:    s_mov_b32 s5, s7
607; GFX10-NEXT:    s_mov_b32 s6, s8
608; GFX10-NEXT:    s_mov_b32 s7, s9
609; GFX10-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe
610; GFX10-NEXT:    s_waitcnt vmcnt(0)
611; GFX10-NEXT:    v_mov_b32_e32 v0, v3
612; GFX10-NEXT:    ; return to shader part epilog
613  %v = call { <3 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f32i32s.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
614  %v.err = extractvalue { <3 x float>, i32 } %v, 1
615  %vv = bitcast i32 %v.err to float
616  ret float %vv
617}
618
; TFE load of all four channels: dmask=15 (0xf) with the fourth intrinsic
; argument set to 1. The call returns { <4 x float>, i32 } and only the trailing
; i32 is returned (bitcast to float), so the checks expect a five-register
; image_load into v[0:4] with the tfe modifier, followed by a copy of v4 into v0.
; The dmask was previously 16 (0x10), which has none of the four channel bits
; set and therefore did not exercise the xyzw case this test is named for.
; NOTE(review): the assertions below were adjusted by hand to follow the
; v2f32/v3f32 TFE tests in this file; re-run utils/update_llc_test_checks.py to
; confirm them against real llc output.
619define amdgpu_ps float @load_1d_v4f32_tfe_dmask_xyzw(<8 x i32> inreg %rsrc, i32 %s) {
620; GFX6-LABEL: load_1d_v4f32_tfe_dmask_xyzw:
621; GFX6:       ; %bb.0:
622; GFX6-NEXT:    s_mov_b32 s0, s2
623; GFX6-NEXT:    s_mov_b32 s1, s3
624; GFX6-NEXT:    s_mov_b32 s2, s4
625; GFX6-NEXT:    s_mov_b32 s3, s5
626; GFX6-NEXT:    s_mov_b32 s4, s6
627; GFX6-NEXT:    s_mov_b32 s5, s7
628; GFX6-NEXT:    s_mov_b32 s6, s8
629; GFX6-NEXT:    s_mov_b32 s7, s9
630; GFX6-NEXT:    image_load v[0:4], v0, s[0:7] dmask:0xf unorm tfe
631; GFX6-NEXT:    s_waitcnt vmcnt(0)
632; GFX6-NEXT:    v_mov_b32_e32 v0, v4
633; GFX6-NEXT:    ; return to shader part epilog
634;
635; GFX8-LABEL: load_1d_v4f32_tfe_dmask_xyzw:
636; GFX8:       ; %bb.0:
637; GFX8-NEXT:    s_mov_b32 s0, s2
638; GFX8-NEXT:    s_mov_b32 s1, s3
639; GFX8-NEXT:    s_mov_b32 s2, s4
640; GFX8-NEXT:    s_mov_b32 s3, s5
641; GFX8-NEXT:    s_mov_b32 s4, s6
642; GFX8-NEXT:    s_mov_b32 s5, s7
643; GFX8-NEXT:    s_mov_b32 s6, s8
644; GFX8-NEXT:    s_mov_b32 s7, s9
645; GFX8-NEXT:    image_load v[0:4], v0, s[0:7] dmask:0xf unorm tfe
646; GFX8-NEXT:    s_waitcnt vmcnt(0)
647; GFX8-NEXT:    v_mov_b32_e32 v0, v4
648; GFX8-NEXT:    ; return to shader part epilog
649;
650; GFX10-LABEL: load_1d_v4f32_tfe_dmask_xyzw:
651; GFX10:       ; %bb.0:
652; GFX10-NEXT:    s_mov_b32 s0, s2
653; GFX10-NEXT:    s_mov_b32 s1, s3
654; GFX10-NEXT:    s_mov_b32 s2, s4
655; GFX10-NEXT:    s_mov_b32 s3, s5
656; GFX10-NEXT:    s_mov_b32 s4, s6
657; GFX10-NEXT:    s_mov_b32 s5, s7
658; GFX10-NEXT:    s_mov_b32 s6, s8
659; GFX10-NEXT:    s_mov_b32 s7, s9
660; GFX10-NEXT:    image_load v[0:4], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm tfe
661; GFX10-NEXT:    s_waitcnt vmcnt(0)
662; GFX10-NEXT:    v_mov_b32_e32 v0, v4
663; GFX10-NEXT:    ; return to shader part epilog
664  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
665  %v.err = extractvalue { <4 x float>, i32 } %v, 1
666  %vv = bitcast i32 %v.err to float
667  ret float %vv
668}
669
; TFE load with dmask=0. Only the trailing i32 of the { float, i32 } result is
; returned (bitcast to float). The checks expect the same code as the dmask=1
; TFE test: the emitted image_load carries dmask:0x1 rather than 0, writes
; v[0:1], and v1 is then copied into v0.
670define amdgpu_ps float @load_1d_f32_tfe_dmask_0(<8 x i32> inreg %rsrc, i32 %s) {
671; GFX6-LABEL: load_1d_f32_tfe_dmask_0:
672; GFX6:       ; %bb.0:
673; GFX6-NEXT:    s_mov_b32 s0, s2
674; GFX6-NEXT:    s_mov_b32 s1, s3
675; GFX6-NEXT:    s_mov_b32 s2, s4
676; GFX6-NEXT:    s_mov_b32 s3, s5
677; GFX6-NEXT:    s_mov_b32 s4, s6
678; GFX6-NEXT:    s_mov_b32 s5, s7
679; GFX6-NEXT:    s_mov_b32 s6, s8
680; GFX6-NEXT:    s_mov_b32 s7, s9
681; GFX6-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe
682; GFX6-NEXT:    s_waitcnt vmcnt(0)
683; GFX6-NEXT:    v_mov_b32_e32 v0, v1
684; GFX6-NEXT:    ; return to shader part epilog
685;
686; GFX8-LABEL: load_1d_f32_tfe_dmask_0:
687; GFX8:       ; %bb.0:
688; GFX8-NEXT:    s_mov_b32 s0, s2
689; GFX8-NEXT:    s_mov_b32 s1, s3
690; GFX8-NEXT:    s_mov_b32 s2, s4
691; GFX8-NEXT:    s_mov_b32 s3, s5
692; GFX8-NEXT:    s_mov_b32 s4, s6
693; GFX8-NEXT:    s_mov_b32 s5, s7
694; GFX8-NEXT:    s_mov_b32 s6, s8
695; GFX8-NEXT:    s_mov_b32 s7, s9
696; GFX8-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe
697; GFX8-NEXT:    s_waitcnt vmcnt(0)
698; GFX8-NEXT:    v_mov_b32_e32 v0, v1
699; GFX8-NEXT:    ; return to shader part epilog
700;
701; GFX10-LABEL: load_1d_f32_tfe_dmask_0:
702; GFX10:       ; %bb.0:
703; GFX10-NEXT:    s_mov_b32 s0, s2
704; GFX10-NEXT:    s_mov_b32 s1, s3
705; GFX10-NEXT:    s_mov_b32 s2, s4
706; GFX10-NEXT:    s_mov_b32 s3, s5
707; GFX10-NEXT:    s_mov_b32 s4, s6
708; GFX10-NEXT:    s_mov_b32 s5, s7
709; GFX10-NEXT:    s_mov_b32 s6, s8
710; GFX10-NEXT:    s_mov_b32 s7, s9
711; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe
712; GFX10-NEXT:    s_waitcnt vmcnt(0)
713; GFX10-NEXT:    v_mov_b32_e32 v0, v1
714; GFX10-NEXT:    ; return to shader part epilog
715  %v = call { float, i32 } @llvm.amdgcn.image.load.1d.sl_f32i32s.i32(i32 0, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
716  %v.err = extractvalue { float, i32 } %v, 1
717  %vv = bitcast i32 %v.err to float
718  ret float %vv
719}
720
; Overloads of llvm.amdgcn.image.load.1d called by the tests in this file.
; The first, fourth and fifth parameters are immarg, so every call passes them
; as constants; the tests use the first as the dmask and set the fourth to 1 in
; the TFE cases. The second group returns a { data, i32 } struct and is the form
; used by the TFE tests. All declarations share attribute group #0.
721declare float @llvm.amdgcn.image.load.1d.f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
722declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
723declare <3 x float> @llvm.amdgcn.image.load.1d.v3f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
724declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
725
726declare { float, i32 } @llvm.amdgcn.image.load.1d.sl_f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
727declare { <2 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
728declare { <3 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
729declare { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
730
731attributes #0 = { nounwind readonly }
732