• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; RUN: opt -S -instcombine -mtriple=amdgcn-amd-amdhsa %s | FileCheck %s
2
3; --------------------------------------------------------------------
4; llvm.amdgcn.buffer.load
5; --------------------------------------------------------------------
6
7; CHECK-LABEL: @buffer_load_f32(
8; CHECK-NEXT: %data = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
9; CHECK-NEXT: ret float %data
10define amdgpu_ps float @buffer_load_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
11  %data = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
12  ret float %data
13}
14
15; CHECK-LABEL: @buffer_load_v1f32(
16; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
17; CHECK-NEXT: ret <1 x float> %data
18define amdgpu_ps <1 x float> @buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
19  %data = call <1 x float> @llvm.amdgcn.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
20  ret <1 x float> %data
21}
22
23; CHECK-LABEL: @buffer_load_v2f32(
24; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
25; CHECK-NEXT: ret <2 x float> %data
26define amdgpu_ps <2 x float> @buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
27  %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
28  ret <2 x float> %data
29}
30
31; CHECK-LABEL: @buffer_load_v4f32(
32; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
33; CHECK-NEXT: ret <4 x float> %data
34define amdgpu_ps <4 x float> @buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
35  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
36  ret <4 x float> %data
37}
38
39; CHECK-LABEL: @extract_elt0_buffer_load_v2f32(
40; CHECK: %data = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
41; CHECK-NEXT: ret float %data
42define amdgpu_ps float @extract_elt0_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
43  %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
44  %elt0 = extractelement <2 x float> %data, i32 0
45  ret float %elt0
46}
47
48; CHECK-LABEL: @extract_elt1_buffer_load_v2f32(
49; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
50; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
51; CHECK-NEXT: ret float %elt1
52define amdgpu_ps float @extract_elt1_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
53  %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
54  %elt1 = extractelement <2 x float> %data, i32 1
55  ret float %elt1
56}
57
58; CHECK-LABEL: @extract_elt0_buffer_load_v4f32(
59; CHECK-NEXT: %data = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
60; CHECK-NEXT: ret float %data
61define amdgpu_ps float @extract_elt0_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
62  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
63  %elt0 = extractelement <4 x float> %data, i32 0
64  ret float %elt0
65}
66
67; CHECK-LABEL: @extract_elt1_buffer_load_v4f32(
68; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
69; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
70; CHECK-NEXT: ret float %elt1
71define amdgpu_ps float @extract_elt1_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
72  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
73  %elt1 = extractelement <4 x float> %data, i32 1
74  ret float %elt1
75}
76
77; CHECK-LABEL: @extract_elt2_buffer_load_v4f32(
78; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
79; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
80; CHECK-NEXT: ret float %elt1
81define amdgpu_ps float @extract_elt2_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
82  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
83  %elt1 = extractelement <4 x float> %data, i32 2
84  ret float %elt1
85}
86
87; CHECK-LABEL: @extract_elt3_buffer_load_v4f32(
88; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
89; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3
90; CHECK-NEXT: ret float %elt1
91define amdgpu_ps float @extract_elt3_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
92  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
93  %elt1 = extractelement <4 x float> %data, i32 3
94  ret float %elt1
95}
96
97; CHECK-LABEL: @extract_elt0_elt1_buffer_load_v4f32(
98; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
99; CHECK-NEXT: ret <2 x float>
100define amdgpu_ps <2 x float> @extract_elt0_elt1_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
101  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
102  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
103  ret <2 x float> %shuf
104}
105
106; CHECK-LABEL: @extract_elt1_elt2_buffer_load_v4f32(
107; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
108; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
109; CHECK-NEXT: ret <2 x float> %shuf
110define amdgpu_ps <2 x float> @extract_elt1_elt2_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
111  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
112  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
113  ret <2 x float> %shuf
114}
115
116; CHECK-LABEL: @extract_elt2_elt3_buffer_load_v4f32(
117; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
118; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
119; CHECK-NEXT: ret <2 x float> %shuf
120define amdgpu_ps <2 x float> @extract_elt2_elt3_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
121  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
122  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
123  ret <2 x float> %shuf
124}
125
126; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32(
127; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
128; CHECK-NEXT: ret <3 x float> %data
129define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
130  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
131  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
132  ret <3 x float> %shuf
133}
134
135; CHECK-LABEL: @extract_elt1_elt2_elt3_buffer_load_v4f32(
136; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
137; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
138; CHECK-NEXT: ret <3 x float> %shuf
139define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
140  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
141  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
142  ret <3 x float> %shuf
143}
144
145; CHECK-LABEL: @extract_elt0_elt2_elt3_buffer_load_v4f32(
146; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
147; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
148; CHECK-NEXT: ret <3 x float> %shuf
149define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
150  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
151  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
152  ret <3 x float> %shuf
153}
154
155; CHECK-LABEL: @extract_elt0_elt1_buffer_load_v4f32_2(
156; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
157; CHECK-NEXT: %elt0 = extractelement <2 x float> %data, i32 0
158; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
159; CHECK-NEXT: %ins0 = insertvalue { float, float } undef, float %elt0, 0
160; CHECK-NEXT: %ins1 = insertvalue { float, float } %ins0, float %elt1, 1
161; CHECK-NEXT: ret { float, float } %ins1
162define amdgpu_ps { float, float } @extract_elt0_elt1_buffer_load_v4f32_2(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
163  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
164  %elt0 = extractelement <4 x float> %data, i32 0
165  %elt1 = extractelement <4 x float> %data, i32 1
166  %ins0 = insertvalue { float, float } undef, float %elt0, 0
167  %ins1 = insertvalue { float, float } %ins0, float %elt1, 1
168  ret { float, float } %ins1
169}
170
171; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32_2(
172; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
173; CHECK-NEXT: %elt0 = extractelement <3 x float> %data, i32 0
174; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 1
175; CHECK-NEXT: %elt2 = extractelement <3 x float> %data, i32 2
176; CHECK-NEXT: %ins0 = insertvalue { float, float, float } undef, float %elt0, 0
177; CHECK-NEXT: %ins1 = insertvalue { float, float, float } %ins0, float %elt1, 1
178; CHECK-NEXT: %ins2 = insertvalue { float, float, float } %ins1, float %elt2, 2
179; CHECK-NEXT: ret { float, float, float } %ins2
180define amdgpu_ps { float, float, float } @extract_elt0_elt1_elt2_buffer_load_v4f32_2(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
181  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
182  %elt0 = extractelement <4 x float> %data, i32 0
183  %elt1 = extractelement <4 x float> %data, i32 1
184  %elt2 = extractelement <4 x float> %data, i32 2
185  %ins0 = insertvalue { float, float, float } undef, float %elt0, 0
186  %ins1 = insertvalue { float, float, float } %ins0, float %elt1, 1
187  %ins2 = insertvalue { float, float, float } %ins1, float %elt2, 2
188  ret { float, float, float } %ins2
189}
190
191; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32_3(
192; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
193; CHECK-NEXT: %ins1 = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 2>
194; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 undef, i32 1>
195; CHECK-NEXT: %ret = fadd <2 x float> %ins1, %shuf
196define amdgpu_ps <2 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32_3(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
197  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
198  %elt0 = extractelement <4 x float> %data, i32 0
199  %elt2 = extractelement <4 x float> %data, i32 2
200  %ins0 = insertelement <2 x float> undef, float %elt0, i32 0
201  %ins1 = insertelement <2 x float> %ins0, float %elt2, i32 1
202  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 4, i32 1>
203  %ret = fadd <2 x float> %ins1, %shuf
204  ret <2 x float> %ret
205}
206
207; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32_4(
208; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
209; CHECK-NEXT: %ins1 = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 2>
210; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 undef>
211; CHECK-NEXT: %ret = fadd <2 x float> %ins1, %shuf
212; CHECK-NEXT: ret <2 x float> %ret
213define amdgpu_ps <2 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32_4(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
214  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
215  %elt0 = extractelement <4 x float> %data, i32 0
216  %elt2 = extractelement <4 x float> %data, i32 2
217  %ins0 = insertelement <2 x float> undef, float %elt0, i32 0
218  %ins1 = insertelement <2 x float> %ins0, float %elt2, i32 1
219  %shuf = shufflevector <4 x float> undef, <4 x float> %data, <2 x i32> <i32 5, i32 1>
220  %ret = fadd <2 x float> %ins1, %shuf
221  ret <2 x float> %ret
222}
223
224; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32_5(
225; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
226; CHECK-NEXT: %ins1 = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 2, i32 2>
227; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
228; CHECK-NEXT: %ret = fadd <2 x float> %ins1, %shuf
229define amdgpu_ps <2 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32_5(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
230  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
231  %elt2 = extractelement <4 x float> %data, i32 2
232  %ins0 = insertelement <2 x float> undef, float %elt2, i32 0
233  %ins1 = insertelement <2 x float> %ins0, float %elt2, i32 1
234  %shuf = shufflevector <4 x float> %data, <4 x float> %data, <2 x i32> <i32 0, i32 5>
235  %ret = fadd <2 x float> %ins1, %shuf
236  ret <2 x float> %ret
237}
238
239; CHECK-LABEL: @extract_elt0_buffer_load_v3f32(
240; CHECK-NEXT: %data = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
241; CHECK-NEXT: ret float %data
242define amdgpu_ps float @extract_elt0_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
243  %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
244  %elt0 = extractelement <3 x float> %data, i32 0
245  ret float %elt0
246}
247
248; CHECK-LABEL: @extract_elt1_buffer_load_v3f32(
249; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
250; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
251; CHECK-NEXT: ret float %elt1
252define amdgpu_ps float @extract_elt1_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
253  %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
254  %elt1 = extractelement <3 x float> %data, i32 1
255  ret float %elt1
256}
257
258; CHECK-LABEL: @extract_elt2_buffer_load_v3f32(
259; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
260; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
261; CHECK-NEXT: ret float %elt1
262define amdgpu_ps float @extract_elt2_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
263  %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
264  %elt1 = extractelement <3 x float> %data, i32 2
265  ret float %elt1
266}
267
268; CHECK-LABEL: @extract_elt0_elt1_buffer_load_v3f32(
269; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
270; CHECK-NEXT: ret <2 x float>
271define amdgpu_ps <2 x float> @extract_elt0_elt1_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
272  %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
273  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
274  ret <2 x float> %shuf
275}
276
277; CHECK-LABEL: @extract_elt1_elt2_buffer_load_v3f32(
278; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
279; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
280; CHECK-NEXT: ret <2 x float> %shuf
281define amdgpu_ps <2 x float> @extract_elt1_elt2_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
282  %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
283  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
284  ret <2 x float> %shuf
285}
286
287; CHECK-LABEL: @preserve_metadata_extract_elt0_buffer_load_v2f32(
288; CHECK-NEXT: %data = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false), !fpmath !0
289; CHECK-NEXT: ret float %data
290define amdgpu_ps float @preserve_metadata_extract_elt0_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
291  %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false), !fpmath !0
292  %elt0 = extractelement <2 x float> %data, i32 0
293  ret float %elt0
294}
295
296declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #1
297declare <1 x float> @llvm.amdgcn.buffer.load.v1f32(<4 x i32>, i32, i32, i1, i1) #1
298declare <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32>, i32, i32, i1, i1) #1
299declare <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32>, i32, i32, i1, i1) #1
300declare <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32>, i32, i32, i1, i1) #1
301
302; --------------------------------------------------------------------
303; llvm.amdgcn.buffer.load.format
304; --------------------------------------------------------------------
305
306; CHECK-LABEL: @buffer_load_format_v1f32(
307; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 true)
308; CHECK-NEXT: ret <1 x float> %data
309define amdgpu_ps <1 x float> @buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
310  %data = call <1 x float> @llvm.amdgcn.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 true)
311  ret <1 x float> %data
312}
313
314; CHECK-LABEL: @extract_elt0_buffer_load_format_v2f32(
315; CHECK-NEXT: %data = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 true, i1 false)
316; CHECK-NEXT: ret float %data
317define amdgpu_ps float @extract_elt0_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
318  %data = call <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 true, i1 false)
319  %elt0 = extractelement <2 x float> %data, i32 0
320  ret float %elt0
321}
322
323; CHECK-LABEL: @extract_elt0_elt1_buffer_load_format_v3f32(
324; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
325; CHECK-NEXT: ret <2 x float> %data
326define amdgpu_ps <2 x float> @extract_elt0_elt1_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
327  %data = call <3 x float> @llvm.amdgcn.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
328  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
329  ret <2 x float> %shuf
330}
331
332; CHECK-LABEL: @extract_elt0_elt1_buffer_load_format_v4f32(
333; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
334; CHECK-NEXT: ret <2 x float> %data
335define amdgpu_ps <2 x float> @extract_elt0_elt1_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
336  %data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
337  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
338  ret <2 x float> %shuf
339}
340
341; The initial insertion point is at the extractelement
342; CHECK-LABEL: @extract01_bitcast_buffer_load_format_v4f32(
343; CHECK-NEXT: %tmp = call <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32> undef, i32 %arg, i32 16, i1 false, i1 false)
344; CHECK-NEXT: %1 = shufflevector <2 x float> %tmp, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
345; CHECK-NEXT: %tmp1 = bitcast <4 x float> %1 to <2 x double>
346; CHECK-NEXT: %tmp2 = extractelement <2 x double> %tmp1, i32 0
347; CHECK-NEXT: ret double %tmp2
348define double @extract01_bitcast_buffer_load_format_v4f32(i32 %arg) #0 {
349  %tmp = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> undef, i32 %arg, i32 16, i1 false, i1 false) #3
350  %tmp1 = bitcast <4 x float> %tmp to <2 x double>
351  %tmp2 = extractelement <2 x double> %tmp1, i32 0
352  ret double %tmp2
353}
354
355; CHECK-LABEL: @extract0_bitcast_buffer_load_format_v4f32(
356; CHECK-NEXT: %tmp = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> undef, i32 %arg, i32 16, i1 false, i1 false)
357; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32
358; CHECK-NEXT: ret i32 %tmp2
359define i32 @extract0_bitcast_buffer_load_format_v4f32(i32 %arg) #0 {
360  %tmp = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> undef, i32 %arg, i32 16, i1 false, i1 false) #3
361  %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
362  %tmp2 = extractelement <4 x i32> %tmp1, i32 0
363  ret i32 %tmp2
364}
365
366; CHECK-LABEL: @extract_lo16_0_bitcast_buffer_load_format_v4f32(
367; CHECK-NEXT: %tmp = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> undef, i32 %arg, i32 16, i1 false, i1 false)
368; CHECK-NEXT: %1 = bitcast float %tmp to i32
369; CHECK-NEXT: %tmp2 = trunc i32 %1 to i16
370; CHECK-NEXT: ret i16 %tmp2
371define i16 @extract_lo16_0_bitcast_buffer_load_format_v4f32(i32 %arg) #0 {
372  %tmp = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> undef, i32 %arg, i32 16, i1 false, i1 false) #3
373  %tmp1 = bitcast <4 x float> %tmp to <8 x i16>
374  %tmp2 = extractelement <8 x i16> %tmp1, i32 0
375  ret i16 %tmp2
376}
377
378declare float @llvm.amdgcn.buffer.load.format.f32(<4 x i32>, i32, i32, i1, i1) #1
379declare <1 x float> @llvm.amdgcn.buffer.load.format.v1f32(<4 x i32>, i32, i32, i1, i1) #1
380declare <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32>, i32, i32, i1, i1) #1
381declare <3 x float> @llvm.amdgcn.buffer.load.format.v3f32(<4 x i32>, i32, i32, i1, i1) #1
382declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1
383
384; --------------------------------------------------------------------
385; llvm.amdgcn.raw.buffer.load
386; --------------------------------------------------------------------
387
388; CHECK-LABEL: @raw_buffer_load_f32(
389; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
390; CHECK-NEXT: ret float %data
391define amdgpu_ps float @raw_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
392  %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
393  ret float %data
394}
395
396; CHECK-LABEL: @raw_buffer_load_v1f32(
397; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
398; CHECK-NEXT: ret <1 x float> %data
399define amdgpu_ps <1 x float> @raw_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
400  %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
401  ret <1 x float> %data
402}
403
404; CHECK-LABEL: @raw_buffer_load_v2f32(
405; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
406; CHECK-NEXT: ret <2 x float> %data
407define amdgpu_ps <2 x float> @raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
408  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
409  ret <2 x float> %data
410}
411
412; CHECK-LABEL: @raw_buffer_load_v4f32(
413; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
414; CHECK-NEXT: ret <4 x float> %data
415define amdgpu_ps <4 x float> @raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
416  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
417  ret <4 x float> %data
418}
419
420; CHECK-LABEL: @extract_elt0_raw_buffer_load_v2f32(
421; CHECK: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
422; CHECK-NEXT: ret float %data
423define amdgpu_ps float @extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
424  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
425  %elt0 = extractelement <2 x float> %data, i32 0
426  ret float %elt0
427}
428
429; CHECK-LABEL: @extract_elt1_raw_buffer_load_v2f32(
430; CHECK-NEXT: %1 = add i32 %ofs, 4
431; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0)
432; CHECK-NEXT: ret float %data
433define amdgpu_ps float @extract_elt1_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
434  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
435  %elt1 = extractelement <2 x float> %data, i32 1
436  ret float %elt1
437}
438
439; CHECK-LABEL: @extract_elt0_raw_buffer_load_v4f32(
440; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
441; CHECK-NEXT: ret float %data
442define amdgpu_ps float @extract_elt0_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
443  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
444  %elt0 = extractelement <4 x float> %data, i32 0
445  ret float %elt0
446}
447
448; CHECK-LABEL: @extract_elt1_raw_buffer_load_v4f32(
449; CHECK-NEXT: %1 = add i32 %ofs, 4
450; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0)
451; CHECK-NEXT: ret float %data
452define amdgpu_ps float @extract_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
453  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
454  %elt1 = extractelement <4 x float> %data, i32 1
455  ret float %elt1
456}
457
458; CHECK-LABEL: @extract_elt2_raw_buffer_load_v4f32(
459; CHECK-NEXT: %1 = add i32 %ofs, 8
460; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0)
461; CHECK-NEXT: ret float %data
462define amdgpu_ps float @extract_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
463  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
464  %elt1 = extractelement <4 x float> %data, i32 2
465  ret float %elt1
466}
467
468; CHECK-LABEL: @extract_elt3_raw_buffer_load_v4f32(
469; CHECK-NEXT: %1 = add i32 %ofs, 12
470; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0)
471; CHECK-NEXT: ret float %data
472define amdgpu_ps float @extract_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
473  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
474  %elt1 = extractelement <4 x float> %data, i32 3
475  ret float %elt1
476}
477
478; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v4f32(
479; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
480; CHECK-NEXT: ret <2 x float>
481define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
482  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
483  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
484  ret <2 x float> %shuf
485}
486
487; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_v4f32(
488; CHECK-NEXT: %1 = add i32 %ofs, 4
489; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0)
490; CHECK-NEXT: ret <2 x float> %data
491define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
492  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
493  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
494  ret <2 x float> %shuf
495}
496
497; CHECK-LABEL: @extract_elt2_elt3_raw_buffer_load_v4f32(
498; CHECK-NEXT: %1 = add i32 %ofs, 8
499; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0)
500; CHECK-NEXT: ret <2 x float> %data
501define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
502  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
503  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
504  ret <2 x float> %shuf
505}
506
507; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_buffer_load_v4f32(
508; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
509; CHECK-NEXT: ret <3 x float> %data
510define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
511  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
512  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
513  ret <3 x float> %shuf
514}
515
516; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_buffer_load_v4f32(
517; CHECK-NEXT: %1 = add i32 %ofs, 4
518; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0)
519; CHECK-NEXT: ret <3 x float> %data
520define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
521  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
522  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
523  ret <3 x float> %shuf
524}
525
526; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_buffer_load_v4f32(
527; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
528; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
529; CHECK-NEXT: ret <3 x float> %shuf
530define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
531  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
532  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
533  ret <3 x float> %shuf
534}
535
536; CHECK-LABEL: @extract_elt0_raw_buffer_load_v3f32(
537; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
538; CHECK-NEXT: ret float %data
539define amdgpu_ps float @extract_elt0_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
540  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
541  %elt0 = extractelement <3 x float> %data, i32 0
542  ret float %elt0
543}
544
545; CHECK-LABEL: @extract_elt1_raw_buffer_load_v3f32(
546; CHECK-NEXT: %1 = add i32 %ofs, 4
547; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0)
548; CHECK-NEXT: ret float %data
549define amdgpu_ps float @extract_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
550  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
551  %elt1 = extractelement <3 x float> %data, i32 1
552  ret float %elt1
553}
554
555; CHECK-LABEL: @extract_elt2_raw_buffer_load_v3f32(
556; CHECK-NEXT: %1 = add i32 %ofs, 8
557; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0)
558; CHECK-NEXT: ret float %data
559define amdgpu_ps float @extract_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
560  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
561  %elt1 = extractelement <3 x float> %data, i32 2
562  ret float %elt1
563}
564
565; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v3f32(
566; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
567; CHECK-NEXT: ret <2 x float>
568define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
569  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
570  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
571  ret <2 x float> %shuf
572}
573
574; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_v3f32(
575; CHECK-NEXT: %1 = add i32 %ofs, 4
576; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0)
577; CHECK-NEXT: ret <2 x float> %data
578define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
579  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
580  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
581  ret <2 x float> %shuf
582}
583
584; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_v4f32(
585; CHECK-NEXT: %tmp = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
586; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32
587; CHECK-NEXT: ret i32 %tmp2
588define i32 @extract0_bitcast_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
589  %tmp = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
590  %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
591  %tmp2 = extractelement <4 x i32> %tmp1, i32 0
592  ret i32 %tmp2
593}
594
595; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_v4i32(
596; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
597; CHECK-NEXT: %tmp2 = bitcast i32 %tmp to float
598; CHECK-NEXT: ret float %tmp2
599define float @extract0_bitcast_raw_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
600  %tmp = call <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
601  %tmp1 = bitcast <4 x i32> %tmp to <4 x float>
602  %tmp2 = extractelement <4 x float> %tmp1, i32 0
603  ret float %tmp2
604}
605
606; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_buffer_load_v2f32(
607; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
608; CHECK-NEXT: ret float %data
609define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
610  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
611  %elt0 = extractelement <2 x float> %data, i32 0
612  ret float %elt0
613}
614
615declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32) #1
616declare <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32>, i32, i32, i32) #1
617declare <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32>, i32, i32, i32) #1
618declare <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32>, i32, i32, i32) #1
619declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32) #1
620
621declare <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32>, i32, i32, i32) #1
622
623; CHECK-LABEL: @extract_elt0_raw_buffer_load_v2f16(
624; CHECK: %data = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
625; CHECK-NEXT: ret half %data
626define amdgpu_ps half @extract_elt0_raw_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
627  %data = call <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
628  %elt0 = extractelement <2 x half> %data, i32 0
629  ret half %elt0
630}
631
632; CHECK-LABEL: @extract_elt1_raw_buffer_load_v2f16(
633; CHECK-NEXT: %1 = add i32 %ofs, 2
634; CHECK-NEXT: %data = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0)
635; CHECK-NEXT: ret half %data
636define amdgpu_ps half @extract_elt1_raw_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
637  %data = call <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
638  %elt1 = extractelement <2 x half> %data, i32 1
639  ret half %elt1
640}
641
642; CHECK-LABEL: @extract_elt1_raw_buffer_load_v3f16(
643; CHECK-NEXT: %1 = add i32 %ofs, 2
644; CHECK-NEXT: %data = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0)
645; CHECK-NEXT: ret half %data
646define amdgpu_ps half @extract_elt1_raw_buffer_load_v3f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
647  %data = call <3 x half> @llvm.amdgcn.raw.buffer.load.v3f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
648  %elt0 = extractelement <3 x half> %data, i32 1
649  ret half %elt0
650}
651
652; CHECK-LABEL: @extract_elt1_raw_buffer_load_v4f16(
653; CHECK-NEXT: %1 = add i32 %ofs, 2
654; CHECK-NEXT: %data = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0)
655; CHECK-NEXT: ret half %data
656define amdgpu_ps half @extract_elt1_raw_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
657  %data = call <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
658  %elt1 = extractelement <4 x half> %data, i32 1
659  ret half %elt1
660}
661
662; CHECK-LABEL: @extract_elt3_raw_buffer_load_v4f16(
663; CHECK-NEXT: %1 = add i32 %ofs, 6
664; CHECK-NEXT: %data = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0)
665; CHECK-NEXT: ret half %data
666define amdgpu_ps half @extract_elt3_raw_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
667  %data = call <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
668  %elt1 = extractelement <4 x half> %data, i32 3
669  ret half %elt1
670}
671
672; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v4f16(
673; CHECK-NEXT: %data = call <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
674; CHECK-NEXT: ret <2 x half>
675define amdgpu_ps <2 x half> @extract_elt0_elt1_raw_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
676  %data = call <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
677  %shuf = shufflevector <4 x half> %data, <4 x half> undef, <2 x i32> <i32 0, i32 1>
678  ret <2 x half> %shuf
679}
680
681declare half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32>, i32, i32, i32) #1
682declare <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32>, i32, i32, i32) #1
683declare <3 x half> @llvm.amdgcn.raw.buffer.load.v3f16(<4 x i32>, i32, i32, i32) #1
684declare <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32>, i32, i32, i32) #1
685
686; CHECK-LABEL: @extract_elt0_raw_buffer_load_v2i8(
687; CHECK: %data = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
688; CHECK-NEXT: ret i8 %data
689define amdgpu_ps i8 @extract_elt0_raw_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
690  %data = call <2 x i8> @llvm.amdgcn.raw.buffer.load.v2i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
691  %elt0 = extractelement <2 x i8> %data, i32 0
692  ret i8 %elt0
693}
694
695; CHECK-LABEL: @extract_elt1_raw_buffer_load_v2i8(
696; CHECK-NEXT: %1 = add i32 %ofs, 1
697; CHECK-NEXT: %data = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0)
698; CHECK-NEXT: ret i8 %data
699define amdgpu_ps i8 @extract_elt1_raw_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
700  %data = call <2 x i8> @llvm.amdgcn.raw.buffer.load.v2i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
701  %elt1 = extractelement <2 x i8> %data, i32 1
702  ret i8 %elt1
703}
704
705; CHECK-LABEL: @extract_elt1_raw_buffer_load_v3i8(
706; CHECK-NEXT: %1 = add i32 %ofs, 1
707; CHECK-NEXT: %data = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0)
708; CHECK-NEXT: ret i8 %data
709define amdgpu_ps i8 @extract_elt1_raw_buffer_load_v3i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
710  %data = call <3 x i8> @llvm.amdgcn.raw.buffer.load.v3i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
711  %elt0 = extractelement <3 x i8> %data, i32 1
712  ret i8 %elt0
713}
714
715; CHECK-LABEL: @extract_elt1_raw_buffer_load_v4i8(
716; CHECK-NEXT: %1 = add i32 %ofs, 1
717; CHECK-NEXT: %data = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0)
718; CHECK-NEXT: ret i8 %data
719define amdgpu_ps i8 @extract_elt1_raw_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
720  %data = call <4 x i8> @llvm.amdgcn.raw.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
721  %elt1 = extractelement <4 x i8> %data, i32 1
722  ret i8 %elt1
723}
724
725; CHECK-LABEL: @extract_elt3_raw_buffer_load_v4i8(
726; CHECK-NEXT: %1 = add i32 %ofs, 3
727; CHECK-NEXT: %data = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %1, i32 %sofs, i32 0)
728; CHECK-NEXT: ret i8 %data
729define amdgpu_ps i8 @extract_elt3_raw_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
730  %data = call <4 x i8> @llvm.amdgcn.raw.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
731  %elt1 = extractelement <4 x i8> %data, i32 3
732  ret i8 %elt1
733}
734
735; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v4i8(
736; CHECK-NEXT: %data = call <2 x i8> @llvm.amdgcn.raw.buffer.load.v2i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
737; CHECK-NEXT: ret <2 x i8>
738define amdgpu_ps <2 x i8> @extract_elt0_elt1_raw_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
739  %data = call <4 x i8> @llvm.amdgcn.raw.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
740  %shuf = shufflevector <4 x i8> %data, <4 x i8> undef, <2 x i32> <i32 0, i32 1>
741  ret <2 x i8> %shuf
742}
743
744declare i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32>, i32, i32, i32) #1
745declare <2 x i8> @llvm.amdgcn.raw.buffer.load.v2i8(<4 x i32>, i32, i32, i32) #1
746declare <3 x i8> @llvm.amdgcn.raw.buffer.load.v3i8(<4 x i32>, i32, i32, i32) #1
747declare <4 x i8> @llvm.amdgcn.raw.buffer.load.v4i8(<4 x i32>, i32, i32, i32) #1
748
749; --------------------------------------------------------------------
750; llvm.amdgcn.s.buffer.load
751; --------------------------------------------------------------------
752
753; CHECK-LABEL: @s_buffer_load_f32(
754; CHECK-NEXT: %data = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
755; CHECK-NEXT: ret float %data
756define amdgpu_ps float @s_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
757  %data = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
758  ret float %data
759}
760
761; CHECK-LABEL: @s_buffer_load_v2f32(
762; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
763; CHECK-NEXT: ret <2 x float> %data
764define amdgpu_ps <2 x float> @s_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
765  %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
766  ret <2 x float> %data
767}
768
769; CHECK-LABEL: @s_buffer_load_v4f32(
770; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
771; CHECK-NEXT: ret <4 x float> %data
772define amdgpu_ps <4 x float> @s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
773  %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
774  ret <4 x float> %data
775}
776
777; CHECK-LABEL: @extract_elt0_s_buffer_load_v2f32(
778; CHECK: %data = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
779; CHECK-NEXT: ret float %data
780define amdgpu_ps float @extract_elt0_s_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
781  %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
782  %elt0 = extractelement <2 x float> %data, i32 0
783  ret float %elt0
784}
785
786; CHECK-LABEL: @extract_elt1_s_buffer_load_v2f32(
787; CHECK-NEXT: %1 = add i32 %ofs, 4
788; CHECK-NEXT: %data = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %1, i32 0)
789; CHECK-NEXT: ret float %data
790define amdgpu_ps float @extract_elt1_s_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
791  %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
792  %elt1 = extractelement <2 x float> %data, i32 1
793  ret float %elt1
794}
795
796; CHECK-LABEL: @extract_elt0_s_buffer_load_v4f32(
797; CHECK-NEXT: %data = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
798; CHECK-NEXT: ret float %data
799define amdgpu_ps float @extract_elt0_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
800  %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
801  %elt0 = extractelement <4 x float> %data, i32 0
802  ret float %elt0
803}
804
805; CHECK-LABEL: @extract_elt1_s_buffer_load_v4f32(
806; CHECK-NEXT: %1 = add i32 %ofs, 4
807; CHECK-NEXT: %data = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %1, i32 0)
808; CHECK-NEXT: ret float %data
809define amdgpu_ps float @extract_elt1_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
810  %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
811  %elt1 = extractelement <4 x float> %data, i32 1
812  ret float %elt1
813}
814
815; CHECK-LABEL: @extract_elt2_s_buffer_load_v4f32(
816; CHECK-NEXT: %1 = add i32 %ofs, 8
817; CHECK-NEXT: %data = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %1, i32 0)
818; CHECK-NEXT: ret float %data
819define amdgpu_ps float @extract_elt2_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
820  %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
821  %elt1 = extractelement <4 x float> %data, i32 2
822  ret float %elt1
823}
824
825; CHECK-LABEL: @extract_elt3_s_buffer_load_v4f32(
826; CHECK-NEXT: %1 = add i32 %ofs, 12
827; CHECK-NEXT: %data = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %1, i32 0)
828; CHECK-NEXT: ret float %data
829define amdgpu_ps float @extract_elt3_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
830  %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
831  %elt1 = extractelement <4 x float> %data, i32 3
832  ret float %elt1
833}
834
835; CHECK-LABEL: @extract_elt0_elt1_s_buffer_load_v4f32(
836; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
837; CHECK-NEXT: ret <2 x float>
838define amdgpu_ps <2 x float> @extract_elt0_elt1_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
839  %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
840  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
841  ret <2 x float> %shuf
842}
843
844; CHECK-LABEL: @extract_elt1_elt2_s_buffer_load_v4f32(
845; CHECK-NEXT: %1 = add i32 %ofs, 4
846; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %1, i32 0)
847; CHECK-NEXT: ret <2 x float> %data
848define amdgpu_ps <2 x float> @extract_elt1_elt2_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
849  %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
850  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
851  ret <2 x float> %shuf
852}
853
854; CHECK-LABEL: @extract_elt2_elt3_s_buffer_load_v4f32(
855; CHECK-NEXT: %1 = add i32 %ofs, 8
856; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %1, i32 0)
857; CHECK-NEXT: ret <2 x float> %data
858define amdgpu_ps <2 x float> @extract_elt2_elt3_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
859  %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
860  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
861  ret <2 x float> %shuf
862}
863
864; CHECK-LABEL: @extract_elt0_elt1_elt2_s_buffer_load_v4f32(
865; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
866; CHECK-NEXT: ret <3 x float> %data
867define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
868  %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
869  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
870  ret <3 x float> %shuf
871}
872
873; CHECK-LABEL: @extract_elt0_elt2_elt3_s_buffer_load_v4f32(
874; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
875; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
876; CHECK-NEXT: ret <3 x float> %shuf
877define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
878  %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
879  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
880  ret <3 x float> %shuf
881}
882
883; CHECK-LABEL: @extract_elt0_s_buffer_load_v3f32(
884; CHECK-NEXT: %data = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
885; CHECK-NEXT: ret float %data
886define amdgpu_ps float @extract_elt0_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
887  %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
888  %elt0 = extractelement <3 x float> %data, i32 0
889  ret float %elt0
890}
891
892; CHECK-LABEL: @extract_elt1_s_buffer_load_v3f32(
893; CHECK-NEXT: %1 = add i32 %ofs, 4
894; CHECK-NEXT: %data = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %1, i32 0)
895; CHECK-NEXT: ret float %data
896define amdgpu_ps float @extract_elt1_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
897  %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
898  %elt1 = extractelement <3 x float> %data, i32 1
899  ret float %elt1
900}
901
902; CHECK-LABEL: @extract_elt2_s_buffer_load_v3f32(
903; CHECK-NEXT: %1 = add i32 %ofs, 8
904; CHECK-NEXT: %data = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %1, i32 0)
905; CHECK-NEXT: ret float %data
906define amdgpu_ps float @extract_elt2_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
907  %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
908  %elt1 = extractelement <3 x float> %data, i32 2
909  ret float %elt1
910}
911
912; CHECK-LABEL: @extract_elt0_elt1_s_buffer_load_v3f32(
913; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
914; CHECK-NEXT: ret <2 x float>
915define amdgpu_ps <2 x float> @extract_elt0_elt1_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
916  %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
917  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
918  ret <2 x float> %shuf
919}
920
921; CHECK-LABEL: @extract_elt1_elt2_s_buffer_load_v3f32(
922; CHECK-NEXT: %1 = add i32 %ofs, 4
923; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %1, i32 0)
924; CHECK-NEXT: ret <2 x float> %data
925define amdgpu_ps <2 x float> @extract_elt1_elt2_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
926  %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
927  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
928  ret <2 x float> %shuf
929}
930
931; Do not trim to vec3 s_buffer_load in instcombine, as the load will most likely be widened
932; to vec4 anyway during lowering.
933; CHECK-LABEL: @extract_elt1_elt2_elt3_s_buffer_load_v4f32(
934; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
935; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
936; CHECK-NEXT: ret <3 x float> %shuf
937define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
938  %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
939  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
940  ret <3 x float> %shuf
941}
942
943; CHECK-LABEL: @extract0_bitcast_s_buffer_load_v4f32(
944; CHECK-NEXT: %tmp = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
945; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32
946; CHECK-NEXT: ret i32 %tmp2
947define i32 @extract0_bitcast_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
948  %tmp = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
949  %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
950  %tmp2 = extractelement <4 x i32> %tmp1, i32 0
951  ret i32 %tmp2
952}
953
954; CHECK-LABEL: @extract0_bitcast_s_buffer_load_v4i32(
955; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %ofs, i32 0)
956; CHECK-NEXT: %tmp2 = bitcast i32 %tmp to float
957; CHECK-NEXT: ret float %tmp2
958define float @extract0_bitcast_s_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
959  %tmp = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 0)
960  %tmp1 = bitcast <4 x i32> %tmp to <4 x float>
961  %tmp2 = extractelement <4 x float> %tmp1, i32 0
962  ret float %tmp2
963}
964
965; CHECK-LABEL: @preserve_metadata_extract_elt0_s_buffer_load_v2f32(
966; CHECK-NEXT: %data = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 0), !fpmath !0
967; CHECK-NEXT: ret float %data
968define amdgpu_ps float @preserve_metadata_extract_elt0_s_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
969  %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 0), !fpmath !0
970  %elt0 = extractelement <2 x float> %data, i32 0
971  ret float %elt0
972}
973
974declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) #1
975declare <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32>, i32, i32) #1
976declare <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32>, i32, i32) #1
977declare <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32>, i32, i32) #1
978declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32) #1
979
980; CHECK-LABEL: @extract_elt0_s_buffer_load_v2f16(
981; CHECK: %data = call half @llvm.amdgcn.s.buffer.load.f16(<4 x i32> %rsrc, i32 %ofs, i32 0)
982; CHECK-NEXT: ret half %data
983define amdgpu_ps half @extract_elt0_s_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
984  %data = call <2 x half> @llvm.amdgcn.s.buffer.load.v2f16(<4 x i32> %rsrc, i32 %ofs, i32 0)
985  %elt0 = extractelement <2 x half> %data, i32 0
986  ret half %elt0
987}
988
989; CHECK-LABEL: @extract_elt1_s_buffer_load_v2f16(
990; CHECK-NEXT: %1 = add i32 %ofs, 2
991; CHECK-NEXT: %data = call half @llvm.amdgcn.s.buffer.load.f16(<4 x i32> %rsrc, i32 %1, i32 0)
992; CHECK-NEXT: ret half %data
993define amdgpu_ps half @extract_elt1_s_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
994  %data = call <2 x half> @llvm.amdgcn.s.buffer.load.v2f16(<4 x i32> %rsrc, i32 %ofs, i32 0)
995  %elt1 = extractelement <2 x half> %data, i32 1
996  ret half %elt1
997}
998
999; CHECK-LABEL: @extract_elt1_s_buffer_load_v3f16(
1000; CHECK-NEXT: %1 = add i32 %ofs, 2
1001; CHECK-NEXT: %data = call half @llvm.amdgcn.s.buffer.load.f16(<4 x i32> %rsrc, i32 %1, i32 0)
1002; CHECK-NEXT: ret half %data
1003define amdgpu_ps half @extract_elt1_s_buffer_load_v3f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1004  %data = call <3 x half> @llvm.amdgcn.s.buffer.load.v3f16(<4 x i32> %rsrc, i32 %ofs, i32 0)
1005  %elt1 = extractelement <3 x half> %data, i32 1
1006  ret half %elt1
1007}
1008
1009; CHECK-LABEL: @extract_elt1_s_buffer_load_v4f16(
1010; CHECK-NEXT: %1 = add i32 %ofs, 2
1011; CHECK-NEXT: %data = call half @llvm.amdgcn.s.buffer.load.f16(<4 x i32> %rsrc, i32 %1, i32 0)
1012; CHECK-NEXT: ret half %data
1013define amdgpu_ps half @extract_elt1_s_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1014  %data = call <4 x half> @llvm.amdgcn.s.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 0)
1015  %elt1 = extractelement <4 x half> %data, i32 1
1016  ret half %elt1
1017}
1018
1019
1020; CHECK-LABEL: @extract_elt3_s_buffer_load_v4f16(
1021; CHECK-NEXT: %1 = add i32 %ofs, 6
1022; CHECK-NEXT: %data = call half @llvm.amdgcn.s.buffer.load.f16(<4 x i32> %rsrc, i32 %1, i32 0)
1023; CHECK-NEXT: ret half %data
1024define amdgpu_ps half @extract_elt3_s_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1025  %data = call <4 x half> @llvm.amdgcn.s.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 0)
1026  %elt1 = extractelement <4 x half> %data, i32 3
1027  ret half %elt1
1028}
1029
1030; CHECK-LABEL: @extract_elt0_elt1_s_buffer_load_v4f16(
1031; CHECK-NEXT: %data = call <2 x half> @llvm.amdgcn.s.buffer.load.v2f16(<4 x i32> %rsrc, i32 %ofs, i32 0)
1032; CHECK-NEXT: ret <2 x half>
1033define amdgpu_ps <2 x half> @extract_elt0_elt1_s_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1034  %data = call <4 x half> @llvm.amdgcn.s.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 0)
1035  %shuf = shufflevector <4 x half> %data, <4 x half> undef, <2 x i32> <i32 0, i32 1>
1036  ret <2 x half> %shuf
1037}
1038
1039declare half @llvm.amdgcn.s.buffer.load.f16(<4 x i32>, i32, i32) #1
1040declare <2 x half> @llvm.amdgcn.s.buffer.load.v2f16(<4 x i32>, i32, i32) #1
1041declare <3 x half> @llvm.amdgcn.s.buffer.load.v3f16(<4 x i32>, i32, i32) #1
1042declare <4 x half> @llvm.amdgcn.s.buffer.load.v4f16(<4 x i32>, i32, i32) #1
1043
1044; CHECK-LABEL: @extract_elt0_s_buffer_load_v2i8(
1045; CHECK: %data = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> %rsrc, i32 %ofs, i32 0)
1046; CHECK-NEXT: ret i8 %data
1047define amdgpu_ps i8 @extract_elt0_s_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1048  %data = call <2 x i8> @llvm.amdgcn.s.buffer.load.v2i8(<4 x i32> %rsrc, i32 %ofs, i32 0)
1049  %elt0 = extractelement <2 x i8> %data, i32 0
1050  ret i8 %elt0
1051}
1052
1053; CHECK-LABEL: @extract_elt1_s_buffer_load_v2i8(
1054; CHECK-NEXT: %1 = add i32 %ofs, 1
1055; CHECK-NEXT: %data = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> %rsrc, i32 %1, i32 0)
1056; CHECK-NEXT: ret i8 %data
1057define amdgpu_ps i8 @extract_elt1_s_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1058  %data = call <2 x i8> @llvm.amdgcn.s.buffer.load.v2i8(<4 x i32> %rsrc, i32 %ofs, i32 0)
1059  %elt1 = extractelement <2 x i8> %data, i32 1
1060  ret i8 %elt1
1061}
1062
1063; CHECK-LABEL: @extract_elt1_s_buffer_load_v3i8(
1064; CHECK-NEXT: %1 = add i32 %ofs, 1
1065; CHECK-NEXT: %data = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> %rsrc, i32 %1, i32 0)
1066; CHECK-NEXT: ret i8 %data
1067define amdgpu_ps i8 @extract_elt1_s_buffer_load_v3i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1068  %data = call <3 x i8> @llvm.amdgcn.s.buffer.load.v3i8(<4 x i32> %rsrc, i32 %ofs, i32 0)
1069  %elt1 = extractelement <3 x i8> %data, i32 1
1070  ret i8 %elt1
1071}
1072
1073; CHECK-LABEL: @extract_elt1_s_buffer_load_v4i8(
1074; CHECK-NEXT: %1 = add i32 %ofs, 1
1075; CHECK-NEXT: %data = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> %rsrc, i32 %1, i32 0)
1076; CHECK-NEXT: ret i8 %data
1077define amdgpu_ps i8 @extract_elt1_s_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1078  %data = call <4 x i8> @llvm.amdgcn.s.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 0)
1079  %elt1 = extractelement <4 x i8> %data, i32 1
1080  ret i8 %elt1
1081}
1082
1083; CHECK-LABEL: @extract_elt3_s_buffer_load_v4i8(
1084; CHECK-NEXT: %1 = add i32 %ofs, 3
1085; CHECK-NEXT: %data = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> %rsrc, i32 %1, i32 0)
1086; CHECK-NEXT: ret i8 %data
1087define amdgpu_ps i8 @extract_elt3_s_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1088  %data = call <4 x i8> @llvm.amdgcn.s.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 0)
1089  %elt1 = extractelement <4 x i8> %data, i32 3
1090  ret i8 %elt1
1091}
1092
1093; CHECK-LABEL: @extract_elt0_elt1_s_buffer_load_v4i8(
1094; CHECK-NEXT: %data = call <2 x i8> @llvm.amdgcn.s.buffer.load.v2i8(<4 x i32> %rsrc, i32 %ofs, i32 0)
1095; CHECK-NEXT: ret <2 x i8>
1096define amdgpu_ps <2 x i8> @extract_elt0_elt1_s_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1097  %data = call <4 x i8> @llvm.amdgcn.s.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 0)
1098  %shuf = shufflevector <4 x i8> %data, <4 x i8> undef, <2 x i32> <i32 0, i32 1>
1099  ret <2 x i8> %shuf
1100}
1101
1102declare i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32>, i32, i32) #1
1103declare <2 x i8> @llvm.amdgcn.s.buffer.load.v2i8(<4 x i32>, i32, i32) #1
1104declare <3 x i8> @llvm.amdgcn.s.buffer.load.v3i8(<4 x i32>, i32, i32) #1
1105declare <4 x i8> @llvm.amdgcn.s.buffer.load.v4i8(<4 x i32>, i32, i32) #1
1106
1107; --------------------------------------------------------------------
1108; llvm.amdgcn.raw.buffer.load.format
1109; --------------------------------------------------------------------
1110
1111; CHECK-LABEL: @raw_buffer_load_format_f32(
1112; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1113; CHECK-NEXT: ret float %data
1114define amdgpu_ps float @raw_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1115  %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1116  ret float %data
1117}
1118
1119; CHECK-LABEL: @raw_buffer_load_format_v1f32(
1120; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1121; CHECK-NEXT: ret <1 x float> %data
1122define amdgpu_ps <1 x float> @raw_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1123  %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1124  ret <1 x float> %data
1125}
1126
1127; CHECK-LABEL: @raw_buffer_load_format_v2f32(
1128; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1129; CHECK-NEXT: ret <2 x float> %data
1130define amdgpu_ps <2 x float> @raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1131  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1132  ret <2 x float> %data
1133}
1134
1135; CHECK-LABEL: @raw_buffer_load_format_v4f32(
1136; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1137; CHECK-NEXT: ret <4 x float> %data
1138define amdgpu_ps <4 x float> @raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1139  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1140  ret <4 x float> %data
1141}
1142
1143; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v2f32(
1144; CHECK: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1145; CHECK-NEXT: ret float %data
1146define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1147  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1148  %elt0 = extractelement <2 x float> %data, i32 0
1149  ret float %elt0
1150}
1151
1152; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v2f32(
1153; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1154; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
1155; CHECK-NEXT: ret float %elt1
1156define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1157  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1158  %elt1 = extractelement <2 x float> %data, i32 1
1159  ret float %elt1
1160}
1161
1162; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v4f32(
1163; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1164; CHECK-NEXT: ret float %data
1165define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1166  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1167  %elt0 = extractelement <4 x float> %data, i32 0
1168  ret float %elt0
1169}
1170
1171; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v4f32(
1172; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1173; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
1174; CHECK-NEXT: ret float %elt1
1175define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1176  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1177  %elt1 = extractelement <4 x float> %data, i32 1
1178  ret float %elt1
1179}
1180
1181; CHECK-LABEL: @extract_elt2_raw_buffer_load_format_v4f32(
1182; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1183; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
1184; CHECK-NEXT: ret float %elt1
1185define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1186  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1187  %elt1 = extractelement <4 x float> %data, i32 2
1188  ret float %elt1
1189}
1190
1191; CHECK-LABEL: @extract_elt3_raw_buffer_load_format_v4f32(
1192; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1193; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3
1194; CHECK-NEXT: ret float %elt1
1195define amdgpu_ps float @extract_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1196  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1197  %elt1 = extractelement <4 x float> %data, i32 3
1198  ret float %elt1
1199}
1200
1201; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_format_v4f32(
1202; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1203; CHECK-NEXT: ret <2 x float>
1204define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1205  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1206  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
1207  ret <2 x float> %shuf
1208}
1209
1210; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_format_v4f32(
1211; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1212; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
1213; CHECK-NEXT: ret <2 x float> %shuf
1214define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1215  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1216  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
1217  ret <2 x float> %shuf
1218}
1219
1220; CHECK-LABEL: @extract_elt2_elt3_raw_buffer_load_format_v4f32(
1221; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1222; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
1223; CHECK-NEXT: ret <2 x float> %shuf
1224define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1225  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1226  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
1227  ret <2 x float> %shuf
1228}
1229
1230; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_buffer_load_format_v4f32(
1231; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1232; CHECK-NEXT: ret <3 x float> %data
1233define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1234  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1235  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
1236  ret <3 x float> %shuf
1237}
1238
1239; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_buffer_load_format_v4f32(
1240; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1241; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
1242; CHECK-NEXT: ret <3 x float> %shuf
1243define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1244  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1245  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
1246  ret <3 x float> %shuf
1247}
1248
1249; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_buffer_load_format_v4f32(
1250; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1251; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
1252; CHECK-NEXT: ret <3 x float> %shuf
1253define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1254  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1255  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
1256  ret <3 x float> %shuf
1257}
1258
1259; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v3f32(
1260; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1261; CHECK-NEXT: ret float %data
1262define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1263  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1264  %elt0 = extractelement <3 x float> %data, i32 0
1265  ret float %elt0
1266}
1267
1268; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v3f32(
1269; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1270; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
1271; CHECK-NEXT: ret float %elt1
1272define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1273  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1274  %elt1 = extractelement <3 x float> %data, i32 1
1275  ret float %elt1
1276}
1277
1278; CHECK-LABEL: @extract_elt2_raw_buffer_load_format_v3f32(
1279; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1280; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
1281; CHECK-NEXT: ret float %elt1
1282define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1283  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1284  %elt1 = extractelement <3 x float> %data, i32 2
1285  ret float %elt1
1286}
1287
1288; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_format_v3f32(
1289; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1290; CHECK-NEXT: ret <2 x float>
1291define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1292  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1293  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
1294  ret <2 x float> %shuf
1295}
1296
1297; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_format_v3f32(
1298; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1299; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
1300; CHECK-NEXT: ret <2 x float> %shuf
1301define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1302  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1303  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
1304  ret <2 x float> %shuf
1305}
1306
1307; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_format_v4f32(
1308; CHECK-NEXT: %tmp = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1309; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32
1310; CHECK-NEXT: ret i32 %tmp2
1311define i32 @extract0_bitcast_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1312  %tmp = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1313  %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
1314  %tmp2 = extractelement <4 x i32> %tmp1, i32 0
1315  ret i32 %tmp2
1316}
1317
1318; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_format_v4i32(
1319; CHECK-NEXT: %tmp = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1320; CHECK-NEXT: ret float %tmp
1321define float @extract0_bitcast_raw_buffer_load_format_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1322  %tmp = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1323  %tmp1 = extractelement <4 x float> %tmp, i32 0
1324  ret float %tmp1
1325}
1326
1327; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_buffer_load_format_v2f32(
1328; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
1329; CHECK-NEXT: ret float %data
1330define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1331  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
1332  %elt0 = extractelement <2 x float> %data, i32 0
1333  ret float %elt0
1334}
1335
1336declare float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32>, i32, i32, i32) #1
1337declare <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32>, i32, i32, i32) #1
1338declare <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32>, i32, i32, i32) #1
1339declare <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32>, i32, i32, i32) #1
1340declare <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32) #1
1341
1342; --------------------------------------------------------------------
1343; llvm.amdgcn.struct.buffer.load
1344; --------------------------------------------------------------------
1345
1346; CHECK-LABEL: @struct_buffer_load_f32(
1347; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1348; CHECK-NEXT: ret float %data
1349define amdgpu_ps float @struct_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1350  %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1351  ret float %data
1352}
1353
1354; CHECK-LABEL: @struct_buffer_load_v1f32(
1355; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1356; CHECK-NEXT: ret <1 x float> %data
1357define amdgpu_ps <1 x float> @struct_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1358  %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1359  ret <1 x float> %data
1360}
1361
1362; CHECK-LABEL: @struct_buffer_load_v2f32(
1363; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1364; CHECK-NEXT: ret <2 x float> %data
1365define amdgpu_ps <2 x float> @struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1366  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1367  ret <2 x float> %data
1368}
1369
1370; CHECK-LABEL: @struct_buffer_load_v4f32(
1371; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1372; CHECK-NEXT: ret <4 x float> %data
1373define amdgpu_ps <4 x float> @struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1374  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1375  ret <4 x float> %data
1376}
1377
1378; CHECK-LABEL: @extract_elt0_struct_buffer_load_v2f32(
1379; CHECK: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1380; CHECK-NEXT: ret float %data
1381define amdgpu_ps float @extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1382  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1383  %elt0 = extractelement <2 x float> %data, i32 0
1384  ret float %elt0
1385}
1386
1387; CHECK-LABEL: @extract_elt1_struct_buffer_load_v2f32(
1388; CHECK-NEXT: %1 = add i32 %ofs, 4
1389; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
1390; CHECK-NEXT: ret float %data
1391define amdgpu_ps float @extract_elt1_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1392  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1393  %elt1 = extractelement <2 x float> %data, i32 1
1394  ret float %elt1
1395}
1396
1397; CHECK-LABEL: @extract_elt0_struct_buffer_load_v4f32(
1398; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1399; CHECK-NEXT: ret float %data
1400define amdgpu_ps float @extract_elt0_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1401  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1402  %elt0 = extractelement <4 x float> %data, i32 0
1403  ret float %elt0
1404}
1405
1406; CHECK-LABEL: @extract_elt1_struct_buffer_load_v4f32(
1407; CHECK-NEXT: %1 = add i32 %ofs, 4
1408; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
1409; CHECK-NEXT: ret float %data
1410define amdgpu_ps float @extract_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1411  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1412  %elt1 = extractelement <4 x float> %data, i32 1
1413  ret float %elt1
1414}
1415
1416; CHECK-LABEL: @extract_elt2_struct_buffer_load_v4f32(
1417; CHECK-NEXT: %1 = add i32 %ofs, 8
1418; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
1419; CHECK-NEXT: ret float %data
1420define amdgpu_ps float @extract_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1421  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1422  %elt1 = extractelement <4 x float> %data, i32 2
1423  ret float %elt1
1424}
1425
1426; CHECK-LABEL: @extract_elt3_struct_buffer_load_v4f32(
1427; CHECK-NEXT: %1 = add i32 %ofs, 12
1428; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
1429; CHECK-NEXT: ret float %data
1430define amdgpu_ps float @extract_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1431  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1432  %elt1 = extractelement <4 x float> %data, i32 3
1433  ret float %elt1
1434}
1435
1436; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v4f32(
1437; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1438; CHECK-NEXT: ret <2 x float>
1439define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1440  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1441  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
1442  ret <2 x float> %shuf
1443}
1444
1445; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_v4f32(
1446; CHECK-NEXT: %1 = add i32 %ofs, 4
1447; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
1448; CHECK-NEXT: ret <2 x float> %data
1449define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1450  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1451  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
1452  ret <2 x float> %shuf
1453}
1454
1455; CHECK-LABEL: @extract_elt2_elt3_struct_buffer_load_v4f32(
1456; CHECK-NEXT: %1 = add i32 %ofs, 8
1457; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
1458; CHECK-NEXT: ret <2 x float> %data
1459define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1460  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1461  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
1462  ret <2 x float> %shuf
1463}
1464
1465; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_buffer_load_v4f32(
1466; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1467; CHECK-NEXT: ret <3 x float> %data
1468define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1469  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1470  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
1471  ret <3 x float> %shuf
1472}
1473
1474; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_buffer_load_v4f32(
1475; CHECK-NEXT: %1 = add i32 %ofs, 4
1476; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
1477; CHECK-NEXT: ret <3 x float> %data
1478define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1479  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1480  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
1481  ret <3 x float> %shuf
1482}
1483
1484; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_buffer_load_v4f32(
1485; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1486; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
1487; CHECK-NEXT: ret <3 x float> %shuf
1488define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1489  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1490  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
1491  ret <3 x float> %shuf
1492}
1493
1494; CHECK-LABEL: @extract_elt0_struct_buffer_load_v3f32(
1495; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1496; CHECK-NEXT: ret float %data
1497define amdgpu_ps float @extract_elt0_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1498  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1499  %elt0 = extractelement <3 x float> %data, i32 0
1500  ret float %elt0
1501}
1502
1503; CHECK-LABEL: @extract_elt1_struct_buffer_load_v3f32(
1504; CHECK-NEXT: %1 = add i32 %ofs, 4
1505; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
1506; CHECK-NEXT: ret float %data
1507define amdgpu_ps float @extract_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1508  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1509  %elt1 = extractelement <3 x float> %data, i32 1
1510  ret float %elt1
1511}
1512
1513; CHECK-LABEL: @extract_elt2_struct_buffer_load_v3f32(
1514; CHECK-NEXT: %1 = add i32 %ofs, 8
1515; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
1516; CHECK-NEXT: ret float %data
1517define amdgpu_ps float @extract_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1518  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1519  %elt1 = extractelement <3 x float> %data, i32 2
1520  ret float %elt1
1521}
1522
1523; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v3f32(
1524; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1525; CHECK-NEXT: ret <2 x float>
1526define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1527  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1528  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
1529  ret <2 x float> %shuf
1530}
1531
1532; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_v3f32(
1533; CHECK-NEXT: %1 = add i32 %ofs, 4
1534; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
1535; CHECK-NEXT: ret <2 x float> %data
1536define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1537  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1538  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
1539  ret <2 x float> %shuf
1540}
1541
1542; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_v4f32(
1543; CHECK-NEXT: %tmp = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1544; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32
1545; CHECK-NEXT: ret i32 %tmp2
1546define i32 @extract0_bitcast_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1547  %tmp = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1548  %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
1549  %tmp2 = extractelement <4 x i32> %tmp1, i32 0
1550  ret i32 %tmp2
1551}
1552
1553; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_v4i32(
1554; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1555; CHECK-NEXT: %tmp2 = bitcast i32 %tmp to float
1556; CHECK-NEXT: ret float %tmp2
1557define float @extract0_bitcast_struct_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1558  %tmp = call <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1559  %tmp1 = bitcast <4 x i32> %tmp to <4 x float>
1560  %tmp2 = extractelement <4 x float> %tmp1, i32 0
1561  ret float %tmp2
1562}
1563
1564; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_buffer_load_v2f32(
1565; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
1566; CHECK-NEXT: ret float %data
1567define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1568  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
1569  %elt0 = extractelement <2 x float> %data, i32 0
1570  ret float %elt0
1571}
1572
1573declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32) #1
1574declare <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32>, i32, i32, i32, i32) #1
1575declare <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32>, i32, i32, i32, i32) #1
1576declare <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32>, i32, i32, i32, i32) #1
1577declare <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) #1
1578
1579declare <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32>, i32, i32, i32, i32) #1
1580
1581; CHECK-LABEL: @extract_elt0_struct_buffer_load_v2f16(
1582; CHECK: %data = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1583; CHECK-NEXT: ret half %data
1584define amdgpu_ps half @extract_elt0_struct_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1585  %data = call <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1586  %elt0 = extractelement <2 x half> %data, i32 0
1587  ret half %elt0
1588}
1589
1590; CHECK-LABEL: @extract_elt1_struct_buffer_load_v2f16(
1591; CHECK-NEXT: %1 = add i32 %ofs, 2
1592; CHECK-NEXT: %data = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
1593; CHECK-NEXT: ret half %data
1594define amdgpu_ps half @extract_elt1_struct_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1595  %data = call <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1596  %elt1 = extractelement <2 x half> %data, i32 1
1597  ret half %elt1
1598}
1599
1600; CHECK-LABEL: @extract_elt1_struct_buffer_load_v3f16(
1601; CHECK-NEXT: %1 = add i32 %ofs, 2
1602; CHECK-NEXT: %data = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
1603; CHECK-NEXT: ret half %data
1604define amdgpu_ps half @extract_elt1_struct_buffer_load_v3f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1605  %data = call <3 x half> @llvm.amdgcn.struct.buffer.load.v3f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1606  %elt1 = extractelement <3 x half> %data, i32 1
1607  ret half %elt1
1608}
1609
1610; CHECK-LABEL: @extract_elt1_struct_buffer_load_v4f16(
1611; CHECK-NEXT: %1 = add i32 %ofs, 2
1612; CHECK-NEXT: %data = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
1613; CHECK-NEXT: ret half %data
1614define amdgpu_ps half @extract_elt1_struct_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1615  %data = call <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1616  %elt1 = extractelement <4 x half> %data, i32 1
1617  ret half %elt1
1618}
1619
1620; CHECK-LABEL: @extract_elt3_struct_buffer_load_v4f16(
1621; CHECK-NEXT: %1 = add i32 %ofs, 6
1622; CHECK-NEXT: %data = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
1623; CHECK-NEXT: ret half %data
1624define amdgpu_ps half @extract_elt3_struct_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1625  %data = call <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1626  %elt1 = extractelement <4 x half> %data, i32 3
1627  ret half %elt1
1628}
1629
1630; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v4f16(
1631; CHECK-NEXT: %data = call <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1632; CHECK-NEXT: ret <2 x half>
1633define amdgpu_ps <2 x half> @extract_elt0_elt1_struct_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1634  %data = call <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1635  %shuf = shufflevector <4 x half> %data, <4 x half> undef, <2 x i32> <i32 0, i32 1>
1636  ret <2 x half> %shuf
1637}
1638
1639declare half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32>, i32, i32, i32, i32) #1
1640declare <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32>, i32, i32, i32, i32) #1
1641declare <3 x half> @llvm.amdgcn.struct.buffer.load.v3f16(<4 x i32>, i32, i32, i32, i32) #1
1642declare <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32>, i32, i32, i32, i32) #1
1643
1644; CHECK-LABEL: @extract_elt0_struct_buffer_load_v2i8(
1645; CHECK: %data = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1646; CHECK-NEXT: ret i8 %data
1647define amdgpu_ps i8 @extract_elt0_struct_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1648  %data = call <2 x i8> @llvm.amdgcn.struct.buffer.load.v2i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1649  %elt0 = extractelement <2 x i8> %data, i32 0
1650  ret i8 %elt0
1651}
1652
1653; CHECK-LABEL: @extract_elt1_struct_buffer_load_v2i8(
1654; CHECK-NEXT: %1 = add i32 %ofs, 1
1655; CHECK-NEXT: %data = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
1656; CHECK-NEXT: ret i8 %data
1657define amdgpu_ps i8 @extract_elt1_struct_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1658  %data = call <2 x i8> @llvm.amdgcn.struct.buffer.load.v2i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1659  %elt1 = extractelement <2 x i8> %data, i32 1
1660  ret i8 %elt1
1661}
1662
1663; CHECK-LABEL: @extract_elt1_struct_buffer_load_v3i8(
1664; CHECK-NEXT: %1 = add i32 %ofs, 1
1665; CHECK-NEXT: %data = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
1666; CHECK-NEXT: ret i8 %data
1667define amdgpu_ps i8 @extract_elt1_struct_buffer_load_v3i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1668  %data = call <3 x i8> @llvm.amdgcn.struct.buffer.load.v3i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1669  %elt1 = extractelement <3 x i8> %data, i32 1
1670  ret i8 %elt1
1671}
1672
1673; CHECK-LABEL: @extract_elt1_struct_buffer_load_v4i8(
1674; CHECK-NEXT: %1 = add i32 %ofs, 1
1675; CHECK-NEXT: %data = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
1676; CHECK-NEXT: ret i8 %data
1677define amdgpu_ps i8 @extract_elt1_struct_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1678  %data = call <4 x i8> @llvm.amdgcn.struct.buffer.load.v4i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1679  %elt1 = extractelement <4 x i8> %data, i32 1
1680  ret i8 %elt1
1681}
1682
1683; CHECK-LABEL: @extract_elt3_struct_buffer_load_v4i8(
1684; CHECK-NEXT: %1 = add i32 %ofs, 3
1685; CHECK-NEXT: %data = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> %rsrc, i32 %idx, i32 %1, i32 %sofs, i32 0)
1686; CHECK-NEXT: ret i8 %data
1687define amdgpu_ps i8 @extract_elt3_struct_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1688  %data = call <4 x i8> @llvm.amdgcn.struct.buffer.load.v4i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1689  %elt1 = extractelement <4 x i8> %data, i32 3
1690  ret i8 %elt1
1691}
1692
1693; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v4i8(
1694; CHECK-NEXT: %data = call <2 x i8> @llvm.amdgcn.struct.buffer.load.v2i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1695; CHECK-NEXT: ret <2 x i8>
1696define amdgpu_ps <2 x i8> @extract_elt0_elt1_struct_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1697  %data = call <4 x i8> @llvm.amdgcn.struct.buffer.load.v4i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1698  %shuf = shufflevector <4 x i8> %data, <4 x i8> undef, <2 x i32> <i32 0, i32 1>
1699  ret <2 x i8> %shuf
1700}
1701
1702declare i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32>, i32, i32, i32, i32) #1
1703declare <2 x i8> @llvm.amdgcn.struct.buffer.load.v2i8(<4 x i32>, i32, i32, i32, i32) #1
1704declare <3 x i8> @llvm.amdgcn.struct.buffer.load.v3i8(<4 x i32>, i32, i32, i32, i32) #1
1705declare <4 x i8> @llvm.amdgcn.struct.buffer.load.v4i8(<4 x i32>, i32, i32, i32, i32) #1
1706
1707; --------------------------------------------------------------------
1708; llvm.amdgcn.struct.buffer.load.format
1709; --------------------------------------------------------------------
1710
1711; CHECK-LABEL: @struct_buffer_load_format_f32(
1712; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1713; CHECK-NEXT: ret float %data
1714define amdgpu_ps float @struct_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1715  %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1716  ret float %data
1717}
1718
1719; CHECK-LABEL: @struct_buffer_load_format_v1f32(
1720; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1721; CHECK-NEXT: ret <1 x float> %data
1722define amdgpu_ps <1 x float> @struct_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1723  %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1724  ret <1 x float> %data
1725}
1726
1727; CHECK-LABEL: @struct_buffer_load_format_v2f32(
1728; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1729; CHECK-NEXT: ret <2 x float> %data
1730define amdgpu_ps <2 x float> @struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1731  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1732  ret <2 x float> %data
1733}
1734
1735; CHECK-LABEL: @struct_buffer_load_format_v4f32(
1736; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1737; CHECK-NEXT: ret <4 x float> %data
1738define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1739  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1740  ret <4 x float> %data
1741}
1742
1743; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v2f32(
1744; CHECK: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1745; CHECK-NEXT: ret float %data
1746define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1747  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1748  %elt0 = extractelement <2 x float> %data, i32 0
1749  ret float %elt0
1750}
1751
1752; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v2f32(
1753; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1754; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
1755; CHECK-NEXT: ret float %elt1
1756define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1757  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1758  %elt1 = extractelement <2 x float> %data, i32 1
1759  ret float %elt1
1760}
1761
1762; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v4f32(
1763; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1764; CHECK-NEXT: ret float %data
1765define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1766  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1767  %elt0 = extractelement <4 x float> %data, i32 0
1768  ret float %elt0
1769}
1770
1771; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v4f32(
1772; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1773; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
1774; CHECK-NEXT: ret float %elt1
1775define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1776  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1777  %elt1 = extractelement <4 x float> %data, i32 1
1778  ret float %elt1
1779}
1780
1781; CHECK-LABEL: @extract_elt2_struct_buffer_load_format_v4f32(
1782; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1783; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
1784; CHECK-NEXT: ret float %elt1
1785define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1786  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1787  %elt1 = extractelement <4 x float> %data, i32 2
1788  ret float %elt1
1789}
1790
1791; CHECK-LABEL: @extract_elt3_struct_buffer_load_format_v4f32(
1792; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1793; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3
1794; CHECK-NEXT: ret float %elt1
1795define amdgpu_ps float @extract_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1796  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1797  %elt1 = extractelement <4 x float> %data, i32 3
1798  ret float %elt1
1799}
1800
1801; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_format_v4f32(
1802; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1803; CHECK-NEXT: ret <2 x float>
1804define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1805  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1806  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
1807  ret <2 x float> %shuf
1808}
1809
1810; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_format_v4f32(
1811; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1812; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
1813; CHECK-NEXT: ret <2 x float> %shuf
1814define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1815  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1816  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
1817  ret <2 x float> %shuf
1818}
1819
1820; CHECK-LABEL: @extract_elt2_elt3_struct_buffer_load_format_v4f32(
1821; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1822; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
1823; CHECK-NEXT: ret <2 x float> %shuf
1824define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1825  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1826  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
1827  ret <2 x float> %shuf
1828}
1829
1830; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_buffer_load_format_v4f32(
1831; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1832; CHECK-NEXT: ret <3 x float> %data
1833define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1834  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1835  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
1836  ret <3 x float> %shuf
1837}
1838
1839; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_buffer_load_format_v4f32(
1840; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1841; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
1842; CHECK-NEXT: ret <3 x float> %shuf
1843define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1844  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1845  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
1846  ret <3 x float> %shuf
1847}
1848
1849; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_buffer_load_format_v4f32(
1850; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1851; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
1852; CHECK-NEXT: ret <3 x float> %shuf
1853define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1854  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1855  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
1856  ret <3 x float> %shuf
1857}
1858
1859; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v3f32(
1860; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1861; CHECK-NEXT: ret float %data
1862define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1863  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1864  %elt0 = extractelement <3 x float> %data, i32 0
1865  ret float %elt0
1866}
1867
1868; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v3f32(
1869; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1870; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
1871; CHECK-NEXT: ret float %elt1
1872define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1873  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1874  %elt1 = extractelement <3 x float> %data, i32 1
1875  ret float %elt1
1876}
1877
1878; CHECK-LABEL: @extract_elt2_struct_buffer_load_format_v3f32(
1879; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1880; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
1881; CHECK-NEXT: ret float %elt1
1882define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1883  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1884  %elt1 = extractelement <3 x float> %data, i32 2
1885  ret float %elt1
1886}
1887
1888; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_format_v3f32(
1889; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1890; CHECK-NEXT: ret <2 x float>
1891define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1892  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1893  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
1894  ret <2 x float> %shuf
1895}
1896
1897; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_format_v3f32(
1898; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1899; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
1900; CHECK-NEXT: ret <2 x float> %shuf
1901define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1902  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1903  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
1904  ret <2 x float> %shuf
1905}
1906
1907; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_format_v4f32(
1908; CHECK-NEXT: %tmp = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1909; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32
1910; CHECK-NEXT: ret i32 %tmp2
1911define i32 @extract0_bitcast_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1912  %tmp = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1913  %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
1914  %tmp2 = extractelement <4 x i32> %tmp1, i32 0
1915  ret i32 %tmp2
1916}
1917
1918; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_buffer_load_format_v2f32(
1919; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
1920; CHECK-NEXT: ret float %data
1921define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1922  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
1923  %elt0 = extractelement <2 x float> %data, i32 0
1924  ret float %elt0
1925}
1926
1927declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32) #1
1928declare <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32>, i32, i32, i32, i32) #1
1929declare <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32>, i32, i32, i32, i32) #1
1930declare <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32>, i32, i32, i32, i32) #1
1931declare <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32, i32) #1
1932
1933declare <4 x i32> @llvm.amdgcn.struct.buffer.load.format.v4i32(<4 x i32>, i32, i32, i32, i32) #1
1934
1935; --------------------------------------------------------------------
1936; llvm.amdgcn.raw.tbuffer.load
1937; --------------------------------------------------------------------
1938
1939; CHECK-LABEL: @raw_tbuffer_load_f32(
1940; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
1941; CHECK-NEXT: ret float %data
1942define amdgpu_ps float @raw_tbuffer_load_f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
1943  %data = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
1944  ret float %data
1945}
1946
1947; CHECK-LABEL: @raw_tbuffer_load_v2f32(
1948; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
1949; CHECK-NEXT: ret <2 x float> %data
1950define amdgpu_ps <2 x float> @raw_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
1951  %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
1952  ret <2 x float> %data
1953}
1954
1955; CHECK-LABEL: @raw_tbuffer_load_v4f32(
1956; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
1957; CHECK-NEXT: ret <4 x float> %data
1958define amdgpu_ps <4 x float> @raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
1959  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
1960  ret <4 x float> %data
1961}
1962
1963; CHECK-LABEL: @extract_elt0_raw_tbuffer_load_v2f32(
1964; CHECK: %data = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
1965; CHECK-NEXT: ret float %data
1966define amdgpu_ps float @extract_elt0_raw_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
1967  %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
1968  %elt0 = extractelement <2 x float> %data, i32 0
1969  ret float %elt0
1970}
1971
1972; CHECK-LABEL: @extract_elt1_raw_tbuffer_load_v2f32(
1973; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
1974; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
1975; CHECK-NEXT: ret float %elt1
1976define amdgpu_ps float @extract_elt1_raw_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
1977  %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
1978  %elt1 = extractelement <2 x float> %data, i32 1
1979  ret float %elt1
1980}
1981
1982; CHECK-LABEL: @extract_elt0_raw_tbuffer_load_v4f32(
1983; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
1984; CHECK-NEXT: ret float %data
1985define amdgpu_ps float @extract_elt0_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
1986  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
1987  %elt0 = extractelement <4 x float> %data, i32 0
1988  ret float %elt0
1989}
1990
1991; CHECK-LABEL: @extract_elt1_raw_tbuffer_load_v4f32(
1992; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
1993; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
1994; CHECK-NEXT: ret float %elt1
1995define amdgpu_ps float @extract_elt1_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
1996  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
1997  %elt1 = extractelement <4 x float> %data, i32 1
1998  ret float %elt1
1999}
2000
2001; CHECK-LABEL: @extract_elt2_raw_tbuffer_load_v4f32(
2002; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2003; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
2004; CHECK-NEXT: ret float %elt1
2005define amdgpu_ps float @extract_elt2_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
2006  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2007  %elt1 = extractelement <4 x float> %data, i32 2
2008  ret float %elt1
2009}
2010
2011; CHECK-LABEL: @extract_elt3_raw_tbuffer_load_v4f32(
2012; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2013; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3
2014; CHECK-NEXT: ret float %elt1
2015define amdgpu_ps float @extract_elt3_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
2016  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2017  %elt1 = extractelement <4 x float> %data, i32 3
2018  ret float %elt1
2019}
2020
2021; CHECK-LABEL: @extract_elt0_elt1_raw_tbuffer_load_v4f32(
2022; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2023; CHECK-NEXT: ret <2 x float>
2024define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
2025  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2026  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
2027  ret <2 x float> %shuf
2028}
2029
2030; CHECK-LABEL: @extract_elt1_elt2_raw_tbuffer_load_v4f32(
2031; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2032; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
2033; CHECK-NEXT: ret <2 x float> %shuf
2034define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
2035  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2036  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
2037  ret <2 x float> %shuf
2038}
2039
2040; CHECK-LABEL: @extract_elt2_elt3_raw_tbuffer_load_v4f32(
2041; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2042; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
2043; CHECK-NEXT: ret <2 x float> %shuf
2044define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
2045  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2046  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
2047  ret <2 x float> %shuf
2048}
2049
2050; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_tbuffer_load_v4f32(
2051; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2052; CHECK-NEXT: ret <3 x float> %data
2053define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
2054  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2055  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
2056  ret <3 x float> %shuf
2057}
2058
2059; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_tbuffer_load_v4f32(
2060; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2061; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
2062; CHECK-NEXT: ret <3 x float> %shuf
2063define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
2064  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2065  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
2066  ret <3 x float> %shuf
2067}
2068
2069; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_tbuffer_load_v4f32(
2070; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2071; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
2072; CHECK-NEXT: ret <3 x float> %shuf
2073define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
2074  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2075  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
2076  ret <3 x float> %shuf
2077}
2078
2079; CHECK-LABEL: @extract_elt0_raw_tbuffer_load_v3f32(
2080; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2081; CHECK-NEXT: ret float %data
2082define amdgpu_ps float @extract_elt0_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
2083  %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2084  %elt0 = extractelement <3 x float> %data, i32 0
2085  ret float %elt0
2086}
2087
2088; CHECK-LABEL: @extract_elt1_raw_tbuffer_load_v3f32(
2089; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2090; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
2091; CHECK-NEXT: ret float %elt1
2092define amdgpu_ps float @extract_elt1_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
2093  %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2094  %elt1 = extractelement <3 x float> %data, i32 1
2095  ret float %elt1
2096}
2097
2098; CHECK-LABEL: @extract_elt2_raw_tbuffer_load_v3f32(
2099; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2100; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
2101; CHECK-NEXT: ret float %elt1
2102define amdgpu_ps float @extract_elt2_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
2103  %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2104  %elt1 = extractelement <3 x float> %data, i32 2
2105  ret float %elt1
2106}
2107
2108; CHECK-LABEL: @extract_elt0_elt1_raw_tbuffer_load_v3f32(
2109; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2110; CHECK-NEXT: ret <2 x float>
2111define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
2112  %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2113  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
2114  ret <2 x float> %shuf
2115}
2116
2117; CHECK-LABEL: @extract_elt1_elt2_raw_tbuffer_load_v3f32(
2118; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2119; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
2120; CHECK-NEXT: ret <2 x float> %shuf
2121define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
2122  %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2123  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
2124  ret <2 x float> %shuf
2125}
2126
2127; CHECK-LABEL: @extract0_bitcast_raw_tbuffer_load_v4f32(
2128; CHECK-NEXT: %tmp = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2129; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32
2130; CHECK-NEXT: ret i32 %tmp2
2131define i32 @extract0_bitcast_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
2132  %tmp = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2133  %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
2134  %tmp2 = extractelement <4 x i32> %tmp1, i32 0
2135  ret i32 %tmp2
2136}
2137
2138; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_tbuffer_load_v2f32(
2139; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0), !fpmath !0
2140; CHECK-NEXT: ret float %data
2141define amdgpu_ps float @preserve_metadata_extract_elt0_raw_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
2142  %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0), !fpmath !0
2143  %elt0 = extractelement <2 x float> %data, i32 0
2144  ret float %elt0
2145}
2146
2147declare float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32>, i32, i32, i32, i32) #1
2148declare <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32>, i32, i32, i32, i32) #1
2149declare <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32>, i32, i32, i32, i32) #1
2150declare <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) #1
2151
2152declare <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32) #1
2153
2154; CHECK-LABEL: @extract_elt3_raw_tbuffer_load_v4f16(
2155; CHECK-NEXT: %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2156; CHECK-NEXT: %elt1 = extractelement <4 x half> %data, i32 3
2157; CHECK-NEXT: ret half %elt1
2158define amdgpu_ps half @extract_elt3_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
2159  %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2160  %elt1 = extractelement <4 x half> %data, i32 3
2161  ret half %elt1
2162}
2163
2164; CHECK-LABEL: @extract_elt2_raw_tbuffer_load_v4f16(
2165; CHECK-NEXT: %data = call <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2166; CHECK-NEXT: %elt1 = extractelement <3 x half> %data, i32 2
2167; CHECK-NEXT: ret half %elt1
2168define amdgpu_ps half @extract_elt2_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
2169  %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2170  %elt1 = extractelement <4 x half> %data, i32 2
2171  ret half %elt1
2172}
2173
2174; CHECK-LABEL: @extract_elt1_raw_tbuffer_load_v4f16(
2175; CHECK-NEXT: %data = call <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2176; CHECK-NEXT: %elt1 = extractelement <2 x half> %data, i32 1
2177; CHECK-NEXT: ret half %elt1
2178define amdgpu_ps half @extract_elt1_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
2179  %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2180  %elt1 = extractelement <4 x half> %data, i32 1
2181  ret half %elt1
2182}
2183
2184; CHECK-LABEL: @extract_elt0_raw_tbuffer_load_v4f16(
2185; CHECK-NEXT: %data = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2186; CHECK-NEXT: ret half %data
2187define amdgpu_ps half @extract_elt0_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
2188  %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
2189  %elt1 = extractelement <4 x half> %data, i32 0
2190  ret half %elt1
2191}
2192
2193declare half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32>, i32, i32, i32, i32) #1
2194declare <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32, i32) #1
2195declare <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32>, i32, i32, i32, i32) #1
2196declare <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32, i32) #1
2197
2198; --------------------------------------------------------------------
2199; llvm.amdgcn.struct.tbuffer.load
2200; --------------------------------------------------------------------
2201
2202; CHECK-LABEL: @struct_tbuffer_load_f32(
2203; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2204; CHECK-NEXT: ret float %data
2205define amdgpu_ps float @struct_tbuffer_load_f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
2206  %data = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2207  ret float %data
2208}
2209
2210; CHECK-LABEL: @struct_tbuffer_load_v2f32(
2211; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2212; CHECK-NEXT: ret <2 x float> %data
2213define amdgpu_ps <2 x float> @struct_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
2214  %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2215  ret <2 x float> %data
2216}
2217
2218; CHECK-LABEL: @struct_tbuffer_load_v4f32(
2219; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2220; CHECK-NEXT: ret <4 x float> %data
2221define amdgpu_ps <4 x float> @struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
2222  %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2223  ret <4 x float> %data
2224}
2225
2226; CHECK-LABEL: @extract_elt0_struct_tbuffer_load_v2f32(
2227; CHECK: %data = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2228; CHECK-NEXT: ret float %data
2229define amdgpu_ps float @extract_elt0_struct_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
2230  %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2231  %elt0 = extractelement <2 x float> %data, i32 0
2232  ret float %elt0
2233}
2234
2235; CHECK-LABEL: @extract_elt1_struct_tbuffer_load_v2f32(
2236; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2237; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
2238; CHECK-NEXT: ret float %elt1
2239define amdgpu_ps float @extract_elt1_struct_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
2240  %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2241  %elt1 = extractelement <2 x float> %data, i32 1
2242  ret float %elt1
2243}
2244
2245; CHECK-LABEL: @extract_elt0_struct_tbuffer_load_v4f32(
2246; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2247; CHECK-NEXT: ret float %data
2248define amdgpu_ps float @extract_elt0_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
2249  %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2250  %elt0 = extractelement <4 x float> %data, i32 0
2251  ret float %elt0
2252}
2253
2254; CHECK-LABEL: @extract_elt1_struct_tbuffer_load_v4f32(
2255; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2256; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
2257; CHECK-NEXT: ret float %elt1
2258define amdgpu_ps float @extract_elt1_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
2259  %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2260  %elt1 = extractelement <4 x float> %data, i32 1
2261  ret float %elt1
2262}
2263
2264; CHECK-LABEL: @extract_elt2_struct_tbuffer_load_v4f32(
2265; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2266; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
2267; CHECK-NEXT: ret float %elt1
2268define amdgpu_ps float @extract_elt2_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
2269  %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2270  %elt1 = extractelement <4 x float> %data, i32 2
2271  ret float %elt1
2272}
2273
2274; CHECK-LABEL: @extract_elt3_struct_tbuffer_load_v4f32(
2275; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2276; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3
2277; CHECK-NEXT: ret float %elt1
2278define amdgpu_ps float @extract_elt3_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
2279  %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2280  %elt1 = extractelement <4 x float> %data, i32 3
2281  ret float %elt1
2282}
2283
2284; CHECK-LABEL: @extract_elt0_elt1_struct_tbuffer_load_v4f32(
2285; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2286; CHECK-NEXT: ret <2 x float>
2287define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
2288  %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2289  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
2290  ret <2 x float> %shuf
2291}
2292
2293; CHECK-LABEL: @extract_elt1_elt2_struct_tbuffer_load_v4f32(
2294; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2295; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
2296; CHECK-NEXT: ret <2 x float> %shuf
2297define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
2298  %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2299  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
2300  ret <2 x float> %shuf
2301}
2302
2303; CHECK-LABEL: @extract_elt2_elt3_struct_tbuffer_load_v4f32(
2304; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2305; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
2306; CHECK-NEXT: ret <2 x float> %shuf
2307define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
2308  %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2309  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
2310  ret <2 x float> %shuf
2311}
2312
2313; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_tbuffer_load_v4f32(
2314; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2315; CHECK-NEXT: ret <3 x float> %data
2316define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
2317  %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2318  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
2319  ret <3 x float> %shuf
2320}
2321
2322; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_tbuffer_load_v4f32(
2323; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2324; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
2325; CHECK-NEXT: ret <3 x float> %shuf
2326define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
2327  %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2328  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
2329  ret <3 x float> %shuf
2330}
2331
2332; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_tbuffer_load_v4f32(
2333; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2334; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
2335; CHECK-NEXT: ret <3 x float> %shuf
2336define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
2337  %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2338  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
2339  ret <3 x float> %shuf
2340}
2341
2342; CHECK-LABEL: @extract_elt0_struct_tbuffer_load_v3f32(
2343; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2344; CHECK-NEXT: ret float %data
2345define amdgpu_ps float @extract_elt0_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
2346  %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2347  %elt0 = extractelement <3 x float> %data, i32 0
2348  ret float %elt0
2349}
2350
2351; CHECK-LABEL: @extract_elt1_struct_tbuffer_load_v3f32(
2352; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2353; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
2354; CHECK-NEXT: ret float %elt1
2355define amdgpu_ps float @extract_elt1_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
2356  %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2357  %elt1 = extractelement <3 x float> %data, i32 1
2358  ret float %elt1
2359}
2360
2361; CHECK-LABEL: @extract_elt2_struct_tbuffer_load_v3f32(
2362; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2363; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
2364; CHECK-NEXT: ret float %elt1
2365define amdgpu_ps float @extract_elt2_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
2366  %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2367  %elt1 = extractelement <3 x float> %data, i32 2
2368  ret float %elt1
2369}
2370
2371; CHECK-LABEL: @extract_elt0_elt1_struct_tbuffer_load_v3f32(
2372; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2373; CHECK-NEXT: ret <2 x float>
2374define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
2375  %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2376  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
2377  ret <2 x float> %shuf
2378}
2379
2380; CHECK-LABEL: @extract_elt1_elt2_struct_tbuffer_load_v3f32(
2381; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2382; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
2383; CHECK-NEXT: ret <2 x float> %shuf
2384define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
2385  %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2386  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
2387  ret <2 x float> %shuf
2388}
2389
2390; CHECK-LABEL: @extract0_bitcast_struct_tbuffer_load_v4f32(
2391; CHECK-NEXT: %tmp = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2392; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32
2393; CHECK-NEXT: ret i32 %tmp2
2394define i32 @extract0_bitcast_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
2395  %tmp = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
2396  %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
2397  %tmp2 = extractelement <4 x i32> %tmp1, i32 0
2398  ret i32 %tmp2
2399}
2400
2401; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_tbuffer_load_v2f32(
2402; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0), !fpmath !0
2403; CHECK-NEXT: ret float %data
2404define amdgpu_ps float @preserve_metadata_extract_elt0_struct_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
2405  %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0), !fpmath !0
2406  %elt0 = extractelement <2 x float> %data, i32 0
2407  ret float %elt0
2408}
2409
2410declare float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32>, i32, i32, i32, i32, i32) #1
2411declare <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32>, i32, i32, i32, i32, i32) #1
2412declare <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32>, i32, i32, i32, i32, i32) #1
2413declare <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32, i32, i32) #1
2414
2415declare <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32, i32) #1
2416
2417; --------------------------------------------------------------------
2418; llvm.amdgcn.tbuffer.load
2419; --------------------------------------------------------------------
2420
2421; CHECK-LABEL: @tbuffer_load_f32(
2422; CHECK-NEXT: %data = call float @llvm.amdgcn.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2423; CHECK-NEXT: ret float %data
2424define amdgpu_ps float @tbuffer_load_f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
2425  %data = call float @llvm.amdgcn.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2426  ret float %data
2427}
2428
2429; CHECK-LABEL: @tbuffer_load_v2f32(
2430; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2431; CHECK-NEXT: ret <2 x float> %data
2432define amdgpu_ps <2 x float> @tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
2433  %data = call <2 x float> @llvm.amdgcn.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2434  ret <2 x float> %data
2435}
2436
2437; CHECK-LABEL: @tbuffer_load_v4f32(
2438; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2439; CHECK-NEXT: ret <4 x float> %data
2440define amdgpu_ps <4 x float> @tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
2441  %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2442  ret <4 x float> %data
2443}
2444
2445; CHECK-LABEL: @extract_elt0_tbuffer_load_v2f32(
2446; CHECK: %data = call float @llvm.amdgcn.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2447; CHECK-NEXT: ret float %data
2448define amdgpu_ps float @extract_elt0_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
2449  %data = call <2 x float> @llvm.amdgcn.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2450  %elt0 = extractelement <2 x float> %data, i32 0
2451  ret float %elt0
2452}
2453
2454; CHECK-LABEL: @extract_elt1_tbuffer_load_v2f32(
2455; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2456; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
2457; CHECK-NEXT: ret float %elt1
2458define amdgpu_ps float @extract_elt1_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
2459  %data = call <2 x float> @llvm.amdgcn.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2460  %elt1 = extractelement <2 x float> %data, i32 1
2461  ret float %elt1
2462}
2463
2464; CHECK-LABEL: @extract_elt0_tbuffer_load_v4f32(
2465; CHECK-NEXT: %data = call float @llvm.amdgcn.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2466; CHECK-NEXT: ret float %data
2467define amdgpu_ps float @extract_elt0_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
2468  %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2469  %elt0 = extractelement <4 x float> %data, i32 0
2470  ret float %elt0
2471}
2472
2473; CHECK-LABEL: @extract_elt1_tbuffer_load_v4f32(
2474; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2475; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
2476; CHECK-NEXT: ret float %elt1
2477define amdgpu_ps float @extract_elt1_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
2478  %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2479  %elt1 = extractelement <4 x float> %data, i32 1
2480  ret float %elt1
2481}
2482
2483; CHECK-LABEL: @extract_elt2_tbuffer_load_v4f32(
2484; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2485; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
2486; CHECK-NEXT: ret float %elt1
2487define amdgpu_ps float @extract_elt2_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
2488  %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2489  %elt1 = extractelement <4 x float> %data, i32 2
2490  ret float %elt1
2491}
2492
2493; CHECK-LABEL: @extract_elt3_tbuffer_load_v4f32(
2494; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2495; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3
2496; CHECK-NEXT: ret float %elt1
2497define amdgpu_ps float @extract_elt3_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
2498  %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2499  %elt1 = extractelement <4 x float> %data, i32 3
2500  ret float %elt1
2501}
2502
2503; CHECK-LABEL: @extract_elt0_elt1_tbuffer_load_v4f32(
2504; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2505; CHECK-NEXT: ret <2 x float>
2506define amdgpu_ps <2 x float> @extract_elt0_elt1_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
2507  %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2508  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
2509  ret <2 x float> %shuf
2510}
2511
2512; CHECK-LABEL: @extract_elt1_elt2_tbuffer_load_v4f32(
2513; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2514; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
2515; CHECK-NEXT: ret <2 x float> %shuf
2516define amdgpu_ps <2 x float> @extract_elt1_elt2_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
2517  %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2518  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
2519  ret <2 x float> %shuf
2520}
2521
2522; CHECK-LABEL: @extract_elt2_elt3_tbuffer_load_v4f32(
2523; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2524; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
2525; CHECK-NEXT: ret <2 x float> %shuf
2526define amdgpu_ps <2 x float> @extract_elt2_elt3_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
2527  %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2528  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
2529  ret <2 x float> %shuf
2530}
2531
2532; CHECK-LABEL: @extract_elt0_elt1_elt2_tbuffer_load_v4f32(
2533; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2534; CHECK-NEXT: ret <3 x float> %data
2535define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
2536  %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2537  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
2538  ret <3 x float> %shuf
2539}
2540
2541; CHECK-LABEL: @extract_elt1_elt2_elt3_tbuffer_load_v4f32(
2542; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2543; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
2544; CHECK-NEXT: ret <3 x float> %shuf
2545define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
2546  %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2547  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
2548  ret <3 x float> %shuf
2549}
2550
2551; CHECK-LABEL: @extract_elt0_elt2_elt3_tbuffer_load_v4f32(
2552; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2553; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
2554; CHECK-NEXT: ret <3 x float> %shuf
2555define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
2556  %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2557  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
2558  ret <3 x float> %shuf
2559}
2560
2561; CHECK-LABEL: @extract_elt0_tbuffer_load_v3f32(
2562; CHECK-NEXT: %data = call float @llvm.amdgcn.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2563; CHECK-NEXT: ret float %data
2564define amdgpu_ps float @extract_elt0_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
2565  %data = call <3 x float> @llvm.amdgcn.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2566  %elt0 = extractelement <3 x float> %data, i32 0
2567  ret float %elt0
2568}
2569
2570; CHECK-LABEL: @extract_elt1_tbuffer_load_v3f32(
2571; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2572; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
2573; CHECK-NEXT: ret float %elt1
2574define amdgpu_ps float @extract_elt1_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
2575  %data = call <3 x float> @llvm.amdgcn.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2576  %elt1 = extractelement <3 x float> %data, i32 1
2577  ret float %elt1
2578}
2579
2580; CHECK-LABEL: @extract_elt2_tbuffer_load_v3f32(
2581; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2582; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
2583; CHECK-NEXT: ret float %elt1
2584define amdgpu_ps float @extract_elt2_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
2585  %data = call <3 x float> @llvm.amdgcn.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2586  %elt1 = extractelement <3 x float> %data, i32 2
2587  ret float %elt1
2588}
2589
2590; CHECK-LABEL: @extract_elt0_elt1_tbuffer_load_v3f32(
2591; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2592; CHECK-NEXT: ret <2 x float>
2593define amdgpu_ps <2 x float> @extract_elt0_elt1_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
2594  %data = call <3 x float> @llvm.amdgcn.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2595  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
2596  ret <2 x float> %shuf
2597}
2598
2599; CHECK-LABEL: @extract_elt1_elt2_tbuffer_load_v3f32(
2600; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2601; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
2602; CHECK-NEXT: ret <2 x float> %shuf
2603define amdgpu_ps <2 x float> @extract_elt1_elt2_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
2604  %data = call <3 x float> @llvm.amdgcn.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2605  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
2606  ret <2 x float> %shuf
2607}
2608
2609; CHECK-LABEL: @extract0_bitcast_tbuffer_load_v4f32(
2610; CHECK-NEXT: %tmp = call float @llvm.amdgcn.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2611; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32
2612; CHECK-NEXT: ret i32 %tmp2
2613define i32 @extract0_bitcast_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
2614  %tmp = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
2615  %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
2616  %tmp2 = extractelement <4 x i32> %tmp1, i32 0
2617  ret i32 %tmp2
2618}
2619
2620; CHECK-LABEL: @preserve_metadata_extract_elt0_tbuffer_load_v2f32(
2621; CHECK-NEXT: %data = call float @llvm.amdgcn.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false), !fpmath !0
2622; CHECK-NEXT: ret float %data
2623define amdgpu_ps float @preserve_metadata_extract_elt0_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
2624  %data = call <2 x float> @llvm.amdgcn.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false), !fpmath !0
2625  %elt0 = extractelement <2 x float> %data, i32 0
2626  ret float %elt0
2627}
2628
2629declare float @llvm.amdgcn.tbuffer.load.f32(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #1
2630declare <1 x float> @llvm.amdgcn.tbuffer.load.v1f32(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #1
2631declare <2 x float> @llvm.amdgcn.tbuffer.load.v2f32(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #1
2632declare <3 x float> @llvm.amdgcn.tbuffer.load.v3f32(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #1
2633declare <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #1
2634
2635declare <4 x i32> @llvm.amdgcn.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #1
2636
2637; --------------------------------------------------------------------
2638; llvm.amdgcn.image.sample
2639; --------------------------------------------------------------------
2640
2641; CHECK-LABEL: @extract_elt0_image_sample_1d_v4f32_f32(
2642; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2643; CHECK-NEXT: ret float %data
2644define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
2645  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2646  %elt0 = extractelement <4 x float> %data, i32 0
2647  ret float %elt0
2648}
2649
2650; Check that the intrinsic remains unchanged in the presence of TFE or LWE
2651; CHECK-LABEL: @extract_elt0_image_sample_1d_v4f32_f32_tfe(
2652; CHECK-NEXT: %data = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 1, i32 0)
2653; CHECK: ret float %elt0
2654define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32_tfe(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
2655  %data = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 1, i32 0)
2656  %data.vec = extractvalue {<4 x float>,i32} %data, 0
2657  %elt0 = extractelement <4 x float> %data.vec, i32 0
2658  ret float %elt0
2659}
2660
2661; Check that the intrinsic remains unchanged in the presence of TFE or LWE
2662; CHECK-LABEL: @extract_elt0_image_sample_1d_v4f32_f32_lwe(
2663; CHECK-NEXT: %data = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 2, i32 0)
2664; CHECK: ret float %elt0
2665define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32_lwe(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
2666  %data = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 2, i32 0)
2667  %data.vec = extractvalue {<4 x float>,i32} %data, 0
2668  %elt0 = extractelement <4 x float> %data.vec, i32 0
2669  ret float %elt0
2670}
2671
2672; CHECK-LABEL: @extract_elt0_image_sample_2d_v4f32_f32(
2673; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.2d.f32.f32(i32 1, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2674; CHECK-NEXT: ret float %data
2675define amdgpu_ps float @extract_elt0_image_sample_2d_v4f32_f32(float %s, float %t, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
2676  %data = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2677  %elt0 = extractelement <4 x float> %data, i32 0
2678  ret float %elt0
2679}
2680
2681; CHECK-LABEL: @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32(
2682; CHECK-NEXT: ret float undef
2683define amdgpu_ps float @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32(float %s, float %t, float %r, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
2684  %data = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 0, float %s, float %t, float %r, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2685  %elt0 = extractelement <4 x float> %data, i32 0
2686  ret float %elt0
2687}
2688
2689; CHECK-LABEL: @extract_elt0_dmask_0001_image_sample_1darray_v4f32_f32(
2690; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1darray.f32.f32(i32 1, float %s, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2691; CHECK-NEXT: ret float %data
2692define amdgpu_ps float @extract_elt0_dmask_0001_image_sample_1darray_v4f32_f32(float %s, float %slice, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
2693  %data = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 1, float %s, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2694  %elt0 = extractelement <4 x float> %data, i32 0
2695  ret float %elt0
2696}
2697
2698; CHECK-LABEL: @extract_elt0_dmask_0010_image_sample_1d_v4f32_f32(
2699; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 2, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2700; CHECK-NEXT: ret float %data
2701define amdgpu_ps float @extract_elt0_dmask_0010_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
2702  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 2, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2703  %elt0 = extractelement <4 x float> %data, i32 0
2704  ret float %elt0
2705}
2706
2707; CHECK-LABEL: @extract_elt0_dmask_0100_image_sample_1d_v4f32_f32(
2708; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 4, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2709; CHECK-NEXT: ret float %data
2710define amdgpu_ps float @extract_elt0_dmask_0100_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
2711  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 4, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2712  %elt0 = extractelement <4 x float> %data, i32 0
2713  ret float %elt0
2714}
2715
2716; CHECK-LABEL: @extract_elt0_dmask_1000_image_sample_1d_v4f32_f32(
2717; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 8, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2718; CHECK-NEXT: ret float %data
2719define amdgpu_ps float @extract_elt0_dmask_1000_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
2720  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 8, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2721  %elt0 = extractelement <4 x float> %data, i32 0
2722  ret float %elt0
2723}
2724
2725; CHECK-LABEL: @extract_elt0_dmask_1001_image_sample_1d_v4f32_f32(
2726; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2727; CHECK-NEXT: ret float %data
2728define amdgpu_ps float @extract_elt0_dmask_1001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
2729  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 9, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2730  %elt0 = extractelement <4 x float> %data, i32 0
2731  ret float %elt0
2732}
2733
2734; CHECK-LABEL: @extract_elt0_dmask_0011_image_sample_1d_v4f32_f32(
2735; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2736; CHECK-NEXT: ret float %data
2737define amdgpu_ps float @extract_elt0_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
2738  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2739  %elt0 = extractelement <4 x float> %data, i32 0
2740  ret float %elt0
2741}
2742
2743; CHECK-LABEL: @extract_elt0_dmask_0111_image_sample_1d_v4f32_f32(
2744; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2745; CHECK-NEXT: ret float %data
2746define amdgpu_ps float @extract_elt0_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
2747  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2748  %elt0 = extractelement <4 x float> %data, i32 0
2749  ret float %elt0
2750}
2751
2752; CHECK-LABEL: @extract_elt0_elt1_dmask_0001_image_sample_1d_v4f32_f32(
2753; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2754; CHECK-NEXT: %1 = insertelement <2 x float> undef, float %data, i32 0
2755; CHECK-NEXT: ret <2 x float> %1
2756define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
2757  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2758  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
2759  ret <2 x float> %shuf
2760}
2761
2762; CHECK-LABEL: @extract_elt0_elt1_dmask_0011_image_sample_1d_v4f32_f32(
2763; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2764; CHECK-NEXT: ret <2 x float> %data
2765define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
2766  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2767  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
2768  ret <2 x float> %shuf
2769}
2770
2771; CHECK-LABEL: @extract_elt0_elt1_dmask_0111_image_sample_1d_v4f32_f32(
2772; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2773; CHECK-NEXT: ret <2 x float> %data
2774define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
2775  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2776  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
2777  ret <2 x float> %shuf
2778}
2779
2780; CHECK-LABEL: @extract_elt0_elt1_dmask_0101_image_sample_1d_v4f32_f32(
2781; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2782; CHECK-NEXT: ret <2 x float> %data
2783define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0101_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
2784  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2785  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
2786  ret <2 x float> %shuf
2787}
2788
2789; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0001_image_sample_1d_v4f32_f32(
2790; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2791; CHECK-NEXT: %1 = insertelement <3 x float> undef, float %data, i32 0
2792; CHECK-NEXT: ret <3 x float> %1
2793define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
2794  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2795  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
2796  ret <3 x float> %shuf
2797}
2798
2799; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_v4f32_f32(
2800; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2801; CHECK-NEXT: %shuf = shufflevector <2 x float> %data, <2 x float> undef, <3 x i32> <i32 0, i32 1, i32 undef>
2802; CHECK-NEXT: ret <3 x float> %shuf
2803define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
2804  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2805  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
2806  ret <3 x float> %shuf
2807}
2808
2809; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0101_image_sample_1d_v4f32_f32(
2810; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2811; CHECK-NEXT: %shuf = shufflevector <2 x float> %data, <2 x float> undef, <3 x i32> <i32 0, i32 1, i32 undef>
2812; CHECK-NEXT: ret <3 x float> %shuf
2813define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0101_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
2814  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2815  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
2816  ret <3 x float> %shuf
2817}
2818
2819; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0111_image_sample_1d_v4f32_f32(
2820; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.image.sample.1d.v3f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2821; CHECK-NEXT: ret <3 x float> %data
2822define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
2823  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2824  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
2825  ret <3 x float> %shuf
2826}
2827
2828; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_1111_image_sample_1d_v4f32_f32(
2829; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.image.sample.1d.v3f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2830; CHECK-NEXT: ret <3 x float> %data
2831define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_1111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
2832  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2833  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
2834  ret <3 x float> %shuf
2835}
2836
2837declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2838declare {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2839declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2840declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2841declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2842
2843; --------------------------------------------------------------------
2844; llvm.amdgcn.image.sample.cl
2845; --------------------------------------------------------------------
2846
2847; CHECK-LABEL: @extract_elt1_image_sample_cl_2darray_v4f32_f32(
2848; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cl.2darray.f32.f32(i32 2, float %s, float %t, float %slice, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2849; CHECK-NEXT: ret float %data
2850define amdgpu_ps float @extract_elt1_image_sample_cl_2darray_v4f32_f32(float %s, float %t, float %slice, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
2851  %data = call <4 x float> @llvm.amdgcn.image.sample.cl.2darray.v4f32.f32(i32 15, float %s, float %t, float %slice, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2852  %elt0 = extractelement <4 x float> %data, i32 1
2853  ret float %elt0
2854}
2855
2856declare <4 x float> @llvm.amdgcn.image.sample.cl.2darray.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2857
2858; --------------------------------------------------------------------
2859; llvm.amdgcn.image.sample.d
2860; --------------------------------------------------------------------
2861
2862; CHECK-LABEL: @extract_elt2_image_sample_d_cube_v4f32_f32_f32(
2863; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.cube.f32.f32.f32(i32 4, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %face, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2864; CHECK-NEXT: ret float %data
2865define amdgpu_ps float @extract_elt2_image_sample_d_cube_v4f32_f32_f32(float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %face, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
2866  %data = call <4 x float> @llvm.amdgcn.image.sample.d.cube.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %face, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2867  %elt0 = extractelement <4 x float> %data, i32 2
2868  ret float %elt0
2869}
2870
2871declare <4 x float> @llvm.amdgcn.image.sample.d.cube.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2872
2873; --------------------------------------------------------------------
2874; llvm.amdgcn.image.sample.d.cl
2875; --------------------------------------------------------------------
2876
2877; CHECK-LABEL: @extract_elt3_image_sample_d_cl_1d_v4f32_f32_f32(
2878; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.cl.1d.f32.f32.f32(i32 8, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2879; CHECK-NEXT: ret float %data
2880define amdgpu_ps float @extract_elt3_image_sample_d_cl_1d_v4f32_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
2881  %data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2882  %elt0 = extractelement <4 x float> %data, i32 3
2883  ret float %elt0
2884}
2885
2886declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2887
2888; --------------------------------------------------------------------
2889; llvm.amdgcn.image.sample.l
2890; --------------------------------------------------------------------
2891
2892; CHECK-LABEL: @extract_elt1_dmask_0110_image_sample_l_1d_v2f32_f32(
2893; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.l.1d.f32.f32(i32 4, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2894; CHECK-NEXT: ret float %data
2895define amdgpu_ps float @extract_elt1_dmask_0110_image_sample_l_1d_v2f32_f32(float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
2896  %data = call <2 x float> @llvm.amdgcn.image.sample.l.1d.v2f32.f32(i32 6, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2897  %elt0 = extractelement <2 x float> %data, i32 1
2898  ret float %elt0
2899}
2900
2901declare <2 x float> @llvm.amdgcn.image.sample.l.1d.v2f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2902
2903; --------------------------------------------------------------------
2904; llvm.amdgcn.image.sample.b
2905; --------------------------------------------------------------------
2906
2907; CHECK-LABEL: @extract_elt1_dmask_1001_image_sample_b_1d_v4f32_f32_f32(
2908; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.b.1d.f32.f32.f32(i32 8, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2909; CHECK-NEXT: ret float %data
2910define amdgpu_ps float @extract_elt1_dmask_1001_image_sample_b_1d_v4f32_f32_f32(float %bias, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
2911  %data = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 9, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2912  %elt0 = extractelement <4 x float> %data, i32 1
2913  ret float %elt0
2914}
2915
2916declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2917
2918; --------------------------------------------------------------------
2919; llvm.amdgcn.image.sample.b.cl
2920; --------------------------------------------------------------------
2921
2922; CHECK-LABEL: @extract_elt1_elt2_dmask_1101_image_sample_b_cl_1d_v4f32_f32_f32(
2923; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.b.cl.1d.v2f32.f32.f32(i32 12, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2924; CHECK-NEXT: ret <2 x float> %data
2925define amdgpu_ps <2 x float> @extract_elt1_elt2_dmask_1101_image_sample_b_cl_1d_v4f32_f32_f32(float %bias, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
2926  %data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 13, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2927  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
2928  ret <2 x float> %shuf
2929}
2930
2931declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2932
2933; --------------------------------------------------------------------
2934; llvm.amdgcn.image.sample.lz
2935; --------------------------------------------------------------------
2936
2937; CHECK-LABEL: @extract_elt1_elt3_image_sample_lz_1d_v4f32_f32(
2938; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.lz.1d.v2f32.f32(i32 10, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2939; CHECK-NEXT: ret <2 x float> %data
2940define amdgpu_ps <2 x float> @extract_elt1_elt3_image_sample_lz_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
2941  %data = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2942  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 3>
2943  ret <2 x float> %shuf
2944}
2945
2946declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2947
2948; --------------------------------------------------------------------
2949; llvm.amdgcn.image.sample.cd
2950; --------------------------------------------------------------------
2951
2952; CHECK-LABEL: @extract_elt1_elt2_elt3_image_sample_cd_1d_v4f32_f32_f32(
2953; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.image.sample.cd.1d.v3f32.f32.f32(i32 14, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2954; CHECK-NEXT: ret <3 x float> %data
2955define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_image_sample_cd_1d_v4f32_f32_f32(float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
2956  %data = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2957  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
2958  ret <3 x float> %shuf
2959}
2960
2961declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2962
2963; --------------------------------------------------------------------
2964; llvm.amdgcn.image.sample.cd.cl
2965; --------------------------------------------------------------------
2966
2967; CHECK-LABEL: @extract_elt3_image_sample_cd_cl_1d_v4f16_f32_f32(
2968; CHECK-NEXT: %data = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32(i32 8, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2969; CHECK-NEXT: ret half %data
2970define amdgpu_ps half @extract_elt3_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
2971  %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2972  %elt0 = extractelement <4 x half> %data, i32 3
2973  ret half %elt0
2974}
2975
2976; CHECK-LABEL: @extract_elt2_image_sample_cd_cl_1d_v4f16_f32_f32(
2977; CHECK-NEXT: %data = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32(i32 4, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2978; CHECK-NEXT: ret half %data
2979define amdgpu_ps half @extract_elt2_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
2980  %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2981  %elt0 = extractelement <4 x half> %data, i32 2
2982  ret half %elt0
2983}
2984
2985; CHECK-LABEL: @extract_elt1_image_sample_cd_cl_1d_v4f16_f32_f32(
2986; CHECK-NEXT: %data = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32(i32 2, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2987; CHECK-NEXT: ret half %data
2988define amdgpu_ps half @extract_elt1_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
2989  %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2990  %elt0 = extractelement <4 x half> %data, i32 1
2991  ret half %elt0
2992}
2993
2994; CHECK-LABEL: @extract_elt_to3_image_sample_cd_cl_1d_v4f16_f32_f32(
2995; CHECK-NEXT: %data = call <3 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v3f16.f32.f32(i32 7, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
2996; CHECK-NEXT: %res = shufflevector <3 x half> %data, <3 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
2997; CHECK-NEXT: ret <4 x half> %res
2998define amdgpu_ps <4 x half> @extract_elt_to3_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
2999  %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3000  %res = shufflevector <4 x half> %data, <4 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
3001  ret <4 x half> %res
3002}
3003
3004; CHECK-LABEL: @extract_elt_to2_image_sample_cd_cl_1d_v4f16_f32_f32(
3005; CHECK-NEXT: %data = call <2 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v2f16.f32.f32(i32 3, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3006; CHECK-NEXT: %res = shufflevector <2 x half> %data, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
3007; CHECK-NEXT: ret <4 x half> %res
3008define amdgpu_ps <4 x half> @extract_elt_to2_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3009  %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3010  %res = shufflevector <4 x half> %data, <4 x half> undef, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
3011  ret <4 x half> %res
3012}
3013
3014; CHECK-LABEL: @extract_elt_to1_image_sample_cd_cl_1d_v4f16_f32_f32(
3015; CHECK-NEXT: %data = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32(i32 1, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3016; CHECK-NEXT: %res = insertelement <4 x half> undef, half %data, i64 0
3017; CHECK-NEXT: ret <4 x half> %res
3018define amdgpu_ps <4 x half> @extract_elt_to1_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3019  %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3020  %res = shufflevector <4 x half> %data, <4 x half> undef, <4 x i32> <i32 0, i32 4, i32 5, i32 6>
3021  ret <4 x half> %res
3022}
3023
3024; CHECK-LABEL: @extract_elt0_image_sample_cd_cl_1d_v4f16_f32_f32(
3025; CHECK-NEXT: %data = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32(i32 1, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3026; CHECK-NEXT: ret half %data
3027define amdgpu_ps half @extract_elt0_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3028  %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3029  %elt0 = extractelement <4 x half> %data, i32 0
3030  ret half %elt0
3031}
3032
3033declare <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3034
3035; --------------------------------------------------------------------
3036; llvm.amdgcn.image.sample.c
3037; --------------------------------------------------------------------
3038
3039; CHECK-LABEL: @extract_elt0_image_sample_c_1d_v4f32_f32(
3040; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.1d.f32.f32(i32 1, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3041; CHECK-NEXT: ret float %data
3042define amdgpu_ps float @extract_elt0_image_sample_c_1d_v4f32_f32(float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3043  %data = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3044  %elt0 = extractelement <4 x float> %data, i32 0
3045  ret float %elt0
3046}
3047
3048declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3049
3050; --------------------------------------------------------------------
3051; llvm.amdgcn.image.sample.c.cl
3052; --------------------------------------------------------------------
3053
3054; CHECK-LABEL: @extract_elt0_image_sample_c_cl_1d_v4f32_f32(
3055; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cl.1d.f32.f32(i32 1, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3056; CHECK-NEXT: ret float %data
3057define amdgpu_ps float @extract_elt0_image_sample_c_cl_1d_v4f32_f32(float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3058  %data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3059  %elt0 = extractelement <4 x float> %data, i32 0
3060  ret float %elt0
3061}
3062
3063declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3064
3065; --------------------------------------------------------------------
3066; llvm.amdgcn.image.sample.c.d
3067; --------------------------------------------------------------------
3068
3069; CHECK-LABEL: @extract_elt0_image_sample_c_d_1d_v4f32_f32_f32(
3070; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.1d.f32.f32.f32(i32 1, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3071; CHECK-NEXT: ret float %data
3072define amdgpu_ps float @extract_elt0_image_sample_c_d_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3073  %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3074  %elt0 = extractelement <4 x float> %data, i32 0
3075  ret float %elt0
3076}
3077
3078declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3079
3080; --------------------------------------------------------------------
3081; llvm.amdgcn.image.sample.c.d.cl
3082; --------------------------------------------------------------------
3083
3084; CHECK-LABEL: @extract_elt0_image_sample_c_d_cl_1d_v4f32_f32_f32(
3085; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.cl.1d.f32.f32.f32(i32 1, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3086; CHECK-NEXT: ret float %data
3087define amdgpu_ps float @extract_elt0_image_sample_c_d_cl_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3088  %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3089  %elt0 = extractelement <4 x float> %data, i32 0
3090  ret float %elt0
3091}
3092
3093declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3094
3095; --------------------------------------------------------------------
3096; llvm.amdgcn.image.sample.c.l
3097; --------------------------------------------------------------------
3098
3099; CHECK-LABEL: @extract_elt0_image_sample_c_l_1d_v4f32_f32(
3100; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.l.1d.f32.f32(i32 1, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3101; CHECK-NEXT: ret float %data
3102define amdgpu_ps float @extract_elt0_image_sample_c_l_1d_v4f32_f32(float %zcompare, float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3103  %data = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3104  %elt0 = extractelement <4 x float> %data, i32 0
3105  ret float %elt0
3106}
3107
3108declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3109
3110; --------------------------------------------------------------------
3111; llvm.amdgcn.image.sample.c.b
3112; --------------------------------------------------------------------
3113
3114; CHECK-LABEL: @extract_elt0_image_sample_c_b_1d_v4f32_f32_f32(
3115; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.1d.f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3116; CHECK-NEXT: ret float %data
3117define amdgpu_ps float @extract_elt0_image_sample_c_b_1d_v4f32_f32_f32(float %bias, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3118  %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3119  %elt0 = extractelement <4 x float> %data, i32 0
3120  ret float %elt0
3121}
3122
3123declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3124
3125; --------------------------------------------------------------------
3126; llvm.amdgcn.image.sample.c.b.cl
3127; --------------------------------------------------------------------
3128
3129; CHECK-LABEL: @extract_elt0_image_sample_c_b_cl_1d_v4f32_f32_f32(
3130; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.cl.1d.f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3131; CHECK-NEXT: ret float %data
3132define amdgpu_ps float @extract_elt0_image_sample_c_b_cl_1d_v4f32_f32_f32(float %bias, float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3133  %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3134  %elt0 = extractelement <4 x float> %data, i32 0
3135  ret float %elt0
3136}
3137
3138declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3139
3140; --------------------------------------------------------------------
3141; llvm.amdgcn.image.sample.c.lz
3142; --------------------------------------------------------------------
3143
3144; CHECK-LABEL: @extract_elt0_image_sample_c_lz_1d_v4f32_f32(
3145; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.lz.1d.f32.f32(i32 1, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3146; CHECK-NEXT: ret float %data
3147define amdgpu_ps float @extract_elt0_image_sample_c_lz_1d_v4f32_f32(float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3148  %data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3149  %elt0 = extractelement <4 x float> %data, i32 0
3150  ret float %elt0
3151}
3152
3153declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3154
3155; --------------------------------------------------------------------
3156; llvm.amdgcn.image.sample.c.cd
3157; --------------------------------------------------------------------
3158
3159; CHECK-LABEL: @extract_elt0_image_sample_c_cd_1d_v4f32_f32_f32(
3160; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.1d.f32.f32.f32(i32 1, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3161; CHECK-NEXT: ret float %data
3162define amdgpu_ps float @extract_elt0_image_sample_c_cd_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3163  %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3164  %elt0 = extractelement <4 x float> %data, i32 0
3165  ret float %elt0
3166}
3167
3168declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3169
3170; --------------------------------------------------------------------
3171; llvm.amdgcn.image.sample.c.cd.cl
3172; --------------------------------------------------------------------
3173
3174; CHECK-LABEL: @extract_elt0_image_sample_c_cd_cl_1d_v4f32_f32_f32(
3175; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.cl.1d.f32.f32.f32(i32 1, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3176; CHECK-NEXT: ret float %data
3177define amdgpu_ps float @extract_elt0_image_sample_c_cd_cl_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3178  %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3179  %elt0 = extractelement <4 x float> %data, i32 0
3180  ret float %elt0
3181}
3182
3183declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3184
3185; --------------------------------------------------------------------
3186; llvm.amdgcn.image.sample.o
3187; --------------------------------------------------------------------
3188
3189; CHECK-LABEL: @extract_elt0_image_sample_o_1d_v4f32_f32(
3190; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.o.1d.f32.f32(i32 1, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3191; CHECK-NEXT: ret float %data
3192define amdgpu_ps float @extract_elt0_image_sample_o_1d_v4f32_f32(i32 %offset, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3193  %data = call <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3194  %elt0 = extractelement <4 x float> %data, i32 0
3195  ret float %elt0
3196}
3197
3198declare <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3199
3200; --------------------------------------------------------------------
3201; llvm.amdgcn.image.sample.cl.o
3202; --------------------------------------------------------------------
3203
3204; CHECK-LABEL: @extract_elt0_image_sample_cl_o_1d_v4f32_f32(
3205; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cl.o.1d.f32.f32(i32 1, i32 %offset, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3206; CHECK-NEXT: ret float %data
3207define amdgpu_ps float @extract_elt0_image_sample_cl_o_1d_v4f32_f32(i32 %offset, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3208  %data = call <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3209  %elt0 = extractelement <4 x float> %data, i32 0
3210  ret float %elt0
3211}
3212
3213declare <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3214
3215; --------------------------------------------------------------------
3216; llvm.amdgcn.image.sample.d.o
3217; --------------------------------------------------------------------
3218
3219; CHECK-LABEL: @extract_elt0_image_sample_d_o_1d_v4f32_f32_f32(
3220; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.o.1d.f32.f32.f32(i32 1, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3221; CHECK-NEXT: ret float %data
3222define amdgpu_ps float @extract_elt0_image_sample_d_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3223  %data = call <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3224  %elt0 = extractelement <4 x float> %data, i32 0
3225  ret float %elt0
3226}
3227
3228declare <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3229
3230; --------------------------------------------------------------------
3231; llvm.amdgcn.image.sample.d.cl.o
3232; --------------------------------------------------------------------
3233
3234; CHECK-LABEL: @extract_elt0_image_sample_d_cl_o_1d_v4f32_f32_f32(
3235; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.cl.o.1d.f32.f32.f32(i32 1, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3236; CHECK-NEXT: ret float %data
3237define amdgpu_ps float @extract_elt0_image_sample_d_cl_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3238  %data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3239  %elt0 = extractelement <4 x float> %data, i32 0
3240  ret float %elt0
3241}
3242
3243declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3244
3245; --------------------------------------------------------------------
3246; llvm.amdgcn.image.sample.l.o
3247; --------------------------------------------------------------------
3248
3249; CHECK-LABEL: @extract_elt0_image_sample_l_o_1d_v4f32_f32(
3250; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.l.o.1d.f32.f32(i32 1, i32 %offset, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3251; CHECK-NEXT: ret float %data
3252define amdgpu_ps float @extract_elt0_image_sample_l_o_1d_v4f32_f32(i32 %offset, float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3253  %data = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3254  %elt0 = extractelement <4 x float> %data, i32 0
3255  ret float %elt0
3256}
3257
3258declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3259
3260; --------------------------------------------------------------------
3261; llvm.amdgcn.image.sample.b.o
3262; --------------------------------------------------------------------
3263
3264; CHECK-LABEL: @extract_elt0_image_sample_b_o_1d_v4f32_f32_f32(
3265; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.b.o.1d.f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3266; CHECK-NEXT: ret float %data
3267define amdgpu_ps float @extract_elt0_image_sample_b_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3268  %data = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3269  %elt0 = extractelement <4 x float> %data, i32 0
3270  ret float %elt0
3271}
3272
3273declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3274
3275; --------------------------------------------------------------------
3276; llvm.amdgcn.image.sample.b.cl.o
3277; --------------------------------------------------------------------
3278
3279; CHECK-LABEL: @extract_elt0_image_sample_b_cl_o_1d_v4f32_f32_f32(
3280; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.b.cl.o.1d.f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3281; CHECK-NEXT: ret float %data
3282define amdgpu_ps float @extract_elt0_image_sample_b_cl_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3283  %data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3284  %elt0 = extractelement <4 x float> %data, i32 0
3285  ret float %elt0
3286}
3287
3288declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3289
3290; --------------------------------------------------------------------
3291; llvm.amdgcn.image.sample.lz.o
3292; --------------------------------------------------------------------
3293
3294; CHECK-LABEL: @extract_elt0_image_sample_lz_o_1d_v4f32_f32(
3295; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.lz.o.1d.f32.f32(i32 1, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3296; CHECK-NEXT: ret float %data
3297define amdgpu_ps float @extract_elt0_image_sample_lz_o_1d_v4f32_f32(i32 %offset, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3298  %data = call <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3299  %elt0 = extractelement <4 x float> %data, i32 0
3300  ret float %elt0
3301}
3302
3303declare <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3304
3305; --------------------------------------------------------------------
3306; llvm.amdgcn.image.sample.cd.o
3307; --------------------------------------------------------------------
3308
3309; CHECK-LABEL: @extract_elt0_image_sample_cd_o_1d_v4f32_f32_f32(
3310; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cd.o.1d.f32.f32.f32(i32 1, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3311; CHECK-NEXT: ret float %data
3312define amdgpu_ps float @extract_elt0_image_sample_cd_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3313  %data = call <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3314  %elt0 = extractelement <4 x float> %data, i32 0
3315  ret float %elt0
3316}
3317
3318declare <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3319
3320; --------------------------------------------------------------------
3321; llvm.amdgcn.image.sample.cd.cl.o
3322; --------------------------------------------------------------------
3323
3324; CHECK-LABEL: @extract_elt0_image_sample_cd_cl_o_1d_v4f32_f32_f32(
3325; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cd.cl.o.1d.f32.f32.f32(i32 1, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3326; CHECK-NEXT: ret float %data
3327define amdgpu_ps float @extract_elt0_image_sample_cd_cl_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3328  %data = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3329  %elt0 = extractelement <4 x float> %data, i32 0
3330  ret float %elt0
3331}
3332
3333declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3334
3335; --------------------------------------------------------------------
3336; llvm.amdgcn.image.sample.c.o
3337; --------------------------------------------------------------------
3338
3339; CHECK-LABEL: @extract_elt0_image_sample_c_o_1d_v4f32_f32(
3340; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.o.1d.f32.f32(i32 1, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3341; CHECK-NEXT: ret float %data
3342define amdgpu_ps float @extract_elt0_image_sample_c_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3343  %data = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3344  %elt0 = extractelement <4 x float> %data, i32 0
3345  ret float %elt0
3346}
3347
3348declare <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3349
3350; --------------------------------------------------------------------
3351; llvm.amdgcn.image.sample.c.cl.o
3352; --------------------------------------------------------------------
3353
3354; CHECK-LABEL: @extract_elt0_image_sample_c_cl_o_1d_v4f32_f32(
3355; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cl.o.1d.f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3356; CHECK-NEXT: ret float %data
3357define amdgpu_ps float @extract_elt0_image_sample_c_cl_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3358  %data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3359  %elt0 = extractelement <4 x float> %data, i32 0
3360  ret float %elt0
3361}
3362
3363declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3364
3365; --------------------------------------------------------------------
3366; llvm.amdgcn.image.sample.c.d.o
3367; --------------------------------------------------------------------
3368
3369; CHECK-LABEL: @extract_elt0_image_sample_c_d_o_1d_v4f32_f32_f32(
3370; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.o.1d.f32.f32.f32(i32 1, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3371; CHECK-NEXT: ret float %data
3372define amdgpu_ps float @extract_elt0_image_sample_c_d_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3373  %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3374  %elt0 = extractelement <4 x float> %data, i32 0
3375  ret float %elt0
3376}
3377
3378declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3379
3380; --------------------------------------------------------------------
3381; llvm.amdgcn.image.sample.c.d.cl.o
3382; --------------------------------------------------------------------
3383
3384; CHECK-LABEL: @extract_elt0_image_sample_c_d_cl_o_1d_v4f32_f32_f32(
3385; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.cl.o.1d.f32.f32.f32(i32 1, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3386; CHECK-NEXT: ret float %data
3387define amdgpu_ps float @extract_elt0_image_sample_c_d_cl_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3388  %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3389  %elt0 = extractelement <4 x float> %data, i32 0
3390  ret float %elt0
3391}
3392
3393declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3394
3395; --------------------------------------------------------------------
3396; llvm.amdgcn.image.sample.c.l.o
3397; --------------------------------------------------------------------
3398
3399; CHECK-LABEL: @extract_elt0_image_sample_c_l_o_1d_v4f32_f32(
3400; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.l.o.1d.f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3401; CHECK-NEXT: ret float %data
3402define amdgpu_ps float @extract_elt0_image_sample_c_l_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3403  %data = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3404  %elt0 = extractelement <4 x float> %data, i32 0
3405  ret float %elt0
3406}
3407
3408declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3409
3410; --------------------------------------------------------------------
3411; llvm.amdgcn.image.sample.c.b.o
3412; --------------------------------------------------------------------
3413
3414; CHECK-LABEL: @extract_elt0_image_sample_c_b_o_1d_v4f32_f32_f32(
3415; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.o.1d.f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3416; CHECK-NEXT: ret float %data
3417define amdgpu_ps float @extract_elt0_image_sample_c_b_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3418  %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3419  %elt0 = extractelement <4 x float> %data, i32 0
3420  ret float %elt0
3421}
3422
3423declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3424
3425; --------------------------------------------------------------------
3426; llvm.amdgcn.image.sample.c.b.cl.o
3427; --------------------------------------------------------------------
3428
3429; CHECK-LABEL: @extract_elt0_image_sample_c_b_cl_o_1d_v4f32_f32_f32(
3430; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.cl.o.1d.f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3431; CHECK-NEXT: ret float %data
3432define amdgpu_ps float @extract_elt0_image_sample_c_b_cl_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3433  %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3434  %elt0 = extractelement <4 x float> %data, i32 0
3435  ret float %elt0
3436}
3437
3438declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3439
3440; --------------------------------------------------------------------
3441; llvm.amdgcn.image.sample.c.lz.o
3442; --------------------------------------------------------------------
3443
3444; CHECK-LABEL: @extract_elt0_image_sample_c_lz_o_1d_v4f32_f32(
3445; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.lz.o.1d.f32.f32(i32 1, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3446; CHECK-NEXT: ret float %data
3447define amdgpu_ps float @extract_elt0_image_sample_c_lz_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3448  %data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3449  %elt0 = extractelement <4 x float> %data, i32 0
3450  ret float %elt0
3451}
3452
3453declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3454
3455; --------------------------------------------------------------------
3456; llvm.amdgcn.image.sample.c.cd.o
3457; --------------------------------------------------------------------
3458
3459; CHECK-LABEL: @extract_elt0_image_sample_c_cd_o_1d_v4f32_f32_f32(
3460; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.o.1d.f32.f32.f32(i32 1, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3461; CHECK-NEXT: ret float %data
3462define amdgpu_ps float @extract_elt0_image_sample_c_cd_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3463  %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3464  %elt0 = extractelement <4 x float> %data, i32 0
3465  ret float %elt0
3466}
3467
3468declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3469
3470; --------------------------------------------------------------------
3471; llvm.amdgcn.image.sample.c.cd.cl.o
3472; --------------------------------------------------------------------
3473
3474; CHECK-LABEL: @extract_elt0_image_sample_c_cd_cl_o_1d_v4f32_f32_f32(
3475; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.cl.o.1d.f32.f32.f32(i32 1, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3476; CHECK-NEXT: ret float %data
3477define amdgpu_ps float @extract_elt0_image_sample_c_cd_cl_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3478  %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3479  %elt0 = extractelement <4 x float> %data, i32 0
3480  ret float %elt0
3481}
3482
3483declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3484
3485; --------------------------------------------------------------------
3486; llvm.amdgcn.image.gather4
3487; --------------------------------------------------------------------
3488
3489; Don't handle gather4*
3490
3491; CHECK-LABEL: @extract_elt0_image_gather4_2d_v4f32_f32(
3492; CHECK: %data = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 1, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3493define amdgpu_ps float @extract_elt0_image_gather4_2d_v4f32_f32(float %s, float %t, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3494  %data = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 1, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3495  %elt0 = extractelement <4 x float> %data, i32 0
3496  ret float %elt0
3497}
3498
3499declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3500
3501; --------------------------------------------------------------------
3502; llvm.amdgcn.image.gather4.cl
3503; --------------------------------------------------------------------
3504
3505; CHECK-LABEL: @extract_elt0_image_gather4_cl_2d_v4f32_f32(
3506; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32 2, float %s, float %t, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3507define amdgpu_ps float @extract_elt0_image_gather4_cl_2d_v4f32_f32(float %s, float %t, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3508  %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32 2, float %s, float %t, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3509  %elt0 = extractelement <4 x float> %data, i32 0
3510  ret float %elt0
3511}
3512
3513declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3514
3515; --------------------------------------------------------------------
3516; llvm.amdgcn.image.gather4.l
3517; --------------------------------------------------------------------
3518
3519; CHECK-LABEL: @extract_elt0_image_gather4_l_2d_v4f32_f32(
3520; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 4, float %s, float %t, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3521define amdgpu_ps float @extract_elt0_image_gather4_l_2d_v4f32_f32(float %s, float %t, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3522  %data = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 4, float %s, float %t, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3523  %elt0 = extractelement <4 x float> %data, i32 0
3524  ret float %elt0
3525}
3526
3527declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3528
3529; --------------------------------------------------------------------
3530; llvm.amdgcn.image.gather4.b
3531; --------------------------------------------------------------------
3532
3533; CHECK-LABEL: @extract_elt0_image_gather4_b_2darray_v4f32_f32_f32(
3534; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32(i32 8, float %bias, float %s, float %t, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3535define amdgpu_ps float @extract_elt0_image_gather4_b_2darray_v4f32_f32_f32(float %bias, float %s, float %t, float %slice, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3536  %data = call <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32(i32 8, float %bias, float %s, float %t, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3537  %elt0 = extractelement <4 x float> %data, i32 0
3538  ret float %elt0
3539}
3540
3541declare <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3542
3543; --------------------------------------------------------------------
3544; llvm.amdgcn.image.gather4.b.cl
3545; --------------------------------------------------------------------
3546
3547; CHECK-LABEL: @extract_elt0_image_gather4_b_cl_cube_v4f32_f32_f32(
3548; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32(i32 1, float %bias, float %s, float %t, float %face, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3549define amdgpu_ps float @extract_elt0_image_gather4_b_cl_cube_v4f32_f32_f32(float %bias, float %s, float %t, float %face, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3550  %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32(i32 1, float %bias, float %s, float %t, float %face, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3551  %elt0 = extractelement <4 x float> %data, i32 0
3552  ret float %elt0
3553}
3554
3555declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3556
3557; --------------------------------------------------------------------
3558; llvm.amdgcn.image.gather4.lz
3559; --------------------------------------------------------------------
3560
3561; CHECK-LABEL: @extract_elt0_image_gather4_lz_2d_v4f32_f16(
3562; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3563define amdgpu_ps float @extract_elt0_image_gather4_lz_2d_v4f32_f16(half %s, half %t, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3564  %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3565  %elt0 = extractelement <4 x float> %data, i32 0
3566  ret float %elt0
3567}
3568
3569declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3570
3571; --------------------------------------------------------------------
3572; llvm.amdgcn.image.gather4.o
3573; --------------------------------------------------------------------
3574
3575; CHECK-LABEL: @extract_elt0_image_gather4_o_2d_v4f32_f32(
3576; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3577define amdgpu_ps float @extract_elt0_image_gather4_o_2d_v4f32_f32(i32 %offset, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
3578  %data = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3579  %elt0 = extractelement <4 x float> %data, i32 0
3580  ret float %elt0
3581}
3582
3583declare <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3584
3585; --------------------------------------------------------------------
3586; llvm.amdgcn.image.gather4.cl.o
3587; --------------------------------------------------------------------
3588
3589; CHECK-LABEL: @extract_elt0_image_gather4_cl_o_2d_v4f32_f32(
3590; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3591define amdgpu_ps float @extract_elt0_image_gather4_cl_o_2d_v4f32_f32(i32 %offset, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
3592  %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3593  %elt0 = extractelement <4 x float> %data, i32 0
3594  ret float %elt0
3595}
3596
3597declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3598
3599; --------------------------------------------------------------------
3600; llvm.amdgcn.image.gather4.l.o
3601; --------------------------------------------------------------------
3602
3603; CHECK-LABEL: @extract_elt0_image_gather4_l_o_2d_v4f32_f32(
3604; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3605define amdgpu_ps float @extract_elt0_image_gather4_l_o_2d_v4f32_f32(i32 %offset, float %s, float %t, float %lod, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
3606  %data = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3607  %elt0 = extractelement <4 x float> %data, i32 0
3608  ret float %elt0
3609}
3610
3611declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3612
3613; --------------------------------------------------------------------
3614; llvm.amdgcn.image.gather4.b.o
3615; --------------------------------------------------------------------
3616
3617; CHECK-LABEL: @extract_elt0_image_gather4_b_o_2d_v4f32_f32_f32(
3618; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3619define amdgpu_ps float @extract_elt0_image_gather4_b_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
3620  %data = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3621  %elt0 = extractelement <4 x float> %data, i32 0
3622  ret float %elt0
3623}
3624
3625declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3626
3627; --------------------------------------------------------------------
3628; llvm.amdgcn.image.gather4.b.cl.o
3629; --------------------------------------------------------------------
3630
3631; CHECK-LABEL: @extract_elt0_image_gather4_b_cl_o_2d_v4f32_f32_f32(
3632; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3633define amdgpu_ps float @extract_elt0_image_gather4_b_cl_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
3634  %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3635  %elt0 = extractelement <4 x float> %data, i32 0
3636  ret float %elt0
3637}
3638
3639declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3640
3641; --------------------------------------------------------------------
3642; llvm.amdgcn.image.gather4.lz.o
3643; --------------------------------------------------------------------
3644
3645; CHECK-LABEL: @extract_elt0_image_gather4_lz_o_2d_v4f32_f32(
3646; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3647define amdgpu_ps float @extract_elt0_image_gather4_lz_o_2d_v4f32_f32(i32 %offset, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
3648  %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3649  %elt0 = extractelement <4 x float> %data, i32 0
3650  ret float %elt0
3651}
3652
3653declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3654
3655; --------------------------------------------------------------------
3656; llvm.amdgcn.image.gather4.c.o
3657; --------------------------------------------------------------------
3658
3659; CHECK-LABEL: @extract_elt0_image_gather4_c_o_2d_v4f32_f32(
3660; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3661define amdgpu_ps float @extract_elt0_image_gather4_c_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
3662  %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3663  %elt0 = extractelement <4 x float> %data, i32 0
3664  ret float %elt0
3665}
3666
3667declare <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3668
3669; --------------------------------------------------------------------
3670; llvm.amdgcn.image.gather4.c.cl.o
3671; --------------------------------------------------------------------
3672
3673; CHECK-LABEL: @extract_elt0_image_gather4_c_cl_o_2d_v4f32_f32(
3674; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3675define amdgpu_ps float @extract_elt0_image_gather4_c_cl_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
3676  %data = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3677  %elt0 = extractelement <4 x float> %data, i32 0
3678  ret float %elt0
3679}
3680
3681declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3682
3683; --------------------------------------------------------------------
3684; llvm.amdgcn.image.gather4.c.l.o
3685; --------------------------------------------------------------------
3686
3687; CHECK-LABEL: @extract_elt0_image_gather4_c_l_o_2d_v4f32_f32(
3688; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3689define amdgpu_ps float @extract_elt0_image_gather4_c_l_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
3690  %data = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3691  %elt0 = extractelement <4 x float> %data, i32 0
3692  ret float %elt0
3693}
3694
3695declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3696
3697; --------------------------------------------------------------------
3698; llvm.amdgcn.image.gather4.c.b.o
3699; --------------------------------------------------------------------
3700
3701; CHECK-LABEL: @extract_elt0_image_gather4_c_b_o_2d_v4f32_f32_f32(
3702; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3703define amdgpu_ps float @extract_elt0_image_gather4_c_b_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
3704  %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3705  %elt0 = extractelement <4 x float> %data, i32 0
3706  ret float %elt0
3707}
3708
3709declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3710
3711; --------------------------------------------------------------------
3712; llvm.amdgcn.image.gather4.c.b.cl.o
3713; --------------------------------------------------------------------
3714
3715; CHECK-LABEL: @extract_elt0_image_gather4_c_b_cl_o_2d_v4f32_f32_f32(
3716; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3717define amdgpu_ps float @extract_elt0_image_gather4_c_b_cl_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
3718  %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3719  %elt0 = extractelement <4 x float> %data, i32 0
3720  ret float %elt0
3721}
3722
3723declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3724
3725; --------------------------------------------------------------------
3726; llvm.amdgcn.image.gather4.c.lz.o
3727; --------------------------------------------------------------------
3728
3729; CHECK-LABEL: @extract_elt0_image_gather4_c_lz_o_2d_v4f32_f32(
3730; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3731define amdgpu_ps float @extract_elt0_image_gather4_c_lz_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
3732  %data = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3733  %elt0 = extractelement <4 x float> %data, i32 0
3734  ret float %elt0
3735}
3736
3737declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3738
3739; --------------------------------------------------------------------
3740; llvm.amdgcn.image.getlod
3741; --------------------------------------------------------------------
3742
3743; CHECK-LABEL: @extract_elt0_image_getlod_1d_v4f32_f32(
3744; CHECK-NEXT: %data = call float @llvm.amdgcn.image.getlod.1d.f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3745; CHECK-NEXT: ret float %data
3746define amdgpu_ps float @extract_elt0_image_getlod_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
3747  %data = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
3748  %elt0 = extractelement <4 x float> %data, i32 0
3749  ret float %elt0
3750}
3751
3752declare <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
3753
3754; --------------------------------------------------------------------
3755; llvm.amdgcn.image.load
3756; --------------------------------------------------------------------
3757
3758; CHECK-LABEL: @extract_elt0_image_load_2dmsaa_v4f32_i32(
3759; CHECK-NEXT: %data = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 %sample, <8 x i32> %sampler, i32 0, i32 0)
3760; CHECK-NEXT: ret float %data
3761define amdgpu_ps float @extract_elt0_image_load_2dmsaa_v4f32_i32(i32 %s, i32 %t, i32 %sample, <8 x i32> inreg %sampler) #0 {
3762  %data = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %sample, <8 x i32> %sampler, i32 0, i32 0)
3763  %elt0 = extractelement <4 x float> %data, i32 0
3764  ret float %elt0
3765}
3766
3767declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
3768
3769; --------------------------------------------------------------------
3770; llvm.amdgcn.image.load.mip
3771; --------------------------------------------------------------------
3772
3773; CHECK-LABEL: @extract_elt0_image_load_mip_1d_v4f32_i32(
3774; CHECK-NEXT: %data = call float @llvm.amdgcn.image.load.mip.1d.f32.i32(i32 1, i32 %s, i32 %mip, <8 x i32> %sampler, i32 0, i32 0)
3775; CHECK-NEXT: ret float %data
3776define amdgpu_ps float @extract_elt0_image_load_mip_1d_v4f32_i32(i32 %s, i32 %mip, <8 x i32> inreg %sampler) #0 {
3777  %data = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s, i32 %mip, <8 x i32> %sampler, i32 0, i32 0)
3778  %elt0 = extractelement <4 x float> %data, i32 0
3779  ret float %elt0
3780}
3781
3782declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
3783
3784; --------------------------------------------------------------------
3785; llvm.amdgcn.image.getresinfo
3786; --------------------------------------------------------------------
3787
3788; CHECK-LABEL: @extract_elt0_image_getresinfo_1d_v4f32_i32(
3789; CHECK-NEXT: %data = call float @llvm.amdgcn.image.getresinfo.1d.f32.i32(i32 1, i32 %mip, <8 x i32> %sampler, i32 0, i32 0)
3790; CHECK-NEXT: ret float %data
3791define amdgpu_ps float @extract_elt0_image_getresinfo_1d_v4f32_i32(i32 %mip, <8 x i32> inreg %sampler) #0 {
3792  %data = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %sampler, i32 0, i32 0)
3793  %elt0 = extractelement <4 x float> %data, i32 0
3794  ret float %elt0
3795}
3796
3797declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
3798
3799; --------------------------------------------------------------------
3800; TFE / LWE
3801; --------------------------------------------------------------------
3802
3803; CHECK-LABEL: @extract_elt0_tfe_image_load_1d_v4f32i32_i32(
3804; CHECK-NEXT: %data = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
3805define amdgpu_ps float @extract_elt0_tfe_image_load_1d_v4f32i32_i32(i32 %s, <8 x i32> inreg %rsrc) #0 {
3806  %data = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
3807  %rgba = extractvalue { <4 x float>, i32 } %data, 0
3808  %elt0 = extractelement <4 x float> %rgba, i32 0
3809  ret float %elt0
3810}
3811
3812declare {<4 x float>, i32} @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32, i32, <8 x i32>, i32, i32) #1
3813
3814; CHECK: @tfe_check_assert(
3815; CHECK: %data = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 1, i32 undef, i32 undef, <8 x i32> undef, i32 0, i32 1)
3816; CHECK-NEXT: ret float %data
3817define amdgpu_hs float @tfe_check_assert() #0 {
3818  %data = call nsz <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 undef, i32 undef, <8 x i32> undef, i32 0, i32 1) #2
3819  %elt0 = extractelement <4 x float> %data, i32 0
3820  ret float %elt0
3821}
3822
3823declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1
3824
3825attributes #0 = { nounwind }
3826attributes #1 = { nounwind readonly }
3827
3828!0 = !{float 2.500000e+00}
3829