;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s

; Instruction-selection test for the llvm.SI.gather4.* intrinsics.
;
; The two run lines above compile this file with llc for -mcpu=verde and
; -mcpu=tonga and pipe the assembly through FileCheck using the default prefix,
; so both subtargets must satisfy the same expectations.
;
; Almost every function below follows one template:
;   * call one gather4 intrinsic variant with undef address / resource /
;     sampler operands,
;   * extract the four result lanes and pass them to llvm.SI.export so the
;     call is kept alive,
;   * expect the matching image_gather4* mnemonic with "dmask:0x1 da".
; The register operands are matched with regexes only; exact register numbers
; are deliberately not pinned.
;
; NOTE(review): every template call passes i32 1 as the fourth and seventh
; operands and expects "dmask:0x1 da", while gather4_sgpr_bug passes 8 and 0
; and expects "dmask:0x8" with no "da". Presumably those operands are the dmask
; and the da flag -- confirm against the intrinsic definition.
;
; Function-name suffixes mirror the intrinsic name: _v2 / _v8 select the
; <2 x i32> / <8 x i32> address overload (no suffix is <4 x i32>, except where
; only a v8i32 overload is exercised).

; ---- Plain variants: gather4, .cl, .l, .b, .b.cl, .lz ----

;CHECK-LABEL: {{^}}gather4_v2:
;CHECK: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_v2() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.v2i32(<2 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}

;CHECK-LABEL: {{^}}gather4:
;CHECK: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}

;CHECK-LABEL: {{^}}gather4_cl:
;CHECK: image_gather4_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_cl() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}

;CHECK-LABEL: {{^}}gather4_l:
;CHECK: image_gather4_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_l() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.l.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}

;CHECK-LABEL: {{^}}gather4_b:
;CHECK: image_gather4_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_b() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.b.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}

;CHECK-LABEL: {{^}}gather4_b_cl:
;CHECK: image_gather4_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_b_cl() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.b.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}

;CHECK-LABEL: {{^}}gather4_b_cl_v8:
;CHECK: image_gather4_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_b_cl_v8() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.b.cl.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}

;CHECK-LABEL: {{^}}gather4_lz_v2:
;CHECK: image_gather4_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_lz_v2() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}

;CHECK-LABEL: {{^}}gather4_lz:
;CHECK: image_gather4_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_lz() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.lz.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}


; ---- ".o" variants: each intrinsic above with an .o suffix, expecting _o ----

;CHECK-LABEL: {{^}}gather4_o:
;CHECK: image_gather4_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_o() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}

;CHECK-LABEL: {{^}}gather4_cl_o:
;CHECK: image_gather4_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_cl_o() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}

;CHECK-LABEL: {{^}}gather4_cl_o_v8:
;CHECK: image_gather4_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_cl_o_v8() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.cl.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}

;CHECK-LABEL: {{^}}gather4_l_o:
;CHECK: image_gather4_l_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_l_o() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.l.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}

;CHECK-LABEL: {{^}}gather4_l_o_v8:
;CHECK: image_gather4_l_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_l_o_v8() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.l.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}

;CHECK-LABEL: {{^}}gather4_b_o:
;CHECK: image_gather4_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_b_o() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.b.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}

;CHECK-LABEL: {{^}}gather4_b_o_v8:
;CHECK: image_gather4_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_b_o_v8() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.b.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}

;CHECK-LABEL: {{^}}gather4_b_cl_o:
;CHECK: image_gather4_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_b_cl_o() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.b.cl.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}

;CHECK-LABEL: {{^}}gather4_lz_o:
;CHECK: image_gather4_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_lz_o() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.lz.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}


; ---- ".c" variants: gather4.c.*, expecting image_gather4_c* ----

;CHECK-LABEL: {{^}}gather4_c:
;CHECK: image_gather4_c {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.c.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}

;CHECK-LABEL: {{^}}gather4_c_cl:
;CHECK: image_gather4_c_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c_cl() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.c.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}

;CHECK-LABEL: {{^}}gather4_c_cl_v8:
;CHECK: image_gather4_c_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c_cl_v8() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.c.cl.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}

;CHECK-LABEL: {{^}}gather4_c_l:
;CHECK: image_gather4_c_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c_l() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.c.l.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}

;CHECK-LABEL: {{^}}gather4_c_l_v8:
;CHECK: image_gather4_c_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c_l_v8() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.c.l.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}

;CHECK-LABEL: {{^}}gather4_c_b:
;CHECK: image_gather4_c_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c_b() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.c.b.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}

;CHECK-LABEL: {{^}}gather4_c_b_v8:
;CHECK: image_gather4_c_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c_b_v8() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.c.b.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}

;CHECK-LABEL: {{^}}gather4_c_b_cl:
;CHECK: image_gather4_c_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c_b_cl() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.c.b.cl.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}

;CHECK-LABEL: {{^}}gather4_c_lz:
;CHECK: image_gather4_c_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c_lz() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.c.lz.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}


; ---- ".c.*.o" variants: gather4.c.*.o, expecting image_gather4_c*_o ----

;CHECK-LABEL: {{^}}gather4_c_o:
;CHECK: image_gather4_c_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c_o() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.c.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}

;CHECK-LABEL: {{^}}gather4_c_o_v8:
;CHECK: image_gather4_c_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c_o_v8() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.c.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}

;CHECK-LABEL: {{^}}gather4_c_cl_o:
;CHECK: image_gather4_c_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c_cl_o() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.c.cl.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}

;CHECK-LABEL: {{^}}gather4_c_l_o:
;CHECK: image_gather4_c_l_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c_l_o() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.c.l.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}

;CHECK-LABEL: {{^}}gather4_c_b_o:
;CHECK: image_gather4_c_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c_b_o() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.c.b.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}

;CHECK-LABEL: {{^}}gather4_c_b_cl_o:
;CHECK: image_gather4_c_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c_b_cl_o() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.c.b.cl.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}

;CHECK-LABEL: {{^}}gather4_c_lz_o:
;CHECK: image_gather4_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c_lz_o() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.c.lz.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}

;CHECK-LABEL: {{^}}gather4_c_lz_o_v8:
;CHECK: image_gather4_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c_lz_o_v8() {
main_body:
  %r = call <4 x float> @llvm.SI.gather4.c.lz.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}

; ---- Regression test ----
; Unlike the template tests, the sampler operand here is a loaded <4 x i32>
; whose element 0 is overwritten with 0. The expectations capture the SGPR
; range written by the load (LO/HI), require the low register to be set to 0,
; and require the same LO:HI range to be used as the gather's last operand.

;CHECK-LABEL: {{^}}gather4_sgpr_bug:
;
; This crashed at some point due to a bug in FixSGPRCopies. Derived from the
; report in https://bugs.freedesktop.org/show_bug.cgi?id=96877
;
;CHECK: s_load_dwordx4 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]], {{s\[[0-9]+:[0-9]+\]}}, 0x0
;CHECK: s_waitcnt lgkmcnt(0)
;CHECK: s_mov_b32 s[[LO]], 0
;CHECK: image_gather4_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, s{{\[}}[[LO]]:[[HI]]] dmask:0x8
define amdgpu_ps float @gather4_sgpr_bug() {
main_body:
  %tmp = load <4 x i32>, <4 x i32> addrspace(2)* undef, align 16
  %tmp1 = insertelement <4 x i32> %tmp, i32 0, i32 0
  %tmp2 = call <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32> undef, <8 x i32> undef, <4 x i32> %tmp1, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  %tmp4 = extractelement <4 x float> %tmp2, i32 1
  %tmp9 = fadd float undef, %tmp4
  ret float %tmp9
}

; ---- Intrinsic declarations, grouped in the same order as the tests ----

declare <4 x float> @llvm.SI.gather4.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.b.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.b.cl.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.lz.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0

declare <4 x float> @llvm.SI.gather4.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.cl.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.l.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.b.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.b.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.b.cl.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.lz.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0

declare <4 x float> @llvm.SI.gather4.c.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.c.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.c.cl.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.c.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.c.l.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.c.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.c.b.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.c.b.cl.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.c.lz.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0

declare <4 x float> @llvm.SI.gather4.c.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.c.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.c.cl.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.c.l.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.c.b.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.c.b.cl.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.c.lz.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.c.lz.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0

; Sink for the gathered lanes; declared without the #0 attribute group.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { nounwind readnone }