1 /*
2  * Copyright © 2016 Bas Nieuwenhuizen
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "ac_nir_to_llvm.h"
25 
26 #include "ac_binary.h"
27 #include "ac_llvm_build.h"
28 #include "ac_llvm_util.h"
29 #include "ac_shader_abi.h"
30 #include "ac_shader_util.h"
31 #include "nir/nir.h"
32 #include "nir/nir_deref.h"
33 #include "sid.h"
34 #include "util/bitscan.h"
35 #include "util/u_math.h"
36 #include <llvm/Config/llvm-config.h>
37 
38 struct ac_nir_context {
39    struct ac_llvm_context ac;
40    struct ac_shader_abi *abi;
41    const struct ac_shader_args *args;
42 
43    gl_shader_stage stage;
44    shader_info *info;
45 
46    LLVMValueRef *ssa_defs;
47 
48    LLVMValueRef scratch;
49    LLVMValueRef constant_data;
50 
51    struct hash_table *defs;
52    struct hash_table *phis;
53    struct hash_table *vars;
54    struct hash_table *verified_interp;
55 
56    LLVMValueRef main_function;
57    LLVMBasicBlockRef continue_block;
58    LLVMBasicBlockRef break_block;
59 };
60 
61 static LLVMValueRef get_sampler_desc_index(struct ac_nir_context *ctx, nir_deref_instr *deref_instr,
62                                            const nir_instr *instr, bool image);
63 
64 static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, nir_deref_instr *deref_instr,
65                                      enum ac_descriptor_type desc_type, const nir_instr *instr,
66                                      LLVMValueRef index, bool image, bool write);
67 
68 static LLVMTypeRef get_def_type(struct ac_nir_context *ctx, const nir_ssa_def *def)
69 {
70    LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, def->bit_size);
71    if (def->num_components > 1) {
72       type = LLVMVectorType(type, def->num_components);
73    }
74    return type;
75 }
76 
77 static LLVMValueRef get_src(struct ac_nir_context *nir, nir_src src)
78 {
79    assert(src.is_ssa);
80    return nir->ssa_defs[src.ssa->index];
81 }
82 
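/* Turn an offset taken from the NIR source into a pointer into LDS (shared
 * memory), cast to a pointer to an integer of the requested bit size.
 */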
83 static LLVMValueRef get_memory_ptr(struct ac_nir_context *ctx, nir_src src, unsigned bit_size)
84 {
85    LLVMValueRef ptr = get_src(ctx, src);
86    ptr = LLVMBuildGEP(ctx->ac.builder, ctx->ac.lds, &ptr, 1, "");
87    int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
88 
89    LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, bit_size);
90 
91    return LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(type, addr_space), "");
92 }
93 
94 static LLVMBasicBlockRef get_block(struct ac_nir_context *nir, const struct nir_block *b)
95 {
96    struct hash_entry *entry = _mesa_hash_table_search(nir->defs, b);
97    return (LLVMBasicBlockRef)entry->data;
98 }
99 
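/* Fetch an ALU source and apply its swizzle, extracting, broadcasting or
 * shuffling components so the result has the requested number of components.
 */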
100 static LLVMValueRef get_alu_src(struct ac_nir_context *ctx, nir_alu_src src,
101                                 unsigned num_components)
102 {
103    LLVMValueRef value = get_src(ctx, src.src);
104    bool need_swizzle = false;
105 
106    assert(value);
107    unsigned src_components = ac_get_llvm_num_components(value);
108    for (unsigned i = 0; i < num_components; ++i) {
109       assert(src.swizzle[i] < src_components);
110       if (src.swizzle[i] != i)
111          need_swizzle = true;
112    }
113 
114    if (need_swizzle || num_components != src_components) {
115       LLVMValueRef masks[] = {LLVMConstInt(ctx->ac.i32, src.swizzle[0], false),
116                               LLVMConstInt(ctx->ac.i32, src.swizzle[1], false),
117                               LLVMConstInt(ctx->ac.i32, src.swizzle[2], false),
118                               LLVMConstInt(ctx->ac.i32, src.swizzle[3], false)};
119 
120       if (src_components > 1 && num_components == 1) {
121          value = LLVMBuildExtractElement(ctx->ac.builder, value, masks[0], "");
122       } else if (src_components == 1 && num_components > 1) {
123          LLVMValueRef values[] = {value, value, value, value};
124          value = ac_build_gather_values(&ctx->ac, values, num_components);
125       } else {
126          LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
127          value = LLVMBuildShuffleVector(ctx->ac.builder, value, value, swizzle, "");
128       }
129    }
130    assert(!src.negate);
131    assert(!src.abs);
132    return value;
133 }
134 
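/* Integer comparison that also handles mixed pointer/integer operands by
 * promoting the integer side with inttoptr before emitting the icmp.
 */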
135 static LLVMValueRef emit_int_cmp(struct ac_llvm_context *ctx, LLVMIntPredicate pred,
136                                  LLVMValueRef src0, LLVMValueRef src1)
137 {
138    LLVMTypeRef src0_type = LLVMTypeOf(src0);
139    LLVMTypeRef src1_type = LLVMTypeOf(src1);
140 
141    if (LLVMGetTypeKind(src0_type) == LLVMPointerTypeKind &&
142        LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) {
143       src1 = LLVMBuildIntToPtr(ctx->builder, src1, src0_type, "");
144    } else if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
145               LLVMGetTypeKind(src0_type) != LLVMPointerTypeKind) {
146       src0 = LLVMBuildIntToPtr(ctx->builder, src0, src1_type, "");
147    }
148 
149    return LLVMBuildICmp(ctx->builder, pred, src0, src1, "");
150 }
151 
152 static LLVMValueRef emit_float_cmp(struct ac_llvm_context *ctx, LLVMRealPredicate pred,
153                                    LLVMValueRef src0, LLVMValueRef src1)
154 {
155    src0 = ac_to_float(ctx, src0);
156    src1 = ac_to_float(ctx, src1);
157    return LLVMBuildFCmp(ctx->builder, pred, src0, src1, "");
158 }
159 
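/* Call a one-operand float intrinsic, appending the overload suffix for the
 * operand type to the name (e.g. "llvm.floor" becomes "llvm.floor.f32").
 */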
160 static LLVMValueRef emit_intrin_1f_param(struct ac_llvm_context *ctx, const char *intrin,
161                                          LLVMTypeRef result_type, LLVMValueRef src0)
162 {
163    char name[64], type[64];
164    LLVMValueRef params[] = {
165       ac_to_float(ctx, src0),
166    };
167 
168    ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
169    ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
170    assert(length < sizeof(name));
171    return ac_build_intrinsic(ctx, name, result_type, params, 1, AC_FUNC_ATTR_READNONE);
172 }
173 
174 static LLVMValueRef emit_intrin_1f_param_scalar(struct ac_llvm_context *ctx, const char *intrin,
175                                                 LLVMTypeRef result_type, LLVMValueRef src0)
176 {
177    if (LLVMGetTypeKind(result_type) != LLVMVectorTypeKind)
178       return emit_intrin_1f_param(ctx, intrin, result_type, src0);
179 
180    LLVMTypeRef elem_type = LLVMGetElementType(result_type);
181    LLVMValueRef ret = LLVMGetUndef(result_type);
182 
183    /* Scalarize the intrinsic, because vectors are not supported. */
184    for (unsigned i = 0; i < LLVMGetVectorSize(result_type); i++) {
185       char name[64], type[64];
186       LLVMValueRef params[] = {
187          ac_to_float(ctx, ac_llvm_extract_elem(ctx, src0, i)),
188       };
189 
190       ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
191       ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
192       assert(length < sizeof(name));
193       ret = LLVMBuildInsertElement(
194          ctx->builder, ret,
195          ac_build_intrinsic(ctx, name, elem_type, params, 1, AC_FUNC_ATTR_READNONE),
196          LLVMConstInt(ctx->i32, i, 0), "");
197    }
198    return ret;
199 }
200 
201 static LLVMValueRef emit_intrin_2f_param(struct ac_llvm_context *ctx, const char *intrin,
202                                          LLVMTypeRef result_type, LLVMValueRef src0,
203                                          LLVMValueRef src1)
204 {
205    char name[64], type[64];
206    LLVMValueRef params[] = {
207       ac_to_float(ctx, src0),
208       ac_to_float(ctx, src1),
209    };
210 
211    ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
212    ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
213    assert(length < sizeof(name));
214    return ac_build_intrinsic(ctx, name, result_type, params, 2, AC_FUNC_ATTR_READNONE);
215 }
216 
217 static LLVMValueRef emit_intrin_3f_param(struct ac_llvm_context *ctx, const char *intrin,
218                                          LLVMTypeRef result_type, LLVMValueRef src0,
219                                          LLVMValueRef src1, LLVMValueRef src2)
220 {
221    char name[64], type[64];
222    LLVMValueRef params[] = {
223       ac_to_float(ctx, src0),
224       ac_to_float(ctx, src1),
225       ac_to_float(ctx, src2),
226    };
227 
228    ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
229    ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
230    assert(length < sizeof(name));
231    return ac_build_intrinsic(ctx, name, result_type, params, 3, AC_FUNC_ATTR_READNONE);
232 }
233 
234 static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx, LLVMValueRef src0, LLVMValueRef src1,
235                                LLVMValueRef src2)
236 {
237    LLVMTypeRef src1_type = LLVMTypeOf(src1);
238    LLVMTypeRef src2_type = LLVMTypeOf(src2);
239 
240    if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
241        LLVMGetTypeKind(src2_type) != LLVMPointerTypeKind) {
242       src2 = LLVMBuildIntToPtr(ctx->builder, src2, src1_type, "");
243    } else if (LLVMGetTypeKind(src2_type) == LLVMPointerTypeKind &&
244               LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) {
245       src1 = LLVMBuildIntToPtr(ctx->builder, src1, src2_type, "");
246    }
247 
248    return LLVMBuildSelect(ctx->builder, src0, ac_to_integer_or_pointer(ctx, src1),
249                           ac_to_integer_or_pointer(ctx, src2), "");
250 }
251 
252 static LLVMValueRef emit_iabs(struct ac_llvm_context *ctx, LLVMValueRef src0)
253 {
254    return ac_build_imax(ctx, src0, LLVMBuildNeg(ctx->builder, src0, ""));
255 }
256 
257 static LLVMValueRef emit_uint_carry(struct ac_llvm_context *ctx, const char *intrin,
258                                     LLVMValueRef src0, LLVMValueRef src1)
259 {
260    LLVMTypeRef ret_type;
261    LLVMTypeRef types[] = {ctx->i32, ctx->i1};
262    LLVMValueRef res;
263    LLVMValueRef params[] = {src0, src1};
264    ret_type = LLVMStructTypeInContext(ctx->context, types, 2, true);
265 
266    res = ac_build_intrinsic(ctx, intrin, ret_type, params, 2, AC_FUNC_ATTR_READNONE);
267 
268    res = LLVMBuildExtractValue(ctx->builder, res, 1, "");
269    res = LLVMBuildZExt(ctx->builder, res, ctx->i32, "");
270    return res;
271 }
272 
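/* Convert a 1-bit boolean (or a vec2 of booleans in the 16-bit case) to
 * 0.0/1.0 in the requested float bit size.
 */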
273 static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
274 {
275    assert(ac_get_elem_bits(ctx, LLVMTypeOf(src0)) == 1);
276 
277    switch (bitsize) {
278    case 16:
279       if (LLVMGetTypeKind(LLVMTypeOf(src0)) == LLVMVectorTypeKind) {
280          assert(LLVMGetVectorSize(LLVMTypeOf(src0)) == 2);
281          LLVMValueRef f[] = {
282             LLVMBuildSelect(ctx->builder, ac_llvm_extract_elem(ctx, src0, 0),
283                             ctx->f16_1, ctx->f16_0, ""),
284             LLVMBuildSelect(ctx->builder, ac_llvm_extract_elem(ctx, src0, 1),
285                             ctx->f16_1, ctx->f16_0, ""),
286          };
287          return ac_build_gather_values(ctx, f, 2);
288       }
289       return LLVMBuildSelect(ctx->builder, src0, ctx->f16_1, ctx->f16_0, "");
290    case 32:
291       return LLVMBuildSelect(ctx->builder, src0, ctx->f32_1, ctx->f32_0, "");
292    case 64:
293       return LLVMBuildSelect(ctx->builder, src0, ctx->f64_1, ctx->f64_0, "");
294    default:
295       unreachable("Unsupported bit size.");
296    }
297 }
298 
299 static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx, LLVMValueRef src0)
300 {
301    src0 = ac_to_float(ctx, src0);
302    LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0));
303    return LLVMBuildFCmp(ctx->builder, LLVMRealUNE, src0, zero, "");
304 }
305 
306 static LLVMValueRef emit_b2i(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
307 {
308    switch (bitsize) {
309    case 8:
310       return LLVMBuildSelect(ctx->builder, src0, ctx->i8_1, ctx->i8_0, "");
311    case 16:
312       return LLVMBuildSelect(ctx->builder, src0, ctx->i16_1, ctx->i16_0, "");
313    case 32:
314       return LLVMBuildSelect(ctx->builder, src0, ctx->i32_1, ctx->i32_0, "");
315    case 64:
316       return LLVMBuildSelect(ctx->builder, src0, ctx->i64_1, ctx->i64_0, "");
317    default:
318       unreachable("Unsupported bit size.");
319    }
320 }
321 
322 static LLVMValueRef emit_i2b(struct ac_llvm_context *ctx, LLVMValueRef src0)
323 {
324    LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0));
325    return LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, zero, "");
326 }
327 
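/* Implements fquantize2f16: round the value to f16 precision, flush f16
 * denormals to zero (natively on GFX8+, manually on GFX6-GFX7) and return the
 * result widened back to f32.
 */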
328 static LLVMValueRef emit_f2f16(struct ac_llvm_context *ctx, LLVMValueRef src0)
329 {
330    LLVMValueRef result;
331    LLVMValueRef cond = NULL;
332 
333    src0 = ac_to_float(ctx, src0);
334    result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, "");
335 
336    if (ctx->chip_class >= GFX8) {
337       LLVMValueRef args[2];
338       /* Check if the result is a denormal - and flush to 0 if so. */
339       args[0] = result;
340       args[1] = LLVMConstInt(ctx->i32, N_SUBNORMAL | P_SUBNORMAL, false);
341       cond =
342          ac_build_intrinsic(ctx, "llvm.amdgcn.class.f16", ctx->i1, args, 2, AC_FUNC_ATTR_READNONE);
343    }
344 
345    /* need to convert back up to f32 */
346    result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
347 
348    if (ctx->chip_class >= GFX8)
349       result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, "");
350    else {
351       /* for GFX6-GFX7 */
352       /* 0x38800000 is the smallest normal half-float value (2^-14) as a 32-bit float,
353        * so compare against it and flush the result to 0 if it's smaller.
354        */
355       LLVMValueRef temp, cond2;
356       temp = emit_intrin_1f_param(ctx, "llvm.fabs", ctx->f32, result);
357       cond = LLVMBuildFCmp(
358          ctx->builder, LLVMRealOGT,
359          LLVMBuildBitCast(ctx->builder, LLVMConstInt(ctx->i32, 0x38800000, false), ctx->f32, ""),
360          temp, "");
361       cond2 = LLVMBuildFCmp(ctx->builder, LLVMRealONE, temp, ctx->f32_0, "");
362       cond = LLVMBuildAnd(ctx->builder, cond, cond2, "");
363       result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, "");
364    }
365    return result;
366 }
367 
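/* Return the high 32 bits of an unsigned 32x32 multiply by widening both
 * operands to 64 bits. emit_imul_high below is the signed counterpart.
 */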
368 static LLVMValueRef emit_umul_high(struct ac_llvm_context *ctx, LLVMValueRef src0,
369                                    LLVMValueRef src1)
370 {
371    LLVMValueRef dst64, result;
372    src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, "");
373    src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, "");
374 
375    dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
376    dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
377    result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
378    return result;
379 }
380 
381 static LLVMValueRef emit_imul_high(struct ac_llvm_context *ctx, LLVMValueRef src0,
382                                    LLVMValueRef src1)
383 {
384    LLVMValueRef dst64, result;
385    src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
386    src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");
387 
388    dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
389    dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
390    result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
391    return result;
392 }
393 
394 static LLVMValueRef emit_bfm(struct ac_llvm_context *ctx, LLVMValueRef bits, LLVMValueRef offset)
395 {
396    /* mask = ((1 << bits) - 1) << offset */
397    return LLVMBuildShl(
398       ctx->builder,
399       LLVMBuildSub(ctx->builder, LLVMBuildShl(ctx->builder, ctx->i32_1, bits, ""), ctx->i32_1, ""),
400       offset, "");
401 }
402 
403 static LLVMValueRef emit_bitfield_select(struct ac_llvm_context *ctx, LLVMValueRef mask,
404                                          LLVMValueRef insert, LLVMValueRef base)
405 {
406    /* Calculate:
407     *   (mask & insert) | (~mask & base) = base ^ (mask & (insert ^ base))
408     * Use the right-hand side, which the LLVM backend can convert to V_BFI.
409     */
410    return LLVMBuildXor(
411       ctx->builder, base,
412       LLVMBuildAnd(ctx->builder, mask, LLVMBuildXor(ctx->builder, insert, base, ""), ""), "");
413 }
414 
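/* Extract the two components of a vec2, convert/pack them with the given
 * callback (e.g. ac_build_cvt_pkrtz_f16) and bitcast the packed result to i32.
 */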
415 static LLVMValueRef emit_pack_2x16(struct ac_llvm_context *ctx, LLVMValueRef src0,
416                                    LLVMValueRef (*pack)(struct ac_llvm_context *ctx,
417                                                         LLVMValueRef args[2]))
418 {
419    LLVMValueRef comp[2];
420 
421    src0 = ac_to_float(ctx, src0);
422    comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_0, "");
423    comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, "");
424 
425    return LLVMBuildBitCast(ctx->builder, pack(ctx, comp), ctx->i32, "");
426 }
427 
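/* Split a packed 32-bit value into its low and high 16-bit halves, reinterpret
 * each half as f16 and extend it to f32, returning a vec2 of floats.
 */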
428 static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx, LLVMValueRef src0)
429 {
430    LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
431    LLVMValueRef temps[2], val;
432    int i;
433 
434    for (i = 0; i < 2; i++) {
435       val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0;
436       val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
437       val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
438       temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
439    }
440    return ac_build_gather_values(ctx, temps, 2);
441 }
442 
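/* Emit screen-space derivatives: 'mask' picks the reference lane within the
 * pixel quad (left/top for fine derivatives, top-left for coarse) and 'idx'
 * selects the neighbouring pixel in X (1) or Y (2).
 */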
443 static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx, nir_op op, LLVMValueRef src0)
444 {
445    unsigned mask;
446    int idx;
447    LLVMValueRef result;
448 
449    if (op == nir_op_fddx_fine)
450       mask = AC_TID_MASK_LEFT;
451    else if (op == nir_op_fddy_fine)
452       mask = AC_TID_MASK_TOP;
453    else
454       mask = AC_TID_MASK_TOP_LEFT;
455 
456    /* For DDX we want the next X pixel, for DDY the next Y pixel. */
457    if (op == nir_op_fddx_fine || op == nir_op_fddx_coarse || op == nir_op_fddx)
458       idx = 1;
459    else
460       idx = 2;
461 
462    result = ac_build_ddxy(&ctx->ac, mask, idx, src0);
463    return result;
464 }
465 
466 struct waterfall_context {
467    LLVMBasicBlockRef phi_bb[2];
468    bool use_waterfall;
469 };
470 
471 /* To deal with divergent descriptors we can create a loop that handles all
472  * lanes with the same descriptor on a given iteration (henceforth a
473  * waterfall loop).
474  *
475  * These helpers create the beginning and end of the loop, leaving the caller
476  * to implement the body.
477  *
478  * params:
479  *  - ctx is the usual nir context
480  *  - wctx is a temporary struct containing some loop info. Can be left uninitialized.
481  *  - value is the possibly divergent value for which we build the loop
482  *  - divergent is whether value is actually divergent. If false we just pass
483  *     things through.
484  */
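/* Illustrative usage sketch (the variable names here are placeholders, not
 * taken from the callers):
 *
 *    struct waterfall_context wctx;
 *    LLVMValueRef index = enter_waterfall(ctx, &wctx, divergent_index, divergent);
 *    ...emit the body using the now-uniform 'index'...
 *    result = exit_waterfall(ctx, &wctx, result);
 *
 * Each enter_waterfall must be paired with a matching exit_waterfall.
 */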
485 static LLVMValueRef enter_waterfall(struct ac_nir_context *ctx, struct waterfall_context *wctx,
486                                     LLVMValueRef value, bool divergent)
487 {
488    /* If the app claims the value is divergent but it is constant, we can
489     * end up with a dynamic index of NULL. */
490    if (!value)
491       divergent = false;
492 
493    wctx->use_waterfall = divergent;
494    if (!divergent)
495       return value;
496 
497    ac_build_bgnloop(&ctx->ac, 6000);
498 
499    LLVMValueRef scalar_value = ac_build_readlane(&ctx->ac, value, NULL);
500 
501    LLVMValueRef active =
502       LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, value, scalar_value, "uniform_active");
503 
504    wctx->phi_bb[0] = LLVMGetInsertBlock(ctx->ac.builder);
505    ac_build_ifcc(&ctx->ac, active, 6001);
506 
507    return scalar_value;
508 }
509 
510 static LLVMValueRef exit_waterfall(struct ac_nir_context *ctx, struct waterfall_context *wctx,
511                                    LLVMValueRef value)
512 {
513    LLVMValueRef ret = NULL;
514    LLVMValueRef phi_src[2];
515    LLVMValueRef cc_phi_src[2] = {
516       LLVMConstInt(ctx->ac.i32, 0, false),
517       LLVMConstInt(ctx->ac.i32, 0xffffffff, false),
518    };
519 
520    if (!wctx->use_waterfall)
521       return value;
522 
523    wctx->phi_bb[1] = LLVMGetInsertBlock(ctx->ac.builder);
524 
525    ac_build_endif(&ctx->ac, 6001);
526 
527    if (value) {
528       phi_src[0] = LLVMGetUndef(LLVMTypeOf(value));
529       phi_src[1] = value;
530 
531       ret = ac_build_phi(&ctx->ac, LLVMTypeOf(value), 2, phi_src, wctx->phi_bb);
532    }
533 
534    /*
535     * By using the optimization barrier on the exit decision, we decouple
536     * the operations from the break, and hence avoid LLVM hoisting the
537     * operation into the break block.
538     */
539    LLVMValueRef cc = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, cc_phi_src, wctx->phi_bb);
540    ac_build_optimization_barrier(&ctx->ac, &cc);
541 
542    LLVMValueRef active =
543       LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, cc, ctx->ac.i32_0, "uniform_active2");
544    ac_build_ifcc(&ctx->ac, active, 6002);
545    ac_build_break(&ctx->ac);
546    ac_build_endif(&ctx->ac, 6002);
547 
548    ac_build_endloop(&ctx->ac, 6000);
549    return ret;
550 }
551 
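/* Translate a single NIR ALU instruction into LLVM IR and record the result in
 * ctx->ssa_defs, keyed by the destination SSA index.
 */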
552 static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
553 {
554    LLVMValueRef src[4], result = NULL;
555    unsigned num_components = instr->dest.dest.ssa.num_components;
556    unsigned src_components;
557    LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);
558 
559    assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
560    switch (instr->op) {
561    case nir_op_vec2:
562    case nir_op_vec3:
563    case nir_op_vec4:
564    case nir_op_unpack_32_2x16:
565    case nir_op_unpack_64_2x32:
566    case nir_op_unpack_64_4x16:
567       src_components = 1;
568       break;
569    case nir_op_pack_half_2x16:
570    case nir_op_pack_snorm_2x16:
571    case nir_op_pack_unorm_2x16:
572    case nir_op_pack_32_2x16:
573    case nir_op_pack_64_2x32:
574       src_components = 2;
575       break;
576    case nir_op_unpack_half_2x16:
577       src_components = 1;
578       break;
579    case nir_op_cube_face_coord:
580    case nir_op_cube_face_index:
581       src_components = 3;
582       break;
583    case nir_op_pack_64_4x16:
584       src_components = 4;
585       break;
586    default:
587       src_components = num_components;
588       break;
589    }
590    for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
591       src[i] = get_alu_src(ctx, instr->src[i], src_components);
592 
593    switch (instr->op) {
594    case nir_op_mov:
595       result = src[0];
596       break;
597    case nir_op_fneg:
598       src[0] = ac_to_float(&ctx->ac, src[0]);
599       result = LLVMBuildFNeg(ctx->ac.builder, src[0], "");
600       if (ctx->ac.float_mode == AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO) {
601          /* fneg will be optimized by the backend compiler to a plain sign-bit
602           * flip via XOR. This is probably an LLVM bug.
603           */
604          result = ac_build_canonicalize(&ctx->ac, result, instr->dest.dest.ssa.bit_size);
605       }
606       break;
607    case nir_op_ineg:
608       result = LLVMBuildNeg(ctx->ac.builder, src[0], "");
609       break;
610    case nir_op_inot:
611       result = LLVMBuildNot(ctx->ac.builder, src[0], "");
612       break;
613    case nir_op_iadd:
614       result = LLVMBuildAdd(ctx->ac.builder, src[0], src[1], "");
615       break;
616    case nir_op_fadd:
617       src[0] = ac_to_float(&ctx->ac, src[0]);
618       src[1] = ac_to_float(&ctx->ac, src[1]);
619       result = LLVMBuildFAdd(ctx->ac.builder, src[0], src[1], "");
620       break;
621    case nir_op_fsub:
622       src[0] = ac_to_float(&ctx->ac, src[0]);
623       src[1] = ac_to_float(&ctx->ac, src[1]);
624       result = LLVMBuildFSub(ctx->ac.builder, src[0], src[1], "");
625       break;
626    case nir_op_isub:
627       result = LLVMBuildSub(ctx->ac.builder, src[0], src[1], "");
628       break;
629    case nir_op_imul:
630       result = LLVMBuildMul(ctx->ac.builder, src[0], src[1], "");
631       break;
632    case nir_op_imod:
633       result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
634       break;
635    case nir_op_umod:
636       result = LLVMBuildURem(ctx->ac.builder, src[0], src[1], "");
637       break;
638    case nir_op_irem:
639       result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
640       break;
641    case nir_op_idiv:
642       result = LLVMBuildSDiv(ctx->ac.builder, src[0], src[1], "");
643       break;
644    case nir_op_udiv:
645       result = LLVMBuildUDiv(ctx->ac.builder, src[0], src[1], "");
646       break;
647    case nir_op_fmul:
648       src[0] = ac_to_float(&ctx->ac, src[0]);
649       src[1] = ac_to_float(&ctx->ac, src[1]);
650       result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], "");
651       break;
652    case nir_op_frcp:
653       /* For doubles, we need precise division to pass GLCTS. */
654       if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL && ac_get_type_size(def_type) == 8) {
655          result = LLVMBuildFDiv(ctx->ac.builder, ctx->ac.f64_1, ac_to_float(&ctx->ac, src[0]), "");
656       } else {
657          result = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.rcp",
658                                               ac_to_float_type(&ctx->ac, def_type), src[0]);
659       }
660       if (ctx->abi->clamp_div_by_zero)
661          result = ac_build_fmin(&ctx->ac, result,
662                                 LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX));
663       break;
664    case nir_op_iand:
665       result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");
666       break;
667    case nir_op_ior:
668       result = LLVMBuildOr(ctx->ac.builder, src[0], src[1], "");
669       break;
670    case nir_op_ixor:
671       result = LLVMBuildXor(ctx->ac.builder, src[0], src[1], "");
672       break;
673    case nir_op_ishl:
674       if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) <
675           ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
676          src[1] = LLVMBuildZExt(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");
677       else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) >
678                ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
679          src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");
680       result = LLVMBuildShl(ctx->ac.builder, src[0], src[1], "");
681       break;
682    case nir_op_ishr:
683       if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) <
684           ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
685          src[1] = LLVMBuildZExt(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");
686       else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) >
687                ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
688          src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");
689       result = LLVMBuildAShr(ctx->ac.builder, src[0], src[1], "");
690       break;
691    case nir_op_ushr:
692       if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) <
693           ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
694          src[1] = LLVMBuildZExt(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");
695       else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) >
696                ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
697          src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");
698       result = LLVMBuildLShr(ctx->ac.builder, src[0], src[1], "");
699       break;
700    case nir_op_ilt:
701       result = emit_int_cmp(&ctx->ac, LLVMIntSLT, src[0], src[1]);
702       break;
703    case nir_op_ine:
704       result = emit_int_cmp(&ctx->ac, LLVMIntNE, src[0], src[1]);
705       break;
706    case nir_op_ieq:
707       result = emit_int_cmp(&ctx->ac, LLVMIntEQ, src[0], src[1]);
708       break;
709    case nir_op_ige:
710       result = emit_int_cmp(&ctx->ac, LLVMIntSGE, src[0], src[1]);
711       break;
712    case nir_op_ult:
713       result = emit_int_cmp(&ctx->ac, LLVMIntULT, src[0], src[1]);
714       break;
715    case nir_op_uge:
716       result = emit_int_cmp(&ctx->ac, LLVMIntUGE, src[0], src[1]);
717       break;
718    case nir_op_feq:
719       result = emit_float_cmp(&ctx->ac, LLVMRealOEQ, src[0], src[1]);
720       break;
721    case nir_op_fneu:
722       result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]);
723       break;
724    case nir_op_flt:
725       result = emit_float_cmp(&ctx->ac, LLVMRealOLT, src[0], src[1]);
726       break;
727    case nir_op_fge:
728       result = emit_float_cmp(&ctx->ac, LLVMRealOGE, src[0], src[1]);
729       break;
730    case nir_op_fabs:
731       result =
732          emit_intrin_1f_param(&ctx->ac, "llvm.fabs", ac_to_float_type(&ctx->ac, def_type), src[0]);
733       if (ctx->ac.float_mode == AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO) {
734          /* fabs will be optimized by the backend compiler to a plain sign-bit
735           * clear via AND.
736           */
737          result = ac_build_canonicalize(&ctx->ac, result, instr->dest.dest.ssa.bit_size);
738       }
739       break;
740    case nir_op_fsat:
741       src[0] = ac_to_float(&ctx->ac, src[0]);
742       result = ac_build_fsat(&ctx->ac, src[0],
743                              ac_to_float_type(&ctx->ac, def_type));
744       break;
745    case nir_op_iabs:
746       result = emit_iabs(&ctx->ac, src[0]);
747       break;
748    case nir_op_imax:
749       result = ac_build_imax(&ctx->ac, src[0], src[1]);
750       break;
751    case nir_op_imin:
752       result = ac_build_imin(&ctx->ac, src[0], src[1]);
753       break;
754    case nir_op_umax:
755       result = ac_build_umax(&ctx->ac, src[0], src[1]);
756       break;
757    case nir_op_umin:
758       result = ac_build_umin(&ctx->ac, src[0], src[1]);
759       break;
760    case nir_op_isign:
761       result = ac_build_isign(&ctx->ac, src[0]);
762       break;
763    case nir_op_fsign:
764       src[0] = ac_to_float(&ctx->ac, src[0]);
765       result = ac_build_fsign(&ctx->ac, src[0]);
766       break;
767    case nir_op_ffloor:
768       result =
769          emit_intrin_1f_param(&ctx->ac, "llvm.floor", ac_to_float_type(&ctx->ac, def_type), src[0]);
770       break;
771    case nir_op_ftrunc:
772       result =
773          emit_intrin_1f_param(&ctx->ac, "llvm.trunc", ac_to_float_type(&ctx->ac, def_type), src[0]);
774       break;
775    case nir_op_fceil:
776       result =
777          emit_intrin_1f_param(&ctx->ac, "llvm.ceil", ac_to_float_type(&ctx->ac, def_type), src[0]);
778       break;
779    case nir_op_fround_even:
780       result =
781          emit_intrin_1f_param(&ctx->ac, "llvm.rint", ac_to_float_type(&ctx->ac, def_type), src[0]);
782       break;
783    case nir_op_ffract:
784       result = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.fract",
785                                            ac_to_float_type(&ctx->ac, def_type), src[0]);
786       break;
787    case nir_op_fsin:
788       result =
789          emit_intrin_1f_param(&ctx->ac, "llvm.sin", ac_to_float_type(&ctx->ac, def_type), src[0]);
790       break;
791    case nir_op_fcos:
792       result =
793          emit_intrin_1f_param(&ctx->ac, "llvm.cos", ac_to_float_type(&ctx->ac, def_type), src[0]);
794       break;
795    case nir_op_fsqrt:
796       result =
797          emit_intrin_1f_param(&ctx->ac, "llvm.sqrt", ac_to_float_type(&ctx->ac, def_type), src[0]);
798       break;
799    case nir_op_fexp2:
800       result =
801          emit_intrin_1f_param(&ctx->ac, "llvm.exp2", ac_to_float_type(&ctx->ac, def_type), src[0]);
802       break;
803    case nir_op_flog2:
804       result =
805          emit_intrin_1f_param(&ctx->ac, "llvm.log2", ac_to_float_type(&ctx->ac, def_type), src[0]);
806       break;
807    case nir_op_frsq:
808       result = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.rsq",
809                                            ac_to_float_type(&ctx->ac, def_type), src[0]);
810       if (ctx->abi->clamp_div_by_zero)
811          result = ac_build_fmin(&ctx->ac, result,
812                                 LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX));
813       break;
814    case nir_op_frexp_exp:
815       src[0] = ac_to_float(&ctx->ac, src[0]);
816       result = ac_build_frexp_exp(&ctx->ac, src[0], ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])));
817       if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) == 16)
818          result = LLVMBuildSExt(ctx->ac.builder, result, ctx->ac.i32, "");
819       break;
820    case nir_op_frexp_sig:
821       src[0] = ac_to_float(&ctx->ac, src[0]);
822       result = ac_build_frexp_mant(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
823       break;
824    case nir_op_fpow:
825       result = emit_intrin_2f_param(&ctx->ac, "llvm.pow", ac_to_float_type(&ctx->ac, def_type),
826                                     src[0], src[1]);
827       break;
828    case nir_op_fmax:
829       result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum", ac_to_float_type(&ctx->ac, def_type),
830                                     src[0], src[1]);
831       if (ctx->ac.chip_class < GFX9 && instr->dest.dest.ssa.bit_size == 32) {
832          /* Only pre-GFX9 chips do not flush denorms. */
833          result = ac_build_canonicalize(&ctx->ac, result, instr->dest.dest.ssa.bit_size);
834       }
835       break;
836    case nir_op_fmin:
837       result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum", ac_to_float_type(&ctx->ac, def_type),
838                                     src[0], src[1]);
839       if (ctx->ac.chip_class < GFX9 && instr->dest.dest.ssa.bit_size == 32) {
840          /* Only pre-GFX9 chips do not flush denorms. */
841          result = ac_build_canonicalize(&ctx->ac, result, instr->dest.dest.ssa.bit_size);
842       }
843       break;
844    case nir_op_ffma:
845       /* FMA is slow on gfx6-8, so it shouldn't be used. */
846       assert(instr->dest.dest.ssa.bit_size != 32 || ctx->ac.chip_class >= GFX9);
847       result = emit_intrin_3f_param(&ctx->ac, "llvm.fma", ac_to_float_type(&ctx->ac, def_type),
848                                     src[0], src[1], src[2]);
849       break;
850    case nir_op_ldexp:
851       src[0] = ac_to_float(&ctx->ac, src[0]);
852       if (ac_get_elem_bits(&ctx->ac, def_type) == 32)
853          result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f32", ctx->ac.f32, src, 2,
854                                      AC_FUNC_ATTR_READNONE);
855       else if (ac_get_elem_bits(&ctx->ac, def_type) == 16)
856          result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f16", ctx->ac.f16, src, 2,
857                                      AC_FUNC_ATTR_READNONE);
858       else
859          result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f64", ctx->ac.f64, src, 2,
860                                      AC_FUNC_ATTR_READNONE);
861       break;
862    case nir_op_bfm:
863       result = emit_bfm(&ctx->ac, src[0], src[1]);
864       break;
865    case nir_op_bitfield_select:
866       result = emit_bitfield_select(&ctx->ac, src[0], src[1], src[2]);
867       break;
868    case nir_op_ubfe:
869       result = ac_build_bfe(&ctx->ac, src[0], src[1], src[2], false);
870       break;
871    case nir_op_ibfe:
872       result = ac_build_bfe(&ctx->ac, src[0], src[1], src[2], true);
873       break;
874    case nir_op_bitfield_reverse:
875       result = ac_build_bitfield_reverse(&ctx->ac, src[0]);
876       break;
877    case nir_op_bit_count:
878       result = ac_build_bit_count(&ctx->ac, src[0]);
879       break;
880    case nir_op_vec2:
881    case nir_op_vec3:
882    case nir_op_vec4:
883       for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
884          src[i] = ac_to_integer(&ctx->ac, src[i]);
885       result = ac_build_gather_values(&ctx->ac, src, num_components);
886       break;
887    case nir_op_f2i8:
888    case nir_op_f2i16:
889    case nir_op_f2imp:
890    case nir_op_f2i32:
891    case nir_op_f2i64:
892       src[0] = ac_to_float(&ctx->ac, src[0]);
893       result = LLVMBuildFPToSI(ctx->ac.builder, src[0], def_type, "");
894       break;
895    case nir_op_f2u8:
896    case nir_op_f2u16:
897    case nir_op_f2ump:
898    case nir_op_f2u32:
899    case nir_op_f2u64:
900       src[0] = ac_to_float(&ctx->ac, src[0]);
901       result = LLVMBuildFPToUI(ctx->ac.builder, src[0], def_type, "");
902       break;
903    case nir_op_i2f16:
904    case nir_op_i2fmp:
905    case nir_op_i2f32:
906    case nir_op_i2f64:
907       result = LLVMBuildSIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
908       break;
909    case nir_op_u2f16:
910    case nir_op_u2fmp:
911    case nir_op_u2f32:
912    case nir_op_u2f64:
913       result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
914       break;
915    case nir_op_f2f16_rtz:
916    case nir_op_f2f16:
917    case nir_op_f2fmp:
918       src[0] = ac_to_float(&ctx->ac, src[0]);
919 
920       /* For OpenGL, we want fast packing with v_cvt_pkrtz_f16, but if we use it,
921        * all f32->f16 conversions have to round towards zero, because both scalar
922        * and vec2 down-conversions have to round equally.
923        */
924       if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL || instr->op == nir_op_f2f16_rtz) {
925          src[0] = ac_to_float(&ctx->ac, src[0]);
926 
927          if (LLVMTypeOf(src[0]) == ctx->ac.f64)
928             src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ctx->ac.f32, "");
929 
930          /* Fast-path conversion. This only works if NIR vectorized the
931           * conversion to a vec2 of 16-bit values.
932           */
933          if (LLVMTypeOf(src[0]) == ctx->ac.v2f32) {
934             LLVMValueRef args[] = {
935                ac_llvm_extract_elem(&ctx->ac, src[0], 0),
936                ac_llvm_extract_elem(&ctx->ac, src[0], 1),
937             };
938             result = ac_build_cvt_pkrtz_f16(&ctx->ac, args);
939             break;
940          }
941 
942          assert(ac_get_llvm_num_components(src[0]) == 1);
943          LLVMValueRef param[2] = {src[0], LLVMGetUndef(ctx->ac.f32)};
944          result = ac_build_cvt_pkrtz_f16(&ctx->ac, param);
945          result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
946       } else {
947          if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
948             result =
949                LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
950          else
951             result =
952                LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
953       }
954       break;
955    case nir_op_f2f16_rtne:
956    case nir_op_f2f32:
957    case nir_op_f2f64:
958       src[0] = ac_to_float(&ctx->ac, src[0]);
959       if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
960          result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
961       else
962          result =
963             LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
964       break;
965    case nir_op_u2u8:
966    case nir_op_u2u16:
967    case nir_op_u2u32:
968    case nir_op_u2u64:
969       if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
970          result = LLVMBuildZExt(ctx->ac.builder, src[0], def_type, "");
971       else
972          result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
973       break;
974    case nir_op_i2i8:
975    case nir_op_i2i16:
976    case nir_op_i2imp:
977    case nir_op_i2i32:
978    case nir_op_i2i64:
979       if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
980          result = LLVMBuildSExt(ctx->ac.builder, src[0], def_type, "");
981       else
982          result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
983       break;
984    case nir_op_bcsel:
985       result = emit_bcsel(&ctx->ac, src[0], src[1], src[2]);
986       break;
987    case nir_op_find_lsb:
988       result = ac_find_lsb(&ctx->ac, ctx->ac.i32, src[0]);
989       break;
990    case nir_op_ufind_msb:
991       result = ac_build_umsb(&ctx->ac, src[0], ctx->ac.i32);
992       break;
993    case nir_op_ifind_msb:
994       result = ac_build_imsb(&ctx->ac, src[0], ctx->ac.i32);
995       break;
996    case nir_op_uadd_carry:
997       result = emit_uint_carry(&ctx->ac, "llvm.uadd.with.overflow.i32", src[0], src[1]);
998       break;
999    case nir_op_usub_borrow:
1000       result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]);
1001       break;
1002    case nir_op_b2f16:
1003    case nir_op_b2f32:
1004    case nir_op_b2f64:
1005       result = emit_b2f(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
1006       break;
1007    case nir_op_f2b1:
1008       result = emit_f2b(&ctx->ac, src[0]);
1009       break;
1010    case nir_op_b2i8:
1011    case nir_op_b2i16:
1012    case nir_op_b2i32:
1013    case nir_op_b2i64:
1014       result = emit_b2i(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
1015       break;
1016    case nir_op_i2b1:
1017    case nir_op_b2b1: /* after loads */
1018       result = emit_i2b(&ctx->ac, src[0]);
1019       break;
1020    case nir_op_b2b16: /* before stores */
1021       result = LLVMBuildZExt(ctx->ac.builder, src[0], ctx->ac.i16, "");
1022       break;
1023    case nir_op_b2b32: /* before stores */
1024       result = LLVMBuildZExt(ctx->ac.builder, src[0], ctx->ac.i32, "");
1025       break;
1026    case nir_op_fquantize2f16:
1027       result = emit_f2f16(&ctx->ac, src[0]);
1028       break;
1029    case nir_op_umul_high:
1030       result = emit_umul_high(&ctx->ac, src[0], src[1]);
1031       break;
1032    case nir_op_imul_high:
1033       result = emit_imul_high(&ctx->ac, src[0], src[1]);
1034       break;
1035    case nir_op_pack_half_2x16:
1036       result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pkrtz_f16);
1037       break;
1038    case nir_op_pack_half_2x16_split:
1039       src[0] = ac_to_float(&ctx->ac, src[0]);
1040       src[1] = ac_to_float(&ctx->ac, src[1]);
1041       result = LLVMBuildBitCast(ctx->ac.builder,
1042                                 ac_build_cvt_pkrtz_f16(&ctx->ac, src),
1043                                 ctx->ac.i32, "");
1044       break;
1045    case nir_op_pack_snorm_2x16:
1046       result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pknorm_i16);
1047       break;
1048    case nir_op_pack_unorm_2x16:
1049       result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pknorm_u16);
1050       break;
1051    case nir_op_unpack_half_2x16:
1052       result = emit_unpack_half_2x16(&ctx->ac, src[0]);
1053       break;
1054    case nir_op_unpack_half_2x16_split_x: {
1055       assert(ac_get_llvm_num_components(src[0]) == 1);
1056       LLVMValueRef tmp = emit_unpack_half_2x16(&ctx->ac, src[0]);
1057       result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_0, "");
1058       break;
1059    }
1060    case nir_op_unpack_half_2x16_split_y: {
1061       assert(ac_get_llvm_num_components(src[0]) == 1);
1062       LLVMValueRef tmp = emit_unpack_half_2x16(&ctx->ac, src[0]);
1063       result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_1, "");
1064       break;
1065    }
1066    case nir_op_fddx:
1067    case nir_op_fddy:
1068    case nir_op_fddx_fine:
1069    case nir_op_fddy_fine:
1070    case nir_op_fddx_coarse:
1071    case nir_op_fddy_coarse:
1072       result = emit_ddxy(ctx, instr->op, src[0]);
1073       break;
1074 
1075    case nir_op_unpack_64_4x16: {
1076       result = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v4i16, "");
1077       break;
1078    }
1079    case nir_op_pack_64_4x16: {
1080       result = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.i64, "");
1081       break;
1082    }
1083 
1084    case nir_op_unpack_64_2x32: {
1085       result = LLVMBuildBitCast(ctx->ac.builder, src[0],
1086             ctx->ac.v2i32, "");
1087       break;
1088    }
1089    case nir_op_unpack_64_2x32_split_x: {
1090       assert(ac_get_llvm_num_components(src[0]) == 1);
1091       LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v2i32, "");
1092       result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_0, "");
1093       break;
1094    }
1095    case nir_op_unpack_64_2x32_split_y: {
1096       assert(ac_get_llvm_num_components(src[0]) == 1);
1097       LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v2i32, "");
1098       result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_1, "");
1099       break;
1100    }
1101 
1102    case nir_op_pack_64_2x32: {
1103       result = LLVMBuildBitCast(ctx->ac.builder, src[0],
1104             ctx->ac.i64, "");
1105       break;
1106    }
1107    case nir_op_pack_64_2x32_split: {
1108       LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, src, 2);
1109       result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, "");
1110       break;
1111    }
1112 
1113    case nir_op_pack_32_2x16: {
1114       result = LLVMBuildBitCast(ctx->ac.builder, src[0],
1115             ctx->ac.i32, "");
1116       break;
1117    }
1118    case nir_op_pack_32_2x16_split: {
1119       LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, src, 2);
1120       result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i32, "");
1121       break;
1122    }
1123 
1124    case nir_op_unpack_32_2x16: {
1125       result = LLVMBuildBitCast(ctx->ac.builder, src[0],
1126             ctx->ac.v2i16, "");
1127       break;
1128    }
1129    case nir_op_unpack_32_2x16_split_x: {
1130       LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v2i16, "");
1131       result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_0, "");
1132       break;
1133    }
1134    case nir_op_unpack_32_2x16_split_y: {
1135       LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v2i16, "");
1136       result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_1, "");
1137       break;
1138    }
1139 
1140    case nir_op_cube_face_coord: {
1141       src[0] = ac_to_float(&ctx->ac, src[0]);
1142       LLVMValueRef results[2];
1143       LLVMValueRef in[3];
1144       for (unsigned chan = 0; chan < 3; chan++)
1145          in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
1146       results[0] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubesc", ctx->ac.f32, in, 3,
1147                                       AC_FUNC_ATTR_READNONE);
1148       results[1] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubetc", ctx->ac.f32, in, 3,
1149                                       AC_FUNC_ATTR_READNONE);
1150       LLVMValueRef ma = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubema", ctx->ac.f32, in, 3,
1151                                            AC_FUNC_ATTR_READNONE);
1152       results[0] = ac_build_fdiv(&ctx->ac, results[0], ma);
1153       results[1] = ac_build_fdiv(&ctx->ac, results[1], ma);
1154       LLVMValueRef offset = LLVMConstReal(ctx->ac.f32, 0.5);
1155       results[0] = LLVMBuildFAdd(ctx->ac.builder, results[0], offset, "");
1156       results[1] = LLVMBuildFAdd(ctx->ac.builder, results[1], offset, "");
1157       result = ac_build_gather_values(&ctx->ac, results, 2);
1158       break;
1159    }
1160 
1161    case nir_op_cube_face_index: {
1162       src[0] = ac_to_float(&ctx->ac, src[0]);
1163       LLVMValueRef in[3];
1164       for (unsigned chan = 0; chan < 3; chan++)
1165          in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
1166       result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubeid", ctx->ac.f32, in, 3,
1167                                   AC_FUNC_ATTR_READNONE);
1168       break;
1169    }
1170 
1171    default:
1172       fprintf(stderr, "Unknown NIR alu instr: ");
1173       nir_print_instr(&instr->instr, stderr);
1174       fprintf(stderr, "\n");
1175       abort();
1176    }
1177 
1178    if (result) {
1179       assert(instr->dest.dest.is_ssa);
1180       result = ac_to_integer_or_pointer(&ctx->ac, result);
1181       ctx->ssa_defs[instr->dest.dest.ssa.index] = result;
1182    }
1183 }
1184 
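/* Materialize a NIR load_const as an LLVM integer constant (or constant
 * vector) of the matching bit size and store it in ctx->ssa_defs.
 */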
1185 static void visit_load_const(struct ac_nir_context *ctx, const nir_load_const_instr *instr)
1186 {
1187    LLVMValueRef values[4], value = NULL;
1188    LLVMTypeRef element_type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
1189 
1190    for (unsigned i = 0; i < instr->def.num_components; ++i) {
1191       switch (instr->def.bit_size) {
1192       case 1:
1193          values[i] = LLVMConstInt(element_type, instr->value[i].b, false);
1194          break;
1195       case 8:
1196          values[i] = LLVMConstInt(element_type, instr->value[i].u8, false);
1197          break;
1198       case 16:
1199          values[i] = LLVMConstInt(element_type, instr->value[i].u16, false);
1200          break;
1201       case 32:
1202          values[i] = LLVMConstInt(element_type, instr->value[i].u32, false);
1203          break;
1204       case 64:
1205          values[i] = LLVMConstInt(element_type, instr->value[i].u64, false);
1206          break;
1207       default:
1208          fprintf(stderr, "unsupported nir load_const bit_size: %d\n", instr->def.bit_size);
1209          abort();
1210       }
1211    }
1212    if (instr->def.num_components > 1) {
1213       value = LLVMConstVector(values, instr->def.num_components);
1214    } else
1215       value = values[0];
1216 
1217    ctx->ssa_defs[instr->def.index] = value;
1218 }
1219 
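/* Read the buffer size from dword 2 of the descriptor; on GFX8, optionally
 * convert bytes to elements by dividing by the stride (TXQ expects elements).
 */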
1220 static LLVMValueRef get_buffer_size(struct ac_nir_context *ctx, LLVMValueRef descriptor,
1221                                     bool in_elements)
1222 {
1223    LLVMValueRef size =
1224       LLVMBuildExtractElement(ctx->ac.builder, descriptor, LLVMConstInt(ctx->ac.i32, 2, false), "");
1225 
1226    /* GFX8 only */
1227    if (ctx->ac.chip_class == GFX8 && in_elements) {
1228       /* On GFX8, the descriptor contains the size in bytes,
1229        * but TXQ must return the size in elements.
1230        * The stride is always non-zero for resources using TXQ.
1231        */
1232       LLVMValueRef stride = LLVMBuildExtractElement(ctx->ac.builder, descriptor, ctx->ac.i32_1, "");
1233       stride = LLVMBuildLShr(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 16, false), "");
1234       stride = LLVMBuildAnd(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 0x3fff, false), "");
1235 
1236       size = LLVMBuildUDiv(ctx->ac.builder, size, stride, "");
1237    }
1238    return size;
1239 }
1240 
1241 /* Gather4 should follow the same rules as bilinear filtering, but the hardware
1242  * incorrectly forces nearest filtering if the texture format is integer.
1243  * The only effect it has on Gather4, which always returns 4 texels for
1244  * bilinear filtering, is that the final coordinates are off by 0.5 of
1245  * the texel size.
1246  *
1247  * The workaround is to subtract 0.5 from the unnormalized coordinates,
1248  * or (0.5 / size) from the normalized coordinates.
1249  *
1250  * However, cube textures with 8_8_8_8 data formats require a different
1251  * workaround of overriding the num format to USCALED/SSCALED. This would lose
1252  * precision in 32-bit data formats, so it needs to be applied dynamically at
1253  * runtime. In this case, return an i1 value that indicates whether the
1254  * descriptor was overridden (and hence a fixup of the sampler result is needed).
1255  */
1256 static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx, nir_variable *var,
1257                                           struct ac_image_args *args, const nir_tex_instr *instr)
1258 {
1259    const struct glsl_type *type = glsl_without_array(var->type);
1260    enum glsl_base_type stype = glsl_get_sampler_result_type(type);
1261    LLVMValueRef wa_8888 = NULL;
1262    LLVMValueRef half_texel[2];
1263    LLVMValueRef result;
1264 
1265    assert(stype == GLSL_TYPE_INT || stype == GLSL_TYPE_UINT);
1266 
1267    if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
1268       LLVMValueRef formats;
1269       LLVMValueRef data_format;
1270       LLVMValueRef wa_formats;
1271 
1272       formats = LLVMBuildExtractElement(ctx->builder, args->resource, ctx->i32_1, "");
1273 
1274       data_format = LLVMBuildLShr(ctx->builder, formats, LLVMConstInt(ctx->i32, 20, false), "");
1275       data_format =
1276          LLVMBuildAnd(ctx->builder, data_format, LLVMConstInt(ctx->i32, (1u << 6) - 1, false), "");
1277       wa_8888 = LLVMBuildICmp(ctx->builder, LLVMIntEQ, data_format,
1278                               LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false), "");
1279 
1280       uint32_t wa_num_format = stype == GLSL_TYPE_UINT
1281                                   ? S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_USCALED)
1282                                   : S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_SSCALED);
1283       wa_formats = LLVMBuildAnd(ctx->builder, formats,
1284                                 LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT, false), "");
1285       wa_formats =
1286          LLVMBuildOr(ctx->builder, wa_formats, LLVMConstInt(ctx->i32, wa_num_format, false), "");
1287 
1288       formats = LLVMBuildSelect(ctx->builder, wa_8888, wa_formats, formats, "");
1289       args->resource =
1290          LLVMBuildInsertElement(ctx->builder, args->resource, formats, ctx->i32_1, "");
1291    }
1292 
1293    if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
1294       assert(!wa_8888);
1295       half_texel[0] = half_texel[1] = LLVMConstReal(ctx->f32, -0.5);
1296    } else {
1297       struct ac_image_args resinfo = {0};
1298       LLVMBasicBlockRef bbs[2];
1299 
1300       LLVMValueRef unnorm = NULL;
1301       LLVMValueRef default_offset = ctx->f32_0;
1302       if (instr->sampler_dim == GLSL_SAMPLER_DIM_2D && !instr->is_array) {
1303          /* In Vulkan, whether the sampler uses unnormalized
1304           * coordinates or not is a dynamic property of the
1305           * sampler. Hence, to figure out whether or not we
1306           * need to divide by the texture size, we need to test
1307           * the sampler at runtime. This tests the bit set by
1308           * radv_init_sampler().
1309           */
1310          LLVMValueRef sampler0 =
1311             LLVMBuildExtractElement(ctx->builder, args->sampler, ctx->i32_0, "");
1312          sampler0 = LLVMBuildLShr(ctx->builder, sampler0, LLVMConstInt(ctx->i32, 15, false), "");
1313          sampler0 = LLVMBuildAnd(ctx->builder, sampler0, ctx->i32_1, "");
1314          unnorm = LLVMBuildICmp(ctx->builder, LLVMIntEQ, sampler0, ctx->i32_1, "");
1315          default_offset = LLVMConstReal(ctx->f32, -0.5);
1316       }
1317 
1318       bbs[0] = LLVMGetInsertBlock(ctx->builder);
1319       if (wa_8888 || unnorm) {
1320          assert(!(wa_8888 && unnorm));
1321          LLVMValueRef not_needed = wa_8888 ? wa_8888 : unnorm;
1322          /* Skip the texture size query entirely if we don't need it. */
1323          ac_build_ifcc(ctx, LLVMBuildNot(ctx->builder, not_needed, ""), 2000);
1324          bbs[1] = LLVMGetInsertBlock(ctx->builder);
1325       }
1326 
1327       /* Query the texture size. */
1328       resinfo.dim = ac_get_sampler_dim(ctx->chip_class, instr->sampler_dim, instr->is_array);
1329       resinfo.opcode = ac_image_get_resinfo;
1330       resinfo.dmask = 0xf;
1331       resinfo.lod = ctx->i32_0;
1332       resinfo.resource = args->resource;
1333       resinfo.attributes = AC_FUNC_ATTR_READNONE;
1334       LLVMValueRef size = ac_build_image_opcode(ctx, &resinfo);
1335 
1336       /* Compute -0.5 / size. */
1337       for (unsigned c = 0; c < 2; c++) {
1338          half_texel[c] =
1339             LLVMBuildExtractElement(ctx->builder, size, LLVMConstInt(ctx->i32, c, 0), "");
1340          half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
1341          half_texel[c] = ac_build_fdiv(ctx, ctx->f32_1, half_texel[c]);
1342          half_texel[c] =
1343             LLVMBuildFMul(ctx->builder, half_texel[c], LLVMConstReal(ctx->f32, -0.5), "");
1344       }
1345 
1346       if (wa_8888 || unnorm) {
1347          ac_build_endif(ctx, 2000);
1348 
1349          for (unsigned c = 0; c < 2; c++) {
1350             LLVMValueRef values[2] = {default_offset, half_texel[c]};
1351             half_texel[c] = ac_build_phi(ctx, ctx->f32, 2, values, bbs);
1352          }
1353       }
1354    }
1355 
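   /* A rough worked example of the adjustment applied below: with
    * normalized coordinates and a 128-texel dimension, half_texel is
    * -0.5 / 128 = -0.00390625, i.e. half a texel; with unnormalized
    * rect/sampler coordinates the offset is simply -0.5. Adding it to both
    * coordinates nudges the gather footprint back into place.
    */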
1356    for (unsigned c = 0; c < 2; c++) {
1357       LLVMValueRef tmp;
1358       tmp = LLVMBuildBitCast(ctx->builder, args->coords[c], ctx->f32, "");
1359       args->coords[c] = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
1360    }
1361 
1362    args->attributes = AC_FUNC_ATTR_READNONE;
1363    result = ac_build_image_opcode(ctx, args);
1364 
1365    if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
1366       LLVMValueRef tmp, tmp2;
1367 
1368       /* if the cube workaround is in place, f2i the result. */
1369       for (unsigned c = 0; c < 4; c++) {
1370          tmp = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, c, false), "");
1371          if (stype == GLSL_TYPE_UINT)
1372             tmp2 = LLVMBuildFPToUI(ctx->builder, tmp, ctx->i32, "");
1373          else
1374             tmp2 = LLVMBuildFPToSI(ctx->builder, tmp, ctx->i32, "");
1375          tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
1376          tmp2 = LLVMBuildBitCast(ctx->builder, tmp2, ctx->i32, "");
1377          tmp = LLVMBuildSelect(ctx->builder, wa_8888, tmp2, tmp, "");
1378          tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
1379          result =
1380             LLVMBuildInsertElement(ctx->builder, result, tmp, LLVMConstInt(ctx->i32, c, false), "");
1381       }
1382    }
1383    return result;
1384 }
1385 
1386 static nir_deref_instr *get_tex_texture_deref(const nir_tex_instr *instr)
1387 {
1388    nir_deref_instr *texture_deref_instr = NULL;
1389 
1390    for (unsigned i = 0; i < instr->num_srcs; i++) {
1391       switch (instr->src[i].src_type) {
1392       case nir_tex_src_texture_deref:
1393          texture_deref_instr = nir_src_as_deref(instr->src[i].src);
1394          break;
1395       default:
1396          break;
1397       }
1398    }
1399    return texture_deref_instr;
1400 }
1401 
1402 static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, const nir_tex_instr *instr,
1403                                         struct ac_image_args *args)
1404 {
1405    if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
1406       unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
1407 
1408       assert(instr->dest.is_ssa);
1409       return ac_build_buffer_load_format(&ctx->ac, args->resource, args->coords[0], ctx->ac.i32_0,
1410                                          util_last_bit(mask), 0, true,
1411                                          instr->dest.ssa.bit_size == 16);
1412    }
1413 
1414    args->opcode = ac_image_sample;
1415 
1416    switch (instr->op) {
1417    case nir_texop_txf:
1418    case nir_texop_txf_ms:
1419    case nir_texop_samples_identical:
1420       args->opcode = args->level_zero || instr->sampler_dim == GLSL_SAMPLER_DIM_MS
1421                         ? ac_image_load
1422                         : ac_image_load_mip;
1423       args->level_zero = false;
1424       break;
1425    case nir_texop_txs:
1426    case nir_texop_query_levels:
1427       args->opcode = ac_image_get_resinfo;
1428       if (!args->lod)
1429          args->lod = ctx->ac.i32_0;
1430       args->level_zero = false;
1431       break;
1432    case nir_texop_tex:
1433       if (ctx->stage != MESA_SHADER_FRAGMENT) {
1434          assert(!args->lod);
1435          args->level_zero = true;
1436       }
1437       break;
1438    case nir_texop_tg4:
1439       args->opcode = ac_image_gather4;
1440       if (!args->lod && !args->bias)
1441          args->level_zero = true;
1442       break;
1443    case nir_texop_lod:
1444       args->opcode = ac_image_get_lod;
1445       break;
1446    case nir_texop_fragment_fetch:
1447    case nir_texop_fragment_mask_fetch:
1448       args->opcode = ac_image_load;
1449       args->level_zero = false;
1450       break;
1451    default:
1452       break;
1453    }
1454 
1455    if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= GFX8) {
1456       nir_deref_instr *texture_deref_instr = get_tex_texture_deref(instr);
1457       nir_variable *var = nir_deref_instr_get_variable(texture_deref_instr);
1458       const struct glsl_type *type = glsl_without_array(var->type);
1459       enum glsl_base_type stype = glsl_get_sampler_result_type(type);
1460       if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
1461          return lower_gather4_integer(&ctx->ac, var, args, instr);
1462       }
1463    }
1464 
1465    /* Fixup for GFX9 which allocates 1D textures as 2D. */
1466    if (instr->op == nir_texop_lod && ctx->ac.chip_class == GFX9) {
1467       if ((args->dim == ac_image_2darray || args->dim == ac_image_2d) && !args->coords[1]) {
1468          args->coords[1] = ctx->ac.i32_0;
1469       }
1470    }
1471 
1472    args->attributes = AC_FUNC_ATTR_READNONE;
1473    bool cs_derivs =
1474       ctx->stage == MESA_SHADER_COMPUTE && ctx->info->cs.derivative_group != DERIVATIVE_GROUP_NONE;
1475    if (ctx->stage == MESA_SHADER_FRAGMENT || cs_derivs) {
1476       /* Prevent texture instructions with implicit derivatives from being
1477        * sunk into branches. */
1478       switch (instr->op) {
1479       case nir_texop_tex:
1480       case nir_texop_txb:
1481       case nir_texop_lod:
1482          args->attributes |= AC_FUNC_ATTR_CONVERGENT;
1483          break;
1484       default:
1485          break;
1486       }
1487    }
1488 
1489    return ac_build_image_opcode(&ctx->ac, args);
1490 }
1491 
1492 static LLVMValueRef visit_vulkan_resource_reindex(struct ac_nir_context *ctx,
1493                                                   nir_intrinsic_instr *instr)
1494 {
1495    LLVMValueRef ptr = get_src(ctx, instr->src[0]);
1496    LLVMValueRef index = get_src(ctx, instr->src[1]);
1497 
1498    LLVMValueRef result = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, "");
1499    LLVMSetMetadata(result, ctx->ac.uniform_md_kind, ctx->ac.empty_md);
1500    return result;
1501 }
1502 
1503 static LLVMValueRef visit_load_push_constant(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
1504 {
1505    LLVMValueRef ptr, addr;
1506    LLVMValueRef src0 = get_src(ctx, instr->src[0]);
1507    unsigned index = nir_intrinsic_base(instr);
1508 
1509    addr = LLVMConstInt(ctx->ac.i32, index, 0);
1510    addr = LLVMBuildAdd(ctx->ac.builder, addr, src0, "");
1511 
1512    /* Load constant values from user SGPRs when possible; otherwise
1513     * fall back to the default path that loads directly from memory.
1514     */
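   /* Illustrative sketch of the fast path below (values are made up): with
    * base = 16, a constant src0 of 8 and base_inline_push_consts = 4, the
    * dword offset is (16 + 8) / 4 - 4 = 2, so a 2-component load is served
    * from inline_push_consts[2..3] as long as at least 4 inline
    * push-constant SGPRs were reserved by the ABI.
    */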
1515    if (LLVMIsConstant(src0) && instr->dest.ssa.bit_size == 32) {
1516       unsigned count = instr->dest.ssa.num_components;
1517       unsigned offset = index;
1518 
1519       offset += LLVMConstIntGetZExtValue(src0);
1520       offset /= 4;
1521 
1522       offset -= ctx->args->base_inline_push_consts;
1523 
1524       unsigned num_inline_push_consts = ctx->args->num_inline_push_consts;
1525       if (offset + count <= num_inline_push_consts) {
1526          LLVMValueRef *const push_constants = alloca(num_inline_push_consts * sizeof(LLVMValueRef));
1527          for (unsigned i = 0; i < num_inline_push_consts; i++)
1528             push_constants[i] = ac_get_arg(&ctx->ac, ctx->args->inline_push_consts[i]);
1529          return ac_build_gather_values(&ctx->ac, push_constants + offset, count);
1530       }
1531    }
1532 
1533    ptr =
1534       LLVMBuildGEP(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->push_constants), &addr, 1, "");
1535 
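   /* For sub-dword results the code below loads whole dwords and shifts the
    * bytes into place. A sketch of the 8-bit case, assuming the usual
    * v_alignbyte_b32 semantics (dst = ({src0,src1} >> 8*(src2 & 3))): for
    * addr % 4 == 3, the intrinsic returns the 32 bits starting at byte 3 of
    * the low dword, from which the requested 8/16/24 bits are truncated off.
    */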
1536    if (instr->dest.ssa.bit_size == 8) {
1537       unsigned load_dwords = instr->dest.ssa.num_components > 1 ? 2 : 1;
1538       LLVMTypeRef vec_type = LLVMVectorType(ctx->ac.i8, 4 * load_dwords);
1539       ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
1540       LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
1541 
1542       LLVMValueRef params[3];
1543       if (load_dwords > 1) {
1544          LLVMValueRef res_vec = LLVMBuildBitCast(ctx->ac.builder, res, ctx->ac.v2i32, "");
1545          params[0] = LLVMBuildExtractElement(ctx->ac.builder, res_vec,
1546                                              LLVMConstInt(ctx->ac.i32, 1, false), "");
1547          params[1] = LLVMBuildExtractElement(ctx->ac.builder, res_vec,
1548                                              LLVMConstInt(ctx->ac.i32, 0, false), "");
1549       } else {
1550          res = LLVMBuildBitCast(ctx->ac.builder, res, ctx->ac.i32, "");
1551          params[0] = ctx->ac.i32_0;
1552          params[1] = res;
1553       }
1554       params[2] = addr;
1555       res = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.alignbyte", ctx->ac.i32, params, 3, 0);
1556 
1557       res = LLVMBuildTrunc(
1558          ctx->ac.builder, res,
1559          LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.num_components * 8), "");
1560       if (instr->dest.ssa.num_components > 1)
1561          res = LLVMBuildBitCast(ctx->ac.builder, res,
1562                                 LLVMVectorType(ctx->ac.i8, instr->dest.ssa.num_components), "");
1563       return res;
1564    } else if (instr->dest.ssa.bit_size == 16) {
1565       unsigned load_dwords = instr->dest.ssa.num_components / 2 + 1;
1566       LLVMTypeRef vec_type = LLVMVectorType(ctx->ac.i16, 2 * load_dwords);
1567       ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
1568       LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
1569       res = LLVMBuildBitCast(ctx->ac.builder, res, vec_type, "");
1570       LLVMValueRef cond = LLVMBuildLShr(ctx->ac.builder, addr, ctx->ac.i32_1, "");
1571       cond = LLVMBuildTrunc(ctx->ac.builder, cond, ctx->ac.i1, "");
1572       LLVMValueRef mask[] = {
1573          LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
1574          LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
1575          LLVMConstInt(ctx->ac.i32, 4, false)};
1576       LLVMValueRef swizzle_aligned = LLVMConstVector(&mask[0], instr->dest.ssa.num_components);
1577       LLVMValueRef swizzle_unaligned = LLVMConstVector(&mask[1], instr->dest.ssa.num_components);
1578       LLVMValueRef shuffle_aligned =
1579          LLVMBuildShuffleVector(ctx->ac.builder, res, res, swizzle_aligned, "");
1580       LLVMValueRef shuffle_unaligned =
1581          LLVMBuildShuffleVector(ctx->ac.builder, res, res, swizzle_unaligned, "");
1582       res = LLVMBuildSelect(ctx->ac.builder, cond, shuffle_unaligned, shuffle_aligned, "");
1583       return LLVMBuildBitCast(ctx->ac.builder, res, get_def_type(ctx, &instr->dest.ssa), "");
1584    }
1585 
1586    ptr = ac_cast_ptr(&ctx->ac, ptr, get_def_type(ctx, &instr->dest.ssa));
1587 
1588    return LLVMBuildLoad(ctx->ac.builder, ptr, "");
1589 }
1590 
1591 static LLVMValueRef visit_get_ssbo_size(struct ac_nir_context *ctx,
1592                                         const nir_intrinsic_instr *instr)
1593 {
1594    LLVMValueRef index = get_src(ctx, instr->src[0]);
1595 
1596    return get_buffer_size(ctx, ctx->abi->load_ssbo(ctx->abi, index, false), false);
1597 }
1598 
1599 static LLVMValueRef extract_vector_range(struct ac_llvm_context *ctx, LLVMValueRef src,
1600                                          unsigned start, unsigned count)
1601 {
1602    LLVMValueRef mask[] = {ctx->i32_0, ctx->i32_1, LLVMConstInt(ctx->i32, 2, false),
1603                           LLVMConstInt(ctx->i32, 3, false)};
1604 
1605    unsigned src_elements = ac_get_llvm_num_components(src);
1606 
1607    if (count == src_elements) {
1608       assert(start == 0);
1609       return src;
1610    } else if (count == 1) {
1611       assert(start < src_elements);
1612       return LLVMBuildExtractElement(ctx->builder, src, mask[start], "");
1613    } else {
1614       assert(start + count <= src_elements);
1615       assert(count <= 4);
1616       LLVMValueRef swizzle = LLVMConstVector(&mask[start], count);
1617       return LLVMBuildShuffleVector(ctx->builder, src, src, swizzle, "");
1618    }
1619 }
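/* A quick usage sketch (hypothetical values): extract_vector_range(ctx, v, 1, 2)
 * on a 4-component vector builds a shufflevector with mask <1, 2> and returns a
 * 2-component vector holding elements 1 and 2; count == 1 degenerates to a plain
 * extractelement and count == src_elements returns the source unchanged.
 */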
1620 
1621 static unsigned get_cache_policy(struct ac_nir_context *ctx, enum gl_access_qualifier access,
1622                                  bool may_store_unaligned, bool writeonly_memory)
1623 {
1624    unsigned cache_policy = 0;
1625 
1626    /* GFX6 has a TC L1 bug causing corruption of 8-bit/16-bit stores.  All
1627     * store opcodes not aligned to a dword are affected. The only way to
1628     * get unaligned stores is through shader images.
1629     */
1630    if (((may_store_unaligned && ctx->ac.chip_class == GFX6) ||
1631         /* If this is write-only, don't keep data in L1 to prevent
1632          * evicting L1 cache lines that may be needed by other
1633          * instructions.
1634          */
1635         writeonly_memory || access & (ACCESS_COHERENT | ACCESS_VOLATILE))) {
1636       cache_policy |= ac_glc;
1637    }
1638 
1639    if (access & ACCESS_STREAM_CACHE_POLICY)
1640       cache_policy |= ac_slc | ac_glc;
1641 
1642    return cache_policy;
1643 }
1644 
1645 static LLVMValueRef enter_waterfall_ssbo(struct ac_nir_context *ctx, struct waterfall_context *wctx,
1646                                          const nir_intrinsic_instr *instr, nir_src src)
1647 {
1648    return enter_waterfall(ctx, wctx, get_src(ctx, src),
1649                           nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
1650 }
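/* Note: the waterfall context only becomes a real loop when the access is
 * tagged ACCESS_NON_UNIFORM, i.e. the SSBO index may diverge within a wave;
 * for uniform indices enter_waterfall() presumably just passes the value
 * through and exit_waterfall() is a no-op. (Sketch of intent, not a guarantee
 * of the helper's exact behaviour.)
 */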
1651 
1652 static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
1653 {
1654    if (ctx->ac.postponed_kill) {
1655       LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
1656       ac_build_ifcc(&ctx->ac, cond, 7000);
1657    }
1658 
1659    LLVMValueRef src_data = get_src(ctx, instr->src[0]);
1660    int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 8;
1661    unsigned writemask = nir_intrinsic_write_mask(instr);
1662    enum gl_access_qualifier access = nir_intrinsic_access(instr);
1663    bool writeonly_memory = access & ACCESS_NON_READABLE;
1664    unsigned cache_policy = get_cache_policy(ctx, access, false, writeonly_memory);
1665 
1666    struct waterfall_context wctx;
1667    LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[1]);
1668 
1669    LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, rsrc_base, true);
1670    LLVMValueRef base_data = src_data;
1671    base_data = ac_trim_vector(&ctx->ac, base_data, instr->num_components);
1672    LLVMValueRef base_offset = get_src(ctx, instr->src[2]);
1673 
1674    while (writemask) {
1675       int start, count;
1676       LLVMValueRef data, offset;
1677       LLVMTypeRef data_type;
1678 
1679       u_bit_scan_consecutive_range(&writemask, &start, &count);
1680 
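      /* Worked example of the splitting below (illustrative only): with
       * 4-byte elements, writemask = 0x7 and no vec3 support, the first pass
       * stores elements 0-1 as one 8-byte store and re-adds bit 2 to the
       * writemask, so element 2 goes out as a separate 4-byte store on the
       * next iteration.
       */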
1681       /* Due to a limitation in LLVM < 9, split 3-element
1682        * writes into a 2-element and a 1-element write. */
1683       if (count == 3 && (elem_size_bytes != 4 || !ac_has_vec3_support(ctx->ac.chip_class, false))) {
1684          writemask |= 1 << (start + 2);
1685          count = 2;
1686       }
1687       int num_bytes = count * elem_size_bytes; /* count in bytes */
1688 
1689       /* We can only store 4 dwords at a time;
1690        * exceeding that can only happen for 64-bit vectors. */
1691       if (num_bytes > 16) {
1692          writemask |= ((1u << (count - 2)) - 1u) << (start + 2);
1693          count = 2;
1694          num_bytes = 16;
1695       }
1696 
1697       /* Check the alignment of 16-bit stores. */
1698       if (elem_size_bytes == 2 && num_bytes > 2 && (start % 2) == 1) {
1699          writemask |= ((1u << (count - 1)) - 1u) << (start + 1);
1700          count = 1;
1701          num_bytes = 2;
1702       }
1703 
1704       /* Due to alignment issues, split stores of 8-bit/16-bit
1705        * vectors.
1706        */
1707       if (ctx->ac.chip_class == GFX6 && count > 1 && elem_size_bytes < 4) {
1708          writemask |= ((1u << (count - 1)) - 1u) << (start + 1);
1709          count = 1;
1710          num_bytes = elem_size_bytes;
1711       }
1712 
1713       data = extract_vector_range(&ctx->ac, base_data, start, count);
1714 
1715       offset = LLVMBuildAdd(ctx->ac.builder, base_offset,
1716                             LLVMConstInt(ctx->ac.i32, start * elem_size_bytes, false), "");
1717 
1718       if (num_bytes == 1) {
1719          ac_build_tbuffer_store_byte(&ctx->ac, rsrc, data, offset, ctx->ac.i32_0, cache_policy);
1720       } else if (num_bytes == 2) {
1721          ac_build_tbuffer_store_short(&ctx->ac, rsrc, data, offset, ctx->ac.i32_0, cache_policy);
1722       } else {
1723          int num_channels = num_bytes / 4;
1724 
1725          switch (num_bytes) {
1726          case 16: /* v4f32 */
1727             data_type = ctx->ac.v4f32;
1728             break;
1729          case 12: /* v3f32 */
1730             data_type = ctx->ac.v3f32;
1731             break;
1732          case 8: /* v2f32 */
1733             data_type = ctx->ac.v2f32;
1734             break;
1735          case 4: /* f32 */
1736             data_type = ctx->ac.f32;
1737             break;
1738          default:
1739             unreachable("Malformed vector store.");
1740          }
1741          data = LLVMBuildBitCast(ctx->ac.builder, data, data_type, "");
1742 
1743          ac_build_buffer_store_dword(&ctx->ac, rsrc, data, num_channels, offset, ctx->ac.i32_0, 0,
1744                                      cache_policy);
1745       }
1746    }
1747 
1748    exit_waterfall(ctx, &wctx, NULL);
1749 
1750    if (ctx->ac.postponed_kill)
1751       ac_build_endif(&ctx->ac, 7000);
1752 }
1753 
1754 static LLVMValueRef emit_ssbo_comp_swap_64(struct ac_nir_context *ctx, LLVMValueRef descriptor,
1755                                            LLVMValueRef offset, LLVMValueRef compare,
1756                                            LLVMValueRef exchange, bool image)
1757 {
1758    LLVMBasicBlockRef start_block = NULL, then_block = NULL;
1759    if (ctx->abi->robust_buffer_access || image) {
1760       LLVMValueRef size = ac_llvm_extract_elem(&ctx->ac, descriptor, 2);
1761 
1762       LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, "");
1763       start_block = LLVMGetInsertBlock(ctx->ac.builder);
1764 
1765       ac_build_ifcc(&ctx->ac, cond, -1);
1766 
1767       then_block = LLVMGetInsertBlock(ctx->ac.builder);
1768    }
1769 
1770    if (image)
1771       offset = LLVMBuildMul(ctx->ac.builder, offset, LLVMConstInt(ctx->ac.i32, 8, false), "");
1772 
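   /* The 64-bit address is rebuilt from the descriptor: dword0 holds the low
    * 32 bits of the base VA and the low 16 bits of dword1 hold bits 47:32;
    * the sign-extension below presumably turns the 48-bit VA into its
    * canonical 64-bit form. E.g. dword0 = 0x12345678, dword1 & 0xffff =
    * 0x0001 gives the pointer 0x0000000112345678 (illustrative values).
    */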
1773    LLVMValueRef ptr_parts[2] = {
1774       ac_llvm_extract_elem(&ctx->ac, descriptor, 0),
1775       LLVMBuildAnd(ctx->ac.builder, ac_llvm_extract_elem(&ctx->ac, descriptor, 1),
1776                    LLVMConstInt(ctx->ac.i32, 65535, 0), "")};
1777 
1778    ptr_parts[1] = LLVMBuildTrunc(ctx->ac.builder, ptr_parts[1], ctx->ac.i16, "");
1779    ptr_parts[1] = LLVMBuildSExt(ctx->ac.builder, ptr_parts[1], ctx->ac.i32, "");
1780 
1781    offset = LLVMBuildZExt(ctx->ac.builder, offset, ctx->ac.i64, "");
1782 
1783    LLVMValueRef ptr = ac_build_gather_values(&ctx->ac, ptr_parts, 2);
1784    ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->ac.i64, "");
1785    ptr = LLVMBuildAdd(ctx->ac.builder, ptr, offset, "");
1786    ptr = LLVMBuildIntToPtr(ctx->ac.builder, ptr, LLVMPointerType(ctx->ac.i64, AC_ADDR_SPACE_GLOBAL),
1787                            "");
1788 
1789    LLVMValueRef result =
1790       ac_build_atomic_cmp_xchg(&ctx->ac, ptr, compare, exchange, "singlethread-one-as");
1791    result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, "");
1792 
1793    if (ctx->abi->robust_buffer_access || image) {
1794       ac_build_endif(&ctx->ac, -1);
1795 
1796       LLVMBasicBlockRef incoming_blocks[2] = {
1797          start_block,
1798          then_block,
1799       };
1800 
1801       LLVMValueRef incoming_values[2] = {
1802          LLVMConstInt(ctx->ac.i64, 0, 0),
1803          result,
1804       };
1805       LLVMValueRef ret = LLVMBuildPhi(ctx->ac.builder, ctx->ac.i64, "");
1806       LLVMAddIncoming(ret, incoming_values, incoming_blocks, 2);
1807       return ret;
1808    } else {
1809       return result;
1810    }
1811 }
1812 
1813 static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
1814 {
1815    if (ctx->ac.postponed_kill) {
1816       LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
1817       ac_build_ifcc(&ctx->ac, cond, 7001);
1818    }
1819 
1820    LLVMTypeRef return_type = LLVMTypeOf(get_src(ctx, instr->src[2]));
1821    const char *op;
1822    char name[64], type[8];
1823    LLVMValueRef params[6], descriptor;
1824    LLVMValueRef result;
1825    int arg_count = 0;
1826 
1827    struct waterfall_context wctx;
1828    LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[0]);
1829 
1830    switch (instr->intrinsic) {
1831    case nir_intrinsic_ssbo_atomic_add:
1832       op = "add";
1833       break;
1834    case nir_intrinsic_ssbo_atomic_imin:
1835       op = "smin";
1836       break;
1837    case nir_intrinsic_ssbo_atomic_umin:
1838       op = "umin";
1839       break;
1840    case nir_intrinsic_ssbo_atomic_imax:
1841       op = "smax";
1842       break;
1843    case nir_intrinsic_ssbo_atomic_umax:
1844       op = "umax";
1845       break;
1846    case nir_intrinsic_ssbo_atomic_and:
1847       op = "and";
1848       break;
1849    case nir_intrinsic_ssbo_atomic_or:
1850       op = "or";
1851       break;
1852    case nir_intrinsic_ssbo_atomic_xor:
1853       op = "xor";
1854       break;
1855    case nir_intrinsic_ssbo_atomic_exchange:
1856       op = "swap";
1857       break;
1858    case nir_intrinsic_ssbo_atomic_comp_swap:
1859       op = "cmpswap";
1860       break;
1861    default:
1862       abort();
1863    }
1864 
1865    descriptor = ctx->abi->load_ssbo(ctx->abi, rsrc_base, true);
1866 
1867    if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap && return_type == ctx->ac.i64) {
1868       result = emit_ssbo_comp_swap_64(ctx, descriptor, get_src(ctx, instr->src[1]),
1869                                       get_src(ctx, instr->src[2]), get_src(ctx, instr->src[3]), false);
1870    } else {
1871       if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
1872          params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[3]), 0);
1873       }
1874       params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
1875       params[arg_count++] = descriptor;
1876 
1877       if (LLVM_VERSION_MAJOR >= 9) {
1878          /* XXX: The new raw/struct atomic intrinsics are buggy with
1879           * LLVM 8, see r358579.
1880           */
1881          params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
1882          params[arg_count++] = ctx->ac.i32_0;               /* soffset */
1883          params[arg_count++] = ctx->ac.i32_0;               /* slc */
1884 
1885          ac_build_type_name_for_intr(return_type, type, sizeof(type));
1886          snprintf(name, sizeof(name), "llvm.amdgcn.raw.buffer.atomic.%s.%s", op, type);
1887       } else {
1888          params[arg_count++] = ctx->ac.i32_0;               /* vindex */
1889          params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
1890          params[arg_count++] = ctx->ac.i1false;             /* slc */
1891 
1892          assert(return_type == ctx->ac.i32);
1893          snprintf(name, sizeof(name), "llvm.amdgcn.buffer.atomic.%s", op);
1894       }
1895 
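      /* Rough examples of the names built above (assuming
       * ac_build_type_name_for_intr() yields "i32"/"i64"): LLVM >= 9 ends up
       * calling e.g. "llvm.amdgcn.raw.buffer.atomic.add.i32", while the
       * legacy path uses "llvm.amdgcn.buffer.atomic.add" on a 32-bit value.
       */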
1896       result = ac_build_intrinsic(&ctx->ac, name, return_type, params, arg_count, 0);
1897    }
1898 
1899    result = exit_waterfall(ctx, &wctx, result);
1900    if (ctx->ac.postponed_kill)
1901       ac_build_endif(&ctx->ac, 7001);
1902    return result;
1903 }
1904 
1905 static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
1906 {
1907    struct waterfall_context wctx;
1908    LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[0]);
1909 
1910    int elem_size_bytes = instr->dest.ssa.bit_size / 8;
1911    int num_components = instr->num_components;
1912    enum gl_access_qualifier access = nir_intrinsic_access(instr);
1913    unsigned cache_policy = get_cache_policy(ctx, access, false, false);
1914 
1915    LLVMValueRef offset = get_src(ctx, instr->src[1]);
1916    LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, rsrc_base, false);
1917    LLVMValueRef vindex = ctx->ac.i32_0;
1918 
1919    LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.ssa);
1920    LLVMTypeRef def_elem_type = num_components > 1 ? LLVMGetElementType(def_type) : def_type;
1921 
1922    LLVMValueRef results[4];
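   /* Sketch of how the loop below splits a load (illustrative numbers): a
    * 4-component 64-bit load is 32 bytes, so it is capped at 16 bytes and
    * issued as two 2-element loads; sub-dword components with a non
    * dword-aligned offset fall back to one tbuffer byte/short load per
    * component.
    */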
1923    for (int i = 0; i < num_components;) {
1924       int num_elems = num_components - i;
1925       if (elem_size_bytes < 4 && nir_intrinsic_align(instr) % 4 != 0)
1926          num_elems = 1;
1927       if (num_elems * elem_size_bytes > 16)
1928          num_elems = 16 / elem_size_bytes;
1929       int load_bytes = num_elems * elem_size_bytes;
1930 
1931       LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i * elem_size_bytes, false);
1932 
1933       LLVMValueRef ret;
1934 
1935       if (load_bytes == 1) {
1936          ret = ac_build_tbuffer_load_byte(&ctx->ac, rsrc, offset, ctx->ac.i32_0, immoffset,
1937                                           cache_policy);
1938       } else if (load_bytes == 2) {
1939          ret = ac_build_tbuffer_load_short(&ctx->ac, rsrc, offset, ctx->ac.i32_0, immoffset,
1940                                            cache_policy);
1941       } else {
1942          int num_channels = util_next_power_of_two(load_bytes) / 4;
1943          bool can_speculate = access & ACCESS_CAN_REORDER;
1944 
1945          ret = ac_build_buffer_load(&ctx->ac, rsrc, num_channels, vindex, offset, immoffset, 0,
1946                                     cache_policy, can_speculate, false);
1947       }
1948 
1949       LLVMTypeRef byte_vec = LLVMVectorType(ctx->ac.i8, ac_get_type_size(LLVMTypeOf(ret)));
1950       ret = LLVMBuildBitCast(ctx->ac.builder, ret, byte_vec, "");
1951       ret = ac_trim_vector(&ctx->ac, ret, load_bytes);
1952 
1953       LLVMTypeRef ret_type = LLVMVectorType(def_elem_type, num_elems);
1954       ret = LLVMBuildBitCast(ctx->ac.builder, ret, ret_type, "");
1955 
1956       for (unsigned j = 0; j < num_elems; j++) {
1957          results[i + j] =
1958             LLVMBuildExtractElement(ctx->ac.builder, ret, LLVMConstInt(ctx->ac.i32, j, false), "");
1959       }
1960       i += num_elems;
1961    }
1962 
1963    LLVMValueRef ret = ac_build_gather_values(&ctx->ac, results, num_components);
1964    return exit_waterfall(ctx, &wctx, ret);
1965 }
1966 
1967 static LLVMValueRef enter_waterfall_ubo(struct ac_nir_context *ctx, struct waterfall_context *wctx,
1968                                         const nir_intrinsic_instr *instr)
1969 {
1970    return enter_waterfall(ctx, wctx, get_src(ctx, instr->src[0]),
1971                           nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
1972 }
1973 
1974 static LLVMValueRef visit_load_global(struct ac_nir_context *ctx,
1975                                       nir_intrinsic_instr *instr)
1976 {
1977    LLVMValueRef addr = get_src(ctx, instr->src[0]);
1978    LLVMTypeRef result_type = get_def_type(ctx, &instr->dest.ssa);
1979    LLVMValueRef val;
1980 
1981    LLVMTypeRef ptr_type = LLVMPointerType(result_type, AC_ADDR_SPACE_GLOBAL);
1982 
1983    addr = LLVMBuildIntToPtr(ctx->ac.builder, addr, ptr_type, "");
1984 
1985    val = LLVMBuildLoad(ctx->ac.builder, addr, "");
1986 
1987    if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE)) {
1988       LLVMSetOrdering(val, LLVMAtomicOrderingMonotonic);
1989       LLVMSetAlignment(val, ac_get_type_size(result_type));
1990    }
1991 
1992    return val;
1993 }
1994 
1995 static void visit_store_global(struct ac_nir_context *ctx,
1996                                nir_intrinsic_instr *instr)
1997 {
1998    if (ctx->ac.postponed_kill) {
1999       LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
2000       ac_build_ifcc(&ctx->ac, cond, 7002);
2001    }
2002 
2003    LLVMValueRef data = get_src(ctx, instr->src[0]);
2004    LLVMValueRef addr = get_src(ctx, instr->src[1]);
2005    LLVMTypeRef type = LLVMTypeOf(data);
2006    LLVMValueRef val;
2007 
2008    LLVMTypeRef ptr_type = LLVMPointerType(type, AC_ADDR_SPACE_GLOBAL);
2009 
2010    addr = LLVMBuildIntToPtr(ctx->ac.builder, addr, ptr_type, "");
2011 
2012    val = LLVMBuildStore(ctx->ac.builder, data, addr);
2013 
2014    if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE)) {
2015       LLVMSetOrdering(val, LLVMAtomicOrderingMonotonic);
2016       LLVMSetAlignment(val, ac_get_type_size(type));
2017    }
2018 
2019    if (ctx->ac.postponed_kill)
2020       ac_build_endif(&ctx->ac, 7002);
2021 }
2022 
2023 static LLVMValueRef visit_global_atomic(struct ac_nir_context *ctx,
2024                                         nir_intrinsic_instr *instr)
2025 {
2026    if (ctx->ac.postponed_kill) {
2027       LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
2028       ac_build_ifcc(&ctx->ac, cond, 7002);
2029    }
2030 
2031    LLVMValueRef addr = get_src(ctx, instr->src[0]);
2032    LLVMValueRef data = get_src(ctx, instr->src[1]);
2033    LLVMAtomicRMWBinOp op;
2034    LLVMValueRef result;
2035 
2036    /* use "singlethread" sync scope to implement relaxed ordering */
2037    const char *sync_scope = LLVM_VERSION_MAJOR >= 9 ? "singlethread-one-as" : "singlethread";
2038 
2039    LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(data), AC_ADDR_SPACE_GLOBAL);
2040 
2041    addr = LLVMBuildIntToPtr(ctx->ac.builder, addr, ptr_type, "");
2042 
2043    if (instr->intrinsic == nir_intrinsic_global_atomic_comp_swap) {
2044       LLVMValueRef data1 = get_src(ctx, instr->src[2]);
2045       result = ac_build_atomic_cmp_xchg(&ctx->ac, addr, data, data1, sync_scope);
2046       result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, "");
2047    } else {
2048       switch (instr->intrinsic) {
2049       case nir_intrinsic_global_atomic_add:
2050          op = LLVMAtomicRMWBinOpAdd;
2051          break;
2052       case nir_intrinsic_global_atomic_umin:
2053          op = LLVMAtomicRMWBinOpUMin;
2054          break;
2055       case nir_intrinsic_global_atomic_umax:
2056          op = LLVMAtomicRMWBinOpUMax;
2057          break;
2058       case nir_intrinsic_global_atomic_imin:
2059          op = LLVMAtomicRMWBinOpMin;
2060          break;
2061       case nir_intrinsic_global_atomic_imax:
2062          op = LLVMAtomicRMWBinOpMax;
2063          break;
2064       case nir_intrinsic_global_atomic_and:
2065          op = LLVMAtomicRMWBinOpAnd;
2066          break;
2067       case nir_intrinsic_global_atomic_or:
2068          op = LLVMAtomicRMWBinOpOr;
2069          break;
2070       case nir_intrinsic_global_atomic_xor:
2071          op = LLVMAtomicRMWBinOpXor;
2072          break;
2073       case nir_intrinsic_global_atomic_exchange:
2074          op = LLVMAtomicRMWBinOpXchg;
2075          break;
2076       default:
2077          unreachable("Invalid global atomic operation");
2078       }
2079 
2080       result = ac_build_atomic_rmw(&ctx->ac, op, addr, ac_to_integer(&ctx->ac, data), sync_scope);
2081    }
2082 
2083    if (ctx->ac.postponed_kill)
2084       ac_build_endif(&ctx->ac, 7002);
2085 
2086    return result;
2087 }
2088 
2089 static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
2090 {
2091    struct waterfall_context wctx;
2092    LLVMValueRef rsrc_base = enter_waterfall_ubo(ctx, &wctx, instr);
2093 
2094    LLVMValueRef ret;
2095    LLVMValueRef rsrc = rsrc_base;
2096    LLVMValueRef offset = get_src(ctx, instr->src[1]);
2097    int num_components = instr->num_components;
2098    unsigned desc_set = 0, binding = 0;
2099    bool valid_binding = false;
2100 
2101    /* Look for vulkan_resource_index to get the desc_set/binding values which
2102     * are used to determine if it's an inline uniform UBO block.
2103     */
2104    if (instr->src[0].ssa->parent_instr->type == nir_instr_type_alu) {
2105       nir_alu_instr *mov_instr = nir_instr_as_alu(instr->src[0].ssa->parent_instr);
2106       if (mov_instr->src[0].src.ssa->parent_instr->type == nir_instr_type_intrinsic) {
2107          nir_intrinsic_instr *idx_instr = nir_instr_as_intrinsic(mov_instr->src[0].src.ssa->parent_instr);
2108          if (idx_instr->intrinsic == nir_intrinsic_vulkan_resource_index) {
2109             desc_set = nir_intrinsic_desc_set(idx_instr);
2110             binding = nir_intrinsic_binding(idx_instr);
2111             valid_binding = true;
2112          }
2113       }
2114    }
2115 
2116    if (ctx->abi->load_ubo)
2117       rsrc = ctx->abi->load_ubo(ctx->abi, desc_set, binding, valid_binding, rsrc);
2118 
2119    if (instr->dest.ssa.bit_size == 64)
2120       num_components *= 2;
2121 
2122    if (instr->dest.ssa.bit_size == 16 || instr->dest.ssa.bit_size == 8) {
2123       unsigned load_bytes = instr->dest.ssa.bit_size / 8;
2124       LLVMValueRef *const results = alloca(num_components * sizeof(LLVMValueRef));
2125       for (unsigned i = 0; i < num_components; ++i) {
2126          LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, load_bytes * i, 0);
2127 
2128          if (load_bytes == 1) {
2129             results[i] =
2130                ac_build_tbuffer_load_byte(&ctx->ac, rsrc, offset, ctx->ac.i32_0, immoffset, 0);
2131          } else {
2132             assert(load_bytes == 2);
2133             results[i] =
2134                ac_build_tbuffer_load_short(&ctx->ac, rsrc, offset, ctx->ac.i32_0, immoffset, 0);
2135          }
2136       }
2137       ret = ac_build_gather_values(&ctx->ac, results, num_components);
2138    } else {
2139       ret =
2140          ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset, NULL, 0, 0, true, true);
2141 
2142       ret = ac_trim_vector(&ctx->ac, ret, num_components);
2143    }
2144 
2145    ret = LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
2146 
2147    return exit_waterfall(ctx, &wctx, ret);
2148 }
2149 
2150 static unsigned type_scalar_size_bytes(const struct glsl_type *type)
2151 {
2152    assert(glsl_type_is_vector_or_scalar(type) || glsl_type_is_matrix(type));
2153    return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
2154 }
2155 
2156 static void visit_store_output(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
2157 {
2158    if (ctx->ac.postponed_kill) {
2159       LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
2160       ac_build_ifcc(&ctx->ac, cond, 7002);
2161    }
2162 
2163    unsigned base = nir_intrinsic_base(instr);
2164    unsigned writemask = nir_intrinsic_write_mask(instr);
2165    unsigned component = nir_intrinsic_component(instr);
2166    LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
2167    nir_src offset = *nir_get_io_offset_src(instr);
2168    LLVMValueRef indir_index = NULL;
2169 
2170    if (nir_src_is_const(offset))
2171       assert(nir_src_as_uint(offset) == 0);
2172    else
2173       indir_index = get_src(ctx, offset);
2174 
2175    switch (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src))) {
2176    case 16:
2177    case 32:
2178       break;
2179    case 64:
2180       unreachable("64-bit IO should have been lowered to 32 bits");
2181       return;
2182    default:
2183       unreachable("unhandled store_output bit size");
2184       return;
2185    }
2186 
2187    writemask <<= component;
2188 
2189    if (ctx->stage == MESA_SHADER_TESS_CTRL) {
2190       nir_src *vertex_index_src = nir_get_io_vertex_index_src(instr);
2191       LLVMValueRef vertex_index = vertex_index_src ? get_src(ctx, *vertex_index_src) : NULL;
2192       unsigned location = nir_intrinsic_io_semantics(instr).location;
2193 
2194       ctx->abi->store_tcs_outputs(ctx->abi, vertex_index, indir_index, src,
2195                                   writemask, component, location, base);
2196       return;
2197    }
2198 
2199    /* No indirect indexing is allowed after this point. */
2200    assert(!indir_index);
2201 
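   /* Example of the indexing below (made-up values): with component = 2 and
    * an incoming writemask of 0x3, the shifted mask is 0xc, so channel 2
    * stores src element 0 and channel 3 stores src element 1 into
    * outputs[base * 4 + chan].
    */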
2202    for (unsigned chan = 0; chan < 8; chan++) {
2203       if (!(writemask & (1 << chan)))
2204          continue;
2205 
2206       LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component);
2207       LLVMBuildStore(ctx->ac.builder, value, ctx->abi->outputs[base * 4 + chan]);
2208    }
2209 
2210    if (ctx->ac.postponed_kill)
2211       ac_build_endif(&ctx->ac, 7002);
2212 }
2213 
2214 static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array)
2215 {
2216    switch (dim) {
2217    case GLSL_SAMPLER_DIM_BUF:
2218       return 1;
2219    case GLSL_SAMPLER_DIM_1D:
2220       return array ? 2 : 1;
2221    case GLSL_SAMPLER_DIM_2D:
2222       return array ? 3 : 2;
2223    case GLSL_SAMPLER_DIM_MS:
2224       return array ? 4 : 3;
2225    case GLSL_SAMPLER_DIM_3D:
2226    case GLSL_SAMPLER_DIM_CUBE:
2227       return 3;
2228    case GLSL_SAMPLER_DIM_RECT:
2229    case GLSL_SAMPLER_DIM_SUBPASS:
2230       return 2;
2231    case GLSL_SAMPLER_DIM_SUBPASS_MS:
2232       return 3;
2233    default:
2234       break;
2235    }
2236    return 0;
2237 }
2238 
2239 static LLVMValueRef adjust_sample_index_using_fmask(struct ac_llvm_context *ctx,
2240                                                     LLVMValueRef coord_x, LLVMValueRef coord_y,
2241                                                     LLVMValueRef coord_z, LLVMValueRef sample_index,
2242                                                     LLVMValueRef fmask_desc_ptr)
2243 {
2244    unsigned sample_chan = coord_z ? 3 : 2;
2245    LLVMValueRef addr[4] = {coord_x, coord_y, coord_z};
2246    addr[sample_chan] = sample_index;
2247 
2248    ac_apply_fmask_to_sample(ctx, fmask_desc_ptr, addr, coord_z != NULL);
2249    return addr[sample_chan];
2250 }
2251 
2252 static nir_deref_instr *get_image_deref(const nir_intrinsic_instr *instr)
2253 {
2254    assert(instr->src[0].is_ssa);
2255    return nir_instr_as_deref(instr->src[0].ssa->parent_instr);
2256 }
2257 
2258 static LLVMValueRef get_image_descriptor(struct ac_nir_context *ctx,
2259                                          const nir_intrinsic_instr *instr,
2260                                          LLVMValueRef dynamic_index,
2261                                          enum ac_descriptor_type desc_type, bool write)
2262 {
2263    nir_deref_instr *deref_instr = instr->src[0].ssa->parent_instr->type == nir_instr_type_deref
2264                                      ? nir_instr_as_deref(instr->src[0].ssa->parent_instr)
2265                                      : NULL;
2266 
2267    return get_sampler_desc(ctx, deref_instr, desc_type, &instr->instr, dynamic_index, true, write);
2268 }
2269 
2270 static void get_image_coords(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr,
2271                              LLVMValueRef dynamic_desc_index, struct ac_image_args *args,
2272                              enum glsl_sampler_dim dim, bool is_array)
2273 {
2274    LLVMValueRef src0 = get_src(ctx, instr->src[1]);
2275    LLVMValueRef masks[] = {
2276       LLVMConstInt(ctx->ac.i32, 0, false),
2277       LLVMConstInt(ctx->ac.i32, 1, false),
2278       LLVMConstInt(ctx->ac.i32, 2, false),
2279       LLVMConstInt(ctx->ac.i32, 3, false),
2280    };
2281    LLVMValueRef sample_index = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
2282 
2283    int count;
2284    ASSERTED bool add_frag_pos =
2285       (dim == GLSL_SAMPLER_DIM_SUBPASS || dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
2286    bool is_ms = (dim == GLSL_SAMPLER_DIM_MS || dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
2287    bool gfx9_1d = ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_1D;
2288    assert(!add_frag_pos && "Input attachments should be lowered by this point.");
2289    count = image_type_to_components_count(dim, is_array);
2290 
2291    if (is_ms && (instr->intrinsic == nir_intrinsic_image_deref_load ||
2292                  instr->intrinsic == nir_intrinsic_bindless_image_load)) {
2293       LLVMValueRef fmask_load_address[3];
2294 
2295       fmask_load_address[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
2296       fmask_load_address[1] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[1], "");
2297       if (is_array)
2298          fmask_load_address[2] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[2], "");
2299       else
2300          fmask_load_address[2] = NULL;
2301 
2302       sample_index = adjust_sample_index_using_fmask(
2303          &ctx->ac, fmask_load_address[0], fmask_load_address[1], fmask_load_address[2],
2304          sample_index,
2305          get_sampler_desc(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), AC_DESC_FMASK,
2306                           &instr->instr, dynamic_desc_index, true, false));
2307    }
2308    if (count == 1 && !gfx9_1d) {
2309       if (instr->src[1].ssa->num_components)
2310          args->coords[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
2311       else
2312          args->coords[0] = src0;
2313    } else {
2314       int chan;
2315       if (is_ms)
2316          count--;
2317       for (chan = 0; chan < count; ++chan) {
2318          args->coords[chan] = ac_llvm_extract_elem(&ctx->ac, src0, chan);
2319       }
2320 
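      /* GFX9 allocates 1D textures as 2D, so a 1D coordinate (x) becomes
       * (x, 0) and a 1D-array coordinate (x, layer) becomes (x, 0, layer)
       * below; the extra component is counted so the image opcode sees the
       * 2D layout.
       */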
2321       if (gfx9_1d) {
2322          if (is_array) {
2323             args->coords[2] = args->coords[1];
2324             args->coords[1] = ctx->ac.i32_0;
2325          } else
2326             args->coords[1] = ctx->ac.i32_0;
2327          count++;
2328       }
2329       if (ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_2D && !is_array) {
2330          /* The hw can't bind a slice of a 3D image as a 2D
2331           * image, because it ignores BASE_ARRAY if the target
2332           * is 3D. The workaround is to read BASE_ARRAY and set
2333           * it as the 3rd address operand for all 2D images.
2334           */
2335          LLVMValueRef first_layer, const5, mask;
2336 
2337          const5 = LLVMConstInt(ctx->ac.i32, 5, 0);
2338          mask = LLVMConstInt(ctx->ac.i32, S_008F24_BASE_ARRAY(~0), 0);
2339          first_layer = LLVMBuildExtractElement(ctx->ac.builder, args->resource, const5, "");
2340          first_layer = LLVMBuildAnd(ctx->ac.builder, first_layer, mask, "");
2341 
2342          args->coords[count] = first_layer;
2343          count++;
2344       }
2345 
2346       if (is_ms) {
2347          args->coords[count] = sample_index;
2348          count++;
2349       }
2350    }
2351 }
2352 
2353 static LLVMValueRef get_image_buffer_descriptor(struct ac_nir_context *ctx,
2354                                                 const nir_intrinsic_instr *instr,
2355                                                 LLVMValueRef dynamic_index, bool write, bool atomic)
2356 {
2357    LLVMValueRef rsrc = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_BUFFER, write);
2358    if (ctx->ac.chip_class == GFX9 && LLVM_VERSION_MAJOR < 9 && atomic) {
2359       LLVMValueRef elem_count =
2360          LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, 0), "");
2361       LLVMValueRef stride =
2362          LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 1, 0), "");
2363       stride = LLVMBuildLShr(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 16, 0), "");
2364 
2365       LLVMValueRef new_elem_count = LLVMBuildSelect(
2366          ctx->ac.builder, LLVMBuildICmp(ctx->ac.builder, LLVMIntUGT, elem_count, stride, ""),
2367          elem_count, stride, "");
2368 
2369       rsrc = LLVMBuildInsertElement(ctx->ac.builder, rsrc, new_elem_count,
2370                                     LLVMConstInt(ctx->ac.i32, 2, 0), "");
2371    }
2372    return rsrc;
2373 }
2374 
2375 static LLVMValueRef enter_waterfall_image(struct ac_nir_context *ctx,
2376                                           struct waterfall_context *wctx,
2377                                           const nir_intrinsic_instr *instr)
2378 {
2379    nir_deref_instr *deref_instr = NULL;
2380 
2381    if (instr->src[0].ssa->parent_instr->type == nir_instr_type_deref)
2382       deref_instr = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
2383 
2384    LLVMValueRef value = get_sampler_desc_index(ctx, deref_instr, &instr->instr, true);
2385    return enter_waterfall(ctx, wctx, value, nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
2386 }
2387 
2388 static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr,
2389                                      bool bindless)
2390 {
2391    LLVMValueRef res;
2392 
2393    enum glsl_sampler_dim dim;
2394    enum gl_access_qualifier access = nir_intrinsic_access(instr);
2395    bool is_array;
2396    if (bindless) {
2397       dim = nir_intrinsic_image_dim(instr);
2398       is_array = nir_intrinsic_image_array(instr);
2399    } else {
2400       const nir_deref_instr *image_deref = get_image_deref(instr);
2401       const struct glsl_type *type = image_deref->type;
2402       const nir_variable *var = nir_deref_instr_get_variable(image_deref);
2403       dim = glsl_get_sampler_dim(type);
2404       access |= var->data.access;
2405       is_array = glsl_sampler_type_is_array(type);
2406    }
2407 
2408    struct waterfall_context wctx;
2409    LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
2410 
2411    struct ac_image_args args = {0};
2412 
2413    args.cache_policy = get_cache_policy(ctx, access, false, false);
2414 
2415    if (dim == GLSL_SAMPLER_DIM_BUF) {
2416       unsigned num_channels = util_last_bit(nir_ssa_def_components_read(&instr->dest.ssa));
2417       if (instr->dest.ssa.bit_size == 64)
2418          num_channels = num_channels < 4 ? 2 : 4;
2419       LLVMValueRef rsrc, vindex;
2420 
2421       rsrc = get_image_buffer_descriptor(ctx, instr, dynamic_index, false, false);
2422       vindex =
2423          LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), ctx->ac.i32_0, "");
2424 
2425       assert(instr->dest.is_ssa);
2426       bool can_speculate = access & ACCESS_CAN_REORDER;
2427       res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex, ctx->ac.i32_0, num_channels,
2428                                         args.cache_policy, can_speculate,
2429                                         instr->dest.ssa.bit_size == 16);
2430       res = ac_build_expand_to_vec4(&ctx->ac, res, num_channels);
2431 
2432       res = ac_trim_vector(&ctx->ac, res, instr->dest.ssa.num_components);
2433       res = ac_to_integer(&ctx->ac, res);
2434    } else {
2435       bool level_zero = nir_src_is_const(instr->src[3]) && nir_src_as_uint(instr->src[3]) == 0;
2436 
2437       args.opcode = level_zero ? ac_image_load : ac_image_load_mip;
2438       args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);
2439       get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
2440       args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
2441       if (!level_zero)
2442          args.lod = get_src(ctx, instr->src[3]);
2443       args.dmask = 15;
2444       args.attributes = AC_FUNC_ATTR_READONLY;
2445 
2446       assert(instr->dest.is_ssa);
2447       args.d16 = instr->dest.ssa.bit_size == 16;
2448 
2449       res = ac_build_image_opcode(&ctx->ac, &args);
2450    }
2451 
2452    if (instr->dest.ssa.bit_size == 64) {
2453       res = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(ctx->ac.i64, 2), "");
2454       LLVMValueRef x = LLVMBuildExtractElement(ctx->ac.builder, res, ctx->ac.i32_0, "");
2455       LLVMValueRef w = LLVMBuildExtractElement(ctx->ac.builder, res, ctx->ac.i32_1, "");
2456 
2457       LLVMValueRef values[4] = {x, ctx->ac.i64_0, ctx->ac.i64_0, w};
2458       res = ac_build_gather_values(&ctx->ac, values, 4);
2459    }
2460 
2461    return exit_waterfall(ctx, &wctx, res);
2462 }
2463 
2464 static void visit_image_store(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr,
2465                               bool bindless)
2466 {
2467    if (ctx->ac.postponed_kill) {
2468       LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
2469       ac_build_ifcc(&ctx->ac, cond, 7003);
2470    }
2471 
2472    enum glsl_sampler_dim dim;
2473    enum gl_access_qualifier access = nir_intrinsic_access(instr);
2474    bool is_array;
2475 
2476    if (bindless) {
2477       dim = nir_intrinsic_image_dim(instr);
2478       is_array = nir_intrinsic_image_array(instr);
2479    } else {
2480       const nir_deref_instr *image_deref = get_image_deref(instr);
2481       const struct glsl_type *type = image_deref->type;
2482       const nir_variable *var = nir_deref_instr_get_variable(image_deref);
2483       dim = glsl_get_sampler_dim(type);
2484       access |= var->data.access;
2485       is_array = glsl_sampler_type_is_array(type);
2486    }
2487 
2488    struct waterfall_context wctx;
2489    LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
2490 
2491    bool writeonly_memory = access & ACCESS_NON_READABLE;
2492    struct ac_image_args args = {0};
2493 
2494    args.cache_policy = get_cache_policy(ctx, access, true, writeonly_memory);
2495 
2496    LLVMValueRef src = get_src(ctx, instr->src[3]);
2497    if (instr->src[3].ssa->bit_size == 64) {
2498       /* Only R64_UINT and R64_SINT are supported. */
2499       src = ac_llvm_extract_elem(&ctx->ac, src, 0);
2500       src = LLVMBuildBitCast(ctx->ac.builder, src, ctx->ac.v2f32, "");
2501    } else {
2502       src = ac_to_float(&ctx->ac, src);
2503    }
2504 
2505    if (dim == GLSL_SAMPLER_DIM_BUF) {
2506       LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, dynamic_index, true, false);
2507       unsigned src_channels = ac_get_llvm_num_components(src);
2508       LLVMValueRef vindex;
2509 
2510       if (src_channels == 3)
2511          src = ac_build_expand_to_vec4(&ctx->ac, src, 3);
2512 
2513       vindex =
2514          LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), ctx->ac.i32_0, "");
2515 
2516       ac_build_buffer_store_format(&ctx->ac, rsrc, src, vindex, ctx->ac.i32_0, args.cache_policy);
2517    } else {
2518       bool level_zero = nir_src_is_const(instr->src[4]) && nir_src_as_uint(instr->src[4]) == 0;
2519 
2520       args.opcode = level_zero ? ac_image_store : ac_image_store_mip;
2521       args.data[0] = src;
2522       args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, true);
2523       get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
2524       args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
2525       if (!level_zero)
2526          args.lod = get_src(ctx, instr->src[4]);
2527       args.dmask = 15;
2528       args.d16 = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(args.data[0])) == 16;
2529 
2530       ac_build_image_opcode(&ctx->ac, &args);
2531    }
2532 
2533    exit_waterfall(ctx, &wctx, NULL);
2534    if (ctx->ac.postponed_kill)
2535       ac_build_endif(&ctx->ac, 7003);
2536 }
2537 
2538 static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr,
2539                                        bool bindless)
2540 {
2541    if (ctx->ac.postponed_kill) {
2542       LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
2543       ac_build_ifcc(&ctx->ac, cond, 7004);
2544    }
2545 
2546    LLVMValueRef params[7];
2547    int param_count = 0;
2548 
2549    bool cmpswap = instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||
2550                   instr->intrinsic == nir_intrinsic_bindless_image_atomic_comp_swap;
2551    const char *atomic_name;
2552    char intrinsic_name[64];
2553    enum ac_atomic_op atomic_subop;
2554    ASSERTED int length;
2555 
2556    enum glsl_sampler_dim dim;
2557    bool is_array;
2558    if (bindless) {
2559       dim = nir_intrinsic_image_dim(instr);
2560       is_array = nir_intrinsic_image_array(instr);
2561    } else {
2562       const struct glsl_type *type = get_image_deref(instr)->type;
2563       dim = glsl_get_sampler_dim(type);
2564       is_array = glsl_sampler_type_is_array(type);
2565    }
2566 
2567    struct waterfall_context wctx;
2568    LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
2569 
2570    switch (instr->intrinsic) {
2571    case nir_intrinsic_bindless_image_atomic_add:
2572    case nir_intrinsic_image_deref_atomic_add:
2573       atomic_name = "add";
2574       atomic_subop = ac_atomic_add;
2575       break;
2576    case nir_intrinsic_bindless_image_atomic_imin:
2577    case nir_intrinsic_image_deref_atomic_imin:
2578       atomic_name = "smin";
2579       atomic_subop = ac_atomic_smin;
2580       break;
2581    case nir_intrinsic_bindless_image_atomic_umin:
2582    case nir_intrinsic_image_deref_atomic_umin:
2583       atomic_name = "umin";
2584       atomic_subop = ac_atomic_umin;
2585       break;
2586    case nir_intrinsic_bindless_image_atomic_imax:
2587    case nir_intrinsic_image_deref_atomic_imax:
2588       atomic_name = "smax";
2589       atomic_subop = ac_atomic_smax;
2590       break;
2591    case nir_intrinsic_bindless_image_atomic_umax:
2592    case nir_intrinsic_image_deref_atomic_umax:
2593       atomic_name = "umax";
2594       atomic_subop = ac_atomic_umax;
2595       break;
2596    case nir_intrinsic_bindless_image_atomic_and:
2597    case nir_intrinsic_image_deref_atomic_and:
2598       atomic_name = "and";
2599       atomic_subop = ac_atomic_and;
2600       break;
2601    case nir_intrinsic_bindless_image_atomic_or:
2602    case nir_intrinsic_image_deref_atomic_or:
2603       atomic_name = "or";
2604       atomic_subop = ac_atomic_or;
2605       break;
2606    case nir_intrinsic_bindless_image_atomic_xor:
2607    case nir_intrinsic_image_deref_atomic_xor:
2608       atomic_name = "xor";
2609       atomic_subop = ac_atomic_xor;
2610       break;
2611    case nir_intrinsic_bindless_image_atomic_exchange:
2612    case nir_intrinsic_image_deref_atomic_exchange:
2613       atomic_name = "swap";
2614       atomic_subop = ac_atomic_swap;
2615       break;
2616    case nir_intrinsic_bindless_image_atomic_comp_swap:
2617    case nir_intrinsic_image_deref_atomic_comp_swap:
2618       atomic_name = "cmpswap";
2619       atomic_subop = 0; /* not used */
2620       break;
2621    case nir_intrinsic_bindless_image_atomic_inc_wrap:
2622    case nir_intrinsic_image_deref_atomic_inc_wrap: {
2623       atomic_name = "inc";
2624       atomic_subop = ac_atomic_inc_wrap;
2625       break;
2626    }
2627    case nir_intrinsic_bindless_image_atomic_dec_wrap:
2628    case nir_intrinsic_image_deref_atomic_dec_wrap:
2629       atomic_name = "dec";
2630       atomic_subop = ac_atomic_dec_wrap;
2631       break;
2632    default:
2633       abort();
2634    }
2635 
2636    if (cmpswap)
2637       params[param_count++] = get_src(ctx, instr->src[4]);
2638    params[param_count++] = get_src(ctx, instr->src[3]);
2639 
2640    LLVMValueRef result;
2641    if (dim == GLSL_SAMPLER_DIM_BUF) {
2642       params[param_count++] = get_image_buffer_descriptor(ctx, instr, dynamic_index, true, true);
2643       params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
2644                                                       ctx->ac.i32_0, ""); /* vindex */
2645       params[param_count++] = ctx->ac.i32_0;                              /* voffset */
2646       if (cmpswap && instr->dest.ssa.bit_size == 64) {
2647          result = emit_ssbo_comp_swap_64(ctx, params[2], params[3], params[1], params[0], true);
2648       } else {
2649          if (LLVM_VERSION_MAJOR >= 9) {
2650             /* XXX: The new raw/struct atomic intrinsics are buggy
2651              * with LLVM 8, see r358579.
2652              */
2653             params[param_count++] = ctx->ac.i32_0; /* soffset */
2654             params[param_count++] = ctx->ac.i32_0; /* slc */
2655 
2656             length = snprintf(intrinsic_name, sizeof(intrinsic_name),
2657                               "llvm.amdgcn.struct.buffer.atomic.%s.%s", atomic_name,
2658                               instr->dest.ssa.bit_size == 64 ? "i64" : "i32");
2659          } else {
2660             assert(instr->dest.ssa.bit_size == 64);
2661             params[param_count++] = ctx->ac.i1false; /* slc */
2662 
2663             length = snprintf(intrinsic_name, sizeof(intrinsic_name), "llvm.amdgcn.buffer.atomic.%s",
2664                               atomic_name);
2665          }
2666 
2667          assert(length < sizeof(intrinsic_name));
2668          result = ac_build_intrinsic(&ctx->ac, intrinsic_name, LLVMTypeOf(params[0]), params, param_count, 0);
2669       }
2670    } else {
2671       struct ac_image_args args = {0};
2672       args.opcode = cmpswap ? ac_image_atomic_cmpswap : ac_image_atomic;
2673       args.atomic = atomic_subop;
2674       args.data[0] = params[0];
2675       if (cmpswap)
2676          args.data[1] = params[1];
2677       args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, true);
2678       get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
2679       args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
2680 
2681       result = ac_build_image_opcode(&ctx->ac, &args);
2682    }
2683 
2684    result = exit_waterfall(ctx, &wctx, result);
2685    if (ctx->ac.postponed_kill)
2686       ac_build_endif(&ctx->ac, 7004);
2687    return result;
2688 }
2689 
2690 static LLVMValueRef visit_image_samples(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
2691 {
2692    struct waterfall_context wctx;
2693    LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
2694    LLVMValueRef rsrc = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);
2695 
2696    LLVMValueRef ret = ac_build_image_get_sample_count(&ctx->ac, rsrc);
2697 
2698    return exit_waterfall(ctx, &wctx, ret);
2699 }
2700 
2701 static LLVMValueRef visit_image_size(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr,
2702                                      bool bindless)
2703 {
2704    LLVMValueRef res;
2705 
2706    enum glsl_sampler_dim dim;
2707    bool is_array;
2708    if (bindless) {
2709       dim = nir_intrinsic_image_dim(instr);
2710       is_array = nir_intrinsic_image_array(instr);
2711    } else {
2712       const struct glsl_type *type = get_image_deref(instr)->type;
2713       dim = glsl_get_sampler_dim(type);
2714       is_array = glsl_sampler_type_is_array(type);
2715    }
2716 
2717    struct waterfall_context wctx;
2718    LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
2719 
2720    if (dim == GLSL_SAMPLER_DIM_BUF) {
2721       res = get_buffer_size(
2722          ctx, get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_BUFFER, false), true);
2723    } else {
2724 
2725       struct ac_image_args args = {0};
2726 
2727       args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
2728       args.dmask = 0xf;
2729       args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);
2730       args.opcode = ac_image_get_resinfo;
2731       assert(nir_src_as_uint(instr->src[1]) == 0);
2732       args.lod = ctx->ac.i32_0;
2733       args.attributes = AC_FUNC_ATTR_READNONE;
2734 
2735       res = ac_build_image_opcode(&ctx->ac, &args);
2736 
2737       LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
2738 
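      /* Cube arrays: resinfo reports the layer-face count, so divide the
       * third component by 6 to return the number of cubes.
       */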
2739       if (dim == GLSL_SAMPLER_DIM_CUBE && is_array) {
2740          LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
2741          LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
2742          z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
2743          res = LLVMBuildInsertElement(ctx->ac.builder, res, z, two, "");
2744       }
2745 
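      /* GFX9 lays 1D arrays out as 2D, so the layer count comes back in the
       * third component; move it into the second component, where a 1D array
       * size is expected.
       */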
2746       if (ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_1D && is_array) {
2747          LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
2748          res = LLVMBuildInsertElement(ctx->ac.builder, res, layers, ctx->ac.i32_1, "");
2749       }
2750    }
2751    return exit_waterfall(ctx, &wctx, res);
2752 }
2753 
2754 static void emit_membar(struct ac_llvm_context *ac, const nir_intrinsic_instr *instr)
2755 {
2756    unsigned wait_flags = 0;
2757 
2758    switch (instr->intrinsic) {
2759    case nir_intrinsic_memory_barrier:
2760    case nir_intrinsic_group_memory_barrier:
2761       wait_flags = AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE;
2762       break;
2763    case nir_intrinsic_memory_barrier_buffer:
2764    case nir_intrinsic_memory_barrier_image:
2765       wait_flags = AC_WAIT_VLOAD | AC_WAIT_VSTORE;
2766       break;
2767    case nir_intrinsic_memory_barrier_shared:
2768       wait_flags = AC_WAIT_LGKM;
2769       break;
2770    default:
2771       break;
2772    }
2773 
2774    ac_build_waitcnt(ac, wait_flags);
2775 }
2776 
2777 void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage)
2778 {
2779    /* GFX6 only (thanks to a hw bug workaround):
2780     * The real barrier instruction isn't needed, because an entire patch
2781     * always fits into a single wave.
2782     */
2783    if (ac->chip_class == GFX6 && stage == MESA_SHADER_TESS_CTRL) {
2784       ac_build_waitcnt(ac, AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE);
2785       return;
2786    }
2787    ac_build_s_barrier(ac);
2788 }
2789 
2790 static void emit_discard(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr)
2791 {
2792    LLVMValueRef cond;
2793 
2794    if (instr->intrinsic == nir_intrinsic_discard_if ||
2795        instr->intrinsic == nir_intrinsic_terminate_if) {
2796       cond = LLVMBuildNot(ctx->ac.builder, get_src(ctx, instr->src[0]), "");
2797    } else {
2798       assert(instr->intrinsic == nir_intrinsic_discard);
2799       cond = ctx->ac.i1false;
2800    }
2801 
2802    ac_build_kill_if_false(&ctx->ac, cond);
2803 }
2804 
2805 static void emit_demote(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr)
2806 {
2807    LLVMValueRef cond;
2808 
2809    if (instr->intrinsic == nir_intrinsic_demote_if) {
2810       cond = LLVMBuildNot(ctx->ac.builder, get_src(ctx, instr->src[0]), "");
2811    } else {
2812       assert(instr->intrinsic == nir_intrinsic_demote);
2813       cond = ctx->ac.i1false;
2814    }
2815 
2816    /* Kill immediately while maintaining WQM. */
2817    ac_build_kill_if_false(&ctx->ac, ac_build_wqm_vote(&ctx->ac, cond));
2818 
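   /* Also fold the demoted lanes into the postponed-kill mask, which guards
    * later side effects (image/buffer stores and atomics) elsewhere in this
    * file.
    */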
2819    LLVMValueRef mask = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
2820    mask = LLVMBuildAnd(ctx->ac.builder, mask, cond, "");
2821    LLVMBuildStore(ctx->ac.builder, mask, ctx->ac.postponed_kill);
2822    return;
2823 }
2824 
2825 static LLVMValueRef visit_load_local_invocation_index(struct ac_nir_context *ctx)
2826 {
2827    LLVMValueRef result;
2828    LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac);
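   /* tg_size carries the wave index within the workgroup in bits [11:6] (see
    * visit_load_subgroup_id), so the masked value below is wave_id * 64; for
    * wave32 it is halved to wave_id * 32 before the lane id is added.
    */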
2829    result = LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->tg_size),
2830                          LLVMConstInt(ctx->ac.i32, 0xfc0, false), "");
2831 
2832    if (ctx->ac.wave_size == 32)
2833       result = LLVMBuildLShr(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 1, false), "");
2834 
2835    return LLVMBuildAdd(ctx->ac.builder, result, thread_id, "");
2836 }
2837 
2838 static LLVMValueRef visit_load_subgroup_id(struct ac_nir_context *ctx)
2839 {
2840    if (ctx->stage == MESA_SHADER_COMPUTE) {
2841       LLVMValueRef result;
2842       result = LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->tg_size),
2843                             LLVMConstInt(ctx->ac.i32, 0xfc0, false), "");
2844       return LLVMBuildLShr(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 6, false), "");
2845    } else {
2846       return LLVMConstInt(ctx->ac.i32, 0, false);
2847    }
2848 }
2849 
2850 static LLVMValueRef visit_load_num_subgroups(struct ac_nir_context *ctx)
2851 {
2852    if (ctx->stage == MESA_SHADER_COMPUTE) {
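      /* Bits [5:0] of tg_size hold the number of waves in the workgroup. */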
2853       return LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->tg_size),
2854                           LLVMConstInt(ctx->ac.i32, 0x3f, false), "");
2855    } else {
2856       return LLVMConstInt(ctx->ac.i32, 1, false);
2857    }
2858 }
2859 
2860 static LLVMValueRef visit_first_invocation(struct ac_nir_context *ctx)
2861 {
2862    LLVMValueRef active_set = ac_build_ballot(&ctx->ac, ctx->ac.i32_1);
2863    const char *intr = ctx->ac.wave_size == 32 ? "llvm.cttz.i32" : "llvm.cttz.i64";
2864 
2865    /* The second argument is whether cttz(0) should be defined, but we do not care. */
2866    LLVMValueRef args[] = {active_set, ctx->ac.i1false};
2867    LLVMValueRef result = ac_build_intrinsic(&ctx->ac, intr, ctx->ac.iN_wavemask, args, 2,
2868                                             AC_FUNC_ATTR_NOUNWIND | AC_FUNC_ATTR_READNONE);
2869 
2870    return LLVMBuildTrunc(ctx->ac.builder, result, ctx->ac.i32, "");
2871 }
2872 
2873 static LLVMValueRef visit_load_shared(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr)
2874 {
2875    LLVMValueRef values[4], derived_ptr, index, ret;
2876 
2877    LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0], instr->dest.ssa.bit_size);
2878 
2879    for (int chan = 0; chan < instr->num_components; chan++) {
2880       index = LLVMConstInt(ctx->ac.i32, chan, 0);
2881       derived_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, "");
2882       values[chan] = LLVMBuildLoad(ctx->ac.builder, derived_ptr, "");
2883    }
2884 
2885    ret = ac_build_gather_values(&ctx->ac, values, instr->num_components);
2886    return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
2887 }
2888 
2889 static void visit_store_shared(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr)
2890 {
2891    LLVMValueRef derived_ptr, data, index;
2892    LLVMBuilderRef builder = ctx->ac.builder;
2893 
2894    LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[1], instr->src[0].ssa->bit_size);
2895    LLVMValueRef src = get_src(ctx, instr->src[0]);
2896 
2897    int writemask = nir_intrinsic_write_mask(instr);
2898    for (int chan = 0; chan < 4; chan++) {
2899       if (!(writemask & (1 << chan))) {
2900          continue;
2901       }
2902       data = ac_llvm_extract_elem(&ctx->ac, src, chan);
2903       index = LLVMConstInt(ctx->ac.i32, chan, 0);
2904       derived_ptr = LLVMBuildGEP(builder, ptr, &index, 1, "");
2905       LLVMBuildStore(builder, data, derived_ptr);
2906    }
2907 }
2908 
2909 static LLVMValueRef visit_var_atomic(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr,
2910                                      LLVMValueRef ptr, int src_idx)
2911 {
2912    if (ctx->ac.postponed_kill) {
2913       LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
2914       ac_build_ifcc(&ctx->ac, cond, 7005);
2915    }
2916 
2917    LLVMValueRef result;
2918    LLVMValueRef src = get_src(ctx, instr->src[src_idx]);
2919 
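   /* LLVM 9+ understands the single-address-space ("one-as") variant of the
    * workgroup sync scope, which is enough here since these atomics only
    * touch LDS.
    */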
2920    const char *sync_scope = LLVM_VERSION_MAJOR >= 9 ? "workgroup-one-as" : "workgroup";
2921 
2922    if (instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap) {
2923       LLVMValueRef src1 = get_src(ctx, instr->src[src_idx + 1]);
2924       result = ac_build_atomic_cmp_xchg(&ctx->ac, ptr, src, src1, sync_scope);
2925       result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, "");
2926    } else {
2927       LLVMAtomicRMWBinOp op;
2928       switch (instr->intrinsic) {
2929       case nir_intrinsic_shared_atomic_add:
2930          op = LLVMAtomicRMWBinOpAdd;
2931          break;
2932       case nir_intrinsic_shared_atomic_umin:
2933          op = LLVMAtomicRMWBinOpUMin;
2934          break;
2935       case nir_intrinsic_shared_atomic_umax:
2936          op = LLVMAtomicRMWBinOpUMax;
2937          break;
2938       case nir_intrinsic_shared_atomic_imin:
2939          op = LLVMAtomicRMWBinOpMin;
2940          break;
2941       case nir_intrinsic_shared_atomic_imax:
2942          op = LLVMAtomicRMWBinOpMax;
2943          break;
2944       case nir_intrinsic_shared_atomic_and:
2945          op = LLVMAtomicRMWBinOpAnd;
2946          break;
2947       case nir_intrinsic_shared_atomic_or:
2948          op = LLVMAtomicRMWBinOpOr;
2949          break;
2950       case nir_intrinsic_shared_atomic_xor:
2951          op = LLVMAtomicRMWBinOpXor;
2952          break;
2953       case nir_intrinsic_shared_atomic_exchange:
2954          op = LLVMAtomicRMWBinOpXchg;
2955          break;
2956 #if LLVM_VERSION_MAJOR >= 10
2957       case nir_intrinsic_shared_atomic_fadd:
2958          op = LLVMAtomicRMWBinOpFAdd;
2959          break;
2960 #endif
2961       default:
2962          return NULL;
2963       }
2964 
2965       LLVMValueRef val;
2966 
2967       if (instr->intrinsic == nir_intrinsic_shared_atomic_fadd) {
2968          val = ac_to_float(&ctx->ac, src);
2969 
2970          LLVMTypeRef ptr_type =
2971             LLVMPointerType(LLVMTypeOf(val), LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)));
2972          ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ptr_type, "");
2973       } else {
2974          val = ac_to_integer(&ctx->ac, src);
2975       }
2976 
2977       result = ac_build_atomic_rmw(&ctx->ac, op, ptr, val, sync_scope);
2978 
2979       if (instr->intrinsic == nir_intrinsic_shared_atomic_fadd ||
2980           instr->intrinsic == nir_intrinsic_deref_atomic_fadd) {
2981          result = ac_to_integer(&ctx->ac, result);
2982       }
2983    }
2984 
2985    if (ctx->ac.postponed_kill)
2986       ac_build_endif(&ctx->ac, 7005);
2987    return result;
2988 }
2989 
2990 static LLVMValueRef load_sample_pos(struct ac_nir_context *ctx)
2991 {
2992    LLVMValueRef values[2];
2993    LLVMValueRef pos[2];
2994 
2995    pos[0] = ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->frag_pos[0]));
2996    pos[1] = ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->frag_pos[1]));
2997 
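   /* With sample-rate shading, frag_pos.xy points at the sample location, so
    * its fractional part is the sample position within the pixel.
    */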
2998    values[0] = ac_build_fract(&ctx->ac, pos[0], 32);
2999    values[1] = ac_build_fract(&ctx->ac, pos[1], 32);
3000    return ac_build_gather_values(&ctx->ac, values, 2);
3001 }
3002 
3003 static LLVMValueRef lookup_interp_param(struct ac_nir_context *ctx, enum glsl_interp_mode interp,
3004                                         unsigned location)
3005 {
3006    switch (interp) {
3007    case INTERP_MODE_FLAT:
3008    default:
3009       return NULL;
3010    case INTERP_MODE_SMOOTH:
3011    case INTERP_MODE_NONE:
3012       if (location == INTERP_CENTER)
3013          return ac_get_arg(&ctx->ac, ctx->args->persp_center);
3014       else if (location == INTERP_CENTROID)
3015          return ctx->abi->persp_centroid;
3016       else if (location == INTERP_SAMPLE)
3017          return ac_get_arg(&ctx->ac, ctx->args->persp_sample);
3018       break;
3019    case INTERP_MODE_NOPERSPECTIVE:
3020       if (location == INTERP_CENTER)
3021          return ac_get_arg(&ctx->ac, ctx->args->linear_center);
3022       else if (location == INTERP_CENTROID)
3023          return ctx->abi->linear_centroid;
3024       else if (location == INTERP_SAMPLE)
3025          return ac_get_arg(&ctx->ac, ctx->args->linear_sample);
3026       break;
3027    }
3028    return NULL;
3029 }
3030 
3031 static LLVMValueRef barycentric_center(struct ac_nir_context *ctx, unsigned mode)
3032 {
3033    LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_CENTER);
3034    return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");
3035 }
3036 
3037 static LLVMValueRef barycentric_offset(struct ac_nir_context *ctx, unsigned mode,
3038                                        LLVMValueRef offset)
3039 {
3040    LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_CENTER);
3041    LLVMValueRef src_c0 =
3042       ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, offset, ctx->ac.i32_0, ""));
3043    LLVMValueRef src_c1 =
3044       ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, offset, ctx->ac.i32_1, ""));
3045 
3046    LLVMValueRef ij_out[2];
3047    LLVMValueRef ddxy_out = ac_build_ddxy_interp(&ctx->ac, interp_param);
3048 
3049    /*
3050     * take the I then J parameters, and the DDX/Y for it, and
3051     * calculate the IJ inputs for the interpolator.
3052     * temp1 = ddx * offset/sample.x + I;
3053     * interp_param.I = ddy * offset/sample.y + temp1;
3054     * temp1 = ddx * offset/sample.x + J;
3055     * interp_param.J = ddy * offset/sample.y + temp1;
3056     */
3057    for (unsigned i = 0; i < 2; i++) {
3058       LLVMValueRef ix_ll = LLVMConstInt(ctx->ac.i32, i, false);
3059       LLVMValueRef iy_ll = LLVMConstInt(ctx->ac.i32, i + 2, false);
3060       LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->ac.builder, ddxy_out, ix_ll, "");
3061       LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->ac.builder, ddxy_out, iy_ll, "");
3062       LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->ac.builder, interp_param, ix_ll, "");
3063       LLVMValueRef temp1, temp2;
3064 
3065       interp_el = LLVMBuildBitCast(ctx->ac.builder, interp_el, ctx->ac.f32, "");
3066 
3067       temp1 = ac_build_fmad(&ctx->ac, ddx_el, src_c0, interp_el);
3068       temp2 = ac_build_fmad(&ctx->ac, ddy_el, src_c1, temp1);
3069 
3070       ij_out[i] = LLVMBuildBitCast(ctx->ac.builder, temp2, ctx->ac.i32, "");
3071    }
3072    interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2);
3073    return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");
3074 }
3075 
3076 static LLVMValueRef barycentric_centroid(struct ac_nir_context *ctx, unsigned mode)
3077 {
3078    LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_CENTROID);
3079    return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");
3080 }
3081 
3082 static LLVMValueRef barycentric_at_sample(struct ac_nir_context *ctx, unsigned mode,
3083                                           LLVMValueRef sample_id)
3084 {
3085    if (ctx->abi->interp_at_sample_force_center)
3086       return barycentric_center(ctx, mode);
3087 
3088    LLVMValueRef halfval = LLVMConstReal(ctx->ac.f32, 0.5f);
3089 
3090    /* Fetch the position of this sample within the pixel. */
3091    LLVMValueRef sample_pos = ctx->abi->load_sample_position(ctx->abi, sample_id);
3092 
3093    LLVMValueRef src_c0 = LLVMBuildExtractElement(ctx->ac.builder, sample_pos, ctx->ac.i32_0, "");
3094    src_c0 = LLVMBuildFSub(ctx->ac.builder, src_c0, halfval, "");
3095    LLVMValueRef src_c1 = LLVMBuildExtractElement(ctx->ac.builder, sample_pos, ctx->ac.i32_1, "");
3096    src_c1 = LLVMBuildFSub(ctx->ac.builder, src_c1, halfval, "");
3097    LLVMValueRef coords[] = {src_c0, src_c1};
3098    LLVMValueRef offset = ac_build_gather_values(&ctx->ac, coords, 2);
3099 
3100    return barycentric_offset(ctx, mode, offset);
3101 }
3102 
3103 static LLVMValueRef barycentric_sample(struct ac_nir_context *ctx, unsigned mode)
3104 {
3105    LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_SAMPLE);
3106    return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");
3107 }
3108 
3109 static LLVMValueRef barycentric_model(struct ac_nir_context *ctx)
3110 {
3111    return LLVMBuildBitCast(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->pull_model),
3112                            ctx->ac.v3i32, "");
3113 }
3114 
3115 static LLVMValueRef load_interpolated_input(struct ac_nir_context *ctx, LLVMValueRef interp_param,
3116                                             unsigned index, unsigned comp_start,
3117                                             unsigned num_components, unsigned bitsize)
3118 {
3119    LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, index, false);
3120    LLVMValueRef interp_param_f;
3121 
3122    interp_param_f = LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2f32, "");
3123    LLVMValueRef i = LLVMBuildExtractElement(ctx->ac.builder, interp_param_f, ctx->ac.i32_0, "");
3124    LLVMValueRef j = LLVMBuildExtractElement(ctx->ac.builder, interp_param_f, ctx->ac.i32_1, "");
3125 
3126    /* Workaround for issue 2647: kill threads with infinite interpolation coeffs */
3127    if (ctx->verified_interp && !_mesa_hash_table_search(ctx->verified_interp, interp_param)) {
3128       LLVMValueRef args[2];
3129       args[0] = i;
3130       args[1] = LLVMConstInt(ctx->ac.i32, S_NAN | Q_NAN | N_INFINITY | P_INFINITY, false);
3131       LLVMValueRef cond = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f32", ctx->ac.i1, args, 2,
3132                                              AC_FUNC_ATTR_READNONE);
3133       ac_build_kill_if_false(&ctx->ac, LLVMBuildNot(ctx->ac.builder, cond, ""));
3134       _mesa_hash_table_insert(ctx->verified_interp, interp_param, interp_param);
3135    }
3136 
3137    LLVMValueRef values[4];
3138    assert(bitsize == 16 || bitsize == 32);
3139    for (unsigned comp = 0; comp < num_components; comp++) {
3140       LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, comp_start + comp, false);
3141       if (bitsize == 16) {
3142          values[comp] = ac_build_fs_interp_f16(&ctx->ac, llvm_chan, attr_number,
3143                                                ac_get_arg(&ctx->ac, ctx->args->prim_mask), i, j);
3144       } else {
3145          values[comp] = ac_build_fs_interp(&ctx->ac, llvm_chan, attr_number,
3146                                            ac_get_arg(&ctx->ac, ctx->args->prim_mask), i, j);
3147       }
3148    }
3149 
3150    return ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, num_components));
3151 }
3152 
3153 static LLVMValueRef visit_load(struct ac_nir_context *ctx, nir_intrinsic_instr *instr,
3154                                bool is_output)
3155 {
3156    LLVMValueRef values[8];
3157    LLVMTypeRef dest_type = get_def_type(ctx, &instr->dest.ssa);
3158    LLVMTypeRef component_type;
3159    unsigned base = nir_intrinsic_base(instr);
3160    unsigned component = nir_intrinsic_component(instr);
3161    unsigned count = instr->dest.ssa.num_components;
3162    nir_src *vertex_index_src = nir_get_io_vertex_index_src(instr);
3163    LLVMValueRef vertex_index = vertex_index_src ? get_src(ctx, *vertex_index_src) : NULL;
3164    nir_src offset = *nir_get_io_offset_src(instr);
3165    LLVMValueRef indir_index = NULL;
3166 
3167    switch (instr->dest.ssa.bit_size) {
3168    case 16:
3169    case 32:
3170       break;
3171    case 64:
3172       unreachable("64-bit IO should have been lowered");
3173       return NULL;
3174    default:
3175       unreachable("unhandled load type");
3176       return NULL;
3177    }
3178 
3179    if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind)
3180       component_type = LLVMGetElementType(dest_type);
3181    else
3182       component_type = dest_type;
3183 
3184    if (nir_src_is_const(offset))
3185       assert(nir_src_as_uint(offset) == 0);
3186    else
3187       indir_index = get_src(ctx, offset);
3188 
3189    if (ctx->stage == MESA_SHADER_TESS_CTRL ||
3190        (ctx->stage == MESA_SHADER_TESS_EVAL && !is_output)) {
3191       LLVMValueRef result = ctx->abi->load_tess_varyings(ctx->abi, component_type,
3192                                                          vertex_index, indir_index,
3193                                                          base, component,
3194                                                          count, !is_output);
3195       if (instr->dest.ssa.bit_size == 16) {
3196          result = ac_to_integer(&ctx->ac, result);
3197          result = LLVMBuildTrunc(ctx->ac.builder, result, dest_type, "");
3198       }
3199       return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
3200    }
3201 
3202    /* No indirect indexing is allowed after this point. */
3203    assert(!indir_index);
3204 
3205    if (ctx->stage == MESA_SHADER_GEOMETRY) {
3206       assert(nir_src_is_const(*vertex_index_src));
3207 
3208       return ctx->abi->load_inputs(ctx->abi, base, component, count,
3209                                    nir_src_as_uint(*vertex_index_src), component_type);
3210    }
3211 
3212    if (ctx->stage == MESA_SHADER_FRAGMENT && is_output &&
3213        nir_intrinsic_io_semantics(instr).fb_fetch_output)
3214       return ctx->abi->emit_fbfetch(ctx->abi);
3215 
3216    /* Other non-fragment cases have inputs and outputs in temporaries. */
3217    if (ctx->stage != MESA_SHADER_FRAGMENT) {
3218       for (unsigned chan = component; chan < count + component; chan++) {
3219          if (is_output) {
3220             values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->abi->outputs[base * 4 + chan], "");
3221          } else {
3222             values[chan] = ctx->abi->inputs[base * 4 + chan];
3223             if (!values[chan])
3224                values[chan] = LLVMGetUndef(ctx->ac.i32);
3225          }
3226       }
3227       LLVMValueRef result = ac_build_varying_gather_values(&ctx->ac, values, count, component);
3228       return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
3229    }
3230 
3231    /* Fragment shader inputs. */
3232    unsigned vertex_id = 2; /* P0 */
3233 
3234    if (instr->intrinsic == nir_intrinsic_load_input_vertex) {
3235       nir_const_value *src0 = nir_src_as_const_value(instr->src[0]);
3236 
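      /* The interp mov instruction selects vertices as 0 = P10, 1 = P20,
       * 2 = P0, so remap NIR's per-primitive vertex indices 0/1/2 accordingly.
       */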
3237       switch (src0[0].i32) {
3238       case 0:
3239          vertex_id = 2;
3240          break;
3241       case 1:
3242          vertex_id = 0;
3243          break;
3244       case 2:
3245          vertex_id = 1;
3246          break;
3247       default:
3248          unreachable("Invalid vertex index");
3249       }
3250    }
3251 
3252    LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, base, false);
3253 
3254    for (unsigned chan = 0; chan < count; chan++) {
3255       LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, (component + chan) % 4, false);
3256       values[chan] =
3257          ac_build_fs_interp_mov(&ctx->ac, LLVMConstInt(ctx->ac.i32, vertex_id, false), llvm_chan,
3258                                 attr_number, ac_get_arg(&ctx->ac, ctx->args->prim_mask));
3259       values[chan] = LLVMBuildBitCast(ctx->ac.builder, values[chan], ctx->ac.i32, "");
3260       values[chan] =
3261          LLVMBuildTruncOrBitCast(ctx->ac.builder, values[chan],
3262                                  instr->dest.ssa.bit_size == 16 ? ctx->ac.i16 : ctx->ac.i32, "");
3263    }
3264 
3265    LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, count);
3266    return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
3267 }
3268 
3269 static void visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
3270 {
3271    LLVMValueRef result = NULL;
3272 
3273    switch (instr->intrinsic) {
3274    case nir_intrinsic_ballot:
3275       result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0]));
3276       if (ctx->ac.ballot_mask_bits > ctx->ac.wave_size)
3277          result = LLVMBuildZExt(ctx->ac.builder, result, ctx->ac.iN_ballotmask, "");
3278       break;
3279    case nir_intrinsic_read_invocation:
3280       result =
3281          ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), get_src(ctx, instr->src[1]));
3282       break;
3283    case nir_intrinsic_read_first_invocation:
3284       result = ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), NULL);
3285       break;
3286    case nir_intrinsic_load_subgroup_invocation:
3287       result = ac_get_thread_id(&ctx->ac);
3288       break;
3289    case nir_intrinsic_load_work_group_id: {
3290       LLVMValueRef values[3];
3291 
3292       for (int i = 0; i < 3; i++) {
3293          values[i] = ctx->args->workgroup_ids[i].used
3294                         ? ac_get_arg(&ctx->ac, ctx->args->workgroup_ids[i])
3295                         : ctx->ac.i32_0;
3296       }
3297 
3298       result = ac_build_gather_values(&ctx->ac, values, 3);
3299       break;
3300    }
3301    case nir_intrinsic_load_base_vertex:
3302    case nir_intrinsic_load_first_vertex:
3303       result = ctx->abi->load_base_vertex(ctx->abi);
3304       break;
3305    case nir_intrinsic_load_local_group_size:
3306       result = ctx->abi->load_local_group_size(ctx->abi);
3307       break;
3308    case nir_intrinsic_load_vertex_id:
3309       result = LLVMBuildAdd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->vertex_id),
3310                             ac_get_arg(&ctx->ac, ctx->args->base_vertex), "");
3311       break;
3312    case nir_intrinsic_load_vertex_id_zero_base: {
3313       result = ctx->abi->vertex_id;
3314       break;
3315    }
3316    case nir_intrinsic_load_local_invocation_id: {
3317       result = ac_get_arg(&ctx->ac, ctx->args->local_invocation_ids);
3318       break;
3319    }
3320    case nir_intrinsic_load_base_instance:
3321       result = ac_get_arg(&ctx->ac, ctx->args->start_instance);
3322       break;
3323    case nir_intrinsic_load_draw_id:
3324       result = ac_get_arg(&ctx->ac, ctx->args->draw_id);
3325       break;
3326    case nir_intrinsic_load_view_index:
3327       result = ac_get_arg(&ctx->ac, ctx->args->view_index);
3328       break;
3329    case nir_intrinsic_load_invocation_id:
3330       if (ctx->stage == MESA_SHADER_TESS_CTRL) {
3331          result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->tcs_rel_ids), 8, 5);
3332       } else {
3333          if (ctx->ac.chip_class >= GFX10) {
3334             result =
3335                LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id),
3336                             LLVMConstInt(ctx->ac.i32, 127, 0), "");
3337          } else {
3338             result = ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id);
3339          }
3340       }
3341       break;
3342    case nir_intrinsic_load_primitive_id:
3343       if (ctx->stage == MESA_SHADER_GEOMETRY) {
3344          result = ac_get_arg(&ctx->ac, ctx->args->gs_prim_id);
3345       } else if (ctx->stage == MESA_SHADER_TESS_CTRL) {
3346          result = ac_get_arg(&ctx->ac, ctx->args->tcs_patch_id);
3347       } else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
3348          result = ac_get_arg(&ctx->ac, ctx->args->tes_patch_id);
3349       } else
3350          fprintf(stderr, "Unknown primitive id intrinsic: %d", ctx->stage);
3351       break;
3352    case nir_intrinsic_load_sample_id:
3353       result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ancillary), 8, 4);
3354       break;
3355    case nir_intrinsic_load_sample_pos:
3356       result = load_sample_pos(ctx);
3357       break;
3358    case nir_intrinsic_load_sample_mask_in:
3359       result = ctx->abi->load_sample_mask_in(ctx->abi);
3360       break;
3361    case nir_intrinsic_load_frag_coord: {
3362       LLVMValueRef values[4] = {
3363          ac_get_arg(&ctx->ac, ctx->args->frag_pos[0]), ac_get_arg(&ctx->ac, ctx->args->frag_pos[1]),
3364          ac_get_arg(&ctx->ac, ctx->args->frag_pos[2]),
3365          ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ac_get_arg(&ctx->ac, ctx->args->frag_pos[3]))};
3366       result = ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, 4));
3367       break;
3368    }
3369    case nir_intrinsic_load_layer_id:
3370       result = ctx->abi->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
3371       break;
3372    case nir_intrinsic_load_front_face:
3373       result = emit_i2b(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->front_face));
3374       break;
3375    case nir_intrinsic_load_helper_invocation:
3376       result = ac_build_load_helper_invocation(&ctx->ac);
3377       break;
3378    case nir_intrinsic_is_helper_invocation:
3379       result = ac_build_is_helper_invocation(&ctx->ac);
3380       break;
3381    case nir_intrinsic_load_color0:
3382       result = ctx->abi->color0;
3383       break;
3384    case nir_intrinsic_load_color1:
3385       result = ctx->abi->color1;
3386       break;
3387    case nir_intrinsic_load_user_data_amd:
3388       assert(LLVMTypeOf(ctx->abi->user_data) == ctx->ac.v4i32);
3389       result = ctx->abi->user_data;
3390       break;
3391    case nir_intrinsic_load_instance_id:
3392       result = ctx->abi->instance_id;
3393       break;
3394    case nir_intrinsic_load_num_work_groups:
3395       result = ac_get_arg(&ctx->ac, ctx->args->num_work_groups);
3396       break;
3397    case nir_intrinsic_load_local_invocation_index:
3398       result = visit_load_local_invocation_index(ctx);
3399       break;
3400    case nir_intrinsic_load_subgroup_id:
3401       result = visit_load_subgroup_id(ctx);
3402       break;
3403    case nir_intrinsic_load_num_subgroups:
3404       result = visit_load_num_subgroups(ctx);
3405       break;
3406    case nir_intrinsic_first_invocation:
3407       result = visit_first_invocation(ctx);
3408       break;
3409    case nir_intrinsic_load_push_constant:
3410       result = visit_load_push_constant(ctx, instr);
3411       break;
3412    case nir_intrinsic_vulkan_resource_index: {
3413       LLVMValueRef index = get_src(ctx, instr->src[0]);
3414       unsigned desc_set = nir_intrinsic_desc_set(instr);
3415       unsigned binding = nir_intrinsic_binding(instr);
3416 
3417       result = ctx->abi->load_resource(ctx->abi, index, desc_set, binding);
3418       break;
3419    }
3420    case nir_intrinsic_vulkan_resource_reindex:
3421       result = visit_vulkan_resource_reindex(ctx, instr);
3422       break;
3423    case nir_intrinsic_store_ssbo:
3424       visit_store_ssbo(ctx, instr);
3425       break;
3426    case nir_intrinsic_load_ssbo:
3427       result = visit_load_buffer(ctx, instr);
3428       break;
3429    case nir_intrinsic_load_global:
3430       result = visit_load_global(ctx, instr);
3431       break;
3432    case nir_intrinsic_store_global:
3433       visit_store_global(ctx, instr);
3434       break;
3435    case nir_intrinsic_global_atomic_add:
3436    case nir_intrinsic_global_atomic_imin:
3437    case nir_intrinsic_global_atomic_umin:
3438    case nir_intrinsic_global_atomic_imax:
3439    case nir_intrinsic_global_atomic_umax:
3440    case nir_intrinsic_global_atomic_and:
3441    case nir_intrinsic_global_atomic_or:
3442    case nir_intrinsic_global_atomic_xor:
3443    case nir_intrinsic_global_atomic_exchange:
3444    case nir_intrinsic_global_atomic_comp_swap:
3445       result = visit_global_atomic(ctx, instr);
3446       break;
3447    case nir_intrinsic_ssbo_atomic_add:
3448    case nir_intrinsic_ssbo_atomic_imin:
3449    case nir_intrinsic_ssbo_atomic_umin:
3450    case nir_intrinsic_ssbo_atomic_imax:
3451    case nir_intrinsic_ssbo_atomic_umax:
3452    case nir_intrinsic_ssbo_atomic_and:
3453    case nir_intrinsic_ssbo_atomic_or:
3454    case nir_intrinsic_ssbo_atomic_xor:
3455    case nir_intrinsic_ssbo_atomic_exchange:
3456    case nir_intrinsic_ssbo_atomic_comp_swap:
3457       result = visit_atomic_ssbo(ctx, instr);
3458       break;
3459    case nir_intrinsic_load_ubo:
3460       result = visit_load_ubo_buffer(ctx, instr);
3461       break;
3462    case nir_intrinsic_get_ssbo_size:
3463       result = visit_get_ssbo_size(ctx, instr);
3464       break;
3465    case nir_intrinsic_load_input:
3466    case nir_intrinsic_load_input_vertex:
3467    case nir_intrinsic_load_per_vertex_input:
3468       result = visit_load(ctx, instr, false);
3469       break;
3470    case nir_intrinsic_load_output:
3471    case nir_intrinsic_load_per_vertex_output:
3472       result = visit_load(ctx, instr, true);
3473       break;
3474    case nir_intrinsic_store_output:
3475    case nir_intrinsic_store_per_vertex_output:
3476       visit_store_output(ctx, instr);
3477       break;
3478    case nir_intrinsic_load_shared:
3479       result = visit_load_shared(ctx, instr);
3480       break;
3481    case nir_intrinsic_store_shared:
3482       visit_store_shared(ctx, instr);
3483       break;
3484    case nir_intrinsic_bindless_image_samples:
3485    case nir_intrinsic_image_deref_samples:
3486       result = visit_image_samples(ctx, instr);
3487       break;
3488    case nir_intrinsic_bindless_image_load:
3489       result = visit_image_load(ctx, instr, true);
3490       break;
3491    case nir_intrinsic_image_deref_load:
3492       result = visit_image_load(ctx, instr, false);
3493       break;
3494    case nir_intrinsic_bindless_image_store:
3495       visit_image_store(ctx, instr, true);
3496       break;
3497    case nir_intrinsic_image_deref_store:
3498       visit_image_store(ctx, instr, false);
3499       break;
3500    case nir_intrinsic_bindless_image_atomic_add:
3501    case nir_intrinsic_bindless_image_atomic_imin:
3502    case nir_intrinsic_bindless_image_atomic_umin:
3503    case nir_intrinsic_bindless_image_atomic_imax:
3504    case nir_intrinsic_bindless_image_atomic_umax:
3505    case nir_intrinsic_bindless_image_atomic_and:
3506    case nir_intrinsic_bindless_image_atomic_or:
3507    case nir_intrinsic_bindless_image_atomic_xor:
3508    case nir_intrinsic_bindless_image_atomic_exchange:
3509    case nir_intrinsic_bindless_image_atomic_comp_swap:
3510    case nir_intrinsic_bindless_image_atomic_inc_wrap:
3511    case nir_intrinsic_bindless_image_atomic_dec_wrap:
3512       result = visit_image_atomic(ctx, instr, true);
3513       break;
3514    case nir_intrinsic_image_deref_atomic_add:
3515    case nir_intrinsic_image_deref_atomic_imin:
3516    case nir_intrinsic_image_deref_atomic_umin:
3517    case nir_intrinsic_image_deref_atomic_imax:
3518    case nir_intrinsic_image_deref_atomic_umax:
3519    case nir_intrinsic_image_deref_atomic_and:
3520    case nir_intrinsic_image_deref_atomic_or:
3521    case nir_intrinsic_image_deref_atomic_xor:
3522    case nir_intrinsic_image_deref_atomic_exchange:
3523    case nir_intrinsic_image_deref_atomic_comp_swap:
3524    case nir_intrinsic_image_deref_atomic_inc_wrap:
3525    case nir_intrinsic_image_deref_atomic_dec_wrap:
3526       result = visit_image_atomic(ctx, instr, false);
3527       break;
3528    case nir_intrinsic_bindless_image_size:
3529       result = visit_image_size(ctx, instr, true);
3530       break;
3531    case nir_intrinsic_image_deref_size:
3532       result = visit_image_size(ctx, instr, false);
3533       break;
3534    case nir_intrinsic_shader_clock:
3535       result = ac_build_shader_clock(&ctx->ac, nir_intrinsic_memory_scope(instr));
3536       break;
3537    case nir_intrinsic_discard:
3538    case nir_intrinsic_discard_if:
3539    case nir_intrinsic_terminate:
3540    case nir_intrinsic_terminate_if:
3541       emit_discard(ctx, instr);
3542       break;
3543    case nir_intrinsic_demote:
3544    case nir_intrinsic_demote_if:
3545       emit_demote(ctx, instr);
3546       break;
3547    case nir_intrinsic_memory_barrier:
3548    case nir_intrinsic_group_memory_barrier:
3549    case nir_intrinsic_memory_barrier_buffer:
3550    case nir_intrinsic_memory_barrier_image:
3551    case nir_intrinsic_memory_barrier_shared:
3552       emit_membar(&ctx->ac, instr);
3553       break;
3554    case nir_intrinsic_scoped_barrier: {
3555       assert(!(nir_intrinsic_memory_semantics(instr) &
3556                (NIR_MEMORY_MAKE_AVAILABLE | NIR_MEMORY_MAKE_VISIBLE)));
3557 
3558       nir_variable_mode modes = nir_intrinsic_memory_modes(instr);
3559 
3560       unsigned wait_flags = 0;
3561       if (modes & (nir_var_mem_global | nir_var_mem_ssbo))
3562          wait_flags |= AC_WAIT_VLOAD | AC_WAIT_VSTORE;
3563       if (modes & nir_var_mem_shared)
3564          wait_flags |= AC_WAIT_LGKM;
3565 
3566       if (wait_flags)
3567          ac_build_waitcnt(&ctx->ac, wait_flags);
3568 
3569       if (nir_intrinsic_execution_scope(instr) == NIR_SCOPE_WORKGROUP)
3570          ac_emit_barrier(&ctx->ac, ctx->stage);
3571       break;
3572    }
3573    case nir_intrinsic_memory_barrier_tcs_patch:
3574       break;
3575    case nir_intrinsic_control_barrier:
3576       ac_emit_barrier(&ctx->ac, ctx->stage);
3577       break;
3578    case nir_intrinsic_shared_atomic_add:
3579    case nir_intrinsic_shared_atomic_imin:
3580    case nir_intrinsic_shared_atomic_umin:
3581    case nir_intrinsic_shared_atomic_imax:
3582    case nir_intrinsic_shared_atomic_umax:
3583    case nir_intrinsic_shared_atomic_and:
3584    case nir_intrinsic_shared_atomic_or:
3585    case nir_intrinsic_shared_atomic_xor:
3586    case nir_intrinsic_shared_atomic_exchange:
3587    case nir_intrinsic_shared_atomic_comp_swap:
3588    case nir_intrinsic_shared_atomic_fadd: {
3589       LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0], instr->src[1].ssa->bit_size);
3590       result = visit_var_atomic(ctx, instr, ptr, 1);
3591       break;
3592    }
3593    case nir_intrinsic_deref_atomic_add:
3594    case nir_intrinsic_deref_atomic_imin:
3595    case nir_intrinsic_deref_atomic_umin:
3596    case nir_intrinsic_deref_atomic_imax:
3597    case nir_intrinsic_deref_atomic_umax:
3598    case nir_intrinsic_deref_atomic_and:
3599    case nir_intrinsic_deref_atomic_or:
3600    case nir_intrinsic_deref_atomic_xor:
3601    case nir_intrinsic_deref_atomic_exchange:
3602    case nir_intrinsic_deref_atomic_comp_swap:
3603    case nir_intrinsic_deref_atomic_fadd: {
3604       LLVMValueRef ptr = get_src(ctx, instr->src[0]);
3605       result = visit_var_atomic(ctx, instr, ptr, 1);
3606       break;
3607    }
3608    case nir_intrinsic_load_barycentric_pixel:
3609       result = barycentric_center(ctx, nir_intrinsic_interp_mode(instr));
3610       break;
3611    case nir_intrinsic_load_barycentric_centroid:
3612       result = barycentric_centroid(ctx, nir_intrinsic_interp_mode(instr));
3613       break;
3614    case nir_intrinsic_load_barycentric_sample:
3615       result = barycentric_sample(ctx, nir_intrinsic_interp_mode(instr));
3616       break;
3617    case nir_intrinsic_load_barycentric_model:
3618       result = barycentric_model(ctx);
3619       break;
3620    case nir_intrinsic_load_barycentric_at_offset: {
3621       LLVMValueRef offset = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
3622       result = barycentric_offset(ctx, nir_intrinsic_interp_mode(instr), offset);
3623       break;
3624    }
3625    case nir_intrinsic_load_barycentric_at_sample: {
3626       LLVMValueRef sample_id = get_src(ctx, instr->src[0]);
3627       result = barycentric_at_sample(ctx, nir_intrinsic_interp_mode(instr), sample_id);
3628       break;
3629    }
3630    case nir_intrinsic_load_interpolated_input: {
3631       /* We assume any indirect loads have been lowered away */
3632       ASSERTED nir_const_value *offset = nir_src_as_const_value(instr->src[1]);
3633       assert(offset);
3634       assert(offset[0].i32 == 0);
3635 
3636       LLVMValueRef interp_param = get_src(ctx, instr->src[0]);
3637       unsigned index = nir_intrinsic_base(instr);
3638       unsigned component = nir_intrinsic_component(instr);
3639       result = load_interpolated_input(ctx, interp_param, index, component,
3640                                        instr->dest.ssa.num_components, instr->dest.ssa.bit_size);
3641       break;
3642    }
3643    case nir_intrinsic_emit_vertex:
3644       ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs);
3645       break;
3646    case nir_intrinsic_emit_vertex_with_counter: {
3647       unsigned stream = nir_intrinsic_stream_id(instr);
3648       LLVMValueRef next_vertex = get_src(ctx, instr->src[0]);
3649       ctx->abi->emit_vertex_with_counter(ctx->abi, stream, next_vertex, ctx->abi->outputs);
3650       break;
3651    }
3652    case nir_intrinsic_end_primitive:
3653    case nir_intrinsic_end_primitive_with_counter:
3654       ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr));
3655       break;
3656    case nir_intrinsic_load_tess_coord:
3657       result = ctx->abi->load_tess_coord(ctx->abi);
3658       break;
3659    case nir_intrinsic_load_tess_level_outer:
3660       result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, false);
3661       break;
3662    case nir_intrinsic_load_tess_level_inner:
3663       result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, false);
3664       break;
3665    case nir_intrinsic_load_tess_level_outer_default:
3666       result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, true);
3667       break;
3668    case nir_intrinsic_load_tess_level_inner_default:
3669       result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, true);
3670       break;
3671    case nir_intrinsic_load_patch_vertices_in:
3672       result = ctx->abi->load_patch_vertices_in(ctx->abi);
3673       break;
3674    case nir_intrinsic_vote_all: {
3675       result = ac_build_vote_all(&ctx->ac, get_src(ctx, instr->src[0]));
3676       break;
3677    }
3678    case nir_intrinsic_vote_any: {
3679       result = ac_build_vote_any(&ctx->ac, get_src(ctx, instr->src[0]));
3680       break;
3681    }
3682    case nir_intrinsic_shuffle:
3683       if (ctx->ac.chip_class == GFX8 || ctx->ac.chip_class == GFX9 ||
3684           (ctx->ac.chip_class >= GFX10 && ctx->ac.wave_size == 32)) {
3685          result =
3686             ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]), get_src(ctx, instr->src[1]));
3687       } else {
3688          LLVMValueRef src = get_src(ctx, instr->src[0]);
3689          LLVMValueRef index = get_src(ctx, instr->src[1]);
3690          LLVMTypeRef type = LLVMTypeOf(src);
3691          struct waterfall_context wctx;
3692          LLVMValueRef index_val;
3693 
3694          index_val = enter_waterfall(ctx, &wctx, index, true);
3695 
3696          src = LLVMBuildZExt(ctx->ac.builder, src, ctx->ac.i32, "");
3697 
3698          result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readlane", ctx->ac.i32,
3699                                      (LLVMValueRef[]){src, index_val}, 2,
3700                                      AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
3701 
3702          result = LLVMBuildTrunc(ctx->ac.builder, result, type, "");
3703 
3704          result = exit_waterfall(ctx, &wctx, result);
3705       }
3706       break;
3707    case nir_intrinsic_reduce:
3708       result = ac_build_reduce(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0],
3709                                instr->const_index[1]);
3710       break;
3711    case nir_intrinsic_inclusive_scan:
3712       result =
3713          ac_build_inclusive_scan(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0]);
3714       break;
3715    case nir_intrinsic_exclusive_scan:
3716       result =
3717          ac_build_exclusive_scan(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0]);
3718       break;
3719    case nir_intrinsic_quad_broadcast: {
3720       unsigned lane = nir_src_as_uint(instr->src[1]);
3721       result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), lane, lane, lane, lane);
3722       break;
3723    }
3724    case nir_intrinsic_quad_swap_horizontal:
3725       result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 1, 0, 3, 2);
3726       break;
3727    case nir_intrinsic_quad_swap_vertical:
3728       result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 2, 3, 0, 1);
3729       break;
3730    case nir_intrinsic_quad_swap_diagonal:
3731       result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 3, 2, 1, 0);
3732       break;
3733    case nir_intrinsic_quad_swizzle_amd: {
3734       uint32_t mask = nir_intrinsic_swizzle_mask(instr);
3735       result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask & 0x3,
3736                                      (mask >> 2) & 0x3, (mask >> 4) & 0x3, (mask >> 6) & 0x3);
3737       break;
3738    }
3739    case nir_intrinsic_masked_swizzle_amd: {
3740       uint32_t mask = nir_intrinsic_swizzle_mask(instr);
3741       result = ac_build_ds_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask);
3742       break;
3743    }
3744    case nir_intrinsic_write_invocation_amd:
3745       result = ac_build_writelane(&ctx->ac, get_src(ctx, instr->src[0]),
3746                                   get_src(ctx, instr->src[1]), get_src(ctx, instr->src[2]));
3747       break;
3748    case nir_intrinsic_mbcnt_amd:
3749       result = ac_build_mbcnt(&ctx->ac, get_src(ctx, instr->src[0]));
3750       break;
3751    case nir_intrinsic_load_scratch: {
3752       LLVMValueRef offset = get_src(ctx, instr->src[0]);
3753       LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch, offset);
3754       LLVMTypeRef comp_type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
3755       LLVMTypeRef vec_type = instr->dest.ssa.num_components == 1
3756                                 ? comp_type
3757                                 : LLVMVectorType(comp_type, instr->dest.ssa.num_components);
3758       unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
3759       ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(vec_type, addr_space), "");
3760       result = LLVMBuildLoad(ctx->ac.builder, ptr, "");
3761       break;
3762    }
3763    case nir_intrinsic_store_scratch: {
3764       LLVMValueRef offset = get_src(ctx, instr->src[1]);
3765       LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch, offset);
3766       LLVMTypeRef comp_type = LLVMIntTypeInContext(ctx->ac.context, instr->src[0].ssa->bit_size);
3767       unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
3768       ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(comp_type, addr_space), "");
3769       LLVMValueRef src = get_src(ctx, instr->src[0]);
3770       unsigned wrmask = nir_intrinsic_write_mask(instr);
3771       while (wrmask) {
3772          int start, count;
3773          u_bit_scan_consecutive_range(&wrmask, &start, &count);
3774 
3775          LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, start, false);
3776          LLVMValueRef offset_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &offset, 1, "");
3777          LLVMTypeRef vec_type = count == 1 ? comp_type : LLVMVectorType(comp_type, count);
3778          offset_ptr = LLVMBuildBitCast(ctx->ac.builder, offset_ptr,
3779                                        LLVMPointerType(vec_type, addr_space), "");
3780          LLVMValueRef offset_src = ac_extract_components(&ctx->ac, src, start, count);
3781          LLVMBuildStore(ctx->ac.builder, offset_src, offset_ptr);
3782       }
3783       break;
3784    }
3785    case nir_intrinsic_load_constant: {
3786       unsigned base = nir_intrinsic_base(instr);
3787       unsigned range = nir_intrinsic_range(instr);
3788 
3789       LLVMValueRef offset = get_src(ctx, instr->src[0]);
3790       offset = LLVMBuildAdd(ctx->ac.builder, offset, LLVMConstInt(ctx->ac.i32, base, false), "");
3791 
3792       /* Clamp the offset to avoid out-of-bounds accesses, which global
3793        * memory instructions can't handle.
3794        */
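      /* i.e. clamp the byte offset to at most base + range. */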
3795       LLVMValueRef size = LLVMConstInt(ctx->ac.i32, base + range, false);
3796       LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, "");
3797       offset = LLVMBuildSelect(ctx->ac.builder, cond, offset, size, "");
3798 
3799       LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->constant_data, offset);
3800       LLVMTypeRef comp_type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
3801       LLVMTypeRef vec_type = instr->dest.ssa.num_components == 1
3802                                 ? comp_type
3803                                 : LLVMVectorType(comp_type, instr->dest.ssa.num_components);
3804       unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
3805       ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(vec_type, addr_space), "");
3806       result = LLVMBuildLoad(ctx->ac.builder, ptr, "");
3807       break;
3808    }
3809    case nir_intrinsic_set_vertex_and_primitive_count:
3810       /* Currently ignored. */
3811       break;
3812    default:
3813       fprintf(stderr, "Unknown intrinsic: ");
3814       nir_print_instr(&instr->instr, stderr);
3815       fprintf(stderr, "\n");
3816       abort();
3817       break;
3818    }
3819    if (result) {
3820       ctx->ssa_defs[instr->dest.ssa.index] = result;
3821    }
3822 }
3823 
3824 static LLVMValueRef get_bindless_index_from_uniform(struct ac_nir_context *ctx, unsigned base_index,
3825                                                     unsigned constant_index,
3826                                                     LLVMValueRef dynamic_index)
3827 {
3828    LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, base_index * 4, 0);
3829    LLVMValueRef index = LLVMBuildAdd(ctx->ac.builder, dynamic_index,
3830                                      LLVMConstInt(ctx->ac.i32, constant_index, 0), "");
3831 
3832    /* Bindless uniforms are 64-bit, so multiply the index by 8 */
3833    index = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i32, 8, 0), "");
3834    offset = LLVMBuildAdd(ctx->ac.builder, offset, index, "");
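   /* Final byte offset into the UBO that holds the bindless handles:
    * base_index * 4 + (constant_index + dynamic_index) * 8. */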
3835 
3836    LLVMValueRef ubo_index = ctx->abi->load_ubo(ctx->abi, 0, 0, false, ctx->ac.i32_0);
3837 
3838    LLVMValueRef ret =
3839       ac_build_buffer_load(&ctx->ac, ubo_index, 1, NULL, offset, NULL, 0, 0, true, true);
3840 
3841    return LLVMBuildBitCast(ctx->ac.builder, ret, ctx->ac.i32, "");
3842 }
3843 
3844 struct sampler_desc_address {
3845    unsigned descriptor_set;
3846    unsigned base_index; /* binding in vulkan */
3847    unsigned constant_index;
3848    LLVMValueRef dynamic_index;
3849    bool image;
3850    bool bindless;
3851 };
3852 
3853 static struct sampler_desc_address get_sampler_desc_internal(struct ac_nir_context *ctx,
3854                                                              nir_deref_instr *deref_instr,
3855                                                              const nir_instr *instr, bool image)
3856 {
3857    LLVMValueRef index = NULL;
3858    unsigned constant_index = 0;
3859    unsigned descriptor_set;
3860    unsigned base_index;
3861    bool bindless = false;
3862 
3863    if (!deref_instr) {
3864       descriptor_set = 0;
3865       if (image) {
3866          nir_intrinsic_instr *img_instr = nir_instr_as_intrinsic(instr);
3867          base_index = 0;
3868          bindless = true;
3869          index = get_src(ctx, img_instr->src[0]);
3870       } else {
3871          nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
3872          int sampSrcIdx = nir_tex_instr_src_index(tex_instr, nir_tex_src_sampler_handle);
3873          if (sampSrcIdx != -1) {
3874             base_index = 0;
3875             bindless = true;
3876             index = get_src(ctx, tex_instr->src[sampSrcIdx].src);
3877          } else {
3878             assert(tex_instr && !image);
3879             base_index = tex_instr->sampler_index;
3880          }
3881       }
3882    } else {
3883       while (deref_instr->deref_type != nir_deref_type_var) {
3884          if (deref_instr->deref_type == nir_deref_type_array) {
3885             unsigned array_size = glsl_get_aoa_size(deref_instr->type);
3886             if (!array_size)
3887                array_size = 1;
3888 
3889             if (nir_src_is_const(deref_instr->arr.index)) {
3890                constant_index += array_size * nir_src_as_uint(deref_instr->arr.index);
3891             } else {
3892                LLVMValueRef indirect = get_src(ctx, deref_instr->arr.index);
3893 
3894                indirect = LLVMBuildMul(ctx->ac.builder, indirect,
3895                                        LLVMConstInt(ctx->ac.i32, array_size, false), "");
3896 
3897                if (!index)
3898                   index = indirect;
3899                else
3900                   index = LLVMBuildAdd(ctx->ac.builder, index, indirect, "");
3901             }
3902 
3903             deref_instr = nir_src_as_deref(deref_instr->parent);
3904          } else if (deref_instr->deref_type == nir_deref_type_struct) {
3905             unsigned sidx = deref_instr->strct.index;
3906             deref_instr = nir_src_as_deref(deref_instr->parent);
3907             constant_index += glsl_get_struct_location_offset(deref_instr->type, sidx);
3908          } else {
3909             unreachable("Unsupported deref type");
3910          }
3911       }
3912       descriptor_set = deref_instr->var->data.descriptor_set;
3913 
3914       if (deref_instr->var->data.bindless) {
3915          /* For now just assert on unhandled variable types */
3916          assert(deref_instr->var->data.mode == nir_var_uniform);
3917 
3918          base_index = deref_instr->var->data.driver_location;
3919          bindless = true;
3920 
3921          index = index ? index : ctx->ac.i32_0;
3922          index = get_bindless_index_from_uniform(ctx, base_index, constant_index, index);
3923       } else
3924          base_index = deref_instr->var->data.binding;
3925    }
3926    return (struct sampler_desc_address){
3927       .descriptor_set = descriptor_set,
3928       .base_index = base_index,
3929       .constant_index = constant_index,
3930       .dynamic_index = index,
3931       .image = image,
3932       .bindless = bindless,
3933    };
3934 }
3935 
3936 /* Extract any possibly divergent index into a separate value that can be fed
3937  * into get_sampler_desc with the same arguments. */
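/* The callers (e.g. tex_fetch_ptrs below) wrap this index in a waterfall loop
 * via enter_waterfall when it may be divergent, so the descriptor load in
 * get_sampler_desc effectively sees a uniform index per waterfall iteration. */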
3938 static LLVMValueRef get_sampler_desc_index(struct ac_nir_context *ctx, nir_deref_instr *deref_instr,
3939                                            const nir_instr *instr, bool image)
3940 {
3941    struct sampler_desc_address addr = get_sampler_desc_internal(ctx, deref_instr, instr, image);
3942    return addr.dynamic_index;
3943 }
3944 
3945 static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, nir_deref_instr *deref_instr,
3946                                      enum ac_descriptor_type desc_type, const nir_instr *instr,
3947                                      LLVMValueRef index, bool image, bool write)
3948 {
3949    struct sampler_desc_address addr = get_sampler_desc_internal(ctx, deref_instr, instr, image);
3950    return ctx->abi->load_sampler_desc(ctx->abi, addr.descriptor_set, addr.base_index,
3951                                       addr.constant_index, index, desc_type, addr.image, write,
3952                                       addr.bindless);
3953 }
3954 
3955 /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
3956  *
3957  * GFX6-GFX7:
3958  *   If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
3959  *   filtering manually. The driver sets img7 to a mask clearing
3960  *   MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
3961  *     s_and_b32 samp0, samp0, img7
3962  *
3963  * GFX8:
3964  *   The ANISO_OVERRIDE sampler field enables this fix in TA.
3965  */
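/* The LLVM IR built below is the equivalent of that s_and_b32: extract image
 * dword 7 and sampler dword 0, AND them, and write the result back into
 * sampler dword 0. */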
3966 static LLVMValueRef sici_fix_sampler_aniso(struct ac_nir_context *ctx, LLVMValueRef res,
3967                                            LLVMValueRef samp)
3968 {
3969    LLVMBuilderRef builder = ctx->ac.builder;
3970    LLVMValueRef img7, samp0;
3971 
3972    if (ctx->ac.chip_class >= GFX8)
3973       return samp;
3974 
3975    img7 = LLVMBuildExtractElement(builder, res, LLVMConstInt(ctx->ac.i32, 7, 0), "");
3976    samp0 = LLVMBuildExtractElement(builder, samp, LLVMConstInt(ctx->ac.i32, 0, 0), "");
3977    samp0 = LLVMBuildAnd(builder, samp0, img7, "");
3978    return LLVMBuildInsertElement(builder, samp, samp0, LLVMConstInt(ctx->ac.i32, 0, 0), "");
3979 }
3980 
3981 static void tex_fetch_ptrs(struct ac_nir_context *ctx, nir_tex_instr *instr,
3982                            struct waterfall_context *wctx, LLVMValueRef *res_ptr,
3983                            LLVMValueRef *samp_ptr, LLVMValueRef *fmask_ptr)
3984 {
3985    nir_deref_instr *texture_deref_instr = NULL;
3986    nir_deref_instr *sampler_deref_instr = NULL;
3987    int plane = -1;
3988 
3989    for (unsigned i = 0; i < instr->num_srcs; i++) {
3990       switch (instr->src[i].src_type) {
3991       case nir_tex_src_texture_deref:
3992          texture_deref_instr = nir_src_as_deref(instr->src[i].src);
3993          break;
3994       case nir_tex_src_sampler_deref:
3995          sampler_deref_instr = nir_src_as_deref(instr->src[i].src);
3996          break;
3997       case nir_tex_src_plane:
3998          plane = nir_src_as_int(instr->src[i].src);
3999          break;
4000       default:
4001          break;
4002       }
4003    }
4004 
4005    LLVMValueRef texture_dynamic_index =
4006       get_sampler_desc_index(ctx, texture_deref_instr, &instr->instr, false);
4007    if (!sampler_deref_instr)
4008       sampler_deref_instr = texture_deref_instr;
4009 
4010    LLVMValueRef sampler_dynamic_index =
4011       get_sampler_desc_index(ctx, sampler_deref_instr, &instr->instr, false);
4012    if (instr->texture_non_uniform)
4013       texture_dynamic_index = enter_waterfall(ctx, wctx + 0, texture_dynamic_index, true);
4014 
4015    if (instr->sampler_non_uniform)
4016       sampler_dynamic_index = enter_waterfall(ctx, wctx + 1, sampler_dynamic_index, true);
4017 
4018    enum ac_descriptor_type main_descriptor =
4019       instr->sampler_dim == GLSL_SAMPLER_DIM_BUF ? AC_DESC_BUFFER : AC_DESC_IMAGE;
4020 
4021    if (plane >= 0) {
4022       assert(instr->op != nir_texop_txf_ms && instr->op != nir_texop_samples_identical);
4023       assert(instr->sampler_dim != GLSL_SAMPLER_DIM_BUF);
4024 
4025       main_descriptor = AC_DESC_PLANE_0 + plane;
4026    }
4027 
4028    if (instr->op == nir_texop_fragment_mask_fetch) {
4029       /* The fragment mask is fetched from the compressed
4030        * multisampled surface.
4031        */
4032       main_descriptor = AC_DESC_FMASK;
4033    }
4034 
4035    *res_ptr = get_sampler_desc(ctx, texture_deref_instr, main_descriptor, &instr->instr,
4036                                texture_dynamic_index, false, false);
4037 
4038    if (samp_ptr) {
4039       *samp_ptr = get_sampler_desc(ctx, sampler_deref_instr, AC_DESC_SAMPLER, &instr->instr,
4040                                    sampler_dynamic_index, false, false);
4041       if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT)
4042          *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
4043    }
4044    if (fmask_ptr && (instr->op == nir_texop_txf_ms || instr->op == nir_texop_samples_identical))
4045       *fmask_ptr = get_sampler_desc(ctx, texture_deref_instr, AC_DESC_FMASK, &instr->instr,
4046                                     texture_dynamic_index, false, false);
4047 }
4048 
4049 static LLVMValueRef apply_round_slice(struct ac_llvm_context *ctx, LLVMValueRef coord)
4050 {
4051    coord = ac_to_float(ctx, coord);
4052    coord = ac_build_round(ctx, coord);
4053    coord = ac_to_integer(ctx, coord);
4054    return coord;
4055 }
4056 
4057 static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
4058 {
4059    LLVMValueRef result = NULL;
4060    struct ac_image_args args = {0};
4061    LLVMValueRef fmask_ptr = NULL, sample_index = NULL;
4062    LLVMValueRef ddx = NULL, ddy = NULL;
4063    unsigned offset_src = 0;
4064    struct waterfall_context wctx[2] = {{{0}}};
4065 
4066    tex_fetch_ptrs(ctx, instr, wctx, &args.resource, &args.sampler, &fmask_ptr);
4067 
4068    for (unsigned i = 0; i < instr->num_srcs; i++) {
4069       switch (instr->src[i].src_type) {
4070       case nir_tex_src_coord: {
4071          LLVMValueRef coord = get_src(ctx, instr->src[i].src);
4072          for (unsigned chan = 0; chan < instr->coord_components; ++chan)
4073             args.coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan);
4074          break;
4075       }
4076       case nir_tex_src_projector:
4077          break;
4078       case nir_tex_src_comparator:
4079          if (instr->is_shadow) {
4080             args.compare = get_src(ctx, instr->src[i].src);
4081             args.compare = ac_to_float(&ctx->ac, args.compare);
4082          }
4083          break;
4084       case nir_tex_src_offset:
4085          args.offset = get_src(ctx, instr->src[i].src);
4086          offset_src = i;
4087          break;
4088       case nir_tex_src_bias:
4089          args.bias = get_src(ctx, instr->src[i].src);
4090          break;
4091       case nir_tex_src_lod: {
4092          if (nir_src_is_const(instr->src[i].src) && nir_src_as_uint(instr->src[i].src) == 0)
4093             args.level_zero = true;
4094          else
4095             args.lod = get_src(ctx, instr->src[i].src);
4096          break;
4097       }
4098       case nir_tex_src_ms_index:
4099          sample_index = get_src(ctx, instr->src[i].src);
4100          break;
4101       case nir_tex_src_ms_mcs:
4102          break;
4103       case nir_tex_src_ddx:
4104          ddx = get_src(ctx, instr->src[i].src);
4105          break;
4106       case nir_tex_src_ddy:
4107          ddy = get_src(ctx, instr->src[i].src);
4108          break;
4109       case nir_tex_src_min_lod:
4110          args.min_lod = get_src(ctx, instr->src[i].src);
4111          break;
4112       case nir_tex_src_texture_offset:
4113       case nir_tex_src_sampler_offset:
4114       case nir_tex_src_plane:
4115       default:
4116          break;
4117       }
4118    }
4119 
4120    if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
4121       result = get_buffer_size(ctx, args.resource, true);
4122       goto write_result;
4123    }
4124 
4125    if (instr->op == nir_texop_texture_samples) {
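      /* Decode the sample count from dword 3 of the image descriptor:
       * bits [31:28] hold the resource type (both MSAA types match 0xe after
       * masking out bit 0), and bits [19:16] hold log2(number of samples),
       * so the count is 1 << that field. */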
4126       LLVMValueRef res, samples, is_msaa;
4127       LLVMValueRef default_sample;
4128 
4129       res = LLVMBuildBitCast(ctx->ac.builder, args.resource, ctx->ac.v8i32, "");
4130       samples =
4131          LLVMBuildExtractElement(ctx->ac.builder, res, LLVMConstInt(ctx->ac.i32, 3, false), "");
4132       is_msaa = LLVMBuildLShr(ctx->ac.builder, samples, LLVMConstInt(ctx->ac.i32, 28, false), "");
4133       is_msaa = LLVMBuildAnd(ctx->ac.builder, is_msaa, LLVMConstInt(ctx->ac.i32, 0xe, false), "");
4134       is_msaa = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, is_msaa,
4135                               LLVMConstInt(ctx->ac.i32, 0xe, false), "");
4136 
4137       samples = LLVMBuildLShr(ctx->ac.builder, samples, LLVMConstInt(ctx->ac.i32, 16, false), "");
4138       samples = LLVMBuildAnd(ctx->ac.builder, samples, LLVMConstInt(ctx->ac.i32, 0xf, false), "");
4139       samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1, samples, "");
4140 
4141       if (ctx->abi->robust_buffer_access) {
4142          LLVMValueRef dword1, is_null_descriptor;
4143 
4144          /* Extract the second dword of the descriptor; if it's
4145           * all zero, this is a null descriptor.
4146           */
4147          dword1 =
4148             LLVMBuildExtractElement(ctx->ac.builder, res, LLVMConstInt(ctx->ac.i32, 1, false), "");
4149          is_null_descriptor = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, dword1,
4150                                             LLVMConstInt(ctx->ac.i32, 0, false), "");
4151          default_sample =
4152             LLVMBuildSelect(ctx->ac.builder, is_null_descriptor, ctx->ac.i32_0, ctx->ac.i32_1, "");
4153       } else {
4154          default_sample = ctx->ac.i32_1;
4155       }
4156 
4157       samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples, default_sample, "");
4158       result = samples;
4159       goto write_result;
4160    }
4161 
4162    if (args.offset && instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms) {
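      /* For sampling ops the texel offsets are packed into a single dword:
       * 6 bits per component at bit offsets 0, 8 and 16, which is the layout
       * the image instructions expect. */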
4163       LLVMValueRef offset[3], pack;
4164       for (unsigned chan = 0; chan < 3; ++chan)
4165          offset[chan] = ctx->ac.i32_0;
4166 
4167       unsigned num_components = ac_get_llvm_num_components(args.offset);
4168       for (unsigned chan = 0; chan < num_components; chan++) {
4169          offset[chan] = ac_llvm_extract_elem(&ctx->ac, args.offset, chan);
4170          offset[chan] =
4171             LLVMBuildAnd(ctx->ac.builder, offset[chan], LLVMConstInt(ctx->ac.i32, 0x3f, false), "");
4172          if (chan)
4173             offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan],
4174                                         LLVMConstInt(ctx->ac.i32, chan * 8, false), "");
4175       }
4176       pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], "");
4177       pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], "");
4178       args.offset = pack;
4179    }
4180 
4181    /* Section 8.23.1 (Depth Texture Comparison Mode) of the
4182     * OpenGL 4.5 spec says:
4183     *
4184     *    "If the texture’s internal format indicates a fixed-point
4185     *     depth texture, then D_t and D_ref are clamped to the
4186     *     range [0, 1]; otherwise no clamping is performed."
4187     *
4188     * TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
4189     * so the depth comparison value isn't clamped for Z16 and
4190     * Z24 anymore. Do it manually here for GFX8-9; GFX10 has
4191     * an explicitly clamped 32-bit float format.
4192     */
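   /* Bit 29 of sampler dword 3 tells us whether this texture was promoted to
    * Z32_FLOAT; only then is the comparison value clamped below. */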
4193    if (args.compare && ctx->ac.chip_class >= GFX8 && ctx->ac.chip_class <= GFX9 &&
4194        ctx->abi->clamp_shadow_reference) {
4195       LLVMValueRef upgraded, clamped;
4196 
4197       upgraded = LLVMBuildExtractElement(ctx->ac.builder, args.sampler,
4198                                          LLVMConstInt(ctx->ac.i32, 3, false), "");
4199       upgraded = LLVMBuildLShr(ctx->ac.builder, upgraded, LLVMConstInt(ctx->ac.i32, 29, false), "");
4200       upgraded = LLVMBuildTrunc(ctx->ac.builder, upgraded, ctx->ac.i1, "");
4201       clamped = ac_build_clamp(&ctx->ac, args.compare);
4202       args.compare = LLVMBuildSelect(ctx->ac.builder, upgraded, clamped, args.compare, "");
4203    }
4204 
4205    /* pack derivatives */
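   /* GFX9 addresses 1D textures as 2D internally, so a 1D gradient is widened
    * to two channels with the extra channel zeroed (the matching coordinate
    * fixup happens further down). */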
4206    if (ddx || ddy) {
4207       int num_src_deriv_channels, num_dest_deriv_channels;
4208       switch (instr->sampler_dim) {
4209       case GLSL_SAMPLER_DIM_3D:
4210       case GLSL_SAMPLER_DIM_CUBE:
4211          num_src_deriv_channels = 3;
4212          num_dest_deriv_channels = 3;
4213          break;
4214       case GLSL_SAMPLER_DIM_2D:
4215       default:
4216          num_src_deriv_channels = 2;
4217          num_dest_deriv_channels = 2;
4218          break;
4219       case GLSL_SAMPLER_DIM_1D:
4220          num_src_deriv_channels = 1;
4221          if (ctx->ac.chip_class == GFX9) {
4222             num_dest_deriv_channels = 2;
4223          } else {
4224             num_dest_deriv_channels = 1;
4225          }
4226          break;
4227       }
4228 
4229       for (unsigned i = 0; i < num_src_deriv_channels; i++) {
4230          args.derivs[i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddx, i));
4231          args.derivs[num_dest_deriv_channels + i] =
4232             ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddy, i));
4233       }
4234       for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) {
4235          args.derivs[i] = ctx->ac.f32_0;
4236          args.derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0;
4237       }
4238    }
4239 
4240    if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && args.coords[0]) {
4241       for (unsigned chan = 0; chan < instr->coord_components; chan++)
4242          args.coords[chan] = ac_to_float(&ctx->ac, args.coords[chan]);
4243       if (instr->coord_components == 3)
4244          args.coords[3] = LLVMGetUndef(ctx->ac.f32);
4245       ac_prepare_cube_coords(&ctx->ac, instr->op == nir_texop_txd, instr->is_array,
4246                              instr->op == nir_texop_lod, args.coords, args.derivs);
4247    }
4248 
4249    /* Texture coordinates fixups */
4250    if (instr->coord_components > 1 && instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
4251        instr->is_array && instr->op != nir_texop_txf) {
4252       args.coords[1] = apply_round_slice(&ctx->ac, args.coords[1]);
4253    }
4254 
4255    if (instr->coord_components > 2 &&
4256        (instr->sampler_dim == GLSL_SAMPLER_DIM_2D || instr->sampler_dim == GLSL_SAMPLER_DIM_MS ||
4257         instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS ||
4258         instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS) &&
4259        instr->is_array && instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms &&
4260        instr->op != nir_texop_fragment_fetch && instr->op != nir_texop_fragment_mask_fetch) {
4261       args.coords[2] = apply_round_slice(&ctx->ac, args.coords[2]);
4262    }
4263 
4264    if (ctx->ac.chip_class == GFX9 && instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
4265        instr->op != nir_texop_lod) {
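      /* GFX9 has no true 1D images: they are sampled as 2D, so insert a dummy
       * Y coordinate (texel center 0.5 for sampling, 0 for texel fetches) and
       * move the array layer, if any, up to the Z slot. */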
4266       LLVMValueRef filler;
4267       if (instr->op == nir_texop_txf)
4268          filler = ctx->ac.i32_0;
4269       else
4270          filler = LLVMConstReal(ctx->ac.f32, 0.5);
4271 
4272       if (instr->is_array)
4273          args.coords[2] = args.coords[1];
4274       args.coords[1] = filler;
4275    }
4276 
4277    /* Pack sample index */
4278    if (sample_index && (instr->op == nir_texop_txf_ms || instr->op == nir_texop_fragment_fetch))
4279       args.coords[instr->coord_components] = sample_index;
4280 
4281    if (instr->op == nir_texop_samples_identical) {
4282       struct ac_image_args txf_args = {0};
4283       memcpy(txf_args.coords, args.coords, sizeof(txf_args.coords));
4284 
4285       txf_args.dmask = 0xf;
4286       txf_args.resource = fmask_ptr;
4287       txf_args.dim = instr->is_array ? ac_image_2darray : ac_image_2d;
4288       result = build_tex_intrinsic(ctx, instr, &txf_args);
4289 
4290       result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
4291       result = emit_int_cmp(&ctx->ac, LLVMIntEQ, result, ctx->ac.i32_0);
4292       goto write_result;
4293    }
4294 
4295    if ((instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS ||
4296         instr->sampler_dim == GLSL_SAMPLER_DIM_MS) &&
4297        instr->op != nir_texop_txs && instr->op != nir_texop_fragment_fetch &&
4298        instr->op != nir_texop_fragment_mask_fetch) {
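      /* FMASK remaps each logical sample of a compressed MSAA surface to the
       * physical sample that actually stores its data, so replace the sample
       * index with the value looked up in FMASK before fetching. */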
4299       unsigned sample_chan = instr->is_array ? 3 : 2;
4300       args.coords[sample_chan] = adjust_sample_index_using_fmask(
4301          &ctx->ac, args.coords[0], args.coords[1], instr->is_array ? args.coords[2] : NULL,
4302          args.coords[sample_chan], fmask_ptr);
4303    }
4304 
4305    if (args.offset && (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)) {
4306       int num_offsets = instr->src[offset_src].src.ssa->num_components;
4307       num_offsets = MIN2(num_offsets, instr->coord_components);
4308       for (unsigned i = 0; i < num_offsets; ++i) {
4309          LLVMValueRef off = ac_llvm_extract_elem(&ctx->ac, args.offset, i);
4310          args.coords[i] = LLVMBuildAdd(ctx->ac.builder, args.coords[i], off, "");
4311       }
4312       args.offset = NULL;
4313    }
4314 
4315    /* DMASK was repurposed for GATHER4. 4 components are always
4316     * returned and DMASK works like a swizzle - it selects
4317     * the component to fetch. The only valid DMASK values are
4318     * 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4319     * (red,red,red,red) etc.) The ISA document doesn't mention
4320     * this.
4321     */
4322    args.dmask = 0xf;
4323    if (instr->op == nir_texop_tg4) {
4324       if (instr->is_shadow)
4325          args.dmask = 1;
4326       else
4327          args.dmask = 1 << instr->component;
4328    }
4329 
4330    if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF) {
4331       args.dim = ac_get_sampler_dim(ctx->ac.chip_class, instr->sampler_dim, instr->is_array);
4332       args.unorm = instr->sampler_dim == GLSL_SAMPLER_DIM_RECT;
4333    }
4334 
4335    /* Adjust the number of coordinates because we only need (x,y) for 2D
4336     * multisampled images and (x,y,layer) for 2D multisampled layered
4337     * images or for multisampled input attachments.
4338     */
4339    if (instr->op == nir_texop_fragment_mask_fetch) {
4340       if (args.dim == ac_image_2dmsaa) {
4341          args.dim = ac_image_2d;
4342       } else {
4343          assert(args.dim == ac_image_2darraymsaa);
4344          args.dim = ac_image_2darray;
4345       }
4346    }
4347 
4348    assert(instr->dest.is_ssa);
4349    args.d16 = instr->dest.ssa.bit_size == 16;
4350 
4351    result = build_tex_intrinsic(ctx, instr, &args);
4352 
4353    if (instr->op == nir_texop_query_levels)
4354       result =
4355          LLVMBuildExtractElement(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 3, false), "");
4356    else if (instr->is_shadow && instr->is_new_style_shadow && instr->op != nir_texop_txs &&
4357             instr->op != nir_texop_lod && instr->op != nir_texop_tg4)
4358       result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
4359    else if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
4360             instr->is_array) {
4361       LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
4362       LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
4363       LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");
4364       z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
4365       result = LLVMBuildInsertElement(ctx->ac.builder, result, z, two, "");
4366    } else if (ctx->ac.chip_class == GFX9 && instr->op == nir_texop_txs &&
4367               instr->sampler_dim == GLSL_SAMPLER_DIM_1D && instr->is_array) {
4368       LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
4369       LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");
4370       result = LLVMBuildInsertElement(ctx->ac.builder, result, layers, ctx->ac.i32_1, "");
4371    } else if (instr->dest.ssa.num_components != 4)
4372       result = ac_trim_vector(&ctx->ac, result, instr->dest.ssa.num_components);
4373 
4374 write_result:
4375    if (result) {
4376       assert(instr->dest.is_ssa);
4377       result = ac_to_integer(&ctx->ac, result);
4378 
4379       for (int i = ARRAY_SIZE(wctx); --i >= 0;) {
4380          result = exit_waterfall(ctx, wctx + i, result);
4381       }
4382 
4383       ctx->ssa_defs[instr->dest.ssa.index] = result;
4384    }
4385 }
4386 
4387 static void visit_phi(struct ac_nir_context *ctx, nir_phi_instr *instr)
4388 {
4389    LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa);
4390    LLVMValueRef result = LLVMBuildPhi(ctx->ac.builder, type, "");
4391 
4392    ctx->ssa_defs[instr->dest.ssa.index] = result;
4393    _mesa_hash_table_insert(ctx->phis, instr, result);
4394 }
4395 
4396 static void visit_post_phi(struct ac_nir_context *ctx, nir_phi_instr *instr, LLVMValueRef llvm_phi)
4397 {
4398    nir_foreach_phi_src (src, instr) {
4399       LLVMBasicBlockRef block = get_block(ctx, src->pred);
4400       LLVMValueRef llvm_src = get_src(ctx, src->src);
4401 
4402       LLVMAddIncoming(llvm_phi, &llvm_src, &block, 1);
4403    }
4404 }
4405 
4406 static void phi_post_pass(struct ac_nir_context *ctx)
4407 {
4408    hash_table_foreach(ctx->phis, entry)
4409    {
4410       visit_post_phi(ctx, (nir_phi_instr *)entry->key, (LLVMValueRef)entry->data);
4411    }
4412 }
4413 
4414 static bool is_def_used_in_an_export(const nir_ssa_def *def)
4415 {
4416    nir_foreach_use (use_src, def) {
4417       if (use_src->parent_instr->type == nir_instr_type_intrinsic) {
4418          nir_intrinsic_instr *instr = nir_instr_as_intrinsic(use_src->parent_instr);
4419          if (instr->intrinsic == nir_intrinsic_store_deref)
4420             return true;
4421       } else if (use_src->parent_instr->type == nir_instr_type_alu) {
4422          nir_alu_instr *instr = nir_instr_as_alu(use_src->parent_instr);
4423          if (instr->op == nir_op_vec4 && is_def_used_in_an_export(&instr->dest.dest.ssa)) {
4424             return true;
4425          }
4426       }
4427    }
4428    return false;
4429 }
4430 
4431 static void visit_ssa_undef(struct ac_nir_context *ctx, const nir_ssa_undef_instr *instr)
4432 {
4433    unsigned num_components = instr->def.num_components;
4434    LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
4435 
4436    if (!ctx->abi->convert_undef_to_zero || is_def_used_in_an_export(&instr->def)) {
4437       LLVMValueRef undef;
4438 
4439       if (num_components == 1)
4440          undef = LLVMGetUndef(type);
4441       else {
4442          undef = LLVMGetUndef(LLVMVectorType(type, num_components));
4443       }
4444       ctx->ssa_defs[instr->def.index] = undef;
4445    } else {
4446       LLVMValueRef zero = LLVMConstInt(type, 0, false);
4447       if (num_components > 1) {
4448          zero = ac_build_gather_values_extended(&ctx->ac, &zero, 4, 0, false, false);
4449       }
4450       ctx->ssa_defs[instr->def.index] = zero;
4451    }
4452 }
4453 
4454 static void visit_jump(struct ac_llvm_context *ctx, const nir_jump_instr *instr)
4455 {
4456    switch (instr->type) {
4457    case nir_jump_break:
4458       ac_build_break(ctx);
4459       break;
4460    case nir_jump_continue:
4461       ac_build_continue(ctx);
4462       break;
4463    default:
4464       fprintf(stderr, "Unknown NIR jump instr: ");
4465       nir_print_instr(&instr->instr, stderr);
4466       fprintf(stderr, "\n");
4467       abort();
4468    }
4469 }
4470 
4471 static LLVMTypeRef glsl_base_to_llvm_type(struct ac_llvm_context *ac, enum glsl_base_type type)
4472 {
4473    switch (type) {
4474    case GLSL_TYPE_INT:
4475    case GLSL_TYPE_UINT:
4476    case GLSL_TYPE_BOOL:
4477    case GLSL_TYPE_SUBROUTINE:
4478       return ac->i32;
4479    case GLSL_TYPE_INT8:
4480    case GLSL_TYPE_UINT8:
4481       return ac->i8;
4482    case GLSL_TYPE_INT16:
4483    case GLSL_TYPE_UINT16:
4484       return ac->i16;
4485    case GLSL_TYPE_FLOAT:
4486       return ac->f32;
4487    case GLSL_TYPE_FLOAT16:
4488       return ac->f16;
4489    case GLSL_TYPE_INT64:
4490    case GLSL_TYPE_UINT64:
4491       return ac->i64;
4492    case GLSL_TYPE_DOUBLE:
4493       return ac->f64;
4494    default:
4495       unreachable("unknown GLSL type");
4496    }
4497 }
4498 
4499 static LLVMTypeRef glsl_to_llvm_type(struct ac_llvm_context *ac, const struct glsl_type *type)
4500 {
4501    if (glsl_type_is_scalar(type)) {
4502       return glsl_base_to_llvm_type(ac, glsl_get_base_type(type));
4503    }
4504 
4505    if (glsl_type_is_vector(type)) {
4506       return LLVMVectorType(glsl_base_to_llvm_type(ac, glsl_get_base_type(type)),
4507                             glsl_get_vector_elements(type));
4508    }
4509 
4510    if (glsl_type_is_matrix(type)) {
4511       return LLVMArrayType(glsl_to_llvm_type(ac, glsl_get_column_type(type)),
4512                            glsl_get_matrix_columns(type));
4513    }
4514 
4515    if (glsl_type_is_array(type)) {
4516       return LLVMArrayType(glsl_to_llvm_type(ac, glsl_get_array_element(type)),
4517                            glsl_get_length(type));
4518    }
4519 
4520    assert(glsl_type_is_struct_or_ifc(type));
4521 
4522    LLVMTypeRef *const member_types = alloca(glsl_get_length(type) * sizeof(LLVMTypeRef));
4523 
4524    for (unsigned i = 0; i < glsl_get_length(type); i++) {
4525       member_types[i] = glsl_to_llvm_type(ac, glsl_get_struct_field(type, i));
4526    }
4527 
4528    return LLVMStructTypeInContext(ac->context, member_types, glsl_get_length(type), false);
4529 }
4530 
4531 static void visit_deref(struct ac_nir_context *ctx, nir_deref_instr *instr)
4532 {
4533    if (!nir_deref_mode_is_one_of(instr, nir_var_mem_shared | nir_var_mem_global))
4534       return;
4535 
4536    LLVMValueRef result = NULL;
4537    switch (instr->deref_type) {
4538    case nir_deref_type_var: {
4539       struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, instr->var);
4540       result = entry->data;
4541       break;
4542    }
4543    case nir_deref_type_struct:
4544       if (nir_deref_mode_is(instr, nir_var_mem_global)) {
4545          nir_deref_instr *parent = nir_deref_instr_parent(instr);
4546          uint64_t offset = glsl_get_struct_field_offset(parent->type, instr->strct.index);
4547          result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent),
4548                                    LLVMConstInt(ctx->ac.i32, offset, 0));
4549       } else {
4550          result = ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent),
4551                                 LLVMConstInt(ctx->ac.i32, instr->strct.index, 0));
4552       }
4553       break;
4554    case nir_deref_type_array:
4555       if (nir_deref_mode_is(instr, nir_var_mem_global)) {
4556          nir_deref_instr *parent = nir_deref_instr_parent(instr);
4557          unsigned stride = glsl_get_explicit_stride(parent->type);
4558 
4559          if ((glsl_type_is_matrix(parent->type) && glsl_matrix_type_is_row_major(parent->type)) ||
4560              (glsl_type_is_vector(parent->type) && stride == 0))
4561             stride = type_scalar_size_bytes(parent->type);
4562 
4563          assert(stride > 0);
4564          LLVMValueRef index = get_src(ctx, instr->arr.index);
4565          if (LLVMTypeOf(index) != ctx->ac.i64)
4566             index = LLVMBuildZExt(ctx->ac.builder, index, ctx->ac.i64, "");
4567 
4568          LLVMValueRef offset =
4569             LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i64, stride, 0), "");
4570 
4571          result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), offset);
4572       } else {
4573          result =
4574             ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent), get_src(ctx, instr->arr.index));
4575       }
4576       break;
4577    case nir_deref_type_ptr_as_array:
4578       if (nir_deref_mode_is(instr, nir_var_mem_global)) {
4579          unsigned stride = nir_deref_instr_array_stride(instr);
4580 
4581          LLVMValueRef index = get_src(ctx, instr->arr.index);
4582          if (LLVMTypeOf(index) != ctx->ac.i64)
4583             index = LLVMBuildZExt(ctx->ac.builder, index, ctx->ac.i64, "");
4584 
4585          LLVMValueRef offset =
4586             LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i64, stride, 0), "");
4587 
4588          result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), offset);
4589       } else {
4590          result =
4591             ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), get_src(ctx, instr->arr.index));
4592       }
4593       break;
4594    case nir_deref_type_cast: {
4595       result = get_src(ctx, instr->parent);
4596 
4597       /* We can't use the structs from LLVM because the shader
4598        * specifies its own offsets. */
4599       LLVMTypeRef pointee_type = ctx->ac.i8;
4600       if (nir_deref_mode_is(instr, nir_var_mem_shared))
4601          pointee_type = glsl_to_llvm_type(&ctx->ac, instr->type);
4602 
4603       unsigned address_space;
4604 
4605       switch (instr->modes) {
4606       case nir_var_mem_shared:
4607          address_space = AC_ADDR_SPACE_LDS;
4608          break;
4609       case nir_var_mem_global:
4610          address_space = AC_ADDR_SPACE_GLOBAL;
4611          break;
4612       default:
4613          unreachable("Unhandled address space");
4614       }
4615 
4616       LLVMTypeRef type = LLVMPointerType(pointee_type, address_space);
4617 
4618       if (LLVMTypeOf(result) != type) {
4619          if (LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMVectorTypeKind) {
4620             result = LLVMBuildBitCast(ctx->ac.builder, result, type, "");
4621          } else {
4622             result = LLVMBuildIntToPtr(ctx->ac.builder, result, type, "");
4623          }
4624       }
4625       break;
4626    }
4627    default:
4628       unreachable("Unhandled deref_instr deref type");
4629    }
4630 
4631    ctx->ssa_defs[instr->dest.ssa.index] = result;
4632 }
4633 
4634 static void visit_cf_list(struct ac_nir_context *ctx, struct exec_list *list);
4635 
4636 static void visit_block(struct ac_nir_context *ctx, nir_block *block)
4637 {
4638    nir_foreach_instr (instr, block) {
4639       switch (instr->type) {
4640       case nir_instr_type_alu:
4641          visit_alu(ctx, nir_instr_as_alu(instr));
4642          break;
4643       case nir_instr_type_load_const:
4644          visit_load_const(ctx, nir_instr_as_load_const(instr));
4645          break;
4646       case nir_instr_type_intrinsic:
4647          visit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
4648          break;
4649       case nir_instr_type_tex:
4650          visit_tex(ctx, nir_instr_as_tex(instr));
4651          break;
4652       case nir_instr_type_phi:
4653          visit_phi(ctx, nir_instr_as_phi(instr));
4654          break;
4655       case nir_instr_type_ssa_undef:
4656          visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
4657          break;
4658       case nir_instr_type_jump:
4659          visit_jump(&ctx->ac, nir_instr_as_jump(instr));
4660          break;
4661       case nir_instr_type_deref:
4662          visit_deref(ctx, nir_instr_as_deref(instr));
4663          break;
4664       default:
4665          fprintf(stderr, "Unknown NIR instr type: ");
4666          nir_print_instr(instr, stderr);
4667          fprintf(stderr, "\n");
4668          abort();
4669       }
4670    }
4671 
4672    _mesa_hash_table_insert(ctx->defs, block, LLVMGetInsertBlock(ctx->ac.builder));
4673 }
4674 
4675 static void visit_if(struct ac_nir_context *ctx, nir_if *if_stmt)
4676 {
4677    LLVMValueRef value = get_src(ctx, if_stmt->condition);
4678 
4679    nir_block *then_block = (nir_block *)exec_list_get_head(&if_stmt->then_list);
4680 
4681    ac_build_ifcc(&ctx->ac, value, then_block->index);
4682 
4683    visit_cf_list(ctx, &if_stmt->then_list);
4684 
4685    if (!exec_list_is_empty(&if_stmt->else_list)) {
4686       nir_block *else_block = (nir_block *)exec_list_get_head(&if_stmt->else_list);
4687 
4688       ac_build_else(&ctx->ac, else_block->index);
4689       visit_cf_list(ctx, &if_stmt->else_list);
4690    }
4691 
4692    ac_build_endif(&ctx->ac, then_block->index);
4693 }
4694 
4695 static void visit_loop(struct ac_nir_context *ctx, nir_loop *loop)
4696 {
4697    nir_block *first_loop_block = (nir_block *)exec_list_get_head(&loop->body);
4698 
4699    ac_build_bgnloop(&ctx->ac, first_loop_block->index);
4700 
4701    visit_cf_list(ctx, &loop->body);
4702 
4703    ac_build_endloop(&ctx->ac, first_loop_block->index);
4704 }
4705 
4706 static void visit_cf_list(struct ac_nir_context *ctx, struct exec_list *list)
4707 {
4708    foreach_list_typed(nir_cf_node, node, node, list)
4709    {
4710       switch (node->type) {
4711       case nir_cf_node_block:
4712          visit_block(ctx, nir_cf_node_as_block(node));
4713          break;
4714 
4715       case nir_cf_node_if:
4716          visit_if(ctx, nir_cf_node_as_if(node));
4717          break;
4718 
4719       case nir_cf_node_loop:
4720          visit_loop(ctx, nir_cf_node_as_loop(node));
4721          break;
4722 
4723       default:
4724          assert(0);
4725       }
4726    }
4727 }
4728 
4729 void ac_handle_shader_output_decl(struct ac_llvm_context *ctx, struct ac_shader_abi *abi,
4730                                   struct nir_shader *nir, struct nir_variable *variable,
4731                                   gl_shader_stage stage)
4732 {
4733    unsigned output_loc = variable->data.driver_location;
4734    unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
4735 
4736    /* tess ctrl has its own load/store paths for outputs */
4737    if (stage == MESA_SHADER_TESS_CTRL)
4738       return;
4739 
4740    if (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL ||
4741        stage == MESA_SHADER_GEOMETRY) {
4742       int idx = variable->data.location + variable->data.index;
4743       if (idx == VARYING_SLOT_CLIP_DIST0) {
4744          int length = nir->info.clip_distance_array_size + nir->info.cull_distance_array_size;
4745 
4746          if (length > 4)
4747             attrib_count = 2;
4748          else
4749             attrib_count = 1;
4750       }
4751    }
4752 
4753    bool is_16bit = glsl_type_is_16bit(glsl_without_array(variable->type));
4754    LLVMTypeRef type = is_16bit ? ctx->f16 : ctx->f32;
4755    for (unsigned i = 0; i < attrib_count; ++i) {
4756       for (unsigned chan = 0; chan < 4; chan++) {
4757          abi->outputs[ac_llvm_reg_index_soa(output_loc + i, chan)] =
4758             ac_build_alloca_undef(ctx, type, "");
4759       }
4760    }
4761 }
4762 
4763 static void setup_scratch(struct ac_nir_context *ctx, struct nir_shader *shader)
4764 {
4765    if (shader->scratch_size == 0)
4766       return;
4767 
4768    ctx->scratch =
4769       ac_build_alloca_undef(&ctx->ac, LLVMArrayType(ctx->ac.i8, shader->scratch_size), "scratch");
4770 }
4771 
4772 static void setup_constant_data(struct ac_nir_context *ctx, struct nir_shader *shader)
4773 {
4774    if (!shader->constant_data)
4775       return;
4776 
4777    LLVMValueRef data = LLVMConstStringInContext(ctx->ac.context, shader->constant_data,
4778                                                 shader->constant_data_size, true);
4779    LLVMTypeRef type = LLVMArrayType(ctx->ac.i8, shader->constant_data_size);
4780 
4781    /* We want to put the constant data in the CONST address space so that
4782     * we can use scalar loads. However, LLVM versions before 10 put these
4783     * variables in the same section as the code, which is unacceptable
4784     * for RadeonSI as it needs to relocate all the data sections after
4785     * the code sections. See https://reviews.llvm.org/D65813.
4786     */
4787    unsigned address_space = LLVM_VERSION_MAJOR < 10 ? AC_ADDR_SPACE_GLOBAL : AC_ADDR_SPACE_CONST;
4788 
4789    LLVMValueRef global =
4790       LLVMAddGlobalInAddressSpace(ctx->ac.module, type, "const_data", address_space);
4791 
4792    LLVMSetInitializer(global, data);
4793    LLVMSetGlobalConstant(global, true);
4794    LLVMSetVisibility(global, LLVMHiddenVisibility);
4795    ctx->constant_data = global;
4796 }
4797 
4798 static void setup_shared(struct ac_nir_context *ctx, struct nir_shader *nir)
4799 {
4800    if (ctx->ac.lds)
4801       return;
4802 
4803    LLVMTypeRef type = LLVMArrayType(ctx->ac.i8, nir->info.cs.shared_size);
4804 
4805    LLVMValueRef lds =
4806       LLVMAddGlobalInAddressSpace(ctx->ac.module, type, "compute_lds", AC_ADDR_SPACE_LDS);
4807    LLVMSetAlignment(lds, 64 * 1024);
4808 
4809    ctx->ac.lds =
4810       LLVMBuildBitCast(ctx->ac.builder, lds, LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_LDS), "");
4811 }
4812 
4813 void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
4814                       const struct ac_shader_args *args, struct nir_shader *nir)
4815 {
4816    struct ac_nir_context ctx = {0};
4817    struct nir_function *func;
4818 
4819    ctx.ac = *ac;
4820    ctx.abi = abi;
4821    ctx.args = args;
4822 
4823    ctx.stage = nir->info.stage;
4824    ctx.info = &nir->info;
4825 
4826    ctx.main_function = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));
4827 
4828    /* TODO: remove this after RADV switches to lowered IO */
4829    if (!nir->info.io_lowered) {
4830       nir_foreach_shader_out_variable(variable, nir)
4831       {
4832          ac_handle_shader_output_decl(&ctx.ac, ctx.abi, nir, variable, ctx.stage);
4833       }
4834    }
4835 
4836    ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
4837    ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
4838    ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
4839 
4840    if (ctx.abi->kill_ps_if_inf_interp)
4841       ctx.verified_interp =
4842          _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
4843 
4844    func = (struct nir_function *)exec_list_get_head(&nir->functions);
4845 
4846    nir_index_ssa_defs(func->impl);
4847    ctx.ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef));
4848 
4849    setup_scratch(&ctx, nir);
4850    setup_constant_data(&ctx, nir);
4851 
4852    if (gl_shader_stage_is_compute(nir->info.stage))
4853       setup_shared(&ctx, nir);
4854 
4855    if (nir->info.stage == MESA_SHADER_FRAGMENT && nir->info.fs.uses_demote) {
4856       ctx.ac.postponed_kill = ac_build_alloca_undef(&ctx.ac, ac->i1, "");
4857       /* true = don't kill. */
4858       LLVMBuildStore(ctx.ac.builder, ctx.ac.i1true, ctx.ac.postponed_kill);
4859    }
4860 
4861    visit_cf_list(&ctx, &func->impl->body);
4862    phi_post_pass(&ctx);
4863 
4864    if (ctx.ac.postponed_kill)
4865       ac_build_kill_if_false(&ctx.ac, LLVMBuildLoad(ctx.ac.builder, ctx.ac.postponed_kill, ""));
4866 
4867    if (!gl_shader_stage_is_compute(nir->info.stage))
4868       ctx.abi->emit_outputs(ctx.abi, AC_LLVM_MAX_OUTPUTS, ctx.abi->outputs);
4869 
4870    free(ctx.ssa_defs);
4871    ralloc_free(ctx.defs);
4872    ralloc_free(ctx.phis);
4873    ralloc_free(ctx.vars);
4874    if (ctx.abi->kill_ps_if_inf_interp)
4875       ralloc_free(ctx.verified_interp);
4876 }
4877 
4878 bool ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class)
4879 {
4880    bool progress = false;
4881 
4882    /* Lower large variables to scratch first so that we won't bloat the
4883     * shader by generating large if-ladders for them. We later lower
4884     * scratch to allocas, assuming LLVM won't generate VGPR indexing.
4885     */
4886    NIR_PASS(progress, nir, nir_lower_vars_to_scratch, nir_var_function_temp, 256,
4887             glsl_get_natural_size_align_bytes);
4888 
4889    /* While it would be nice not to have this flag, we are constrained
4890     * by the reality that LLVM 9.0 has buggy VGPR indexing on GFX9.
4891     */
4892    bool llvm_has_working_vgpr_indexing = chip_class != GFX9;
4893 
4894    /* TODO: Indirect indexing of GS inputs is unimplemented.
4895     *
4896     * TCS and TES load inputs directly from LDS or offchip memory, so
4897     * indirect indexing is trivial.
4898     */
4899    nir_variable_mode indirect_mask = 0;
4900    if (nir->info.stage == MESA_SHADER_GEOMETRY ||
4901        (nir->info.stage != MESA_SHADER_TESS_CTRL && nir->info.stage != MESA_SHADER_TESS_EVAL &&
4902         !llvm_has_working_vgpr_indexing)) {
4903       indirect_mask |= nir_var_shader_in;
4904    }
4905    if (!llvm_has_working_vgpr_indexing && nir->info.stage != MESA_SHADER_TESS_CTRL)
4906       indirect_mask |= nir_var_shader_out;
4907 
4908    /* TODO: We shouldn't need to do this, but LLVM isn't currently
4909     * smart enough to handle indirects without excessive spilling,
4910     * which causes the GPU to hang.
4911     *
4912     * See the following thread for more details of the problem:
4913     * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html
4914     */
4915    indirect_mask |= nir_var_function_temp;
4916 
4917    progress |= nir_lower_indirect_derefs(nir, indirect_mask, UINT32_MAX);
4918    return progress;
4919 }
4920 
4921 static unsigned get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin)
4922 {
4923    if (intrin->intrinsic != nir_intrinsic_store_output)
4924       return 0;
4925 
4926    unsigned writemask = nir_intrinsic_write_mask(intrin) << nir_intrinsic_component(intrin);
4927    unsigned location = nir_intrinsic_io_semantics(intrin).location;
4928 
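   /* The combined writemask uses bits 0-3 for the inner tess levels and
    * bits 4-7 for the outer ones. */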
4929    if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
4930       return writemask << 4;
4931    else if (location == VARYING_SLOT_TESS_LEVEL_INNER)
4932       return writemask;
4933 
4934    return 0;
4935 }
4936 
4937 static void scan_tess_ctrl(nir_cf_node *cf_node, unsigned *upper_block_tf_writemask,
4938                            unsigned *cond_block_tf_writemask,
4939                            bool *tessfactors_are_def_in_all_invocs, bool is_nested_cf)
4940 {
4941    switch (cf_node->type) {
4942    case nir_cf_node_block: {
4943       nir_block *block = nir_cf_node_as_block(cf_node);
4944       nir_foreach_instr (instr, block) {
4945          if (instr->type != nir_instr_type_intrinsic)
4946             continue;
4947 
4948          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
4949          if (intrin->intrinsic == nir_intrinsic_control_barrier) {
4950 
4951             /* If we find a barrier in nested control flow, put this in the
4952              * too-hard basket. In GLSL this is not possible, but it is in
4953              * SPIR-V.
4954              */
4955             if (is_nested_cf) {
4956                *tessfactors_are_def_in_all_invocs = false;
4957                return;
4958             }
4959 
4960             /* The following case must be prevented:
4961              *    gl_TessLevelInner = ...;
4962              *    barrier();
4963              *    if (gl_InvocationID == 1)
4964              *       gl_TessLevelInner = ...;
4965              *
4966              * If you consider disjoint code segments separated by barriers, each
4967              * such segment that writes tess factor channels should write the same
4968              * channels in all codepaths within that segment.
4969              */
4970             if (*upper_block_tf_writemask || *cond_block_tf_writemask) {
4971                /* Accumulate the result: */
4972                *tessfactors_are_def_in_all_invocs &=
4973                   !(*cond_block_tf_writemask & ~(*upper_block_tf_writemask));
4974 
4975                /* Analyze the next code segment from scratch. */
4976                *upper_block_tf_writemask = 0;
4977                *cond_block_tf_writemask = 0;
4978             }
4979          } else
4980             *upper_block_tf_writemask |= get_inst_tessfactor_writemask(intrin);
4981       }
4982 
4983       break;
4984    }
4985    case nir_cf_node_if: {
4986       unsigned then_tessfactor_writemask = 0;
4987       unsigned else_tessfactor_writemask = 0;
4988 
4989       nir_if *if_stmt = nir_cf_node_as_if(cf_node);
4990       foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->then_list)
4991       {
4992          scan_tess_ctrl(nested_node, &then_tessfactor_writemask, cond_block_tf_writemask,
4993                         tessfactors_are_def_in_all_invocs, true);
4994       }
4995 
4996       foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->else_list)
4997       {
4998          scan_tess_ctrl(nested_node, &else_tessfactor_writemask, cond_block_tf_writemask,
4999                         tessfactors_are_def_in_all_invocs, true);
5000       }
5001 
5002       if (then_tessfactor_writemask || else_tessfactor_writemask) {
5003          /* If both statements write the same tess factor channels,
5004           * we can say that the upper block writes them too.
5005           */
5006          *upper_block_tf_writemask |= then_tessfactor_writemask & else_tessfactor_writemask;
5007          *cond_block_tf_writemask |= then_tessfactor_writemask | else_tessfactor_writemask;
5008       }
5009 
5010       break;
5011    }
5012    case nir_cf_node_loop: {
5013       nir_loop *loop = nir_cf_node_as_loop(cf_node);
5014       foreach_list_typed(nir_cf_node, nested_node, node, &loop->body)
5015       {
5016          scan_tess_ctrl(nested_node, cond_block_tf_writemask, cond_block_tf_writemask,
5017                         tessfactors_are_def_in_all_invocs, true);
5018       }
5019 
5020       break;
5021    }
5022    default:
5023       unreachable("unknown cf node type");
5024    }
5025 }
5026 
5027 bool ac_are_tessfactors_def_in_all_invocs(const struct nir_shader *nir)
5028 {
5029    assert(nir->info.stage == MESA_SHADER_TESS_CTRL);
5030 
5031    /* The pass works as follows:
5032     * If all codepaths write tess factors, we can say that all
5033     * invocations define tess factors.
5034     *
5035     * Each tess factor channel is tracked separately.
5036     */
5037    unsigned main_block_tf_writemask = 0; /* if main block writes tess factors */
5038    unsigned cond_block_tf_writemask = 0; /* if cond block writes tess factors */
5039 
5040    /* Initial value = true. Here the pass will accumulate results from
5041     * multiple segments surrounded by barriers. If tess factors aren't
5042     * written at all, it's a shader bug and we don't care if this will be
5043     * true.
5044     */
5045    bool tessfactors_are_def_in_all_invocs = true;
5046 
5047    nir_foreach_function (function, nir) {
5048       if (function->impl) {
5049          foreach_list_typed(nir_cf_node, node, node, &function->impl->body)
5050          {
5051             scan_tess_ctrl(node, &main_block_tf_writemask, &cond_block_tf_writemask,
5052                            &tessfactors_are_def_in_all_invocs, false);
5053          }
5054       }
5055    }
5056 
5057    /* Accumulate the result for the last code segment separated by a
5058     * barrier.
5059     */
5060    if (main_block_tf_writemask || cond_block_tf_writemask) {
5061       tessfactors_are_def_in_all_invocs &= !(cond_block_tf_writemask & ~main_block_tf_writemask);
5062    }
5063 
5064    return tessfactors_are_def_in_all_invocs;
5065 }
5066