• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2019 Red Hat.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included
14  * in all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  *
24  **************************************************************************/
25 
26 #include "lp_bld_nir.h"
27 #include "lp_bld_arit.h"
28 #include "lp_bld_bitarit.h"
29 #include "lp_bld_const.h"
30 #include "lp_bld_conv.h"
31 #include "lp_bld_gather.h"
32 #include "lp_bld_logic.h"
33 #include "lp_bld_quad.h"
34 #include "lp_bld_flow.h"
35 #include "lp_bld_intr.h"
36 #include "lp_bld_struct.h"
37 #include "lp_bld_debug.h"
38 #include "lp_bld_printf.h"
39 #include "nir_deref.h"
40 #include "nir_search_helpers.h"
41 
42 
43 // Doing AOS (and linear) codegen?
44 static bool
is_aos(const struct lp_build_nir_context * bld_base)45 is_aos(const struct lp_build_nir_context *bld_base)
46 {
47    // AOS is used for vectors of uint8[16]
48    return bld_base->base.type.length == 16 && bld_base->base.type.width == 8;
49 }
50 
51 
52 static void
53 visit_cf_list(struct lp_build_nir_context *bld_base,
54               struct exec_list *list);
55 
56 
57 static LLVMValueRef
cast_type(struct lp_build_nir_context * bld_base,LLVMValueRef val,nir_alu_type alu_type,unsigned bit_size)58 cast_type(struct lp_build_nir_context *bld_base, LLVMValueRef val,
59           nir_alu_type alu_type, unsigned bit_size)
60 {
61    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
62    switch (alu_type) {
63    case nir_type_float:
64       switch (bit_size) {
65       case 16:
66          return LLVMBuildBitCast(builder, val, bld_base->half_bld.vec_type, "");
67       case 32:
68          return LLVMBuildBitCast(builder, val, bld_base->base.vec_type, "");
69       case 64:
70          return LLVMBuildBitCast(builder, val, bld_base->dbl_bld.vec_type, "");
71       default:
72          assert(0);
73          break;
74       }
75       break;
76    case nir_type_int:
77       switch (bit_size) {
78       case 8:
79          return LLVMBuildBitCast(builder, val, bld_base->int8_bld.vec_type, "");
80       case 16:
81          return LLVMBuildBitCast(builder, val, bld_base->int16_bld.vec_type, "");
82       case 32:
83          return LLVMBuildBitCast(builder, val, bld_base->int_bld.vec_type, "");
84       case 64:
85          return LLVMBuildBitCast(builder, val, bld_base->int64_bld.vec_type, "");
86       default:
87          assert(0);
88          break;
89       }
90       break;
91    case nir_type_uint:
92       switch (bit_size) {
93       case 8:
94          return LLVMBuildBitCast(builder, val, bld_base->uint8_bld.vec_type, "");
95       case 16:
96          return LLVMBuildBitCast(builder, val, bld_base->uint16_bld.vec_type, "");
97       case 1:
98       case 32:
99          return LLVMBuildBitCast(builder, val, bld_base->uint_bld.vec_type, "");
100       case 64:
101          return LLVMBuildBitCast(builder, val, bld_base->uint64_bld.vec_type, "");
102       default:
103          assert(0);
104          break;
105       }
106       break;
107    case nir_type_uint32:
108       return LLVMBuildBitCast(builder, val, bld_base->uint_bld.vec_type, "");
109    default:
110       return val;
111    }
112    return NULL;
113 }
114 
115 
116 static unsigned
glsl_sampler_to_pipe(int sampler_dim,bool is_array)117 glsl_sampler_to_pipe(int sampler_dim, bool is_array)
118 {
119    unsigned pipe_target = PIPE_BUFFER;
120    switch (sampler_dim) {
121    case GLSL_SAMPLER_DIM_1D:
122       pipe_target = is_array ? PIPE_TEXTURE_1D_ARRAY : PIPE_TEXTURE_1D;
123       break;
124    case GLSL_SAMPLER_DIM_2D:
125       pipe_target = is_array ? PIPE_TEXTURE_2D_ARRAY : PIPE_TEXTURE_2D;
126       break;
127    case GLSL_SAMPLER_DIM_SUBPASS:
128    case GLSL_SAMPLER_DIM_SUBPASS_MS:
129       pipe_target = PIPE_TEXTURE_2D_ARRAY;
130       break;
131    case GLSL_SAMPLER_DIM_3D:
132       pipe_target = PIPE_TEXTURE_3D;
133       break;
134    case GLSL_SAMPLER_DIM_MS:
135       pipe_target = is_array ? PIPE_TEXTURE_2D_ARRAY : PIPE_TEXTURE_2D;
136       break;
137    case GLSL_SAMPLER_DIM_CUBE:
138       pipe_target = is_array ? PIPE_TEXTURE_CUBE_ARRAY : PIPE_TEXTURE_CUBE;
139       break;
140    case GLSL_SAMPLER_DIM_RECT:
141       pipe_target = PIPE_TEXTURE_RECT;
142       break;
143    case GLSL_SAMPLER_DIM_BUF:
144       pipe_target = PIPE_BUFFER;
145       break;
146    default:
147       break;
148    }
149    return pipe_target;
150 }
151 
152 
get_ssa_src(struct lp_build_nir_context * bld_base,nir_ssa_def * ssa)153 static LLVMValueRef get_ssa_src(struct lp_build_nir_context *bld_base, nir_ssa_def *ssa)
154 {
155    return bld_base->ssa_defs[ssa->index];
156 }
157 
158 
159 static LLVMValueRef
160 get_src(struct lp_build_nir_context *bld_base, nir_src src);
161 
162 
163 static LLVMValueRef
get_reg_src(struct lp_build_nir_context * bld_base,nir_reg_src src)164 get_reg_src(struct lp_build_nir_context *bld_base, nir_reg_src src)
165 {
166    struct hash_entry *entry = _mesa_hash_table_search(bld_base->regs, src.reg);
167    LLVMValueRef reg_storage = (LLVMValueRef)entry->data;
168    struct lp_build_context *reg_bld = get_int_bld(bld_base, true, src.reg->bit_size);
169    LLVMValueRef indir_src = NULL;
170    if (src.indirect)
171       indir_src = get_src(bld_base, *src.indirect);
172    return bld_base->load_reg(bld_base, reg_bld, &src, indir_src, reg_storage);
173 }
174 
175 
176 static LLVMValueRef
get_src(struct lp_build_nir_context * bld_base,nir_src src)177 get_src(struct lp_build_nir_context *bld_base, nir_src src)
178 {
179    if (src.is_ssa)
180       return get_ssa_src(bld_base, src.ssa);
181    else
182       return get_reg_src(bld_base, src.reg);
183 }
184 
185 
186 static void
assign_ssa(struct lp_build_nir_context * bld_base,int idx,LLVMValueRef ptr)187 assign_ssa(struct lp_build_nir_context *bld_base, int idx, LLVMValueRef ptr)
188 {
189    bld_base->ssa_defs[idx] = ptr;
190 }
191 
192 
193 static void
assign_ssa_dest(struct lp_build_nir_context * bld_base,const nir_ssa_def * ssa,LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])194 assign_ssa_dest(struct lp_build_nir_context *bld_base, const nir_ssa_def *ssa,
195                 LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
196 {
197    if ((ssa->num_components == 1 || is_aos(bld_base))) {
198       assign_ssa(bld_base, ssa->index, vals[0]);
199    } else {
200       assign_ssa(bld_base, ssa->index,
201              lp_nir_array_build_gather_values(bld_base->base.gallivm->builder,
202                                               vals, ssa->num_components));
203    }
204 }
205 
206 
207 static void
assign_reg(struct lp_build_nir_context * bld_base,const nir_reg_dest * reg,unsigned write_mask,LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])208 assign_reg(struct lp_build_nir_context *bld_base, const nir_reg_dest *reg,
209            unsigned write_mask,
210            LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
211 {
212    struct hash_entry *entry = _mesa_hash_table_search(bld_base->regs, reg->reg);
213    LLVMValueRef reg_storage = (LLVMValueRef)entry->data;
214    struct lp_build_context *reg_bld = get_int_bld(bld_base, true, reg->reg->bit_size);
215    LLVMValueRef indir_src = NULL;
216    if (reg->indirect)
217       indir_src = get_src(bld_base, *reg->indirect);
218    bld_base->store_reg(bld_base, reg_bld, reg, write_mask ? write_mask : 0xf, indir_src, reg_storage, vals);
219 }
220 
221 
222 static void
assign_dest(struct lp_build_nir_context * bld_base,const nir_dest * dest,LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])223 assign_dest(struct lp_build_nir_context *bld_base,
224             const nir_dest *dest,
225             LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
226 {
227    if (dest->is_ssa)
228       assign_ssa_dest(bld_base, &dest->ssa, vals);
229    else
230       assign_reg(bld_base, &dest->reg, 0, vals);
231 }
232 
233 
234 static void
assign_alu_dest(struct lp_build_nir_context * bld_base,const nir_alu_dest * dest,LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])235 assign_alu_dest(struct lp_build_nir_context *bld_base,
236                 const nir_alu_dest *dest,
237                 LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
238 {
239    if (dest->dest.is_ssa)
240       assign_ssa_dest(bld_base, &dest->dest.ssa, vals);
241    else
242       assign_reg(bld_base, &dest->dest.reg, dest->write_mask, vals);
243 }
244 
245 
246 static LLVMValueRef
int_to_bool32(struct lp_build_nir_context * bld_base,uint32_t src_bit_size,bool is_unsigned,LLVMValueRef val)247 int_to_bool32(struct lp_build_nir_context *bld_base,
248               uint32_t src_bit_size,
249               bool is_unsigned,
250               LLVMValueRef val)
251 {
252    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
253    struct lp_build_context *int_bld =
254       get_int_bld(bld_base, is_unsigned, src_bit_size);
255    LLVMValueRef result = lp_build_compare(bld_base->base.gallivm,
256                                           int_bld->type, PIPE_FUNC_NOTEQUAL,
257                                           val, int_bld->zero);
258    if (src_bit_size == 16)
259       result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
260    else if (src_bit_size == 64)
261       result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
262    return result;
263 }
264 
265 
266 static LLVMValueRef
flt_to_bool32(struct lp_build_nir_context * bld_base,uint32_t src_bit_size,LLVMValueRef val)267 flt_to_bool32(struct lp_build_nir_context *bld_base,
268               uint32_t src_bit_size,
269               LLVMValueRef val)
270 {
271    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
272    struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size);
273    LLVMValueRef result =
274       lp_build_cmp(flt_bld, PIPE_FUNC_NOTEQUAL, val, flt_bld->zero);
275    if (src_bit_size == 64)
276       result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
277    if (src_bit_size == 16)
278       result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
279    return result;
280 }
281 
282 
283 static LLVMValueRef
fcmp32(struct lp_build_nir_context * bld_base,enum pipe_compare_func compare,uint32_t src_bit_size,LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])284 fcmp32(struct lp_build_nir_context *bld_base,
285        enum pipe_compare_func compare,
286        uint32_t src_bit_size,
287        LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
288 {
289    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
290    struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size);
291    LLVMValueRef result;
292 
293    if (compare != PIPE_FUNC_NOTEQUAL)
294       result = lp_build_cmp_ordered(flt_bld, compare, src[0], src[1]);
295    else
296       result = lp_build_cmp(flt_bld, compare, src[0], src[1]);
297    if (src_bit_size == 64)
298       result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
299    else if (src_bit_size == 16)
300       result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
301    return result;
302 }
303 
304 
305 static LLVMValueRef
icmp32(struct lp_build_nir_context * bld_base,enum pipe_compare_func compare,bool is_unsigned,uint32_t src_bit_size,LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])306 icmp32(struct lp_build_nir_context *bld_base,
307        enum pipe_compare_func compare,
308        bool is_unsigned,
309        uint32_t src_bit_size,
310        LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
311 {
312    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
313    struct lp_build_context *i_bld =
314       get_int_bld(bld_base, is_unsigned, src_bit_size);
315    LLVMValueRef result = lp_build_cmp(i_bld, compare, src[0], src[1]);
316    if (src_bit_size < 32)
317       result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
318    else if (src_bit_size == 64)
319       result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
320    return result;
321 }
322 
323 
324 /**
325  * Get a source register value for an ALU instruction.
326  * This is where swizzled are handled.  There should be no negation
327  * or absolute value modifiers.
328  * num_components indicates the number of components needed in the
329  * returned array or vector.
330  */
331 static LLVMValueRef
get_alu_src(struct lp_build_nir_context * bld_base,nir_alu_src src,unsigned num_components)332 get_alu_src(struct lp_build_nir_context *bld_base,
333             nir_alu_src src,
334             unsigned num_components)
335 {
336    struct gallivm_state *gallivm = bld_base->base.gallivm;
337    LLVMBuilderRef builder = gallivm->builder;
338    LLVMValueRef value = get_src(bld_base, src.src);
339    bool need_swizzle = false;
340 
341    assert(value);
342 
343    if (is_aos(bld_base))
344       return value;
345 
346    unsigned src_components = nir_src_num_components(src.src);
347    for (unsigned i = 0; i < num_components; ++i) {
348       assert(src.swizzle[i] < src_components);
349       if (src.swizzle[i] != i)
350          need_swizzle = true;
351    }
352 
353    if (need_swizzle || num_components != src_components) {
354       if (src_components > 1 && num_components == 1) {
355          value = LLVMBuildExtractValue(gallivm->builder, value,
356                                        src.swizzle[0], "");
357       } else if (src_components == 1 && num_components > 1) {
358          LLVMValueRef values[] = {value, value, value, value,
359                                   value, value, value, value,
360                                   value, value, value, value,
361                                   value, value, value, value};
362          value = lp_nir_array_build_gather_values(builder, values, num_components);
363       } else {
364          LLVMValueRef arr = LLVMGetUndef(LLVMArrayType(LLVMTypeOf(LLVMBuildExtractValue(builder, value, 0, "")), num_components));
365          for (unsigned i = 0; i < num_components; i++)
366             arr = LLVMBuildInsertValue(builder, arr, LLVMBuildExtractValue(builder, value, src.swizzle[i], ""), i, "");
367          value = arr;
368       }
369    }
370    assert(!src.negate);
371    assert(!src.abs);
372    return value;
373 }
374 
375 
376 static LLVMValueRef
emit_b2f(struct lp_build_nir_context * bld_base,LLVMValueRef src0,unsigned bitsize)377 emit_b2f(struct lp_build_nir_context *bld_base,
378          LLVMValueRef src0,
379          unsigned bitsize)
380 {
381    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
382    LLVMValueRef result =
383       LLVMBuildAnd(builder, cast_type(bld_base, src0, nir_type_int, 32),
384                    LLVMBuildBitCast(builder,
385                                     lp_build_const_vec(bld_base->base.gallivm,
386                                                        bld_base->base.type,
387                                                        1.0),
388                                     bld_base->int_bld.vec_type, ""),
389                    "");
390    result = LLVMBuildBitCast(builder, result, bld_base->base.vec_type, "");
391    switch (bitsize) {
392    case 16:
393       result = LLVMBuildFPTrunc(builder, result,
394                                 bld_base->half_bld.vec_type, "");
395       break;
396    case 32:
397       break;
398    case 64:
399       result = LLVMBuildFPExt(builder, result,
400                               bld_base->dbl_bld.vec_type, "");
401       break;
402    default:
403       unreachable("unsupported bit size.");
404    }
405    return result;
406 }
407 
408 
409 static LLVMValueRef
emit_b2i(struct lp_build_nir_context * bld_base,LLVMValueRef src0,unsigned bitsize)410 emit_b2i(struct lp_build_nir_context *bld_base,
411          LLVMValueRef src0,
412          unsigned bitsize)
413 {
414    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
415    LLVMValueRef result = LLVMBuildAnd(builder,
416                           cast_type(bld_base, src0, nir_type_int, 32),
417                           lp_build_const_int_vec(bld_base->base.gallivm,
418                                                  bld_base->base.type, 1), "");
419    switch (bitsize) {
420    case 8:
421       return LLVMBuildTrunc(builder, result, bld_base->int8_bld.vec_type, "");
422    case 16:
423       return LLVMBuildTrunc(builder, result, bld_base->int16_bld.vec_type, "");
424    case 32:
425       return result;
426    case 64:
427       return LLVMBuildZExt(builder, result, bld_base->int64_bld.vec_type, "");
428    default:
429       unreachable("unsupported bit size.");
430    }
431 }
432 
433 
434 static LLVMValueRef
emit_b32csel(struct lp_build_nir_context * bld_base,unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS],LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])435 emit_b32csel(struct lp_build_nir_context *bld_base,
436              unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS],
437              LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
438 {
439    LLVMValueRef sel = cast_type(bld_base, src[0], nir_type_int, 32);
440    LLVMValueRef v = lp_build_compare(bld_base->base.gallivm, bld_base->int_bld.type, PIPE_FUNC_NOTEQUAL, sel, bld_base->int_bld.zero);
441    struct lp_build_context *bld = get_int_bld(bld_base, false, src_bit_size[1]);
442    return lp_build_select(bld, v, src[1], src[2]);
443 }
444 
445 
446 static LLVMValueRef
split_64bit(struct lp_build_nir_context * bld_base,LLVMValueRef src,bool hi)447 split_64bit(struct lp_build_nir_context *bld_base,
448             LLVMValueRef src,
449             bool hi)
450 {
451    struct gallivm_state *gallivm = bld_base->base.gallivm;
452    LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
453    LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
454    int len = bld_base->base.type.length * 2;
455    for (unsigned i = 0; i < bld_base->base.type.length; i++) {
456 #if UTIL_ARCH_LITTLE_ENDIAN
457       shuffles[i] = lp_build_const_int32(gallivm, i * 2);
458       shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
459 #else
460       shuffles[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
461       shuffles2[i] = lp_build_const_int32(gallivm, (i * 2));
462 #endif
463    }
464 
465    src = LLVMBuildBitCast(gallivm->builder, src,
466            LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), len), "");
467    return LLVMBuildShuffleVector(gallivm->builder, src,
468                                  LLVMGetUndef(LLVMTypeOf(src)),
469                                  LLVMConstVector(hi ? shuffles2 : shuffles,
470                                                  bld_base->base.type.length),
471                                  "");
472 }
473 
474 
475 static LLVMValueRef
merge_64bit(struct lp_build_nir_context * bld_base,LLVMValueRef input,LLVMValueRef input2)476 merge_64bit(struct lp_build_nir_context *bld_base,
477             LLVMValueRef input,
478             LLVMValueRef input2)
479 {
480    struct gallivm_state *gallivm = bld_base->base.gallivm;
481    LLVMBuilderRef builder = gallivm->builder;
482    int i;
483    LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
484    int len = bld_base->base.type.length * 2;
485    assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
486 
487    for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
488 #if UTIL_ARCH_LITTLE_ENDIAN
489       shuffles[i] = lp_build_const_int32(gallivm, i / 2);
490       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
491 #else
492       shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
493       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
494 #endif
495    }
496    return LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
497 }
498 
499 
500 static LLVMValueRef
split_16bit(struct lp_build_nir_context * bld_base,LLVMValueRef src,bool hi)501 split_16bit(struct lp_build_nir_context *bld_base,
502             LLVMValueRef src,
503             bool hi)
504 {
505    struct gallivm_state *gallivm = bld_base->base.gallivm;
506    LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
507    LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
508    int len = bld_base->base.type.length * 2;
509    for (unsigned i = 0; i < bld_base->base.type.length; i++) {
510 #if UTIL_ARCH_LITTLE_ENDIAN
511       shuffles[i] = lp_build_const_int32(gallivm, i * 2);
512       shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
513 #else
514       shuffles[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
515       shuffles2[i] = lp_build_const_int32(gallivm, (i * 2));
516 #endif
517    }
518 
519    src = LLVMBuildBitCast(gallivm->builder, src, LLVMVectorType(LLVMInt16TypeInContext(gallivm->context), len), "");
520    return LLVMBuildShuffleVector(gallivm->builder, src,
521                                  LLVMGetUndef(LLVMTypeOf(src)),
522                                  LLVMConstVector(hi ? shuffles2 : shuffles,
523                                                  bld_base->base.type.length),
524                                  "");
525 }
526 
527 
528 static LLVMValueRef
merge_16bit(struct lp_build_nir_context * bld_base,LLVMValueRef input,LLVMValueRef input2)529 merge_16bit(struct lp_build_nir_context *bld_base,
530             LLVMValueRef input,
531             LLVMValueRef input2)
532 {
533    struct gallivm_state *gallivm = bld_base->base.gallivm;
534    LLVMBuilderRef builder = gallivm->builder;
535    int i;
536    LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
537    int len = bld_base->int16_bld.type.length * 2;
538    assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
539 
540    for (i = 0; i < bld_base->int_bld.type.length * 2; i+=2) {
541 #if UTIL_ARCH_LITTLE_ENDIAN
542       shuffles[i] = lp_build_const_int32(gallivm, i / 2);
543       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
544 #else
545       shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
546       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
547 #endif
548    }
549    return LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
550 }
551 
552 
553 static LLVMValueRef
get_signed_divisor(struct gallivm_state * gallivm,struct lp_build_context * int_bld,struct lp_build_context * mask_bld,int src_bit_size,LLVMValueRef src,LLVMValueRef divisor)554 get_signed_divisor(struct gallivm_state *gallivm,
555                    struct lp_build_context *int_bld,
556                    struct lp_build_context *mask_bld,
557                    int src_bit_size,
558                    LLVMValueRef src, LLVMValueRef divisor)
559 {
560    LLVMBuilderRef builder = gallivm->builder;
561    /* However for signed divides SIGFPE can occur if the numerator is INT_MIN
562       and divisor is -1. */
563    /* set mask if numerator == INT_MIN */
564    long long min_val;
565    switch (src_bit_size) {
566    case 8:
567       min_val = INT8_MIN;
568       break;
569    case 16:
570       min_val = INT16_MIN;
571       break;
572    default:
573    case 32:
574       min_val = INT_MIN;
575       break;
576    case 64:
577       min_val = INT64_MIN;
578       break;
579    }
580    LLVMValueRef div_mask2 = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src,
581                                          lp_build_const_int_vec(gallivm, int_bld->type, min_val));
582    /* set another mask if divisor is - 1 */
583    LLVMValueRef div_mask3 = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, divisor,
584                                          lp_build_const_int_vec(gallivm, int_bld->type, -1));
585    div_mask2 = LLVMBuildAnd(builder, div_mask2, div_mask3, "");
586 
587    divisor = lp_build_select(mask_bld, div_mask2, int_bld->one, divisor);
588    return divisor;
589 }
590 
591 
592 static LLVMValueRef
do_int_divide(struct lp_build_nir_context * bld_base,bool is_unsigned,unsigned src_bit_size,LLVMValueRef src,LLVMValueRef src2)593 do_int_divide(struct lp_build_nir_context *bld_base,
594               bool is_unsigned, unsigned src_bit_size,
595               LLVMValueRef src, LLVMValueRef src2)
596 {
597    struct gallivm_state *gallivm = bld_base->base.gallivm;
598    LLVMBuilderRef builder = gallivm->builder;
599    struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
600    struct lp_build_context *mask_bld = get_int_bld(bld_base, true, src_bit_size);
601 
602    /* avoid divide by 0. Converted divisor from 0 to -1 */
603    LLVMValueRef div_mask = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src2,
604                                         mask_bld->zero);
605 
606    LLVMValueRef divisor = LLVMBuildOr(builder, div_mask, src2, "");
607    if (!is_unsigned) {
608       divisor = get_signed_divisor(gallivm, int_bld, mask_bld,
609                                    src_bit_size, src, divisor);
610    }
611    LLVMValueRef result = lp_build_div(int_bld, src, divisor);
612 
613    if (!is_unsigned) {
614       LLVMValueRef not_div_mask = LLVMBuildNot(builder, div_mask, "");
615       return LLVMBuildAnd(builder, not_div_mask, result, "");
616    } else
617       /* udiv by zero is guaranteed to return 0xffffffff at least with d3d10
618        * may as well do same for idiv */
619       return LLVMBuildOr(builder, div_mask, result, "");
620 }
621 
622 
623 static LLVMValueRef
do_int_mod(struct lp_build_nir_context * bld_base,bool is_unsigned,unsigned src_bit_size,LLVMValueRef src,LLVMValueRef src2)624 do_int_mod(struct lp_build_nir_context *bld_base,
625            bool is_unsigned, unsigned src_bit_size,
626            LLVMValueRef src, LLVMValueRef src2)
627 {
628    struct gallivm_state *gallivm = bld_base->base.gallivm;
629    LLVMBuilderRef builder = gallivm->builder;
630    struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
631    struct lp_build_context *mask_bld = get_int_bld(bld_base, true, src_bit_size);
632    LLVMValueRef div_mask = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src2,
633                                         mask_bld->zero);
634    LLVMValueRef divisor = LLVMBuildOr(builder,
635                                       div_mask,
636                                       src2, "");
637    if (!is_unsigned) {
638       divisor = get_signed_divisor(gallivm, int_bld, mask_bld,
639                                    src_bit_size, src, divisor);
640    }
641    LLVMValueRef result = lp_build_mod(int_bld, src, divisor);
642    return LLVMBuildOr(builder, div_mask, result, "");
643 }
644 
645 
646 static LLVMValueRef
do_quantize_to_f16(struct lp_build_nir_context * bld_base,LLVMValueRef src)647 do_quantize_to_f16(struct lp_build_nir_context *bld_base,
648                    LLVMValueRef src)
649 {
650    struct gallivm_state *gallivm = bld_base->base.gallivm;
651    LLVMBuilderRef builder = gallivm->builder;
652    LLVMValueRef result, cond, cond2, temp;
653 
654    result = LLVMBuildFPTrunc(builder, src, bld_base->half_bld.vec_type, "");
655    result = LLVMBuildFPExt(builder, result, bld_base->base.vec_type, "");
656 
657    temp = lp_build_abs(get_flt_bld(bld_base, 32), result);
658    cond = LLVMBuildFCmp(builder, LLVMRealOGT,
659                         LLVMBuildBitCast(builder, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, 0x38800000), bld_base->base.vec_type, ""),
660                         temp, "");
661    cond2 = LLVMBuildFCmp(builder, LLVMRealONE, temp, bld_base->base.zero, "");
662    cond = LLVMBuildAnd(builder, cond, cond2, "");
663    result = LLVMBuildSelect(builder, cond, bld_base->base.zero, result, "");
664    return result;
665 }
666 
667 
668 static LLVMValueRef
do_alu_action(struct lp_build_nir_context * bld_base,const nir_alu_instr * instr,unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS],LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])669 do_alu_action(struct lp_build_nir_context *bld_base,
670               const nir_alu_instr *instr,
671               unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS],
672               LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
673 {
674    struct gallivm_state *gallivm = bld_base->base.gallivm;
675    LLVMBuilderRef builder = gallivm->builder;
676    LLVMValueRef result;
677 
678    switch (instr->op) {
679    case nir_op_b2f16:
680       result = emit_b2f(bld_base, src[0], 16);
681       break;
682    case nir_op_b2f32:
683       result = emit_b2f(bld_base, src[0], 32);
684       break;
685    case nir_op_b2f64:
686       result = emit_b2f(bld_base, src[0], 64);
687       break;
688    case nir_op_b2i8:
689       result = emit_b2i(bld_base, src[0], 8);
690       break;
691    case nir_op_b2i16:
692       result = emit_b2i(bld_base, src[0], 16);
693       break;
694    case nir_op_b2i32:
695       result = emit_b2i(bld_base, src[0], 32);
696       break;
697    case nir_op_b2i64:
698       result = emit_b2i(bld_base, src[0], 64);
699       break;
700    case nir_op_b32csel:
701       result = emit_b32csel(bld_base, src_bit_size, src);
702       break;
703    case nir_op_bit_count:
704       result = lp_build_popcount(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
705       if (src_bit_size[0] < 32)
706          result = LLVMBuildZExt(builder, result, bld_base->int_bld.vec_type, "");
707       else if (src_bit_size[0] > 32)
708          result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
709       break;
710    case nir_op_bitfield_select:
711       result = lp_build_xor(&bld_base->uint_bld, src[2], lp_build_and(&bld_base->uint_bld, src[0], lp_build_xor(&bld_base->uint_bld, src[1], src[2])));
712       break;
713    case nir_op_bitfield_reverse:
714       result = lp_build_bitfield_reverse(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
715       break;
716    case nir_op_f2b32:
717       result = flt_to_bool32(bld_base, src_bit_size[0], src[0]);
718       break;
719    case nir_op_f2f16:
720       if (src_bit_size[0] == 64)
721          src[0] = LLVMBuildFPTrunc(builder, src[0],
722                                    bld_base->base.vec_type, "");
723       result = LLVMBuildFPTrunc(builder, src[0],
724                                 bld_base->half_bld.vec_type, "");
725       break;
726    case nir_op_f2f32:
727       if (src_bit_size[0] < 32)
728          result = LLVMBuildFPExt(builder, src[0],
729                                  bld_base->base.vec_type, "");
730       else
731          result = LLVMBuildFPTrunc(builder, src[0],
732                                    bld_base->base.vec_type, "");
733       break;
734    case nir_op_f2f64:
735       result = LLVMBuildFPExt(builder, src[0],
736                               bld_base->dbl_bld.vec_type, "");
737       break;
738    case nir_op_f2i8:
739       result = LLVMBuildFPToSI(builder,
740                                src[0],
741                                bld_base->uint8_bld.vec_type, "");
742       break;
743    case nir_op_f2i16:
744       result = LLVMBuildFPToSI(builder,
745                                src[0],
746                                bld_base->uint16_bld.vec_type, "");
747       break;
748    case nir_op_f2i32:
749       result = LLVMBuildFPToSI(builder, src[0], bld_base->base.int_vec_type, "");
750       break;
751    case nir_op_f2u8:
752       result = LLVMBuildFPToUI(builder,
753                                src[0],
754                                bld_base->uint8_bld.vec_type, "");
755       break;
756    case nir_op_f2u16:
757       result = LLVMBuildFPToUI(builder,
758                                src[0],
759                                bld_base->uint16_bld.vec_type, "");
760       break;
761    case nir_op_f2u32:
762       result = LLVMBuildFPToUI(builder,
763                                src[0],
764                                bld_base->base.int_vec_type, "");
765       break;
766    case nir_op_f2i64:
767       result = LLVMBuildFPToSI(builder,
768                                src[0],
769                                bld_base->int64_bld.vec_type, "");
770       break;
771    case nir_op_f2u64:
772       result = LLVMBuildFPToUI(builder,
773                                src[0],
774                                bld_base->uint64_bld.vec_type, "");
775       break;
776    case nir_op_fabs:
777       result = lp_build_abs(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
778       break;
779    case nir_op_fadd:
780       result = lp_build_add(get_flt_bld(bld_base, src_bit_size[0]),
781                             src[0], src[1]);
782       break;
783    case nir_op_fceil:
784       result = lp_build_ceil(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
785       break;
786    case nir_op_fcos:
787       result = lp_build_cos(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
788       break;
789    case nir_op_fddx:
790    case nir_op_fddx_coarse:
791    case nir_op_fddx_fine:
792       result = lp_build_ddx(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
793       break;
794    case nir_op_fddy:
795    case nir_op_fddy_coarse:
796    case nir_op_fddy_fine:
797       result = lp_build_ddy(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
798       break;
799    case nir_op_fdiv:
800       result = lp_build_div(get_flt_bld(bld_base, src_bit_size[0]),
801                             src[0], src[1]);
802       break;
803    case nir_op_feq32:
804       result = fcmp32(bld_base, PIPE_FUNC_EQUAL, src_bit_size[0], src);
805       break;
806    case nir_op_fexp2:
807       result = lp_build_exp2(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
808       break;
809    case nir_op_ffloor:
810       result = lp_build_floor(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
811       break;
812    case nir_op_ffma:
813       result = lp_build_fmuladd(builder, src[0], src[1], src[2]);
814       break;
815    case nir_op_ffract: {
816       struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size[0]);
817       LLVMValueRef tmp = lp_build_floor(flt_bld, src[0]);
818       result = lp_build_sub(flt_bld, src[0], tmp);
819       break;
820    }
821    case nir_op_fge:
822    case nir_op_fge32:
823       result = fcmp32(bld_base, PIPE_FUNC_GEQUAL, src_bit_size[0], src);
824       break;
825    case nir_op_find_lsb: {
826       struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
827       result = lp_build_cttz(int_bld, src[0]);
828       if (src_bit_size[0] < 32)
829          result = LLVMBuildZExt(builder, result, bld_base->uint_bld.vec_type, "");
830       else if (src_bit_size[0] > 32)
831          result = LLVMBuildTrunc(builder, result, bld_base->uint_bld.vec_type, "");
832       break;
833    }
834    case nir_op_fisfinite32:
835       unreachable("Should have been lowered in nir_opt_algebraic_late.");
836    case nir_op_flog2:
837       result = lp_build_log2_safe(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
838       break;
839    case nir_op_flt:
840    case nir_op_flt32:
841       result = fcmp32(bld_base, PIPE_FUNC_LESS, src_bit_size[0], src);
842       break;
843    case nir_op_fmax:
844    case nir_op_fmin: {
845       enum gallivm_nan_behavior minmax_nan;
846       int first = 0;
847 
848       /* If one of the sources is known to be a number (i.e., not NaN), then
849        * better code can be generated by passing that information along.
850        */
851       if (is_a_number(bld_base->range_ht, instr, 1,
852                       0 /* unused num_components */,
853                       NULL /* unused swizzle */)) {
854          minmax_nan = GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN;
855       } else if (is_a_number(bld_base->range_ht, instr, 0,
856                              0 /* unused num_components */,
857                              NULL /* unused swizzle */)) {
858          first = 1;
859          minmax_nan = GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN;
860       } else {
861          minmax_nan = GALLIVM_NAN_RETURN_OTHER;
862       }
863 
864       if (instr->op == nir_op_fmin) {
865          result = lp_build_min_ext(get_flt_bld(bld_base, src_bit_size[0]),
866                                    src[first], src[1 - first], minmax_nan);
867       } else {
868          result = lp_build_max_ext(get_flt_bld(bld_base, src_bit_size[0]),
869                                    src[first], src[1 - first], minmax_nan);
870       }
871       break;
872    }
873    case nir_op_fmod: {
874       struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size[0]);
875       result = lp_build_div(flt_bld, src[0], src[1]);
876       result = lp_build_floor(flt_bld, result);
877       result = lp_build_mul(flt_bld, src[1], result);
878       result = lp_build_sub(flt_bld, src[0], result);
879       break;
880    }
881    case nir_op_fmul:
882       result = lp_build_mul(get_flt_bld(bld_base, src_bit_size[0]),
883                             src[0], src[1]);
884       break;
885    case nir_op_fneu32:
886       result = fcmp32(bld_base, PIPE_FUNC_NOTEQUAL, src_bit_size[0], src);
887       break;
888    case nir_op_fneg:
889       result = lp_build_negate(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
890       break;
891    case nir_op_fpow:
892       result = lp_build_pow(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1]);
893       break;
894    case nir_op_fquantize2f16:
895       result = do_quantize_to_f16(bld_base, src[0]);
896       break;
897    case nir_op_frcp:
898       result = lp_build_rcp(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
899       break;
900    case nir_op_fround_even:
901       if (src_bit_size[0] == 16) {
902          struct lp_build_context *bld = get_flt_bld(bld_base, 16);
903          char intrinsic[64];
904          lp_format_intrinsic(intrinsic, 64, "llvm.roundeven", bld->vec_type);
905          result = lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, src[0]);
906       } else {
907          result = lp_build_round(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
908       }
909       break;
910    case nir_op_frsq:
911       result = lp_build_rsqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
912       break;
913    case nir_op_fsat:
914       result = lp_build_clamp_zero_one_nanzero(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
915       break;
916    case nir_op_fsign:
917       result = lp_build_sgn(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
918       break;
919    case nir_op_fsin:
920       result = lp_build_sin(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
921       break;
922    case nir_op_fsqrt:
923       result = lp_build_sqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
924       break;
925    case nir_op_ftrunc:
926       result = lp_build_trunc(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
927       break;
928    case nir_op_i2b32:
929       result = int_to_bool32(bld_base, src_bit_size[0], false, src[0]);
930       break;
931    case nir_op_i2f16:
932       result = LLVMBuildSIToFP(builder, src[0],
933                                bld_base->half_bld.vec_type, "");
934       break;
935    case nir_op_i2f32:
936       result = lp_build_int_to_float(&bld_base->base, src[0]);
937       break;
938    case nir_op_i2f64:
939       result = lp_build_int_to_float(&bld_base->dbl_bld, src[0]);
940       break;
941    case nir_op_i2i8:
942       result = LLVMBuildTrunc(builder, src[0], bld_base->int8_bld.vec_type, "");
943       break;
944    case nir_op_i2i16:
945       if (src_bit_size[0] < 16)
946          result = LLVMBuildSExt(builder, src[0], bld_base->int16_bld.vec_type, "");
947       else
948          result = LLVMBuildTrunc(builder, src[0], bld_base->int16_bld.vec_type, "");
949       break;
950    case nir_op_i2i32:
951       if (src_bit_size[0] < 32)
952          result = LLVMBuildSExt(builder, src[0], bld_base->int_bld.vec_type, "");
953       else
954          result = LLVMBuildTrunc(builder, src[0], bld_base->int_bld.vec_type, "");
955       break;
956    case nir_op_i2i64:
957       result = LLVMBuildSExt(builder, src[0], bld_base->int64_bld.vec_type, "");
958       break;
959    case nir_op_iabs:
960       result = lp_build_abs(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
961       break;
962    case nir_op_iadd:
963       result = lp_build_add(get_int_bld(bld_base, false, src_bit_size[0]),
964                             src[0], src[1]);
965       break;
966    case nir_op_iand:
967       result = lp_build_and(get_int_bld(bld_base, false, src_bit_size[0]),
968                             src[0], src[1]);
969       break;
970    case nir_op_idiv:
971       result = do_int_divide(bld_base, false, src_bit_size[0], src[0], src[1]);
972       break;
973    case nir_op_ieq32:
974       result = icmp32(bld_base, PIPE_FUNC_EQUAL, false, src_bit_size[0], src);
975       break;
976    case nir_op_ige32:
977       result = icmp32(bld_base, PIPE_FUNC_GEQUAL, false, src_bit_size[0], src);
978       break;
979    case nir_op_ilt32:
980       result = icmp32(bld_base, PIPE_FUNC_LESS, false, src_bit_size[0], src);
981       break;
982    case nir_op_imax:
983       result = lp_build_max(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1]);
984       break;
985    case nir_op_imin:
986       result = lp_build_min(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1]);
987       break;
988    case nir_op_imul:
989    case nir_op_imul24:
990       result = lp_build_mul(get_int_bld(bld_base, false, src_bit_size[0]),
991                             src[0], src[1]);
992       break;
993    case nir_op_imul_high: {
994       LLVMValueRef hi_bits;
995       lp_build_mul_32_lohi(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1], &hi_bits);
996       result = hi_bits;
997       break;
998    }
999    case nir_op_ine32:
1000       result = icmp32(bld_base, PIPE_FUNC_NOTEQUAL, false, src_bit_size[0], src);
1001       break;
1002    case nir_op_ineg:
1003       result = lp_build_negate(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
1004       break;
1005    case nir_op_inot:
1006       result = lp_build_not(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
1007       break;
1008    case nir_op_ior:
1009       result = lp_build_or(get_int_bld(bld_base, false, src_bit_size[0]),
1010                            src[0], src[1]);
1011       break;
1012    case nir_op_imod:
1013    case nir_op_irem:
1014       result = do_int_mod(bld_base, false, src_bit_size[0], src[0], src[1]);
1015       break;
1016    case nir_op_ishl: {
1017       struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
1018       struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
1019       if (src_bit_size[0] == 64)
1020          src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
1021       if (src_bit_size[0] < 32)
1022          src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
1023       src[1] = lp_build_and(uint_bld, src[1], lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
1024       result = lp_build_shl(int_bld, src[0], src[1]);
1025       break;
1026    }
1027    case nir_op_ishr: {
1028       struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
1029       struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
1030       if (src_bit_size[0] == 64)
1031          src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
1032       if (src_bit_size[0] < 32)
1033          src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
1034       src[1] = lp_build_and(uint_bld, src[1], lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
1035       result = lp_build_shr(int_bld, src[0], src[1]);
1036       break;
1037    }
1038    case nir_op_isign:
1039       result = lp_build_sgn(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
1040       break;
1041    case nir_op_isub:
1042       result = lp_build_sub(get_int_bld(bld_base, false, src_bit_size[0]),
1043                             src[0], src[1]);
1044       break;
1045    case nir_op_ixor:
1046       result = lp_build_xor(get_int_bld(bld_base, false, src_bit_size[0]),
1047                             src[0], src[1]);
1048       break;
1049    case nir_op_mov:
1050       result = src[0];
1051       break;
1052    case nir_op_unpack_64_2x32_split_x:
1053       result = split_64bit(bld_base, src[0], false);
1054       break;
1055    case nir_op_unpack_64_2x32_split_y:
1056       result = split_64bit(bld_base, src[0], true);
1057       break;
1058 
1059    case nir_op_pack_32_2x16_split: {
1060       LLVMValueRef tmp = merge_16bit(bld_base, src[0], src[1]);
1061       result = LLVMBuildBitCast(builder, tmp, bld_base->base.vec_type, "");
1062       break;
1063    }
1064    case nir_op_unpack_32_2x16_split_x:
1065       result = split_16bit(bld_base, src[0], false);
1066       break;
1067    case nir_op_unpack_32_2x16_split_y:
1068       result = split_16bit(bld_base, src[0], true);
1069       break;
1070    case nir_op_pack_64_2x32_split: {
1071       LLVMValueRef tmp = merge_64bit(bld_base, src[0], src[1]);
1072       result = LLVMBuildBitCast(builder, tmp, bld_base->uint64_bld.vec_type, "");
1073       break;
1074    }
1075    case nir_op_pack_32_4x8_split: {
1076       LLVMValueRef tmp1 = merge_16bit(bld_base, src[0], src[1]);
1077       LLVMValueRef tmp2 = merge_16bit(bld_base, src[2], src[3]);
1078       tmp1 = LLVMBuildBitCast(builder, tmp1, bld_base->uint16_bld.vec_type, "");
1079       tmp2 = LLVMBuildBitCast(builder, tmp2, bld_base->uint16_bld.vec_type, "");
1080       LLVMValueRef tmp = merge_16bit(bld_base, tmp1, tmp2);
1081       result = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.vec_type, "");
1082       break;
1083    }
1084    case nir_op_u2f16:
1085       result = LLVMBuildUIToFP(builder, src[0],
1086                                bld_base->half_bld.vec_type, "");
1087       break;
1088    case nir_op_u2f32:
1089       result = LLVMBuildUIToFP(builder, src[0], bld_base->base.vec_type, "");
1090       break;
1091    case nir_op_u2f64:
1092       result = LLVMBuildUIToFP(builder, src[0], bld_base->dbl_bld.vec_type, "");
1093       break;
1094    case nir_op_u2u8:
1095       result = LLVMBuildTrunc(builder, src[0], bld_base->uint8_bld.vec_type, "");
1096       break;
1097    case nir_op_u2u16:
1098       if (src_bit_size[0] < 16)
1099          result = LLVMBuildZExt(builder, src[0], bld_base->uint16_bld.vec_type, "");
1100       else
1101          result = LLVMBuildTrunc(builder, src[0], bld_base->uint16_bld.vec_type, "");
1102       break;
1103    case nir_op_u2u32:
1104       if (src_bit_size[0] < 32)
1105          result = LLVMBuildZExt(builder, src[0], bld_base->uint_bld.vec_type, "");
1106       else
1107          result = LLVMBuildTrunc(builder, src[0], bld_base->uint_bld.vec_type, "");
1108       break;
1109    case nir_op_u2u64:
1110       result = LLVMBuildZExt(builder, src[0], bld_base->uint64_bld.vec_type, "");
1111       break;
1112    case nir_op_udiv:
1113       result = do_int_divide(bld_base, true, src_bit_size[0], src[0], src[1]);
1114       break;
1115    case nir_op_ufind_msb: {
1116       struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
1117       result = lp_build_ctlz(uint_bld, src[0]);
1118       result = lp_build_sub(uint_bld, lp_build_const_int_vec(gallivm, uint_bld->type, src_bit_size[0] - 1), result);
1119       if (src_bit_size[0] < 32)
1120          result = LLVMBuildZExt(builder, result, bld_base->uint_bld.vec_type, "");
1121       else
1122          result = LLVMBuildTrunc(builder, result, bld_base->uint_bld.vec_type, "");
1123       break;
1124    }
1125    case nir_op_uge32:
1126       result = icmp32(bld_base, PIPE_FUNC_GEQUAL, true, src_bit_size[0], src);
1127       break;
1128    case nir_op_ult32:
1129       result = icmp32(bld_base, PIPE_FUNC_LESS, true, src_bit_size[0], src);
1130       break;
1131    case nir_op_umax:
1132       result = lp_build_max(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1]);
1133       break;
1134    case nir_op_umin:
1135       result = lp_build_min(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1]);
1136       break;
1137    case nir_op_umod:
1138       result = do_int_mod(bld_base, true, src_bit_size[0], src[0], src[1]);
1139       break;
1140    case nir_op_umul_high: {
1141       LLVMValueRef hi_bits;
1142       lp_build_mul_32_lohi(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1], &hi_bits);
1143       result = hi_bits;
1144       break;
1145    }
1146    case nir_op_ushr: {
1147       struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
1148       if (src_bit_size[0] == 64)
1149          src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
1150       if (src_bit_size[0] < 32)
1151          src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
1152       src[1] = lp_build_and(uint_bld, src[1], lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
1153       result = lp_build_shr(uint_bld, src[0], src[1]);
1154       break;
1155    }
1156    case nir_op_bcsel: {
1157       LLVMTypeRef src1_type = LLVMTypeOf(src[1]);
1158       LLVMTypeRef src2_type = LLVMTypeOf(src[2]);
1159 
1160       if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
1161           LLVMGetTypeKind(src2_type) != LLVMPointerTypeKind) {
1162          src[2] = LLVMBuildIntToPtr(builder, src[2], src1_type, "");
1163       } else if (LLVMGetTypeKind(src2_type) == LLVMPointerTypeKind &&
1164                  LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) {
1165          src[1] = LLVMBuildIntToPtr(builder, src[1], src2_type, "");
1166       }
1167 
1168       for (int i = 1; i <= 2; i++) {
1169          LLVMTypeRef type = LLVMTypeOf(src[i]);
1170          if (LLVMGetTypeKind(type) == LLVMPointerTypeKind)
1171             break;
1172          src[i] = LLVMBuildBitCast(builder, src[i], get_int_bld(bld_base, true, src_bit_size[i])->vec_type, "");
1173       }
1174       return LLVMBuildSelect(builder, src[0], src[1], src[2], "");
1175    }
1176    default:
1177       assert(0);
1178       break;
1179    }
1180    return result;
1181 }
1182 
1183 
/*
 * Translate one NIR ALU instruction into LLVM IR.
 *
 * Gathers the instruction's sources, then dispatches on the opcode class:
 * vecN ops just cast their scalar sources, fsumN ops reduce across the
 * source's channels, AOS mode emits a single packed operation, and the
 * general SoA path loops over destination channels calling do_alu_action()
 * per channel.  The result is bound to the instruction's destination.
 */
static void
visit_alu(struct lp_build_nir_context *bld_base,
          const nir_alu_instr *instr)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef src[NIR_MAX_VEC_COMPONENTS];
   unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS];
   const unsigned num_components = nir_dest_num_components(instr->dest.dest);
   unsigned src_components;

   /* How many components each source is fetched with.  Most ops match the
    * destination width, but a few have fixed source widths. */
   switch (instr->op) {
   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
   case nir_op_vec8:
   case nir_op_vec16:
      /* vecN builds a vector from scalar sources. */
      src_components = 1;
      break;
   case nir_op_pack_half_2x16:
      src_components = 2;
      break;
   case nir_op_unpack_half_2x16:
      src_components = 1;
      break;
   case nir_op_cube_face_coord_amd:
   case nir_op_cube_face_index_amd:
      src_components = 3;
      break;
   case nir_op_fsum2:
   case nir_op_fsum3:
   case nir_op_fsum4:
      /* Horizontal sums consume their declared input width. */
      src_components = nir_op_infos[instr->op].input_sizes[0];
      break;
   default:
      src_components = num_components;
      break;
   }

   /* Fetch all sources and remember their bit sizes. */
   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
      src[i] = get_alu_src(bld_base, instr->src[i], src_components);
      src_bit_size[i] = nir_src_bit_size(instr->src[i].src);
   }

   /* AOS fast path: a register-destination mov writes each enabled channel
    * of the register directly and is done. */
   if (instr->op == nir_op_mov &&
       is_aos(bld_base) &&
       !instr->dest.dest.is_ssa) {
      for (unsigned i = 0; i < 4; i++) {
         if (instr->dest.write_mask & (1 << i)) {
            assign_reg(bld_base, &instr->dest.dest.reg, (1 << i), src);
         }
      }
      return;
   }

   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
   if (instr->op == nir_op_vec4 ||
       instr->op == nir_op_vec3 ||
       instr->op == nir_op_vec2 ||
       instr->op == nir_op_vec8 ||
       instr->op == nir_op_vec16) {
      /* vecN: each scalar source becomes one destination channel, only a
       * type cast is needed. */
      for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
         result[i] = cast_type(bld_base, src[i],
                               nir_op_infos[instr->op].input_types[i],
                               src_bit_size[i]);
      }
   } else if (instr->op == nir_op_fsum4 ||
              instr->op == nir_op_fsum3 ||
              instr->op == nir_op_fsum2) {
      /* fsumN: horizontal add of the source's channels into result[0]. */
      for (unsigned c = 0; c < nir_op_infos[instr->op].input_sizes[0]; c++) {
         LLVMValueRef temp_chan = LLVMBuildExtractValue(gallivm->builder,
                                                          src[0], c, "");
         temp_chan = cast_type(bld_base, temp_chan,
                               nir_op_infos[instr->op].input_types[0],
                               src_bit_size[0]);
         result[0] = (c == 0) ? temp_chan
            : lp_build_add(get_flt_bld(bld_base, src_bit_size[0]),
                           result[0], temp_chan);
      }
   } else if (is_aos(bld_base)) {
      /* AOS: one packed operation covers all channels. */
      if (instr->op == nir_op_fmul) {
         /* Constant fmul operands need conversion to the AOS layout. */
         if (LLVMIsConstant(src[0])) {
            src[0] = lp_nir_aos_conv_const(gallivm, src[0], 1);
         }
         if (LLVMIsConstant(src[1])) {
            src[1] = lp_nir_aos_conv_const(gallivm, src[1], 1);
         }
      }
      result[0] = do_alu_action(bld_base, instr, src_bit_size, src);
   } else {
      /* Loop for R,G,B,A channels */
      for (unsigned c = 0; c < num_components; c++) {
         LLVMValueRef src_chan[NIR_MAX_VEC_COMPONENTS];

         /* Loop over instruction operands */
         for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
            if (num_components > 1) {
               src_chan[i] = LLVMBuildExtractValue(gallivm->builder,
                                                     src[i], c, "");
            } else {
               src_chan[i] = src[i];
            }
            src_chan[i] = cast_type(bld_base, src_chan[i],
                                    nir_op_infos[instr->op].input_types[i],
                                    src_bit_size[i]);
         }
         result[c] = do_alu_action(bld_base, instr, src_bit_size, src_chan);
         result[c] = cast_type(bld_base, result[c],
                               nir_op_infos[instr->op].output_type,
                               nir_dest_bit_size(instr->dest.dest));
      }
   }
   assign_alu_dest(bld_base, &instr->dest, result);
}
1297 
1298 
1299 static void
visit_load_const(struct lp_build_nir_context * bld_base,const nir_load_const_instr * instr)1300 visit_load_const(struct lp_build_nir_context *bld_base,
1301                  const nir_load_const_instr *instr)
1302 {
1303    LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
1304    bld_base->load_const(bld_base, instr, result);
1305    assign_ssa_dest(bld_base, &instr->def, result);
1306 }
1307 
1308 
1309 static void
get_deref_offset(struct lp_build_nir_context * bld_base,nir_deref_instr * instr,bool vs_in,unsigned * vertex_index_out,LLVMValueRef * vertex_index_ref,unsigned * const_out,LLVMValueRef * indir_out)1310 get_deref_offset(struct lp_build_nir_context *bld_base, nir_deref_instr *instr,
1311                  bool vs_in, unsigned *vertex_index_out,
1312                  LLVMValueRef *vertex_index_ref,
1313                  unsigned *const_out, LLVMValueRef *indir_out)
1314 {
1315    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1316    nir_variable *var = nir_deref_instr_get_variable(instr);
1317    nir_deref_path path;
1318    unsigned idx_lvl = 1;
1319 
1320    nir_deref_path_init(&path, instr, NULL);
1321 
1322    if (vertex_index_out != NULL || vertex_index_ref != NULL) {
1323       if (vertex_index_ref) {
1324          *vertex_index_ref = get_src(bld_base, path.path[idx_lvl]->arr.index);
1325          if (vertex_index_out)
1326             *vertex_index_out = 0;
1327       } else {
1328          *vertex_index_out = nir_src_as_uint(path.path[idx_lvl]->arr.index);
1329       }
1330       ++idx_lvl;
1331    }
1332 
1333    uint32_t const_offset = 0;
1334    LLVMValueRef offset = NULL;
1335 
1336    if (var->data.compact && nir_src_is_const(instr->arr.index)) {
1337       assert(instr->deref_type == nir_deref_type_array);
1338       const_offset = nir_src_as_uint(instr->arr.index);
1339       goto out;
1340    }
1341 
1342    for (; path.path[idx_lvl]; ++idx_lvl) {
1343       const struct glsl_type *parent_type = path.path[idx_lvl - 1]->type;
1344       if (path.path[idx_lvl]->deref_type == nir_deref_type_struct) {
1345          unsigned index = path.path[idx_lvl]->strct.index;
1346 
1347          for (unsigned i = 0; i < index; i++) {
1348             const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
1349             const_offset += glsl_count_attribute_slots(ft, vs_in);
1350          }
1351       } else if (path.path[idx_lvl]->deref_type == nir_deref_type_array) {
1352          unsigned size = glsl_count_attribute_slots(path.path[idx_lvl]->type, vs_in);
1353          if (nir_src_is_const(path.path[idx_lvl]->arr.index)) {
1354            const_offset += nir_src_comp_as_int(path.path[idx_lvl]->arr.index, 0) * size;
1355          } else {
1356            LLVMValueRef idx_src = get_src(bld_base, path.path[idx_lvl]->arr.index);
1357            idx_src = cast_type(bld_base, idx_src, nir_type_uint, 32);
1358            LLVMValueRef array_off = lp_build_mul(&bld_base->uint_bld, lp_build_const_int_vec(bld_base->base.gallivm, bld_base->base.type, size),
1359                                                idx_src);
1360            if (offset)
1361              offset = lp_build_add(&bld_base->uint_bld, offset, array_off);
1362            else
1363              offset = array_off;
1364          }
1365       } else
1366          unreachable("Uhandled deref type in get_deref_instr_offset");
1367    }
1368 
1369 out:
1370    nir_deref_path_finish(&path);
1371 
1372    if (const_offset && offset)
1373       offset = LLVMBuildAdd(builder, offset,
1374                             lp_build_const_int_vec(bld_base->base.gallivm, bld_base->uint_bld.type, const_offset),
1375                             "");
1376    *const_out = const_offset;
1377    *indir_out = offset;
1378 }
1379 
1380 
1381 static void
visit_load_input(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1382 visit_load_input(struct lp_build_nir_context *bld_base,
1383                  nir_intrinsic_instr *instr,
1384                  LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1385 {
1386    nir_variable var = {0};
1387    var.data.location = nir_intrinsic_io_semantics(instr).location;
1388    var.data.driver_location = nir_intrinsic_base(instr);
1389    var.data.location_frac = nir_intrinsic_component(instr);
1390 
1391    unsigned nc = nir_dest_num_components(instr->dest);
1392    unsigned bit_size = nir_dest_bit_size(instr->dest);
1393 
1394    nir_src offset = *nir_get_io_offset_src(instr);
1395    bool indirect = !nir_src_is_const(offset);
1396    if (!indirect)
1397       assert(nir_src_as_uint(offset) == 0);
1398    LLVMValueRef indir_index = indirect ? get_src(bld_base, offset) : NULL;
1399 
1400    bld_base->load_var(bld_base, nir_var_shader_in, nc, bit_size, &var, 0, NULL, 0, indir_index, result);
1401 }
1402 
1403 
1404 static void
visit_store_output(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr)1405 visit_store_output(struct lp_build_nir_context *bld_base,
1406                    nir_intrinsic_instr *instr)
1407 {
1408    nir_variable var = {0};
1409    var.data.location = nir_intrinsic_io_semantics(instr).location;
1410    var.data.driver_location = nir_intrinsic_base(instr);
1411    var.data.location_frac = nir_intrinsic_component(instr);
1412 
1413    unsigned mask = nir_intrinsic_write_mask(instr);
1414 
1415    unsigned bit_size = nir_src_bit_size(instr->src[0]);
1416    LLVMValueRef src = get_src(bld_base, instr->src[0]);
1417 
1418    nir_src offset = *nir_get_io_offset_src(instr);
1419    bool indirect = !nir_src_is_const(offset);
1420    if (!indirect)
1421       assert(nir_src_as_uint(offset) == 0);
1422    LLVMValueRef indir_index = indirect ? get_src(bld_base, offset) : NULL;
1423 
1424    if (mask == 0x1 && LLVMGetTypeKind(LLVMTypeOf(src)) == LLVMArrayTypeKind) {
1425       src = LLVMBuildExtractValue(bld_base->base.gallivm->builder,
1426                                   src, 0, "");
1427    }
1428 
1429    bld_base->store_var(bld_base, nir_var_shader_out, util_last_bit(mask),
1430                        bit_size, &var, mask, NULL, 0, indir_index, src);
1431 }
1432 
1433 
1434 static void
visit_load_var(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1435 visit_load_var(struct lp_build_nir_context *bld_base,
1436                nir_intrinsic_instr *instr,
1437                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1438 {
1439    nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1440    nir_variable *var = nir_deref_instr_get_variable(deref);
1441    assert(util_bitcount(deref->modes) == 1);
1442    nir_variable_mode mode = deref->modes;
1443    unsigned const_index;
1444    LLVMValueRef indir_index;
1445    LLVMValueRef indir_vertex_index = NULL;
1446    unsigned vertex_index = 0;
1447    unsigned nc = nir_dest_num_components(instr->dest);
1448    unsigned bit_size = nir_dest_bit_size(instr->dest);
1449    if (var) {
1450       bool vs_in = bld_base->shader->info.stage == MESA_SHADER_VERTEX &&
1451          var->data.mode == nir_var_shader_in;
1452       bool gs_in = bld_base->shader->info.stage == MESA_SHADER_GEOMETRY &&
1453          var->data.mode == nir_var_shader_in;
1454       bool tcs_in = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
1455          var->data.mode == nir_var_shader_in;
1456       bool tcs_out = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
1457          var->data.mode == nir_var_shader_out && !var->data.patch;
1458       bool tes_in = bld_base->shader->info.stage == MESA_SHADER_TESS_EVAL &&
1459          var->data.mode == nir_var_shader_in && !var->data.patch;
1460 
1461       mode = var->data.mode;
1462 
1463       get_deref_offset(bld_base, deref, vs_in,
1464                    gs_in ? &vertex_index : NULL,
1465                    (tcs_in || tcs_out || tes_in) ? &indir_vertex_index : NULL,
1466                    &const_index, &indir_index);
1467    }
1468    bld_base->load_var(bld_base, mode, nc, bit_size, var, vertex_index,
1469                       indir_vertex_index, const_index, indir_index, result);
1470 }
1471 
1472 
1473 static void
visit_store_var(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr)1474 visit_store_var(struct lp_build_nir_context *bld_base,
1475                 nir_intrinsic_instr *instr)
1476 {
1477    nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1478    nir_variable *var = nir_deref_instr_get_variable(deref);
1479    assert(util_bitcount(deref->modes) == 1);
1480    nir_variable_mode mode = deref->modes;
1481    int writemask = instr->const_index[0];
1482    unsigned bit_size = nir_src_bit_size(instr->src[1]);
1483    LLVMValueRef src = get_src(bld_base, instr->src[1]);
1484    unsigned const_index = 0;
1485    LLVMValueRef indir_index, indir_vertex_index = NULL;
1486    if (var) {
1487       bool tcs_out = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
1488          var->data.mode == nir_var_shader_out && !var->data.patch;
1489       get_deref_offset(bld_base, deref, false, NULL,
1490                        tcs_out ? &indir_vertex_index : NULL,
1491                        &const_index, &indir_index);
1492    }
1493    bld_base->store_var(bld_base, mode, instr->num_components, bit_size,
1494                        var, writemask, indir_vertex_index, const_index,
1495                        indir_index, src);
1496 }
1497 
1498 
1499 static void
visit_load_ubo(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1500 visit_load_ubo(struct lp_build_nir_context *bld_base,
1501                nir_intrinsic_instr *instr,
1502                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1503 {
1504    struct gallivm_state *gallivm = bld_base->base.gallivm;
1505    LLVMBuilderRef builder = gallivm->builder;
1506    LLVMValueRef idx = get_src(bld_base, instr->src[0]);
1507    LLVMValueRef offset = get_src(bld_base, instr->src[1]);
1508 
1509    bool offset_is_uniform = nir_src_is_always_uniform(instr->src[1]);
1510    idx = LLVMBuildExtractElement(builder, idx, lp_build_const_int32(gallivm, 0), "");
1511    bld_base->load_ubo(bld_base, nir_dest_num_components(instr->dest),
1512                       nir_dest_bit_size(instr->dest),
1513                       offset_is_uniform, idx, offset, result);
1514 }
1515 
1516 
1517 static void
visit_load_push_constant(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[4])1518 visit_load_push_constant(struct lp_build_nir_context *bld_base,
1519                          nir_intrinsic_instr *instr,
1520                          LLVMValueRef result[4])
1521 {
1522    struct gallivm_state *gallivm = bld_base->base.gallivm;
1523    LLVMValueRef offset = get_src(bld_base, instr->src[0]);
1524    LLVMValueRef idx = lp_build_const_int32(gallivm, 0);
1525    bool offset_is_uniform = nir_src_is_always_uniform(instr->src[0]);
1526 
1527    bld_base->load_ubo(bld_base, nir_dest_num_components(instr->dest),
1528                       nir_dest_bit_size(instr->dest),
1529                       offset_is_uniform, idx, offset, result);
1530 }
1531 
1532 
1533 static void
visit_load_ssbo(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1534 visit_load_ssbo(struct lp_build_nir_context *bld_base,
1535                 nir_intrinsic_instr *instr,
1536                 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1537 {
1538    LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_uint, 32);
1539    LLVMValueRef offset = get_src(bld_base, instr->src[1]);
1540    bool index_and_offset_are_uniform = nir_src_is_always_uniform(instr->src[0]) && nir_src_is_always_uniform(instr->src[1]);
1541    bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
1542                       index_and_offset_are_uniform, idx, offset, result);
1543 }
1544 
1545 
1546 static void
visit_store_ssbo(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr)1547 visit_store_ssbo(struct lp_build_nir_context *bld_base,
1548                  nir_intrinsic_instr *instr)
1549 {
1550    LLVMValueRef val = get_src(bld_base, instr->src[0]);
1551    LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[1]), nir_type_uint, 32);
1552    LLVMValueRef offset = get_src(bld_base, instr->src[2]);
1553    bool index_and_offset_are_uniform = nir_src_is_always_uniform(instr->src[1]) && nir_src_is_always_uniform(instr->src[2]);
1554    int writemask = instr->const_index[0];
1555    int nc = nir_src_num_components(instr->src[0]);
1556    int bitsize = nir_src_bit_size(instr->src[0]);
1557    bld_base->store_mem(bld_base, writemask, nc, bitsize, index_and_offset_are_uniform, idx, offset, val);
1558 }
1559 
1560 
1561 static void
visit_get_ssbo_size(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1562 visit_get_ssbo_size(struct lp_build_nir_context *bld_base,
1563                     nir_intrinsic_instr *instr,
1564                     LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1565 {
1566    LLVMValueRef idx = cast_type(bld_base,
1567                                 get_src(bld_base, instr->src[0]),
1568                                 nir_type_uint, 32);
1569    result[0] = bld_base->get_ssbo_size(bld_base, idx);
1570 }
1571 
1572 
1573 static void
visit_ssbo_atomic(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1574 visit_ssbo_atomic(struct lp_build_nir_context *bld_base,
1575                   nir_intrinsic_instr *instr,
1576                   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1577 {
1578    LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[0]),
1579                                 nir_type_uint, 32);
1580    LLVMValueRef offset = get_src(bld_base, instr->src[1]);
1581    LLVMValueRef val = get_src(bld_base, instr->src[2]);
1582    LLVMValueRef val2 = NULL;
1583    int bitsize = nir_src_bit_size(instr->src[2]);
1584    if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap)
1585       val2 = get_src(bld_base, instr->src[3]);
1586 
1587    bld_base->atomic_mem(bld_base, instr->intrinsic, bitsize, idx,
1588                         offset, val, val2, &result[0]);
1589 }
1590 
1591 
1592 static void
visit_load_image(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1593 visit_load_image(struct lp_build_nir_context *bld_base,
1594                  nir_intrinsic_instr *instr,
1595                  LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1596 {
1597    struct gallivm_state *gallivm = bld_base->base.gallivm;
1598    LLVMBuilderRef builder = gallivm->builder;
1599    nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1600    nir_variable *var = nir_deref_instr_get_variable(deref);
1601    LLVMValueRef coord_val = get_src(bld_base, instr->src[1]);
1602    LLVMValueRef coords[5];
1603    struct lp_img_params params;
1604    const struct glsl_type *type = glsl_without_array(var->type);
1605    unsigned const_index;
1606    LLVMValueRef indir_index;
1607    get_deref_offset(bld_base, deref, false, NULL, NULL,
1608                     &const_index, &indir_index);
1609 
1610    memset(&params, 0, sizeof(params));
1611    params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type),
1612                                         glsl_sampler_type_is_array(type));
1613    for (unsigned i = 0; i < 4; i++)
1614       coords[i] = LLVMBuildExtractValue(builder, coord_val, i, "");
1615    if (params.target == PIPE_TEXTURE_1D_ARRAY)
1616       coords[2] = coords[1];
1617 
1618    params.coords = coords;
1619    params.outdata = result;
1620    params.img_op = LP_IMG_LOAD;
1621    if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS ||
1622        glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS_MS) {
1623       params.ms_index = cast_type(bld_base, get_src(bld_base, instr->src[2]),
1624                                   nir_type_uint, 32);
1625    }
1626    params.image_index = var->data.binding + (indir_index ? 0 : const_index);
1627    params.image_index_offset = indir_index;
1628    bld_base->image_op(bld_base, &params);
1629 }
1630 
1631 
1632 static void
visit_store_image(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr)1633 visit_store_image(struct lp_build_nir_context *bld_base,
1634                   nir_intrinsic_instr *instr)
1635 {
1636    struct gallivm_state *gallivm = bld_base->base.gallivm;
1637    LLVMBuilderRef builder = gallivm->builder;
1638    nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1639    nir_variable *var = nir_deref_instr_get_variable(deref);
1640    LLVMValueRef coord_val = get_src(bld_base, instr->src[1]);
1641    LLVMValueRef in_val = get_src(bld_base, instr->src[3]);
1642    LLVMValueRef coords[5];
1643    struct lp_img_params params;
1644    const struct glsl_type *type = glsl_without_array(var->type);
1645    unsigned const_index;
1646    LLVMValueRef indir_index;
1647    get_deref_offset(bld_base, deref, false, NULL, NULL,
1648                     &const_index, &indir_index);
1649 
1650    memset(&params, 0, sizeof(params));
1651    params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
1652    for (unsigned i = 0; i < 4; i++)
1653       coords[i] = LLVMBuildExtractValue(builder, coord_val, i, "");
1654    if (params.target == PIPE_TEXTURE_1D_ARRAY)
1655       coords[2] = coords[1];
1656    params.coords = coords;
1657 
1658    for (unsigned i = 0; i < 4; i++) {
1659       params.indata[i] = LLVMBuildExtractValue(builder, in_val, i, "");
1660       params.indata[i] = LLVMBuildBitCast(builder, params.indata[i], bld_base->base.vec_type, "");
1661    }
1662    if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS)
1663       params.ms_index = get_src(bld_base, instr->src[2]);
1664    params.img_op = LP_IMG_STORE;
1665    params.image_index = var->data.binding + (indir_index ? 0 : const_index);
1666    params.image_index_offset = indir_index;
1667 
1668    if (params.target == PIPE_TEXTURE_1D_ARRAY)
1669       coords[2] = coords[1];
1670    bld_base->image_op(bld_base, &params);
1671 }
1672 
1673 
1674 static void
visit_atomic_image(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1675 visit_atomic_image(struct lp_build_nir_context *bld_base,
1676                    nir_intrinsic_instr *instr,
1677                    LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1678 {
1679    struct gallivm_state *gallivm = bld_base->base.gallivm;
1680    LLVMBuilderRef builder = gallivm->builder;
1681    nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1682    nir_variable *var = nir_deref_instr_get_variable(deref);
1683    struct lp_img_params params;
1684    LLVMValueRef coord_val = get_src(bld_base, instr->src[1]);
1685    LLVMValueRef in_val = get_src(bld_base, instr->src[3]);
1686    LLVMValueRef coords[5];
1687    const struct glsl_type *type = glsl_without_array(var->type);
1688    unsigned const_index;
1689    LLVMValueRef indir_index;
1690    get_deref_offset(bld_base, deref, false, NULL, NULL,
1691                     &const_index, &indir_index);
1692 
1693    memset(&params, 0, sizeof(params));
1694 
1695    switch (instr->intrinsic) {
1696    case nir_intrinsic_image_deref_atomic_add:
1697       params.op = LLVMAtomicRMWBinOpAdd;
1698       break;
1699    case nir_intrinsic_image_deref_atomic_exchange:
1700       params.op = LLVMAtomicRMWBinOpXchg;
1701       break;
1702    case nir_intrinsic_image_deref_atomic_and:
1703       params.op = LLVMAtomicRMWBinOpAnd;
1704       break;
1705    case nir_intrinsic_image_deref_atomic_or:
1706       params.op = LLVMAtomicRMWBinOpOr;
1707       break;
1708    case nir_intrinsic_image_deref_atomic_xor:
1709       params.op = LLVMAtomicRMWBinOpXor;
1710       break;
1711    case nir_intrinsic_image_deref_atomic_umin:
1712       params.op = LLVMAtomicRMWBinOpUMin;
1713       break;
1714    case nir_intrinsic_image_deref_atomic_umax:
1715       params.op = LLVMAtomicRMWBinOpUMax;
1716       break;
1717    case nir_intrinsic_image_deref_atomic_imin:
1718       params.op = LLVMAtomicRMWBinOpMin;
1719       break;
1720    case nir_intrinsic_image_deref_atomic_imax:
1721       params.op = LLVMAtomicRMWBinOpMax;
1722       break;
1723    default:
1724       break;
1725    }
1726 
1727    params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type),
1728                                         glsl_sampler_type_is_array(type));
1729    for (unsigned i = 0; i < 4; i++) {
1730       coords[i] = LLVMBuildExtractValue(builder, coord_val, i, "");
1731    }
1732    if (params.target == PIPE_TEXTURE_1D_ARRAY) {
1733       coords[2] = coords[1];
1734    }
1735 
1736    params.coords = coords;
1737 
1738    if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS) {
1739       params.ms_index = get_src(bld_base, instr->src[2]);
1740    }
1741    if (instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) {
1742       LLVMValueRef cas_val = get_src(bld_base, instr->src[4]);
1743       params.indata[0] = in_val;
1744       params.indata2[0] = cas_val;
1745    } else {
1746       params.indata[0] = in_val;
1747    }
1748 
1749    params.outdata = result;
1750    params.img_op =
1751       (instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap)
1752       ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;
1753    params.image_index = var->data.binding + (indir_index ? 0 : const_index);
1754    params.image_index_offset = indir_index;
1755 
1756    bld_base->image_op(bld_base, &params);
1757 }
1758 
1759 
1760 static void
visit_image_size(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1761 visit_image_size(struct lp_build_nir_context *bld_base,
1762                  nir_intrinsic_instr *instr,
1763                  LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1764 {
1765    nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1766    nir_variable *var = nir_deref_instr_get_variable(deref);
1767    struct lp_sampler_size_query_params params = { 0 };
1768    unsigned const_index;
1769    LLVMValueRef indir_index;
1770    const struct glsl_type *type = glsl_without_array(var->type);
1771    get_deref_offset(bld_base, deref, false, NULL, NULL,
1772                     &const_index, &indir_index);
1773    params.texture_unit = var->data.binding + (indir_index ? 0 : const_index);
1774    params.texture_unit_offset = indir_index;
1775    params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type),
1776                                         glsl_sampler_type_is_array(type));
1777    params.sizes_out = result;
1778 
1779    bld_base->image_size(bld_base, &params);
1780 }
1781 
1782 
1783 static void
visit_image_samples(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1784 visit_image_samples(struct lp_build_nir_context *bld_base,
1785                     nir_intrinsic_instr *instr,
1786                     LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1787 {
1788    nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1789    nir_variable *var = nir_deref_instr_get_variable(deref);
1790    struct lp_sampler_size_query_params params = { 0 };
1791    unsigned const_index;
1792    LLVMValueRef indir_index;
1793    const struct glsl_type *type = glsl_without_array(var->type);
1794    get_deref_offset(bld_base, deref, false, NULL, NULL,
1795                     &const_index, &indir_index);
1796 
1797    params.texture_unit = var->data.binding + (indir_index ? 0 : const_index);
1798    params.texture_unit_offset = indir_index;
1799    params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type),
1800                                         glsl_sampler_type_is_array(type));
1801    params.sizes_out = result;
1802    params.samples_only = true;
1803 
1804    bld_base->image_size(bld_base, &params);
1805 }
1806 
1807 static void
visit_shared_load(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1808 visit_shared_load(struct lp_build_nir_context *bld_base,
1809                   nir_intrinsic_instr *instr,
1810                   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1811 {
1812    LLVMValueRef offset = get_src(bld_base, instr->src[0]);
1813    bool offset_is_uniform = nir_src_is_always_uniform(instr->src[0]);
1814    bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
1815                       offset_is_uniform, NULL, offset, result);
1816 }
1817 
1818 
1819 static void
visit_shared_store(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr)1820 visit_shared_store(struct lp_build_nir_context *bld_base,
1821                    nir_intrinsic_instr *instr)
1822 {
1823    LLVMValueRef val = get_src(bld_base, instr->src[0]);
1824    LLVMValueRef offset = get_src(bld_base, instr->src[1]);
1825    bool offset_is_uniform = nir_src_is_always_uniform(instr->src[1]);
1826    int writemask = instr->const_index[1];
1827    int nc = nir_src_num_components(instr->src[0]);
1828    int bitsize = nir_src_bit_size(instr->src[0]);
1829    bld_base->store_mem(bld_base, writemask, nc, bitsize, offset_is_uniform, NULL, offset, val);
1830 }
1831 
1832 
1833 static void
visit_shared_atomic(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1834 visit_shared_atomic(struct lp_build_nir_context *bld_base,
1835                     nir_intrinsic_instr *instr,
1836                     LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1837 {
1838    LLVMValueRef offset = get_src(bld_base, instr->src[0]);
1839    LLVMValueRef val = get_src(bld_base, instr->src[1]);
1840    LLVMValueRef val2 = NULL;
1841    int bitsize = nir_src_bit_size(instr->src[1]);
1842    if (instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap)
1843       val2 = get_src(bld_base, instr->src[2]);
1844 
1845    bld_base->atomic_mem(bld_base, instr->intrinsic, bitsize, NULL, offset, val, val2, &result[0]);
1846 }
1847 
1848 
/* Emit an execution barrier by delegating to the backend's barrier hook
 * (used for the control_barrier / scoped_barrier intrinsics). */
static void
visit_barrier(struct lp_build_nir_context *bld_base)
{
   bld_base->barrier(bld_base);
}
1854 
1855 
1856 static void
visit_discard(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr)1857 visit_discard(struct lp_build_nir_context *bld_base,
1858               nir_intrinsic_instr *instr)
1859 {
1860    LLVMValueRef cond = NULL;
1861    if (instr->intrinsic == nir_intrinsic_discard_if) {
1862       cond = get_src(bld_base, instr->src[0]);
1863       cond = cast_type(bld_base, cond, nir_type_int, 32);
1864    }
1865    bld_base->discard(bld_base, cond);
1866 }
1867 
1868 
1869 static void
visit_load_kernel_input(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1870 visit_load_kernel_input(struct lp_build_nir_context *bld_base,
1871                         nir_intrinsic_instr *instr,
1872                         LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1873 {
1874    LLVMValueRef offset = get_src(bld_base, instr->src[0]);
1875 
1876    bool offset_is_uniform = nir_src_is_always_uniform(instr->src[0]);
1877    bld_base->load_kernel_arg(bld_base, nir_dest_num_components(instr->dest),
1878                              nir_dest_bit_size(instr->dest),
1879                              nir_src_bit_size(instr->src[0]),
1880                              offset_is_uniform, offset, result);
1881 }
1882 
1883 
1884 static void
visit_load_global(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1885 visit_load_global(struct lp_build_nir_context *bld_base,
1886                   nir_intrinsic_instr *instr,
1887                   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1888 {
1889    LLVMValueRef addr = get_src(bld_base, instr->src[0]);
1890    bool offset_is_uniform = nir_src_is_always_uniform(instr->src[0]);
1891    bld_base->load_global(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
1892                          nir_src_bit_size(instr->src[0]),
1893                          offset_is_uniform, addr, result);
1894 }
1895 
1896 
1897 static void
visit_store_global(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr)1898 visit_store_global(struct lp_build_nir_context *bld_base,
1899                    nir_intrinsic_instr *instr)
1900 {
1901    LLVMValueRef val = get_src(bld_base, instr->src[0]);
1902    int nc = nir_src_num_components(instr->src[0]);
1903    int bitsize = nir_src_bit_size(instr->src[0]);
1904    LLVMValueRef addr = get_src(bld_base, instr->src[1]);
1905    int addr_bitsize = nir_src_bit_size(instr->src[1]);
1906    int writemask = instr->const_index[0];
1907    bld_base->store_global(bld_base, writemask, nc, bitsize,
1908                           addr_bitsize, addr, val);
1909 }
1910 
1911 
1912 static void
visit_global_atomic(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1913 visit_global_atomic(struct lp_build_nir_context *bld_base,
1914                     nir_intrinsic_instr *instr,
1915                     LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1916 {
1917    LLVMValueRef addr = get_src(bld_base, instr->src[0]);
1918    LLVMValueRef val = get_src(bld_base, instr->src[1]);
1919    LLVMValueRef val2 = NULL;
1920    int addr_bitsize = nir_src_bit_size(instr->src[0]);
1921    int val_bitsize = nir_src_bit_size(instr->src[1]);
1922    if (instr->intrinsic == nir_intrinsic_global_atomic_comp_swap)
1923       val2 = get_src(bld_base, instr->src[2]);
1924 
1925    bld_base->atomic_global(bld_base, instr->intrinsic, addr_bitsize,
1926                            val_bitsize, addr, val, val2, &result[0]);
1927 }
1928 
1929 #if LLVM_VERSION_MAJOR >= 10
/* Translate nir_intrinsic_shuffle: src[0] = value to shuffle (treated
 * as signed int), src[1] = source invocation index (unsigned). */
static void visit_shuffle(struct lp_build_nir_context *bld_base,
                          nir_intrinsic_instr *instr,
                          LLVMValueRef dst[4])
{
   LLVMValueRef value =
      cast_type(bld_base, get_src(bld_base, instr->src[0]),
                nir_type_int, nir_src_bit_size(instr->src[0]));
   LLVMValueRef invocation =
      cast_type(bld_base, get_src(bld_base, instr->src[1]),
                nir_type_uint, nir_src_bit_size(instr->src[1]));

   bld_base->shuffle(bld_base, value, invocation, instr, dst);
}
1941 #endif
1942 
1943 
1944 static void
visit_interp(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1945 visit_interp(struct lp_build_nir_context *bld_base,
1946              nir_intrinsic_instr *instr,
1947              LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1948 {
1949    struct gallivm_state *gallivm = bld_base->base.gallivm;
1950    LLVMBuilderRef builder = gallivm->builder;
1951    nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1952    unsigned num_components = nir_dest_num_components(instr->dest);
1953    nir_variable *var = nir_deref_instr_get_variable(deref);
1954    unsigned const_index;
1955    LLVMValueRef indir_index;
1956    LLVMValueRef offsets[2] = { NULL, NULL };
1957    get_deref_offset(bld_base, deref, false, NULL, NULL,
1958                     &const_index, &indir_index);
1959    bool centroid = instr->intrinsic == nir_intrinsic_interp_deref_at_centroid;
1960    bool sample = false;
1961    if (instr->intrinsic == nir_intrinsic_interp_deref_at_offset) {
1962       for (unsigned i = 0; i < 2; i++) {
1963          offsets[i] = LLVMBuildExtractValue(builder, get_src(bld_base, instr->src[1]), i, "");
1964          offsets[i] = cast_type(bld_base, offsets[i], nir_type_float, 32);
1965       }
1966    } else if (instr->intrinsic == nir_intrinsic_interp_deref_at_sample) {
1967       offsets[0] = get_src(bld_base, instr->src[1]);
1968       offsets[0] = cast_type(bld_base, offsets[0], nir_type_int, 32);
1969       sample = true;
1970    }
1971    bld_base->interp_at(bld_base, num_components, var, centroid, sample,
1972                        const_index, indir_index, offsets, result);
1973 }
1974 
1975 
1976 static void
visit_load_scratch(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1977 visit_load_scratch(struct lp_build_nir_context *bld_base,
1978                    nir_intrinsic_instr *instr,
1979                    LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1980 {
1981    LLVMValueRef offset = get_src(bld_base, instr->src[0]);
1982 
1983    bld_base->load_scratch(bld_base, nir_dest_num_components(instr->dest),
1984                           nir_dest_bit_size(instr->dest), offset, result);
1985 }
1986 
1987 
1988 static void
visit_store_scratch(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr)1989 visit_store_scratch(struct lp_build_nir_context *bld_base,
1990                     nir_intrinsic_instr *instr)
1991 {
1992    LLVMValueRef val = get_src(bld_base, instr->src[0]);
1993    LLVMValueRef offset = get_src(bld_base, instr->src[1]);
1994    int writemask = instr->const_index[2];
1995    int nc = nir_src_num_components(instr->src[0]);
1996    int bitsize = nir_src_bit_size(instr->src[0]);
1997    bld_base->store_scratch(bld_base, writemask, nc, bitsize, offset, val);
1998 }
1999 
2000 
2001 static void
visit_intrinsic(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr)2002 visit_intrinsic(struct lp_build_nir_context *bld_base,
2003                 nir_intrinsic_instr *instr)
2004 {
2005    LLVMValueRef result[NIR_MAX_VEC_COMPONENTS] = {0};
2006    switch (instr->intrinsic) {
2007    case nir_intrinsic_load_input:
2008       visit_load_input(bld_base, instr, result);
2009       break;
2010    case nir_intrinsic_store_output:
2011       visit_store_output(bld_base, instr);
2012       break;
2013    case nir_intrinsic_load_deref:
2014       visit_load_var(bld_base, instr, result);
2015       break;
2016    case nir_intrinsic_store_deref:
2017       visit_store_var(bld_base, instr);
2018       break;
2019    case nir_intrinsic_load_ubo:
2020       visit_load_ubo(bld_base, instr, result);
2021       break;
2022    case nir_intrinsic_load_push_constant:
2023       visit_load_push_constant(bld_base, instr, result);
2024       break;
2025    case nir_intrinsic_load_ssbo:
2026       visit_load_ssbo(bld_base, instr, result);
2027       break;
2028    case nir_intrinsic_store_ssbo:
2029       visit_store_ssbo(bld_base, instr);
2030       break;
2031    case nir_intrinsic_get_ssbo_size:
2032       visit_get_ssbo_size(bld_base, instr, result);
2033       break;
2034    case nir_intrinsic_load_vertex_id:
2035    case nir_intrinsic_load_primitive_id:
2036    case nir_intrinsic_load_instance_id:
2037    case nir_intrinsic_load_base_instance:
2038    case nir_intrinsic_load_base_vertex:
2039    case nir_intrinsic_load_first_vertex:
2040    case nir_intrinsic_load_workgroup_id:
2041    case nir_intrinsic_load_local_invocation_id:
2042    case nir_intrinsic_load_local_invocation_index:
2043    case nir_intrinsic_load_num_workgroups:
2044    case nir_intrinsic_load_invocation_id:
2045    case nir_intrinsic_load_front_face:
2046    case nir_intrinsic_load_draw_id:
2047    case nir_intrinsic_load_workgroup_size:
2048    case nir_intrinsic_load_work_dim:
2049    case nir_intrinsic_load_tess_coord:
2050    case nir_intrinsic_load_tess_level_outer:
2051    case nir_intrinsic_load_tess_level_inner:
2052    case nir_intrinsic_load_patch_vertices_in:
2053    case nir_intrinsic_load_sample_id:
2054    case nir_intrinsic_load_sample_pos:
2055    case nir_intrinsic_load_sample_mask_in:
2056    case nir_intrinsic_load_view_index:
2057    case nir_intrinsic_load_subgroup_invocation:
2058    case nir_intrinsic_load_subgroup_id:
2059    case nir_intrinsic_load_num_subgroups:
2060       bld_base->sysval_intrin(bld_base, instr, result);
2061       break;
2062    case nir_intrinsic_load_helper_invocation:
2063       bld_base->helper_invocation(bld_base, &result[0]);
2064       break;
2065    case nir_intrinsic_discard_if:
2066    case nir_intrinsic_discard:
2067       visit_discard(bld_base, instr);
2068       break;
2069    case nir_intrinsic_emit_vertex:
2070       bld_base->emit_vertex(bld_base, nir_intrinsic_stream_id(instr));
2071       break;
2072    case nir_intrinsic_end_primitive:
2073       bld_base->end_primitive(bld_base, nir_intrinsic_stream_id(instr));
2074       break;
2075    case nir_intrinsic_ssbo_atomic_add:
2076    case nir_intrinsic_ssbo_atomic_imin:
2077    case nir_intrinsic_ssbo_atomic_imax:
2078    case nir_intrinsic_ssbo_atomic_umin:
2079    case nir_intrinsic_ssbo_atomic_umax:
2080    case nir_intrinsic_ssbo_atomic_and:
2081    case nir_intrinsic_ssbo_atomic_or:
2082    case nir_intrinsic_ssbo_atomic_xor:
2083    case nir_intrinsic_ssbo_atomic_exchange:
2084    case nir_intrinsic_ssbo_atomic_comp_swap:
2085       visit_ssbo_atomic(bld_base, instr, result);
2086       break;
2087    case nir_intrinsic_image_deref_load:
2088       visit_load_image(bld_base, instr, result);
2089       break;
2090    case nir_intrinsic_image_deref_store:
2091       visit_store_image(bld_base, instr);
2092       break;
2093    case nir_intrinsic_image_deref_atomic_add:
2094    case nir_intrinsic_image_deref_atomic_imin:
2095    case nir_intrinsic_image_deref_atomic_imax:
2096    case nir_intrinsic_image_deref_atomic_umin:
2097    case nir_intrinsic_image_deref_atomic_umax:
2098    case nir_intrinsic_image_deref_atomic_and:
2099    case nir_intrinsic_image_deref_atomic_or:
2100    case nir_intrinsic_image_deref_atomic_xor:
2101    case nir_intrinsic_image_deref_atomic_exchange:
2102    case nir_intrinsic_image_deref_atomic_comp_swap:
2103       visit_atomic_image(bld_base, instr, result);
2104       break;
2105    case nir_intrinsic_image_deref_size:
2106       visit_image_size(bld_base, instr, result);
2107       break;
2108    case nir_intrinsic_image_deref_samples:
2109       visit_image_samples(bld_base, instr, result);
2110       break;
2111    case nir_intrinsic_load_shared:
2112       visit_shared_load(bld_base, instr, result);
2113       break;
2114    case nir_intrinsic_store_shared:
2115       visit_shared_store(bld_base, instr);
2116       break;
2117    case nir_intrinsic_shared_atomic_add:
2118    case nir_intrinsic_shared_atomic_imin:
2119    case nir_intrinsic_shared_atomic_umin:
2120    case nir_intrinsic_shared_atomic_imax:
2121    case nir_intrinsic_shared_atomic_umax:
2122    case nir_intrinsic_shared_atomic_and:
2123    case nir_intrinsic_shared_atomic_or:
2124    case nir_intrinsic_shared_atomic_xor:
2125    case nir_intrinsic_shared_atomic_exchange:
2126    case nir_intrinsic_shared_atomic_comp_swap:
2127       visit_shared_atomic(bld_base, instr, result);
2128       break;
2129    case nir_intrinsic_control_barrier:
2130    case nir_intrinsic_scoped_barrier:
2131       visit_barrier(bld_base);
2132       break;
2133    case nir_intrinsic_group_memory_barrier:
2134    case nir_intrinsic_memory_barrier:
2135    case nir_intrinsic_memory_barrier_shared:
2136    case nir_intrinsic_memory_barrier_buffer:
2137    case nir_intrinsic_memory_barrier_image:
2138    case nir_intrinsic_memory_barrier_tcs_patch:
2139       break;
2140    case nir_intrinsic_load_kernel_input:
2141       visit_load_kernel_input(bld_base, instr, result);
2142      break;
2143    case nir_intrinsic_load_global:
2144    case nir_intrinsic_load_global_constant:
2145       visit_load_global(bld_base, instr, result);
2146       break;
2147    case nir_intrinsic_store_global:
2148       visit_store_global(bld_base, instr);
2149       break;
2150    case nir_intrinsic_global_atomic_add:
2151    case nir_intrinsic_global_atomic_imin:
2152    case nir_intrinsic_global_atomic_umin:
2153    case nir_intrinsic_global_atomic_imax:
2154    case nir_intrinsic_global_atomic_umax:
2155    case nir_intrinsic_global_atomic_and:
2156    case nir_intrinsic_global_atomic_or:
2157    case nir_intrinsic_global_atomic_xor:
2158    case nir_intrinsic_global_atomic_exchange:
2159    case nir_intrinsic_global_atomic_comp_swap:
2160       visit_global_atomic(bld_base, instr, result);
2161       break;
2162    case nir_intrinsic_vote_all:
2163    case nir_intrinsic_vote_any:
2164    case nir_intrinsic_vote_ieq:
2165    case nir_intrinsic_vote_feq:
2166       bld_base->vote(bld_base, cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_int, nir_src_bit_size(instr->src[0])), instr, result);
2167       break;
2168    case nir_intrinsic_elect:
2169       bld_base->elect(bld_base, result);
2170       break;
2171    case nir_intrinsic_reduce:
2172    case nir_intrinsic_inclusive_scan:
2173    case nir_intrinsic_exclusive_scan:
2174       bld_base->reduce(bld_base, cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_int, nir_src_bit_size(instr->src[0])), instr, result);
2175       break;
2176    case nir_intrinsic_ballot:
2177       bld_base->ballot(bld_base, cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_int, 32), instr, result);
2178       break;
2179 #if LLVM_VERSION_MAJOR >= 10
2180    case nir_intrinsic_shuffle:
2181       visit_shuffle(bld_base, instr, result);
2182       break;
2183 #endif
2184    case nir_intrinsic_read_invocation:
2185    case nir_intrinsic_read_first_invocation: {
2186       LLVMValueRef src1 = NULL;
2187       LLVMValueRef src0 = get_src(bld_base, instr->src[0]);
2188       if (instr->intrinsic == nir_intrinsic_read_invocation) {
2189          src1 = cast_type(bld_base, get_src(bld_base, instr->src[1]), nir_type_int, 32);
2190          src0 = cast_type(bld_base, src0, nir_type_int, nir_src_bit_size(instr->src[0]));
2191       }
2192       bld_base->read_invocation(bld_base, src0, nir_src_bit_size(instr->src[0]), src1, result);
2193       break;
2194    }
2195    case nir_intrinsic_interp_deref_at_offset:
2196    case nir_intrinsic_interp_deref_at_centroid:
2197    case nir_intrinsic_interp_deref_at_sample:
2198       visit_interp(bld_base, instr, result);
2199       break;
2200    case nir_intrinsic_load_scratch:
2201       visit_load_scratch(bld_base, instr, result);
2202       break;
2203    case nir_intrinsic_store_scratch:
2204       visit_store_scratch(bld_base, instr);
2205       break;
2206    default:
2207       fprintf(stderr, "Unsupported intrinsic: ");
2208       nir_print_instr(&instr->instr, stderr);
2209       fprintf(stderr, "\n");
2210       assert(0);
2211       break;
2212    }
2213    if (result[0]) {
2214       assign_dest(bld_base, &instr->dest, result);
2215    }
2216 }
2217 
2218 
2219 static void
visit_txs(struct lp_build_nir_context * bld_base,nir_tex_instr * instr)2220 visit_txs(struct lp_build_nir_context *bld_base, nir_tex_instr *instr)
2221 {
2222    struct lp_sampler_size_query_params params = { 0 };
2223    LLVMValueRef sizes_out[NIR_MAX_VEC_COMPONENTS];
2224    LLVMValueRef explicit_lod = NULL;
2225    LLVMValueRef texture_unit_offset = NULL;
2226    for (unsigned i = 0; i < instr->num_srcs; i++) {
2227       switch (instr->src[i].src_type) {
2228       case nir_tex_src_lod:
2229          explicit_lod = cast_type(bld_base,
2230                                   get_src(bld_base, instr->src[i].src),
2231                                   nir_type_int, 32);
2232          break;
2233       case nir_tex_src_texture_offset:
2234          texture_unit_offset = get_src(bld_base, instr->src[i].src);
2235          break;
2236       default:
2237          break;
2238       }
2239    }
2240 
2241    params.target = glsl_sampler_to_pipe(instr->sampler_dim, instr->is_array);
2242    params.texture_unit = instr->texture_index;
2243    params.explicit_lod = explicit_lod;
2244    params.is_sviewinfo = TRUE;
2245    params.sizes_out = sizes_out;
2246    params.samples_only = (instr->op == nir_texop_texture_samples);
2247    params.texture_unit_offset = texture_unit_offset;
2248 
2249    if (instr->op == nir_texop_query_levels)
2250       params.explicit_lod = bld_base->uint_bld.zero;
2251    bld_base->tex_size(bld_base, &params);
2252    assign_dest(bld_base, &instr->dest,
2253                &sizes_out[instr->op == nir_texop_query_levels ? 3 : 0]);
2254 }
2255 
2256 
2257 static enum lp_sampler_lod_property
lp_build_nir_lod_property(struct lp_build_nir_context * bld_base,nir_src lod_src)2258 lp_build_nir_lod_property(struct lp_build_nir_context *bld_base,
2259                           nir_src lod_src)
2260 {
2261    enum lp_sampler_lod_property lod_property;
2262 
2263    if (nir_src_is_always_uniform(lod_src)) {
2264       lod_property = LP_SAMPLER_LOD_SCALAR;
2265    } else if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
2266       if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD)
2267          lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2268       else
2269          lod_property = LP_SAMPLER_LOD_PER_QUAD;
2270    } else {
2271       lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2272    }
2273    return lod_property;
2274 }
2275 
2276 
/* Translate a NIR texture instruction into a lp_sampler_params request and
 * hand it to the backend's tex() hook.  Size/level/sample-count queries are
 * redirected to visit_txs(); everything else (tex/txb/txl/txd/txf/txf_ms/
 * tg4/lod) is encoded into a sample_key plus coordinate/offset/derivative
 * arrays. */
static void
visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *instr)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   LLVMValueRef explicit_lod = NULL, ms_index = NULL;
   struct lp_sampler_params params;
   struct lp_derivatives derivs;
   unsigned sample_key = 0;
   nir_deref_instr *texture_deref_instr = NULL;
   nir_deref_instr *sampler_deref_instr = NULL;
   LLVMValueRef texture_unit_offset = NULL;
   LLVMValueRef texel[NIR_MAX_VEC_COMPONENTS];
   unsigned lod_src = 0;   /* index of the src that supplied bias/lod */
   LLVMValueRef coord_undef = LLVMGetUndef(bld_base->base.int_vec_type);
   /* AOS paths keep all coordinate components packed in a single value. */
   unsigned coord_vals = is_aos(bld_base) ? 1 : instr->coord_components;
   memset(&params, 0, sizeof(params));
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;

   /* Size / query-levels / sample-count queries take a different path. */
   if (instr->op == nir_texop_txs || instr->op == nir_texop_query_levels || instr->op == nir_texop_texture_samples) {
      visit_txs(bld_base, instr);
      return;
   }
   /* Encode the operation kind into the sample key. */
   if (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)
      sample_key |= LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
   else if (instr->op == nir_texop_tg4) {
      sample_key |= LP_SAMPLER_OP_GATHER << LP_SAMPLER_OP_TYPE_SHIFT;
      sample_key |= (instr->component << LP_SAMPLER_GATHER_COMP_SHIFT);
   } else if (instr->op == nir_texop_lod)
      sample_key |= LP_SAMPLER_OP_LODQ << LP_SAMPLER_OP_TYPE_SHIFT;

   /* Gather all instruction sources into local state. */
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_coord: {
         /* Unpack coordinate components; unused slots get undef. */
         LLVMValueRef coord = get_src(bld_base, instr->src[i].src);
         if (coord_vals == 1)
            coords[0] = coord;
         else {
            for (unsigned chan = 0; chan < instr->coord_components; ++chan)
               coords[chan] = LLVMBuildExtractValue(builder, coord,
                                                    chan, "");
         }
         for (unsigned chan = coord_vals; chan < 5; chan++)
            coords[chan] = coord_undef;

         break;
      }
      case nir_tex_src_texture_deref:
         texture_deref_instr = nir_src_as_deref(instr->src[i].src);
         break;
      case nir_tex_src_sampler_deref:
         sampler_deref_instr = nir_src_as_deref(instr->src[i].src);
         break;
      case nir_tex_src_comparator:
         /* Shadow comparison value lives in coords[4]. */
         sample_key |= LP_SAMPLER_SHADOW;
         coords[4] = get_src(bld_base, instr->src[i].src);
         coords[4] = cast_type(bld_base, coords[4], nir_type_float, 32);
         break;
      case nir_tex_src_bias:
         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
         lod_src = i;
         explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_float, 32);
         break;
      case nir_tex_src_lod:
         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
         lod_src = i;
         /* txf takes an integer LOD; sampling ops take a float LOD. */
         if (instr->op == nir_texop_txf)
            explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_int, 32);
         else
            explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_float, 32);
         break;
      case nir_tex_src_ddx: {
         /* Array layer has no derivative, so drop it from the count. */
         int deriv_cnt = instr->coord_components;
         if (instr->is_array)
            deriv_cnt--;
         LLVMValueRef deriv_val = get_src(bld_base, instr->src[i].src);
         if (deriv_cnt == 1)
            derivs.ddx[0] = deriv_val;
         else
            for (unsigned chan = 0; chan < deriv_cnt; ++chan)
               derivs.ddx[chan] = LLVMBuildExtractValue(builder, deriv_val,
                                                        chan, "");
         for (unsigned chan = 0; chan < deriv_cnt; ++chan)
            derivs.ddx[chan] = cast_type(bld_base, derivs.ddx[chan], nir_type_float, 32);
         break;
      }
      case nir_tex_src_ddy: {
         /* Same handling as ddx above. */
         int deriv_cnt = instr->coord_components;
         if (instr->is_array)
            deriv_cnt--;
         LLVMValueRef deriv_val = get_src(bld_base, instr->src[i].src);
         if (deriv_cnt == 1)
            derivs.ddy[0] = deriv_val;
         else
            for (unsigned chan = 0; chan < deriv_cnt; ++chan)
               derivs.ddy[chan] = LLVMBuildExtractValue(builder, deriv_val,
                                                        chan, "");
         for (unsigned chan = 0; chan < deriv_cnt; ++chan)
            derivs.ddy[chan] = cast_type(bld_base, derivs.ddy[chan], nir_type_float, 32);
         break;
      }
      case nir_tex_src_offset: {
         /* Texel offsets — again the array layer takes no offset. */
         int offset_cnt = instr->coord_components;
         if (instr->is_array)
            offset_cnt--;
         LLVMValueRef offset_val = get_src(bld_base, instr->src[i].src);
         sample_key |= LP_SAMPLER_OFFSETS;
         if (offset_cnt == 1)
            offsets[0] = cast_type(bld_base, offset_val, nir_type_int, 32);
         else {
            for (unsigned chan = 0; chan < offset_cnt; ++chan) {
               offsets[chan] = LLVMBuildExtractValue(builder, offset_val,
                                                     chan, "");
               offsets[chan] = cast_type(bld_base, offsets[chan], nir_type_int, 32);
            }
         }
         break;
      }
      case nir_tex_src_ms_index:
         sample_key |= LP_SAMPLER_FETCH_MS;
         ms_index = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_int, 32);
         break;

      case nir_tex_src_texture_offset:
         texture_unit_offset = get_src(bld_base, instr->src[i].src);
         break;
      case nir_tex_src_sampler_offset:
         /* Intentionally ignored here. */
         break;
      default:
         assert(0);
         break;
      }
   }
   /* With no separate sampler deref, the texture deref serves as both. */
   if (!sampler_deref_instr)
      sampler_deref_instr = texture_deref_instr;

   if (explicit_lod)
      lod_property = lp_build_nir_lod_property(bld_base, instr->src[lod_src].src);

   /* Sampling ops want float coords; fetch ops want integer coords. */
   if (instr->op == nir_texop_tex || instr->op == nir_texop_tg4 || instr->op == nir_texop_txb ||
       instr->op == nir_texop_txl || instr->op == nir_texop_txd || instr->op == nir_texop_lod)
      for (unsigned chan = 0; chan < coord_vals; ++chan)
         coords[chan] = cast_type(bld_base, coords[chan], nir_type_float, 32);
   else if (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)
      for (unsigned chan = 0; chan < instr->coord_components; ++chan)
         coords[chan] = cast_type(bld_base, coords[chan], nir_type_int, 32);

   if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_1D) {
      /* move layer coord for 1d arrays. */
      coords[2] = coords[1];
      coords[1] = coord_undef;
   }

   /* Without derefs, fall back to the instruction's static indices
    * (unless a bindless handle source is present). */
   uint32_t samp_base_index = 0, tex_base_index = 0;
   if (!sampler_deref_instr) {
      int samp_src_index = nir_tex_instr_src_index(instr, nir_tex_src_sampler_handle);
      if (samp_src_index == -1) {
         samp_base_index = instr->sampler_index;
      }
   }
   if (!texture_deref_instr) {
      int tex_src_index = nir_tex_instr_src_index(instr, nir_tex_src_texture_handle);
      if (tex_src_index == -1) {
         tex_base_index = instr->texture_index;
      }
   }

   if (instr->op == nir_texop_txd) {
      /* Explicit derivatives force per-quad/per-element LOD computation. */
      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
      params.derivs = &derivs;
      if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
         if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD)
            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
         else
            lod_property = LP_SAMPLER_LOD_PER_QUAD;
      } else
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
   }

   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
   params.sample_key = sample_key;
   params.offsets = offsets;
   params.texture_index = tex_base_index;
   params.texture_index_offset = texture_unit_offset;
   params.sampler_index = samp_base_index;
   params.coords = coords;
   params.texel = texel;
   params.lod = explicit_lod;
   params.ms_index = ms_index;
   params.aniso_filter_table = bld_base->aniso_filter_table;
   bld_base->tex(bld_base, &params);

   /* Narrow 32-bit sampler results down to a 16-bit destination. */
   if (nir_dest_bit_size(instr->dest) != 32) {
      assert(nir_dest_bit_size(instr->dest) == 16);
      LLVMTypeRef vec_type = NULL;
      bool is_float = false;
      switch (nir_alu_type_get_base_type(instr->dest_type)) {
      case nir_type_float:
         is_float = true;
         break;
      case nir_type_int:
         vec_type = bld_base->int16_bld.vec_type;
         break;
      case nir_type_uint:
         vec_type = bld_base->uint16_bld.vec_type;
         break;
      default:
         unreachable("unexpected alu type");
      }
      for (int i = 0; i < nir_dest_num_components(instr->dest); ++i) {
         if (is_float) {
            /* Floats go through a proper f32 -> f16 conversion. */
            texel[i] = lp_build_float_to_half(gallivm, texel[i]);
         } else {
            /* Integers are just truncated to 16 bits. */
            texel[i] = LLVMBuildBitCast(builder, texel[i], bld_base->int_bld.vec_type, "");
            texel[i] = LLVMBuildTrunc(builder, texel[i], vec_type, "");
         }
      }
   }

   assign_dest(bld_base, &instr->dest, texel);
}
2499 
2500 
2501 static void
visit_ssa_undef(struct lp_build_nir_context * bld_base,const nir_ssa_undef_instr * instr)2502 visit_ssa_undef(struct lp_build_nir_context *bld_base,
2503                 const nir_ssa_undef_instr *instr)
2504 {
2505    unsigned num_components = instr->def.num_components;
2506    LLVMValueRef undef[NIR_MAX_VEC_COMPONENTS];
2507    struct lp_build_context *undef_bld = get_int_bld(bld_base, true,
2508                                                     instr->def.bit_size);
2509    for (unsigned i = 0; i < num_components; i++)
2510       undef[i] = LLVMGetUndef(undef_bld->vec_type);
2511    memset(&undef[num_components], 0, NIR_MAX_VEC_COMPONENTS - num_components);
2512    assign_ssa_dest(bld_base, &instr->def, undef);
2513 }
2514 
2515 
2516 static void
visit_jump(struct lp_build_nir_context * bld_base,const nir_jump_instr * instr)2517 visit_jump(struct lp_build_nir_context *bld_base,
2518            const nir_jump_instr *instr)
2519 {
2520    switch (instr->type) {
2521    case nir_jump_break:
2522       bld_base->break_stmt(bld_base);
2523       break;
2524    case nir_jump_continue:
2525       bld_base->continue_stmt(bld_base);
2526       break;
2527    default:
2528       unreachable("Unknown jump instr\n");
2529    }
2530 }
2531 
2532 
2533 static void
visit_deref(struct lp_build_nir_context * bld_base,nir_deref_instr * instr)2534 visit_deref(struct lp_build_nir_context *bld_base,
2535             nir_deref_instr *instr)
2536 {
2537    if (!nir_deref_mode_is_one_of(instr, nir_var_mem_shared |
2538                                         nir_var_mem_global)) {
2539       return;
2540    }
2541 
2542    LLVMValueRef result = NULL;
2543    switch(instr->deref_type) {
2544    case nir_deref_type_var: {
2545       struct hash_entry *entry =
2546          _mesa_hash_table_search(bld_base->vars, instr->var);
2547       result = entry->data;
2548       break;
2549    }
2550    default:
2551       unreachable("Unhandled deref_instr deref type");
2552    }
2553 
2554    assign_ssa(bld_base, instr->dest.ssa.index, result);
2555 }
2556 
2557 
2558 static void
visit_block(struct lp_build_nir_context * bld_base,nir_block * block)2559 visit_block(struct lp_build_nir_context *bld_base, nir_block *block)
2560 {
2561    nir_foreach_instr(instr, block)
2562    {
2563       switch (instr->type) {
2564       case nir_instr_type_alu:
2565          visit_alu(bld_base, nir_instr_as_alu(instr));
2566          break;
2567       case nir_instr_type_load_const:
2568          visit_load_const(bld_base, nir_instr_as_load_const(instr));
2569          break;
2570       case nir_instr_type_intrinsic:
2571          visit_intrinsic(bld_base, nir_instr_as_intrinsic(instr));
2572          break;
2573       case nir_instr_type_tex:
2574          visit_tex(bld_base, nir_instr_as_tex(instr));
2575          break;
2576       case nir_instr_type_phi:
2577          assert(0);
2578          break;
2579       case nir_instr_type_ssa_undef:
2580          visit_ssa_undef(bld_base, nir_instr_as_ssa_undef(instr));
2581          break;
2582       case nir_instr_type_jump:
2583          visit_jump(bld_base, nir_instr_as_jump(instr));
2584          break;
2585       case nir_instr_type_deref:
2586          visit_deref(bld_base, nir_instr_as_deref(instr));
2587          break;
2588       default:
2589          fprintf(stderr, "Unknown NIR instr type: ");
2590          nir_print_instr(instr, stderr);
2591          fprintf(stderr, "\n");
2592          abort();
2593       }
2594    }
2595 }
2596 
2597 
2598 static void
visit_if(struct lp_build_nir_context * bld_base,nir_if * if_stmt)2599 visit_if(struct lp_build_nir_context *bld_base, nir_if *if_stmt)
2600 {
2601    LLVMValueRef cond = get_src(bld_base, if_stmt->condition);
2602 
2603    bld_base->if_cond(bld_base, cond);
2604    visit_cf_list(bld_base, &if_stmt->then_list);
2605 
2606    if (!exec_list_is_empty(&if_stmt->else_list)) {
2607       bld_base->else_stmt(bld_base);
2608       visit_cf_list(bld_base, &if_stmt->else_list);
2609    }
2610    bld_base->endif_stmt(bld_base);
2611 }
2612 
2613 
2614 static void
visit_loop(struct lp_build_nir_context * bld_base,nir_loop * loop)2615 visit_loop(struct lp_build_nir_context *bld_base, nir_loop *loop)
2616 {
2617    bld_base->bgnloop(bld_base);
2618    visit_cf_list(bld_base, &loop->body);
2619    bld_base->endloop(bld_base);
2620 }
2621 
2622 
2623 static void
visit_cf_list(struct lp_build_nir_context * bld_base,struct exec_list * list)2624 visit_cf_list(struct lp_build_nir_context *bld_base,
2625               struct exec_list *list)
2626 {
2627    foreach_list_typed(nir_cf_node, node, node, list)
2628    {
2629       switch (node->type) {
2630       case nir_cf_node_block:
2631          visit_block(bld_base, nir_cf_node_as_block(node));
2632          break;
2633       case nir_cf_node_if:
2634          visit_if(bld_base, nir_cf_node_as_if(node));
2635          break;
2636       case nir_cf_node_loop:
2637          visit_loop(bld_base, nir_cf_node_as_loop(node));
2638          break;
2639       default:
2640          assert(0);
2641       }
2642    }
2643 }
2644 
2645 
2646 static void
handle_shader_output_decl(struct lp_build_nir_context * bld_base,struct nir_shader * nir,struct nir_variable * variable)2647 handle_shader_output_decl(struct lp_build_nir_context *bld_base,
2648                           struct nir_shader *nir,
2649                           struct nir_variable *variable)
2650 {
2651    bld_base->emit_var_decl(bld_base, variable);
2652 }
2653 
2654 
2655 /* vector registers are stored as arrays in LLVM side,
2656    so we can use GEP on them, as to do exec mask stores
2657    we need to operate on a single components.
2658    arrays are:
2659    0.x, 1.x, 2.x, 3.x
2660    0.y, 1.y, 2.y, 3.y
2661    ....
2662 */
2663 static LLVMTypeRef
get_register_type(struct lp_build_nir_context * bld_base,nir_register * reg)2664 get_register_type(struct lp_build_nir_context *bld_base,
2665                   nir_register *reg)
2666 {
2667    if (is_aos(bld_base))
2668       return bld_base->base.int_vec_type;
2669 
2670    struct lp_build_context *int_bld =
2671       get_int_bld(bld_base, true, reg->bit_size == 1 ? 32 : reg->bit_size);
2672 
2673    LLVMTypeRef type = int_bld->vec_type;
2674    if (reg->num_array_elems)
2675       type = LLVMArrayType(type, reg->num_array_elems);
2676    if (reg->num_components > 1)
2677       type = LLVMArrayType(type, reg->num_components);
2678 
2679    return type;
2680 }
2681 
2682 
/* Main entry point: lower the NIR shader out of SSA, declare outputs and
 * registers, then translate the (single) function body to LLVM IR via the
 * bld_base callbacks.  Returns true on success. */
bool lp_build_nir_llvm(struct lp_build_nir_context *bld_base,
                       struct nir_shader *nir)
{
   struct nir_function *func;

   /* This translator works on register-based (non-SSA) NIR; convert and
    * clean up first.  Pass order matters here. */
   nir_convert_from_ssa(nir, true);
   nir_lower_locals_to_regs(nir);
   nir_remove_dead_derefs(nir);
   nir_remove_dead_variables(nir, nir_var_function_temp, NULL);

   /* AOS codegen can't handle vec ops; turn them into plain movs. */
   if (is_aos(bld_base)) {
      nir_move_vec_src_uses_to_dest(nir);
      nir_lower_vec_to_movs(nir, NULL, NULL);
   }

   nir_foreach_shader_out_variable(variable, nir)
      handle_shader_output_decl(bld_base, nir, variable);

   /* With lowered I/O there are no output variables left; synthesize
    * vec4 declarations from the outputs_written bitmask instead. */
   if (nir->info.io_lowered) {
      uint64_t outputs_written = nir->info.outputs_written;

      while (outputs_written) {
         unsigned location = u_bit_scan64(&outputs_written);
         nir_variable var = {0};

         var.type = glsl_vec4_type();
         var.data.mode = nir_var_shader_out;
         var.data.location = location;
         /* Driver location = number of written outputs below this one. */
         var.data.driver_location = util_bitcount64(nir->info.outputs_written &
                                                    BITFIELD64_MASK(location));
         bld_base->emit_var_decl(bld_base, &var);
      }
   }

   /* Per-translation lookup tables: nir_register -> alloca, variable ->
    * LLVM value, and a range-analysis cache. */
   bld_base->regs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                            _mesa_key_pointer_equal);
   bld_base->vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                            _mesa_key_pointer_equal);
   bld_base->range_ht = _mesa_pointer_hash_table_create(NULL);

   func = (struct nir_function *)exec_list_get_head(&nir->functions);

   /* Allocate stack storage for every NIR register up-front. */
   nir_foreach_register(reg, &func->impl->registers) {
      LLVMTypeRef type = get_register_type(bld_base, reg);
      LLVMValueRef reg_alloc = lp_build_alloca(bld_base->base.gallivm,
                                               type, "reg");
      _mesa_hash_table_insert(bld_base->regs, reg, reg_alloc);
   }
   /* Dense SSA-index -> LLVM value map, sized after reindexing. */
   nir_index_ssa_defs(func->impl);
   bld_base->ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef));
   visit_cf_list(bld_base, &func->impl->body);

   /* Release per-translation state; hash tables are ralloc-owned. */
   free(bld_base->ssa_defs);
   ralloc_free(bld_base->vars);
   ralloc_free(bld_base->regs);
   ralloc_free(bld_base->range_ht);
   return true;
}
2741 
2742 
/* do some basic opts to remove some things we don't want to see. */
void
lp_build_opt_nir(struct nir_shader *nir)
{
   bool progress;

   /* One-time lowerings: projectors, tg4 offsets, invalid implicit LOD. */
   static const struct nir_lower_tex_options lower_tex_options = {
      .lower_tg4_offsets = true,
      .lower_txp = ~0u,
      .lower_invalid_implicit_lod = true,
   };
   NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);
   NIR_PASS_V(nir, nir_lower_frexp);

   NIR_PASS_V(nir, nir_lower_flrp, 16|32|64, true);
   NIR_PASS_V(nir, nir_lower_fp16_casts);
   /* Iterate fold/algebraic/pack + subgroup lowering to a fixed point;
    * each round can expose more work for the others. */
   do {
      progress = false;
      NIR_PASS(progress, nir, nir_opt_constant_folding);
      NIR_PASS(progress, nir, nir_opt_algebraic);
      NIR_PASS(progress, nir, nir_lower_pack);

      /* Re-run tex lowering for instructions created this round. */
      nir_lower_tex_options options = { .lower_invalid_implicit_lod = true, };
      NIR_PASS_V(nir, nir_lower_tex, &options);

      /* Subgroup size follows the native SIMD width (32-bit lanes). */
      const nir_lower_subgroups_options subgroups_options = {
         .subgroup_size = lp_native_vector_width / 32,
         .ballot_bit_size = 32,
         .ballot_components = 1,
         .lower_to_scalar = true,
         .lower_subgroup_masks = true,
         .lower_relative_shuffle = true,
      };
      NIR_PASS(progress, nir, nir_lower_subgroups, &subgroups_options);
   } while (progress);

   /* Late algebraic opts, cleaning up after each productive round. */
   do {
      progress = false;
      NIR_PASS(progress, nir, nir_opt_algebraic_late);
      if (progress) {
         NIR_PASS_V(nir, nir_copy_prop);
         NIR_PASS_V(nir, nir_opt_dce);
         NIR_PASS_V(nir, nir_opt_cse);
      }
   } while (progress);

   /* Booleans become 32-bit ints for the translator; tidy up if anything
    * actually changed. */
   if (nir_lower_bool_to_int32(nir)) {
      NIR_PASS_V(nir, nir_copy_prop);
      NIR_PASS_V(nir, nir_opt_dce);
   }
}
2794