• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2019 Red Hat.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included
14  * in all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  *
24  **************************************************************************/
25 
26 #include "lp_bld_nir.h"
27 #include "lp_bld_init.h"
28 #include "lp_bld_flow.h"
29 #include "lp_bld_logic.h"
30 #include "lp_bld_gather.h"
31 #include "lp_bld_const.h"
32 #include "lp_bld_struct.h"
33 #include "lp_bld_jit_types.h"
34 #include "lp_bld_arit.h"
35 #include "lp_bld_bitarit.h"
36 #include "lp_bld_coro.h"
37 #include "lp_bld_printf.h"
38 #include "lp_bld_intr.h"
39 #include "util/u_cpu_detect.h"
40 #include "util/u_math.h"
41 
bit_size_to_shift_size(int bit_size)42 static int bit_size_to_shift_size(int bit_size)
43 {
44    switch (bit_size) {
45    case 64:
46       return 3;
47    default:
48    case 32:
49       return 2;
50    case 16:
51       return 1;
52    case 8:
53       return 0;
54    }
55 }
56 
57 /*
58  * combine the execution mask if there is one with the current mask.
59  */
60 static LLVMValueRef
mask_vec(struct lp_build_nir_context * bld_base)61 mask_vec(struct lp_build_nir_context *bld_base)
62 {
63    struct lp_build_nir_soa_context * bld = (struct lp_build_nir_soa_context *)bld_base;
64    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
65    struct lp_exec_mask *exec_mask = &bld->exec_mask;
66    LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
67    if (!exec_mask->has_mask) {
68       return bld_mask;
69    }
70    if (!bld_mask)
71       return exec_mask->exec_mask;
72    return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
73                        exec_mask->exec_mask, "");
74 }
75 
76 static bool
invocation_0_must_be_active(struct lp_build_nir_context * bld_base)77 invocation_0_must_be_active(struct lp_build_nir_context *bld_base)
78 {
79    struct lp_build_nir_soa_context * bld = (struct lp_build_nir_soa_context *)bld_base;
80 
81    /* Fragment shaders may dispatch with invocation 0 inactive.  All other
82     * stages have invocation 0 active at the top.  (See
83     * lp_build_tgsi_params.mask setup in draw_llvm.c and lp_state_*.c)
84     */
85    if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT)
86       return false;
87 
88    /* If we're in some control flow right now, then invocation 0 may be
89     * disabled.
90     */
91    if (bld->exec_mask.has_mask)
92       return false;
93 
94    return true;
95 }
96 
97 /**
98  * Returns a scalar value of the first active invocation in the exec_mask.
99  *
100  * Note that gallivm doesn't generally jump when exec_mask is 0 (such as if/else
101  * branches thare are all false, or portions of a loop after a break/continue
102  * has ended the last invocation that had been active in the loop).  In that
103  * case, we return a 0 value so that unconditional LLVMBuildExtractElement of
104  * the first_active_invocation (such as in memory loads, texture unit index
105  * lookups, etc) will use a valid index
106  */
first_active_invocation(struct lp_build_nir_context * bld_base)107 static LLVMValueRef first_active_invocation(struct lp_build_nir_context *bld_base)
108 {
109    struct gallivm_state *gallivm = bld_base->base.gallivm;
110    LLVMBuilderRef builder = gallivm->builder;
111    struct lp_build_context *uint_bld = &bld_base->uint_bld;
112 
113    if (invocation_0_must_be_active(bld_base))
114       return lp_build_const_int32(gallivm, 0);
115 
116    LLVMValueRef exec_mask = mask_vec(bld_base);
117 
118    LLVMValueRef bitmask = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "exec_bitvec");
119    /* Turn it from N x i1 to iN, then extend it up to i32 so we can use a single
120     * cttz intrinsic -- I assume the compiler will drop the extend if there are
121     * smaller instructions available, since we have is_zero_poison.
122     */
123    bitmask = LLVMBuildBitCast(builder, bitmask, LLVMIntTypeInContext(gallivm->context, uint_bld->type.length), "exec_bitmask");
124    bitmask = LLVMBuildZExt(builder, bitmask, bld_base->int_bld.elem_type, "");
125 
126    LLVMValueRef any_active = LLVMBuildICmp(builder, LLVMIntNE, bitmask, lp_build_const_int32(gallivm, 0), "any_active");
127 
128    LLVMValueRef first_active = lp_build_intrinsic_binary(builder, "llvm.cttz.i32", bld_base->int_bld.elem_type, bitmask,
129                                                          LLVMConstInt(LLVMInt1TypeInContext(gallivm->context), false, false));
130 
131    return LLVMBuildSelect(builder, any_active, first_active, lp_build_const_int32(gallivm, 0), "first_active_or_0");
132 }
133 
134 static LLVMValueRef
lp_build_zero_bits(struct gallivm_state * gallivm,int bit_size,bool is_float)135 lp_build_zero_bits(struct gallivm_state *gallivm, int bit_size, bool is_float)
136 {
137    if (bit_size == 64)
138       return LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 0, 0);
139    else if (bit_size == 16)
140       return LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0, 0);
141    else if (bit_size == 8)
142       return LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0, 0);
143    else
144       return is_float ? lp_build_const_float(gallivm, 0) : lp_build_const_int32(gallivm, 0);
145 }
146 
147 static LLVMValueRef
emit_fetch_64bit(struct lp_build_nir_context * bld_base,LLVMValueRef input,LLVMValueRef input2)148 emit_fetch_64bit(
149    struct lp_build_nir_context * bld_base,
150    LLVMValueRef input,
151    LLVMValueRef input2)
152 {
153    struct gallivm_state *gallivm = bld_base->base.gallivm;
154    LLVMBuilderRef builder = gallivm->builder;
155    LLVMValueRef res;
156    int i;
157    LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
158    int len = bld_base->base.type.length * 2;
159    assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
160 
161    for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
162 #if UTIL_ARCH_LITTLE_ENDIAN
163       shuffles[i] = lp_build_const_int32(gallivm, i / 2);
164       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
165 #else
166       shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
167       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
168 #endif
169    }
170    res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
171 
172    return LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
173 }
174 
175 static void
emit_store_64bit_split(struct lp_build_nir_context * bld_base,LLVMValueRef value,LLVMValueRef split_values[2])176 emit_store_64bit_split(struct lp_build_nir_context *bld_base,
177                        LLVMValueRef value,
178                        LLVMValueRef split_values[2])
179 {
180    struct gallivm_state *gallivm = bld_base->base.gallivm;
181    LLVMBuilderRef builder = gallivm->builder;
182    unsigned i;
183    LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
184    LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
185    int len = bld_base->base.type.length * 2;
186 
187    value = LLVMBuildBitCast(gallivm->builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), len), "");
188    for (i = 0; i < bld_base->base.type.length; i++) {
189 #if UTIL_ARCH_LITTLE_ENDIAN
190       shuffles[i] = lp_build_const_int32(gallivm, i * 2);
191       shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
192 #else
193       shuffles[i] = lp_build_const_int32(gallivm, i * 2 + 1);
194       shuffles2[i] = lp_build_const_int32(gallivm, i * 2);
195 #endif
196    }
197 
198    split_values[0] = LLVMBuildShuffleVector(builder, value,
199                                  LLVMGetUndef(LLVMTypeOf(value)),
200                                  LLVMConstVector(shuffles,
201                                                  bld_base->base.type.length),
202                                  "");
203    split_values[1] = LLVMBuildShuffleVector(builder, value,
204                                   LLVMGetUndef(LLVMTypeOf(value)),
205                                   LLVMConstVector(shuffles2,
206                                                   bld_base->base.type.length),
207                                   "");
208 }
209 
210 static void
emit_store_64bit_chan(struct lp_build_nir_context * bld_base,LLVMValueRef chan_ptr,LLVMValueRef chan_ptr2,LLVMValueRef value)211 emit_store_64bit_chan(struct lp_build_nir_context *bld_base,
212                       LLVMValueRef chan_ptr,
213                       LLVMValueRef chan_ptr2,
214                       LLVMValueRef value)
215 {
216    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
217    struct lp_build_context *float_bld = &bld_base->base;
218    LLVMValueRef split_vals[2];
219 
220    emit_store_64bit_split(bld_base, value, split_vals);
221 
222    lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[0], chan_ptr);
223    lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[1], chan_ptr2);
224 }
225 
226 static LLVMValueRef
get_soa_array_offsets(struct lp_build_context * uint_bld,LLVMValueRef indirect_index,int num_components,unsigned chan_index,bool need_perelement_offset)227 get_soa_array_offsets(struct lp_build_context *uint_bld,
228                       LLVMValueRef indirect_index,
229                       int num_components,
230                       unsigned chan_index,
231                       bool need_perelement_offset)
232 {
233    struct gallivm_state *gallivm = uint_bld->gallivm;
234    LLVMValueRef chan_vec =
235       lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
236    LLVMValueRef length_vec =
237       lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
238    LLVMValueRef index_vec;
239 
240    /* index_vec = (indirect_index * num_components + chan_index) * length + offsets */
241    index_vec = lp_build_mul(uint_bld, indirect_index, lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, num_components));
242    index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
243    index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
244 
245    if (need_perelement_offset) {
246       LLVMValueRef pixel_offsets;
247       unsigned i;
248      /* build pixel offset vector: {0, 1, 2, 3, ...} */
249       pixel_offsets = uint_bld->undef;
250       for (i = 0; i < uint_bld->type.length; i++) {
251          LLVMValueRef ii = lp_build_const_int32(gallivm, i);
252          pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
253                                                 ii, ii, "");
254       }
255       index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
256    }
257    return index_vec;
258 }
259 
260 static LLVMValueRef
build_gather(struct lp_build_nir_context * bld_base,struct lp_build_context * bld,LLVMTypeRef base_type,LLVMValueRef base_ptr,LLVMValueRef indexes,LLVMValueRef overflow_mask,LLVMValueRef indexes2)261 build_gather(struct lp_build_nir_context *bld_base,
262              struct lp_build_context *bld,
263              LLVMTypeRef base_type,
264              LLVMValueRef base_ptr,
265              LLVMValueRef indexes,
266              LLVMValueRef overflow_mask,
267              LLVMValueRef indexes2)
268 {
269    struct gallivm_state *gallivm = bld_base->base.gallivm;
270    LLVMBuilderRef builder = gallivm->builder;
271    struct lp_build_context *uint_bld = &bld_base->uint_bld;
272    LLVMValueRef res;
273    unsigned i;
274 
275    if (indexes2)
276       res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
277    else
278       res = bld->undef;
279    /*
280     * overflow_mask is a vector telling us which channels
281     * in the vector overflowed. We use the overflow behavior for
282     * constant buffers which is defined as:
283     * Out of bounds access to constant buffer returns 0 in all
284     * components. Out of bounds behavior is always with respect
285     * to the size of the buffer bound at that slot.
286     */
287 
288    if (overflow_mask) {
289       /*
290        * We avoid per-element control flow here (also due to llvm going crazy,
291        * though I suspect it's better anyway since overflow is likely rare).
292        * Note that since we still fetch from buffers even if num_elements was
293        * zero (in this case we'll fetch from index zero) the jit func callers
294        * MUST provide valid fake constant buffers of size 4x32 (the values do
295        * not matter), otherwise we'd still need (not per element though)
296        * control flow.
297        */
298       indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
299       if (indexes2)
300          indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
301    }
302 
303    /*
304     * Loop over elements of index_vec, load scalar value, insert it into 'res'.
305     */
306    for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
307       LLVMValueRef si, di;
308       LLVMValueRef index;
309       LLVMValueRef scalar_ptr, scalar;
310 
311       di = lp_build_const_int32(gallivm, i);
312       if (indexes2)
313          si = lp_build_const_int32(gallivm, i >> 1);
314       else
315          si = di;
316 
317       if (indexes2 && (i & 1)) {
318          index = LLVMBuildExtractElement(builder,
319                                          indexes2, si, "");
320       } else {
321          index = LLVMBuildExtractElement(builder,
322                                          indexes, si, "");
323       }
324 
325       scalar_ptr = LLVMBuildGEP2(builder, base_type, base_ptr, &index, 1, "gather_ptr");
326       scalar = LLVMBuildLoad2(builder, base_type, scalar_ptr, "");
327 
328       res = LLVMBuildInsertElement(builder, res, scalar, di, "");
329    }
330 
331    if (overflow_mask) {
332       if (indexes2) {
333          res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
334          overflow_mask = LLVMBuildSExt(builder, overflow_mask,
335                                        bld_base->dbl_bld.int_vec_type, "");
336          res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
337                                bld_base->dbl_bld.zero, res);
338       } else
339          res = lp_build_select(bld, overflow_mask, bld->zero, res);
340    }
341 
342    return res;
343 }
344 
345 /**
346  * Scatter/store vector.
347  */
348 static void
emit_mask_scatter(struct lp_build_nir_soa_context * bld,LLVMValueRef base_ptr,LLVMValueRef indexes,LLVMValueRef values,struct lp_exec_mask * mask)349 emit_mask_scatter(struct lp_build_nir_soa_context *bld,
350                   LLVMValueRef base_ptr,
351                   LLVMValueRef indexes,
352                   LLVMValueRef values,
353                   struct lp_exec_mask *mask)
354 {
355    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
356    LLVMBuilderRef builder = gallivm->builder;
357    unsigned i;
358    LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;
359 
360    /*
361     * Loop over elements of index_vec, store scalar value.
362     */
363    for (i = 0; i < bld->bld_base.base.type.length; i++) {
364       LLVMValueRef ii = lp_build_const_int32(gallivm, i);
365       LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
366       LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
367       LLVMValueRef scalar_ptr = LLVMBuildGEP2(builder, LLVMTypeOf(val), base_ptr, &index, 1, "scatter_ptr");
368       LLVMValueRef scalar_pred = pred ?
369          LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
370 
371       if (0)
372          lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
373                          ii, val, index, scalar_ptr);
374 
375       if (scalar_pred) {
376          LLVMValueRef real_val, dst_val;
377          dst_val = LLVMBuildLoad2(builder, LLVMTypeOf(val), scalar_ptr, "");
378          scalar_pred = LLVMBuildTrunc(builder, scalar_pred, LLVMInt1TypeInContext(gallivm->context), "");
379          real_val = LLVMBuildSelect(builder, scalar_pred, val, dst_val, "");
380          LLVMBuildStore(builder, real_val, scalar_ptr);
381       }
382       else {
383          LLVMBuildStore(builder, val, scalar_ptr);
384       }
385    }
386 }
387 
emit_load_var(struct lp_build_nir_context * bld_base,nir_variable_mode deref_mode,unsigned num_components,unsigned bit_size,nir_variable * var,unsigned vertex_index,LLVMValueRef indir_vertex_index,unsigned const_index,LLVMValueRef indir_index,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])388 static void emit_load_var(struct lp_build_nir_context *bld_base,
389                            nir_variable_mode deref_mode,
390                            unsigned num_components,
391                            unsigned bit_size,
392                            nir_variable *var,
393                            unsigned vertex_index,
394                            LLVMValueRef indir_vertex_index,
395                            unsigned const_index,
396                            LLVMValueRef indir_index,
397                            LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
398 {
399    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
400    struct gallivm_state *gallivm = bld_base->base.gallivm;
401    int dmul = bit_size == 64 ? 2 : 1;
402    unsigned location = var->data.driver_location;
403    unsigned location_frac = var->data.location_frac;
404 
405    if (!var->data.compact && !indir_index)
406       location += const_index;
407    else if (var->data.compact) {
408       location += const_index / 4;
409       location_frac += const_index % 4;
410       const_index = 0;
411    }
412    switch (deref_mode) {
413    case nir_var_shader_in:
414       for (unsigned i = 0; i < num_components; i++) {
415          int idx = (i * dmul) + location_frac;
416          int comp_loc = location;
417 
418          if (bit_size == 64 && idx >= 4) {
419             comp_loc++;
420             idx = idx % 4;
421          }
422 
423          if (bld->gs_iface) {
424             LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
425             LLVMValueRef attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
426             LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
427             LLVMValueRef result2;
428 
429             result[i] = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
430                                                    false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
431             if (bit_size == 64) {
432                LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
433                result2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
434                                                     false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
435                result[i] = emit_fetch_64bit(bld_base, result[i], result2);
436             }
437          } else if (bld->tes_iface) {
438             LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
439             LLVMValueRef attrib_index_val;
440             LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
441             LLVMValueRef result2;
442 
443             if (indir_index) {
444                if (var->data.compact) {
445                   swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));
446                   attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
447                } else
448                   attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
449             } else
450                attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
451 
452             if (var->data.patch) {
453                result[i] = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
454                                                              indir_index ? true : false, attrib_index_val, swizzle_index_val);
455                if (bit_size == 64) {
456                   LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
457                   result2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
458                                                               indir_index ? true : false, attrib_index_val, swizzle_index_val);
459                   result[i] = emit_fetch_64bit(bld_base, result[i], result2);
460                }
461             }
462             else {
463                result[i] = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
464                                                               indir_vertex_index ? true : false,
465                                                               indir_vertex_index ? indir_vertex_index : vertex_index_val,
466                                                               (indir_index && !var->data.compact) ? true : false, attrib_index_val,
467                                                               (indir_index && var->data.compact) ? true : false, swizzle_index_val);
468                if (bit_size == 64) {
469                   LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
470                   result2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
471                                                                indir_vertex_index ? true : false,
472                                                                indir_vertex_index ? indir_vertex_index : vertex_index_val,
473                                                                indir_index ? true : false, attrib_index_val, false, swizzle_index_val);
474                   result[i] = emit_fetch_64bit(bld_base, result[i], result2);
475                }
476             }
477          } else if (bld->tcs_iface) {
478             LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
479             LLVMValueRef attrib_index_val;
480             LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
481 
482             if (indir_index) {
483                if (var->data.compact) {
484                   swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));
485                   attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
486                } else
487                   attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
488             } else
489                attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
490             result[i] = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
491                                                          indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
492                                                          (indir_index && !var->data.compact) ? true : false, attrib_index_val,
493                                                          (indir_index && var->data.compact) ? true : false, swizzle_index_val);
494             if (bit_size == 64) {
495                LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
496                LLVMValueRef result2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
497                                                                        indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
498                                                                        indir_index ? true : false, attrib_index_val,
499                                                                        false, swizzle_index_val);
500                result[i] = emit_fetch_64bit(bld_base, result[i], result2);
501             }
502          } else {
503             if (indir_index) {
504                LLVMValueRef attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
505                LLVMValueRef index_vec = get_soa_array_offsets(&bld_base->uint_bld,
506                                                               attrib_index_val, 4, idx,
507                                                               true);
508                LLVMValueRef index_vec2 = NULL;
509                LLVMTypeRef scalar_type = LLVMFloatTypeInContext(gallivm->context);
510                LLVMValueRef inputs_array = LLVMBuildBitCast(gallivm->builder, bld->inputs_array, LLVMPointerType(scalar_type, 0), "");
511 
512                if (bit_size == 64)
513                   index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
514                                                      indir_index, 4, idx + 1, true);
515 
516                /* Gather values from the input register array */
517                result[i] = build_gather(bld_base, &bld_base->base, scalar_type, inputs_array, index_vec, NULL, index_vec2);
518             } else {
519                if (bld->indirects & nir_var_shader_in) {
520                   LLVMValueRef lindex = lp_build_const_int32(gallivm,
521                                                              comp_loc * 4 + idx);
522                   LLVMValueRef input_ptr = lp_build_pointer_get2(gallivm->builder,
523                                                                  bld->bld_base.base.vec_type,
524                                                                  bld->inputs_array, lindex);
525                   if (bit_size == 64) {
526                      LLVMValueRef lindex2 = lp_build_const_int32(gallivm,
527                                                                  comp_loc * 4 + (idx + 1));
528                      LLVMValueRef input_ptr2 = lp_build_pointer_get2(gallivm->builder,
529                                                                      bld->bld_base.base.vec_type,
530                                                                      bld->inputs_array, lindex2);
531                      result[i] = emit_fetch_64bit(bld_base, input_ptr, input_ptr2);
532                   } else {
533                      result[i] = input_ptr;
534                   }
535                } else {
536                   if (bit_size == 64) {
537                      LLVMValueRef tmp[2];
538                      tmp[0] = bld->inputs[comp_loc][idx];
539                      tmp[1] = bld->inputs[comp_loc][idx + 1];
540                      result[i] = emit_fetch_64bit(bld_base, tmp[0], tmp[1]);
541                   } else {
542                      result[i] = bld->inputs[comp_loc][idx];
543                   }
544                }
545             }
546          }
547       }
548       break;
549    case nir_var_shader_out:
550       if (bld->fs_iface && bld->fs_iface->fb_fetch) {
551          bld->fs_iface->fb_fetch(bld->fs_iface, &bld_base->base, var->data.location, result);
552          return;
553       }
554       for (unsigned i = 0; i < num_components; i++) {
555          int idx = (i * dmul) + location_frac;
556          if (bld->tcs_iface) {
557             LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
558             LLVMValueRef attrib_index_val;
559             LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
560 
561             if (indir_index)
562                attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
563             else
564                attrib_index_val = lp_build_const_int32(gallivm, location);
565 
566             result[i] = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
567                                                           indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
568                                                           (indir_index && !var->data.compact) ? true : false, attrib_index_val,
569                                                           (indir_index && var->data.compact) ? true : false, swizzle_index_val, 0);
570             if (bit_size == 64) {
571                LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
572                LLVMValueRef result2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
573                                                                         indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
574                                                                         indir_index ? true : false, attrib_index_val,
575                                                                         false, swizzle_index_val, 0);
576                result[i] = emit_fetch_64bit(bld_base, result[i], result2);
577             }
578          }
579       }
580       break;
581    default:
582       break;
583    }
584 }
585 
emit_store_chan(struct lp_build_nir_context * bld_base,nir_variable_mode deref_mode,unsigned bit_size,unsigned location,unsigned comp,unsigned chan,LLVMValueRef dst)586 static void emit_store_chan(struct lp_build_nir_context *bld_base,
587                             nir_variable_mode deref_mode,
588                             unsigned bit_size,
589                             unsigned location, unsigned comp,
590                             unsigned chan,
591                             LLVMValueRef dst)
592 {
593    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
594    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
595    struct lp_build_context *float_bld = &bld_base->base;
596 
597    if (bit_size == 64) {
598       chan *= 2;
599       chan += comp;
600       if (chan >= 4) {
601          chan -= 4;
602          location++;
603       }
604       emit_store_64bit_chan(bld_base, bld->outputs[location][chan],
605                             bld->outputs[location][chan + 1], dst);
606    } else {
607       dst = LLVMBuildBitCast(builder, dst, float_bld->vec_type, "");
608       lp_exec_mask_store(&bld->exec_mask, float_bld, dst,
609                          bld->outputs[location][chan + comp]);
610    }
611 }
612 
emit_store_tcs_chan(struct lp_build_nir_context * bld_base,bool is_compact,unsigned bit_size,unsigned location,unsigned const_index,LLVMValueRef indir_vertex_index,LLVMValueRef indir_index,unsigned comp,unsigned chan,LLVMValueRef chan_val)613 static void emit_store_tcs_chan(struct lp_build_nir_context *bld_base,
614                                 bool is_compact,
615                                 unsigned bit_size,
616                                 unsigned location,
617                                 unsigned const_index,
618                                 LLVMValueRef indir_vertex_index,
619                                 LLVMValueRef indir_index,
620                                 unsigned comp,
621                                 unsigned chan,
622                                 LLVMValueRef chan_val)
623 {
624    struct gallivm_state *gallivm = bld_base->base.gallivm;
625    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
626    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
627    unsigned swizzle = chan;
628    if (bit_size == 64) {
629       swizzle *= 2;
630       swizzle += comp;
631       if (swizzle >= 4) {
632          swizzle -= 4;
633          location++;
634       }
635    } else
636       swizzle += comp;
637    LLVMValueRef attrib_index_val;
638    LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, swizzle);
639 
640    if (indir_index) {
641       if (is_compact) {
642          swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, swizzle));
643          attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
644       } else
645          attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, location));
646    } else
647       attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
648    LLVMValueRef exec_mask = mask_vec(bld_base);
649    if (bit_size == 64) {
650       LLVMValueRef split_vals[2];
651       LLVMValueRef swizzle_index_val2 = lp_build_const_int32(gallivm, swizzle + 1);
652       emit_store_64bit_split(bld_base, chan_val, split_vals);
653       if (bld->mesh_iface) {
654          bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
655                                            indir_vertex_index ? true : false,
656                                            indir_vertex_index,
657                                            indir_index ? true : false,
658                                            attrib_index_val,
659                                            false, swizzle_index_val,
660                                            split_vals[0], exec_mask);
661          bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
662                                            indir_vertex_index ? true : false,
663                                            indir_vertex_index,
664                                            indir_index ? true : false,
665                                            attrib_index_val,
666                                            false, swizzle_index_val2,
667                                            split_vals[1], exec_mask);
668       } else {
669          bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
670                                            indir_vertex_index ? true : false,
671                                            indir_vertex_index,
672                                            indir_index ? true : false,
673                                            attrib_index_val,
674                                            false, swizzle_index_val,
675                                            split_vals[0], exec_mask);
676          bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
677                                            indir_vertex_index ? true : false,
678                                            indir_vertex_index,
679                                            indir_index ? true : false,
680                                            attrib_index_val,
681                                            false, swizzle_index_val2,
682                                            split_vals[1], exec_mask);
683       }
684    } else {
685       chan_val = LLVMBuildBitCast(builder, chan_val, bld_base->base.vec_type, "");
686       if (bld->mesh_iface) {
687          bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
688                                            indir_vertex_index ? true : false,
689                                            indir_vertex_index,
690                                            indir_index && !is_compact ? true : false,
691                                            attrib_index_val,
692                                            indir_index && is_compact ? true : false,
693                                            swizzle_index_val,
694                                            chan_val, exec_mask);
695       } else {
696          bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
697                                            indir_vertex_index ? true : false,
698                                            indir_vertex_index,
699                                            indir_index && !is_compact ? true : false,
700                                            attrib_index_val,
701                                            indir_index && is_compact ? true : false,
702                                            swizzle_index_val,
703                                            chan_val, exec_mask);
704       }
705    }
706 }
707 
emit_store_mesh_chan(struct lp_build_nir_context * bld_base,bool is_compact,unsigned bit_size,unsigned location,unsigned const_index,LLVMValueRef indir_vertex_index,LLVMValueRef indir_index,unsigned comp,unsigned chan,LLVMValueRef chan_val)708 static void emit_store_mesh_chan(struct lp_build_nir_context *bld_base,
709                                  bool is_compact,
710                                  unsigned bit_size,
711                                  unsigned location,
712                                  unsigned const_index,
713                                  LLVMValueRef indir_vertex_index,
714                                  LLVMValueRef indir_index,
715                                  unsigned comp,
716                                  unsigned chan,
717                                  LLVMValueRef chan_val)
718 {
719    struct gallivm_state *gallivm = bld_base->base.gallivm;
720    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
721    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
722    unsigned swizzle = chan;
723    if (bit_size == 64) {
724       swizzle += const_index;
725       swizzle *= 2;
726       swizzle += comp;
727       if (swizzle >= 4) {
728          swizzle -= 4;
729          location++;
730       }
731    } else
732       swizzle += comp;
733    LLVMValueRef attrib_index_val;
734    LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, swizzle);
735 
736    if (indir_index) {
737       if (is_compact) {
738          swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, swizzle));
739          attrib_index_val = lp_build_const_int32(gallivm, location);
740       } else
741          attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, location));
742    } else
743       attrib_index_val = lp_build_const_int32(gallivm, location + const_index);
744    LLVMValueRef exec_mask = mask_vec(bld_base);
745    if (bit_size == 64) {
746       LLVMValueRef split_vals[2];
747       LLVMValueRef swizzle_index_val2 = lp_build_const_int32(gallivm, swizzle + 1);
748       emit_store_64bit_split(bld_base, chan_val, split_vals);
749       bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
750                                          indir_vertex_index ? true : false,
751                                          indir_vertex_index,
752                                          indir_index ? true : false,
753                                          attrib_index_val,
754                                          false, swizzle_index_val,
755                                          split_vals[0], exec_mask);
756       bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
757                                          indir_vertex_index ? true : false,
758                                          indir_vertex_index,
759                                          indir_index ? true : false,
760                                          attrib_index_val,
761                                          false, swizzle_index_val2,
762                                          split_vals[1], exec_mask);
763    } else {
764       chan_val = LLVMBuildBitCast(builder, chan_val, bld_base->base.vec_type, "");
765       bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
766                                          indir_vertex_index ? true : false,
767                                          indir_vertex_index,
768                                          indir_index && !is_compact ? true : false,
769                                          attrib_index_val,
770                                          indir_index && is_compact ? true : false,
771                                          swizzle_index_val,
772                                          chan_val, exec_mask);
773    }
774 }
775 
emit_store_var(struct lp_build_nir_context * bld_base,nir_variable_mode deref_mode,unsigned num_components,unsigned bit_size,nir_variable * var,unsigned writemask,LLVMValueRef indir_vertex_index,unsigned const_index,LLVMValueRef indir_index,LLVMValueRef dst)776 static void emit_store_var(struct lp_build_nir_context *bld_base,
777                            nir_variable_mode deref_mode,
778                            unsigned num_components,
779                            unsigned bit_size,
780                            nir_variable *var,
781                            unsigned writemask,
782                            LLVMValueRef indir_vertex_index,
783                            unsigned const_index,
784                            LLVMValueRef indir_index,
785                            LLVMValueRef dst)
786 {
787    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
788    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
789    switch (deref_mode) {
790    case nir_var_shader_out: {
791       unsigned location = var->data.driver_location;
792       unsigned comp = var->data.location_frac;
793       if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
794          if (var->data.location == FRAG_RESULT_STENCIL)
795             comp = 1;
796          else if (var->data.location == FRAG_RESULT_DEPTH)
797             comp = 2;
798       }
799 
800       if (var->data.compact) {
801          location += const_index / 4;
802          comp += const_index % 4;
803          const_index = 0;
804       }
805 
806       for (unsigned chan = 0; chan < num_components; chan++) {
807          if (writemask & (1u << chan)) {
808             LLVMValueRef chan_val = (num_components == 1) ? dst : LLVMBuildExtractValue(builder, dst, chan, "");
809             if (bld->mesh_iface) {
810                emit_store_mesh_chan(bld_base, var->data.compact, bit_size, location, const_index, indir_vertex_index, indir_index, comp, chan, chan_val);
811             } else if (bld->tcs_iface) {
812                emit_store_tcs_chan(bld_base, var->data.compact, bit_size, location, const_index, indir_vertex_index, indir_index, comp, chan, chan_val);
813             } else
814                emit_store_chan(bld_base, deref_mode, bit_size, location + const_index, comp, chan, chan_val);
815          }
816       }
817       break;
818    }
819    default:
820       break;
821    }
822 }
823 
824 /**
825  * Returns the address of the given constant array index and channel in a
826  * nir register.
827  */
reg_chan_pointer(struct lp_build_nir_context * bld_base,struct lp_build_context * reg_bld,const nir_intrinsic_instr * decl,LLVMValueRef reg_storage,int array_index,int chan)828 static LLVMValueRef reg_chan_pointer(struct lp_build_nir_context *bld_base,
829                                            struct lp_build_context *reg_bld,
830                                            const nir_intrinsic_instr *decl,
831                                            LLVMValueRef reg_storage,
832                                            int array_index, int chan)
833 {
834    struct gallivm_state *gallivm = bld_base->base.gallivm;
835    int nc = nir_intrinsic_num_components(decl);
836    int num_array_elems = nir_intrinsic_num_array_elems(decl);
837 
838    LLVMTypeRef chan_type = reg_bld->vec_type;
839    if (nc > 1)
840       chan_type = LLVMArrayType(chan_type, nc);
841 
842    if (num_array_elems > 0) {
843       LLVMTypeRef array_type = LLVMArrayType(chan_type, num_array_elems);
844       reg_storage = lp_build_array_get_ptr2(gallivm, array_type, reg_storage,
845                                             lp_build_const_int32(gallivm, array_index));
846    }
847    if (nc > 1) {
848       reg_storage = lp_build_array_get_ptr2(gallivm, chan_type, reg_storage,
849                                             lp_build_const_int32(gallivm, chan));
850    }
851 
852    return reg_storage;
853 }
854 
emit_load_reg(struct lp_build_nir_context * bld_base,struct lp_build_context * reg_bld,const nir_intrinsic_instr * decl,unsigned base,LLVMValueRef indir_src,LLVMValueRef reg_storage)855 static LLVMValueRef emit_load_reg(struct lp_build_nir_context *bld_base,
856                                   struct lp_build_context *reg_bld,
857                                   const nir_intrinsic_instr *decl,
858                                   unsigned base,
859                                   LLVMValueRef indir_src,
860                                   LLVMValueRef reg_storage)
861 {
862    struct gallivm_state *gallivm = bld_base->base.gallivm;
863    LLVMBuilderRef builder = gallivm->builder;
864    int nc = nir_intrinsic_num_components(decl);
865    int num_array_elems = nir_intrinsic_num_array_elems(decl);
866    LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS] = { NULL };
867    struct lp_build_context *uint_bld = &bld_base->uint_bld;
868    if (indir_src != NULL) {
869       LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, base);
870       LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, num_array_elems - 1);
871       indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, "");
872       indirect_val = lp_build_min(uint_bld, indirect_val, max_index);
873       reg_storage = LLVMBuildBitCast(builder, reg_storage, LLVMPointerType(reg_bld->elem_type, 0), "");
874       for (unsigned i = 0; i < nc; i++) {
875          LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, true);
876          vals[i] = build_gather(bld_base, reg_bld, reg_bld->elem_type, reg_storage, indirect_offset, NULL, NULL);
877       }
878    } else {
879       for (unsigned i = 0; i < nc; i++) {
880          vals[i] = LLVMBuildLoad2(builder, reg_bld->vec_type,
881                                   reg_chan_pointer(bld_base, reg_bld, decl, reg_storage,
882                                                    base, i), "");
883       }
884    }
885    return nc == 1 ? vals[0] : lp_nir_array_build_gather_values(builder, vals, nc);
886 }
887 
emit_store_reg(struct lp_build_nir_context * bld_base,struct lp_build_context * reg_bld,const nir_intrinsic_instr * decl,unsigned writemask,unsigned base,LLVMValueRef indir_src,LLVMValueRef reg_storage,LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS])888 static void emit_store_reg(struct lp_build_nir_context *bld_base,
889                            struct lp_build_context *reg_bld,
890                            const nir_intrinsic_instr *decl,
891                            unsigned writemask,
892                            unsigned base,
893                            LLVMValueRef indir_src,
894                            LLVMValueRef reg_storage,
895                            LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS])
896 {
897    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
898    struct gallivm_state *gallivm = bld_base->base.gallivm;
899    LLVMBuilderRef builder = gallivm->builder;
900    struct lp_build_context *uint_bld = &bld_base->uint_bld;
901    int nc = nir_intrinsic_num_components(decl);
902    int num_array_elems = nir_intrinsic_num_array_elems(decl);
903    if (indir_src != NULL) {
904       LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, base);
905       LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, num_array_elems - 1);
906       indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, "");
907       indirect_val = lp_build_min(uint_bld, indirect_val, max_index);
908       reg_storage = LLVMBuildBitCast(builder, reg_storage, LLVMPointerType(reg_bld->elem_type, 0), "");
909       for (unsigned i = 0; i < nc; i++) {
910          if (!(writemask & (1 << i)))
911             continue;
912          LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, true);
913          dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, "");
914          emit_mask_scatter(bld, reg_storage, indirect_offset, dst[i], &bld->exec_mask);
915       }
916       return;
917    }
918 
919    for (unsigned i = 0; i < nc; i++) {
920       if (!(writemask & (1 << i)))
921          continue;
922       dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, "");
923       lp_exec_mask_store(&bld->exec_mask, reg_bld, dst[i],
924                          reg_chan_pointer(bld_base, reg_bld, decl, reg_storage,
925                                           base, i));
926    }
927 }
928 
emit_load_kernel_arg(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,unsigned offset_bit_size,bool offset_is_uniform,LLVMValueRef offset,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])929 static void emit_load_kernel_arg(struct lp_build_nir_context *bld_base,
930                                  unsigned nc,
931                                  unsigned bit_size,
932                                  unsigned offset_bit_size,
933                                  bool offset_is_uniform,
934                                  LLVMValueRef offset,
935                                  LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
936 {
937    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
938    struct gallivm_state *gallivm = bld_base->base.gallivm;
939    LLVMBuilderRef builder = gallivm->builder;
940    struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size);
941    LLVMValueRef kernel_args_ptr = bld->kernel_args_ptr;
942    unsigned size_shift = bit_size_to_shift_size(bit_size);
943    struct lp_build_context *bld_offset = get_int_bld(bld_base, true, offset_bit_size);
944    if (size_shift)
945       offset = lp_build_shr(bld_offset, offset, lp_build_const_int_vec(gallivm, bld_offset->type, size_shift));
946 
947    LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0);
948    kernel_args_ptr = LLVMBuildBitCast(builder, kernel_args_ptr, ptr_type, "");
949 
950    if (offset_is_uniform) {
951       offset = LLVMBuildExtractElement(builder, offset, first_active_invocation(bld_base), "");
952 
953       for (unsigned c = 0; c < nc; c++) {
954          LLVMValueRef this_offset = LLVMBuildAdd(builder, offset, offset_bit_size == 64 ? lp_build_const_int64(gallivm, c) : lp_build_const_int32(gallivm, c), "");
955 
956          LLVMValueRef scalar = lp_build_pointer_get2(builder, bld_broad->elem_type, kernel_args_ptr, this_offset);
957          result[c] = lp_build_broadcast_scalar(bld_broad, scalar);
958       }
959    } else {
960       unreachable("load_kernel_arg must have a uniform offset.");
961    }
962 }
963 
global_addr_to_ptr(struct gallivm_state * gallivm,LLVMValueRef addr_ptr,unsigned bit_size)964 static LLVMValueRef global_addr_to_ptr(struct gallivm_state *gallivm, LLVMValueRef addr_ptr, unsigned bit_size)
965 {
966    LLVMBuilderRef builder = gallivm->builder;
967    switch (bit_size) {
968    case 8:
969       addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");
970       break;
971    case 16:
972       addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt16TypeInContext(gallivm->context), 0), "");
973       break;
974    case 32:
975    default:
976       addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), "");
977       break;
978    case 64:
979       addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0), "");
980       break;
981    }
982    return addr_ptr;
983 }
984 
global_addr_to_ptr_vec(struct gallivm_state * gallivm,LLVMValueRef addr_ptr,unsigned length,unsigned bit_size)985 static LLVMValueRef global_addr_to_ptr_vec(struct gallivm_state *gallivm, LLVMValueRef addr_ptr, unsigned length, unsigned bit_size)
986 {
987    LLVMBuilderRef builder = gallivm->builder;
988    switch (bit_size) {
989    case 8:
990       addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMVectorType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), length), "");
991       break;
992    case 16:
993       addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMVectorType(LLVMPointerType(LLVMInt16TypeInContext(gallivm->context), 0), length), "");
994       break;
995    case 32:
996    default:
997       addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMVectorType(LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), length), "");
998       break;
999    case 64:
1000       addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMVectorType(LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0), length), "");
1001       break;
1002    }
1003    return addr_ptr;
1004 }
1005 
lp_vec_add_offset_ptr(struct lp_build_nir_context * bld_base,unsigned bit_size,LLVMValueRef ptr,LLVMValueRef offset)1006 static LLVMValueRef lp_vec_add_offset_ptr(struct lp_build_nir_context *bld_base,
1007                                           unsigned bit_size,
1008                                           LLVMValueRef ptr,
1009                                           LLVMValueRef offset)
1010 {
1011    unsigned pointer_size = 8 * sizeof(void *);
1012    struct gallivm_state *gallivm = bld_base->base.gallivm;
1013    LLVMBuilderRef builder = gallivm->builder;
1014    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1015    struct lp_build_context *ptr_bld = get_int_bld(bld_base, true, pointer_size);
1016    LLVMValueRef result = LLVMBuildPtrToInt(builder, ptr, ptr_bld->vec_type, "");
1017    if (pointer_size == 64)
1018       offset = LLVMBuildZExt(builder, offset, ptr_bld->vec_type, "");
1019    result = LLVMBuildAdd(builder, offset, result, "");
1020    return global_addr_to_ptr_vec(gallivm, result, uint_bld->type.length, bit_size);
1021 }
1022 
emit_load_global(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,unsigned addr_bit_size,bool offset_is_uniform,LLVMValueRef addr,LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])1023 static void emit_load_global(struct lp_build_nir_context *bld_base,
1024                              unsigned nc,
1025                              unsigned bit_size,
1026                              unsigned addr_bit_size,
1027                              bool offset_is_uniform,
1028                              LLVMValueRef addr,
1029                              LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
1030 {
1031    struct gallivm_state *gallivm = bld_base->base.gallivm;
1032    LLVMBuilderRef builder = gallivm->builder;
1033    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1034    struct lp_build_context *res_bld;
1035    LLVMValueRef exec_mask = mask_vec(bld_base);
1036 
1037    res_bld = get_int_bld(bld_base, true, bit_size);
1038 
1039    /* Note, we don't use first_active_invocation here, since we aren't
1040     * guaranteed that there is actually an active invocation.
1041     */
1042    if (offset_is_uniform && invocation_0_must_be_active(bld_base)) {
1043       /* If the offset is uniform, then use the address from invocation 0 to
1044        * load, and broadcast to all invocations.
1045        */
1046       LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
1047                                                       lp_build_const_int32(gallivm, 0), "");
1048       addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);
1049 
1050       for (unsigned c = 0; c < nc; c++) {
1051          LLVMValueRef scalar = lp_build_pointer_get2(builder, res_bld->elem_type,
1052                                                      addr_ptr, lp_build_const_int32(gallivm, c));
1053          outval[c] = lp_build_broadcast_scalar(res_bld, scalar);
1054       }
1055       return;
1056    }
1057 
1058    for (unsigned c = 0; c < nc; c++) {
1059       LLVMValueRef chan_offset = lp_build_const_int_vec(gallivm, uint_bld->type, c * (bit_size / 8));
1060 
1061       outval[c] = lp_build_masked_gather(gallivm, res_bld->type.length,
1062                                          bit_size,
1063                                          res_bld->vec_type,
1064                                          lp_vec_add_offset_ptr(bld_base, bit_size, addr, chan_offset),
1065                                          exec_mask);
1066       outval[c] = LLVMBuildBitCast(builder, outval[c], res_bld->vec_type, "");
1067    }
1068 }
1069 
emit_store_global(struct lp_build_nir_context * bld_base,unsigned writemask,unsigned nc,unsigned bit_size,unsigned addr_bit_size,LLVMValueRef addr,LLVMValueRef dst)1070 static void emit_store_global(struct lp_build_nir_context *bld_base,
1071                               unsigned writemask,
1072                               unsigned nc, unsigned bit_size,
1073                               unsigned addr_bit_size,
1074                               LLVMValueRef addr,
1075                               LLVMValueRef dst)
1076 {
1077    struct gallivm_state *gallivm = bld_base->base.gallivm;
1078    LLVMBuilderRef builder = gallivm->builder;
1079    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1080    LLVMValueRef exec_mask = mask_vec(bld_base);
1081 
1082    for (unsigned c = 0; c < nc; c++) {
1083       if (!(writemask & (1u << c)))
1084          continue;
1085       LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
1086       LLVMValueRef chan_offset = lp_build_const_int_vec(gallivm, uint_bld->type, c * (bit_size / 8));
1087 
1088       struct lp_build_context *out_bld = get_int_bld(bld_base, false, bit_size);
1089       val = LLVMBuildBitCast(builder, val, out_bld->vec_type, "");
1090       lp_build_masked_scatter(gallivm, out_bld->type.length, bit_size,
1091                               lp_vec_add_offset_ptr(bld_base, bit_size,
1092                                                     addr, chan_offset),
1093                               val, exec_mask);
1094    }
1095 }
1096 
emit_atomic_global(struct lp_build_nir_context * bld_base,nir_atomic_op nir_op,unsigned addr_bit_size,unsigned val_bit_size,LLVMValueRef addr,LLVMValueRef val,LLVMValueRef val2,LLVMValueRef * result)1097 static void emit_atomic_global(struct lp_build_nir_context *bld_base,
1098                                nir_atomic_op nir_op,
1099                                unsigned addr_bit_size,
1100                                unsigned val_bit_size,
1101                                LLVMValueRef addr,
1102                                LLVMValueRef val, LLVMValueRef val2,
1103                                LLVMValueRef *result)
1104 {
1105    struct gallivm_state *gallivm = bld_base->base.gallivm;
1106    LLVMBuilderRef builder = gallivm->builder;
1107    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1108    bool is_flt = nir_atomic_op_type(nir_op) == nir_type_float;
1109    struct lp_build_context *atom_bld = is_flt ? get_flt_bld(bld_base, val_bit_size) : get_int_bld(bld_base, true, val_bit_size);
1110    if (is_flt)
1111       val = LLVMBuildBitCast(builder, val, atom_bld->vec_type, "");
1112 
1113    LLVMValueRef atom_res = lp_build_alloca(gallivm,
1114                                            atom_bld->vec_type, "");
1115    LLVMValueRef exec_mask = mask_vec(bld_base);
1116    struct lp_build_loop_state loop_state;
1117    lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1118 
1119    LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
1120                                                     loop_state.counter, "");
1121    value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, atom_bld->elem_type, "");
1122 
1123    LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
1124                                                    loop_state.counter, "");
1125    addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, 32);
1126    struct lp_build_if_state ifthen;
1127    LLVMValueRef cond, temp_res;
1128    LLVMValueRef scalar;
1129    cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
1130    cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
1131    lp_build_if(&ifthen, gallivm, cond);
1132 
1133    addr_ptr = LLVMBuildBitCast(gallivm->builder, addr_ptr, LLVMPointerType(LLVMTypeOf(value_ptr), 0), "");
1134    if (val2 != NULL /* compare-and-swap */) {
1135       LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
1136                                                          loop_state.counter, "");
1137       cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atom_bld->elem_type, "");
1138       scalar = LLVMBuildAtomicCmpXchg(builder, addr_ptr, value_ptr,
1139                                       cas_src_ptr,
1140                                       LLVMAtomicOrderingSequentiallyConsistent,
1141                                       LLVMAtomicOrderingSequentiallyConsistent,
1142                                       false);
1143       scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
1144    } else {
1145       scalar = LLVMBuildAtomicRMW(builder, lp_translate_atomic_op(nir_op),
1146                                   addr_ptr, value_ptr,
1147                                   LLVMAtomicOrderingSequentiallyConsistent,
1148                                   false);
1149    }
1150    temp_res = LLVMBuildLoad2(builder, atom_bld->vec_type, atom_res, "");
1151    temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
1152    LLVMBuildStore(builder, temp_res, atom_res);
1153    lp_build_else(&ifthen);
1154    temp_res = LLVMBuildLoad2(builder, atom_bld->vec_type, atom_res, "");
1155    LLVMValueRef zero_val = lp_build_zero_bits(gallivm, val_bit_size, is_flt);
1156    temp_res = LLVMBuildInsertElement(builder, temp_res, zero_val, loop_state.counter, "");
1157    LLVMBuildStore(builder, temp_res, atom_res);
1158    lp_build_endif(&ifthen);
1159    lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
1160                           NULL, LLVMIntUGE);
1161    *result = LLVMBuildLoad2(builder, LLVMTypeOf(val), atom_res, "");
1162 }
1163 
1164 /* Returns a boolean for whether the offset is in range of the given limit for
1165  * SSBO/UBO dereferences.
1166  */
1167 static LLVMValueRef
lp_offset_in_range(struct lp_build_nir_context * bld_base,LLVMValueRef offset,LLVMValueRef limit)1168 lp_offset_in_range(struct lp_build_nir_context *bld_base,
1169                    LLVMValueRef offset,
1170                    LLVMValueRef limit)
1171 {
1172    struct gallivm_state *gallivm = bld_base->base.gallivm;
1173    LLVMBuilderRef builder = gallivm->builder;
1174 
1175    LLVMValueRef fetch_extent = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, 1), "");
1176    LLVMValueRef fetch_in_bounds = LLVMBuildICmp(gallivm->builder, LLVMIntUGE, limit, fetch_extent, "");
1177    LLVMValueRef fetch_non_negative = LLVMBuildICmp(gallivm->builder, LLVMIntSGE, offset, lp_build_const_int32(gallivm, 0), "");
1178    return LLVMBuildAnd(gallivm->builder, fetch_in_bounds, fetch_non_negative, "");
1179 }
1180 
1181 static LLVMValueRef
build_resource_to_scalar(struct lp_build_nir_context * bld_base,LLVMValueRef resource)1182 build_resource_to_scalar(struct lp_build_nir_context *bld_base, LLVMValueRef resource)
1183 {
1184    struct gallivm_state *gallivm = bld_base->base.gallivm;
1185 
1186    LLVMValueRef invocation = first_active_invocation(bld_base);
1187 
1188    LLVMValueRef set = LLVMBuildExtractValue(gallivm->builder, resource, 0, "");
1189    set = LLVMBuildExtractElement(gallivm->builder, set, invocation, "");
1190 
1191    LLVMValueRef binding = LLVMBuildExtractValue(gallivm->builder, resource, 1, "");
1192    binding = LLVMBuildExtractElement(gallivm->builder, binding, invocation, "");
1193 
1194    LLVMValueRef components[2] = { set, binding };
1195    return lp_nir_array_build_gather_values(gallivm->builder, components, 2);
1196 }
1197 
emit_load_ubo(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,bool offset_is_uniform,LLVMValueRef index,LLVMValueRef offset,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1198 static void emit_load_ubo(struct lp_build_nir_context *bld_base,
1199                           unsigned nc,
1200                           unsigned bit_size,
1201                           bool offset_is_uniform,
1202                           LLVMValueRef index,
1203                           LLVMValueRef offset,
1204                           LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1205 {
1206    if (LLVMGetTypeKind(LLVMTypeOf(index)) == LLVMArrayTypeKind)
1207       index = build_resource_to_scalar(bld_base, index);
1208 
1209    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1210    struct gallivm_state *gallivm = bld_base->base.gallivm;
1211    LLVMBuilderRef builder = gallivm->builder;
1212    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1213    struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size);
1214    LLVMValueRef consts_ptr = lp_llvm_buffer_base(gallivm, bld->consts_ptr, index, LP_MAX_TGSI_CONST_BUFFERS);
1215    LLVMValueRef num_consts = lp_llvm_buffer_num_elements(gallivm, bld->consts_ptr, index, LP_MAX_TGSI_CONST_BUFFERS);
1216    unsigned size_shift = bit_size_to_shift_size(bit_size);
1217    if (size_shift)
1218       offset = lp_build_shr(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, size_shift));
1219 
1220    LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0);
1221    consts_ptr = LLVMBuildBitCast(builder, consts_ptr, ptr_type, "");
1222 
1223    if (offset_is_uniform) {
1224       offset = LLVMBuildExtractElement(builder, offset, first_active_invocation(bld_base), "");
1225       struct lp_build_context *load_bld = get_int_bld(bld_base, true, bit_size);
1226       switch (bit_size) {
1227       case 8:
1228          num_consts = LLVMBuildShl(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 2), "");
1229          break;
1230       case 16:
1231          num_consts = LLVMBuildShl(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 1), "");
1232          break;
1233       case 64:
1234          num_consts = LLVMBuildLShr(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 1), "");
1235          break;
1236       default: break;
1237       }
1238       for (unsigned c = 0; c < nc; c++) {
1239          LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");
1240 
1241          LLVMValueRef scalar;
1242          /* If loading outside the UBO, we need to skip the load and read 0 instead. */
1243          LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, false);
1244          LLVMValueRef res_store = lp_build_alloca(gallivm, LLVMTypeOf(zero), "");
1245          LLVMBuildStore(builder, zero, res_store);
1246 
1247          struct lp_build_if_state ifthen;
1248          lp_build_if(&ifthen, gallivm, lp_offset_in_range(bld_base, chan_offset, num_consts));
1249          LLVMBuildStore(builder, lp_build_pointer_get2(builder, bld_broad->elem_type,
1250                                                        consts_ptr, chan_offset), res_store);
1251          lp_build_endif(&ifthen);
1252 
1253          scalar = LLVMBuildLoad2(builder, LLVMTypeOf(zero), res_store, "");
1254 
1255          result[c] = lp_build_broadcast_scalar(load_bld, scalar);
1256       }
1257    } else {
1258       LLVMValueRef overflow_mask;
1259 
1260       num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
1261       if (bit_size == 64)
1262          num_consts = lp_build_shr_imm(uint_bld, num_consts, 1);
1263       else if (bit_size == 16)
1264          num_consts = lp_build_shl_imm(uint_bld, num_consts, 1);
1265       else if (bit_size == 8)
1266          num_consts = lp_build_shl_imm(uint_bld, num_consts, 2);
1267 
1268       for (unsigned c = 0; c < nc; c++) {
1269          LLVMValueRef this_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
1270          overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
1271                                           this_offset, num_consts);
1272          result[c] = build_gather(bld_base, bld_broad, bld_broad->elem_type, consts_ptr, this_offset, overflow_mask, NULL);
1273       }
1274    }
1275 }
1276 
1277 static void
emit_load_const(struct lp_build_nir_context * bld_base,const nir_load_const_instr * instr,LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])1278 emit_load_const(struct lp_build_nir_context *bld_base,
1279                 const nir_load_const_instr *instr,
1280                 LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
1281 {
1282    struct lp_build_context *int_bld = get_int_bld(bld_base, true, instr->def.bit_size);
1283    const unsigned bits = instr->def.bit_size;
1284 
1285    for (unsigned i = 0; i < instr->def.num_components; i++) {
1286      outval[i] = lp_build_const_int_vec(bld_base->base.gallivm, int_bld->type,
1287                                         bits == 32 ? instr->value[i].u32
1288                                                    : instr->value[i].u64);
1289    }
1290    for (unsigned i = instr->def.num_components; i < NIR_MAX_VEC_COMPONENTS; i++) {
1291       outval[i] = NULL;
1292    }
1293 }
1294 
1295 /**
1296  * Get the base address of SSBO[@index] for the @invocation channel, returning
1297  * the address and also the bounds (in units of the bit_size).
1298  */
1299 static LLVMValueRef
ssbo_base_pointer(struct lp_build_nir_context * bld_base,unsigned bit_size,LLVMValueRef index,LLVMValueRef invocation,LLVMValueRef * bounds)1300 ssbo_base_pointer(struct lp_build_nir_context *bld_base,
1301                   unsigned bit_size,
1302                   LLVMValueRef index, LLVMValueRef invocation, LLVMValueRef *bounds)
1303 {
1304    struct gallivm_state *gallivm = bld_base->base.gallivm;
1305    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1306    uint32_t shift_val = bit_size_to_shift_size(bit_size);
1307 
1308    LLVMValueRef ssbo_idx;
1309    LLVMValueRef buffers;
1310    uint32_t buffers_limit;
1311    if (LLVMGetTypeKind(LLVMTypeOf(index)) == LLVMArrayTypeKind) {
1312       LLVMValueRef set = LLVMBuildExtractValue(gallivm->builder, index, 0, "");
1313       set = LLVMBuildExtractElement(gallivm->builder, set, invocation, "");
1314 
1315       LLVMValueRef binding = LLVMBuildExtractValue(gallivm->builder, index, 1, "");
1316       binding = LLVMBuildExtractElement(gallivm->builder, binding, invocation, "");
1317 
1318       LLVMValueRef components[2] = { set, binding };
1319       ssbo_idx = lp_nir_array_build_gather_values(gallivm->builder, components, 2);
1320 
1321       buffers = bld->consts_ptr;
1322       buffers_limit = LP_MAX_TGSI_CONST_BUFFERS;
1323    } else {
1324       ssbo_idx = LLVMBuildExtractElement(gallivm->builder, index, invocation, "");
1325 
1326       buffers = bld->ssbo_ptr;
1327       buffers_limit = LP_MAX_TGSI_SHADER_BUFFERS;
1328    }
1329 
1330    LLVMValueRef ssbo_size_ptr = lp_llvm_buffer_num_elements(gallivm, buffers, ssbo_idx, buffers_limit);
1331    LLVMValueRef ssbo_ptr = lp_llvm_buffer_base(gallivm, buffers, ssbo_idx, buffers_limit);
1332    if (bounds)
1333       *bounds = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, shift_val), "");
1334 
1335    return ssbo_ptr;
1336 }
1337 
1338 static LLVMValueRef
mem_access_base_pointer(struct lp_build_nir_context * bld_base,struct lp_build_context * mem_bld,unsigned bit_size,bool payload,LLVMValueRef index,LLVMValueRef invocation,LLVMValueRef * bounds)1339 mem_access_base_pointer(struct lp_build_nir_context *bld_base,
1340                         struct lp_build_context *mem_bld,
1341                         unsigned bit_size, bool payload,
1342                         LLVMValueRef index, LLVMValueRef invocation, LLVMValueRef *bounds)
1343 {
1344    struct gallivm_state *gallivm = bld_base->base.gallivm;
1345    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1346    LLVMValueRef ptr;
1347 
1348    if (index) {
1349       ptr = ssbo_base_pointer(bld_base, bit_size, index, invocation, bounds);
1350    } else {
1351       if (payload) {
1352          ptr = bld->payload_ptr;
1353          ptr = LLVMBuildPtrToInt(gallivm->builder, ptr, bld_base->int64_bld.elem_type, "");
1354          ptr = LLVMBuildAdd(gallivm->builder, ptr, lp_build_const_int64(gallivm, 12), "");
1355          ptr = LLVMBuildIntToPtr(gallivm->builder, ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), "");
1356       }
1357       else
1358          ptr = bld->shared_ptr;
1359       *bounds = NULL;
1360    }
1361 
1362    /* Cast it to the pointer type of the access this instruction is doing. */
1363    if (bit_size == 32 && !mem_bld->type.floating)
1364       return ptr;
1365    else
1366       return LLVMBuildBitCast(gallivm->builder, ptr, LLVMPointerType(mem_bld->elem_type, 0), "");
1367 }
1368 
emit_load_mem(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,bool index_and_offset_are_uniform,bool payload,LLVMValueRef index,LLVMValueRef offset,LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])1369 static void emit_load_mem(struct lp_build_nir_context *bld_base,
1370                           unsigned nc,
1371                           unsigned bit_size,
1372                           bool index_and_offset_are_uniform,
1373                           bool payload,
1374                           LLVMValueRef index,
1375                           LLVMValueRef offset,
1376                           LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
1377 {
1378    struct gallivm_state *gallivm = bld_base->base.gallivm;
1379    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1380    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1381    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1382    struct lp_build_context *load_bld;
1383    uint32_t shift_val = bit_size_to_shift_size(bit_size);
1384 
1385    load_bld = get_int_bld(bld_base, true, bit_size);
1386 
1387    offset = LLVMBuildAShr(gallivm->builder, offset, lp_build_const_int_vec(gallivm, uint_bld->type, shift_val), "");
1388 
1389    /* If the address is uniform, then use the address from the first active
1390     * invocation 0 to load, and broadcast to all invocations.  We can't do
1391     * computed first active invocation for shared accesses (index == NULL),
1392     * though, since those don't do bounds checking and we could use an invalid
1393     * offset if exec_mask == 0.
1394     */
1395    if (index_and_offset_are_uniform && (invocation_0_must_be_active(bld_base) || index)) {
1396       LLVMValueRef ssbo_limit;
1397       LLVMValueRef first_active = first_active_invocation(bld_base);
1398       LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, payload, index,
1399                                                      first_active, &ssbo_limit);
1400 
1401       offset = LLVMBuildExtractElement(gallivm->builder, offset, first_active, "");
1402 
1403       for (unsigned c = 0; c < nc; c++) {
1404          LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");
1405 
1406          LLVMValueRef scalar;
1407          /* If loading outside the SSBO, we need to skip the load and read 0 instead. */
1408          if (ssbo_limit) {
1409             LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, false);
1410             LLVMValueRef res_store = lp_build_alloca(gallivm, LLVMTypeOf(zero), "");
1411             LLVMBuildStore(builder, zero, res_store);
1412 
1413             struct lp_build_if_state ifthen;
1414             lp_build_if(&ifthen, gallivm, lp_offset_in_range(bld_base, chan_offset, ssbo_limit));
1415             LLVMBuildStore(builder, lp_build_pointer_get2(builder, load_bld->elem_type, mem_ptr, chan_offset), res_store);
1416             lp_build_endif(&ifthen);
1417 
1418             scalar = LLVMBuildLoad2(builder, LLVMTypeOf(zero), res_store, "");
1419          } else {
1420             scalar = lp_build_pointer_get2(builder, load_bld->elem_type, mem_ptr, chan_offset);
1421          }
1422 
1423          outval[c] = lp_build_broadcast_scalar(load_bld, scalar);
1424       }
1425       return;
1426    }
1427 
1428    /* although the index is dynamically uniform that doesn't count if exec mask isn't set, so read the one-by-one */
1429 
1430    LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
1431    for (unsigned c = 0; c < nc; c++)
1432       result[c] = lp_build_alloca(gallivm, load_bld->vec_type, "");
1433 
1434    LLVMValueRef exec_mask = mask_vec(bld_base);
1435 
1436    /* mask the offset to prevent invalid reads */
1437    offset = LLVMBuildAnd(gallivm->builder, offset, exec_mask, "");
1438 
1439    for (unsigned i = 0; i < uint_bld->type.length; i++) {
1440       LLVMValueRef counter = lp_build_const_int32(gallivm, i);
1441 
1442       LLVMValueRef ssbo_limit;
1443       LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, payload, index,
1444                                                      counter, &ssbo_limit);
1445 
1446       LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, counter, "");
1447 
1448       for (unsigned c = 0; c < nc; c++) {
1449          LLVMValueRef loop_index = LLVMBuildAdd(builder, loop_offset, lp_build_const_int32(gallivm, c), "");
1450          LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1);
1451          if (ssbo_limit) {
1452             LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_index, ssbo_limit);
1453             do_fetch = LLVMBuildAnd(builder, do_fetch, ssbo_oob_cmp, "");
1454          }
1455 
1456          struct lp_build_if_state ifthen;
1457          LLVMValueRef fetch_cond, temp_res;
1458 
1459          fetch_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_fetch, lp_build_const_int32(gallivm, 0), "");
1460 
1461          lp_build_if(&ifthen, gallivm, fetch_cond);
1462          LLVMValueRef scalar = lp_build_pointer_get2(builder, load_bld->elem_type, mem_ptr, loop_index);
1463 
1464          temp_res = LLVMBuildLoad2(builder, load_bld->vec_type, result[c], "");
1465          temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, counter, "");
1466          LLVMBuildStore(builder, temp_res, result[c]);
1467          lp_build_else(&ifthen);
1468          temp_res = LLVMBuildLoad2(builder, load_bld->vec_type, result[c], "");
1469          LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, false);
1470          temp_res = LLVMBuildInsertElement(builder, temp_res, zero, counter, "");
1471          LLVMBuildStore(builder, temp_res, result[c]);
1472          lp_build_endif(&ifthen);
1473       }
1474    }
1475    for (unsigned c = 0; c < nc; c++)
1476       outval[c] = LLVMBuildLoad2(gallivm->builder, load_bld->vec_type, result[c], "");
1477 
1478 }
1479 
emit_store_mem(struct lp_build_nir_context * bld_base,unsigned writemask,unsigned nc,unsigned bit_size,bool index_and_offset_are_uniform,bool payload,LLVMValueRef index,LLVMValueRef offset,LLVMValueRef dst)1480 static void emit_store_mem(struct lp_build_nir_context *bld_base,
1481                            unsigned writemask,
1482                            unsigned nc,
1483                            unsigned bit_size,
1484                            bool index_and_offset_are_uniform,
1485                            bool payload,
1486                            LLVMValueRef index,
1487                            LLVMValueRef offset,
1488                            LLVMValueRef dst)
1489 {
1490    struct gallivm_state *gallivm = bld_base->base.gallivm;
1491    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1492    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1493    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1494    struct lp_build_context *store_bld;
1495    uint32_t shift_val = bit_size_to_shift_size(bit_size);
1496    store_bld = get_int_bld(bld_base, true, bit_size);
1497 
1498    offset = lp_build_shr_imm(uint_bld, offset, shift_val);
1499 
1500    /* If the address is uniform, then just store the value from the first
1501     * channel instead of making LLVM unroll the invocation loop.  Note that we
1502     * don't use first_active_uniform(), since we aren't guaranteed that there is
1503     * actually an active invocation.
1504     */
1505    if (index_and_offset_are_uniform && invocation_0_must_be_active(bld_base)) {
1506       LLVMValueRef ssbo_limit;
1507       LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, store_bld, bit_size, payload, index,
1508                                                      lp_build_const_int32(gallivm, 0), &ssbo_limit);
1509 
1510       offset = LLVMBuildExtractElement(gallivm->builder, offset, lp_build_const_int32(gallivm, 0), "");
1511 
1512       for (unsigned c = 0; c < nc; c++) {
1513          if (!(writemask & (1u << c)))
1514             continue;
1515 
1516          /* Pick out invocation 0's value. */
1517          LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
1518          LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
1519                                                          lp_build_const_int32(gallivm, 0), "");
1520          value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, "");
1521 
1522          LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");
1523 
1524          /* If storing outside the SSBO, we need to skip the store instead. */
1525          if (ssbo_limit) {
1526             struct lp_build_if_state ifthen;
1527             lp_build_if(&ifthen, gallivm, lp_offset_in_range(bld_base, chan_offset, ssbo_limit));
1528             lp_build_pointer_set(builder, mem_ptr, chan_offset, value_ptr);
1529             lp_build_endif(&ifthen);
1530          } else {
1531             lp_build_pointer_set(builder, mem_ptr, chan_offset, value_ptr);
1532          }
1533       }
1534       return;
1535    }
1536 
1537    LLVMValueRef exec_mask = mask_vec(bld_base);
1538    LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
1539    for (unsigned i = 0; i < uint_bld->type.length; i++) {
1540       LLVMValueRef counter = lp_build_const_int32(gallivm, i);
1541       LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, counter, "");
1542 
1543       struct lp_build_if_state exec_ifthen;
1544       lp_build_if(&exec_ifthen, gallivm, loop_cond);
1545 
1546       LLVMValueRef ssbo_limit;
1547       LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, store_bld, bit_size, payload, index,
1548                                                      counter, &ssbo_limit);
1549 
1550       LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, counter, "");
1551 
1552       for (unsigned c = 0; c < nc; c++) {
1553          if (!(writemask & (1u << c)))
1554             continue;
1555          LLVMValueRef loop_index = LLVMBuildAdd(builder, loop_offset, lp_build_const_int32(gallivm, c), "");
1556          LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
1557          LLVMValueRef do_store = lp_build_const_int32(gallivm, -1);
1558 
1559          if (ssbo_limit) {
1560             LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_index, ssbo_limit);
1561             do_store = LLVMBuildAnd(builder, do_store, ssbo_oob_cmp, "");
1562          }
1563 
1564          LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
1565                                                           counter, "");
1566          value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, "");
1567          struct lp_build_if_state ifthen;
1568          LLVMValueRef store_cond;
1569 
1570          store_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_store, lp_build_const_int32(gallivm, 0), "");
1571          lp_build_if(&ifthen, gallivm, store_cond);
1572          lp_build_pointer_set(builder, mem_ptr, loop_index, value_ptr);
1573          lp_build_endif(&ifthen);
1574       }
1575 
1576       lp_build_endif(&exec_ifthen);
1577    }
1578 }
1579 
1580 
emit_atomic_mem(struct lp_build_nir_context * bld_base,nir_atomic_op nir_op,uint32_t bit_size,bool payload,LLVMValueRef index,LLVMValueRef offset,LLVMValueRef val,LLVMValueRef val2,LLVMValueRef * result)1581 static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
1582                             nir_atomic_op nir_op,
1583                             uint32_t bit_size,
1584                             bool payload,
1585                             LLVMValueRef index, LLVMValueRef offset,
1586                             LLVMValueRef val, LLVMValueRef val2,
1587                             LLVMValueRef *result)
1588 {
1589    struct gallivm_state *gallivm = bld_base->base.gallivm;
1590    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1591    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1592    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1593    uint32_t shift_val = bit_size_to_shift_size(bit_size);
1594    bool is_float = nir_atomic_op_type(nir_op) == nir_type_float;
1595    struct lp_build_context *atomic_bld = is_float ? get_flt_bld(bld_base, bit_size) : get_int_bld(bld_base, true, bit_size);
1596 
1597    offset = lp_build_shr_imm(uint_bld, offset, shift_val);
1598    LLVMValueRef atom_res = lp_build_alloca(gallivm,
1599                                            atomic_bld->vec_type, "");
1600 
1601    LLVMValueRef exec_mask = mask_vec(bld_base);
1602    LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
1603    for (unsigned i = 0; i < uint_bld->type.length; i++) {
1604       LLVMValueRef counter = lp_build_const_int32(gallivm, i);
1605       LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, counter, "");
1606 
1607       struct lp_build_if_state exec_ifthen;
1608       lp_build_if(&exec_ifthen, gallivm, loop_cond);
1609 
1610       LLVMValueRef ssbo_limit;
1611       LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, atomic_bld, bit_size, payload, index,
1612                                                      counter, &ssbo_limit);
1613 
1614       LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, counter, "");
1615 
1616       LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1);
1617       if (ssbo_limit) {
1618          LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_offset, ssbo_limit);
1619          do_fetch = LLVMBuildAnd(builder, do_fetch, ssbo_oob_cmp, "");
1620       }
1621 
1622       LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
1623                                                        counter, "");
1624       value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, atomic_bld->elem_type, "");
1625 
1626       LLVMValueRef scalar_ptr = LLVMBuildGEP2(builder, atomic_bld->elem_type, mem_ptr, &loop_offset, 1, "");
1627 
1628       struct lp_build_if_state ifthen;
1629       LLVMValueRef inner_cond, temp_res;
1630       LLVMValueRef scalar;
1631 
1632       inner_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_fetch, lp_build_const_int32(gallivm, 0), "");
1633       lp_build_if(&ifthen, gallivm, inner_cond);
1634 
1635       if (val2 != NULL) {
1636          LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
1637                                                             counter, "");
1638          cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atomic_bld->elem_type, "");
1639          scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
1640                                          cas_src_ptr,
1641                                          LLVMAtomicOrderingSequentiallyConsistent,
1642                                          LLVMAtomicOrderingSequentiallyConsistent,
1643                                          false);
1644          scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
1645       } else {
1646          scalar = LLVMBuildAtomicRMW(builder, lp_translate_atomic_op(nir_op),
1647                                      scalar_ptr, value_ptr,
1648                                      LLVMAtomicOrderingSequentiallyConsistent,
1649                                      false);
1650       }
1651       temp_res = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, "");
1652       temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, counter, "");
1653       LLVMBuildStore(builder, temp_res, atom_res);
1654       lp_build_else(&ifthen);
1655       temp_res = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, "");
1656       LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, is_float);
1657       temp_res = LLVMBuildInsertElement(builder, temp_res, zero, counter, "");
1658       LLVMBuildStore(builder, temp_res, atom_res);
1659       lp_build_endif(&ifthen);
1660 
1661       lp_build_endif(&exec_ifthen);
1662    }
1663    *result = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, "");
1664 }
1665 
emit_barrier(struct lp_build_nir_context * bld_base)1666 static void emit_barrier(struct lp_build_nir_context *bld_base)
1667 {
1668    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1669    struct gallivm_state * gallivm = bld_base->base.gallivm;
1670 
1671    LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");
1672 
1673    lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);
1674    LLVMPositionBuilderAtEnd(gallivm->builder, resume);
1675 }
1676 
emit_get_ssbo_size(struct lp_build_nir_context * bld_base,LLVMValueRef index)1677 static LLVMValueRef emit_get_ssbo_size(struct lp_build_nir_context *bld_base,
1678                                        LLVMValueRef index)
1679 {
1680    struct lp_build_context *bld_broad = &bld_base->uint_bld;
1681 
1682    LLVMValueRef size;
1683    ssbo_base_pointer(bld_base, 8, index, first_active_invocation(bld_base), &size);
1684 
1685    return lp_build_broadcast_scalar(bld_broad, size);
1686 }
1687 
emit_image_op(struct lp_build_nir_context * bld_base,struct lp_img_params * params)1688 static void emit_image_op(struct lp_build_nir_context *bld_base,
1689                           struct lp_img_params *params)
1690 {
1691    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1692    struct gallivm_state *gallivm = bld_base->base.gallivm;
1693 
1694    params->type = bld_base->base.type;
1695    params->resources_type = bld->resources_type;
1696    params->resources_ptr = bld->resources_ptr;
1697    params->thread_data_type = bld->thread_data_type;
1698    params->thread_data_ptr = bld->thread_data_ptr;
1699    params->exec_mask = mask_vec(bld_base);
1700 
1701    if (params->image_index_offset)
1702       params->image_index_offset = LLVMBuildExtractElement(gallivm->builder, params->image_index_offset,
1703                                                            first_active_invocation(bld_base), "");
1704 
1705    if (params->resource)
1706       params->resource = build_resource_to_scalar(bld_base, params->resource);
1707 
1708    bld->image->emit_op(bld->image,
1709                        bld->bld_base.base.gallivm,
1710                        params);
1711 
1712 }
1713 
emit_image_size(struct lp_build_nir_context * bld_base,struct lp_sampler_size_query_params * params)1714 static void emit_image_size(struct lp_build_nir_context *bld_base,
1715                             struct lp_sampler_size_query_params *params)
1716 {
1717    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1718    struct gallivm_state *gallivm = bld_base->base.gallivm;
1719 
1720    params->int_type = bld_base->int_bld.type;
1721    params->resources_type = bld->resources_type;
1722    params->resources_ptr = bld->resources_ptr;
1723    if (params->texture_unit_offset)
1724       params->texture_unit_offset = LLVMBuildExtractElement(gallivm->builder, params->texture_unit_offset,
1725                                                             first_active_invocation(bld_base), "");
1726    bld->image->emit_size_query(bld->image,
1727                                bld->bld_base.base.gallivm,
1728                                params);
1729 
1730 }
1731 
init_var_slots(struct lp_build_nir_context * bld_base,nir_variable * var,unsigned sc)1732 static void init_var_slots(struct lp_build_nir_context *bld_base,
1733                            nir_variable *var, unsigned sc)
1734 {
1735    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1736    unsigned slots = glsl_count_attribute_slots(var->type, false) * 4;
1737 
1738    if (!bld->outputs)
1739      return;
1740    for (unsigned comp = sc; comp < slots + sc; comp++) {
1741       unsigned this_loc = var->data.driver_location + (comp / 4);
1742       unsigned this_chan = comp % 4;
1743 
1744       if (!bld->outputs[this_loc][this_chan])
1745          bld->outputs[this_loc][this_chan] = lp_build_alloca(bld_base->base.gallivm,
1746                                                              bld_base->base.vec_type, "output");
1747    }
1748 }
1749 
emit_var_decl(struct lp_build_nir_context * bld_base,nir_variable * var)1750 static void emit_var_decl(struct lp_build_nir_context *bld_base,
1751                           nir_variable *var)
1752 {
1753    unsigned sc = var->data.location_frac;
1754    switch (var->data.mode) {
1755    case nir_var_shader_out: {
1756       if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
1757          if (var->data.location == FRAG_RESULT_STENCIL)
1758             sc = 1;
1759          else if (var->data.location == FRAG_RESULT_DEPTH)
1760             sc = 2;
1761       }
1762       init_var_slots(bld_base, var, sc);
1763       break;
1764    }
1765    default:
1766       break;
1767    }
1768 }
1769 
emit_tex(struct lp_build_nir_context * bld_base,struct lp_sampler_params * params)1770 static void emit_tex(struct lp_build_nir_context *bld_base,
1771                      struct lp_sampler_params *params)
1772 {
1773    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1774    struct gallivm_state *gallivm = bld_base->base.gallivm;
1775 
1776    params->type = bld_base->base.type;
1777    params->resources_type = bld->resources_type;
1778    params->resources_ptr = bld->resources_ptr;
1779    params->thread_data_type = bld->thread_data_type;
1780    params->thread_data_ptr = bld->thread_data_ptr;
1781    params->exec_mask = mask_vec(bld_base);
1782 
1783    if (params->texture_index_offset && bld_base->shader->info.stage != MESA_SHADER_FRAGMENT) {
1784       /* this is horrible but this can be dynamic */
1785       LLVMValueRef coords[5];
1786       LLVMValueRef *orig_texel_ptr;
1787       struct lp_build_context *uint_bld = &bld_base->uint_bld;
1788       LLVMValueRef result[4] = { LLVMGetUndef(bld_base->base.vec_type),
1789                                  LLVMGetUndef(bld_base->base.vec_type),
1790                                  LLVMGetUndef(bld_base->base.vec_type),
1791                                  LLVMGetUndef(bld_base->base.vec_type) };
1792       LLVMValueRef texel[4], orig_offset, orig_lod;
1793       unsigned i;
1794       orig_texel_ptr = params->texel;
1795       orig_lod = params->lod;
1796       for (i = 0; i < 5; i++) {
1797          coords[i] = params->coords[i];
1798       }
1799       orig_offset = params->texture_index_offset;
1800 
1801       for (unsigned v = 0; v < uint_bld->type.length; v++) {
1802          LLVMValueRef idx = lp_build_const_int32(gallivm, v);
1803          LLVMValueRef new_coords[5];
1804          for (i = 0; i < 5; i++) {
1805             new_coords[i] = LLVMBuildExtractElement(gallivm->builder,
1806                                                     coords[i], idx, "");
1807          }
1808          params->coords = new_coords;
1809          params->texture_index_offset = LLVMBuildExtractElement(gallivm->builder,
1810                                                                 orig_offset,
1811                                                                 idx, "");
1812          params->type = lp_elem_type(bld_base->base.type);
1813 
1814          if (orig_lod)
1815             params->lod = LLVMBuildExtractElement(gallivm->builder, orig_lod, idx, "");
1816          params->texel = texel;
1817          bld->sampler->emit_tex_sample(bld->sampler,
1818                                        gallivm,
1819                                        params);
1820 
1821          for (i = 0; i < 4; i++) {
1822             result[i] = LLVMBuildInsertElement(gallivm->builder, result[i], texel[i], idx, "");
1823          }
1824       }
1825       for (i = 0; i < 4; i++) {
1826          orig_texel_ptr[i] = result[i];
1827       }
1828       return;
1829    }
1830 
1831    if (params->texture_index_offset) {
1832       params->texture_index_offset = LLVMBuildExtractElement(gallivm->builder, params->texture_index_offset,
1833                                                              first_active_invocation(bld_base), "");
1834    }
1835 
1836    if (params->texture_resource)
1837       params->texture_resource = build_resource_to_scalar(bld_base, params->texture_resource);
1838 
1839    if (params->sampler_resource)
1840       params->sampler_resource = build_resource_to_scalar(bld_base, params->sampler_resource);
1841 
1842    params->type = bld_base->base.type;
1843    bld->sampler->emit_tex_sample(bld->sampler,
1844                                  bld->bld_base.base.gallivm,
1845                                  params);
1846 }
1847 
emit_tex_size(struct lp_build_nir_context * bld_base,struct lp_sampler_size_query_params * params)1848 static void emit_tex_size(struct lp_build_nir_context *bld_base,
1849                           struct lp_sampler_size_query_params *params)
1850 {
1851    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1852 
1853    params->int_type = bld_base->int_bld.type;
1854    params->resources_type = bld->resources_type;
1855    params->resources_ptr = bld->resources_ptr;
1856    if (params->texture_unit_offset)
1857       params->texture_unit_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder,
1858                                                              params->texture_unit_offset,
1859                                                              lp_build_const_int32(bld_base->base.gallivm, 0), "");
1860 
1861    params->exec_mask = mask_vec(bld_base);
1862    if (params->resource)
1863       params->resource = build_resource_to_scalar(bld_base, params->resource);
1864 
1865    bld->sampler->emit_size_query(bld->sampler,
1866                                  bld->bld_base.base.gallivm,
1867                                  params);
1868 }
1869 
get_local_invocation_index(struct lp_build_nir_soa_context * bld)1870 static LLVMValueRef get_local_invocation_index(struct lp_build_nir_soa_context *bld)
1871 {
1872    struct lp_build_nir_context *bld_base = &bld->bld_base;
1873    LLVMValueRef tmp, tmp2;
1874 
1875    tmp = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.block_size[1]);
1876    tmp2 = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.block_size[0]);
1877    tmp = lp_build_mul(&bld_base->uint_bld, tmp, tmp2);
1878    tmp = lp_build_mul(&bld_base->uint_bld, tmp, bld->system_values.thread_id[2]);
1879 
1880    tmp2 = lp_build_mul(&bld_base->uint_bld, tmp2, bld->system_values.thread_id[1]);
1881    tmp = lp_build_add(&bld_base->uint_bld, tmp, tmp2);
1882    tmp = lp_build_add(&bld_base->uint_bld, tmp, bld->system_values.thread_id[0]);
1883    return tmp;
1884 }
1885 
emit_sysval_intrin(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1886 static void emit_sysval_intrin(struct lp_build_nir_context *bld_base,
1887                                nir_intrinsic_instr *instr,
1888                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1889 {
1890    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1891    struct gallivm_state *gallivm = bld_base->base.gallivm;
1892    struct lp_build_context *bld_broad = get_int_bld(bld_base, true, instr->def.bit_size);
1893    switch (instr->intrinsic) {
1894    case nir_intrinsic_load_instance_id:
1895       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1896       break;
1897    case nir_intrinsic_load_base_instance:
1898       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.base_instance);
1899       break;
1900    case nir_intrinsic_load_base_vertex:
1901       result[0] = bld->system_values.basevertex;
1902       break;
1903    case nir_intrinsic_load_first_vertex:
1904       result[0] = bld->system_values.firstvertex;
1905       break;
1906    case nir_intrinsic_load_vertex_id:
1907       result[0] = bld->system_values.vertex_id;
1908       break;
1909    case nir_intrinsic_load_primitive_id:
1910       result[0] = bld->system_values.prim_id;
1911       break;
1912    case nir_intrinsic_load_workgroup_id: {
1913       LLVMValueRef tmp[3];
1914       for (unsigned i = 0; i < 3; i++) {
1915          tmp[i] = bld->system_values.block_id[i];
1916          result[i] = lp_build_broadcast_scalar(bld_broad, tmp[i]);
1917       }
1918       break;
1919    }
1920    case nir_intrinsic_load_local_invocation_id:
1921       for (unsigned i = 0; i < 3; i++)
1922          result[i] = bld->system_values.thread_id[i];
1923       break;
1924    case nir_intrinsic_load_local_invocation_index:
1925       result[0] = get_local_invocation_index(bld);
1926       break;
1927    case nir_intrinsic_load_num_workgroups: {
1928       LLVMValueRef tmp[3];
1929       for (unsigned i = 0; i < 3; i++) {
1930          tmp[i] = bld->system_values.grid_size[i];
1931          result[i] = lp_build_broadcast_scalar(bld_broad, tmp[i]);
1932       }
1933       break;
1934    }
1935    case nir_intrinsic_load_invocation_id:
1936       if (bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL)
1937          result[0] = bld->system_values.invocation_id;
1938       else
1939          result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
1940       break;
1941    case nir_intrinsic_load_front_face:
1942       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);
1943       break;
1944    case nir_intrinsic_load_draw_id:
1945       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.draw_id);
1946       break;
1947    default:
1948       break;
1949    case nir_intrinsic_load_workgroup_size:
1950      for (unsigned i = 0; i < 3; i++)
1951        result[i] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.block_size[i]);
1952      break;
1953    case nir_intrinsic_load_work_dim:
1954       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.work_dim);
1955       break;
1956    case nir_intrinsic_load_tess_coord:
1957       for (unsigned i = 0; i < 3; i++) {
1958 	 result[i] = LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_coord, i, "");
1959       }
1960       break;
1961    case nir_intrinsic_load_tess_level_outer:
1962       for (unsigned i = 0; i < 4; i++)
1963          result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_outer, i, ""));
1964       break;
1965    case nir_intrinsic_load_tess_level_inner:
1966       for (unsigned i = 0; i < 2; i++)
1967          result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_inner, i, ""));
1968       break;
1969    case nir_intrinsic_load_patch_vertices_in:
1970       result[0] = bld->system_values.vertices_in;
1971       break;
1972    case nir_intrinsic_load_sample_id:
1973       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.sample_id);
1974       break;
1975    case nir_intrinsic_load_sample_pos:
1976       for (unsigned i = 0; i < 2; i++) {
1977          LLVMValueRef idx = LLVMBuildMul(gallivm->builder, bld->system_values.sample_id, lp_build_const_int32(gallivm, 2), "");
1978          idx = LLVMBuildAdd(gallivm->builder, idx, lp_build_const_int32(gallivm, i), "");
1979          LLVMValueRef val = lp_build_array_get2(gallivm, bld->system_values.sample_pos_type,
1980                                                 bld->system_values.sample_pos, idx);
1981          result[i] = lp_build_broadcast_scalar(&bld_base->base, val);
1982       }
1983       break;
1984    case nir_intrinsic_load_sample_mask_in:
1985       result[0] = bld->system_values.sample_mask_in;
1986       break;
1987    case nir_intrinsic_load_view_index:
1988       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.view_index);
1989       break;
1990    case nir_intrinsic_load_subgroup_invocation: {
1991       LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
1992       for(unsigned i = 0; i < bld->bld_base.base.type.length; ++i)
1993          elems[i] = lp_build_const_int32(gallivm, i);
1994       result[0] = LLVMConstVector(elems, bld->bld_base.base.type.length);
1995       break;
1996    }
1997    case nir_intrinsic_load_subgroup_id:
1998       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.subgroup_id);
1999       break;
2000    case nir_intrinsic_load_num_subgroups:
2001       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.num_subgroups);
2002       break;
2003    }
2004 }
2005 
emit_helper_invocation(struct lp_build_nir_context * bld_base,LLVMValueRef * dst)2006 static void emit_helper_invocation(struct lp_build_nir_context *bld_base,
2007                                    LLVMValueRef *dst)
2008 {
2009    struct gallivm_state *gallivm = bld_base->base.gallivm;
2010    struct lp_build_context *uint_bld = &bld_base->uint_bld;
2011    *dst = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, mask_vec(bld_base), lp_build_const_int_vec(gallivm, uint_bld->type, -1));
2012 }
2013 
bgnloop(struct lp_build_nir_context * bld_base)2014 static void bgnloop(struct lp_build_nir_context *bld_base)
2015 {
2016    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2017    lp_exec_bgnloop(&bld->exec_mask, true);
2018 }
2019 
endloop(struct lp_build_nir_context * bld_base)2020 static void endloop(struct lp_build_nir_context *bld_base)
2021 {
2022    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2023    lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask, bld->mask);
2024 }
2025 
if_cond(struct lp_build_nir_context * bld_base,LLVMValueRef cond)2026 static void if_cond(struct lp_build_nir_context *bld_base, LLVMValueRef cond)
2027 {
2028    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2029    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2030    lp_exec_mask_cond_push(&bld->exec_mask, LLVMBuildBitCast(builder, cond, bld_base->base.int_vec_type, ""));
2031 }
2032 
else_stmt(struct lp_build_nir_context * bld_base)2033 static void else_stmt(struct lp_build_nir_context *bld_base)
2034 {
2035    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2036    lp_exec_mask_cond_invert(&bld->exec_mask);
2037 }
2038 
endif_stmt(struct lp_build_nir_context * bld_base)2039 static void endif_stmt(struct lp_build_nir_context *bld_base)
2040 {
2041    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2042    lp_exec_mask_cond_pop(&bld->exec_mask);
2043 }
2044 
break_stmt(struct lp_build_nir_context * bld_base)2045 static void break_stmt(struct lp_build_nir_context *bld_base)
2046 {
2047    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2048 
2049    lp_exec_break(&bld->exec_mask, NULL, false);
2050 }
2051 
continue_stmt(struct lp_build_nir_context * bld_base)2052 static void continue_stmt(struct lp_build_nir_context *bld_base)
2053 {
2054    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2055    lp_exec_continue(&bld->exec_mask);
2056 }
2057 
discard(struct lp_build_nir_context * bld_base,LLVMValueRef cond)2058 static void discard(struct lp_build_nir_context *bld_base, LLVMValueRef cond)
2059 {
2060    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2061    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2062    LLVMValueRef mask;
2063 
2064    if (!cond) {
2065       if (bld->exec_mask.has_mask) {
2066          mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2067       } else {
2068          mask = LLVMConstNull(bld->bld_base.base.int_vec_type);
2069       }
2070    } else {
2071       mask = LLVMBuildNot(builder, cond, "");
2072       if (bld->exec_mask.has_mask) {
2073          LLVMValueRef invmask;
2074          invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2075          mask = LLVMBuildOr(builder, mask, invmask, "");
2076       }
2077    }
2078    lp_build_mask_update(bld->mask, mask);
2079 }
2080 
2081 static void
increment_vec_ptr_by_mask(struct lp_build_nir_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)2082 increment_vec_ptr_by_mask(struct lp_build_nir_context * bld_base,
2083                           LLVMValueRef ptr,
2084                           LLVMValueRef mask)
2085 {
2086    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2087    LLVMValueRef current_vec = LLVMBuildLoad2(builder, LLVMTypeOf(mask), ptr, "");
2088 
2089    current_vec = LLVMBuildSub(builder, current_vec, mask, "");
2090 
2091    LLVMBuildStore(builder, current_vec, ptr);
2092 }
2093 
2094 static void
clear_uint_vec_ptr_from_mask(struct lp_build_nir_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)2095 clear_uint_vec_ptr_from_mask(struct lp_build_nir_context * bld_base,
2096                              LLVMValueRef ptr,
2097                              LLVMValueRef mask)
2098 {
2099    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2100    LLVMValueRef current_vec = LLVMBuildLoad2(builder, bld_base->uint_bld.vec_type, ptr, "");
2101 
2102    current_vec = lp_build_select(&bld_base->uint_bld,
2103                                  mask,
2104                                  bld_base->uint_bld.zero,
2105                                  current_vec);
2106 
2107    LLVMBuildStore(builder, current_vec, ptr);
2108 }
2109 
2110 static LLVMValueRef
clamp_mask_to_max_output_vertices(struct lp_build_nir_soa_context * bld,LLVMValueRef current_mask_vec,LLVMValueRef total_emitted_vertices_vec)2111 clamp_mask_to_max_output_vertices(struct lp_build_nir_soa_context * bld,
2112                                   LLVMValueRef current_mask_vec,
2113                                   LLVMValueRef total_emitted_vertices_vec)
2114 {
2115    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2116    struct lp_build_context *int_bld = &bld->bld_base.int_bld;
2117    LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
2118                                             total_emitted_vertices_vec,
2119                                             bld->max_output_vertices_vec);
2120 
2121    return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
2122 }
2123 
emit_vertex(struct lp_build_nir_context * bld_base,uint32_t stream_id)2124 static void emit_vertex(struct lp_build_nir_context *bld_base, uint32_t stream_id)
2125 {
2126    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2127    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2128 
2129    if (stream_id >= bld->gs_vertex_streams)
2130       return;
2131    assert(bld->gs_iface->emit_vertex);
2132    LLVMValueRef total_emitted_vertices_vec =
2133       LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->total_emitted_vertices_vec_ptr[stream_id], "");
2134    LLVMValueRef mask = mask_vec(bld_base);
2135    mask = clamp_mask_to_max_output_vertices(bld, mask,
2136                                             total_emitted_vertices_vec);
2137    bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
2138                               bld->outputs,
2139                               total_emitted_vertices_vec,
2140                               mask,
2141                               lp_build_const_int_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, stream_id));
2142 
2143    increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
2144                              mask);
2145    increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr[stream_id],
2146                              mask);
2147 }
2148 
2149 static void
end_primitive_masked(struct lp_build_nir_context * bld_base,LLVMValueRef mask,uint32_t stream_id)2150 end_primitive_masked(struct lp_build_nir_context * bld_base,
2151                      LLVMValueRef mask, uint32_t stream_id)
2152 {
2153    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2154    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2155 
2156    if (stream_id >= bld->gs_vertex_streams)
2157       return;
2158    struct lp_build_context *uint_bld = &bld_base->uint_bld;
2159    LLVMValueRef emitted_vertices_vec =
2160       LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->emitted_vertices_vec_ptr[stream_id], "");
2161    LLVMValueRef emitted_prims_vec =
2162       LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->emitted_prims_vec_ptr[stream_id], "");
2163    LLVMValueRef total_emitted_vertices_vec =
2164       LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->total_emitted_vertices_vec_ptr[stream_id], "");
2165 
2166    LLVMValueRef emitted_mask = lp_build_cmp(uint_bld,
2167                                             PIPE_FUNC_NOTEQUAL,
2168                                             emitted_vertices_vec,
2169                                             uint_bld->zero);
2170    mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
2171    bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
2172 				total_emitted_vertices_vec,
2173 				emitted_vertices_vec, emitted_prims_vec, mask, stream_id);
2174    increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr[stream_id],
2175                              mask);
2176    clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
2177                                 mask);
2178 }
2179 
end_primitive(struct lp_build_nir_context * bld_base,uint32_t stream_id)2180 static void end_primitive(struct lp_build_nir_context *bld_base, uint32_t stream_id)
2181 {
2182    ASSERTED struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2183 
2184    assert(bld->gs_iface->end_primitive);
2185 
2186    LLVMValueRef mask = mask_vec(bld_base);
2187    end_primitive_masked(bld_base, mask, stream_id);
2188 }
2189 
2190 static void
emit_prologue(struct lp_build_nir_soa_context * bld)2191 emit_prologue(struct lp_build_nir_soa_context *bld)
2192 {
2193    struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
2194    if (bld->indirects & nir_var_shader_in && !bld->gs_iface && !bld->tcs_iface && !bld->tes_iface) {
2195       uint32_t num_inputs = bld->num_inputs;
2196       /* If this is an indirect case, the number of inputs should not be 0 */
2197       assert(num_inputs > 0);
2198 
2199       unsigned index, chan;
2200       LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2201       LLVMValueRef array_size = lp_build_const_int32(gallivm, num_inputs * 4);
2202       bld->inputs_array = lp_build_array_alloca(gallivm,
2203                                                vec_type, array_size,
2204                                                "input_array");
2205 
2206       for (index = 0; index < num_inputs; ++index) {
2207          for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
2208             LLVMValueRef lindex =
2209                lp_build_const_int32(gallivm, index * 4 + chan);
2210             LLVMValueRef input_ptr =
2211                LLVMBuildGEP2(gallivm->builder, vec_type, bld->inputs_array, &lindex, 1, "");
2212             LLVMValueRef value = bld->inputs[index][chan];
2213             if (value)
2214                LLVMBuildStore(gallivm->builder, value, input_ptr);
2215          }
2216       }
2217    }
2218 }
2219 
emit_vote(struct lp_build_nir_context * bld_base,LLVMValueRef src,nir_intrinsic_instr * instr,LLVMValueRef result[4])2220 static void emit_vote(struct lp_build_nir_context *bld_base, LLVMValueRef src,
2221                       nir_intrinsic_instr *instr, LLVMValueRef result[4])
2222 {
2223    struct gallivm_state * gallivm = bld_base->base.gallivm;
2224    LLVMBuilderRef builder = gallivm->builder;
2225    uint32_t bit_size = nir_src_bit_size(instr->src[0]);
2226    LLVMValueRef exec_mask = mask_vec(bld_base);
2227    struct lp_build_loop_state loop_state;
2228    LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
2229 
2230    LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->uint_bld.elem_type, "");
2231    LLVMValueRef eq_store = lp_build_alloca(gallivm, get_int_bld(bld_base, true, bit_size)->elem_type, "");
2232    LLVMValueRef init_val = NULL;
2233    if (instr->intrinsic == nir_intrinsic_vote_ieq ||
2234        instr->intrinsic == nir_intrinsic_vote_feq) {
2235       /* for equal we unfortunately have to loop and find the first valid one. */
2236       lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2237       LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
2238 
2239       struct lp_build_if_state ifthen;
2240       lp_build_if(&ifthen, gallivm, if_cond);
2241       LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
2242                                                        loop_state.counter, "");
2243       LLVMBuildStore(builder, value_ptr, eq_store);
2244       LLVMBuildStore(builder, lp_build_const_int32(gallivm, -1), res_store);
2245       lp_build_endif(&ifthen);
2246       lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2247                              NULL, LLVMIntUGE);
2248       init_val = LLVMBuildLoad2(builder, get_int_bld(bld_base, true, bit_size)->elem_type, eq_store, "");
2249    } else {
2250       LLVMBuildStore(builder, lp_build_const_int32(gallivm, instr->intrinsic == nir_intrinsic_vote_any ? 0 : -1), res_store);
2251    }
2252 
2253    LLVMValueRef res;
2254    lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2255    LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
2256                                                        loop_state.counter, "");
2257    struct lp_build_if_state ifthen;
2258    LLVMValueRef if_cond;
2259    if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
2260 
2261    lp_build_if(&ifthen, gallivm, if_cond);
2262    res = LLVMBuildLoad2(builder, bld_base->uint_bld.elem_type, res_store, "");
2263 
2264    if (instr->intrinsic == nir_intrinsic_vote_feq) {
2265       struct lp_build_context *flt_bld = get_flt_bld(bld_base, bit_size);
2266       LLVMValueRef tmp = LLVMBuildFCmp(builder, LLVMRealUEQ,
2267                                        LLVMBuildBitCast(builder, init_val, flt_bld->elem_type, ""),
2268                                        LLVMBuildBitCast(builder, value_ptr, flt_bld->elem_type, ""), "");
2269       tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, "");
2270       res = LLVMBuildAnd(builder, res, tmp, "");
2271    } else if (instr->intrinsic == nir_intrinsic_vote_ieq) {
2272       LLVMValueRef tmp = LLVMBuildICmp(builder, LLVMIntEQ, init_val, value_ptr, "");
2273       tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, "");
2274       res = LLVMBuildAnd(builder, res, tmp, "");
2275    } else if (instr->intrinsic == nir_intrinsic_vote_any)
2276       res = LLVMBuildOr(builder, res, value_ptr, "");
2277    else
2278       res = LLVMBuildAnd(builder, res, value_ptr, "");
2279    LLVMBuildStore(builder, res, res_store);
2280    lp_build_endif(&ifthen);
2281    lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2282                           NULL, LLVMIntUGE);
2283    result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld,
2284                                          LLVMBuildLoad2(builder, bld_base->uint_bld.elem_type, res_store, ""));
2285 }
2286 
emit_ballot(struct lp_build_nir_context * bld_base,LLVMValueRef src,nir_intrinsic_instr * instr,LLVMValueRef result[4])2287 static void emit_ballot(struct lp_build_nir_context *bld_base, LLVMValueRef src, nir_intrinsic_instr *instr, LLVMValueRef result[4])
2288 {
2289    struct gallivm_state * gallivm = bld_base->base.gallivm;
2290    LLVMBuilderRef builder = gallivm->builder;
2291    LLVMValueRef exec_mask = mask_vec(bld_base);
2292    struct lp_build_loop_state loop_state;
2293    src = LLVMBuildAnd(builder, src, exec_mask, "");
2294    LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
2295    LLVMValueRef res;
2296    lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2297    LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
2298                                                     loop_state.counter, "");
2299    res = LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, res_store, "");
2300    res = LLVMBuildOr(builder,
2301                      res,
2302                      LLVMBuildAnd(builder, value_ptr, LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1), loop_state.counter, ""), ""), "");
2303    LLVMBuildStore(builder, res, res_store);
2304 
2305    lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2306                           NULL, LLVMIntUGE);
2307    result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld,
2308                                          LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, res_store, ""));
2309 }
2310 
emit_elect(struct lp_build_nir_context * bld_base,LLVMValueRef result[4])2311 static void emit_elect(struct lp_build_nir_context *bld_base, LLVMValueRef result[4])
2312 {
2313    struct gallivm_state *gallivm = bld_base->base.gallivm;
2314    LLVMBuilderRef builder = gallivm->builder;
2315    LLVMValueRef exec_mask = mask_vec(bld_base);
2316    struct lp_build_loop_state loop_state;
2317 
2318    LLVMValueRef idx_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
2319    LLVMValueRef found_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
2320    lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2321    LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, exec_mask,
2322                                                     loop_state.counter, "");
2323    LLVMValueRef cond = LLVMBuildICmp(gallivm->builder,
2324                                      LLVMIntEQ,
2325                                      value_ptr,
2326                                      lp_build_const_int32(gallivm, -1), "");
2327    LLVMValueRef cond2 = LLVMBuildICmp(gallivm->builder,
2328                                       LLVMIntEQ,
2329                                       LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, found_store, ""),
2330                                       lp_build_const_int32(gallivm, 0), "");
2331 
2332    cond = LLVMBuildAnd(builder, cond, cond2, "");
2333    struct lp_build_if_state ifthen;
2334    lp_build_if(&ifthen, gallivm, cond);
2335    LLVMBuildStore(builder, lp_build_const_int32(gallivm, 1), found_store);
2336    LLVMBuildStore(builder, loop_state.counter, idx_store);
2337    lp_build_endif(&ifthen);
2338    lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2339                           NULL, LLVMIntUGE);
2340 
2341    result[0] = LLVMBuildInsertElement(builder, bld_base->uint_bld.zero,
2342                                       lp_build_const_int32(gallivm, -1),
2343                                       LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, idx_store, ""),
2344                                       "");
2345 }
2346 
2347 #if LLVM_VERSION_MAJOR >= 10
emit_shuffle(struct lp_build_nir_context * bld_base,LLVMValueRef src,LLVMValueRef index,nir_intrinsic_instr * instr,LLVMValueRef result[4])2348 static void emit_shuffle(struct lp_build_nir_context *bld_base, LLVMValueRef src, LLVMValueRef index,
2349                         nir_intrinsic_instr *instr, LLVMValueRef result[4])
2350 {
2351    assert(instr->intrinsic == nir_intrinsic_shuffle);
2352 
2353    struct gallivm_state *gallivm = bld_base->base.gallivm;
2354    LLVMBuilderRef builder = gallivm->builder;
2355    uint32_t bit_size = nir_src_bit_size(instr->src[0]);
2356    uint32_t index_bit_size = nir_src_bit_size(instr->src[1]);
2357    struct lp_build_context *int_bld = get_int_bld(bld_base, true, bit_size);
2358 
2359    if (util_get_cpu_caps()->has_avx2 && bit_size == 32 && index_bit_size == 32 && int_bld->type.length == 8) {
2360       /* freeze `src` in case inactive invocations contain poison */
2361       src = LLVMBuildFreeze(builder, src, "");
2362       result[0] = lp_build_intrinsic_binary(builder, "llvm.x86.avx2.permd", int_bld->vec_type, src, index);
2363    } else {
2364       LLVMValueRef res_store = lp_build_alloca(gallivm, int_bld->vec_type, "");
2365       struct lp_build_loop_state loop_state;
2366       lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2367 
2368       LLVMValueRef index_value = LLVMBuildExtractElement(builder, index, loop_state.counter, "");
2369 
2370       LLVMValueRef src_value = LLVMBuildExtractElement(builder, src, index_value, "");
2371       /* freeze `src_value` in case an out-of-bounds index or an index into an
2372        * inactive invocation results in poison
2373        */
2374       src_value = LLVMBuildFreeze(builder, src_value, "");
2375 
2376       LLVMValueRef res = LLVMBuildLoad2(builder, int_bld->vec_type, res_store, "");
2377       res = LLVMBuildInsertElement(builder, res, src_value, loop_state.counter, "");
2378       LLVMBuildStore(builder, res, res_store);
2379 
2380       lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2381                              NULL, LLVMIntUGE);
2382 
2383       result[0] = LLVMBuildLoad2(builder, int_bld->vec_type, res_store, "");
2384    }
2385 }
2386 #endif
2387 
emit_reduce(struct lp_build_nir_context * bld_base,LLVMValueRef src,nir_intrinsic_instr * instr,LLVMValueRef result[4])2388 static void emit_reduce(struct lp_build_nir_context *bld_base, LLVMValueRef src,
2389                         nir_intrinsic_instr *instr, LLVMValueRef result[4])
2390 {
2391    struct gallivm_state *gallivm = bld_base->base.gallivm;
2392    LLVMBuilderRef builder = gallivm->builder;
2393    uint32_t bit_size = nir_src_bit_size(instr->src[0]);
2394    /* can't use llvm reduction intrinsics because of exec_mask */
2395    LLVMValueRef exec_mask = mask_vec(bld_base);
2396    struct lp_build_loop_state loop_state;
2397    nir_op reduction_op = nir_intrinsic_reduction_op(instr);
2398 
2399    LLVMValueRef res_store = NULL;
2400    LLVMValueRef scan_store;
2401    struct lp_build_context *int_bld = get_int_bld(bld_base, true, bit_size);
2402 
2403    if (instr->intrinsic != nir_intrinsic_reduce)
2404       res_store = lp_build_alloca(gallivm, int_bld->vec_type, "");
2405 
2406    scan_store = lp_build_alloca(gallivm, int_bld->elem_type, "");
2407 
2408    struct lp_build_context elem_bld;
2409    bool is_flt = reduction_op == nir_op_fadd ||
2410       reduction_op == nir_op_fmul ||
2411       reduction_op == nir_op_fmin ||
2412       reduction_op == nir_op_fmax;
2413    bool is_unsigned = reduction_op == nir_op_umin ||
2414       reduction_op == nir_op_umax;
2415 
2416    struct lp_build_context *vec_bld = is_flt ? get_flt_bld(bld_base, bit_size) :
2417       get_int_bld(bld_base, is_unsigned, bit_size);
2418 
2419    lp_build_context_init(&elem_bld, gallivm, lp_elem_type(vec_bld->type));
2420 
2421    LLVMValueRef store_val = NULL;
2422    /*
2423     * Put the identity value for the operation into the storage
2424     */
2425    switch (reduction_op) {
2426    case nir_op_fmin: {
2427       LLVMValueRef flt_max = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), INFINITY) :
2428          (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), INFINITY) : lp_build_const_float(gallivm, INFINITY));
2429       store_val = LLVMBuildBitCast(builder, flt_max, int_bld->elem_type, "");
2430       break;
2431    }
2432    case nir_op_fmax: {
2433       LLVMValueRef flt_min = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), -INFINITY) :
2434          (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), -INFINITY) : lp_build_const_float(gallivm, -INFINITY));
2435       store_val = LLVMBuildBitCast(builder, flt_min, int_bld->elem_type, "");
2436       break;
2437    }
2438    case nir_op_fmul: {
2439       LLVMValueRef flt_one = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), 1.0) :
2440          (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), 1.0) : lp_build_const_float(gallivm, 1.0));
2441       store_val = LLVMBuildBitCast(builder, flt_one, int_bld->elem_type, "");
2442       break;
2443    }
2444    case nir_op_umin:
2445       switch (bit_size) {
2446       case 8:
2447          store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), UINT8_MAX, 0);
2448          break;
2449       case 16:
2450          store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), UINT16_MAX, 0);
2451          break;
2452       case 32:
2453       default:
2454          store_val  = lp_build_const_int32(gallivm, UINT_MAX);
2455          break;
2456       case 64:
2457          store_val  = lp_build_const_int64(gallivm, UINT64_MAX);
2458          break;
2459       }
2460       break;
2461    case nir_op_imin:
2462       switch (bit_size) {
2463       case 8:
2464          store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), INT8_MAX, 0);
2465          break;
2466       case 16:
2467          store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), INT16_MAX, 0);
2468          break;
2469       case 32:
2470       default:
2471          store_val  = lp_build_const_int32(gallivm, INT_MAX);
2472          break;
2473       case 64:
2474          store_val  = lp_build_const_int64(gallivm, INT64_MAX);
2475          break;
2476       }
2477       break;
2478    case nir_op_imax:
2479       switch (bit_size) {
2480       case 8:
2481          store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), INT8_MIN, 0);
2482          break;
2483       case 16:
2484          store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), INT16_MIN, 0);
2485          break;
2486       case 32:
2487       default:
2488          store_val  = lp_build_const_int32(gallivm, INT_MIN);
2489          break;
2490       case 64:
2491          store_val  = lp_build_const_int64(gallivm, INT64_MIN);
2492          break;
2493       }
2494       break;
2495    case nir_op_imul:
2496       switch (bit_size) {
2497       case 8:
2498          store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 1, 0);
2499          break;
2500       case 16:
2501          store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 1, 0);
2502          break;
2503       case 32:
2504       default:
2505          store_val  = lp_build_const_int32(gallivm, 1);
2506          break;
2507       case 64:
2508          store_val  = lp_build_const_int64(gallivm, 1);
2509          break;
2510       }
2511       break;
2512    case nir_op_iand:
2513       switch (bit_size) {
2514       case 8:
2515          store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0xff, 0);
2516          break;
2517       case 16:
2518          store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0xffff, 0);
2519          break;
2520       case 32:
2521       default:
2522          store_val  = lp_build_const_int32(gallivm, 0xffffffff);
2523          break;
2524       case 64:
2525          store_val  = lp_build_const_int64(gallivm, 0xffffffffffffffffLL);
2526          break;
2527       }
2528       break;
2529    default:
2530       break;
2531    }
2532    if (store_val)
2533       LLVMBuildStore(builder, store_val, scan_store);
2534 
2535    LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
2536 
2537    lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2538 
2539    struct lp_build_if_state ifthen;
2540    LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
2541    lp_build_if(&ifthen, gallivm, if_cond);
2542    LLVMValueRef value = LLVMBuildExtractElement(gallivm->builder, src, loop_state.counter, "");
2543 
2544    LLVMValueRef res = NULL;
2545    LLVMValueRef scan_val = LLVMBuildLoad2(gallivm->builder, int_bld->elem_type, scan_store, "");
2546    if (instr->intrinsic != nir_intrinsic_reduce)
2547       res = LLVMBuildLoad2(gallivm->builder, int_bld->vec_type, res_store, "");
2548 
2549    if (instr->intrinsic == nir_intrinsic_exclusive_scan)
2550       res = LLVMBuildInsertElement(builder, res, scan_val, loop_state.counter, "");
2551 
2552    if (is_flt) {
2553       scan_val = LLVMBuildBitCast(builder, scan_val, elem_bld.elem_type, "");
2554       value = LLVMBuildBitCast(builder, value, elem_bld.elem_type, "");
2555    }
2556    switch (reduction_op) {
2557    case nir_op_fadd:
2558    case nir_op_iadd:
2559       scan_val = lp_build_add(&elem_bld, value, scan_val);
2560       break;
2561    case nir_op_fmul:
2562    case nir_op_imul:
2563       scan_val = lp_build_mul(&elem_bld, value, scan_val);
2564       break;
2565    case nir_op_imin:
2566    case nir_op_umin:
2567    case nir_op_fmin:
2568       scan_val = lp_build_min(&elem_bld, value, scan_val);
2569       break;
2570    case nir_op_imax:
2571    case nir_op_umax:
2572    case nir_op_fmax:
2573       scan_val = lp_build_max(&elem_bld, value, scan_val);
2574       break;
2575    case nir_op_iand:
2576       scan_val = lp_build_and(&elem_bld, value, scan_val);
2577       break;
2578    case nir_op_ior:
2579       scan_val = lp_build_or(&elem_bld, value, scan_val);
2580       break;
2581    case nir_op_ixor:
2582       scan_val = lp_build_xor(&elem_bld, value, scan_val);
2583       break;
2584    default:
2585       assert(0);
2586       break;
2587    }
2588    if (is_flt)
2589       scan_val = LLVMBuildBitCast(builder, scan_val, int_bld->elem_type, "");
2590    LLVMBuildStore(builder, scan_val, scan_store);
2591 
2592    if (instr->intrinsic == nir_intrinsic_inclusive_scan) {
2593       res = LLVMBuildInsertElement(builder, res, scan_val, loop_state.counter, "");
2594    }
2595 
2596    if (instr->intrinsic != nir_intrinsic_reduce)
2597       LLVMBuildStore(builder, res, res_store);
2598    lp_build_endif(&ifthen);
2599 
2600    lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2601                           NULL, LLVMIntUGE);
2602    if (instr->intrinsic == nir_intrinsic_reduce)
2603       result[0] = lp_build_broadcast_scalar(int_bld, LLVMBuildLoad2(builder, int_bld->elem_type, scan_store, ""));
2604    else
2605       result[0] = LLVMBuildLoad2(builder, int_bld->vec_type, res_store, "");
2606 }
2607 
emit_read_invocation(struct lp_build_nir_context * bld_base,LLVMValueRef src,unsigned bit_size,LLVMValueRef invoc,LLVMValueRef result[4])2608 static void emit_read_invocation(struct lp_build_nir_context *bld_base,
2609                                  LLVMValueRef src,
2610                                  unsigned bit_size,
2611                                  LLVMValueRef invoc,
2612                                  LLVMValueRef result[4])
2613 {
2614    struct gallivm_state *gallivm = bld_base->base.gallivm;
2615    LLVMValueRef idx = first_active_invocation(bld_base);
2616    struct lp_build_context *uint_bld = get_int_bld(bld_base, true, bit_size);
2617 
2618    /* If we're emitting readInvocation() (as opposed to readFirstInvocation),
2619     * use the first active channel to pull the invocation index number out of
2620     * the invocation arg.
2621     */
2622    if (invoc)
2623       idx = LLVMBuildExtractElement(gallivm->builder, invoc, idx, "");
2624 
2625    LLVMValueRef value = LLVMBuildExtractElement(gallivm->builder,
2626                                                 src, idx, "");
2627    result[0] = lp_build_broadcast_scalar(uint_bld, value);
2628 }
2629 
2630 static void
emit_interp_at(struct lp_build_nir_context * bld_base,unsigned num_components,nir_variable * var,bool centroid,bool sample,unsigned const_index,LLVMValueRef indir_index,LLVMValueRef offsets[2],LLVMValueRef dst[4])2631 emit_interp_at(struct lp_build_nir_context *bld_base,
2632                unsigned num_components,
2633                nir_variable *var,
2634                bool centroid,
2635                bool sample,
2636                unsigned const_index,
2637                LLVMValueRef indir_index,
2638                LLVMValueRef offsets[2],
2639                LLVMValueRef dst[4])
2640 {
2641    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2642 
2643    for (unsigned i = 0; i < num_components; i++) {
2644       dst[i] = bld->fs_iface->interp_fn(bld->fs_iface, &bld_base->base,
2645                                         const_index + var->data.driver_location, i + var->data.location_frac,
2646                                         centroid, sample, indir_index, offsets);
2647    }
2648 }
2649 
2650 static void
emit_set_vertex_and_primitive_count(struct lp_build_nir_context * bld_base,LLVMValueRef vert_count,LLVMValueRef prim_count)2651 emit_set_vertex_and_primitive_count(struct lp_build_nir_context *bld_base,
2652                                     LLVMValueRef vert_count,
2653                                     LLVMValueRef prim_count)
2654 {
2655    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2656    struct gallivm_state *gallivm = bld_base->base.gallivm;
2657    assert(bld->mesh_iface);
2658    LLVMValueRef idx = first_active_invocation(bld_base);
2659 
2660    LLVMValueRef vcount = LLVMBuildExtractElement(gallivm->builder,
2661                                                  vert_count, idx, "");
2662    LLVMValueRef pcount = LLVMBuildExtractElement(gallivm->builder,
2663                                                  prim_count, idx, "");
2664 
2665    bld->mesh_iface->emit_vertex_and_primitive_count(bld->mesh_iface, &bld_base->base, vcount, pcount);
2666 }
2667 
2668 static void
emit_launch_mesh_workgroups(struct lp_build_nir_context * bld_base,LLVMValueRef launch_grid)2669 emit_launch_mesh_workgroups(struct lp_build_nir_context *bld_base,
2670                             LLVMValueRef launch_grid)
2671 {
2672    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2673    struct gallivm_state *gallivm = bld_base->base.gallivm;
2674    LLVMTypeRef vec_type = LLVMArrayType(LLVMInt32TypeInContext(gallivm->context), 3);
2675 
2676    LLVMValueRef local_invoc_idx = get_local_invocation_index(bld);
2677 
2678    vec_type = LLVMPointerType(vec_type, 0);
2679 
2680    local_invoc_idx = LLVMBuildExtractElement(gallivm->builder, local_invoc_idx, lp_build_const_int32(gallivm, 0), "");
2681    LLVMValueRef if_cond = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, local_invoc_idx, lp_build_const_int32(gallivm, 0), "");
2682    struct lp_build_if_state ifthen;
2683    lp_build_if(&ifthen, gallivm, if_cond);
2684    LLVMValueRef ptr = bld->payload_ptr;
2685    ptr = LLVMBuildPtrToInt(gallivm->builder, ptr, bld_base->int64_bld.elem_type, "");
2686    for (unsigned i = 0; i < 3; i++) {
2687       LLVMValueRef lg = LLVMBuildExtractValue(gallivm->builder, launch_grid, i, "");
2688       lg = LLVMBuildExtractElement(gallivm->builder, lg, lp_build_const_int32(gallivm, 0), "");
2689       LLVMValueRef this_ptr = LLVMBuildIntToPtr(gallivm->builder, ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), "");
2690       LLVMBuildStore(gallivm->builder, lg, this_ptr);
2691       ptr = LLVMBuildAdd(gallivm->builder, ptr, lp_build_const_int64(gallivm, 4), "");
2692    }
2693    lp_build_endif(&ifthen);
2694 }
2695 
2696 static void
emit_call(struct lp_build_nir_context * bld_base,struct lp_build_fn * fn,int num_args,LLVMValueRef * args)2697 emit_call(struct lp_build_nir_context *bld_base,
2698           struct lp_build_fn *fn,
2699           int num_args,
2700           LLVMValueRef *args)
2701 {
2702    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2703 
2704    args[0] = mask_vec(bld_base);
2705    args[1] = bld->call_context_ptr;
2706    LLVMBuildCall2(bld_base->base.gallivm->builder, fn->fn_type, fn->fn, args, num_args, "");
2707 }
2708 
get_scratch_thread_offsets(struct gallivm_state * gallivm,struct lp_type type,unsigned scratch_size)2709 static LLVMValueRef get_scratch_thread_offsets(struct gallivm_state *gallivm,
2710                                                struct lp_type type,
2711                                                unsigned scratch_size)
2712 {
2713    LLVMTypeRef elem_type = lp_build_int_elem_type(gallivm, type);
2714    LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
2715    unsigned i;
2716 
2717    if (type.length == 1)
2718       return LLVMConstInt(elem_type, 0, 0);
2719 
2720    for (i = 0; i < type.length; ++i)
2721       elems[i] = LLVMConstInt(elem_type, scratch_size * i, 0);
2722 
2723    return LLVMConstVector(elems, type.length);
2724 }
2725 
2726 static void
emit_load_scratch(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,LLVMValueRef offset,LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])2727 emit_load_scratch(struct lp_build_nir_context *bld_base,
2728                   unsigned nc, unsigned bit_size,
2729                   LLVMValueRef offset,
2730                   LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
2731 {
2732    struct gallivm_state * gallivm = bld_base->base.gallivm;
2733    LLVMBuilderRef builder = gallivm->builder;
2734    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2735    struct lp_build_context *uint_bld = &bld_base->uint_bld;
2736    struct lp_build_context *load_bld;
2737    LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);
2738    LLVMValueRef exec_mask = mask_vec(bld_base);
2739    LLVMValueRef scratch_ptr_vec = lp_build_broadcast(gallivm,
2740                                                      LLVMVectorType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), uint_bld->type.length),
2741                                                      bld->scratch_ptr);
2742    load_bld = get_int_bld(bld_base, true, bit_size);
2743 
2744    offset = lp_build_add(uint_bld, offset, thread_offsets);
2745 
2746    for (unsigned c = 0; c < nc; c++) {
2747       LLVMValueRef chan_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c * (bit_size / 8)));
2748 
2749       outval[c] = lp_build_masked_gather(gallivm, load_bld->type.length, bit_size,
2750                                          load_bld->vec_type,
2751                                          lp_vec_add_offset_ptr(bld_base, bit_size,
2752                                                                scratch_ptr_vec,
2753                                                                chan_offset),
2754                                          exec_mask);
2755       outval[c] = LLVMBuildBitCast(builder, outval[c], load_bld->vec_type, "");
2756    }
2757 }
2758 
2759 static void
emit_store_scratch(struct lp_build_nir_context * bld_base,unsigned writemask,unsigned nc,unsigned bit_size,LLVMValueRef offset,LLVMValueRef dst)2760 emit_store_scratch(struct lp_build_nir_context *bld_base,
2761                    unsigned writemask, unsigned nc,
2762                    unsigned bit_size, LLVMValueRef offset,
2763                    LLVMValueRef dst)
2764 {
2765    struct gallivm_state * gallivm = bld_base->base.gallivm;
2766    LLVMBuilderRef builder = gallivm->builder;
2767    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2768    struct lp_build_context *uint_bld = &bld_base->uint_bld;
2769    struct lp_build_context *store_bld;
2770    LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);
2771    LLVMValueRef scratch_ptr_vec = lp_build_broadcast(gallivm,
2772                                                      LLVMVectorType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), uint_bld->type.length),
2773                                                      bld->scratch_ptr);
2774    store_bld = get_int_bld(bld_base, true, bit_size);
2775 
2776    LLVMValueRef exec_mask = mask_vec(bld_base);
2777    offset = lp_build_add(uint_bld, offset, thread_offsets);
2778 
2779    for (unsigned c = 0; c < nc; c++) {
2780       if (!(writemask & (1u << c)))
2781          continue;
2782       LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
2783 
2784       LLVMValueRef chan_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c * (bit_size / 8)));
2785 
2786       val = LLVMBuildBitCast(builder, val, store_bld->vec_type, "");
2787 
2788       lp_build_masked_scatter(gallivm, store_bld->type.length, bit_size,
2789                               lp_vec_add_offset_ptr(bld_base, bit_size,
2790                                                     scratch_ptr_vec, chan_offset),
2791                               val, exec_mask);
2792    }
2793 }
2794 
2795 static void
emit_clock(struct lp_build_nir_context * bld_base,LLVMValueRef dst[4])2796 emit_clock(struct lp_build_nir_context *bld_base,
2797            LLVMValueRef dst[4])
2798 {
2799    struct gallivm_state *gallivm = bld_base->base.gallivm;
2800    LLVMBuilderRef builder = gallivm->builder;
2801    struct lp_build_context *uint_bld = get_int_bld(bld_base, true, 32);
2802 
2803    lp_init_clock_hook(gallivm);
2804 
2805    LLVMTypeRef get_time_type = LLVMFunctionType(LLVMInt64TypeInContext(gallivm->context), NULL, 0, 1);
2806    LLVMValueRef result = LLVMBuildCall2(builder, get_time_type, gallivm->get_time_hook, NULL, 0, "");
2807 
2808    LLVMValueRef hi = LLVMBuildShl(builder, result, lp_build_const_int64(gallivm, 32), "");
2809    hi = LLVMBuildTrunc(builder, hi, uint_bld->elem_type, "");
2810    LLVMValueRef lo = LLVMBuildTrunc(builder, result, uint_bld->elem_type, "");
2811    dst[0] = lp_build_broadcast_scalar(uint_bld, lo);
2812    dst[1] = lp_build_broadcast_scalar(uint_bld, hi);
2813 }
2814 
2815 LLVMTypeRef
lp_build_cs_func_call_context(struct gallivm_state * gallivm,int length,LLVMTypeRef context_type,LLVMTypeRef resources_type)2816 lp_build_cs_func_call_context(struct gallivm_state *gallivm, int length,
2817                               LLVMTypeRef context_type, LLVMTypeRef resources_type)
2818 {
2819    LLVMTypeRef args[LP_NIR_CALL_CONTEXT_MAX_ARGS];
2820 
2821    args[LP_NIR_CALL_CONTEXT_CONTEXT] = LLVMPointerType(context_type, 0);
2822    args[LP_NIR_CALL_CONTEXT_RESOURCES] = LLVMPointerType(resources_type, 0);
2823    args[LP_NIR_CALL_CONTEXT_SHARED] = LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0); /* shared_ptr */
2824    args[LP_NIR_CALL_CONTEXT_SCRATCH] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); /* scratch ptr */
2825    args[LP_NIR_CALL_CONTEXT_WORK_DIM] = LLVMInt32TypeInContext(gallivm->context); /* work_dim */
2826    args[LP_NIR_CALL_CONTEXT_THREAD_ID_0] = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), length); /* system_values.thread_id[0] */
2827    args[LP_NIR_CALL_CONTEXT_THREAD_ID_1] = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), length); /* system_values.thread_id[1] */
2828    args[LP_NIR_CALL_CONTEXT_THREAD_ID_2] = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), length); /* system_values.thread_id[2] */
2829    args[LP_NIR_CALL_CONTEXT_BLOCK_ID_0] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_id[0] */
2830    args[LP_NIR_CALL_CONTEXT_BLOCK_ID_1] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_id[1] */
2831    args[LP_NIR_CALL_CONTEXT_BLOCK_ID_2] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_id[2] */
2832 
2833    args[LP_NIR_CALL_CONTEXT_GRID_SIZE_0] = LLVMInt32TypeInContext(gallivm->context); /* system_values.grid_size[0] */
2834    args[LP_NIR_CALL_CONTEXT_GRID_SIZE_1] = LLVMInt32TypeInContext(gallivm->context); /* system_values.grid_size[1] */
2835    args[LP_NIR_CALL_CONTEXT_GRID_SIZE_2] = LLVMInt32TypeInContext(gallivm->context); /* system_values.grid_size[2] */
2836    args[LP_NIR_CALL_CONTEXT_BLOCK_SIZE_0] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_size[0] */
2837    args[LP_NIR_CALL_CONTEXT_BLOCK_SIZE_1] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_size[1] */
2838    args[LP_NIR_CALL_CONTEXT_BLOCK_SIZE_2] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_size[2] */
2839 
2840    LLVMTypeRef stype = LLVMStructTypeInContext(gallivm->context, args, LP_NIR_CALL_CONTEXT_MAX_ARGS, 0);
2841    return stype;
2842 }
2843 
2844 static void
build_call_context(struct lp_build_nir_soa_context * bld)2845 build_call_context(struct lp_build_nir_soa_context *bld)
2846 {
2847    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2848    bld->call_context_ptr = lp_build_alloca(gallivm, bld->call_context_type, "callcontext");
2849    LLVMValueRef call_context = LLVMGetUndef(bld->call_context_type);
2850    call_context = LLVMBuildInsertValue(gallivm->builder,
2851                                        call_context, bld->context_ptr, LP_NIR_CALL_CONTEXT_CONTEXT, "");
2852    call_context = LLVMBuildInsertValue(gallivm->builder,
2853                                        call_context, bld->resources_ptr, LP_NIR_CALL_CONTEXT_RESOURCES, "");
2854    if (bld->shared_ptr) {
2855       call_context = LLVMBuildInsertValue(gallivm->builder,
2856                                           call_context, bld->shared_ptr, LP_NIR_CALL_CONTEXT_SHARED, "");
2857    } else {
2858       call_context = LLVMBuildInsertValue(gallivm->builder, call_context,
2859                                           LLVMConstNull(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0)),
2860                                           LP_NIR_CALL_CONTEXT_SHARED, "");
2861    }
2862    if (bld->scratch_ptr) {
2863       call_context = LLVMBuildInsertValue(gallivm->builder,
2864                                           call_context, bld->scratch_ptr, LP_NIR_CALL_CONTEXT_SCRATCH, "");
2865    } else {
2866       call_context = LLVMBuildInsertValue(gallivm->builder, call_context,
2867                                           LLVMConstNull(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0)),
2868                                           LP_NIR_CALL_CONTEXT_SCRATCH, "");
2869    }
2870    call_context = LLVMBuildInsertValue(gallivm->builder,
2871                                        call_context, bld->system_values.work_dim, LP_NIR_CALL_CONTEXT_WORK_DIM, "");
2872    call_context = LLVMBuildInsertValue(gallivm->builder,
2873                                        call_context, bld->system_values.thread_id[0], LP_NIR_CALL_CONTEXT_THREAD_ID_0, "");
2874    call_context = LLVMBuildInsertValue(gallivm->builder,
2875                                        call_context, bld->system_values.thread_id[1], LP_NIR_CALL_CONTEXT_THREAD_ID_1, "");
2876    call_context = LLVMBuildInsertValue(gallivm->builder,
2877                                        call_context, bld->system_values.thread_id[2], LP_NIR_CALL_CONTEXT_THREAD_ID_2, "");
2878    call_context = LLVMBuildInsertValue(gallivm->builder,
2879                                        call_context, bld->system_values.block_id[0], LP_NIR_CALL_CONTEXT_BLOCK_ID_0, "");
2880    call_context = LLVMBuildInsertValue(gallivm->builder,
2881                                        call_context, bld->system_values.block_id[1], LP_NIR_CALL_CONTEXT_BLOCK_ID_1, "");
2882    call_context = LLVMBuildInsertValue(gallivm->builder,
2883                                        call_context, bld->system_values.block_id[2], LP_NIR_CALL_CONTEXT_BLOCK_ID_2, "");
2884    call_context = LLVMBuildInsertValue(gallivm->builder,
2885                                        call_context, bld->system_values.grid_size[0], LP_NIR_CALL_CONTEXT_GRID_SIZE_0, "");
2886    call_context = LLVMBuildInsertValue(gallivm->builder,
2887                                        call_context, bld->system_values.grid_size[1], LP_NIR_CALL_CONTEXT_GRID_SIZE_1, "");
2888    call_context = LLVMBuildInsertValue(gallivm->builder,
2889                                        call_context, bld->system_values.grid_size[2], LP_NIR_CALL_CONTEXT_GRID_SIZE_2, "");
2890    call_context = LLVMBuildInsertValue(gallivm->builder,
2891                                        call_context, bld->system_values.block_size[0], LP_NIR_CALL_CONTEXT_BLOCK_SIZE_0, "");
2892    call_context = LLVMBuildInsertValue(gallivm->builder,
2893                                        call_context, bld->system_values.block_size[1], LP_NIR_CALL_CONTEXT_BLOCK_SIZE_1, "");
2894    call_context = LLVMBuildInsertValue(gallivm->builder,
2895                                        call_context, bld->system_values.block_size[2], LP_NIR_CALL_CONTEXT_BLOCK_SIZE_2, "");
2896    LLVMBuildStore(gallivm->builder, call_context, bld->call_context_ptr);
2897 }
2898 
lp_build_nir_soa_func(struct gallivm_state * gallivm,struct nir_shader * shader,nir_function_impl * impl,const struct lp_build_tgsi_params * params,LLVMValueRef (* outputs)[4])2899 void lp_build_nir_soa_func(struct gallivm_state *gallivm,
2900                            struct nir_shader *shader,
2901                            nir_function_impl *impl,
2902                            const struct lp_build_tgsi_params *params,
2903                            LLVMValueRef (*outputs)[4])
2904 {
2905    struct lp_build_nir_soa_context bld;
2906    const struct lp_type type = params->type;
2907    struct lp_type res_type;
2908 
2909    assert(type.length <= LP_MAX_VECTOR_LENGTH);
2910    memset(&res_type, 0, sizeof res_type);
2911    res_type.width = type.width;
2912    res_type.length = type.length;
2913    res_type.sign = 1;
2914 
2915    /* Setup build context */
2916    memset(&bld, 0, sizeof bld);
2917    lp_build_context_init(&bld.bld_base.base, gallivm, type);
2918    lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
2919    lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
2920    lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
2921    lp_build_context_init(&bld.uint_elem_bld, gallivm, lp_elem_type(lp_uint_type(type)));
2922    {
2923       struct lp_type dbl_type;
2924       dbl_type = type;
2925       dbl_type.width *= 2;
2926       lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
2927    }
2928    {
2929       struct lp_type half_type;
2930       half_type = type;
2931       half_type.width /= 2;
2932       lp_build_context_init(&bld.bld_base.half_bld, gallivm, half_type);
2933    }
2934    {
2935       struct lp_type uint64_type;
2936       uint64_type = lp_uint_type(type);
2937       uint64_type.width *= 2;
2938       lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
2939    }
2940    {
2941       struct lp_type int64_type;
2942       int64_type = lp_int_type(type);
2943       int64_type.width *= 2;
2944       lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
2945    }
2946    {
2947       struct lp_type uint16_type;
2948       uint16_type = lp_uint_type(type);
2949       uint16_type.width /= 2;
2950       lp_build_context_init(&bld.bld_base.uint16_bld, gallivm, uint16_type);
2951    }
2952    {
2953       struct lp_type int16_type;
2954       int16_type = lp_int_type(type);
2955       int16_type.width /= 2;
2956       lp_build_context_init(&bld.bld_base.int16_bld, gallivm, int16_type);
2957    }
2958    {
2959       struct lp_type uint8_type;
2960       uint8_type = lp_uint_type(type);
2961       uint8_type.width /= 4;
2962       lp_build_context_init(&bld.bld_base.uint8_bld, gallivm, uint8_type);
2963    }
2964    {
2965       struct lp_type int8_type;
2966       int8_type = lp_int_type(type);
2967       int8_type.width /= 4;
2968       lp_build_context_init(&bld.bld_base.int8_bld, gallivm, int8_type);
2969    }
2970    bld.bld_base.load_var = emit_load_var;
2971    bld.bld_base.store_var = emit_store_var;
2972    bld.bld_base.load_reg = emit_load_reg;
2973    bld.bld_base.store_reg = emit_store_reg;
2974    bld.bld_base.emit_var_decl = emit_var_decl;
2975    bld.bld_base.load_ubo = emit_load_ubo;
2976    bld.bld_base.load_kernel_arg = emit_load_kernel_arg;
2977    bld.bld_base.load_global = emit_load_global;
2978    bld.bld_base.store_global = emit_store_global;
2979    bld.bld_base.atomic_global = emit_atomic_global;
2980    bld.bld_base.tex = emit_tex;
2981    bld.bld_base.tex_size = emit_tex_size;
2982    bld.bld_base.bgnloop = bgnloop;
2983    bld.bld_base.endloop = endloop;
2984    bld.bld_base.if_cond = if_cond;
2985    bld.bld_base.else_stmt = else_stmt;
2986    bld.bld_base.endif_stmt = endif_stmt;
2987    bld.bld_base.break_stmt = break_stmt;
2988    bld.bld_base.continue_stmt = continue_stmt;
2989    bld.bld_base.sysval_intrin = emit_sysval_intrin;
2990    bld.bld_base.discard = discard;
2991    bld.bld_base.emit_vertex = emit_vertex;
2992    bld.bld_base.end_primitive = end_primitive;
2993    bld.bld_base.load_mem = emit_load_mem;
2994    bld.bld_base.store_mem = emit_store_mem;
2995    bld.bld_base.get_ssbo_size = emit_get_ssbo_size;
2996    bld.bld_base.atomic_mem = emit_atomic_mem;
2997    bld.bld_base.barrier = emit_barrier;
2998    bld.bld_base.image_op = emit_image_op;
2999    bld.bld_base.image_size = emit_image_size;
3000    bld.bld_base.vote = emit_vote;
3001    bld.bld_base.elect = emit_elect;
3002    bld.bld_base.reduce = emit_reduce;
3003    bld.bld_base.ballot = emit_ballot;
3004 #if LLVM_VERSION_MAJOR >= 10
3005    bld.bld_base.shuffle = emit_shuffle;
3006 #endif
3007    bld.bld_base.read_invocation = emit_read_invocation;
3008    bld.bld_base.helper_invocation = emit_helper_invocation;
3009    bld.bld_base.interp_at = emit_interp_at;
3010    bld.bld_base.call = emit_call;
3011    bld.bld_base.load_scratch = emit_load_scratch;
3012    bld.bld_base.store_scratch = emit_store_scratch;
3013    bld.bld_base.load_const = emit_load_const;
3014    bld.bld_base.clock = emit_clock;
3015    bld.bld_base.set_vertex_and_primitive_count = emit_set_vertex_and_primitive_count;
3016    bld.bld_base.launch_mesh_workgroups = emit_launch_mesh_workgroups;
3017 
3018    bld.bld_base.fns = params->fns;
3019    bld.bld_base.func = params->current_func;
3020    bld.mask = params->mask;
3021    bld.inputs = params->inputs;
3022    bld.outputs = outputs;
3023    bld.consts_ptr = params->consts_ptr;
3024    bld.ssbo_ptr = params->ssbo_ptr;
3025    bld.sampler = params->sampler;
3026 
3027    bld.context_type = params->context_type;
3028    bld.context_ptr = params->context_ptr;
3029    bld.resources_type = params->resources_type;
3030    bld.resources_ptr = params->resources_ptr;
3031    bld.thread_data_type = params->thread_data_type;
3032    bld.thread_data_ptr = params->thread_data_ptr;
3033    bld.bld_base.aniso_filter_table = params->aniso_filter_table;
3034    bld.image = params->image;
3035    bld.shared_ptr = params->shared_ptr;
3036    bld.payload_ptr = params->payload_ptr;
3037    bld.coro = params->coro;
3038    bld.kernel_args_ptr = params->kernel_args;
3039    bld.num_inputs = params->num_inputs;
3040    bld.indirects = 0;
3041    if (shader->info.inputs_read_indirectly)
3042       bld.indirects |= nir_var_shader_in;
3043 
3044    bld.gs_iface = params->gs_iface;
3045    bld.tcs_iface = params->tcs_iface;
3046    bld.tes_iface = params->tes_iface;
3047    bld.fs_iface = params->fs_iface;
3048    bld.mesh_iface = params->mesh_iface;
3049    if (bld.gs_iface) {
3050       struct lp_build_context *uint_bld = &bld.bld_base.uint_bld;
3051 
3052       bld.gs_vertex_streams = params->gs_vertex_streams;
3053       bld.max_output_vertices_vec = lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
3054                                                            shader->info.gs.vertices_out);
3055       for (int i = 0; i < params->gs_vertex_streams; i++) {
3056          bld.emitted_prims_vec_ptr[i] =
3057             lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_prims_ptr");
3058          bld.emitted_vertices_vec_ptr[i] =
3059             lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_vertices_ptr");
3060          bld.total_emitted_vertices_vec_ptr[i] =
3061             lp_build_alloca(gallivm, uint_bld->vec_type, "total_emitted_vertices_ptr");
3062       }
3063    }
3064    lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
3065 
3066    if (params->system_values)
3067       bld.system_values = *params->system_values;
3068 
3069    bld.bld_base.shader = shader;
3070 
3071    bld.scratch_size = ALIGN(shader->scratch_size, 8);
3072    if (params->scratch_ptr)
3073       bld.scratch_ptr = params->scratch_ptr;
3074    else if (shader->scratch_size) {
3075       bld.scratch_ptr = lp_build_array_alloca(gallivm,
3076                                               LLVMInt8TypeInContext(gallivm->context),
3077                                               lp_build_const_int32(gallivm, bld.scratch_size * type.length),
3078                                               "scratch");
3079    }
3080 
3081    if (shader->info.stage == MESA_SHADER_KERNEL) {
3082       bld.call_context_type = lp_build_cs_func_call_context(gallivm, type.length, bld.context_type, bld.resources_type);
3083       if (!params->call_context_ptr) {
3084          build_call_context(&bld);
3085       } else
3086          bld.call_context_ptr = params->call_context_ptr;
3087    }
3088 
3089    emit_prologue(&bld);
3090    lp_build_nir_llvm(&bld.bld_base, shader, impl);
3091 
3092    if (bld.gs_iface) {
3093       LLVMBuilderRef builder = bld.bld_base.base.gallivm->builder;
3094       LLVMValueRef total_emitted_vertices_vec;
3095       LLVMValueRef emitted_prims_vec;
3096 
3097       for (int i = 0; i < params->gs_vertex_streams; i++) {
3098          end_primitive_masked(&bld.bld_base, lp_build_mask_value(bld.mask), i);
3099 
3100          total_emitted_vertices_vec =
3101             LLVMBuildLoad2(builder, bld.bld_base.uint_bld.vec_type, bld.total_emitted_vertices_vec_ptr[i], "");
3102 
3103          emitted_prims_vec =
3104             LLVMBuildLoad2(builder, bld.bld_base.uint_bld.vec_type, bld.emitted_prims_vec_ptr[i], "");
3105          bld.gs_iface->gs_epilogue(bld.gs_iface,
3106                                    total_emitted_vertices_vec,
3107                                    emitted_prims_vec, i);
3108       }
3109    }
3110    lp_exec_mask_fini(&bld.exec_mask);
3111 }
3112 
lp_build_nir_soa(struct gallivm_state * gallivm,struct nir_shader * shader,const struct lp_build_tgsi_params * params,LLVMValueRef (* outputs)[4])3113 void lp_build_nir_soa(struct gallivm_state *gallivm,
3114                       struct nir_shader *shader,
3115                       const struct lp_build_tgsi_params *params,
3116                       LLVMValueRef (*outputs)[4])
3117 {
3118    lp_build_nir_prepasses(shader);
3119    lp_build_nir_soa_func(gallivm, shader,
3120                          nir_shader_get_entrypoint(shader),
3121                          params, outputs);
3122 }
3123