• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2019 Red Hat.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included
14  * in all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  *
24  **************************************************************************/
25 
26 #include "lp_bld_nir.h"
27 #include "lp_bld_init.h"
28 #include "lp_bld_flow.h"
29 #include "lp_bld_logic.h"
30 #include "lp_bld_gather.h"
31 #include "lp_bld_const.h"
32 #include "lp_bld_struct.h"
33 #include "lp_bld_jit_types.h"
34 #include "lp_bld_arit.h"
35 #include "lp_bld_bitarit.h"
36 #include "lp_bld_coro.h"
37 #include "lp_bld_printf.h"
38 #include "lp_bld_intr.h"
39 #include "util/u_cpu_detect.h"
40 #include "util/u_math.h"
41 
bit_size_to_shift_size(int bit_size)42 static int bit_size_to_shift_size(int bit_size)
43 {
44    switch (bit_size) {
45    case 64:
46       return 3;
47    default:
48    case 32:
49       return 2;
50    case 16:
51       return 1;
52    case 8:
53       return 0;
54    }
55 }
56 
57 /*
58  * Combine the global mask if there is one with the current execution mask.
59  */
60 static LLVMValueRef
mask_vec(struct lp_build_nir_context * bld_base)61 mask_vec(struct lp_build_nir_context *bld_base)
62 {
63    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
64 
65    struct lp_exec_mask *exec_mask = &bld->exec_mask;
66    LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
67    if (!exec_mask->has_mask)
68       return bld_mask;
69    if (!bld_mask)
70       return exec_mask->exec_mask;
71 
72    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
73    return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
74                        exec_mask->exec_mask, "");
75 }
76 
77 /*
78  * Use the execution mask if there is one, otherwise don't mask (ignore global mask).
79  * This allows helper invocations to run, which are necessary for correct derivatives.
80  */
81 static LLVMValueRef
mask_vec_with_helpers(struct lp_build_nir_context * bld_base)82 mask_vec_with_helpers(struct lp_build_nir_context *bld_base)
83 {
84    if (bld_base->shader->info.stage != MESA_SHADER_FRAGMENT)
85       return mask_vec(bld_base); /* No helper invocations needed. */
86 
87    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
88 
89    struct lp_exec_mask *exec_mask = &bld->exec_mask;
90    if (exec_mask->has_mask)
91       return exec_mask->exec_mask;
92 
93    return lp_build_const_int_vec(bld_base->base.gallivm,
94                                  bld_base->uint_bld.type, -1);
95 }
96 
97 static bool
invocation_0_must_be_active(struct lp_build_nir_context * bld_base)98 invocation_0_must_be_active(struct lp_build_nir_context *bld_base)
99 {
100    struct lp_build_nir_soa_context * bld = (struct lp_build_nir_soa_context *)bld_base;
101 
102    /* Fragment shaders may dispatch with invocation 0 inactive.  All other
103     * stages have invocation 0 active at the top.  (See
104     * lp_build_tgsi_params.mask setup in draw_llvm.c and lp_state_*.c)
105     */
106    if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT)
107       return false;
108 
109    /* If we're in some control flow right now, then invocation 0 may be
110     * disabled.
111     */
112    if (bld->exec_mask.has_mask)
113       return false;
114 
115    return true;
116 }
117 
118 /**
119  * Returns a scalar value of the first active invocation in the exec_mask.
120  *
121  * Note that gallivm doesn't generally jump when exec_mask is 0 (such as if/else
122  * branches thare are all false, or portions of a loop after a break/continue
123  * has ended the last invocation that had been active in the loop).  In that
124  * case, we return a 0 value so that unconditional LLVMBuildExtractElement of
125  * the first_active_invocation (such as in memory loads, texture unit index
126  * lookups, etc) will use a valid index
127  */
first_active_invocation(struct lp_build_nir_context * bld_base)128 static LLVMValueRef first_active_invocation(struct lp_build_nir_context *bld_base)
129 {
130    struct gallivm_state *gallivm = bld_base->base.gallivm;
131    LLVMBuilderRef builder = gallivm->builder;
132    struct lp_build_context *uint_bld = &bld_base->uint_bld;
133 
134    if (invocation_0_must_be_active(bld_base))
135       return lp_build_const_int32(gallivm, 0);
136 
137    LLVMValueRef exec_mask = mask_vec(bld_base);
138 
139    LLVMValueRef bitmask = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "exec_bitvec");
140    /* Turn it from N x i1 to iN, then extend it up to i32 so we can use a single
141     * cttz intrinsic -- I assume the compiler will drop the extend if there are
142     * smaller instructions available, since we have is_zero_poison.
143     */
144    bitmask = LLVMBuildBitCast(builder, bitmask, LLVMIntTypeInContext(gallivm->context, uint_bld->type.length), "exec_bitmask");
145    bitmask = LLVMBuildZExt(builder, bitmask, bld_base->int_bld.elem_type, "");
146 
147    LLVMValueRef any_active = LLVMBuildICmp(builder, LLVMIntNE, bitmask, lp_build_const_int32(gallivm, 0), "any_active");
148 
149    LLVMValueRef first_active = lp_build_intrinsic_binary(builder, "llvm.cttz.i32", bld_base->int_bld.elem_type, bitmask,
150                                                          LLVMConstInt(LLVMInt1TypeInContext(gallivm->context), false, false));
151 
152    return LLVMBuildSelect(builder, any_active, first_active, lp_build_const_int32(gallivm, 0), "first_active_or_0");
153 }
154 
155 static LLVMValueRef
lp_build_zero_bits(struct gallivm_state * gallivm,int bit_size,bool is_float)156 lp_build_zero_bits(struct gallivm_state *gallivm, int bit_size, bool is_float)
157 {
158    if (bit_size == 64)
159       return LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 0, 0);
160    else if (bit_size == 16)
161       return LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0, 0);
162    else if (bit_size == 8)
163       return LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0, 0);
164    else
165       return is_float ? lp_build_const_float(gallivm, 0) : lp_build_const_int32(gallivm, 0);
166 }
167 
168 static LLVMValueRef
emit_fetch_64bit(struct lp_build_nir_context * bld_base,LLVMValueRef input,LLVMValueRef input2)169 emit_fetch_64bit(
170    struct lp_build_nir_context * bld_base,
171    LLVMValueRef input,
172    LLVMValueRef input2)
173 {
174    struct gallivm_state *gallivm = bld_base->base.gallivm;
175    LLVMBuilderRef builder = gallivm->builder;
176    LLVMValueRef res;
177    int i;
178    LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
179    int len = bld_base->base.type.length * 2;
180    assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
181 
182    for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
183 #if UTIL_ARCH_LITTLE_ENDIAN
184       shuffles[i] = lp_build_const_int32(gallivm, i / 2);
185       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
186 #else
187       shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
188       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
189 #endif
190    }
191    res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
192 
193    return LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
194 }
195 
196 static void
emit_store_64bit_split(struct lp_build_nir_context * bld_base,LLVMValueRef value,LLVMValueRef split_values[2])197 emit_store_64bit_split(struct lp_build_nir_context *bld_base,
198                        LLVMValueRef value,
199                        LLVMValueRef split_values[2])
200 {
201    struct gallivm_state *gallivm = bld_base->base.gallivm;
202    LLVMBuilderRef builder = gallivm->builder;
203    unsigned i;
204    LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
205    LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
206    int len = bld_base->base.type.length * 2;
207 
208    value = LLVMBuildBitCast(gallivm->builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), len), "");
209    for (i = 0; i < bld_base->base.type.length; i++) {
210 #if UTIL_ARCH_LITTLE_ENDIAN
211       shuffles[i] = lp_build_const_int32(gallivm, i * 2);
212       shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
213 #else
214       shuffles[i] = lp_build_const_int32(gallivm, i * 2 + 1);
215       shuffles2[i] = lp_build_const_int32(gallivm, i * 2);
216 #endif
217    }
218 
219    split_values[0] = LLVMBuildShuffleVector(builder, value,
220                                  LLVMGetUndef(LLVMTypeOf(value)),
221                                  LLVMConstVector(shuffles,
222                                                  bld_base->base.type.length),
223                                  "");
224    split_values[1] = LLVMBuildShuffleVector(builder, value,
225                                   LLVMGetUndef(LLVMTypeOf(value)),
226                                   LLVMConstVector(shuffles2,
227                                                   bld_base->base.type.length),
228                                   "");
229 }
230 
231 static void
emit_store_64bit_chan(struct lp_build_nir_context * bld_base,LLVMValueRef chan_ptr,LLVMValueRef chan_ptr2,LLVMValueRef value)232 emit_store_64bit_chan(struct lp_build_nir_context *bld_base,
233                       LLVMValueRef chan_ptr,
234                       LLVMValueRef chan_ptr2,
235                       LLVMValueRef value)
236 {
237    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
238    struct lp_build_context *float_bld = &bld_base->base;
239    LLVMValueRef split_vals[2];
240 
241    emit_store_64bit_split(bld_base, value, split_vals);
242 
243    lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[0], chan_ptr);
244    lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[1], chan_ptr2);
245 }
246 
247 static LLVMValueRef
get_soa_array_offsets(struct lp_build_context * uint_bld,LLVMValueRef indirect_index,int num_components,unsigned chan_index,bool need_perelement_offset)248 get_soa_array_offsets(struct lp_build_context *uint_bld,
249                       LLVMValueRef indirect_index,
250                       int num_components,
251                       unsigned chan_index,
252                       bool need_perelement_offset)
253 {
254    struct gallivm_state *gallivm = uint_bld->gallivm;
255    LLVMValueRef chan_vec =
256       lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
257    LLVMValueRef length_vec =
258       lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
259    LLVMValueRef index_vec;
260 
261    /* index_vec = (indirect_index * num_components + chan_index) * length + offsets */
262    index_vec = lp_build_mul(uint_bld, indirect_index, lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, num_components));
263    index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
264    index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
265 
266    if (need_perelement_offset) {
267       LLVMValueRef pixel_offsets;
268       unsigned i;
269      /* build pixel offset vector: {0, 1, 2, 3, ...} */
270       pixel_offsets = uint_bld->undef;
271       for (i = 0; i < uint_bld->type.length; i++) {
272          LLVMValueRef ii = lp_build_const_int32(gallivm, i);
273          pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
274                                                 ii, ii, "");
275       }
276       index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
277    }
278    return index_vec;
279 }
280 
281 static LLVMValueRef
build_gather(struct lp_build_nir_context * bld_base,struct lp_build_context * bld,LLVMTypeRef base_type,LLVMValueRef base_ptr,LLVMValueRef indexes,LLVMValueRef overflow_mask,LLVMValueRef indexes2)282 build_gather(struct lp_build_nir_context *bld_base,
283              struct lp_build_context *bld,
284              LLVMTypeRef base_type,
285              LLVMValueRef base_ptr,
286              LLVMValueRef indexes,
287              LLVMValueRef overflow_mask,
288              LLVMValueRef indexes2)
289 {
290    struct gallivm_state *gallivm = bld_base->base.gallivm;
291    LLVMBuilderRef builder = gallivm->builder;
292    struct lp_build_context *uint_bld = &bld_base->uint_bld;
293    LLVMValueRef res;
294    unsigned i;
295 
296    if (indexes2)
297       res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
298    else
299       res = bld->undef;
300    /*
301     * overflow_mask is a vector telling us which channels
302     * in the vector overflowed. We use the overflow behavior for
303     * constant buffers which is defined as:
304     * Out of bounds access to constant buffer returns 0 in all
305     * components. Out of bounds behavior is always with respect
306     * to the size of the buffer bound at that slot.
307     */
308 
309    if (overflow_mask) {
310       /*
311        * We avoid per-element control flow here (also due to llvm going crazy,
312        * though I suspect it's better anyway since overflow is likely rare).
313        * Note that since we still fetch from buffers even if num_elements was
314        * zero (in this case we'll fetch from index zero) the jit func callers
315        * MUST provide valid fake constant buffers of size 4x32 (the values do
316        * not matter), otherwise we'd still need (not per element though)
317        * control flow.
318        */
319       indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
320       if (indexes2)
321          indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
322    }
323 
324    /*
325     * Loop over elements of index_vec, load scalar value, insert it into 'res'.
326     */
327    for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
328       LLVMValueRef si, di;
329       LLVMValueRef index;
330       LLVMValueRef scalar_ptr, scalar;
331 
332       di = lp_build_const_int32(gallivm, i);
333       if (indexes2)
334          si = lp_build_const_int32(gallivm, i >> 1);
335       else
336          si = di;
337 
338       if (indexes2 && (i & 1)) {
339          index = LLVMBuildExtractElement(builder,
340                                          indexes2, si, "");
341       } else {
342          index = LLVMBuildExtractElement(builder,
343                                          indexes, si, "");
344       }
345 
346       scalar_ptr = LLVMBuildGEP2(builder, base_type, base_ptr, &index, 1, "gather_ptr");
347       scalar = LLVMBuildLoad2(builder, base_type, scalar_ptr, "");
348 
349       res = LLVMBuildInsertElement(builder, res, scalar, di, "");
350    }
351 
352    if (overflow_mask) {
353       if (indexes2) {
354          res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
355          overflow_mask = LLVMBuildSExt(builder, overflow_mask,
356                                        bld_base->dbl_bld.int_vec_type, "");
357          res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
358                                bld_base->dbl_bld.zero, res);
359       } else
360          res = lp_build_select(bld, overflow_mask, bld->zero, res);
361    }
362 
363    return res;
364 }
365 
366 /**
367  * Scatter/store vector.
368  */
369 static void
emit_mask_scatter(struct lp_build_nir_soa_context * bld,LLVMValueRef base_ptr,LLVMValueRef indexes,LLVMValueRef values,struct lp_exec_mask * mask)370 emit_mask_scatter(struct lp_build_nir_soa_context *bld,
371                   LLVMValueRef base_ptr,
372                   LLVMValueRef indexes,
373                   LLVMValueRef values,
374                   struct lp_exec_mask *mask)
375 {
376    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
377    LLVMBuilderRef builder = gallivm->builder;
378    unsigned i;
379    LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;
380 
381    /*
382     * Loop over elements of index_vec, store scalar value.
383     */
384    for (i = 0; i < bld->bld_base.base.type.length; i++) {
385       LLVMValueRef ii = lp_build_const_int32(gallivm, i);
386       LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
387       LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
388       LLVMValueRef scalar_ptr = LLVMBuildGEP2(builder, LLVMTypeOf(val), base_ptr, &index, 1, "scatter_ptr");
389       LLVMValueRef scalar_pred = pred ?
390          LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
391 
392       if (0)
393          lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
394                          ii, val, index, scalar_ptr);
395 
396       if (scalar_pred) {
397          LLVMValueRef real_val, dst_val;
398          dst_val = LLVMBuildLoad2(builder, LLVMTypeOf(val), scalar_ptr, "");
399          scalar_pred = LLVMBuildTrunc(builder, scalar_pred, LLVMInt1TypeInContext(gallivm->context), "");
400          real_val = LLVMBuildSelect(builder, scalar_pred, val, dst_val, "");
401          LLVMBuildStore(builder, real_val, scalar_ptr);
402       }
403       else {
404          LLVMBuildStore(builder, val, scalar_ptr);
405       }
406    }
407 }
408 
emit_load_var(struct lp_build_nir_context * bld_base,nir_variable_mode deref_mode,unsigned num_components,unsigned bit_size,nir_variable * var,unsigned vertex_index,LLVMValueRef indir_vertex_index,unsigned const_index,LLVMValueRef indir_index,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])409 static void emit_load_var(struct lp_build_nir_context *bld_base,
410                            nir_variable_mode deref_mode,
411                            unsigned num_components,
412                            unsigned bit_size,
413                            nir_variable *var,
414                            unsigned vertex_index,
415                            LLVMValueRef indir_vertex_index,
416                            unsigned const_index,
417                            LLVMValueRef indir_index,
418                            LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
419 {
420    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
421    struct gallivm_state *gallivm = bld_base->base.gallivm;
422    int dmul = bit_size == 64 ? 2 : 1;
423    unsigned location = var->data.driver_location;
424    unsigned location_frac = var->data.location_frac;
425 
426    if (!var->data.compact && !indir_index)
427       location += const_index;
428    else if (var->data.compact) {
429       location += const_index / 4;
430       location_frac += const_index % 4;
431       const_index = 0;
432    }
433    switch (deref_mode) {
434    case nir_var_shader_in:
435       for (unsigned i = 0; i < num_components; i++) {
436          int idx = (i * dmul) + location_frac;
437          int comp_loc = location;
438 
439          if (bit_size == 64 && idx >= 4) {
440             comp_loc++;
441             idx = idx % 4;
442          }
443 
444          if (bld->gs_iface) {
445             LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
446             LLVMValueRef attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
447             LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
448             LLVMValueRef result2;
449 
450             result[i] = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
451                                                    false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
452             if (bit_size == 64) {
453                LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
454                result2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
455                                                     false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
456                result[i] = emit_fetch_64bit(bld_base, result[i], result2);
457             }
458          } else if (bld->tes_iface) {
459             LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
460             LLVMValueRef attrib_index_val;
461             LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
462             LLVMValueRef result2;
463 
464             if (indir_index) {
465                if (var->data.compact) {
466                   swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));
467                   attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
468                } else
469                   attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
470             } else
471                attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
472 
473             if (var->data.patch) {
474                result[i] = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
475                                                              indir_index ? true : false, attrib_index_val, swizzle_index_val);
476                if (bit_size == 64) {
477                   LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
478                   result2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
479                                                               indir_index ? true : false, attrib_index_val, swizzle_index_val);
480                   result[i] = emit_fetch_64bit(bld_base, result[i], result2);
481                }
482             }
483             else {
484                result[i] = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
485                                                               indir_vertex_index ? true : false,
486                                                               indir_vertex_index ? indir_vertex_index : vertex_index_val,
487                                                               (indir_index && !var->data.compact) ? true : false, attrib_index_val,
488                                                               (indir_index && var->data.compact) ? true : false, swizzle_index_val);
489                if (bit_size == 64) {
490                   LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
491                   result2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
492                                                                indir_vertex_index ? true : false,
493                                                                indir_vertex_index ? indir_vertex_index : vertex_index_val,
494                                                                indir_index ? true : false, attrib_index_val, false, swizzle_index_val);
495                   result[i] = emit_fetch_64bit(bld_base, result[i], result2);
496                }
497             }
498          } else if (bld->tcs_iface) {
499             LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
500             LLVMValueRef attrib_index_val;
501             LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
502 
503             if (indir_index) {
504                if (var->data.compact) {
505                   swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));
506                   attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
507                } else
508                   attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
509             } else
510                attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
511             result[i] = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
512                                                          indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
513                                                          (indir_index && !var->data.compact) ? true : false, attrib_index_val,
514                                                          (indir_index && var->data.compact) ? true : false, swizzle_index_val);
515             if (bit_size == 64) {
516                LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
517                LLVMValueRef result2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
518                                                                        indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
519                                                                        indir_index ? true : false, attrib_index_val,
520                                                                        false, swizzle_index_val);
521                result[i] = emit_fetch_64bit(bld_base, result[i], result2);
522             }
523          } else {
524             if (indir_index) {
525                LLVMValueRef attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
526                LLVMValueRef index_vec = get_soa_array_offsets(&bld_base->uint_bld,
527                                                               attrib_index_val, 4, idx,
528                                                               true);
529                LLVMValueRef index_vec2 = NULL;
530                LLVMTypeRef scalar_type = LLVMFloatTypeInContext(gallivm->context);
531                LLVMValueRef inputs_array = LLVMBuildBitCast(gallivm->builder, bld->inputs_array, LLVMPointerType(scalar_type, 0), "");
532 
533                if (bit_size == 64)
534                   index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
535                                                      indir_index, 4, idx + 1, true);
536 
537                /* Gather values from the input register array */
538                result[i] = build_gather(bld_base, &bld_base->base, scalar_type, inputs_array, index_vec, NULL, index_vec2);
539             } else {
540                if (bld->indirects & nir_var_shader_in) {
541                   LLVMValueRef lindex = lp_build_const_int32(gallivm,
542                                                              comp_loc * 4 + idx);
543                   LLVMValueRef input_ptr = lp_build_pointer_get2(gallivm->builder,
544                                                                  bld->bld_base.base.vec_type,
545                                                                  bld->inputs_array, lindex);
546                   if (bit_size == 64) {
547                      LLVMValueRef lindex2 = lp_build_const_int32(gallivm,
548                                                                  comp_loc * 4 + (idx + 1));
549                      LLVMValueRef input_ptr2 = lp_build_pointer_get2(gallivm->builder,
550                                                                      bld->bld_base.base.vec_type,
551                                                                      bld->inputs_array, lindex2);
552                      result[i] = emit_fetch_64bit(bld_base, input_ptr, input_ptr2);
553                   } else {
554                      result[i] = input_ptr;
555                   }
556                } else {
557                   if (bit_size == 64) {
558                      LLVMValueRef tmp[2];
559                      tmp[0] = bld->inputs[comp_loc][idx];
560                      tmp[1] = bld->inputs[comp_loc][idx + 1];
561                      result[i] = emit_fetch_64bit(bld_base, tmp[0], tmp[1]);
562                   } else {
563                      result[i] = bld->inputs[comp_loc][idx];
564                   }
565                }
566             }
567          }
568       }
569       break;
570    case nir_var_shader_out:
571       if (var->data.fb_fetch_output && bld->fs_iface && bld->fs_iface->fb_fetch) {
572          bld->fs_iface->fb_fetch(bld->fs_iface, &bld_base->base, var->data.location, result);
573          break;
574       }
575 
576       for (unsigned i = 0; i < num_components; i++) {
577          int idx = (i * dmul) + location_frac;
578          int comp_loc = location;
579          if (bit_size == 64 && idx >= 4) {
580             comp_loc++;
581             idx = idx % 4;
582          }
583 
584          if (bld->tcs_iface) {
585             LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
586             LLVMValueRef attrib_index_val;
587             LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
588 
589             if (indir_index)
590                attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
591             else
592                attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
593 
594             result[i] = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
595                                                           indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
596                                                           (indir_index && !var->data.compact) ? true : false, attrib_index_val,
597                                                           (indir_index && var->data.compact) ? true : false, swizzle_index_val, 0);
598             if (bit_size == 64) {
599                LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
600                LLVMValueRef result2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
601                                                                         indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
602                                                                         indir_index ? true : false, attrib_index_val,
603                                                                         false, swizzle_index_val, 0);
604                result[i] = emit_fetch_64bit(bld_base, result[i], result2);
605             }
606          } else {
607             /* Output variable behave like private variables during the shader execution.
608                GLSL 4.60 spec, section 4.3.6.
609                Vulkan 1.3 spec, Helper Invocations */
610             if (bit_size == 64) {
611                result[i] = emit_fetch_64bit(bld_base,
612                      LLVMBuildLoad2(gallivm->builder, bld_base->base.vec_type, bld->outputs[comp_loc][idx], "output0_ptr"),
613                      LLVMBuildLoad2(gallivm->builder, bld_base->base.vec_type, bld->outputs[comp_loc][idx + 1], "output1_ptr"));
614             } else {
615                result[i] = LLVMBuildLoad2(gallivm->builder, bld_base->base.vec_type, bld->outputs[comp_loc][idx], "output_ptr");
616             }
617          }
618       }
619       break;
620    default:
621       break;
622    }
623 }
624 
emit_store_chan(struct lp_build_nir_context * bld_base,nir_variable_mode deref_mode,unsigned bit_size,unsigned location,unsigned comp,unsigned chan,LLVMValueRef dst)625 static void emit_store_chan(struct lp_build_nir_context *bld_base,
626                             nir_variable_mode deref_mode,
627                             unsigned bit_size,
628                             unsigned location, unsigned comp,
629                             unsigned chan,
630                             LLVMValueRef dst)
631 {
632    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
633    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
634    struct lp_build_context *float_bld = &bld_base->base;
635 
636    if (bit_size == 64) {
637       chan *= 2;
638       chan += comp;
639       if (chan >= 4) {
640          chan -= 4;
641          location++;
642       }
643       emit_store_64bit_chan(bld_base, bld->outputs[location][chan],
644                             bld->outputs[location][chan + 1], dst);
645    } else {
646       dst = LLVMBuildBitCast(builder, dst, float_bld->vec_type, "");
647       lp_exec_mask_store(&bld->exec_mask, float_bld, dst,
648                          bld->outputs[location][chan + comp]);
649    }
650 }
651 
emit_store_tcs_chan(struct lp_build_nir_context * bld_base,bool is_compact,unsigned bit_size,unsigned location,unsigned const_index,LLVMValueRef indir_vertex_index,LLVMValueRef indir_index,unsigned comp,unsigned chan,LLVMValueRef chan_val)652 static void emit_store_tcs_chan(struct lp_build_nir_context *bld_base,
653                                 bool is_compact,
654                                 unsigned bit_size,
655                                 unsigned location,
656                                 unsigned const_index,
657                                 LLVMValueRef indir_vertex_index,
658                                 LLVMValueRef indir_index,
659                                 unsigned comp,
660                                 unsigned chan,
661                                 LLVMValueRef chan_val)
662 {
663    struct gallivm_state *gallivm = bld_base->base.gallivm;
664    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
665    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
666    unsigned swizzle = chan;
667    if (bit_size == 64) {
668       swizzle *= 2;
669       swizzle += comp;
670       if (swizzle >= 4) {
671          swizzle -= 4;
672          location++;
673       }
674    } else
675       swizzle += comp;
676    LLVMValueRef attrib_index_val;
677    LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, swizzle);
678 
679    if (indir_index) {
680       if (is_compact) {
681          swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, swizzle));
682          attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
683       } else
684          attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, location));
685    } else
686       attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
687    LLVMValueRef exec_mask = mask_vec(bld_base);
688    if (bit_size == 64) {
689       LLVMValueRef split_vals[2];
690       LLVMValueRef swizzle_index_val2 = lp_build_const_int32(gallivm, swizzle + 1);
691       emit_store_64bit_split(bld_base, chan_val, split_vals);
692       if (bld->mesh_iface) {
693          bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
694                                            indir_vertex_index ? true : false,
695                                            indir_vertex_index,
696                                            indir_index ? true : false,
697                                            attrib_index_val,
698                                            false, swizzle_index_val,
699                                            split_vals[0], exec_mask);
700          bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
701                                            indir_vertex_index ? true : false,
702                                            indir_vertex_index,
703                                            indir_index ? true : false,
704                                            attrib_index_val,
705                                            false, swizzle_index_val2,
706                                            split_vals[1], exec_mask);
707       } else {
708          bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
709                                            indir_vertex_index ? true : false,
710                                            indir_vertex_index,
711                                            indir_index ? true : false,
712                                            attrib_index_val,
713                                            false, swizzle_index_val,
714                                            split_vals[0], exec_mask);
715          bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
716                                            indir_vertex_index ? true : false,
717                                            indir_vertex_index,
718                                            indir_index ? true : false,
719                                            attrib_index_val,
720                                            false, swizzle_index_val2,
721                                            split_vals[1], exec_mask);
722       }
723    } else {
724       chan_val = LLVMBuildBitCast(builder, chan_val, bld_base->base.vec_type, "");
725       if (bld->mesh_iface) {
726          bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
727                                            indir_vertex_index ? true : false,
728                                            indir_vertex_index,
729                                            indir_index && !is_compact ? true : false,
730                                            attrib_index_val,
731                                            indir_index && is_compact ? true : false,
732                                            swizzle_index_val,
733                                            chan_val, exec_mask);
734       } else {
735          bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
736                                            indir_vertex_index ? true : false,
737                                            indir_vertex_index,
738                                            indir_index && !is_compact ? true : false,
739                                            attrib_index_val,
740                                            indir_index && is_compact ? true : false,
741                                            swizzle_index_val,
742                                            chan_val, exec_mask);
743       }
744    }
745 }
746 
emit_store_mesh_chan(struct lp_build_nir_context * bld_base,bool is_compact,unsigned bit_size,unsigned location,unsigned const_index,LLVMValueRef indir_vertex_index,LLVMValueRef indir_index,unsigned comp,unsigned chan,LLVMValueRef chan_val)747 static void emit_store_mesh_chan(struct lp_build_nir_context *bld_base,
748                                  bool is_compact,
749                                  unsigned bit_size,
750                                  unsigned location,
751                                  unsigned const_index,
752                                  LLVMValueRef indir_vertex_index,
753                                  LLVMValueRef indir_index,
754                                  unsigned comp,
755                                  unsigned chan,
756                                  LLVMValueRef chan_val)
757 {
758    struct gallivm_state *gallivm = bld_base->base.gallivm;
759    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
760    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
761    unsigned swizzle = chan;
762    if (bit_size == 64) {
763       swizzle += const_index;
764       swizzle *= 2;
765       swizzle += comp;
766       if (swizzle >= 4) {
767          swizzle -= 4;
768          location++;
769       }
770    } else
771       swizzle += comp;
772    LLVMValueRef attrib_index_val;
773    LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, swizzle);
774 
775    if (indir_index) {
776       if (is_compact) {
777          swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, swizzle));
778          attrib_index_val = lp_build_const_int32(gallivm, location);
779       } else
780          attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, location));
781    } else
782       attrib_index_val = lp_build_const_int32(gallivm, location + const_index);
783    LLVMValueRef exec_mask = mask_vec(bld_base);
784    if (bit_size == 64) {
785       LLVMValueRef split_vals[2];
786       LLVMValueRef swizzle_index_val2 = lp_build_const_int32(gallivm, swizzle + 1);
787       emit_store_64bit_split(bld_base, chan_val, split_vals);
788       bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
789                                          indir_vertex_index ? true : false,
790                                          indir_vertex_index,
791                                          indir_index ? true : false,
792                                          attrib_index_val,
793                                          false, swizzle_index_val,
794                                          split_vals[0], exec_mask);
795       bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
796                                          indir_vertex_index ? true : false,
797                                          indir_vertex_index,
798                                          indir_index ? true : false,
799                                          attrib_index_val,
800                                          false, swizzle_index_val2,
801                                          split_vals[1], exec_mask);
802    } else {
803       chan_val = LLVMBuildBitCast(builder, chan_val, bld_base->base.vec_type, "");
804       bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
805                                          indir_vertex_index ? true : false,
806                                          indir_vertex_index,
807                                          indir_index && !is_compact ? true : false,
808                                          attrib_index_val,
809                                          indir_index && is_compact ? true : false,
810                                          swizzle_index_val,
811                                          chan_val, exec_mask);
812    }
813 }
814 
emit_store_var(struct lp_build_nir_context * bld_base,nir_variable_mode deref_mode,unsigned num_components,unsigned bit_size,nir_variable * var,unsigned writemask,LLVMValueRef indir_vertex_index,unsigned const_index,LLVMValueRef indir_index,LLVMValueRef dst)815 static void emit_store_var(struct lp_build_nir_context *bld_base,
816                            nir_variable_mode deref_mode,
817                            unsigned num_components,
818                            unsigned bit_size,
819                            nir_variable *var,
820                            unsigned writemask,
821                            LLVMValueRef indir_vertex_index,
822                            unsigned const_index,
823                            LLVMValueRef indir_index,
824                            LLVMValueRef dst)
825 {
826    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
827    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
828    switch (deref_mode) {
829    case nir_var_shader_out: {
830       unsigned location = var->data.driver_location;
831       unsigned comp = var->data.location_frac;
832       if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
833          if (var->data.location == FRAG_RESULT_STENCIL)
834             comp = 1;
835          else if (var->data.location == FRAG_RESULT_DEPTH)
836             comp = 2;
837       }
838 
839       if (var->data.compact) {
840          location += const_index / 4;
841          comp += const_index % 4;
842          const_index = 0;
843       }
844 
845       for (unsigned chan = 0; chan < num_components; chan++) {
846          if (writemask & (1u << chan)) {
847             LLVMValueRef chan_val = (num_components == 1) ? dst : LLVMBuildExtractValue(builder, dst, chan, "");
848             if (bld->mesh_iface) {
849                emit_store_mesh_chan(bld_base, var->data.compact, bit_size, location, const_index, indir_vertex_index, indir_index, comp, chan, chan_val);
850             } else if (bld->tcs_iface) {
851                emit_store_tcs_chan(bld_base, var->data.compact, bit_size, location, const_index, indir_vertex_index, indir_index, comp, chan, chan_val);
852             } else
853                emit_store_chan(bld_base, deref_mode, bit_size, location + const_index, comp, chan, chan_val);
854          }
855       }
856       break;
857    }
858    default:
859       break;
860    }
861 }
862 
863 /**
864  * Returns the address of the given constant array index and channel in a
865  * nir register.
866  */
reg_chan_pointer(struct lp_build_nir_context * bld_base,struct lp_build_context * reg_bld,const nir_intrinsic_instr * decl,LLVMValueRef reg_storage,int array_index,int chan)867 static LLVMValueRef reg_chan_pointer(struct lp_build_nir_context *bld_base,
868                                            struct lp_build_context *reg_bld,
869                                            const nir_intrinsic_instr *decl,
870                                            LLVMValueRef reg_storage,
871                                            int array_index, int chan)
872 {
873    struct gallivm_state *gallivm = bld_base->base.gallivm;
874    int nc = nir_intrinsic_num_components(decl);
875    int num_array_elems = nir_intrinsic_num_array_elems(decl);
876 
877    LLVMTypeRef chan_type = reg_bld->vec_type;
878    if (nc > 1)
879       chan_type = LLVMArrayType(chan_type, nc);
880 
881    if (num_array_elems > 0) {
882       LLVMTypeRef array_type = LLVMArrayType(chan_type, num_array_elems);
883       reg_storage = lp_build_array_get_ptr2(gallivm, array_type, reg_storage,
884                                             lp_build_const_int32(gallivm, array_index));
885    }
886    if (nc > 1) {
887       reg_storage = lp_build_array_get_ptr2(gallivm, chan_type, reg_storage,
888                                             lp_build_const_int32(gallivm, chan));
889    }
890 
891    return reg_storage;
892 }
893 
emit_load_reg(struct lp_build_nir_context * bld_base,struct lp_build_context * reg_bld,const nir_intrinsic_instr * decl,unsigned base,LLVMValueRef indir_src,LLVMValueRef reg_storage)894 static LLVMValueRef emit_load_reg(struct lp_build_nir_context *bld_base,
895                                   struct lp_build_context *reg_bld,
896                                   const nir_intrinsic_instr *decl,
897                                   unsigned base,
898                                   LLVMValueRef indir_src,
899                                   LLVMValueRef reg_storage)
900 {
901    struct gallivm_state *gallivm = bld_base->base.gallivm;
902    LLVMBuilderRef builder = gallivm->builder;
903    int nc = nir_intrinsic_num_components(decl);
904    int num_array_elems = nir_intrinsic_num_array_elems(decl);
905    LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS] = { NULL };
906    struct lp_build_context *uint_bld = &bld_base->uint_bld;
907    if (indir_src != NULL) {
908       LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, base);
909       LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, num_array_elems - 1);
910       indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, "");
911       indirect_val = lp_build_min(uint_bld, indirect_val, max_index);
912       reg_storage = LLVMBuildBitCast(builder, reg_storage, LLVMPointerType(reg_bld->elem_type, 0), "");
913       for (unsigned i = 0; i < nc; i++) {
914          LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, true);
915          vals[i] = build_gather(bld_base, reg_bld, reg_bld->elem_type, reg_storage, indirect_offset, NULL, NULL);
916       }
917    } else {
918       for (unsigned i = 0; i < nc; i++) {
919          vals[i] = LLVMBuildLoad2(builder, reg_bld->vec_type,
920                                   reg_chan_pointer(bld_base, reg_bld, decl, reg_storage,
921                                                    base, i), "");
922       }
923    }
924    return nc == 1 ? vals[0] : lp_nir_array_build_gather_values(builder, vals, nc);
925 }
926 
emit_store_reg(struct lp_build_nir_context * bld_base,struct lp_build_context * reg_bld,const nir_intrinsic_instr * decl,unsigned writemask,unsigned base,LLVMValueRef indir_src,LLVMValueRef reg_storage,LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS])927 static void emit_store_reg(struct lp_build_nir_context *bld_base,
928                            struct lp_build_context *reg_bld,
929                            const nir_intrinsic_instr *decl,
930                            unsigned writemask,
931                            unsigned base,
932                            LLVMValueRef indir_src,
933                            LLVMValueRef reg_storage,
934                            LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS])
935 {
936    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
937    struct gallivm_state *gallivm = bld_base->base.gallivm;
938    LLVMBuilderRef builder = gallivm->builder;
939    struct lp_build_context *uint_bld = &bld_base->uint_bld;
940    int nc = nir_intrinsic_num_components(decl);
941    int num_array_elems = nir_intrinsic_num_array_elems(decl);
942    if (indir_src != NULL) {
943       LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, base);
944       LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, num_array_elems - 1);
945       indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, "");
946       indirect_val = lp_build_min(uint_bld, indirect_val, max_index);
947       reg_storage = LLVMBuildBitCast(builder, reg_storage, LLVMPointerType(reg_bld->elem_type, 0), "");
948       for (unsigned i = 0; i < nc; i++) {
949          if (!(writemask & (1 << i)))
950             continue;
951          LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, true);
952          dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, "");
953          emit_mask_scatter(bld, reg_storage, indirect_offset, dst[i], &bld->exec_mask);
954       }
955       return;
956    }
957 
958    for (unsigned i = 0; i < nc; i++) {
959       if (!(writemask & (1 << i)))
960          continue;
961       dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, "");
962       lp_exec_mask_store(&bld->exec_mask, reg_bld, dst[i],
963                          reg_chan_pointer(bld_base, reg_bld, decl, reg_storage,
964                                           base, i));
965    }
966 }
967 
emit_load_kernel_arg(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,unsigned offset_bit_size,bool offset_is_uniform,LLVMValueRef offset,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])968 static void emit_load_kernel_arg(struct lp_build_nir_context *bld_base,
969                                  unsigned nc,
970                                  unsigned bit_size,
971                                  unsigned offset_bit_size,
972                                  bool offset_is_uniform,
973                                  LLVMValueRef offset,
974                                  LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
975 {
976    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
977    struct gallivm_state *gallivm = bld_base->base.gallivm;
978    LLVMBuilderRef builder = gallivm->builder;
979    struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size);
980    LLVMValueRef kernel_args_ptr = bld->kernel_args_ptr;
981    unsigned size_shift = bit_size_to_shift_size(bit_size);
982    struct lp_build_context *bld_offset = get_int_bld(bld_base, true, offset_bit_size);
983    if (size_shift)
984       offset = lp_build_shr(bld_offset, offset, lp_build_const_int_vec(gallivm, bld_offset->type, size_shift));
985 
986    LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0);
987    kernel_args_ptr = LLVMBuildBitCast(builder, kernel_args_ptr, ptr_type, "");
988 
989    if (offset_is_uniform) {
990       offset = LLVMBuildExtractElement(builder, offset, first_active_invocation(bld_base), "");
991 
992       for (unsigned c = 0; c < nc; c++) {
993          LLVMValueRef this_offset = LLVMBuildAdd(builder, offset, offset_bit_size == 64 ? lp_build_const_int64(gallivm, c) : lp_build_const_int32(gallivm, c), "");
994 
995          LLVMValueRef scalar = lp_build_pointer_get2(builder, bld_broad->elem_type, kernel_args_ptr, this_offset);
996          result[c] = lp_build_broadcast_scalar(bld_broad, scalar);
997       }
998    } else {
999       unreachable("load_kernel_arg must have a uniform offset.");
1000    }
1001 }
1002 
global_addr_to_ptr(struct gallivm_state * gallivm,LLVMValueRef addr_ptr,unsigned bit_size)1003 static LLVMValueRef global_addr_to_ptr(struct gallivm_state *gallivm, LLVMValueRef addr_ptr, unsigned bit_size)
1004 {
1005    LLVMBuilderRef builder = gallivm->builder;
1006    switch (bit_size) {
1007    case 8:
1008       addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");
1009       break;
1010    case 16:
1011       addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt16TypeInContext(gallivm->context), 0), "");
1012       break;
1013    case 32:
1014    default:
1015       addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), "");
1016       break;
1017    case 64:
1018       addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0), "");
1019       break;
1020    }
1021    return addr_ptr;
1022 }
1023 
global_addr_to_ptr_vec(struct gallivm_state * gallivm,LLVMValueRef addr_ptr,unsigned length,unsigned bit_size)1024 static LLVMValueRef global_addr_to_ptr_vec(struct gallivm_state *gallivm, LLVMValueRef addr_ptr, unsigned length, unsigned bit_size)
1025 {
1026    LLVMBuilderRef builder = gallivm->builder;
1027    switch (bit_size) {
1028    case 8:
1029       addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMVectorType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), length), "");
1030       break;
1031    case 16:
1032       addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMVectorType(LLVMPointerType(LLVMInt16TypeInContext(gallivm->context), 0), length), "");
1033       break;
1034    case 32:
1035    default:
1036       addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMVectorType(LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), length), "");
1037       break;
1038    case 64:
1039       addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMVectorType(LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0), length), "");
1040       break;
1041    }
1042    return addr_ptr;
1043 }
1044 
lp_vec_add_offset_ptr(struct lp_build_nir_context * bld_base,unsigned bit_size,LLVMValueRef ptr,LLVMValueRef offset)1045 static LLVMValueRef lp_vec_add_offset_ptr(struct lp_build_nir_context *bld_base,
1046                                           unsigned bit_size,
1047                                           LLVMValueRef ptr,
1048                                           LLVMValueRef offset)
1049 {
1050    unsigned pointer_size = 8 * sizeof(void *);
1051    struct gallivm_state *gallivm = bld_base->base.gallivm;
1052    LLVMBuilderRef builder = gallivm->builder;
1053    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1054    struct lp_build_context *ptr_bld = get_int_bld(bld_base, true, pointer_size);
1055    LLVMValueRef result = LLVMBuildPtrToInt(builder, ptr, ptr_bld->vec_type, "");
1056    if (pointer_size == 64)
1057       offset = LLVMBuildZExt(builder, offset, ptr_bld->vec_type, "");
1058    result = LLVMBuildAdd(builder, offset, result, "");
1059    return global_addr_to_ptr_vec(gallivm, result, uint_bld->type.length, bit_size);
1060 }
1061 
emit_load_global(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,unsigned addr_bit_size,bool offset_is_uniform,LLVMValueRef addr,LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])1062 static void emit_load_global(struct lp_build_nir_context *bld_base,
1063                              unsigned nc,
1064                              unsigned bit_size,
1065                              unsigned addr_bit_size,
1066                              bool offset_is_uniform,
1067                              LLVMValueRef addr,
1068                              LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
1069 {
1070    struct gallivm_state *gallivm = bld_base->base.gallivm;
1071    LLVMBuilderRef builder = gallivm->builder;
1072    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1073    struct lp_build_context *res_bld;
1074 
1075    res_bld = get_int_bld(bld_base, true, bit_size);
1076 
1077    /* Note, we don't use first_active_invocation here, since we aren't
1078     * guaranteed that there is actually an active invocation.
1079     */
1080    if (offset_is_uniform && invocation_0_must_be_active(bld_base)) {
1081       /* If the offset is uniform, then use the address from invocation 0 to
1082        * load, and broadcast to all invocations.
1083        */
1084       LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
1085                                                       lp_build_const_int32(gallivm, 0), "");
1086       addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);
1087 
1088       for (unsigned c = 0; c < nc; c++) {
1089          LLVMValueRef scalar = lp_build_pointer_get2(builder, res_bld->elem_type,
1090                                                      addr_ptr, lp_build_const_int32(gallivm, c));
1091          outval[c] = lp_build_broadcast_scalar(res_bld, scalar);
1092       }
1093       return;
1094    }
1095 
1096    LLVMValueRef mask = mask_vec_with_helpers(bld_base);
1097    for (unsigned c = 0; c < nc; c++) {
1098       LLVMValueRef chan_offset = lp_build_const_int_vec(gallivm, uint_bld->type, c * (bit_size / 8));
1099 
1100       outval[c] = lp_build_masked_gather(gallivm, res_bld->type.length, bit_size, res_bld->vec_type,
1101                                          lp_vec_add_offset_ptr(bld_base, bit_size, addr, chan_offset),
1102                                          mask);
1103       outval[c] = LLVMBuildBitCast(builder, outval[c], res_bld->vec_type, "");
1104    }
1105 }
1106 
emit_store_global(struct lp_build_nir_context * bld_base,unsigned writemask,unsigned nc,unsigned bit_size,unsigned addr_bit_size,LLVMValueRef addr,LLVMValueRef dst)1107 static void emit_store_global(struct lp_build_nir_context *bld_base,
1108                               unsigned writemask,
1109                               unsigned nc, unsigned bit_size,
1110                               unsigned addr_bit_size,
1111                               LLVMValueRef addr,
1112                               LLVMValueRef dst)
1113 {
1114    struct gallivm_state *gallivm = bld_base->base.gallivm;
1115    LLVMBuilderRef builder = gallivm->builder;
1116    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1117    LLVMValueRef exec_mask = mask_vec(bld_base);
1118 
1119    for (unsigned c = 0; c < nc; c++) {
1120       if (!(writemask & (1u << c)))
1121          continue;
1122       LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
1123       LLVMValueRef chan_offset = lp_build_const_int_vec(gallivm, uint_bld->type, c * (bit_size / 8));
1124 
1125       struct lp_build_context *out_bld = get_int_bld(bld_base, false, bit_size);
1126       val = LLVMBuildBitCast(builder, val, out_bld->vec_type, "");
1127       lp_build_masked_scatter(gallivm, out_bld->type.length, bit_size,
1128                               lp_vec_add_offset_ptr(bld_base, bit_size,
1129                                                     addr, chan_offset),
1130                               val, exec_mask);
1131    }
1132 }
1133 
emit_atomic_global(struct lp_build_nir_context * bld_base,nir_atomic_op nir_op,unsigned addr_bit_size,unsigned val_bit_size,LLVMValueRef addr,LLVMValueRef val,LLVMValueRef val2,LLVMValueRef * result)1134 static void emit_atomic_global(struct lp_build_nir_context *bld_base,
1135                                nir_atomic_op nir_op,
1136                                unsigned addr_bit_size,
1137                                unsigned val_bit_size,
1138                                LLVMValueRef addr,
1139                                LLVMValueRef val, LLVMValueRef val2,
1140                                LLVMValueRef *result)
1141 {
1142    struct gallivm_state *gallivm = bld_base->base.gallivm;
1143    LLVMBuilderRef builder = gallivm->builder;
1144    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1145    bool is_flt = nir_atomic_op_type(nir_op) == nir_type_float;
1146    struct lp_build_context *atom_bld = is_flt ? get_flt_bld(bld_base, val_bit_size) : get_int_bld(bld_base, true, val_bit_size);
1147    if (is_flt)
1148       val = LLVMBuildBitCast(builder, val, atom_bld->vec_type, "");
1149 
1150    LLVMValueRef atom_res = lp_build_alloca(gallivm,
1151                                            atom_bld->vec_type, "");
1152    LLVMValueRef exec_mask = mask_vec(bld_base);
1153    struct lp_build_loop_state loop_state;
1154    lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1155 
1156    LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
1157                                                     loop_state.counter, "");
1158    value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, atom_bld->elem_type, "");
1159 
1160    LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
1161                                                    loop_state.counter, "");
1162    addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, 32);
1163    struct lp_build_if_state ifthen;
1164    LLVMValueRef cond, temp_res;
1165    LLVMValueRef scalar;
1166    cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
1167    cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
1168    lp_build_if(&ifthen, gallivm, cond);
1169 
1170    addr_ptr = LLVMBuildBitCast(gallivm->builder, addr_ptr, LLVMPointerType(LLVMTypeOf(value_ptr), 0), "");
1171    if (val2 != NULL /* compare-and-swap */) {
1172       LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
1173                                                          loop_state.counter, "");
1174       cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atom_bld->elem_type, "");
1175       scalar = LLVMBuildAtomicCmpXchg(builder, addr_ptr, value_ptr,
1176                                       cas_src_ptr,
1177                                       LLVMAtomicOrderingSequentiallyConsistent,
1178                                       LLVMAtomicOrderingSequentiallyConsistent,
1179                                       false);
1180       scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
1181    } else {
1182       scalar = LLVMBuildAtomicRMW(builder, lp_translate_atomic_op(nir_op),
1183                                   addr_ptr, value_ptr,
1184                                   LLVMAtomicOrderingSequentiallyConsistent,
1185                                   false);
1186    }
1187    temp_res = LLVMBuildLoad2(builder, atom_bld->vec_type, atom_res, "");
1188    temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
1189    LLVMBuildStore(builder, temp_res, atom_res);
1190    lp_build_else(&ifthen);
1191    temp_res = LLVMBuildLoad2(builder, atom_bld->vec_type, atom_res, "");
1192    LLVMValueRef zero_val = lp_build_zero_bits(gallivm, val_bit_size, is_flt);
1193    temp_res = LLVMBuildInsertElement(builder, temp_res, zero_val, loop_state.counter, "");
1194    LLVMBuildStore(builder, temp_res, atom_res);
1195    lp_build_endif(&ifthen);
1196    lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
1197                           NULL, LLVMIntUGE);
1198    *result = LLVMBuildLoad2(builder, LLVMTypeOf(val), atom_res, "");
1199 }
1200 
1201 /* Returns a boolean for whether the offset is in range of the given limit for
1202  * SSBO/UBO dereferences.
1203  */
1204 static LLVMValueRef
lp_offset_in_range(struct lp_build_nir_context * bld_base,LLVMValueRef offset,LLVMValueRef limit)1205 lp_offset_in_range(struct lp_build_nir_context *bld_base,
1206                    LLVMValueRef offset,
1207                    LLVMValueRef limit)
1208 {
1209    struct gallivm_state *gallivm = bld_base->base.gallivm;
1210    LLVMBuilderRef builder = gallivm->builder;
1211 
1212    LLVMValueRef fetch_extent = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, 1), "");
1213    LLVMValueRef fetch_in_bounds = LLVMBuildICmp(gallivm->builder, LLVMIntUGE, limit, fetch_extent, "");
1214    LLVMValueRef fetch_non_negative = LLVMBuildICmp(gallivm->builder, LLVMIntSGE, offset, lp_build_const_int32(gallivm, 0), "");
1215    return LLVMBuildAnd(gallivm->builder, fetch_in_bounds, fetch_non_negative, "");
1216 }
1217 
1218 static LLVMValueRef
build_resource_to_scalar(struct lp_build_nir_context * bld_base,LLVMValueRef resource)1219 build_resource_to_scalar(struct lp_build_nir_context *bld_base, LLVMValueRef resource)
1220 {
1221    struct gallivm_state *gallivm = bld_base->base.gallivm;
1222 
1223    LLVMValueRef invocation = first_active_invocation(bld_base);
1224 
1225    LLVMValueRef set = LLVMBuildExtractValue(gallivm->builder, resource, 0, "");
1226    set = LLVMBuildExtractElement(gallivm->builder, set, invocation, "");
1227 
1228    LLVMValueRef binding = LLVMBuildExtractValue(gallivm->builder, resource, 1, "");
1229    binding = LLVMBuildExtractElement(gallivm->builder, binding, invocation, "");
1230 
1231    LLVMValueRef components[2] = { set, binding };
1232    return lp_nir_array_build_gather_values(gallivm->builder, components, 2);
1233 }
1234 
emit_load_ubo(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,bool offset_is_uniform,LLVMValueRef index,LLVMValueRef offset,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1235 static void emit_load_ubo(struct lp_build_nir_context *bld_base,
1236                           unsigned nc,
1237                           unsigned bit_size,
1238                           bool offset_is_uniform,
1239                           LLVMValueRef index,
1240                           LLVMValueRef offset,
1241                           LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1242 {
1243    if (LLVMGetTypeKind(LLVMTypeOf(index)) == LLVMArrayTypeKind)
1244       index = build_resource_to_scalar(bld_base, index);
1245 
1246    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1247    struct gallivm_state *gallivm = bld_base->base.gallivm;
1248    LLVMBuilderRef builder = gallivm->builder;
1249    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1250    struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size);
1251    LLVMValueRef consts_ptr = lp_llvm_buffer_base(gallivm, bld->consts_ptr, index, LP_MAX_TGSI_CONST_BUFFERS);
1252    LLVMValueRef num_consts = lp_llvm_buffer_num_elements(gallivm, bld->consts_ptr, index, LP_MAX_TGSI_CONST_BUFFERS);
1253    unsigned size_shift = bit_size_to_shift_size(bit_size);
1254    if (size_shift)
1255       offset = lp_build_shr(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, size_shift));
1256 
1257    LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0);
1258    consts_ptr = LLVMBuildBitCast(builder, consts_ptr, ptr_type, "");
1259 
1260    if (offset_is_uniform) {
1261       offset = LLVMBuildExtractElement(builder, offset, first_active_invocation(bld_base), "");
1262       struct lp_build_context *load_bld = get_int_bld(bld_base, true, bit_size);
1263       switch (bit_size) {
1264       case 8:
1265          num_consts = LLVMBuildShl(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 2), "");
1266          break;
1267       case 16:
1268          num_consts = LLVMBuildShl(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 1), "");
1269          break;
1270       case 64:
1271          num_consts = LLVMBuildLShr(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 1), "");
1272          break;
1273       default: break;
1274       }
1275       for (unsigned c = 0; c < nc; c++) {
1276          LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");
1277 
1278          LLVMValueRef scalar;
1279          /* If loading outside the UBO, we need to skip the load and read 0 instead. */
1280          LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, false);
1281          LLVMValueRef res_store = lp_build_alloca(gallivm, LLVMTypeOf(zero), "");
1282          LLVMBuildStore(builder, zero, res_store);
1283 
1284          struct lp_build_if_state ifthen;
1285          lp_build_if(&ifthen, gallivm, lp_offset_in_range(bld_base, chan_offset, num_consts));
1286          LLVMBuildStore(builder, lp_build_pointer_get2(builder, bld_broad->elem_type,
1287                                                        consts_ptr, chan_offset), res_store);
1288          lp_build_endif(&ifthen);
1289 
1290          scalar = LLVMBuildLoad2(builder, LLVMTypeOf(zero), res_store, "");
1291 
1292          result[c] = lp_build_broadcast_scalar(load_bld, scalar);
1293       }
1294    } else {
1295       LLVMValueRef overflow_mask;
1296 
1297       num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
1298       if (bit_size == 64)
1299          num_consts = lp_build_shr_imm(uint_bld, num_consts, 1);
1300       else if (bit_size == 16)
1301          num_consts = lp_build_shl_imm(uint_bld, num_consts, 1);
1302       else if (bit_size == 8)
1303          num_consts = lp_build_shl_imm(uint_bld, num_consts, 2);
1304 
1305       for (unsigned c = 0; c < nc; c++) {
1306          LLVMValueRef this_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
1307          overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
1308                                           this_offset, num_consts);
1309          result[c] = build_gather(bld_base, bld_broad, bld_broad->elem_type, consts_ptr, this_offset, overflow_mask, NULL);
1310       }
1311    }
1312 }
1313 
1314 static void
emit_load_const(struct lp_build_nir_context * bld_base,const nir_load_const_instr * instr,LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])1315 emit_load_const(struct lp_build_nir_context *bld_base,
1316                 const nir_load_const_instr *instr,
1317                 LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
1318 {
1319    struct lp_build_context *int_bld = get_int_bld(bld_base, true, instr->def.bit_size);
1320    const unsigned bits = instr->def.bit_size;
1321 
1322    for (unsigned i = 0; i < instr->def.num_components; i++) {
1323      outval[i] = lp_build_const_int_vec(bld_base->base.gallivm, int_bld->type,
1324                                         bits == 32 ? instr->value[i].u32
1325                                                    : instr->value[i].u64);
1326    }
1327    for (unsigned i = instr->def.num_components; i < NIR_MAX_VEC_COMPONENTS; i++) {
1328       outval[i] = NULL;
1329    }
1330 }
1331 
1332 /**
1333  * Get the base address of SSBO[@index] for the @invocation channel, returning
1334  * the address and also the bounds (in units of the bit_size).
1335  */
1336 static LLVMValueRef
ssbo_base_pointer(struct lp_build_nir_context * bld_base,unsigned bit_size,LLVMValueRef index,LLVMValueRef invocation,LLVMValueRef * bounds)1337 ssbo_base_pointer(struct lp_build_nir_context *bld_base,
1338                   unsigned bit_size,
1339                   LLVMValueRef index, LLVMValueRef invocation, LLVMValueRef *bounds)
1340 {
1341    struct gallivm_state *gallivm = bld_base->base.gallivm;
1342    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1343    uint32_t shift_val = bit_size_to_shift_size(bit_size);
1344 
1345    LLVMValueRef ssbo_idx;
1346    LLVMValueRef buffers;
1347    uint32_t buffers_limit;
1348    if (LLVMGetTypeKind(LLVMTypeOf(index)) == LLVMArrayTypeKind) {
1349       LLVMValueRef set = LLVMBuildExtractValue(gallivm->builder, index, 0, "");
1350       set = LLVMBuildExtractElement(gallivm->builder, set, invocation, "");
1351 
1352       LLVMValueRef binding = LLVMBuildExtractValue(gallivm->builder, index, 1, "");
1353       binding = LLVMBuildExtractElement(gallivm->builder, binding, invocation, "");
1354 
1355       LLVMValueRef components[2] = { set, binding };
1356       ssbo_idx = lp_nir_array_build_gather_values(gallivm->builder, components, 2);
1357 
1358       buffers = bld->consts_ptr;
1359       buffers_limit = LP_MAX_TGSI_CONST_BUFFERS;
1360    } else {
1361       ssbo_idx = LLVMBuildExtractElement(gallivm->builder, index, invocation, "");
1362 
1363       buffers = bld->ssbo_ptr;
1364       buffers_limit = LP_MAX_TGSI_SHADER_BUFFERS;
1365    }
1366 
1367    LLVMValueRef ssbo_size_ptr = lp_llvm_buffer_num_elements(gallivm, buffers, ssbo_idx, buffers_limit);
1368    LLVMValueRef ssbo_ptr = lp_llvm_buffer_base(gallivm, buffers, ssbo_idx, buffers_limit);
1369    if (bounds)
1370       *bounds = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, shift_val), "");
1371 
1372    return ssbo_ptr;
1373 }
1374 
1375 static LLVMValueRef
mem_access_base_pointer(struct lp_build_nir_context * bld_base,struct lp_build_context * mem_bld,unsigned bit_size,bool payload,LLVMValueRef index,LLVMValueRef invocation,LLVMValueRef * bounds)1376 mem_access_base_pointer(struct lp_build_nir_context *bld_base,
1377                         struct lp_build_context *mem_bld,
1378                         unsigned bit_size, bool payload,
1379                         LLVMValueRef index, LLVMValueRef invocation, LLVMValueRef *bounds)
1380 {
1381    struct gallivm_state *gallivm = bld_base->base.gallivm;
1382    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1383    LLVMValueRef ptr;
1384 
1385    if (index) {
1386       ptr = ssbo_base_pointer(bld_base, bit_size, index, invocation, bounds);
1387    } else {
1388       if (payload) {
1389          ptr = bld->payload_ptr;
1390          ptr = LLVMBuildPtrToInt(gallivm->builder, ptr, bld_base->int64_bld.elem_type, "");
1391          ptr = LLVMBuildAdd(gallivm->builder, ptr, lp_build_const_int64(gallivm, 12), "");
1392          ptr = LLVMBuildIntToPtr(gallivm->builder, ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), "");
1393       }
1394       else
1395          ptr = bld->shared_ptr;
1396       *bounds = NULL;
1397    }
1398 
1399    /* Cast it to the pointer type of the access this instruction is doing. */
1400    if (bit_size == 32 && !mem_bld->type.floating)
1401       return ptr;
1402    else
1403       return LLVMBuildBitCast(gallivm->builder, ptr, LLVMPointerType(mem_bld->elem_type, 0), "");
1404 }
1405 
emit_load_mem(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,bool index_and_offset_are_uniform,bool payload,LLVMValueRef index,LLVMValueRef offset,LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])1406 static void emit_load_mem(struct lp_build_nir_context *bld_base,
1407                           unsigned nc,
1408                           unsigned bit_size,
1409                           bool index_and_offset_are_uniform,
1410                           bool payload,
1411                           LLVMValueRef index,
1412                           LLVMValueRef offset,
1413                           LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
1414 {
1415    struct gallivm_state *gallivm = bld_base->base.gallivm;
1416    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1417    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1418    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1419    struct lp_build_context *load_bld;
1420    uint32_t shift_val = bit_size_to_shift_size(bit_size);
1421 
1422    load_bld = get_int_bld(bld_base, true, bit_size);
1423 
1424    offset = LLVMBuildAShr(gallivm->builder, offset, lp_build_const_int_vec(gallivm, uint_bld->type, shift_val), "");
1425 
1426    /* If the address is uniform, then use the address from the first active
1427     * invocation 0 to load, and broadcast to all invocations.  We can't do
1428     * computed first active invocation for shared accesses (index == NULL),
1429     * though, since those don't do bounds checking and we could use an invalid
1430     * offset if exec_mask == 0.
1431     */
1432    if (index_and_offset_are_uniform && (invocation_0_must_be_active(bld_base) || index)) {
1433       LLVMValueRef ssbo_limit;
1434       LLVMValueRef first_active = first_active_invocation(bld_base);
1435       LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, payload, index,
1436                                                      first_active, &ssbo_limit);
1437 
1438       offset = LLVMBuildExtractElement(gallivm->builder, offset, first_active, "");
1439 
1440       for (unsigned c = 0; c < nc; c++) {
1441          LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");
1442 
1443          LLVMValueRef scalar;
1444          /* If loading outside the SSBO, we need to skip the load and read 0 instead. */
1445          if (ssbo_limit) {
1446             LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, false);
1447             LLVMValueRef res_store = lp_build_alloca(gallivm, LLVMTypeOf(zero), "");
1448             LLVMBuildStore(builder, zero, res_store);
1449 
1450             struct lp_build_if_state ifthen;
1451             lp_build_if(&ifthen, gallivm, lp_offset_in_range(bld_base, chan_offset, ssbo_limit));
1452             LLVMBuildStore(builder, lp_build_pointer_get2(builder, load_bld->elem_type, mem_ptr, chan_offset), res_store);
1453             lp_build_endif(&ifthen);
1454 
1455             scalar = LLVMBuildLoad2(builder, LLVMTypeOf(zero), res_store, "");
1456          } else {
1457             scalar = lp_build_pointer_get2(builder, load_bld->elem_type, mem_ptr, chan_offset);
1458          }
1459 
1460          outval[c] = lp_build_broadcast_scalar(load_bld, scalar);
1461       }
1462       return;
1463    }
1464 
1465    /* although the index is dynamically uniform that doesn't count if exec mask isn't set, so read the one-by-one */
1466 
1467    LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
1468    for (unsigned c = 0; c < nc; c++)
1469       result[c] = lp_build_alloca(gallivm, load_bld->vec_type, "");
1470 
1471    LLVMValueRef gather_mask = mask_vec_with_helpers(bld_base);
1472    LLVMValueRef gather_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, gather_mask, uint_bld->zero, "");
1473    for (unsigned i = 0; i < uint_bld->type.length; i++) {
1474       LLVMValueRef counter = lp_build_const_int32(gallivm, i);
1475       LLVMValueRef element_gather_cond = LLVMBuildExtractElement(gallivm->builder, gather_cond, counter, "");
1476 
1477       struct lp_build_if_state if_gather_element;
1478       lp_build_if(&if_gather_element, gallivm, element_gather_cond);
1479 
1480       LLVMValueRef ssbo_limit;
1481       LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, payload, index,
1482                                                      counter, &ssbo_limit);
1483 
1484       LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, counter, "");
1485 
1486       for (unsigned c = 0; c < nc; c++) {
1487          LLVMValueRef loop_index = LLVMBuildAdd(builder, loop_offset, lp_build_const_int32(gallivm, c), "");
1488          LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1);
1489          if (ssbo_limit) {
1490             LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_index, ssbo_limit);
1491             do_fetch = LLVMBuildAnd(builder, do_fetch, ssbo_oob_cmp, "");
1492          }
1493 
1494          struct lp_build_if_state ifthen;
1495          LLVMValueRef fetch_cond, temp_res;
1496 
1497          fetch_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_fetch, lp_build_const_int32(gallivm, 0), "");
1498 
1499          lp_build_if(&ifthen, gallivm, fetch_cond);
1500          LLVMValueRef scalar = lp_build_pointer_get2(builder, load_bld->elem_type, mem_ptr, loop_index);
1501 
1502          temp_res = LLVMBuildLoad2(builder, load_bld->vec_type, result[c], "");
1503          temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, counter, "");
1504          LLVMBuildStore(builder, temp_res, result[c]);
1505          lp_build_else(&ifthen);
1506          temp_res = LLVMBuildLoad2(builder, load_bld->vec_type, result[c], "");
1507          LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, false);
1508          temp_res = LLVMBuildInsertElement(builder, temp_res, zero, counter, "");
1509          LLVMBuildStore(builder, temp_res, result[c]);
1510          lp_build_endif(&ifthen);
1511       }
1512 
1513       lp_build_endif(&if_gather_element);
1514    }
1515    for (unsigned c = 0; c < nc; c++)
1516       outval[c] = LLVMBuildLoad2(gallivm->builder, load_bld->vec_type, result[c], "");
1517 
1518 }
1519 
emit_store_mem(struct lp_build_nir_context * bld_base,unsigned writemask,unsigned nc,unsigned bit_size,bool index_and_offset_are_uniform,bool payload,LLVMValueRef index,LLVMValueRef offset,LLVMValueRef dst)1520 static void emit_store_mem(struct lp_build_nir_context *bld_base,
1521                            unsigned writemask,
1522                            unsigned nc,
1523                            unsigned bit_size,
1524                            bool index_and_offset_are_uniform,
1525                            bool payload,
1526                            LLVMValueRef index,
1527                            LLVMValueRef offset,
1528                            LLVMValueRef dst)
1529 {
1530    struct gallivm_state *gallivm = bld_base->base.gallivm;
1531    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1532    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1533    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1534    struct lp_build_context *store_bld;
1535    uint32_t shift_val = bit_size_to_shift_size(bit_size);
1536    store_bld = get_int_bld(bld_base, true, bit_size);
1537 
1538    offset = lp_build_shr_imm(uint_bld, offset, shift_val);
1539 
1540    /* If the address is uniform, then just store the value from the first
1541     * channel instead of making LLVM unroll the invocation loop.  Note that we
1542     * don't use first_active_uniform(), since we aren't guaranteed that there is
1543     * actually an active invocation.
1544     */
1545    if (index_and_offset_are_uniform && invocation_0_must_be_active(bld_base)) {
1546       LLVMValueRef ssbo_limit;
1547       LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, store_bld, bit_size, payload, index,
1548                                                      lp_build_const_int32(gallivm, 0), &ssbo_limit);
1549 
1550       offset = LLVMBuildExtractElement(gallivm->builder, offset, lp_build_const_int32(gallivm, 0), "");
1551 
1552       for (unsigned c = 0; c < nc; c++) {
1553          if (!(writemask & (1u << c)))
1554             continue;
1555 
1556          /* Pick out invocation 0's value. */
1557          LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
1558          LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
1559                                                          lp_build_const_int32(gallivm, 0), "");
1560          value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, "");
1561 
1562          LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");
1563 
1564          /* If storing outside the SSBO, we need to skip the store instead. */
1565          if (ssbo_limit) {
1566             struct lp_build_if_state ifthen;
1567             lp_build_if(&ifthen, gallivm, lp_offset_in_range(bld_base, chan_offset, ssbo_limit));
1568             lp_build_pointer_set(builder, mem_ptr, chan_offset, value_ptr);
1569             lp_build_endif(&ifthen);
1570          } else {
1571             lp_build_pointer_set(builder, mem_ptr, chan_offset, value_ptr);
1572          }
1573       }
1574       return;
1575    }
1576 
1577    LLVMValueRef exec_mask = mask_vec(bld_base);
1578    LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
1579    for (unsigned i = 0; i < uint_bld->type.length; i++) {
1580       LLVMValueRef counter = lp_build_const_int32(gallivm, i);
1581       LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, counter, "");
1582 
1583       struct lp_build_if_state exec_ifthen;
1584       lp_build_if(&exec_ifthen, gallivm, loop_cond);
1585 
1586       LLVMValueRef ssbo_limit;
1587       LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, store_bld, bit_size, payload, index,
1588                                                      counter, &ssbo_limit);
1589 
1590       LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, counter, "");
1591 
1592       for (unsigned c = 0; c < nc; c++) {
1593          if (!(writemask & (1u << c)))
1594             continue;
1595          LLVMValueRef loop_index = LLVMBuildAdd(builder, loop_offset, lp_build_const_int32(gallivm, c), "");
1596          LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
1597          LLVMValueRef do_store = lp_build_const_int32(gallivm, -1);
1598 
1599          if (ssbo_limit) {
1600             LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_index, ssbo_limit);
1601             do_store = LLVMBuildAnd(builder, do_store, ssbo_oob_cmp, "");
1602          }
1603 
1604          LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
1605                                                           counter, "");
1606          value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, "");
1607          struct lp_build_if_state ifthen;
1608          LLVMValueRef store_cond;
1609 
1610          store_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_store, lp_build_const_int32(gallivm, 0), "");
1611          lp_build_if(&ifthen, gallivm, store_cond);
1612          lp_build_pointer_set(builder, mem_ptr, loop_index, value_ptr);
1613          lp_build_endif(&ifthen);
1614       }
1615 
1616       lp_build_endif(&exec_ifthen);
1617    }
1618 }
1619 
1620 
emit_atomic_mem(struct lp_build_nir_context * bld_base,nir_atomic_op nir_op,uint32_t bit_size,bool payload,LLVMValueRef index,LLVMValueRef offset,LLVMValueRef val,LLVMValueRef val2,LLVMValueRef * result)1621 static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
1622                             nir_atomic_op nir_op,
1623                             uint32_t bit_size,
1624                             bool payload,
1625                             LLVMValueRef index, LLVMValueRef offset,
1626                             LLVMValueRef val, LLVMValueRef val2,
1627                             LLVMValueRef *result)
1628 {
1629    struct gallivm_state *gallivm = bld_base->base.gallivm;
1630    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1631    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1632    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1633    uint32_t shift_val = bit_size_to_shift_size(bit_size);
1634    bool is_float = nir_atomic_op_type(nir_op) == nir_type_float;
1635    struct lp_build_context *atomic_bld = is_float ? get_flt_bld(bld_base, bit_size) : get_int_bld(bld_base, true, bit_size);
1636 
1637    offset = lp_build_shr_imm(uint_bld, offset, shift_val);
1638    LLVMValueRef atom_res = lp_build_alloca(gallivm,
1639                                            atomic_bld->vec_type, "");
1640 
1641    LLVMValueRef exec_mask = mask_vec(bld_base);
1642    LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
1643    for (unsigned i = 0; i < uint_bld->type.length; i++) {
1644       LLVMValueRef counter = lp_build_const_int32(gallivm, i);
1645       LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, counter, "");
1646 
1647       struct lp_build_if_state exec_ifthen;
1648       lp_build_if(&exec_ifthen, gallivm, loop_cond);
1649 
1650       LLVMValueRef ssbo_limit;
1651       LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, atomic_bld, bit_size, payload, index,
1652                                                      counter, &ssbo_limit);
1653 
1654       LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, counter, "");
1655 
1656       LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1);
1657       if (ssbo_limit) {
1658          LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_offset, ssbo_limit);
1659          do_fetch = LLVMBuildAnd(builder, do_fetch, ssbo_oob_cmp, "");
1660       }
1661 
1662       LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
1663                                                        counter, "");
1664       value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, atomic_bld->elem_type, "");
1665 
1666       LLVMValueRef scalar_ptr = LLVMBuildGEP2(builder, atomic_bld->elem_type, mem_ptr, &loop_offset, 1, "");
1667 
1668       struct lp_build_if_state ifthen;
1669       LLVMValueRef inner_cond, temp_res;
1670       LLVMValueRef scalar;
1671 
1672       inner_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_fetch, lp_build_const_int32(gallivm, 0), "");
1673       lp_build_if(&ifthen, gallivm, inner_cond);
1674 
1675       if (val2 != NULL) {
1676          LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
1677                                                             counter, "");
1678          cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atomic_bld->elem_type, "");
1679          scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
1680                                          cas_src_ptr,
1681                                          LLVMAtomicOrderingSequentiallyConsistent,
1682                                          LLVMAtomicOrderingSequentiallyConsistent,
1683                                          false);
1684          scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
1685       } else {
1686          scalar = LLVMBuildAtomicRMW(builder, lp_translate_atomic_op(nir_op),
1687                                      scalar_ptr, value_ptr,
1688                                      LLVMAtomicOrderingSequentiallyConsistent,
1689                                      false);
1690       }
1691       temp_res = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, "");
1692       temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, counter, "");
1693       LLVMBuildStore(builder, temp_res, atom_res);
1694       lp_build_else(&ifthen);
1695       temp_res = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, "");
1696       LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, is_float);
1697       temp_res = LLVMBuildInsertElement(builder, temp_res, zero, counter, "");
1698       LLVMBuildStore(builder, temp_res, atom_res);
1699       lp_build_endif(&ifthen);
1700 
1701       lp_build_endif(&exec_ifthen);
1702    }
1703    *result = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, "");
1704 }
1705 
emit_barrier(struct lp_build_nir_context * bld_base)1706 static void emit_barrier(struct lp_build_nir_context *bld_base)
1707 {
1708    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1709    struct gallivm_state * gallivm = bld_base->base.gallivm;
1710 
1711    LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");
1712 
1713    lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);
1714    LLVMPositionBuilderAtEnd(gallivm->builder, resume);
1715 }
1716 
emit_get_ssbo_size(struct lp_build_nir_context * bld_base,LLVMValueRef index)1717 static LLVMValueRef emit_get_ssbo_size(struct lp_build_nir_context *bld_base,
1718                                        LLVMValueRef index)
1719 {
1720    struct lp_build_context *bld_broad = &bld_base->uint_bld;
1721 
1722    LLVMValueRef size;
1723    ssbo_base_pointer(bld_base, 8, index, first_active_invocation(bld_base), &size);
1724 
1725    return lp_build_broadcast_scalar(bld_broad, size);
1726 }
1727 
emit_image_op(struct lp_build_nir_context * bld_base,struct lp_img_params * params)1728 static void emit_image_op(struct lp_build_nir_context *bld_base,
1729                           struct lp_img_params *params)
1730 {
1731    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1732    struct gallivm_state *gallivm = bld_base->base.gallivm;
1733 
1734    params->type = bld_base->base.type;
1735    params->resources_type = bld->resources_type;
1736    params->resources_ptr = bld->resources_ptr;
1737    params->thread_data_type = bld->thread_data_type;
1738    params->thread_data_ptr = bld->thread_data_ptr;
1739    params->exec_mask = mask_vec(bld_base);
1740 
1741    if (params->image_index_offset)
1742       params->image_index_offset = LLVMBuildExtractElement(gallivm->builder, params->image_index_offset,
1743                                                            first_active_invocation(bld_base), "");
1744 
1745    if (params->resource)
1746       params->resource = build_resource_to_scalar(bld_base, params->resource);
1747 
1748    bld->image->emit_op(bld->image,
1749                        bld->bld_base.base.gallivm,
1750                        params);
1751 
1752 }
1753 
emit_image_size(struct lp_build_nir_context * bld_base,struct lp_sampler_size_query_params * params)1754 static void emit_image_size(struct lp_build_nir_context *bld_base,
1755                             struct lp_sampler_size_query_params *params)
1756 {
1757    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1758    struct gallivm_state *gallivm = bld_base->base.gallivm;
1759 
1760    params->int_type = bld_base->int_bld.type;
1761    params->resources_type = bld->resources_type;
1762    params->resources_ptr = bld->resources_ptr;
1763    if (params->texture_unit_offset)
1764       params->texture_unit_offset = LLVMBuildExtractElement(gallivm->builder, params->texture_unit_offset,
1765                                                             first_active_invocation(bld_base), "");
1766    bld->image->emit_size_query(bld->image,
1767                                bld->bld_base.base.gallivm,
1768                                params);
1769 
1770 }
1771 
init_var_slots(struct lp_build_nir_context * bld_base,nir_variable * var,unsigned sc)1772 static void init_var_slots(struct lp_build_nir_context *bld_base,
1773                            nir_variable *var, unsigned sc)
1774 {
1775    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1776    unsigned slots = glsl_count_attribute_slots(var->type, false) * 4;
1777 
1778    if (!bld->outputs)
1779      return;
1780    for (unsigned comp = sc; comp < slots + sc; comp++) {
1781       unsigned this_loc = var->data.driver_location + (comp / 4);
1782       unsigned this_chan = comp % 4;
1783 
1784       if (!bld->outputs[this_loc][this_chan])
1785          bld->outputs[this_loc][this_chan] = lp_build_alloca(bld_base->base.gallivm,
1786                                                              bld_base->base.vec_type, "output");
1787    }
1788 }
1789 
emit_var_decl(struct lp_build_nir_context * bld_base,nir_variable * var)1790 static void emit_var_decl(struct lp_build_nir_context *bld_base,
1791                           nir_variable *var)
1792 {
1793    unsigned sc = var->data.location_frac;
1794    switch (var->data.mode) {
1795    case nir_var_shader_out: {
1796       if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
1797          if (var->data.location == FRAG_RESULT_STENCIL)
1798             sc = 1;
1799          else if (var->data.location == FRAG_RESULT_DEPTH)
1800             sc = 2;
1801       }
1802       init_var_slots(bld_base, var, sc);
1803       break;
1804    }
1805    default:
1806       break;
1807    }
1808 }
1809 
emit_tex(struct lp_build_nir_context * bld_base,struct lp_sampler_params * params)1810 static void emit_tex(struct lp_build_nir_context *bld_base,
1811                      struct lp_sampler_params *params)
1812 {
1813    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1814    struct gallivm_state *gallivm = bld_base->base.gallivm;
1815 
1816    params->type = bld_base->base.type;
1817    params->resources_type = bld->resources_type;
1818    params->resources_ptr = bld->resources_ptr;
1819    params->thread_data_type = bld->thread_data_type;
1820    params->thread_data_ptr = bld->thread_data_ptr;
1821    params->exec_mask = mask_vec(bld_base);
1822 
1823    if (params->texture_index_offset && bld_base->shader->info.stage != MESA_SHADER_FRAGMENT) {
1824       /* this is horrible but this can be dynamic */
1825       LLVMValueRef coords[5];
1826       LLVMValueRef *orig_texel_ptr;
1827       struct lp_build_context *uint_bld = &bld_base->uint_bld;
1828       LLVMValueRef result[4] = { LLVMGetUndef(bld_base->base.vec_type),
1829                                  LLVMGetUndef(bld_base->base.vec_type),
1830                                  LLVMGetUndef(bld_base->base.vec_type),
1831                                  LLVMGetUndef(bld_base->base.vec_type) };
1832       LLVMValueRef texel[4], orig_offset, orig_lod;
1833       unsigned i;
1834       orig_texel_ptr = params->texel;
1835       orig_lod = params->lod;
1836       for (i = 0; i < 5; i++) {
1837          coords[i] = params->coords[i];
1838       }
1839       orig_offset = params->texture_index_offset;
1840 
1841       for (unsigned v = 0; v < uint_bld->type.length; v++) {
1842          LLVMValueRef idx = lp_build_const_int32(gallivm, v);
1843          LLVMValueRef new_coords[5];
1844          for (i = 0; i < 5; i++) {
1845             new_coords[i] = LLVMBuildExtractElement(gallivm->builder,
1846                                                     coords[i], idx, "");
1847          }
1848          params->coords = new_coords;
1849          params->texture_index_offset = LLVMBuildExtractElement(gallivm->builder,
1850                                                                 orig_offset,
1851                                                                 idx, "");
1852          params->type = lp_elem_type(bld_base->base.type);
1853 
1854          if (orig_lod)
1855             params->lod = LLVMBuildExtractElement(gallivm->builder, orig_lod, idx, "");
1856          params->texel = texel;
1857          bld->sampler->emit_tex_sample(bld->sampler,
1858                                        gallivm,
1859                                        params);
1860 
1861          for (i = 0; i < 4; i++) {
1862             result[i] = LLVMBuildInsertElement(gallivm->builder, result[i], texel[i], idx, "");
1863          }
1864       }
1865       for (i = 0; i < 4; i++) {
1866          orig_texel_ptr[i] = result[i];
1867       }
1868       return;
1869    }
1870 
1871    if (params->texture_index_offset) {
1872       params->texture_index_offset = LLVMBuildExtractElement(gallivm->builder, params->texture_index_offset,
1873                                                              first_active_invocation(bld_base), "");
1874    }
1875 
1876    if (params->texture_resource)
1877       params->texture_resource = build_resource_to_scalar(bld_base, params->texture_resource);
1878 
1879    if (params->sampler_resource)
1880       params->sampler_resource = build_resource_to_scalar(bld_base, params->sampler_resource);
1881 
1882    params->type = bld_base->base.type;
1883    bld->sampler->emit_tex_sample(bld->sampler,
1884                                  bld->bld_base.base.gallivm,
1885                                  params);
1886 }
1887 
emit_tex_size(struct lp_build_nir_context * bld_base,struct lp_sampler_size_query_params * params)1888 static void emit_tex_size(struct lp_build_nir_context *bld_base,
1889                           struct lp_sampler_size_query_params *params)
1890 {
1891    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1892 
1893    params->int_type = bld_base->int_bld.type;
1894    params->resources_type = bld->resources_type;
1895    params->resources_ptr = bld->resources_ptr;
1896    if (params->texture_unit_offset)
1897       params->texture_unit_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder,
1898                                                              params->texture_unit_offset,
1899                                                              lp_build_const_int32(bld_base->base.gallivm, 0), "");
1900 
1901    params->exec_mask = mask_vec(bld_base);
1902    if (params->resource)
1903       params->resource = build_resource_to_scalar(bld_base, params->resource);
1904 
1905    bld->sampler->emit_size_query(bld->sampler,
1906                                  bld->bld_base.base.gallivm,
1907                                  params);
1908 }
1909 
get_local_invocation_index(struct lp_build_nir_soa_context * bld)1910 static LLVMValueRef get_local_invocation_index(struct lp_build_nir_soa_context *bld)
1911 {
1912    struct lp_build_nir_context *bld_base = &bld->bld_base;
1913    LLVMValueRef tmp, tmp2;
1914 
1915    tmp = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.block_size[1]);
1916    tmp2 = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.block_size[0]);
1917    tmp = lp_build_mul(&bld_base->uint_bld, tmp, tmp2);
1918    tmp = lp_build_mul(&bld_base->uint_bld, tmp, bld->system_values.thread_id[2]);
1919 
1920    tmp2 = lp_build_mul(&bld_base->uint_bld, tmp2, bld->system_values.thread_id[1]);
1921    tmp = lp_build_add(&bld_base->uint_bld, tmp, tmp2);
1922    tmp = lp_build_add(&bld_base->uint_bld, tmp, bld->system_values.thread_id[0]);
1923    return tmp;
1924 }
1925 
emit_sysval_intrin(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1926 static void emit_sysval_intrin(struct lp_build_nir_context *bld_base,
1927                                nir_intrinsic_instr *instr,
1928                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1929 {
1930    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1931    struct gallivm_state *gallivm = bld_base->base.gallivm;
1932    struct lp_build_context *bld_broad = get_int_bld(bld_base, true, instr->def.bit_size);
1933    switch (instr->intrinsic) {
1934    case nir_intrinsic_load_instance_id:
1935       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1936       break;
1937    case nir_intrinsic_load_base_instance:
1938       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.base_instance);
1939       break;
1940    case nir_intrinsic_load_base_vertex:
1941       result[0] = bld->system_values.basevertex;
1942       break;
1943    case nir_intrinsic_load_first_vertex:
1944       result[0] = bld->system_values.firstvertex;
1945       break;
1946    case nir_intrinsic_load_vertex_id:
1947       result[0] = bld->system_values.vertex_id;
1948       break;
1949    case nir_intrinsic_load_primitive_id:
1950       result[0] = bld->system_values.prim_id;
1951       break;
1952    case nir_intrinsic_load_workgroup_id: {
1953       LLVMValueRef tmp[3];
1954       for (unsigned i = 0; i < 3; i++) {
1955          tmp[i] = bld->system_values.block_id[i];
1956          result[i] = lp_build_broadcast_scalar(bld_broad, tmp[i]);
1957       }
1958       break;
1959    }
1960    case nir_intrinsic_load_local_invocation_id:
1961       for (unsigned i = 0; i < 3; i++)
1962          result[i] = bld->system_values.thread_id[i];
1963       break;
1964    case nir_intrinsic_load_local_invocation_index:
1965       result[0] = get_local_invocation_index(bld);
1966       break;
1967    case nir_intrinsic_load_num_workgroups: {
1968       LLVMValueRef tmp[3];
1969       for (unsigned i = 0; i < 3; i++) {
1970          tmp[i] = bld->system_values.grid_size[i];
1971          result[i] = lp_build_broadcast_scalar(bld_broad, tmp[i]);
1972       }
1973       break;
1974    }
1975    case nir_intrinsic_load_invocation_id:
1976       if (bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL)
1977          result[0] = bld->system_values.invocation_id;
1978       else
1979          result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
1980       break;
1981    case nir_intrinsic_load_front_face:
1982       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);
1983       break;
1984    case nir_intrinsic_load_draw_id:
1985       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.draw_id);
1986       break;
1987    default:
1988       break;
1989    case nir_intrinsic_load_workgroup_size:
1990      for (unsigned i = 0; i < 3; i++)
1991        result[i] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.block_size[i]);
1992      break;
1993    case nir_intrinsic_load_work_dim:
1994       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.work_dim);
1995       break;
1996    case nir_intrinsic_load_tess_coord:
1997       for (unsigned i = 0; i < 3; i++) {
1998 	 result[i] = LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_coord, i, "");
1999       }
2000       break;
2001    case nir_intrinsic_load_tess_level_outer:
2002       for (unsigned i = 0; i < 4; i++)
2003          result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_outer, i, ""));
2004       break;
2005    case nir_intrinsic_load_tess_level_inner:
2006       for (unsigned i = 0; i < 2; i++)
2007          result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_inner, i, ""));
2008       break;
2009    case nir_intrinsic_load_patch_vertices_in:
2010       result[0] = bld->system_values.vertices_in;
2011       break;
2012    case nir_intrinsic_load_sample_id:
2013       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.sample_id);
2014       break;
2015    case nir_intrinsic_load_sample_pos:
2016       for (unsigned i = 0; i < 2; i++) {
2017          LLVMValueRef idx = LLVMBuildMul(gallivm->builder, bld->system_values.sample_id, lp_build_const_int32(gallivm, 2), "");
2018          idx = LLVMBuildAdd(gallivm->builder, idx, lp_build_const_int32(gallivm, i), "");
2019          LLVMValueRef val = lp_build_array_get2(gallivm, bld->system_values.sample_pos_type,
2020                                                 bld->system_values.sample_pos, idx);
2021          result[i] = lp_build_broadcast_scalar(&bld_base->base, val);
2022       }
2023       break;
2024    case nir_intrinsic_load_sample_mask_in:
2025       result[0] = bld->system_values.sample_mask_in;
2026       break;
2027    case nir_intrinsic_load_view_index:
2028       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.view_index);
2029       break;
2030    case nir_intrinsic_load_subgroup_invocation: {
2031       LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
2032       for(unsigned i = 0; i < bld->bld_base.base.type.length; ++i)
2033          elems[i] = lp_build_const_int32(gallivm, i);
2034       result[0] = LLVMConstVector(elems, bld->bld_base.base.type.length);
2035       break;
2036    }
2037    case nir_intrinsic_load_subgroup_id:
2038       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.subgroup_id);
2039       break;
2040    case nir_intrinsic_load_num_subgroups:
2041       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.num_subgroups);
2042       break;
2043    }
2044 }
2045 
emit_helper_invocation(struct lp_build_nir_context * bld_base,LLVMValueRef * dst)2046 static void emit_helper_invocation(struct lp_build_nir_context *bld_base,
2047                                    LLVMValueRef *dst)
2048 {
2049    struct gallivm_state *gallivm = bld_base->base.gallivm;
2050    struct lp_build_context *uint_bld = &bld_base->uint_bld;
2051    *dst = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, mask_vec(bld_base), lp_build_const_int_vec(gallivm, uint_bld->type, -1));
2052 }
2053 
bgnloop(struct lp_build_nir_context * bld_base)2054 static void bgnloop(struct lp_build_nir_context *bld_base)
2055 {
2056    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2057    lp_exec_bgnloop(&bld->exec_mask, true);
2058 }
2059 
endloop(struct lp_build_nir_context * bld_base)2060 static void endloop(struct lp_build_nir_context *bld_base)
2061 {
2062    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2063    lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask, bld->mask);
2064 }
2065 
lp_build_skip_branch(struct lp_build_nir_context * bld_base,bool flatten)2066 static void lp_build_skip_branch(struct lp_build_nir_context *bld_base, bool flatten)
2067 {
2068    if (flatten)
2069       return;
2070 
2071    struct gallivm_state *gallivm = bld_base->base.gallivm;
2072    LLVMBuilderRef builder = gallivm->builder;
2073 
2074    LLVMValueRef exec_mask = mask_vec_with_helpers(bld_base);
2075 
2076    LLVMValueRef bitmask = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
2077    bitmask = LLVMBuildBitCast(builder, bitmask, LLVMIntTypeInContext(gallivm->context, bld_base->uint_bld.type.length), "");
2078    bitmask = LLVMBuildZExt(builder, bitmask, bld_base->int_bld.elem_type, "");
2079 
2080    LLVMValueRef any_active = LLVMBuildICmp(builder, LLVMIntNE, bitmask, lp_build_const_int32(gallivm, 0), "any_active");
2081 
2082    assert(bld_base->if_stack_size < LP_MAX_TGSI_NESTING);
2083    lp_build_if(&bld_base->if_stack[bld_base->if_stack_size], gallivm, any_active);
2084    bld_base->if_stack_size++;
2085 }
2086 
lp_build_skip_branch_end(struct lp_build_nir_context * bld_base,bool flatten)2087 static void lp_build_skip_branch_end(struct lp_build_nir_context *bld_base, bool flatten)
2088 {
2089    if (flatten)
2090       return;
2091 
2092    assert(bld_base->if_stack_size);
2093    bld_base->if_stack_size--;
2094    lp_build_endif(&bld_base->if_stack[bld_base->if_stack_size]);
2095 }
2096 
if_cond(struct lp_build_nir_context * bld_base,LLVMValueRef cond,bool flatten)2097 static void if_cond(struct lp_build_nir_context *bld_base, LLVMValueRef cond, bool flatten)
2098 {
2099    struct gallivm_state *gallivm = bld_base->base.gallivm;
2100 
2101    LLVMBuilderRef builder = gallivm->builder;
2102    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2103    lp_exec_mask_cond_push(&bld->exec_mask, LLVMBuildBitCast(builder, cond, bld_base->base.int_vec_type, ""));
2104 
2105    lp_build_skip_branch(bld_base, flatten);
2106 }
2107 
else_stmt(struct lp_build_nir_context * bld_base,bool flatten_then,bool flatten_else)2108 static void else_stmt(struct lp_build_nir_context *bld_base, bool flatten_then, bool flatten_else)
2109 {
2110    lp_build_skip_branch_end(bld_base, flatten_then);
2111 
2112    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2113    lp_exec_mask_cond_invert(&bld->exec_mask);
2114 
2115    lp_build_skip_branch(bld_base, flatten_else);
2116 }
2117 
endif_stmt(struct lp_build_nir_context * bld_base,bool flatten)2118 static void endif_stmt(struct lp_build_nir_context *bld_base, bool flatten)
2119 {
2120    lp_build_skip_branch_end(bld_base, flatten);
2121 
2122    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2123    lp_exec_mask_cond_pop(&bld->exec_mask);
2124 }
2125 
break_stmt(struct lp_build_nir_context * bld_base)2126 static void break_stmt(struct lp_build_nir_context *bld_base)
2127 {
2128    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2129 
2130    lp_exec_break(&bld->exec_mask, NULL, false);
2131 }
2132 
continue_stmt(struct lp_build_nir_context * bld_base)2133 static void continue_stmt(struct lp_build_nir_context *bld_base)
2134 {
2135    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2136    lp_exec_continue(&bld->exec_mask);
2137 }
2138 
discard(struct lp_build_nir_context * bld_base,LLVMValueRef cond)2139 static void discard(struct lp_build_nir_context *bld_base, LLVMValueRef cond)
2140 {
2141    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2142    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2143    LLVMValueRef mask;
2144 
2145    if (!cond) {
2146       if (bld->exec_mask.has_mask) {
2147          mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2148       } else {
2149          mask = LLVMConstNull(bld->bld_base.base.int_vec_type);
2150       }
2151    } else {
2152       mask = LLVMBuildNot(builder, cond, "");
2153       if (bld->exec_mask.has_mask) {
2154          LLVMValueRef invmask;
2155          invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2156          mask = LLVMBuildOr(builder, mask, invmask, "");
2157       }
2158    }
2159    lp_build_mask_update(bld->mask, mask);
2160 }
2161 
2162 static void
increment_vec_ptr_by_mask(struct lp_build_nir_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)2163 increment_vec_ptr_by_mask(struct lp_build_nir_context * bld_base,
2164                           LLVMValueRef ptr,
2165                           LLVMValueRef mask)
2166 {
2167    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2168    LLVMValueRef current_vec = LLVMBuildLoad2(builder, LLVMTypeOf(mask), ptr, "");
2169 
2170    current_vec = LLVMBuildSub(builder, current_vec, mask, "");
2171 
2172    LLVMBuildStore(builder, current_vec, ptr);
2173 }
2174 
2175 static void
clear_uint_vec_ptr_from_mask(struct lp_build_nir_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)2176 clear_uint_vec_ptr_from_mask(struct lp_build_nir_context * bld_base,
2177                              LLVMValueRef ptr,
2178                              LLVMValueRef mask)
2179 {
2180    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2181    LLVMValueRef current_vec = LLVMBuildLoad2(builder, bld_base->uint_bld.vec_type, ptr, "");
2182 
2183    current_vec = lp_build_select(&bld_base->uint_bld,
2184                                  mask,
2185                                  bld_base->uint_bld.zero,
2186                                  current_vec);
2187 
2188    LLVMBuildStore(builder, current_vec, ptr);
2189 }
2190 
2191 static LLVMValueRef
clamp_mask_to_max_output_vertices(struct lp_build_nir_soa_context * bld,LLVMValueRef current_mask_vec,LLVMValueRef total_emitted_vertices_vec)2192 clamp_mask_to_max_output_vertices(struct lp_build_nir_soa_context * bld,
2193                                   LLVMValueRef current_mask_vec,
2194                                   LLVMValueRef total_emitted_vertices_vec)
2195 {
2196    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2197    struct lp_build_context *int_bld = &bld->bld_base.int_bld;
2198    LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
2199                                             total_emitted_vertices_vec,
2200                                             bld->max_output_vertices_vec);
2201 
2202    return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
2203 }
2204 
emit_vertex(struct lp_build_nir_context * bld_base,uint32_t stream_id)2205 static void emit_vertex(struct lp_build_nir_context *bld_base, uint32_t stream_id)
2206 {
2207    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2208    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2209 
2210    if (stream_id >= bld->gs_vertex_streams)
2211       return;
2212    assert(bld->gs_iface->emit_vertex);
2213    LLVMValueRef total_emitted_vertices_vec =
2214       LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->total_emitted_vertices_vec_ptr[stream_id], "");
2215    LLVMValueRef mask = mask_vec(bld_base);
2216    mask = clamp_mask_to_max_output_vertices(bld, mask,
2217                                             total_emitted_vertices_vec);
2218    bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
2219                               bld->outputs,
2220                               total_emitted_vertices_vec,
2221                               mask,
2222                               lp_build_const_int_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, stream_id));
2223 
2224    increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
2225                              mask);
2226    increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr[stream_id],
2227                              mask);
2228 }
2229 
2230 static void
end_primitive_masked(struct lp_build_nir_context * bld_base,LLVMValueRef mask,uint32_t stream_id)2231 end_primitive_masked(struct lp_build_nir_context * bld_base,
2232                      LLVMValueRef mask, uint32_t stream_id)
2233 {
2234    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2235    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2236 
2237    if (stream_id >= bld->gs_vertex_streams)
2238       return;
2239    struct lp_build_context *uint_bld = &bld_base->uint_bld;
2240    LLVMValueRef emitted_vertices_vec =
2241       LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->emitted_vertices_vec_ptr[stream_id], "");
2242    LLVMValueRef emitted_prims_vec =
2243       LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->emitted_prims_vec_ptr[stream_id], "");
2244    LLVMValueRef total_emitted_vertices_vec =
2245       LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->total_emitted_vertices_vec_ptr[stream_id], "");
2246 
2247    LLVMValueRef emitted_mask = lp_build_cmp(uint_bld,
2248                                             PIPE_FUNC_NOTEQUAL,
2249                                             emitted_vertices_vec,
2250                                             uint_bld->zero);
2251    mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
2252    bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
2253 				total_emitted_vertices_vec,
2254 				emitted_vertices_vec, emitted_prims_vec, mask, stream_id);
2255    increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr[stream_id],
2256                              mask);
2257    clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
2258                                 mask);
2259 }
2260 
end_primitive(struct lp_build_nir_context * bld_base,uint32_t stream_id)2261 static void end_primitive(struct lp_build_nir_context *bld_base, uint32_t stream_id)
2262 {
2263    ASSERTED struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2264 
2265    assert(bld->gs_iface->end_primitive);
2266 
2267    LLVMValueRef mask = mask_vec(bld_base);
2268    end_primitive_masked(bld_base, mask, stream_id);
2269 }
2270 
2271 static void
emit_prologue(struct lp_build_nir_soa_context * bld)2272 emit_prologue(struct lp_build_nir_soa_context *bld)
2273 {
2274    struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
2275    if (bld->indirects & nir_var_shader_in && !bld->gs_iface && !bld->tcs_iface && !bld->tes_iface) {
2276       uint32_t num_inputs = bld->num_inputs;
2277       /* If this is an indirect case, the number of inputs should not be 0 */
2278       assert(num_inputs > 0);
2279 
2280       unsigned index, chan;
2281       LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2282       LLVMValueRef array_size = lp_build_const_int32(gallivm, num_inputs * 4);
2283       bld->inputs_array = lp_build_array_alloca(gallivm,
2284                                                vec_type, array_size,
2285                                                "input_array");
2286 
2287       for (index = 0; index < num_inputs; ++index) {
2288          for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
2289             LLVMValueRef lindex =
2290                lp_build_const_int32(gallivm, index * 4 + chan);
2291             LLVMValueRef input_ptr =
2292                LLVMBuildGEP2(gallivm->builder, vec_type, bld->inputs_array, &lindex, 1, "");
2293             LLVMValueRef value = bld->inputs[index][chan];
2294             if (value)
2295                LLVMBuildStore(gallivm->builder, value, input_ptr);
2296          }
2297       }
2298    }
2299 }
2300 
emit_vote(struct lp_build_nir_context * bld_base,LLVMValueRef src,nir_intrinsic_instr * instr,LLVMValueRef result[4])2301 static void emit_vote(struct lp_build_nir_context *bld_base, LLVMValueRef src,
2302                       nir_intrinsic_instr *instr, LLVMValueRef result[4])
2303 {
2304    struct gallivm_state * gallivm = bld_base->base.gallivm;
2305    LLVMBuilderRef builder = gallivm->builder;
2306    uint32_t bit_size = nir_src_bit_size(instr->src[0]);
2307    LLVMValueRef exec_mask = mask_vec(bld_base);
2308    struct lp_build_loop_state loop_state;
2309    LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
2310 
2311    LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->uint_bld.elem_type, "");
2312    LLVMValueRef eq_store = lp_build_alloca(gallivm, get_int_bld(bld_base, true, bit_size)->elem_type, "");
2313    LLVMValueRef init_val = NULL;
2314    if (instr->intrinsic == nir_intrinsic_vote_ieq ||
2315        instr->intrinsic == nir_intrinsic_vote_feq) {
2316       /* for equal we unfortunately have to loop and find the first valid one. */
2317       lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2318       LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
2319 
2320       struct lp_build_if_state ifthen;
2321       lp_build_if(&ifthen, gallivm, if_cond);
2322       LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
2323                                                        loop_state.counter, "");
2324       LLVMBuildStore(builder, value_ptr, eq_store);
2325       LLVMBuildStore(builder, lp_build_const_int32(gallivm, -1), res_store);
2326       lp_build_endif(&ifthen);
2327       lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2328                              NULL, LLVMIntUGE);
2329       init_val = LLVMBuildLoad2(builder, get_int_bld(bld_base, true, bit_size)->elem_type, eq_store, "");
2330    } else {
2331       LLVMBuildStore(builder, lp_build_const_int32(gallivm, instr->intrinsic == nir_intrinsic_vote_any ? 0 : -1), res_store);
2332    }
2333 
2334    LLVMValueRef res;
2335    lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2336    LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
2337                                                        loop_state.counter, "");
2338    struct lp_build_if_state ifthen;
2339    LLVMValueRef if_cond;
2340    if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
2341 
2342    lp_build_if(&ifthen, gallivm, if_cond);
2343    res = LLVMBuildLoad2(builder, bld_base->uint_bld.elem_type, res_store, "");
2344 
2345    if (instr->intrinsic == nir_intrinsic_vote_feq) {
2346       struct lp_build_context *flt_bld = get_flt_bld(bld_base, bit_size);
2347       LLVMValueRef tmp = LLVMBuildFCmp(builder, LLVMRealUEQ,
2348                                        LLVMBuildBitCast(builder, init_val, flt_bld->elem_type, ""),
2349                                        LLVMBuildBitCast(builder, value_ptr, flt_bld->elem_type, ""), "");
2350       tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, "");
2351       res = LLVMBuildAnd(builder, res, tmp, "");
2352    } else if (instr->intrinsic == nir_intrinsic_vote_ieq) {
2353       LLVMValueRef tmp = LLVMBuildICmp(builder, LLVMIntEQ, init_val, value_ptr, "");
2354       tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, "");
2355       res = LLVMBuildAnd(builder, res, tmp, "");
2356    } else if (instr->intrinsic == nir_intrinsic_vote_any)
2357       res = LLVMBuildOr(builder, res, value_ptr, "");
2358    else
2359       res = LLVMBuildAnd(builder, res, value_ptr, "");
2360    LLVMBuildStore(builder, res, res_store);
2361    lp_build_endif(&ifthen);
2362    lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2363                           NULL, LLVMIntUGE);
2364    result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld,
2365                                          LLVMBuildLoad2(builder, bld_base->uint_bld.elem_type, res_store, ""));
2366 }
2367 
emit_ballot(struct lp_build_nir_context * bld_base,LLVMValueRef src,nir_intrinsic_instr * instr,LLVMValueRef result[4])2368 static void emit_ballot(struct lp_build_nir_context *bld_base, LLVMValueRef src, nir_intrinsic_instr *instr, LLVMValueRef result[4])
2369 {
2370    struct gallivm_state * gallivm = bld_base->base.gallivm;
2371    LLVMBuilderRef builder = gallivm->builder;
2372    LLVMValueRef exec_mask = mask_vec(bld_base);
2373    struct lp_build_loop_state loop_state;
2374    src = LLVMBuildAnd(builder, src, exec_mask, "");
2375    LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
2376    LLVMValueRef res;
2377    lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2378    LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
2379                                                     loop_state.counter, "");
2380    res = LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, res_store, "");
2381    res = LLVMBuildOr(builder,
2382                      res,
2383                      LLVMBuildAnd(builder, value_ptr, LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1), loop_state.counter, ""), ""), "");
2384    LLVMBuildStore(builder, res, res_store);
2385 
2386    lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2387                           NULL, LLVMIntUGE);
2388    result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld,
2389                                          LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, res_store, ""));
2390 }
2391 
emit_elect(struct lp_build_nir_context * bld_base,LLVMValueRef result[4])2392 static void emit_elect(struct lp_build_nir_context *bld_base, LLVMValueRef result[4])
2393 {
2394    struct gallivm_state *gallivm = bld_base->base.gallivm;
2395    LLVMBuilderRef builder = gallivm->builder;
2396    LLVMValueRef exec_mask = mask_vec(bld_base);
2397    struct lp_build_loop_state loop_state;
2398 
2399    LLVMValueRef idx_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
2400    LLVMValueRef found_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
2401    lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2402    LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, exec_mask,
2403                                                     loop_state.counter, "");
2404    LLVMValueRef cond = LLVMBuildICmp(gallivm->builder,
2405                                      LLVMIntEQ,
2406                                      value_ptr,
2407                                      lp_build_const_int32(gallivm, -1), "");
2408    LLVMValueRef cond2 = LLVMBuildICmp(gallivm->builder,
2409                                       LLVMIntEQ,
2410                                       LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, found_store, ""),
2411                                       lp_build_const_int32(gallivm, 0), "");
2412 
2413    cond = LLVMBuildAnd(builder, cond, cond2, "");
2414    struct lp_build_if_state ifthen;
2415    lp_build_if(&ifthen, gallivm, cond);
2416    LLVMBuildStore(builder, lp_build_const_int32(gallivm, 1), found_store);
2417    LLVMBuildStore(builder, loop_state.counter, idx_store);
2418    lp_build_endif(&ifthen);
2419    lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2420                           NULL, LLVMIntUGE);
2421 
2422    result[0] = LLVMBuildInsertElement(builder, bld_base->uint_bld.zero,
2423                                       lp_build_const_int32(gallivm, -1),
2424                                       LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, idx_store, ""),
2425                                       "");
2426 }
2427 
2428 #if LLVM_VERSION_MAJOR >= 10
emit_shuffle(struct lp_build_nir_context * bld_base,LLVMValueRef src,LLVMValueRef index,nir_intrinsic_instr * instr,LLVMValueRef result[4])2429 static void emit_shuffle(struct lp_build_nir_context *bld_base, LLVMValueRef src, LLVMValueRef index,
2430                         nir_intrinsic_instr *instr, LLVMValueRef result[4])
2431 {
2432    assert(instr->intrinsic == nir_intrinsic_shuffle);
2433 
2434    struct gallivm_state *gallivm = bld_base->base.gallivm;
2435    LLVMBuilderRef builder = gallivm->builder;
2436    uint32_t bit_size = nir_src_bit_size(instr->src[0]);
2437    uint32_t index_bit_size = nir_src_bit_size(instr->src[1]);
2438    struct lp_build_context *int_bld = get_int_bld(bld_base, true, bit_size);
2439 
2440    if (util_get_cpu_caps()->has_avx2 && bit_size == 32 && index_bit_size == 32 && int_bld->type.length == 8) {
2441       /* freeze `src` in case inactive invocations contain poison */
2442       src = LLVMBuildFreeze(builder, src, "");
2443       result[0] = lp_build_intrinsic_binary(builder, "llvm.x86.avx2.permd", int_bld->vec_type, src, index);
2444    } else {
2445       LLVMValueRef res_store = lp_build_alloca(gallivm, int_bld->vec_type, "");
2446       struct lp_build_loop_state loop_state;
2447       lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2448 
2449       LLVMValueRef index_value = LLVMBuildExtractElement(builder, index, loop_state.counter, "");
2450 
2451       LLVMValueRef src_value = LLVMBuildExtractElement(builder, src, index_value, "");
2452       /* freeze `src_value` in case an out-of-bounds index or an index into an
2453        * inactive invocation results in poison
2454        */
2455       src_value = LLVMBuildFreeze(builder, src_value, "");
2456 
2457       LLVMValueRef res = LLVMBuildLoad2(builder, int_bld->vec_type, res_store, "");
2458       res = LLVMBuildInsertElement(builder, res, src_value, loop_state.counter, "");
2459       LLVMBuildStore(builder, res, res_store);
2460 
2461       lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2462                              NULL, LLVMIntUGE);
2463 
2464       result[0] = LLVMBuildLoad2(builder, int_bld->vec_type, res_store, "");
2465    }
2466 }
2467 #endif
2468 
emit_reduce(struct lp_build_nir_context * bld_base,LLVMValueRef src,nir_intrinsic_instr * instr,LLVMValueRef result[4])2469 static void emit_reduce(struct lp_build_nir_context *bld_base, LLVMValueRef src,
2470                         nir_intrinsic_instr *instr, LLVMValueRef result[4])
2471 {
2472    struct gallivm_state *gallivm = bld_base->base.gallivm;
2473    LLVMBuilderRef builder = gallivm->builder;
2474    uint32_t bit_size = nir_src_bit_size(instr->src[0]);
2475    /* can't use llvm reduction intrinsics because of exec_mask */
2476    LLVMValueRef exec_mask = mask_vec(bld_base);
2477    nir_op reduction_op = nir_intrinsic_reduction_op(instr);
2478 
2479    uint32_t cluster_size = 0;
2480 
2481    if (instr->intrinsic == nir_intrinsic_reduce)
2482       cluster_size = nir_intrinsic_cluster_size(instr);
2483 
2484    if (cluster_size == 0)
2485       cluster_size = bld_base->int_bld.type.length;
2486 
2487    LLVMValueRef res_store = NULL;
2488    LLVMValueRef scan_store;
2489    struct lp_build_context *int_bld = get_int_bld(bld_base, true, bit_size);
2490 
2491    res_store = lp_build_alloca(gallivm, int_bld->vec_type, "");
2492    scan_store = lp_build_alloca(gallivm, int_bld->elem_type, "");
2493 
2494    struct lp_build_context elem_bld;
2495    bool is_flt = reduction_op == nir_op_fadd ||
2496       reduction_op == nir_op_fmul ||
2497       reduction_op == nir_op_fmin ||
2498       reduction_op == nir_op_fmax;
2499    bool is_unsigned = reduction_op == nir_op_umin ||
2500       reduction_op == nir_op_umax;
2501 
2502    struct lp_build_context *vec_bld = is_flt ? get_flt_bld(bld_base, bit_size) :
2503       get_int_bld(bld_base, is_unsigned, bit_size);
2504 
2505    lp_build_context_init(&elem_bld, gallivm, lp_elem_type(vec_bld->type));
2506 
2507    LLVMValueRef store_val = NULL;
2508    /*
2509     * Put the identity value for the operation into the storage
2510     */
2511    switch (reduction_op) {
2512    case nir_op_fmin: {
2513       LLVMValueRef flt_max = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), INFINITY) :
2514          (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), INFINITY) : lp_build_const_float(gallivm, INFINITY));
2515       store_val = LLVMBuildBitCast(builder, flt_max, int_bld->elem_type, "");
2516       break;
2517    }
2518    case nir_op_fmax: {
2519       LLVMValueRef flt_min = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), -INFINITY) :
2520          (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), -INFINITY) : lp_build_const_float(gallivm, -INFINITY));
2521       store_val = LLVMBuildBitCast(builder, flt_min, int_bld->elem_type, "");
2522       break;
2523    }
2524    case nir_op_fmul: {
2525       LLVMValueRef flt_one = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), 1.0) :
2526          (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), 1.0) : lp_build_const_float(gallivm, 1.0));
2527       store_val = LLVMBuildBitCast(builder, flt_one, int_bld->elem_type, "");
2528       break;
2529    }
2530    case nir_op_umin:
2531       switch (bit_size) {
2532       case 8:
2533          store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), UINT8_MAX, 0);
2534          break;
2535       case 16:
2536          store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), UINT16_MAX, 0);
2537          break;
2538       case 32:
2539       default:
2540          store_val  = lp_build_const_int32(gallivm, UINT_MAX);
2541          break;
2542       case 64:
2543          store_val  = lp_build_const_int64(gallivm, UINT64_MAX);
2544          break;
2545       }
2546       break;
2547    case nir_op_imin:
2548       switch (bit_size) {
2549       case 8:
2550          store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), INT8_MAX, 0);
2551          break;
2552       case 16:
2553          store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), INT16_MAX, 0);
2554          break;
2555       case 32:
2556       default:
2557          store_val  = lp_build_const_int32(gallivm, INT_MAX);
2558          break;
2559       case 64:
2560          store_val  = lp_build_const_int64(gallivm, INT64_MAX);
2561          break;
2562       }
2563       break;
2564    case nir_op_imax:
2565       switch (bit_size) {
2566       case 8:
2567          store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), INT8_MIN, 0);
2568          break;
2569       case 16:
2570          store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), INT16_MIN, 0);
2571          break;
2572       case 32:
2573       default:
2574          store_val  = lp_build_const_int32(gallivm, INT_MIN);
2575          break;
2576       case 64:
2577          store_val  = lp_build_const_int64(gallivm, INT64_MIN);
2578          break;
2579       }
2580       break;
2581    case nir_op_imul:
2582       switch (bit_size) {
2583       case 8:
2584          store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 1, 0);
2585          break;
2586       case 16:
2587          store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 1, 0);
2588          break;
2589       case 32:
2590       default:
2591          store_val  = lp_build_const_int32(gallivm, 1);
2592          break;
2593       case 64:
2594          store_val  = lp_build_const_int64(gallivm, 1);
2595          break;
2596       }
2597       break;
2598    case nir_op_iand:
2599       switch (bit_size) {
2600       case 8:
2601          store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0xff, 0);
2602          break;
2603       case 16:
2604          store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0xffff, 0);
2605          break;
2606       case 32:
2607       default:
2608          store_val  = lp_build_const_int32(gallivm, 0xffffffff);
2609          break;
2610       case 64:
2611          store_val  = lp_build_const_int64(gallivm, 0xffffffffffffffffLL);
2612          break;
2613       }
2614       break;
2615    default:
2616       break;
2617    }
2618    if (store_val)
2619       LLVMBuildStore(builder, store_val, scan_store);
2620 
2621    LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
2622 
2623    for (uint32_t i = 0; i < bld_base->uint_bld.type.length; i++) {
2624       LLVMValueRef counter = lp_build_const_int32(gallivm, i);
2625 
2626       struct lp_build_if_state ifthen;
2627       LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, counter, "");
2628       lp_build_if(&ifthen, gallivm, if_cond);
2629 
2630       LLVMValueRef value = LLVMBuildExtractElement(gallivm->builder, src, counter, "");
2631 
2632       LLVMValueRef res = NULL;
2633       LLVMValueRef scan_val = LLVMBuildLoad2(gallivm->builder, int_bld->elem_type, scan_store, "");
2634 
2635       if (instr->intrinsic != nir_intrinsic_reduce)
2636          res = LLVMBuildLoad2(gallivm->builder, int_bld->vec_type, res_store, "");
2637 
2638       if (instr->intrinsic == nir_intrinsic_exclusive_scan)
2639          res = LLVMBuildInsertElement(builder, res, scan_val, counter, "");
2640 
2641       if (is_flt) {
2642          scan_val = LLVMBuildBitCast(builder, scan_val, elem_bld.elem_type, "");
2643          value = LLVMBuildBitCast(builder, value, elem_bld.elem_type, "");
2644       }
2645 
2646       switch (reduction_op) {
2647       case nir_op_fadd:
2648       case nir_op_iadd:
2649          scan_val = lp_build_add(&elem_bld, value, scan_val);
2650          break;
2651       case nir_op_fmul:
2652       case nir_op_imul:
2653          scan_val = lp_build_mul(&elem_bld, value, scan_val);
2654          break;
2655       case nir_op_imin:
2656       case nir_op_umin:
2657       case nir_op_fmin:
2658          scan_val = lp_build_min(&elem_bld, value, scan_val);
2659          break;
2660       case nir_op_imax:
2661       case nir_op_umax:
2662       case nir_op_fmax:
2663          scan_val = lp_build_max(&elem_bld, value, scan_val);
2664          break;
2665       case nir_op_iand:
2666          scan_val = lp_build_and(&elem_bld, value, scan_val);
2667          break;
2668       case nir_op_ior:
2669          scan_val = lp_build_or(&elem_bld, value, scan_val);
2670          break;
2671       case nir_op_ixor:
2672          scan_val = lp_build_xor(&elem_bld, value, scan_val);
2673          break;
2674       default:
2675          assert(0);
2676          break;
2677       }
2678 
2679       if (is_flt)
2680          scan_val = LLVMBuildBitCast(builder, scan_val, int_bld->elem_type, "");
2681       LLVMBuildStore(builder, scan_val, scan_store);
2682 
2683       if (instr->intrinsic == nir_intrinsic_inclusive_scan)
2684          res = LLVMBuildInsertElement(builder, res, scan_val, counter, "");
2685 
2686       if (instr->intrinsic != nir_intrinsic_reduce)
2687          LLVMBuildStore(builder, res, res_store);
2688 
2689       lp_build_endif(&ifthen);
2690 
2691       if (instr->intrinsic == nir_intrinsic_reduce && (i % cluster_size) == (cluster_size - 1)) {
2692          res = LLVMBuildLoad2(gallivm->builder, int_bld->vec_type, res_store, "");
2693          scan_val = LLVMBuildLoad2(gallivm->builder, int_bld->elem_type, scan_store, "");
2694 
2695          if (store_val)
2696             LLVMBuildStore(builder, store_val, scan_store);
2697          else
2698             LLVMBuildStore(builder, LLVMConstNull(int_bld->elem_type), scan_store);
2699 
2700          LLVMValueRef cluster_index = lp_build_const_int32(gallivm, i / cluster_size);
2701          res = LLVMBuildInsertElement(builder, res, scan_val, cluster_index, "");
2702 
2703          LLVMBuildStore(builder, res, res_store);
2704       }
2705    }
2706 
2707    LLVMValueRef res = LLVMBuildLoad2(gallivm->builder, int_bld->vec_type, res_store, "");
2708 
2709    if (instr->intrinsic == nir_intrinsic_reduce) {
2710       LLVMValueRef swizzle[LP_MAX_VECTOR_LENGTH];
2711       for (uint32_t i = 0; i < bld_base->int_bld.type.length; i++)
2712          swizzle[i] = lp_build_const_int32(gallivm, i / cluster_size);
2713 
2714       LLVMValueRef undef = LLVMGetUndef(int_bld->vec_type);
2715       result[0] = LLVMBuildShuffleVector(
2716          builder, res, undef, LLVMConstVector(swizzle, bld_base->int_bld.type.length), "");
2717    } else {
2718       result[0] = res;
2719    }
2720 
2721 }
2722 
emit_read_invocation(struct lp_build_nir_context * bld_base,LLVMValueRef src,unsigned bit_size,LLVMValueRef invoc,LLVMValueRef result[4])2723 static void emit_read_invocation(struct lp_build_nir_context *bld_base,
2724                                  LLVMValueRef src,
2725                                  unsigned bit_size,
2726                                  LLVMValueRef invoc,
2727                                  LLVMValueRef result[4])
2728 {
2729    struct gallivm_state *gallivm = bld_base->base.gallivm;
2730    LLVMValueRef idx = first_active_invocation(bld_base);
2731    struct lp_build_context *uint_bld = get_int_bld(bld_base, true, bit_size);
2732 
2733    /* If we're emitting readInvocation() (as opposed to readFirstInvocation),
2734     * use the first active channel to pull the invocation index number out of
2735     * the invocation arg.
2736     */
2737    if (invoc)
2738       idx = LLVMBuildExtractElement(gallivm->builder, invoc, idx, "");
2739 
2740    LLVMValueRef value = LLVMBuildExtractElement(gallivm->builder,
2741                                                 src, idx, "");
2742    result[0] = lp_build_broadcast_scalar(uint_bld, value);
2743 }
2744 
2745 static void
emit_interp_at(struct lp_build_nir_context * bld_base,unsigned num_components,nir_variable * var,bool centroid,bool sample,unsigned const_index,LLVMValueRef indir_index,LLVMValueRef offsets[2],LLVMValueRef dst[4])2746 emit_interp_at(struct lp_build_nir_context *bld_base,
2747                unsigned num_components,
2748                nir_variable *var,
2749                bool centroid,
2750                bool sample,
2751                unsigned const_index,
2752                LLVMValueRef indir_index,
2753                LLVMValueRef offsets[2],
2754                LLVMValueRef dst[4])
2755 {
2756    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2757 
2758    for (unsigned i = 0; i < num_components; i++) {
2759       dst[i] = bld->fs_iface->interp_fn(bld->fs_iface, &bld_base->base,
2760                                         const_index + var->data.driver_location, i + var->data.location_frac,
2761                                         centroid, sample, indir_index, offsets);
2762    }
2763 }
2764 
2765 static void
emit_set_vertex_and_primitive_count(struct lp_build_nir_context * bld_base,LLVMValueRef vert_count,LLVMValueRef prim_count)2766 emit_set_vertex_and_primitive_count(struct lp_build_nir_context *bld_base,
2767                                     LLVMValueRef vert_count,
2768                                     LLVMValueRef prim_count)
2769 {
2770    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2771    struct gallivm_state *gallivm = bld_base->base.gallivm;
2772    assert(bld->mesh_iface);
2773    LLVMValueRef idx = first_active_invocation(bld_base);
2774 
2775    LLVMValueRef vcount = LLVMBuildExtractElement(gallivm->builder,
2776                                                  vert_count, idx, "");
2777    LLVMValueRef pcount = LLVMBuildExtractElement(gallivm->builder,
2778                                                  prim_count, idx, "");
2779 
2780    bld->mesh_iface->emit_vertex_and_primitive_count(bld->mesh_iface, &bld_base->base, vcount, pcount);
2781 }
2782 
2783 static void
emit_launch_mesh_workgroups(struct lp_build_nir_context * bld_base,LLVMValueRef launch_grid)2784 emit_launch_mesh_workgroups(struct lp_build_nir_context *bld_base,
2785                             LLVMValueRef launch_grid)
2786 {
2787    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2788    struct gallivm_state *gallivm = bld_base->base.gallivm;
2789    LLVMTypeRef vec_type = LLVMArrayType(LLVMInt32TypeInContext(gallivm->context), 3);
2790 
2791    LLVMValueRef local_invoc_idx = get_local_invocation_index(bld);
2792 
2793    vec_type = LLVMPointerType(vec_type, 0);
2794 
2795    local_invoc_idx = LLVMBuildExtractElement(gallivm->builder, local_invoc_idx, lp_build_const_int32(gallivm, 0), "");
2796    LLVMValueRef if_cond = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, local_invoc_idx, lp_build_const_int32(gallivm, 0), "");
2797    struct lp_build_if_state ifthen;
2798    lp_build_if(&ifthen, gallivm, if_cond);
2799    LLVMValueRef ptr = bld->payload_ptr;
2800    ptr = LLVMBuildPtrToInt(gallivm->builder, ptr, bld_base->int64_bld.elem_type, "");
2801    for (unsigned i = 0; i < 3; i++) {
2802       LLVMValueRef lg = LLVMBuildExtractValue(gallivm->builder, launch_grid, i, "");
2803       lg = LLVMBuildExtractElement(gallivm->builder, lg, lp_build_const_int32(gallivm, 0), "");
2804       LLVMValueRef this_ptr = LLVMBuildIntToPtr(gallivm->builder, ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), "");
2805       LLVMBuildStore(gallivm->builder, lg, this_ptr);
2806       ptr = LLVMBuildAdd(gallivm->builder, ptr, lp_build_const_int64(gallivm, 4), "");
2807    }
2808    lp_build_endif(&ifthen);
2809 }
2810 
2811 static void
emit_call(struct lp_build_nir_context * bld_base,struct lp_build_fn * fn,int num_args,LLVMValueRef * args)2812 emit_call(struct lp_build_nir_context *bld_base,
2813           struct lp_build_fn *fn,
2814           int num_args,
2815           LLVMValueRef *args)
2816 {
2817    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2818 
2819    args[0] = mask_vec(bld_base);
2820    args[1] = bld->call_context_ptr;
2821    LLVMBuildCall2(bld_base->base.gallivm->builder, fn->fn_type, fn->fn, args, num_args, "");
2822 }
2823 
get_scratch_thread_offsets(struct gallivm_state * gallivm,struct lp_type type,unsigned scratch_size)2824 static LLVMValueRef get_scratch_thread_offsets(struct gallivm_state *gallivm,
2825                                                struct lp_type type,
2826                                                unsigned scratch_size)
2827 {
2828    LLVMTypeRef elem_type = lp_build_int_elem_type(gallivm, type);
2829    LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
2830    unsigned i;
2831 
2832    if (type.length == 1)
2833       return LLVMConstInt(elem_type, 0, 0);
2834 
2835    for (i = 0; i < type.length; ++i)
2836       elems[i] = LLVMConstInt(elem_type, scratch_size * i, 0);
2837 
2838    return LLVMConstVector(elems, type.length);
2839 }
2840 
2841 static void
emit_load_scratch(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,LLVMValueRef offset,LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])2842 emit_load_scratch(struct lp_build_nir_context *bld_base,
2843                   unsigned nc, unsigned bit_size,
2844                   LLVMValueRef offset,
2845                   LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
2846 {
2847    struct gallivm_state * gallivm = bld_base->base.gallivm;
2848    LLVMBuilderRef builder = gallivm->builder;
2849    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2850    struct lp_build_context *uint_bld = &bld_base->uint_bld;
2851    struct lp_build_context *load_bld;
2852    LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);
2853    LLVMValueRef exec_mask = mask_vec(bld_base);
2854    LLVMValueRef scratch_ptr_vec = lp_build_broadcast(gallivm,
2855                                                      LLVMVectorType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), uint_bld->type.length),
2856                                                      bld->scratch_ptr);
2857    load_bld = get_int_bld(bld_base, true, bit_size);
2858 
2859    offset = lp_build_add(uint_bld, offset, thread_offsets);
2860 
2861    for (unsigned c = 0; c < nc; c++) {
2862       LLVMValueRef chan_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c * (bit_size / 8)));
2863 
2864       outval[c] = lp_build_masked_gather(gallivm, load_bld->type.length, bit_size,
2865                                          load_bld->vec_type,
2866                                          lp_vec_add_offset_ptr(bld_base, bit_size,
2867                                                                scratch_ptr_vec,
2868                                                                chan_offset),
2869                                          exec_mask);
2870       outval[c] = LLVMBuildBitCast(builder, outval[c], load_bld->vec_type, "");
2871    }
2872 }
2873 
2874 static void
emit_store_scratch(struct lp_build_nir_context * bld_base,unsigned writemask,unsigned nc,unsigned bit_size,LLVMValueRef offset,LLVMValueRef dst)2875 emit_store_scratch(struct lp_build_nir_context *bld_base,
2876                    unsigned writemask, unsigned nc,
2877                    unsigned bit_size, LLVMValueRef offset,
2878                    LLVMValueRef dst)
2879 {
2880    struct gallivm_state * gallivm = bld_base->base.gallivm;
2881    LLVMBuilderRef builder = gallivm->builder;
2882    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2883    struct lp_build_context *uint_bld = &bld_base->uint_bld;
2884    struct lp_build_context *store_bld;
2885    LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);
2886    LLVMValueRef scratch_ptr_vec = lp_build_broadcast(gallivm,
2887                                                      LLVMVectorType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), uint_bld->type.length),
2888                                                      bld->scratch_ptr);
2889    store_bld = get_int_bld(bld_base, true, bit_size);
2890 
2891    LLVMValueRef exec_mask = mask_vec(bld_base);
2892    offset = lp_build_add(uint_bld, offset, thread_offsets);
2893 
2894    for (unsigned c = 0; c < nc; c++) {
2895       if (!(writemask & (1u << c)))
2896          continue;
2897       LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
2898 
2899       LLVMValueRef chan_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c * (bit_size / 8)));
2900 
2901       val = LLVMBuildBitCast(builder, val, store_bld->vec_type, "");
2902 
2903       lp_build_masked_scatter(gallivm, store_bld->type.length, bit_size,
2904                               lp_vec_add_offset_ptr(bld_base, bit_size,
2905                                                     scratch_ptr_vec, chan_offset),
2906                               val, exec_mask);
2907    }
2908 }
2909 
2910 static void
emit_clock(struct lp_build_nir_context * bld_base,LLVMValueRef dst[4])2911 emit_clock(struct lp_build_nir_context *bld_base,
2912            LLVMValueRef dst[4])
2913 {
2914    struct gallivm_state *gallivm = bld_base->base.gallivm;
2915    LLVMBuilderRef builder = gallivm->builder;
2916    struct lp_build_context *uint_bld = get_int_bld(bld_base, true, 32);
2917 
2918    lp_init_clock_hook(gallivm);
2919 
2920    LLVMTypeRef get_time_type = LLVMFunctionType(LLVMInt64TypeInContext(gallivm->context), NULL, 0, 1);
2921    LLVMValueRef result = LLVMBuildCall2(builder, get_time_type, gallivm->get_time_hook, NULL, 0, "");
2922 
2923    LLVMValueRef hi = LLVMBuildShl(builder, result, lp_build_const_int64(gallivm, 32), "");
2924    hi = LLVMBuildTrunc(builder, hi, uint_bld->elem_type, "");
2925    LLVMValueRef lo = LLVMBuildTrunc(builder, result, uint_bld->elem_type, "");
2926    dst[0] = lp_build_broadcast_scalar(uint_bld, lo);
2927    dst[1] = lp_build_broadcast_scalar(uint_bld, hi);
2928 }
2929 
2930 LLVMTypeRef
lp_build_cs_func_call_context(struct gallivm_state * gallivm,int length,LLVMTypeRef context_type,LLVMTypeRef resources_type)2931 lp_build_cs_func_call_context(struct gallivm_state *gallivm, int length,
2932                               LLVMTypeRef context_type, LLVMTypeRef resources_type)
2933 {
2934    LLVMTypeRef args[LP_NIR_CALL_CONTEXT_MAX_ARGS];
2935 
2936    args[LP_NIR_CALL_CONTEXT_CONTEXT] = LLVMPointerType(context_type, 0);
2937    args[LP_NIR_CALL_CONTEXT_RESOURCES] = LLVMPointerType(resources_type, 0);
2938    args[LP_NIR_CALL_CONTEXT_SHARED] = LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0); /* shared_ptr */
2939    args[LP_NIR_CALL_CONTEXT_SCRATCH] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); /* scratch ptr */
2940    args[LP_NIR_CALL_CONTEXT_WORK_DIM] = LLVMInt32TypeInContext(gallivm->context); /* work_dim */
2941    args[LP_NIR_CALL_CONTEXT_THREAD_ID_0] = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), length); /* system_values.thread_id[0] */
2942    args[LP_NIR_CALL_CONTEXT_THREAD_ID_1] = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), length); /* system_values.thread_id[1] */
2943    args[LP_NIR_CALL_CONTEXT_THREAD_ID_2] = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), length); /* system_values.thread_id[2] */
2944    args[LP_NIR_CALL_CONTEXT_BLOCK_ID_0] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_id[0] */
2945    args[LP_NIR_CALL_CONTEXT_BLOCK_ID_1] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_id[1] */
2946    args[LP_NIR_CALL_CONTEXT_BLOCK_ID_2] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_id[2] */
2947 
2948    args[LP_NIR_CALL_CONTEXT_GRID_SIZE_0] = LLVMInt32TypeInContext(gallivm->context); /* system_values.grid_size[0] */
2949    args[LP_NIR_CALL_CONTEXT_GRID_SIZE_1] = LLVMInt32TypeInContext(gallivm->context); /* system_values.grid_size[1] */
2950    args[LP_NIR_CALL_CONTEXT_GRID_SIZE_2] = LLVMInt32TypeInContext(gallivm->context); /* system_values.grid_size[2] */
2951    args[LP_NIR_CALL_CONTEXT_BLOCK_SIZE_0] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_size[0] */
2952    args[LP_NIR_CALL_CONTEXT_BLOCK_SIZE_1] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_size[1] */
2953    args[LP_NIR_CALL_CONTEXT_BLOCK_SIZE_2] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_size[2] */
2954 
2955    LLVMTypeRef stype = LLVMStructTypeInContext(gallivm->context, args, LP_NIR_CALL_CONTEXT_MAX_ARGS, 0);
2956    return stype;
2957 }
2958 
2959 static void
build_call_context(struct lp_build_nir_soa_context * bld)2960 build_call_context(struct lp_build_nir_soa_context *bld)
2961 {
2962    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2963    bld->call_context_ptr = lp_build_alloca(gallivm, bld->call_context_type, "callcontext");
2964    LLVMValueRef call_context = LLVMGetUndef(bld->call_context_type);
2965    call_context = LLVMBuildInsertValue(gallivm->builder,
2966                                        call_context, bld->context_ptr, LP_NIR_CALL_CONTEXT_CONTEXT, "");
2967    call_context = LLVMBuildInsertValue(gallivm->builder,
2968                                        call_context, bld->resources_ptr, LP_NIR_CALL_CONTEXT_RESOURCES, "");
2969    if (bld->shared_ptr) {
2970       call_context = LLVMBuildInsertValue(gallivm->builder,
2971                                           call_context, bld->shared_ptr, LP_NIR_CALL_CONTEXT_SHARED, "");
2972    } else {
2973       call_context = LLVMBuildInsertValue(gallivm->builder, call_context,
2974                                           LLVMConstNull(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0)),
2975                                           LP_NIR_CALL_CONTEXT_SHARED, "");
2976    }
2977    if (bld->scratch_ptr) {
2978       call_context = LLVMBuildInsertValue(gallivm->builder,
2979                                           call_context, bld->scratch_ptr, LP_NIR_CALL_CONTEXT_SCRATCH, "");
2980    } else {
2981       call_context = LLVMBuildInsertValue(gallivm->builder, call_context,
2982                                           LLVMConstNull(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0)),
2983                                           LP_NIR_CALL_CONTEXT_SCRATCH, "");
2984    }
2985    call_context = LLVMBuildInsertValue(gallivm->builder,
2986                                        call_context, bld->system_values.work_dim, LP_NIR_CALL_CONTEXT_WORK_DIM, "");
2987    call_context = LLVMBuildInsertValue(gallivm->builder,
2988                                        call_context, bld->system_values.thread_id[0], LP_NIR_CALL_CONTEXT_THREAD_ID_0, "");
2989    call_context = LLVMBuildInsertValue(gallivm->builder,
2990                                        call_context, bld->system_values.thread_id[1], LP_NIR_CALL_CONTEXT_THREAD_ID_1, "");
2991    call_context = LLVMBuildInsertValue(gallivm->builder,
2992                                        call_context, bld->system_values.thread_id[2], LP_NIR_CALL_CONTEXT_THREAD_ID_2, "");
2993    call_context = LLVMBuildInsertValue(gallivm->builder,
2994                                        call_context, bld->system_values.block_id[0], LP_NIR_CALL_CONTEXT_BLOCK_ID_0, "");
2995    call_context = LLVMBuildInsertValue(gallivm->builder,
2996                                        call_context, bld->system_values.block_id[1], LP_NIR_CALL_CONTEXT_BLOCK_ID_1, "");
2997    call_context = LLVMBuildInsertValue(gallivm->builder,
2998                                        call_context, bld->system_values.block_id[2], LP_NIR_CALL_CONTEXT_BLOCK_ID_2, "");
2999    call_context = LLVMBuildInsertValue(gallivm->builder,
3000                                        call_context, bld->system_values.grid_size[0], LP_NIR_CALL_CONTEXT_GRID_SIZE_0, "");
3001    call_context = LLVMBuildInsertValue(gallivm->builder,
3002                                        call_context, bld->system_values.grid_size[1], LP_NIR_CALL_CONTEXT_GRID_SIZE_1, "");
3003    call_context = LLVMBuildInsertValue(gallivm->builder,
3004                                        call_context, bld->system_values.grid_size[2], LP_NIR_CALL_CONTEXT_GRID_SIZE_2, "");
3005    call_context = LLVMBuildInsertValue(gallivm->builder,
3006                                        call_context, bld->system_values.block_size[0], LP_NIR_CALL_CONTEXT_BLOCK_SIZE_0, "");
3007    call_context = LLVMBuildInsertValue(gallivm->builder,
3008                                        call_context, bld->system_values.block_size[1], LP_NIR_CALL_CONTEXT_BLOCK_SIZE_1, "");
3009    call_context = LLVMBuildInsertValue(gallivm->builder,
3010                                        call_context, bld->system_values.block_size[2], LP_NIR_CALL_CONTEXT_BLOCK_SIZE_2, "");
3011    LLVMBuildStore(gallivm->builder, call_context, bld->call_context_ptr);
3012 }
3013 
lp_build_nir_soa_func(struct gallivm_state * gallivm,struct nir_shader * shader,nir_function_impl * impl,const struct lp_build_tgsi_params * params,LLVMValueRef (* outputs)[4])3014 void lp_build_nir_soa_func(struct gallivm_state *gallivm,
3015                            struct nir_shader *shader,
3016                            nir_function_impl *impl,
3017                            const struct lp_build_tgsi_params *params,
3018                            LLVMValueRef (*outputs)[4])
3019 {
3020    struct lp_build_nir_soa_context bld;
3021    const struct lp_type type = params->type;
3022    struct lp_type res_type;
3023 
3024    assert(type.length <= LP_MAX_VECTOR_LENGTH);
3025    memset(&res_type, 0, sizeof res_type);
3026    res_type.width = type.width;
3027    res_type.length = type.length;
3028    res_type.sign = 1;
3029 
3030    /* Setup build context */
3031    memset(&bld, 0, sizeof bld);
3032    lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
3033    lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
3034    lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
3035    lp_build_context_init(&bld.uint_elem_bld, gallivm, lp_elem_type(lp_uint_type(type)));
3036    {
3037       struct lp_type float_type = type;
3038       float_type.signed_zero_preserve =
3039          !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP32);
3040       float_type.nan_preserve =
3041          !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_NAN_PRESERVE_FP32);
3042       lp_build_context_init(&bld.bld_base.base, gallivm, float_type);
3043    }
3044    {
3045       struct lp_type dbl_type;
3046       dbl_type = type;
3047       dbl_type.width *= 2;
3048       dbl_type.signed_zero_preserve =
3049          !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP64);
3050       dbl_type.nan_preserve =
3051          !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_NAN_PRESERVE_FP64);
3052       lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
3053    }
3054    {
3055       struct lp_type half_type;
3056       half_type = type;
3057       half_type.width /= 2;
3058       half_type.signed_zero_preserve =
3059          !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP16);
3060       half_type.nan_preserve =
3061          !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_NAN_PRESERVE_FP16);
3062       lp_build_context_init(&bld.bld_base.half_bld, gallivm, half_type);
3063    }
3064    {
3065       struct lp_type uint64_type;
3066       uint64_type = lp_uint_type(type);
3067       uint64_type.width *= 2;
3068       lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
3069    }
3070    {
3071       struct lp_type int64_type;
3072       int64_type = lp_int_type(type);
3073       int64_type.width *= 2;
3074       lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
3075    }
3076    {
3077       struct lp_type uint16_type;
3078       uint16_type = lp_uint_type(type);
3079       uint16_type.width /= 2;
3080       lp_build_context_init(&bld.bld_base.uint16_bld, gallivm, uint16_type);
3081    }
3082    {
3083       struct lp_type int16_type;
3084       int16_type = lp_int_type(type);
3085       int16_type.width /= 2;
3086       lp_build_context_init(&bld.bld_base.int16_bld, gallivm, int16_type);
3087    }
3088    {
3089       struct lp_type uint8_type;
3090       uint8_type = lp_uint_type(type);
3091       uint8_type.width /= 4;
3092       lp_build_context_init(&bld.bld_base.uint8_bld, gallivm, uint8_type);
3093    }
3094    {
3095       struct lp_type int8_type;
3096       int8_type = lp_int_type(type);
3097       int8_type.width /= 4;
3098       lp_build_context_init(&bld.bld_base.int8_bld, gallivm, int8_type);
3099    }
3100    bld.bld_base.load_var = emit_load_var;
3101    bld.bld_base.store_var = emit_store_var;
3102    bld.bld_base.load_reg = emit_load_reg;
3103    bld.bld_base.store_reg = emit_store_reg;
3104    bld.bld_base.emit_var_decl = emit_var_decl;
3105    bld.bld_base.load_ubo = emit_load_ubo;
3106    bld.bld_base.load_kernel_arg = emit_load_kernel_arg;
3107    bld.bld_base.load_global = emit_load_global;
3108    bld.bld_base.store_global = emit_store_global;
3109    bld.bld_base.atomic_global = emit_atomic_global;
3110    bld.bld_base.tex = emit_tex;
3111    bld.bld_base.tex_size = emit_tex_size;
3112    bld.bld_base.bgnloop = bgnloop;
3113    bld.bld_base.endloop = endloop;
3114    bld.bld_base.if_cond = if_cond;
3115    bld.bld_base.else_stmt = else_stmt;
3116    bld.bld_base.endif_stmt = endif_stmt;
3117    bld.bld_base.break_stmt = break_stmt;
3118    bld.bld_base.continue_stmt = continue_stmt;
3119    bld.bld_base.sysval_intrin = emit_sysval_intrin;
3120    bld.bld_base.discard = discard;
3121    bld.bld_base.emit_vertex = emit_vertex;
3122    bld.bld_base.end_primitive = end_primitive;
3123    bld.bld_base.load_mem = emit_load_mem;
3124    bld.bld_base.store_mem = emit_store_mem;
3125    bld.bld_base.get_ssbo_size = emit_get_ssbo_size;
3126    bld.bld_base.atomic_mem = emit_atomic_mem;
3127    bld.bld_base.barrier = emit_barrier;
3128    bld.bld_base.image_op = emit_image_op;
3129    bld.bld_base.image_size = emit_image_size;
3130    bld.bld_base.vote = emit_vote;
3131    bld.bld_base.elect = emit_elect;
3132    bld.bld_base.reduce = emit_reduce;
3133    bld.bld_base.ballot = emit_ballot;
3134 #if LLVM_VERSION_MAJOR >= 10
3135    bld.bld_base.shuffle = emit_shuffle;
3136 #endif
3137    bld.bld_base.read_invocation = emit_read_invocation;
3138    bld.bld_base.helper_invocation = emit_helper_invocation;
3139    bld.bld_base.interp_at = emit_interp_at;
3140    bld.bld_base.call = emit_call;
3141    bld.bld_base.load_scratch = emit_load_scratch;
3142    bld.bld_base.store_scratch = emit_store_scratch;
3143    bld.bld_base.load_const = emit_load_const;
3144    bld.bld_base.clock = emit_clock;
3145    bld.bld_base.set_vertex_and_primitive_count = emit_set_vertex_and_primitive_count;
3146    bld.bld_base.launch_mesh_workgroups = emit_launch_mesh_workgroups;
3147 
3148    bld.bld_base.fns = params->fns;
3149    bld.bld_base.func = params->current_func;
3150    bld.mask = params->mask;
3151    bld.inputs = params->inputs;
3152    bld.outputs = outputs;
3153    bld.consts_ptr = params->consts_ptr;
3154    bld.ssbo_ptr = params->ssbo_ptr;
3155    bld.sampler = params->sampler;
3156 
3157    bld.context_type = params->context_type;
3158    bld.context_ptr = params->context_ptr;
3159    bld.resources_type = params->resources_type;
3160    bld.resources_ptr = params->resources_ptr;
3161    bld.thread_data_type = params->thread_data_type;
3162    bld.thread_data_ptr = params->thread_data_ptr;
3163    bld.image = params->image;
3164    bld.shared_ptr = params->shared_ptr;
3165    bld.payload_ptr = params->payload_ptr;
3166    bld.coro = params->coro;
3167    bld.kernel_args_ptr = params->kernel_args;
3168    bld.num_inputs = params->num_inputs;
3169    bld.indirects = 0;
3170    if (shader->info.inputs_read_indirectly)
3171       bld.indirects |= nir_var_shader_in;
3172 
3173    bld.gs_iface = params->gs_iface;
3174    bld.tcs_iface = params->tcs_iface;
3175    bld.tes_iface = params->tes_iface;
3176    bld.fs_iface = params->fs_iface;
3177    bld.mesh_iface = params->mesh_iface;
3178    if (bld.gs_iface) {
3179       struct lp_build_context *uint_bld = &bld.bld_base.uint_bld;
3180 
3181       bld.gs_vertex_streams = params->gs_vertex_streams;
3182       bld.max_output_vertices_vec = lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
3183                                                            shader->info.gs.vertices_out);
3184       for (int i = 0; i < params->gs_vertex_streams; i++) {
3185          bld.emitted_prims_vec_ptr[i] =
3186             lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_prims_ptr");
3187          bld.emitted_vertices_vec_ptr[i] =
3188             lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_vertices_ptr");
3189          bld.total_emitted_vertices_vec_ptr[i] =
3190             lp_build_alloca(gallivm, uint_bld->vec_type, "total_emitted_vertices_ptr");
3191       }
3192    }
3193    lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
3194 
3195    if (params->system_values)
3196       bld.system_values = *params->system_values;
3197 
3198    bld.bld_base.shader = shader;
3199 
3200    bld.scratch_size = ALIGN(shader->scratch_size, 8);
3201    if (params->scratch_ptr)
3202       bld.scratch_ptr = params->scratch_ptr;
3203    else if (shader->scratch_size) {
3204       bld.scratch_ptr = lp_build_array_alloca(gallivm,
3205                                               LLVMInt8TypeInContext(gallivm->context),
3206                                               lp_build_const_int32(gallivm, bld.scratch_size * type.length),
3207                                               "scratch");
3208    }
3209 
3210    if (!exec_list_is_singular(&shader->functions)) {
3211       bld.call_context_type = lp_build_cs_func_call_context(gallivm, type.length, bld.context_type, bld.resources_type);
3212       if (!params->call_context_ptr) {
3213          build_call_context(&bld);
3214       } else
3215          bld.call_context_ptr = params->call_context_ptr;
3216    }
3217 
3218    emit_prologue(&bld);
3219    lp_build_nir_llvm(&bld.bld_base, shader, impl);
3220 
3221    if (bld.gs_iface) {
3222       LLVMBuilderRef builder = bld.bld_base.base.gallivm->builder;
3223       LLVMValueRef total_emitted_vertices_vec;
3224       LLVMValueRef emitted_prims_vec;
3225 
3226       for (int i = 0; i < params->gs_vertex_streams; i++) {
3227          end_primitive_masked(&bld.bld_base, lp_build_mask_value(bld.mask), i);
3228 
3229          total_emitted_vertices_vec =
3230             LLVMBuildLoad2(builder, bld.bld_base.uint_bld.vec_type, bld.total_emitted_vertices_vec_ptr[i], "");
3231 
3232          emitted_prims_vec =
3233             LLVMBuildLoad2(builder, bld.bld_base.uint_bld.vec_type, bld.emitted_prims_vec_ptr[i], "");
3234          bld.gs_iface->gs_epilogue(bld.gs_iface,
3235                                    total_emitted_vertices_vec,
3236                                    emitted_prims_vec, i);
3237       }
3238    }
3239    lp_exec_mask_fini(&bld.exec_mask);
3240 }
3241 
lp_build_nir_soa(struct gallivm_state * gallivm,struct nir_shader * shader,const struct lp_build_tgsi_params * params,LLVMValueRef (* outputs)[4])3242 void lp_build_nir_soa(struct gallivm_state *gallivm,
3243                       struct nir_shader *shader,
3244                       const struct lp_build_tgsi_params *params,
3245                       LLVMValueRef (*outputs)[4])
3246 {
3247    lp_build_nir_prepasses(shader);
3248    lp_build_nir_soa_func(gallivm, shader,
3249                          nir_shader_get_entrypoint(shader),
3250                          params, outputs);
3251 }
3252