• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2019 Red Hat.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included
14  * in all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  *
24  **************************************************************************/
25 
26 #include "lp_bld_nir.h"
27 #include "lp_bld_init.h"
28 #include "lp_bld_flow.h"
29 #include "lp_bld_logic.h"
30 #include "lp_bld_gather.h"
31 #include "lp_bld_const.h"
32 #include "lp_bld_struct.h"
33 #include "lp_bld_arit.h"
34 #include "lp_bld_bitarit.h"
35 #include "lp_bld_coro.h"
36 #include "lp_bld_printf.h"
37 #include "lp_bld_intr.h"
38 #include "util/u_cpu_detect.h"
39 #include "util/u_math.h"
40 
bit_size_to_shift_size(int bit_size)41 static int bit_size_to_shift_size(int bit_size)
42 {
43    switch (bit_size) {
44    case 64:
45       return 3;
46    default:
47    case 32:
48       return 2;
49    case 16:
50       return 1;
51    case 8:
52       return 0;
53    }
54 }
55 
56 /*
57  * combine the execution mask if there is one with the current mask.
58  */
59 static LLVMValueRef
mask_vec(struct lp_build_nir_context * bld_base)60 mask_vec(struct lp_build_nir_context *bld_base)
61 {
62    struct lp_build_nir_soa_context * bld = (struct lp_build_nir_soa_context *)bld_base;
63    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
64    struct lp_exec_mask *exec_mask = &bld->exec_mask;
65    LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
66    if (!exec_mask->has_mask) {
67       return bld_mask;
68    }
69    if (!bld_mask)
70       return exec_mask->exec_mask;
71    return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
72                        exec_mask->exec_mask, "");
73 }
74 
75 static bool
invocation_0_must_be_active(struct lp_build_nir_context * bld_base)76 invocation_0_must_be_active(struct lp_build_nir_context *bld_base)
77 {
78    struct lp_build_nir_soa_context * bld = (struct lp_build_nir_soa_context *)bld_base;
79 
80    /* Fragment shaders may dispatch with invocation 0 inactive.  All other
81     * stages have invocation 0 active at the top.  (See
82     * lp_build_tgsi_params.mask setup in draw_llvm.c and lp_state_*.c)
83     */
84    if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT)
85       return false;
86 
87    /* If we're in some control flow right now, then invocation 0 may be
88     * disabled.
89     */
90    if (bld->exec_mask.has_mask)
91       return false;
92 
93    return true;
94 }
95 
96 static LLVMValueRef
lp_build_zero_bits(struct gallivm_state * gallivm,int bit_size)97 lp_build_zero_bits(struct gallivm_state *gallivm, int bit_size)
98 {
99    if (bit_size == 64)
100       return LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 0, 0);
101    else if (bit_size == 16)
102       return LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0, 0);
103    else if (bit_size == 8)
104       return LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0, 0);
105    else
106       return lp_build_const_int32(gallivm, 0);
107 }
108 
109 static LLVMValueRef
emit_fetch_64bit(struct lp_build_nir_context * bld_base,LLVMValueRef input,LLVMValueRef input2)110 emit_fetch_64bit(
111    struct lp_build_nir_context * bld_base,
112    LLVMValueRef input,
113    LLVMValueRef input2)
114 {
115    struct gallivm_state *gallivm = bld_base->base.gallivm;
116    LLVMBuilderRef builder = gallivm->builder;
117    LLVMValueRef res;
118    int i;
119    LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
120    int len = bld_base->base.type.length * 2;
121    assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
122 
123    for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
124 #if UTIL_ARCH_LITTLE_ENDIAN
125       shuffles[i] = lp_build_const_int32(gallivm, i / 2);
126       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
127 #else
128       shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
129       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
130 #endif
131    }
132    res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
133 
134    return LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
135 }
136 
137 static void
emit_store_64bit_split(struct lp_build_nir_context * bld_base,LLVMValueRef value,LLVMValueRef split_values[2])138 emit_store_64bit_split(struct lp_build_nir_context *bld_base,
139                        LLVMValueRef value,
140                        LLVMValueRef split_values[2])
141 {
142    struct gallivm_state *gallivm = bld_base->base.gallivm;
143    LLVMBuilderRef builder = gallivm->builder;
144    unsigned i;
145    LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
146    LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
147    int len = bld_base->base.type.length * 2;
148 
149    value = LLVMBuildBitCast(gallivm->builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), len), "");
150    for (i = 0; i < bld_base->base.type.length; i++) {
151 #if UTIL_ARCH_LITTLE_ENDIAN
152       shuffles[i] = lp_build_const_int32(gallivm, i * 2);
153       shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
154 #else
155       shuffles[i] = lp_build_const_int32(gallivm, i * 2 + 1);
156       shuffles2[i] = lp_build_const_int32(gallivm, i * 2);
157 #endif
158    }
159 
160    split_values[0] = LLVMBuildShuffleVector(builder, value,
161                                  LLVMGetUndef(LLVMTypeOf(value)),
162                                  LLVMConstVector(shuffles,
163                                                  bld_base->base.type.length),
164                                  "");
165    split_values[1] = LLVMBuildShuffleVector(builder, value,
166                                   LLVMGetUndef(LLVMTypeOf(value)),
167                                   LLVMConstVector(shuffles2,
168                                                   bld_base->base.type.length),
169                                   "");
170 }
171 
172 static void
emit_store_64bit_chan(struct lp_build_nir_context * bld_base,LLVMValueRef chan_ptr,LLVMValueRef chan_ptr2,LLVMValueRef value)173 emit_store_64bit_chan(struct lp_build_nir_context *bld_base,
174                       LLVMValueRef chan_ptr,
175                       LLVMValueRef chan_ptr2,
176                       LLVMValueRef value)
177 {
178    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
179    struct lp_build_context *float_bld = &bld_base->base;
180    LLVMValueRef split_vals[2];
181 
182    emit_store_64bit_split(bld_base, value, split_vals);
183 
184    lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[0], chan_ptr);
185    lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[1], chan_ptr2);
186 }
187 
188 static LLVMValueRef
get_soa_array_offsets(struct lp_build_context * uint_bld,LLVMValueRef indirect_index,int num_components,unsigned chan_index,bool need_perelement_offset)189 get_soa_array_offsets(struct lp_build_context *uint_bld,
190                       LLVMValueRef indirect_index,
191                       int num_components,
192                       unsigned chan_index,
193                       bool need_perelement_offset)
194 {
195    struct gallivm_state *gallivm = uint_bld->gallivm;
196    LLVMValueRef chan_vec =
197       lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
198    LLVMValueRef length_vec =
199       lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
200    LLVMValueRef index_vec;
201 
202    /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
203    index_vec = lp_build_mul(uint_bld, indirect_index, lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, num_components));
204    index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
205    index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
206 
207    if (need_perelement_offset) {
208       LLVMValueRef pixel_offsets;
209       unsigned i;
210      /* build pixel offset vector: {0, 1, 2, 3, ...} */
211       pixel_offsets = uint_bld->undef;
212       for (i = 0; i < uint_bld->type.length; i++) {
213          LLVMValueRef ii = lp_build_const_int32(gallivm, i);
214          pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
215                                                 ii, ii, "");
216       }
217       index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
218    }
219    return index_vec;
220 }
221 
222 static LLVMValueRef
build_gather(struct lp_build_nir_context * bld_base,struct lp_build_context * bld,LLVMValueRef base_ptr,LLVMValueRef indexes,LLVMValueRef overflow_mask,LLVMValueRef indexes2)223 build_gather(struct lp_build_nir_context *bld_base,
224              struct lp_build_context *bld,
225              LLVMValueRef base_ptr,
226              LLVMValueRef indexes,
227              LLVMValueRef overflow_mask,
228              LLVMValueRef indexes2)
229 {
230    struct gallivm_state *gallivm = bld_base->base.gallivm;
231    LLVMBuilderRef builder = gallivm->builder;
232    struct lp_build_context *uint_bld = &bld_base->uint_bld;
233    LLVMValueRef res;
234    unsigned i;
235 
236    if (indexes2)
237       res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
238    else
239       res = bld->undef;
240    /*
241     * overflow_mask is a vector telling us which channels
242     * in the vector overflowed. We use the overflow behavior for
243     * constant buffers which is defined as:
244     * Out of bounds access to constant buffer returns 0 in all
245     * components. Out of bounds behavior is always with respect
246     * to the size of the buffer bound at that slot.
247     */
248 
249    if (overflow_mask) {
250       /*
251        * We avoid per-element control flow here (also due to llvm going crazy,
252        * though I suspect it's better anyway since overflow is likely rare).
253        * Note that since we still fetch from buffers even if num_elements was
254        * zero (in this case we'll fetch from index zero) the jit func callers
255        * MUST provide valid fake constant buffers of size 4x32 (the values do
256        * not matter), otherwise we'd still need (not per element though)
257        * control flow.
258        */
259       indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
260       if (indexes2)
261          indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
262    }
263 
264    /*
265     * Loop over elements of index_vec, load scalar value, insert it into 'res'.
266     */
267    for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
268       LLVMValueRef si, di;
269       LLVMValueRef index;
270       LLVMValueRef scalar_ptr, scalar;
271 
272       di = lp_build_const_int32(gallivm, i);
273       if (indexes2)
274          si = lp_build_const_int32(gallivm, i >> 1);
275       else
276          si = di;
277 
278       if (indexes2 && (i & 1)) {
279          index = LLVMBuildExtractElement(builder,
280                                          indexes2, si, "");
281       } else {
282          index = LLVMBuildExtractElement(builder,
283                                          indexes, si, "");
284       }
285 
286       scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "gather_ptr");
287       scalar = LLVMBuildLoad(builder, scalar_ptr, "");
288 
289       res = LLVMBuildInsertElement(builder, res, scalar, di, "");
290    }
291 
292    if (overflow_mask) {
293       if (indexes2) {
294          res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
295          overflow_mask = LLVMBuildSExt(builder, overflow_mask,
296                                        bld_base->dbl_bld.int_vec_type, "");
297          res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
298                                bld_base->dbl_bld.zero, res);
299       } else
300          res = lp_build_select(bld, overflow_mask, bld->zero, res);
301    }
302 
303    return res;
304 }
305 
306 /**
307  * Scatter/store vector.
308  */
309 static void
emit_mask_scatter(struct lp_build_nir_soa_context * bld,LLVMValueRef base_ptr,LLVMValueRef indexes,LLVMValueRef values,struct lp_exec_mask * mask)310 emit_mask_scatter(struct lp_build_nir_soa_context *bld,
311                   LLVMValueRef base_ptr,
312                   LLVMValueRef indexes,
313                   LLVMValueRef values,
314                   struct lp_exec_mask *mask)
315 {
316    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
317    LLVMBuilderRef builder = gallivm->builder;
318    unsigned i;
319    LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;
320 
321    /*
322     * Loop over elements of index_vec, store scalar value.
323     */
324    for (i = 0; i < bld->bld_base.base.type.length; i++) {
325       LLVMValueRef ii = lp_build_const_int32(gallivm, i);
326       LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
327       LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
328       LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
329       LLVMValueRef scalar_pred = pred ?
330          LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
331 
332       if (0)
333          lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
334                          ii, val, index, scalar_ptr);
335 
336       if (scalar_pred) {
337          LLVMValueRef real_val, dst_val;
338          dst_val = LLVMBuildLoad2(builder, LLVMTypeOf(val), scalar_ptr, "");
339          scalar_pred = LLVMBuildTrunc(builder, scalar_pred, LLVMInt1TypeInContext(gallivm->context), "");
340          real_val = LLVMBuildSelect(builder, scalar_pred, val, dst_val, "");
341          LLVMBuildStore(builder, real_val, scalar_ptr);
342       }
343       else {
344          LLVMBuildStore(builder, val, scalar_ptr);
345       }
346    }
347 }
348 
emit_load_var(struct lp_build_nir_context * bld_base,nir_variable_mode deref_mode,unsigned num_components,unsigned bit_size,nir_variable * var,unsigned vertex_index,LLVMValueRef indir_vertex_index,unsigned const_index,LLVMValueRef indir_index,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])349 static void emit_load_var(struct lp_build_nir_context *bld_base,
350                            nir_variable_mode deref_mode,
351                            unsigned num_components,
352                            unsigned bit_size,
353                            nir_variable *var,
354                            unsigned vertex_index,
355                            LLVMValueRef indir_vertex_index,
356                            unsigned const_index,
357                            LLVMValueRef indir_index,
358                            LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
359 {
360    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
361    struct gallivm_state *gallivm = bld_base->base.gallivm;
362    int dmul = bit_size == 64 ? 2 : 1;
363    unsigned location = var->data.driver_location;
364    unsigned location_frac = var->data.location_frac;
365 
366    if (!var->data.compact && !indir_index)
367       location += const_index;
368    else if (var->data.compact) {
369       location += const_index / 4;
370       location_frac += const_index % 4;
371       const_index = 0;
372    }
373    switch (deref_mode) {
374    case nir_var_shader_in:
375       for (unsigned i = 0; i < num_components; i++) {
376          int idx = (i * dmul) + location_frac;
377          int comp_loc = location;
378 
379          if (bit_size == 64 && idx >= 4) {
380             comp_loc++;
381             idx = idx % 4;
382          }
383 
384          if (bld->gs_iface) {
385             LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
386             LLVMValueRef attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
387             LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
388             LLVMValueRef result2;
389 
390             result[i] = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
391                                                    false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
392             if (bit_size == 64) {
393                LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
394                result2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
395                                                     false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
396                result[i] = emit_fetch_64bit(bld_base, result[i], result2);
397             }
398          } else if (bld->tes_iface) {
399             LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
400             LLVMValueRef attrib_index_val;
401             LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
402             LLVMValueRef result2;
403 
404             if (indir_index) {
405                if (var->data.compact) {
406                   swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));
407                   attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
408                } else
409                   attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
410             } else
411                attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
412 
413             if (var->data.patch) {
414                result[i] = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
415                                                              indir_index ? true : false, attrib_index_val, swizzle_index_val);
416                if (bit_size == 64) {
417                   LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
418                   result2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
419                                                               indir_index ? true : false, attrib_index_val, swizzle_index_val);
420                   result[i] = emit_fetch_64bit(bld_base, result[i], result2);
421                }
422             }
423             else {
424                result[i] = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
425                                                               indir_vertex_index ? true : false,
426                                                               indir_vertex_index ? indir_vertex_index : vertex_index_val,
427                                                               (indir_index && !var->data.compact) ? true : false, attrib_index_val,
428                                                               (indir_index && var->data.compact) ? true : false, swizzle_index_val);
429                if (bit_size == 64) {
430                   LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
431                   result2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
432                                                                indir_vertex_index ? true : false,
433                                                                indir_vertex_index ? indir_vertex_index : vertex_index_val,
434                                                                indir_index ? true : false, attrib_index_val, false, swizzle_index_val);
435                   result[i] = emit_fetch_64bit(bld_base, result[i], result2);
436                }
437             }
438          } else if (bld->tcs_iface) {
439             LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
440             LLVMValueRef attrib_index_val;
441             LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
442 
443             if (indir_index) {
444                if (var->data.compact) {
445                   swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));
446                   attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
447                } else
448                   attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
449             } else
450                attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
451             result[i] = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
452                                                          indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
453                                                          (indir_index && !var->data.compact) ? true : false, attrib_index_val,
454                                                          (indir_index && var->data.compact) ? true : false, swizzle_index_val);
455             if (bit_size == 64) {
456                LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
457                LLVMValueRef result2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
458                                                                        indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
459                                                                        indir_index ? true : false, attrib_index_val,
460                                                                        false, swizzle_index_val);
461                result[i] = emit_fetch_64bit(bld_base, result[i], result2);
462             }
463          } else {
464             if (indir_index) {
465                LLVMValueRef attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
466                LLVMValueRef index_vec = get_soa_array_offsets(&bld_base->uint_bld,
467                                                               attrib_index_val, 4, idx,
468                                                               TRUE);
469                LLVMValueRef index_vec2 = NULL;
470                LLVMTypeRef fptr_type;
471                LLVMValueRef inputs_array;
472                fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
473                inputs_array = LLVMBuildBitCast(gallivm->builder, bld->inputs_array, fptr_type, "");
474 
475                if (bit_size == 64)
476                   index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
477                                                      indir_index, 4, idx + 1, TRUE);
478 
479                /* Gather values from the input register array */
480                result[i] = build_gather(bld_base, &bld_base->base, inputs_array, index_vec, NULL, index_vec2);
481             } else {
482                if (bld->indirects & nir_var_shader_in) {
483                   LLVMValueRef lindex = lp_build_const_int32(gallivm,
484                                                              comp_loc * 4 + idx);
485                   LLVMValueRef input_ptr = lp_build_pointer_get(gallivm->builder,
486                                                              bld->inputs_array, lindex);
487                   if (bit_size == 64) {
488                      LLVMValueRef lindex2 = lp_build_const_int32(gallivm,
489                                                                  comp_loc * 4 + (idx + 1));
490                      LLVMValueRef input_ptr2 = lp_build_pointer_get(gallivm->builder,
491                                                                     bld->inputs_array, lindex2);
492                      result[i] = emit_fetch_64bit(bld_base, input_ptr, input_ptr2);
493                   } else {
494                      result[i] = input_ptr;
495                   }
496                } else {
497                   if (bit_size == 64) {
498                      LLVMValueRef tmp[2];
499                      tmp[0] = bld->inputs[comp_loc][idx];
500                      tmp[1] = bld->inputs[comp_loc][idx + 1];
501                      result[i] = emit_fetch_64bit(bld_base, tmp[0], tmp[1]);
502                   } else {
503                      result[i] = bld->inputs[comp_loc][idx];
504                   }
505                }
506             }
507          }
508       }
509       break;
510    case nir_var_shader_out:
511       if (bld->fs_iface && bld->fs_iface->fb_fetch) {
512          bld->fs_iface->fb_fetch(bld->fs_iface, &bld_base->base, var->data.location, result);
513          return;
514       }
515       for (unsigned i = 0; i < num_components; i++) {
516          int idx = (i * dmul) + location_frac;
517          if (bld->tcs_iface) {
518             LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
519             LLVMValueRef attrib_index_val;
520             LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
521 
522             if (indir_index)
523                attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
524             else
525                attrib_index_val = lp_build_const_int32(gallivm, location);
526 
527             result[i] = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
528                                                           indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
529                                                           (indir_index && !var->data.compact) ? true : false, attrib_index_val,
530                                                           (indir_index && var->data.compact) ? true : false, swizzle_index_val, 0);
531             if (bit_size == 64) {
532                LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
533                LLVMValueRef result2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
534                                                                         indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
535                                                                         indir_index ? true : false, attrib_index_val,
536                                                                         false, swizzle_index_val, 0);
537                result[i] = emit_fetch_64bit(bld_base, result[i], result2);
538             }
539          }
540       }
541       break;
542    default:
543       break;
544    }
545 }
546 
emit_store_chan(struct lp_build_nir_context * bld_base,nir_variable_mode deref_mode,unsigned bit_size,unsigned location,unsigned comp,unsigned chan,LLVMValueRef dst)547 static void emit_store_chan(struct lp_build_nir_context *bld_base,
548                             nir_variable_mode deref_mode,
549                             unsigned bit_size,
550                             unsigned location, unsigned comp,
551                             unsigned chan,
552                             LLVMValueRef dst)
553 {
554    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
555    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
556    struct lp_build_context *float_bld = &bld_base->base;
557 
558    if (bit_size == 64) {
559       chan *= 2;
560       chan += comp;
561       if (chan >= 4) {
562          chan -= 4;
563          location++;
564       }
565       emit_store_64bit_chan(bld_base, bld->outputs[location][chan],
566                             bld->outputs[location][chan + 1], dst);
567    } else {
568       dst = LLVMBuildBitCast(builder, dst, float_bld->vec_type, "");
569       lp_exec_mask_store(&bld->exec_mask, float_bld, dst,
570                          bld->outputs[location][chan + comp]);
571    }
572 }
573 
emit_store_tcs_chan(struct lp_build_nir_context * bld_base,bool is_compact,unsigned bit_size,unsigned location,unsigned const_index,LLVMValueRef indir_vertex_index,LLVMValueRef indir_index,unsigned comp,unsigned chan,LLVMValueRef chan_val)574 static void emit_store_tcs_chan(struct lp_build_nir_context *bld_base,
575                                 bool is_compact,
576                                 unsigned bit_size,
577                                 unsigned location,
578                                 unsigned const_index,
579                                 LLVMValueRef indir_vertex_index,
580                                 LLVMValueRef indir_index,
581                                 unsigned comp,
582                                 unsigned chan,
583                                 LLVMValueRef chan_val)
584 {
585    struct gallivm_state *gallivm = bld_base->base.gallivm;
586    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
587    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
588    unsigned swizzle = chan;
589    if (bit_size == 64) {
590       swizzle *= 2;
591       swizzle += comp;
592       if (swizzle >= 4) {
593          swizzle -= 4;
594          location++;
595       }
596    } else
597       swizzle += comp;
598    LLVMValueRef attrib_index_val;
599    LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, swizzle);
600 
601    if (indir_index) {
602       if (is_compact) {
603          swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, swizzle));
604          attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
605       } else
606          attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, location));
607    } else
608       attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
609    LLVMValueRef exec_mask = mask_vec(bld_base);
610    if (bit_size == 64) {
611       LLVMValueRef split_vals[2];
612       LLVMValueRef swizzle_index_val2 = lp_build_const_int32(gallivm, swizzle + 1);
613       emit_store_64bit_split(bld_base, chan_val, split_vals);
614       bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
615                                         indir_vertex_index ? true : false,
616                                         indir_vertex_index,
617                                         indir_index ? true : false,
618                                         attrib_index_val,
619                                         false, swizzle_index_val,
620                                         split_vals[0], exec_mask);
621       bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
622                                         indir_vertex_index ? true : false,
623                                         indir_vertex_index,
624                                         indir_index ? true : false,
625                                         attrib_index_val,
626                                         false, swizzle_index_val2,
627                                         split_vals[1], exec_mask);
628    } else {
629       chan_val = LLVMBuildBitCast(builder, chan_val, bld_base->base.vec_type, "");
630       bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
631                                         indir_vertex_index ? true : false,
632                                         indir_vertex_index,
633                                         indir_index && !is_compact ? true : false,
634                                         attrib_index_val,
635                                         indir_index && is_compact ? true : false,
636                                         swizzle_index_val,
637                                         chan_val, exec_mask);
638    }
639 }
640 
emit_store_var(struct lp_build_nir_context * bld_base,nir_variable_mode deref_mode,unsigned num_components,unsigned bit_size,nir_variable * var,unsigned writemask,LLVMValueRef indir_vertex_index,unsigned const_index,LLVMValueRef indir_index,LLVMValueRef dst)641 static void emit_store_var(struct lp_build_nir_context *bld_base,
642                            nir_variable_mode deref_mode,
643                            unsigned num_components,
644                            unsigned bit_size,
645                            nir_variable *var,
646                            unsigned writemask,
647                            LLVMValueRef indir_vertex_index,
648                            unsigned const_index,
649                            LLVMValueRef indir_index,
650                            LLVMValueRef dst)
651 {
652    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
653    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
654    switch (deref_mode) {
655    case nir_var_shader_out: {
656       unsigned location = var->data.driver_location;
657       unsigned comp = var->data.location_frac;
658       if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
659          if (var->data.location == FRAG_RESULT_STENCIL)
660             comp = 1;
661          else if (var->data.location == FRAG_RESULT_DEPTH)
662             comp = 2;
663       }
664 
665       if (var->data.compact) {
666          location += const_index / 4;
667          comp += const_index % 4;
668          const_index = 0;
669       }
670 
671       for (unsigned chan = 0; chan < num_components; chan++) {
672          if (writemask & (1u << chan)) {
673             LLVMValueRef chan_val = (num_components == 1) ? dst : LLVMBuildExtractValue(builder, dst, chan, "");
674             if (bld->tcs_iface) {
675                emit_store_tcs_chan(bld_base, var->data.compact, bit_size, location, const_index, indir_vertex_index, indir_index, comp, chan, chan_val);
676             } else
677                emit_store_chan(bld_base, deref_mode, bit_size, location + const_index, comp, chan, chan_val);
678          }
679       }
680       break;
681    }
682    default:
683       break;
684    }
685 }
686 
emit_load_reg(struct lp_build_nir_context * bld_base,struct lp_build_context * reg_bld,const nir_reg_src * reg,LLVMValueRef indir_src,LLVMValueRef reg_storage)687 static LLVMValueRef emit_load_reg(struct lp_build_nir_context *bld_base,
688                                   struct lp_build_context *reg_bld,
689                                   const nir_reg_src *reg,
690                                   LLVMValueRef indir_src,
691                                   LLVMValueRef reg_storage)
692 {
693    struct gallivm_state *gallivm = bld_base->base.gallivm;
694    LLVMBuilderRef builder = gallivm->builder;
695    int nc = reg->reg->num_components;
696    LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS] = { NULL };
697    struct lp_build_context *uint_bld = &bld_base->uint_bld;
698    if (reg->reg->num_array_elems) {
699       LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, reg->base_offset);
700       if (reg->indirect) {
701          LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, reg->reg->num_array_elems - 1);
702          indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, "");
703          indirect_val = lp_build_min(uint_bld, indirect_val, max_index);
704       }
705       reg_storage = LLVMBuildBitCast(builder, reg_storage, LLVMPointerType(reg_bld->elem_type, 0), "");
706       for (unsigned i = 0; i < nc; i++) {
707          LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, TRUE);
708          vals[i] = build_gather(bld_base, reg_bld, reg_storage, indirect_offset, NULL, NULL);
709       }
710    } else {
711       for (unsigned i = 0; i < nc; i++) {
712          LLVMValueRef this_storage = nc == 1 ? reg_storage : lp_build_array_get_ptr(gallivm, reg_storage,
713                                                                                     lp_build_const_int32(gallivm, i));
714          vals[i] = LLVMBuildLoad(builder, this_storage, "");
715       }
716    }
717    return nc == 1 ? vals[0] : lp_nir_array_build_gather_values(builder, vals, nc);
718 }
719 
emit_store_reg(struct lp_build_nir_context * bld_base,struct lp_build_context * reg_bld,const nir_reg_dest * reg,unsigned writemask,LLVMValueRef indir_src,LLVMValueRef reg_storage,LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS])720 static void emit_store_reg(struct lp_build_nir_context *bld_base,
721                            struct lp_build_context *reg_bld,
722                            const nir_reg_dest *reg,
723                            unsigned writemask,
724                            LLVMValueRef indir_src,
725                            LLVMValueRef reg_storage,
726                            LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS])
727 {
728    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
729    struct gallivm_state *gallivm = bld_base->base.gallivm;
730    LLVMBuilderRef builder = gallivm->builder;
731    struct lp_build_context *uint_bld = &bld_base->uint_bld;
732    int nc = reg->reg->num_components;
733    if (reg->reg->num_array_elems > 0) {
734       LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, reg->base_offset);
735       if (reg->indirect) {
736          LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, reg->reg->num_array_elems - 1);
737          indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, "");
738          indirect_val = lp_build_min(uint_bld, indirect_val, max_index);
739       }
740       reg_storage = LLVMBuildBitCast(builder, reg_storage, LLVMPointerType(reg_bld->elem_type, 0), "");
741       for (unsigned i = 0; i < nc; i++) {
742          if (!(writemask & (1 << i)))
743             continue;
744          LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, TRUE);
745          dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, "");
746          emit_mask_scatter(bld, reg_storage, indirect_offset, dst[i], &bld->exec_mask);
747       }
748       return;
749    }
750 
751    for (unsigned i = 0; i < nc; i++) {
752       LLVMValueRef this_storage = nc == 1 ? reg_storage : lp_build_array_get_ptr(gallivm, reg_storage,
753                                                          lp_build_const_int32(gallivm, i));
754       dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, "");
755       lp_exec_mask_store(&bld->exec_mask, reg_bld, dst[i], this_storage);
756    }
757 }
758 
emit_load_kernel_arg(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,unsigned offset_bit_size,bool offset_is_uniform,LLVMValueRef offset,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])759 static void emit_load_kernel_arg(struct lp_build_nir_context *bld_base,
760                                  unsigned nc,
761                                  unsigned bit_size,
762                                  unsigned offset_bit_size,
763                                  bool offset_is_uniform,
764                                  LLVMValueRef offset,
765                                  LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
766 {
767    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
768    struct gallivm_state *gallivm = bld_base->base.gallivm;
769    LLVMBuilderRef builder = gallivm->builder;
770    struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size);
771    LLVMValueRef kernel_args_ptr = bld->kernel_args_ptr;
772    unsigned size_shift = bit_size_to_shift_size(bit_size);
773    struct lp_build_context *bld_offset = get_int_bld(bld_base, true, offset_bit_size);
774    if (size_shift)
775       offset = lp_build_shr(bld_offset, offset, lp_build_const_int_vec(gallivm, bld_offset->type, size_shift));
776 
777    LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0);
778    kernel_args_ptr = LLVMBuildBitCast(builder, kernel_args_ptr, ptr_type, "");
779 
780    if (!invocation_0_must_be_active(bld_base)) {
781       mesa_logw_once("Treating load_kernel_arg in control flow as uniform, results may be incorrect.");
782    }
783 
784    if (offset_is_uniform) {
785       offset = LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), "");
786 
787       for (unsigned c = 0; c < nc; c++) {
788          LLVMValueRef this_offset = LLVMBuildAdd(builder, offset, offset_bit_size == 64 ? lp_build_const_int64(gallivm, c) : lp_build_const_int32(gallivm, c), "");
789 
790          LLVMValueRef scalar = lp_build_pointer_get(builder, kernel_args_ptr, this_offset);
791          result[c] = lp_build_broadcast_scalar(bld_broad, scalar);
792       }
793    }
794 }
795 
global_addr_to_ptr(struct gallivm_state * gallivm,LLVMValueRef addr_ptr,unsigned bit_size)796 static LLVMValueRef global_addr_to_ptr(struct gallivm_state *gallivm, LLVMValueRef addr_ptr, unsigned bit_size)
797 {
798    LLVMBuilderRef builder = gallivm->builder;
799    switch (bit_size) {
800    case 8:
801       addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");
802       break;
803    case 16:
804       addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt16TypeInContext(gallivm->context), 0), "");
805       break;
806    case 32:
807    default:
808       addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), "");
809       break;
810    case 64:
811       addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0), "");
812       break;
813    }
814    return addr_ptr;
815 }
816 
emit_load_global(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,unsigned addr_bit_size,bool offset_is_uniform,LLVMValueRef addr,LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])817 static void emit_load_global(struct lp_build_nir_context *bld_base,
818                              unsigned nc,
819                              unsigned bit_size,
820                              unsigned addr_bit_size,
821                              bool offset_is_uniform,
822                              LLVMValueRef addr,
823                              LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
824 {
825    struct gallivm_state *gallivm = bld_base->base.gallivm;
826    LLVMBuilderRef builder = gallivm->builder;
827    struct lp_build_context *uint_bld = &bld_base->uint_bld;
828    struct lp_build_context *res_bld;
829    LLVMValueRef exec_mask = mask_vec(bld_base);
830 
831    res_bld = get_int_bld(bld_base, true, bit_size);
832 
833    if (offset_is_uniform && invocation_0_must_be_active(bld_base)) {
834       /* If the offset is uniform, then use the address from invocation 0 to
835        * load, and broadcast to all invocations.
836        */
837       LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
838                                                       lp_build_const_int32(gallivm, 0), "");
839       addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);
840 
841       for (unsigned c = 0; c < nc; c++) {
842          LLVMValueRef scalar = lp_build_pointer_get(builder, addr_ptr, lp_build_const_int32(gallivm, c));
843          outval[c] = lp_build_broadcast_scalar(res_bld, scalar);
844       }
845       return;
846    }
847 
848    for (unsigned c = 0; c < nc; c++) {
849       LLVMValueRef result = lp_build_alloca(gallivm, res_bld->vec_type, "");
850       struct lp_build_loop_state loop_state;
851       lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
852 
853       struct lp_build_if_state ifthen;
854       LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
855       cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
856       lp_build_if(&ifthen, gallivm, cond);
857 
858       LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
859                                                       loop_state.counter, "");
860       addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);
861 
862       LLVMValueRef value_ptr = lp_build_pointer_get(builder, addr_ptr, lp_build_const_int32(gallivm, c));
863 
864       LLVMValueRef temp_res;
865       temp_res = LLVMBuildLoad2(builder, res_bld->vec_type, result, "");
866       temp_res = LLVMBuildInsertElement(builder, temp_res, value_ptr, loop_state.counter, "");
867       LLVMBuildStore(builder, temp_res, result);
868       lp_build_endif(&ifthen);
869       lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
870                              NULL, LLVMIntUGE);
871       outval[c] = LLVMBuildLoad2(builder, res_bld->vec_type, result, "");
872    }
873 }
874 
emit_store_global(struct lp_build_nir_context * bld_base,unsigned writemask,unsigned nc,unsigned bit_size,unsigned addr_bit_size,LLVMValueRef addr,LLVMValueRef dst)875 static void emit_store_global(struct lp_build_nir_context *bld_base,
876                               unsigned writemask,
877                               unsigned nc, unsigned bit_size,
878                               unsigned addr_bit_size,
879                               LLVMValueRef addr,
880                               LLVMValueRef dst)
881 {
882    struct gallivm_state *gallivm = bld_base->base.gallivm;
883    LLVMBuilderRef builder = gallivm->builder;
884    struct lp_build_context *uint_bld = &bld_base->uint_bld;
885    LLVMValueRef exec_mask = mask_vec(bld_base);
886 
887    for (unsigned c = 0; c < nc; c++) {
888       if (!(writemask & (1u << c)))
889          continue;
890       LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
891 
892       struct lp_build_loop_state loop_state;
893       lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
894       LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
895                                                        loop_state.counter, "");
896 
897       LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
898                                                       loop_state.counter, "");
899       addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);
900       switch (bit_size) {
901       case 8:
902          value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt8TypeInContext(gallivm->context), "");
903          break;
904       case 16:
905          value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt16TypeInContext(gallivm->context), "");
906          break;
907       case 32:
908          value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt32TypeInContext(gallivm->context), "");
909          break;
910       case 64:
911          value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt64TypeInContext(gallivm->context), "");
912          break;
913       default:
914          break;
915       }
916       struct lp_build_if_state ifthen;
917 
918       LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
919       cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
920       lp_build_if(&ifthen, gallivm, cond);
921       lp_build_pointer_set(builder, addr_ptr, lp_build_const_int32(gallivm, c), value_ptr);
922       lp_build_endif(&ifthen);
923       lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
924                              NULL, LLVMIntUGE);
925    }
926 }
927 
emit_atomic_global(struct lp_build_nir_context * bld_base,nir_intrinsic_op nir_op,unsigned addr_bit_size,unsigned val_bit_size,LLVMValueRef addr,LLVMValueRef val,LLVMValueRef val2,LLVMValueRef * result)928 static void emit_atomic_global(struct lp_build_nir_context *bld_base,
929                                nir_intrinsic_op nir_op,
930                                unsigned addr_bit_size,
931                                unsigned val_bit_size,
932                                LLVMValueRef addr,
933                                LLVMValueRef val, LLVMValueRef val2,
934                                LLVMValueRef *result)
935 {
936    struct gallivm_state *gallivm = bld_base->base.gallivm;
937    LLVMBuilderRef builder = gallivm->builder;
938    struct lp_build_context *uint_bld = &bld_base->uint_bld;
939    struct lp_build_context *atom_bld = get_int_bld(bld_base, true, val_bit_size);
940    LLVMValueRef atom_res = lp_build_alloca(gallivm,
941                                            LLVMTypeOf(val), "");
942    LLVMValueRef exec_mask = mask_vec(bld_base);
943    struct lp_build_loop_state loop_state;
944    lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
945 
946    LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
947                                                     loop_state.counter, "");
948 
949    LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
950                                                    loop_state.counter, "");
951    addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, 32);
952    struct lp_build_if_state ifthen;
953    LLVMValueRef cond, temp_res;
954    LLVMValueRef scalar;
955    cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
956    cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
957    lp_build_if(&ifthen, gallivm, cond);
958 
959    addr_ptr = LLVMBuildBitCast(gallivm->builder, addr_ptr, LLVMPointerType(LLVMTypeOf(value_ptr), 0), "");
960    if (nir_op == nir_intrinsic_global_atomic_comp_swap) {
961       LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
962                                                          loop_state.counter, "");
963       cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atom_bld->elem_type, "");
964       scalar = LLVMBuildAtomicCmpXchg(builder, addr_ptr, value_ptr,
965                                       cas_src_ptr,
966                                       LLVMAtomicOrderingSequentiallyConsistent,
967                                       LLVMAtomicOrderingSequentiallyConsistent,
968                                       false);
969       scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
970    } else {
971       LLVMAtomicRMWBinOp op;
972       switch (nir_op) {
973       case nir_intrinsic_global_atomic_add:
974          op = LLVMAtomicRMWBinOpAdd;
975          break;
976       case nir_intrinsic_global_atomic_exchange:
977 
978          op = LLVMAtomicRMWBinOpXchg;
979          break;
980       case nir_intrinsic_global_atomic_and:
981          op = LLVMAtomicRMWBinOpAnd;
982          break;
983       case nir_intrinsic_global_atomic_or:
984          op = LLVMAtomicRMWBinOpOr;
985          break;
986       case nir_intrinsic_global_atomic_xor:
987          op = LLVMAtomicRMWBinOpXor;
988          break;
989       case nir_intrinsic_global_atomic_umin:
990          op = LLVMAtomicRMWBinOpUMin;
991          break;
992       case nir_intrinsic_global_atomic_umax:
993          op = LLVMAtomicRMWBinOpUMax;
994          break;
995       case nir_intrinsic_global_atomic_imin:
996          op = LLVMAtomicRMWBinOpMin;
997          break;
998       case nir_intrinsic_global_atomic_imax:
999          op = LLVMAtomicRMWBinOpMax;
1000          break;
1001       default:
1002          unreachable("unknown atomic op");
1003       }
1004 
1005       scalar = LLVMBuildAtomicRMW(builder, op,
1006                                   addr_ptr, value_ptr,
1007                                   LLVMAtomicOrderingSequentiallyConsistent,
1008                                   false);
1009    }
1010    temp_res = LLVMBuildLoad2(builder, LLVMTypeOf(val), atom_res, "");
1011    temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
1012    LLVMBuildStore(builder, temp_res, atom_res);
1013    lp_build_else(&ifthen);
1014    temp_res = LLVMBuildLoad2(builder, LLVMTypeOf(val), atom_res, "");
1015    bool is_float = LLVMTypeOf(val) == bld_base->base.vec_type;
1016    LLVMValueRef zero_val;
1017    if (is_float) {
1018       if (val_bit_size == 64)
1019          zero_val = lp_build_const_double(gallivm, 0);
1020       else
1021          zero_val = lp_build_const_float(gallivm, 0);
1022    } else {
1023       if (val_bit_size == 64)
1024          zero_val = lp_build_const_int64(gallivm, 0);
1025       else
1026          zero_val = lp_build_const_int32(gallivm, 0);
1027    }
1028 
1029    temp_res = LLVMBuildInsertElement(builder, temp_res, zero_val, loop_state.counter, "");
1030    LLVMBuildStore(builder, temp_res, atom_res);
1031    lp_build_endif(&ifthen);
1032    lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
1033                           NULL, LLVMIntUGE);
1034    *result = LLVMBuildLoad2(builder, LLVMTypeOf(val), atom_res, "");
1035 }
1036 
1037 /* Returns a boolean for whether the offset is in range of the given limit for
1038  * SSBO/UBO dereferences.
1039  */
1040 static LLVMValueRef
lp_offset_in_range(struct lp_build_nir_context * bld_base,LLVMValueRef offset,LLVMValueRef limit)1041 lp_offset_in_range(struct lp_build_nir_context *bld_base,
1042                    LLVMValueRef offset,
1043                    LLVMValueRef limit)
1044 {
1045    struct gallivm_state *gallivm = bld_base->base.gallivm;
1046    LLVMBuilderRef builder = gallivm->builder;
1047 
1048    LLVMValueRef fetch_extent = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, 1), "");
1049    LLVMValueRef fetch_in_bounds = LLVMBuildICmp(gallivm->builder, LLVMIntUGE, limit, fetch_extent, "");
1050    LLVMValueRef fetch_non_negative = LLVMBuildICmp(gallivm->builder, LLVMIntSGE, offset, lp_build_const_int32(gallivm, 0), "");
1051    return LLVMBuildAnd(gallivm->builder, fetch_in_bounds, fetch_non_negative, "");
1052 }
1053 
emit_load_ubo(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,bool offset_is_uniform,LLVMValueRef index,LLVMValueRef offset,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1054 static void emit_load_ubo(struct lp_build_nir_context *bld_base,
1055                           unsigned nc,
1056                           unsigned bit_size,
1057                           bool offset_is_uniform,
1058                           LLVMValueRef index,
1059                           LLVMValueRef offset,
1060                           LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1061 {
1062    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1063    struct gallivm_state *gallivm = bld_base->base.gallivm;
1064    LLVMBuilderRef builder = gallivm->builder;
1065    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1066    struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size);
1067    LLVMValueRef consts_ptr = lp_build_array_get(gallivm, bld->consts_ptr, index);
1068    LLVMValueRef num_consts = lp_build_array_get(gallivm, bld->const_sizes_ptr, index);
1069    unsigned size_shift = bit_size_to_shift_size(bit_size);
1070    if (size_shift)
1071       offset = lp_build_shr(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, size_shift));
1072 
1073    LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0);
1074    consts_ptr = LLVMBuildBitCast(builder, consts_ptr, ptr_type, "");
1075 
1076    if (offset_is_uniform && invocation_0_must_be_active(bld_base)) {
1077       offset = LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), "");
1078       struct lp_build_context *load_bld = get_int_bld(bld_base, true, bit_size);
1079       switch (bit_size) {
1080       case 8:
1081          num_consts = LLVMBuildShl(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 2), "");
1082          break;
1083       case 16:
1084          num_consts = LLVMBuildShl(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 1), "");
1085          break;
1086       case 64:
1087          num_consts = LLVMBuildLShr(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 1), "");
1088          break;
1089       default: break;
1090       }
1091       for (unsigned c = 0; c < nc; c++) {
1092          LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");
1093 
1094          LLVMValueRef scalar;
1095          /* If loading outside the UBO, we need to skip the load and read 0 instead. */
1096          LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size);
1097          LLVMValueRef res_store = lp_build_alloca(gallivm, LLVMTypeOf(zero), "");
1098          LLVMBuildStore(builder, zero, res_store);
1099 
1100          struct lp_build_if_state ifthen;
1101          lp_build_if(&ifthen, gallivm, lp_offset_in_range(bld_base, chan_offset, num_consts));
1102          LLVMBuildStore(builder, lp_build_pointer_get(builder, consts_ptr, chan_offset), res_store);
1103          lp_build_endif(&ifthen);
1104 
1105          scalar = LLVMBuildLoad(builder, res_store, "");
1106 
1107          result[c] = lp_build_broadcast_scalar(load_bld, scalar);
1108       }
1109    } else {
1110       LLVMValueRef overflow_mask;
1111 
1112       num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
1113       if (bit_size == 64)
1114          num_consts = lp_build_shr_imm(uint_bld, num_consts, 1);
1115       else if (bit_size == 16)
1116          num_consts = lp_build_shl_imm(uint_bld, num_consts, 1);
1117       else if (bit_size == 8)
1118          num_consts = lp_build_shl_imm(uint_bld, num_consts, 2);
1119 
1120       for (unsigned c = 0; c < nc; c++) {
1121          LLVMValueRef this_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
1122          overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
1123                                           this_offset, num_consts);
1124          result[c] = build_gather(bld_base, bld_broad, consts_ptr, this_offset, overflow_mask, NULL);
1125       }
1126    }
1127 }
1128 
1129 static void
emit_load_const(struct lp_build_nir_context * bld_base,const nir_load_const_instr * instr,LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])1130 emit_load_const(struct lp_build_nir_context *bld_base,
1131                 const nir_load_const_instr *instr,
1132                 LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
1133 {
1134    struct lp_build_context *int_bld = get_int_bld(bld_base, true, instr->def.bit_size);
1135    const unsigned bits = instr->def.bit_size;
1136 
1137    for (unsigned i = 0; i < instr->def.num_components; i++) {
1138      outval[i] = lp_build_const_int_vec(bld_base->base.gallivm, int_bld->type,
1139                                         bits == 32 ? instr->value[i].u32
1140                                                    : instr->value[i].u64);
1141    }
1142    for (unsigned i = instr->def.num_components; i < NIR_MAX_VEC_COMPONENTS; i++) {
1143       outval[i] = NULL;
1144    }
1145 }
1146 
1147 /**
1148  * Get the base address of SSBO[@index] for the @invocation channel, returning
1149  * the address and also the bounds (in units of the bit_size).
1150  */
1151 static LLVMValueRef
ssbo_base_pointer(struct lp_build_nir_context * bld_base,unsigned bit_size,LLVMValueRef index,LLVMValueRef invocation,LLVMValueRef * bounds)1152 ssbo_base_pointer(struct lp_build_nir_context *bld_base,
1153                   unsigned bit_size,
1154                   LLVMValueRef index, LLVMValueRef invocation, LLVMValueRef *bounds)
1155 {
1156    struct gallivm_state *gallivm = bld_base->base.gallivm;
1157    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1158    uint32_t shift_val = bit_size_to_shift_size(bit_size);
1159 
1160    LLVMValueRef ssbo_idx = LLVMBuildExtractElement(gallivm->builder, index, invocation, "");
1161    LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, ssbo_idx);
1162    LLVMValueRef ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, ssbo_idx);
1163    if (bounds)
1164       *bounds = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, shift_val), "");
1165 
1166    return ssbo_ptr;
1167 }
1168 
1169 static LLVMValueRef
mem_access_base_pointer(struct lp_build_nir_context * bld_base,struct lp_build_context * mem_bld,unsigned bit_size,LLVMValueRef index,LLVMValueRef invocation,LLVMValueRef * bounds)1170 mem_access_base_pointer(struct lp_build_nir_context *bld_base,
1171                         struct lp_build_context *mem_bld,
1172                         unsigned bit_size,
1173                         LLVMValueRef index, LLVMValueRef invocation, LLVMValueRef *bounds)
1174 {
1175    struct gallivm_state *gallivm = bld_base->base.gallivm;
1176    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1177    LLVMValueRef ptr;
1178 
1179    if (index) {
1180       ptr = ssbo_base_pointer(bld_base, bit_size, index, invocation, bounds);
1181    } else {
1182       ptr = bld->shared_ptr;
1183       *bounds = NULL;
1184    }
1185 
1186    /* Cast it to the pointer type of the access this instruction is doing. */
1187    if (bit_size == 32)
1188       return ptr;
1189    else
1190       return LLVMBuildBitCast(gallivm->builder, ptr, LLVMPointerType(mem_bld->elem_type, 0), "");
1191 }
1192 
emit_load_mem(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,bool index_and_offset_are_uniform,LLVMValueRef index,LLVMValueRef offset,LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])1193 static void emit_load_mem(struct lp_build_nir_context *bld_base,
1194                           unsigned nc,
1195                           unsigned bit_size,
1196                           bool index_and_offset_are_uniform,
1197                           LLVMValueRef index,
1198                           LLVMValueRef offset,
1199                           LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
1200 {
1201    struct gallivm_state *gallivm = bld_base->base.gallivm;
1202    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1203    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1204    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1205    struct lp_build_context *load_bld;
1206    uint32_t shift_val = bit_size_to_shift_size(bit_size);
1207 
1208    load_bld = get_int_bld(bld_base, true, bit_size);
1209 
1210    offset = LLVMBuildAShr(gallivm->builder, offset, lp_build_const_int_vec(gallivm, uint_bld->type, shift_val), "");
1211 
1212    /* If the address is uniform, then use the address from invocation 0 to load,
1213     * and broadcast to all invocations.
1214     */
1215    if (index_and_offset_are_uniform && invocation_0_must_be_active(bld_base)) {
1216       LLVMValueRef ssbo_limit;
1217       LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, index,
1218                                                      lp_build_const_int32(gallivm, 0), &ssbo_limit);
1219 
1220       offset = LLVMBuildExtractElement(gallivm->builder, offset, lp_build_const_int32(gallivm, 0), "");
1221 
1222       for (unsigned c = 0; c < nc; c++) {
1223          LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");
1224 
1225          LLVMValueRef scalar;
1226          /* If loading outside the SSBO, we need to skip the load and read 0 instead. */
1227          if (ssbo_limit) {
1228             LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size);
1229             LLVMValueRef res_store = lp_build_alloca(gallivm, LLVMTypeOf(zero), "");
1230             LLVMBuildStore(builder, zero, res_store);
1231 
1232             struct lp_build_if_state ifthen;
1233             lp_build_if(&ifthen, gallivm, lp_offset_in_range(bld_base, chan_offset, ssbo_limit));
1234             LLVMBuildStore(builder, lp_build_pointer_get(builder, mem_ptr, chan_offset), res_store);
1235             lp_build_endif(&ifthen);
1236 
1237             scalar = LLVMBuildLoad2(builder, LLVMTypeOf(zero), res_store, "");
1238          } else {
1239             scalar = lp_build_pointer_get(builder, mem_ptr, chan_offset);
1240          }
1241 
1242          outval[c] = lp_build_broadcast_scalar(load_bld, scalar);
1243       }
1244       return;
1245    }
1246 
1247    /* although the index is dynamically uniform that doesn't count if exec mask isn't set, so read the one-by-one */
1248 
1249    LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
1250    for (unsigned c = 0; c < nc; c++)
1251       result[c] = lp_build_alloca(gallivm, load_bld->vec_type, "");
1252 
1253    LLVMValueRef exec_mask = mask_vec(bld_base);
1254    LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
1255    struct lp_build_loop_state loop_state;
1256    lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1257    LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
1258    LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, "");
1259 
1260    struct lp_build_if_state exec_ifthen;
1261    lp_build_if(&exec_ifthen, gallivm, loop_cond);
1262 
1263    LLVMValueRef ssbo_limit;
1264    LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, index,
1265                                                   loop_state.counter, &ssbo_limit);
1266 
1267    for (unsigned c = 0; c < nc; c++) {
1268       LLVMValueRef loop_index = LLVMBuildAdd(builder, loop_offset, lp_build_const_int32(gallivm, c), "");
1269       LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1);
1270       if (ssbo_limit) {
1271          LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_index, ssbo_limit);
1272          do_fetch = LLVMBuildAnd(builder, do_fetch, ssbo_oob_cmp, "");
1273       }
1274 
1275       struct lp_build_if_state ifthen;
1276       LLVMValueRef fetch_cond, temp_res;
1277 
1278       fetch_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_fetch, lp_build_const_int32(gallivm, 0), "");
1279 
1280       lp_build_if(&ifthen, gallivm, fetch_cond);
1281       LLVMValueRef scalar = lp_build_pointer_get(builder, mem_ptr, loop_index);
1282 
1283       temp_res = LLVMBuildLoad2(builder, load_bld->vec_type, result[c], "");
1284       temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
1285       LLVMBuildStore(builder, temp_res, result[c]);
1286       lp_build_else(&ifthen);
1287       temp_res = LLVMBuildLoad2(builder, load_bld->vec_type, result[c], "");
1288       LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size);
1289       temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");
1290       LLVMBuildStore(builder, temp_res, result[c]);
1291       lp_build_endif(&ifthen);
1292    }
1293 
1294    lp_build_endif(&exec_ifthen);
1295    lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
1296                           NULL, LLVMIntUGE);
1297    for (unsigned c = 0; c < nc; c++)
1298       outval[c] = LLVMBuildLoad2(gallivm->builder, load_bld->vec_type, result[c], "");
1299 
1300 }
1301 
emit_store_mem(struct lp_build_nir_context * bld_base,unsigned writemask,unsigned nc,unsigned bit_size,bool index_and_offset_are_uniform,LLVMValueRef index,LLVMValueRef offset,LLVMValueRef dst)1302 static void emit_store_mem(struct lp_build_nir_context *bld_base,
1303                            unsigned writemask,
1304                            unsigned nc,
1305                            unsigned bit_size,
1306                            bool index_and_offset_are_uniform,
1307                            LLVMValueRef index,
1308                            LLVMValueRef offset,
1309                            LLVMValueRef dst)
1310 {
1311    struct gallivm_state *gallivm = bld_base->base.gallivm;
1312    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1313    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1314    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1315    struct lp_build_context *store_bld;
1316    uint32_t shift_val = bit_size_to_shift_size(bit_size);
1317    store_bld = get_int_bld(bld_base, true, bit_size);
1318 
1319    offset = lp_build_shr_imm(uint_bld, offset, shift_val);
1320 
1321    /* If the address is uniform, then just store the value from the first
1322     * channel instead of making LLVM unroll the invocation loop.
1323     */
1324    if (index_and_offset_are_uniform && invocation_0_must_be_active(bld_base)) {
1325       LLVMValueRef ssbo_limit;
1326       LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, store_bld, bit_size, index,
1327                                                      lp_build_const_int32(gallivm, 0), &ssbo_limit);
1328 
1329       offset = LLVMBuildExtractElement(gallivm->builder, offset, lp_build_const_int32(gallivm, 0), "");
1330 
1331       for (unsigned c = 0; c < nc; c++) {
1332          if (!(writemask & (1u << c)))
1333             continue;
1334 
1335          /* Pick out invocation 0's value. */
1336          LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
1337          LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
1338                                                          lp_build_const_int32(gallivm, 0), "");
1339          value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, "");
1340 
1341          LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");
1342 
1343          /* If storing outside the SSBO, we need to skip the store instead. */
1344          if (ssbo_limit) {
1345             struct lp_build_if_state ifthen;
1346             lp_build_if(&ifthen, gallivm, lp_offset_in_range(bld_base, chan_offset, ssbo_limit));
1347             lp_build_pointer_set(builder, mem_ptr, chan_offset, value_ptr);
1348             lp_build_endif(&ifthen);
1349          } else {
1350             lp_build_pointer_set(builder, mem_ptr, chan_offset, value_ptr);
1351          }
1352       }
1353       return;
1354    }
1355 
1356    LLVMValueRef exec_mask = mask_vec(bld_base);
1357    LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
1358    struct lp_build_loop_state loop_state;
1359    lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1360    LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
1361    LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, "");
1362 
1363    struct lp_build_if_state exec_ifthen;
1364    lp_build_if(&exec_ifthen, gallivm, loop_cond);
1365 
1366    LLVMValueRef ssbo_limit;
1367    LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, store_bld, bit_size, index,
1368                                                   loop_state.counter, &ssbo_limit);
1369 
1370    for (unsigned c = 0; c < nc; c++) {
1371       if (!(writemask & (1u << c)))
1372          continue;
1373       LLVMValueRef loop_index = LLVMBuildAdd(builder, loop_offset, lp_build_const_int32(gallivm, c), "");
1374       LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
1375       LLVMValueRef do_store = lp_build_const_int32(gallivm, -1);
1376 
1377       if (ssbo_limit) {
1378          LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_index, ssbo_limit);
1379          do_store = LLVMBuildAnd(builder, do_store, ssbo_oob_cmp, "");
1380       }
1381 
1382       LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
1383                                                        loop_state.counter, "");
1384       value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, "");
1385       struct lp_build_if_state ifthen;
1386       LLVMValueRef store_cond;
1387 
1388       store_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_store, lp_build_const_int32(gallivm, 0), "");
1389       lp_build_if(&ifthen, gallivm, store_cond);
1390       lp_build_pointer_set(builder, mem_ptr, loop_index, value_ptr);
1391       lp_build_endif(&ifthen);
1392    }
1393 
1394    lp_build_endif(&exec_ifthen);
1395    lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
1396                              NULL, LLVMIntUGE);
1397 
1398 }
1399 
emit_atomic_mem(struct lp_build_nir_context * bld_base,nir_intrinsic_op nir_op,uint32_t bit_size,LLVMValueRef index,LLVMValueRef offset,LLVMValueRef val,LLVMValueRef val2,LLVMValueRef * result)1400 static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
1401                             nir_intrinsic_op nir_op,
1402                             uint32_t bit_size,
1403                             LLVMValueRef index, LLVMValueRef offset,
1404                             LLVMValueRef val, LLVMValueRef val2,
1405                             LLVMValueRef *result)
1406 {
1407    struct gallivm_state *gallivm = bld_base->base.gallivm;
1408    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1409    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1410    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1411    uint32_t shift_val = bit_size_to_shift_size(bit_size);
1412    struct lp_build_context *atomic_bld = get_int_bld(bld_base, true, bit_size);
1413 
1414    offset = lp_build_shr_imm(uint_bld, offset, shift_val);
1415    LLVMValueRef atom_res = lp_build_alloca(gallivm,
1416                                            atomic_bld->vec_type, "");
1417 
1418    LLVMValueRef exec_mask = mask_vec(bld_base);
1419    LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
1420    struct lp_build_loop_state loop_state;
1421    lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1422    LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
1423    LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, "");
1424 
1425    struct lp_build_if_state exec_ifthen;
1426    lp_build_if(&exec_ifthen, gallivm, loop_cond);
1427 
1428    LLVMValueRef ssbo_limit;
1429    LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, atomic_bld, bit_size, index,
1430                                                   loop_state.counter, &ssbo_limit);
1431 
1432    LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1);
1433    if (ssbo_limit) {
1434       LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_offset, ssbo_limit);
1435       do_fetch = LLVMBuildAnd(builder, do_fetch, ssbo_oob_cmp, "");
1436    }
1437 
1438    LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
1439                                                     loop_state.counter, "");
1440    value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, atomic_bld->elem_type, "");
1441 
1442    LLVMValueRef scalar_ptr = LLVMBuildGEP2(builder, atomic_bld->elem_type, mem_ptr, &loop_offset, 1, "");
1443 
1444    struct lp_build_if_state ifthen;
1445    LLVMValueRef inner_cond, temp_res;
1446    LLVMValueRef scalar;
1447 
1448    inner_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_fetch, lp_build_const_int32(gallivm, 0), "");
1449    lp_build_if(&ifthen, gallivm, inner_cond);
1450 
1451    if (nir_op == nir_intrinsic_ssbo_atomic_comp_swap || nir_op == nir_intrinsic_shared_atomic_comp_swap) {
1452       LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
1453                                                          loop_state.counter, "");
1454       cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atomic_bld->elem_type, "");
1455       scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
1456                                       cas_src_ptr,
1457                                       LLVMAtomicOrderingSequentiallyConsistent,
1458                                       LLVMAtomicOrderingSequentiallyConsistent,
1459                                       false);
1460       scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
1461    } else {
1462       LLVMAtomicRMWBinOp op;
1463 
1464       switch (nir_op) {
1465       case nir_intrinsic_shared_atomic_add:
1466       case nir_intrinsic_ssbo_atomic_add:
1467          op = LLVMAtomicRMWBinOpAdd;
1468          break;
1469       case nir_intrinsic_shared_atomic_exchange:
1470       case nir_intrinsic_ssbo_atomic_exchange:
1471          op = LLVMAtomicRMWBinOpXchg;
1472          break;
1473       case nir_intrinsic_shared_atomic_and:
1474       case nir_intrinsic_ssbo_atomic_and:
1475          op = LLVMAtomicRMWBinOpAnd;
1476          break;
1477       case nir_intrinsic_shared_atomic_or:
1478       case nir_intrinsic_ssbo_atomic_or:
1479          op = LLVMAtomicRMWBinOpOr;
1480          break;
1481       case nir_intrinsic_shared_atomic_xor:
1482       case nir_intrinsic_ssbo_atomic_xor:
1483          op = LLVMAtomicRMWBinOpXor;
1484          break;
1485       case nir_intrinsic_shared_atomic_umin:
1486       case nir_intrinsic_ssbo_atomic_umin:
1487          op = LLVMAtomicRMWBinOpUMin;
1488          break;
1489       case nir_intrinsic_shared_atomic_umax:
1490       case nir_intrinsic_ssbo_atomic_umax:
1491          op = LLVMAtomicRMWBinOpUMax;
1492          break;
1493       case nir_intrinsic_ssbo_atomic_imin:
1494       case nir_intrinsic_shared_atomic_imin:
1495          op = LLVMAtomicRMWBinOpMin;
1496          break;
1497       case nir_intrinsic_ssbo_atomic_imax:
1498       case nir_intrinsic_shared_atomic_imax:
1499          op = LLVMAtomicRMWBinOpMax;
1500          break;
1501       default:
1502          unreachable("unknown atomic op");
1503       }
1504       scalar = LLVMBuildAtomicRMW(builder, op,
1505                                   scalar_ptr, value_ptr,
1506                                   LLVMAtomicOrderingSequentiallyConsistent,
1507                                   false);
1508    }
1509    temp_res = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, "");
1510    temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
1511    LLVMBuildStore(builder, temp_res, atom_res);
1512    lp_build_else(&ifthen);
1513    temp_res = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, "");
1514    LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size);
1515    temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");
1516    LLVMBuildStore(builder, temp_res, atom_res);
1517    lp_build_endif(&ifthen);
1518 
1519    lp_build_endif(&exec_ifthen);
1520    lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
1521                           NULL, LLVMIntUGE);
1522    *result = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, "");
1523 }
1524 
emit_barrier(struct lp_build_nir_context * bld_base)1525 static void emit_barrier(struct lp_build_nir_context *bld_base)
1526 {
1527    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1528    struct gallivm_state * gallivm = bld_base->base.gallivm;
1529 
1530    LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");
1531 
1532    lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);
1533    LLVMPositionBuilderAtEnd(gallivm->builder, resume);
1534 }
1535 
emit_get_ssbo_size(struct lp_build_nir_context * bld_base,LLVMValueRef index)1536 static LLVMValueRef emit_get_ssbo_size(struct lp_build_nir_context *bld_base,
1537                                        LLVMValueRef index)
1538 {
1539    struct gallivm_state *gallivm = bld_base->base.gallivm;
1540    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1541    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1542    struct lp_build_context *bld_broad = &bld_base->uint_bld;
1543    LLVMValueRef size_ptr = lp_build_array_get(bld_base->base.gallivm, bld->ssbo_sizes_ptr,
1544                                               LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));
1545    return lp_build_broadcast_scalar(bld_broad, size_ptr);
1546 }
1547 
emit_image_op(struct lp_build_nir_context * bld_base,struct lp_img_params * params)1548 static void emit_image_op(struct lp_build_nir_context *bld_base,
1549                           struct lp_img_params *params)
1550 {
1551    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1552    struct gallivm_state *gallivm = bld_base->base.gallivm;
1553 
1554    params->type = bld_base->base.type;
1555    params->context_ptr = bld->context_ptr;
1556    params->thread_data_ptr = bld->thread_data_ptr;
1557    params->exec_mask = mask_vec(bld_base);
1558 
1559    if (params->image_index_offset)
1560       params->image_index_offset = LLVMBuildExtractElement(gallivm->builder, params->image_index_offset,
1561                                                            lp_build_const_int32(gallivm, 0), "");
1562 
1563    bld->image->emit_op(bld->image,
1564                        bld->bld_base.base.gallivm,
1565                        params);
1566 
1567 }
1568 
emit_image_size(struct lp_build_nir_context * bld_base,struct lp_sampler_size_query_params * params)1569 static void emit_image_size(struct lp_build_nir_context *bld_base,
1570                             struct lp_sampler_size_query_params *params)
1571 {
1572    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1573    struct gallivm_state *gallivm = bld_base->base.gallivm;
1574 
1575    params->int_type = bld_base->int_bld.type;
1576    params->context_ptr = bld->context_ptr;
1577 
1578    if (params->texture_unit_offset)
1579       params->texture_unit_offset = LLVMBuildExtractElement(gallivm->builder, params->texture_unit_offset,
1580                                                             lp_build_const_int32(gallivm, 0), "");
1581    bld->image->emit_size_query(bld->image,
1582                                bld->bld_base.base.gallivm,
1583                                params);
1584 
1585 }
1586 
init_var_slots(struct lp_build_nir_context * bld_base,nir_variable * var,unsigned sc)1587 static void init_var_slots(struct lp_build_nir_context *bld_base,
1588                            nir_variable *var, unsigned sc)
1589 {
1590    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1591    unsigned slots = glsl_count_attribute_slots(var->type, false) * 4;
1592 
1593    if (!bld->outputs)
1594      return;
1595    for (unsigned comp = sc; comp < slots + sc; comp++) {
1596       unsigned this_loc = var->data.driver_location + (comp / 4);
1597       unsigned this_chan = comp % 4;
1598 
1599       if (!bld->outputs[this_loc][this_chan])
1600          bld->outputs[this_loc][this_chan] = lp_build_alloca(bld_base->base.gallivm,
1601                                                              bld_base->base.vec_type, "output");
1602    }
1603 }
1604 
emit_var_decl(struct lp_build_nir_context * bld_base,nir_variable * var)1605 static void emit_var_decl(struct lp_build_nir_context *bld_base,
1606                           nir_variable *var)
1607 {
1608    unsigned sc = var->data.location_frac;
1609    switch (var->data.mode) {
1610    case nir_var_shader_out: {
1611       if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
1612          if (var->data.location == FRAG_RESULT_STENCIL)
1613             sc = 1;
1614          else if (var->data.location == FRAG_RESULT_DEPTH)
1615             sc = 2;
1616       }
1617       init_var_slots(bld_base, var, sc);
1618       break;
1619    }
1620    default:
1621       break;
1622    }
1623 }
1624 
emit_tex(struct lp_build_nir_context * bld_base,struct lp_sampler_params * params)1625 static void emit_tex(struct lp_build_nir_context *bld_base,
1626                      struct lp_sampler_params *params)
1627 {
1628    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1629    struct gallivm_state *gallivm = bld_base->base.gallivm;
1630    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1631 
1632    params->type = bld_base->base.type;
1633    params->context_ptr = bld->context_ptr;
1634    params->thread_data_ptr = bld->thread_data_ptr;
1635 
1636    if (params->texture_index_offset && bld_base->shader->info.stage != MESA_SHADER_FRAGMENT) {
1637       /* this is horrible but this can be dynamic */
1638       LLVMValueRef coords[5];
1639       LLVMValueRef *orig_texel_ptr;
1640       struct lp_build_context *uint_bld = &bld_base->uint_bld;
1641       LLVMValueRef result[4] = { LLVMGetUndef(bld_base->base.vec_type),
1642                                  LLVMGetUndef(bld_base->base.vec_type),
1643                                  LLVMGetUndef(bld_base->base.vec_type),
1644                                  LLVMGetUndef(bld_base->base.vec_type) };
1645       LLVMValueRef texel[4], orig_offset, orig_lod;
1646       unsigned i;
1647       orig_texel_ptr = params->texel;
1648       orig_lod = params->lod;
1649       for (i = 0; i < 5; i++) {
1650          coords[i] = params->coords[i];
1651       }
1652       orig_offset = params->texture_index_offset;
1653 
1654       for (unsigned v = 0; v < uint_bld->type.length; v++) {
1655          LLVMValueRef idx = lp_build_const_int32(gallivm, v);
1656          LLVMValueRef new_coords[5];
1657          for (i = 0; i < 5; i++) {
1658             new_coords[i] = LLVMBuildExtractElement(gallivm->builder,
1659                                                     coords[i], idx, "");
1660          }
1661          params->coords = new_coords;
1662          params->texture_index_offset = LLVMBuildExtractElement(gallivm->builder,
1663                                                                 orig_offset,
1664                                                                 idx, "");
1665          params->type = lp_elem_type(bld_base->base.type);
1666 
1667          if (orig_lod)
1668             params->lod = LLVMBuildExtractElement(gallivm->builder, orig_lod, idx, "");
1669          params->texel = texel;
1670          bld->sampler->emit_tex_sample(bld->sampler,
1671                                        gallivm,
1672                                        params);
1673 
1674          for (i = 0; i < 4; i++) {
1675             result[i] = LLVMBuildInsertElement(gallivm->builder, result[i], texel[i], idx, "");
1676          }
1677       }
1678       for (i = 0; i < 4; i++) {
1679          orig_texel_ptr[i] = result[i];
1680       }
1681       return;
1682    }
1683 
1684    if (params->texture_index_offset) {
1685       struct lp_build_loop_state loop_state;
1686       LLVMValueRef exec_mask = mask_vec(bld_base);
1687       LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
1688       LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->uint_bld.elem_type, "");
1689       lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1690       LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
1691 
1692       struct lp_build_if_state ifthen;
1693       lp_build_if(&ifthen, gallivm, if_cond);
1694       LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, params->texture_index_offset,
1695                                                        loop_state.counter, "");
1696       LLVMBuildStore(builder, value_ptr, res_store);
1697       lp_build_endif(&ifthen);
1698       lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
1699                              NULL, LLVMIntUGE);
1700       LLVMValueRef idx_val = LLVMBuildLoad2(builder, bld_base->uint_bld.elem_type, res_store, "");
1701       params->texture_index_offset = idx_val;
1702    }
1703 
1704    params->type = bld_base->base.type;
1705    bld->sampler->emit_tex_sample(bld->sampler,
1706                                  bld->bld_base.base.gallivm,
1707                                  params);
1708 }
1709 
emit_tex_size(struct lp_build_nir_context * bld_base,struct lp_sampler_size_query_params * params)1710 static void emit_tex_size(struct lp_build_nir_context *bld_base,
1711                           struct lp_sampler_size_query_params *params)
1712 {
1713    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1714 
1715    params->int_type = bld_base->int_bld.type;
1716    params->context_ptr = bld->context_ptr;
1717 
1718    if (params->texture_unit_offset)
1719       params->texture_unit_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder,
1720                                                              params->texture_unit_offset,
1721                                                              lp_build_const_int32(bld_base->base.gallivm, 0), "");
1722    bld->sampler->emit_size_query(bld->sampler,
1723                                  bld->bld_base.base.gallivm,
1724                                  params);
1725 }
1726 
emit_sysval_intrin(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1727 static void emit_sysval_intrin(struct lp_build_nir_context *bld_base,
1728                                nir_intrinsic_instr *instr,
1729                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1730 {
1731    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1732    struct gallivm_state *gallivm = bld_base->base.gallivm;
1733    struct lp_build_context *bld_broad = get_int_bld(bld_base, true, instr->dest.ssa.bit_size);
1734    switch (instr->intrinsic) {
1735    case nir_intrinsic_load_instance_id:
1736       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1737       break;
1738    case nir_intrinsic_load_base_instance:
1739       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.base_instance);
1740       break;
1741    case nir_intrinsic_load_base_vertex:
1742       result[0] = bld->system_values.basevertex;
1743       break;
1744    case nir_intrinsic_load_first_vertex:
1745       result[0] = bld->system_values.firstvertex;
1746       break;
1747    case nir_intrinsic_load_vertex_id:
1748       result[0] = bld->system_values.vertex_id;
1749       break;
1750    case nir_intrinsic_load_primitive_id:
1751       result[0] = bld->system_values.prim_id;
1752       break;
1753    case nir_intrinsic_load_workgroup_id: {
1754       LLVMValueRef tmp[3];
1755       for (unsigned i = 0; i < 3; i++) {
1756          tmp[i] = LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_id, lp_build_const_int32(gallivm, i), "");
1757          if (instr->dest.ssa.bit_size == 64)
1758             tmp[i] = LLVMBuildZExt(gallivm->builder, tmp[i], bld_base->uint64_bld.elem_type, "");
1759          result[i] = lp_build_broadcast_scalar(bld_broad, tmp[i]);
1760       }
1761       break;
1762    }
1763    case nir_intrinsic_load_local_invocation_id:
1764       for (unsigned i = 0; i < 3; i++)
1765          result[i] = LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, i, "");
1766       break;
1767    case nir_intrinsic_load_local_invocation_index: {
1768       LLVMValueRef tmp, tmp2;
1769       tmp = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, 1), ""));
1770       tmp2 = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, 0), ""));
1771       tmp = lp_build_mul(&bld_base->uint_bld, tmp, tmp2);
1772       tmp = lp_build_mul(&bld_base->uint_bld, tmp, LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, 2, ""));
1773 
1774       tmp2 = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, 0), ""));
1775       tmp2 = lp_build_mul(&bld_base->uint_bld, tmp2, LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, 1, ""));
1776       tmp = lp_build_add(&bld_base->uint_bld, tmp, tmp2);
1777       tmp = lp_build_add(&bld_base->uint_bld, tmp, LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, 0, ""));
1778       result[0] = tmp;
1779       break;
1780    }
1781    case nir_intrinsic_load_num_workgroups: {
1782       LLVMValueRef tmp[3];
1783       for (unsigned i = 0; i < 3; i++) {
1784          tmp[i] = LLVMBuildExtractElement(gallivm->builder, bld->system_values.grid_size, lp_build_const_int32(gallivm, i), "");
1785          if (instr->dest.ssa.bit_size == 64)
1786             tmp[i] = LLVMBuildZExt(gallivm->builder, tmp[i], bld_base->uint64_bld.elem_type, "");
1787          result[i] = lp_build_broadcast_scalar(bld_broad, tmp[i]);
1788       }
1789       break;
1790    }
1791    case nir_intrinsic_load_invocation_id:
1792       if (bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL)
1793          result[0] = bld->system_values.invocation_id;
1794       else
1795          result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
1796       break;
1797    case nir_intrinsic_load_front_face:
1798       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);
1799       break;
1800    case nir_intrinsic_load_draw_id:
1801       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.draw_id);
1802       break;
1803    default:
1804       break;
1805    case nir_intrinsic_load_workgroup_size:
1806      for (unsigned i = 0; i < 3; i++)
1807        result[i] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, i), ""));
1808      break;
1809    case nir_intrinsic_load_work_dim:
1810       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.work_dim);
1811       break;
1812    case nir_intrinsic_load_tess_coord:
1813       for (unsigned i = 0; i < 3; i++) {
1814 	 result[i] = LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_coord, i, "");
1815       }
1816       break;
1817    case nir_intrinsic_load_tess_level_outer:
1818       for (unsigned i = 0; i < 4; i++)
1819          result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_outer, i, ""));
1820       break;
1821    case nir_intrinsic_load_tess_level_inner:
1822       for (unsigned i = 0; i < 2; i++)
1823          result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_inner, i, ""));
1824       break;
1825    case nir_intrinsic_load_patch_vertices_in:
1826       result[0] = bld->system_values.vertices_in;
1827       break;
1828    case nir_intrinsic_load_sample_id:
1829       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.sample_id);
1830       break;
1831    case nir_intrinsic_load_sample_pos:
1832       for (unsigned i = 0; i < 2; i++) {
1833          LLVMValueRef idx = LLVMBuildMul(gallivm->builder, bld->system_values.sample_id, lp_build_const_int32(gallivm, 2), "");
1834          idx = LLVMBuildAdd(gallivm->builder, idx, lp_build_const_int32(gallivm, i), "");
1835          LLVMValueRef val = lp_build_array_get(gallivm, bld->system_values.sample_pos, idx);
1836          result[i] = lp_build_broadcast_scalar(&bld_base->base, val);
1837       }
1838       break;
1839    case nir_intrinsic_load_sample_mask_in:
1840       result[0] = bld->system_values.sample_mask_in;
1841       break;
1842    case nir_intrinsic_load_view_index:
1843       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.view_index);
1844       break;
1845    case nir_intrinsic_load_subgroup_invocation: {
1846       LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
1847       for(unsigned i = 0; i < bld->bld_base.base.type.length; ++i)
1848          elems[i] = lp_build_const_int32(gallivm, i);
1849       result[0] = LLVMConstVector(elems, bld->bld_base.base.type.length);
1850       break;
1851    }
1852    case nir_intrinsic_load_subgroup_id:
1853       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.subgroup_id);
1854       break;
1855    case nir_intrinsic_load_num_subgroups:
1856       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.num_subgroups);
1857       break;
1858    }
1859 }
1860 
emit_helper_invocation(struct lp_build_nir_context * bld_base,LLVMValueRef * dst)1861 static void emit_helper_invocation(struct lp_build_nir_context *bld_base,
1862                                    LLVMValueRef *dst)
1863 {
1864    struct gallivm_state *gallivm = bld_base->base.gallivm;
1865    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1866    *dst = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, mask_vec(bld_base), lp_build_const_int_vec(gallivm, uint_bld->type, -1));
1867 }
1868 
bgnloop(struct lp_build_nir_context * bld_base)1869 static void bgnloop(struct lp_build_nir_context *bld_base)
1870 {
1871    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1872    lp_exec_bgnloop(&bld->exec_mask, true);
1873 }
1874 
endloop(struct lp_build_nir_context * bld_base)1875 static void endloop(struct lp_build_nir_context *bld_base)
1876 {
1877    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1878    lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
1879 }
1880 
if_cond(struct lp_build_nir_context * bld_base,LLVMValueRef cond)1881 static void if_cond(struct lp_build_nir_context *bld_base, LLVMValueRef cond)
1882 {
1883    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1884    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1885    lp_exec_mask_cond_push(&bld->exec_mask, LLVMBuildBitCast(builder, cond, bld_base->base.int_vec_type, ""));
1886 }
1887 
else_stmt(struct lp_build_nir_context * bld_base)1888 static void else_stmt(struct lp_build_nir_context *bld_base)
1889 {
1890    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1891    lp_exec_mask_cond_invert(&bld->exec_mask);
1892 }
1893 
endif_stmt(struct lp_build_nir_context * bld_base)1894 static void endif_stmt(struct lp_build_nir_context *bld_base)
1895 {
1896    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1897    lp_exec_mask_cond_pop(&bld->exec_mask);
1898 }
1899 
break_stmt(struct lp_build_nir_context * bld_base)1900 static void break_stmt(struct lp_build_nir_context *bld_base)
1901 {
1902    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1903 
1904    lp_exec_break(&bld->exec_mask, NULL, false);
1905 }
1906 
continue_stmt(struct lp_build_nir_context * bld_base)1907 static void continue_stmt(struct lp_build_nir_context *bld_base)
1908 {
1909    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1910    lp_exec_continue(&bld->exec_mask);
1911 }
1912 
discard(struct lp_build_nir_context * bld_base,LLVMValueRef cond)1913 static void discard(struct lp_build_nir_context *bld_base, LLVMValueRef cond)
1914 {
1915    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1916    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1917    LLVMValueRef mask;
1918 
1919    if (!cond) {
1920       if (bld->exec_mask.has_mask) {
1921          mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
1922       } else {
1923          mask = LLVMConstNull(bld->bld_base.base.int_vec_type);
1924       }
1925    } else {
1926       mask = LLVMBuildNot(builder, cond, "");
1927       if (bld->exec_mask.has_mask) {
1928          LLVMValueRef invmask;
1929          invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
1930          mask = LLVMBuildOr(builder, mask, invmask, "");
1931       }
1932    }
1933    lp_build_mask_update(bld->mask, mask);
1934 }
1935 
1936 static void
increment_vec_ptr_by_mask(struct lp_build_nir_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)1937 increment_vec_ptr_by_mask(struct lp_build_nir_context * bld_base,
1938                           LLVMValueRef ptr,
1939                           LLVMValueRef mask)
1940 {
1941    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1942    LLVMValueRef current_vec = LLVMBuildLoad2(builder, LLVMTypeOf(mask), ptr, "");
1943 
1944    current_vec = LLVMBuildSub(builder, current_vec, mask, "");
1945 
1946    LLVMBuildStore(builder, current_vec, ptr);
1947 }
1948 
1949 static void
clear_uint_vec_ptr_from_mask(struct lp_build_nir_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)1950 clear_uint_vec_ptr_from_mask(struct lp_build_nir_context * bld_base,
1951                              LLVMValueRef ptr,
1952                              LLVMValueRef mask)
1953 {
1954    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1955    LLVMValueRef current_vec = LLVMBuildLoad2(builder, bld_base->uint_bld.vec_type, ptr, "");
1956 
1957    current_vec = lp_build_select(&bld_base->uint_bld,
1958                                  mask,
1959                                  bld_base->uint_bld.zero,
1960                                  current_vec);
1961 
1962    LLVMBuildStore(builder, current_vec, ptr);
1963 }
1964 
1965 static LLVMValueRef
clamp_mask_to_max_output_vertices(struct lp_build_nir_soa_context * bld,LLVMValueRef current_mask_vec,LLVMValueRef total_emitted_vertices_vec)1966 clamp_mask_to_max_output_vertices(struct lp_build_nir_soa_context * bld,
1967                                   LLVMValueRef current_mask_vec,
1968                                   LLVMValueRef total_emitted_vertices_vec)
1969 {
1970    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1971    struct lp_build_context *int_bld = &bld->bld_base.int_bld;
1972    LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
1973                                             total_emitted_vertices_vec,
1974                                             bld->max_output_vertices_vec);
1975 
1976    return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
1977 }
1978 
emit_vertex(struct lp_build_nir_context * bld_base,uint32_t stream_id)1979 static void emit_vertex(struct lp_build_nir_context *bld_base, uint32_t stream_id)
1980 {
1981    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1982    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1983 
1984    if (stream_id >= bld->gs_vertex_streams)
1985       return;
1986    assert(bld->gs_iface->emit_vertex);
1987    LLVMValueRef total_emitted_vertices_vec =
1988       LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->total_emitted_vertices_vec_ptr[stream_id], "");
1989    LLVMValueRef mask = mask_vec(bld_base);
1990    mask = clamp_mask_to_max_output_vertices(bld, mask,
1991                                             total_emitted_vertices_vec);
1992    bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
1993                               bld->outputs,
1994                               total_emitted_vertices_vec,
1995                               mask,
1996                               lp_build_const_int_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, stream_id));
1997 
1998    increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
1999                              mask);
2000    increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr[stream_id],
2001                              mask);
2002 }
2003 
2004 static void
end_primitive_masked(struct lp_build_nir_context * bld_base,LLVMValueRef mask,uint32_t stream_id)2005 end_primitive_masked(struct lp_build_nir_context * bld_base,
2006                      LLVMValueRef mask, uint32_t stream_id)
2007 {
2008    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2009    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2010 
2011    if (stream_id >= bld->gs_vertex_streams)
2012       return;
2013    struct lp_build_context *uint_bld = &bld_base->uint_bld;
2014    LLVMValueRef emitted_vertices_vec =
2015       LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->emitted_vertices_vec_ptr[stream_id], "");
2016    LLVMValueRef emitted_prims_vec =
2017       LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->emitted_prims_vec_ptr[stream_id], "");
2018    LLVMValueRef total_emitted_vertices_vec =
2019       LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->total_emitted_vertices_vec_ptr[stream_id], "");
2020 
2021    LLVMValueRef emitted_mask = lp_build_cmp(uint_bld,
2022                                             PIPE_FUNC_NOTEQUAL,
2023                                             emitted_vertices_vec,
2024                                             uint_bld->zero);
2025    mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
2026    bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
2027 				total_emitted_vertices_vec,
2028 				emitted_vertices_vec, emitted_prims_vec, mask, stream_id);
2029    increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr[stream_id],
2030                              mask);
2031    clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
2032                                 mask);
2033 }
2034 
end_primitive(struct lp_build_nir_context * bld_base,uint32_t stream_id)2035 static void end_primitive(struct lp_build_nir_context *bld_base, uint32_t stream_id)
2036 {
2037    ASSERTED struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2038 
2039    assert(bld->gs_iface->end_primitive);
2040 
2041    LLVMValueRef mask = mask_vec(bld_base);
2042    end_primitive_masked(bld_base, mask, stream_id);
2043 }
2044 
2045 static void
emit_prologue(struct lp_build_nir_soa_context * bld)2046 emit_prologue(struct lp_build_nir_soa_context *bld)
2047 {
2048    struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
2049    if (bld->indirects & nir_var_shader_in && !bld->gs_iface && !bld->tcs_iface && !bld->tes_iface) {
2050       uint32_t num_inputs = util_bitcount64(bld->bld_base.shader->info.inputs_read);
2051       unsigned index, chan;
2052       LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2053       LLVMValueRef array_size = lp_build_const_int32(gallivm, num_inputs * 4);
2054       bld->inputs_array = lp_build_array_alloca(gallivm,
2055                                                vec_type, array_size,
2056                                                "input_array");
2057 
2058       for (index = 0; index < num_inputs; ++index) {
2059          for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
2060             LLVMValueRef lindex =
2061                lp_build_const_int32(gallivm, index * 4 + chan);
2062             LLVMValueRef input_ptr =
2063                LLVMBuildGEP2(gallivm->builder, vec_type, bld->inputs_array, &lindex, 1, "");
2064             LLVMValueRef value = bld->inputs[index][chan];
2065             if (value)
2066                LLVMBuildStore(gallivm->builder, value, input_ptr);
2067          }
2068       }
2069    }
2070 }
2071 
emit_vote(struct lp_build_nir_context * bld_base,LLVMValueRef src,nir_intrinsic_instr * instr,LLVMValueRef result[4])2072 static void emit_vote(struct lp_build_nir_context *bld_base, LLVMValueRef src,
2073                       nir_intrinsic_instr *instr, LLVMValueRef result[4])
2074 {
2075    struct gallivm_state * gallivm = bld_base->base.gallivm;
2076    LLVMBuilderRef builder = gallivm->builder;
2077    uint32_t bit_size = nir_src_bit_size(instr->src[0]);
2078    LLVMValueRef exec_mask = mask_vec(bld_base);
2079    struct lp_build_loop_state loop_state;
2080    LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
2081 
2082    LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->uint_bld.elem_type, "");
2083    LLVMValueRef eq_store = lp_build_alloca(gallivm, get_int_bld(bld_base, true, bit_size)->elem_type, "");
2084    LLVMValueRef init_val = NULL;
2085    if (instr->intrinsic == nir_intrinsic_vote_ieq ||
2086        instr->intrinsic == nir_intrinsic_vote_feq) {
2087       /* for equal we unfortunately have to loop and find the first valid one. */
2088       lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2089       LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
2090 
2091       struct lp_build_if_state ifthen;
2092       lp_build_if(&ifthen, gallivm, if_cond);
2093       LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
2094                                                        loop_state.counter, "");
2095       LLVMBuildStore(builder, value_ptr, eq_store);
2096       LLVMBuildStore(builder, lp_build_const_int32(gallivm, -1), res_store);
2097       lp_build_endif(&ifthen);
2098       lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2099                              NULL, LLVMIntUGE);
2100       init_val = LLVMBuildLoad2(builder, get_int_bld(bld_base, true, bit_size)->elem_type, eq_store, "");
2101    } else {
2102       LLVMBuildStore(builder, lp_build_const_int32(gallivm, instr->intrinsic == nir_intrinsic_vote_any ? 0 : -1), res_store);
2103    }
2104 
2105    LLVMValueRef res;
2106    lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2107    LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
2108                                                        loop_state.counter, "");
2109    struct lp_build_if_state ifthen;
2110    LLVMValueRef if_cond;
2111    if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
2112 
2113    lp_build_if(&ifthen, gallivm, if_cond);
2114    res = LLVMBuildLoad2(builder, bld_base->uint_bld.elem_type, res_store, "");
2115 
2116    if (instr->intrinsic == nir_intrinsic_vote_feq) {
2117       struct lp_build_context *flt_bld = get_flt_bld(bld_base, bit_size);
2118       LLVMValueRef tmp = LLVMBuildFCmp(builder, LLVMRealUEQ,
2119                                        LLVMBuildBitCast(builder, init_val, flt_bld->elem_type, ""),
2120                                        LLVMBuildBitCast(builder, value_ptr, flt_bld->elem_type, ""), "");
2121       tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, "");
2122       res = LLVMBuildAnd(builder, res, tmp, "");
2123    } else if (instr->intrinsic == nir_intrinsic_vote_ieq) {
2124       LLVMValueRef tmp = LLVMBuildICmp(builder, LLVMIntEQ, init_val, value_ptr, "");
2125       tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, "");
2126       res = LLVMBuildAnd(builder, res, tmp, "");
2127    } else if (instr->intrinsic == nir_intrinsic_vote_any)
2128       res = LLVMBuildOr(builder, res, value_ptr, "");
2129    else
2130       res = LLVMBuildAnd(builder, res, value_ptr, "");
2131    LLVMBuildStore(builder, res, res_store);
2132    lp_build_endif(&ifthen);
2133    lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2134                           NULL, LLVMIntUGE);
2135    result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld,
2136                                          LLVMBuildLoad2(builder, bld_base->uint_bld.elem_type, res_store, ""));
2137 }
2138 
emit_ballot(struct lp_build_nir_context * bld_base,LLVMValueRef src,nir_intrinsic_instr * instr,LLVMValueRef result[4])2139 static void emit_ballot(struct lp_build_nir_context *bld_base, LLVMValueRef src, nir_intrinsic_instr *instr, LLVMValueRef result[4])
2140 {
2141    struct gallivm_state * gallivm = bld_base->base.gallivm;
2142    LLVMBuilderRef builder = gallivm->builder;
2143    LLVMValueRef exec_mask = mask_vec(bld_base);
2144    struct lp_build_loop_state loop_state;
2145    src = LLVMBuildAnd(builder, src, exec_mask, "");
2146    LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
2147    LLVMValueRef res;
2148    lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2149    LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
2150                                                     loop_state.counter, "");
2151    res = LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, res_store, "");
2152    res = LLVMBuildOr(builder,
2153                      res,
2154                      LLVMBuildAnd(builder, value_ptr, LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1), loop_state.counter, ""), ""), "");
2155    LLVMBuildStore(builder, res, res_store);
2156 
2157    lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2158                           NULL, LLVMIntUGE);
2159    result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld,
2160                                          LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, res_store, ""));
2161 }
2162 
emit_elect(struct lp_build_nir_context * bld_base,LLVMValueRef result[4])2163 static void emit_elect(struct lp_build_nir_context *bld_base, LLVMValueRef result[4])
2164 {
2165    struct gallivm_state *gallivm = bld_base->base.gallivm;
2166    LLVMBuilderRef builder = gallivm->builder;
2167    LLVMValueRef exec_mask = mask_vec(bld_base);
2168    struct lp_build_loop_state loop_state;
2169 
2170    LLVMValueRef idx_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
2171    LLVMValueRef found_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
2172    lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2173    LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, exec_mask,
2174                                                     loop_state.counter, "");
2175    LLVMValueRef cond = LLVMBuildICmp(gallivm->builder,
2176                                      LLVMIntEQ,
2177                                      value_ptr,
2178                                      lp_build_const_int32(gallivm, -1), "");
2179    LLVMValueRef cond2 = LLVMBuildICmp(gallivm->builder,
2180                                       LLVMIntEQ,
2181                                       LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, found_store, ""),
2182                                       lp_build_const_int32(gallivm, 0), "");
2183 
2184    cond = LLVMBuildAnd(builder, cond, cond2, "");
2185    struct lp_build_if_state ifthen;
2186    lp_build_if(&ifthen, gallivm, cond);
2187    LLVMBuildStore(builder, lp_build_const_int32(gallivm, 1), found_store);
2188    LLVMBuildStore(builder, loop_state.counter, idx_store);
2189    lp_build_endif(&ifthen);
2190    lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2191                           NULL, LLVMIntUGE);
2192 
2193    result[0] = LLVMBuildInsertElement(builder, bld_base->uint_bld.zero,
2194                                       lp_build_const_int32(gallivm, -1),
2195                                       LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, idx_store, ""),
2196                                       "");
2197 }
2198 
2199 #if LLVM_VERSION_MAJOR >= 10
emit_shuffle(struct lp_build_nir_context * bld_base,LLVMValueRef src,LLVMValueRef index,nir_intrinsic_instr * instr,LLVMValueRef result[4])2200 static void emit_shuffle(struct lp_build_nir_context *bld_base, LLVMValueRef src, LLVMValueRef index,
2201                         nir_intrinsic_instr *instr, LLVMValueRef result[4])
2202 {
2203    assert(instr->intrinsic == nir_intrinsic_shuffle);
2204 
2205    struct gallivm_state *gallivm = bld_base->base.gallivm;
2206    LLVMBuilderRef builder = gallivm->builder;
2207    uint32_t bit_size = nir_src_bit_size(instr->src[0]);
2208    uint32_t index_bit_size = nir_src_bit_size(instr->src[1]);
2209    struct lp_build_context *int_bld = get_int_bld(bld_base, true, bit_size);
2210 
2211    bool index_is_constant_data = LLVMIsAConstantAggregateZero(index) || LLVMIsAConstantDataSequential(index) || LLVMIsAUndefValue(index);
2212 
2213    if (index_is_constant_data) {
2214       /* freeze `src` in case inactive invocations contain poison */
2215       src = LLVMBuildFreeze(builder, src, "");
2216       result[0] = LLVMBuildShuffleVector(builder, src, LLVMGetUndef(LLVMTypeOf(src)), index, "");
2217    } else if (util_get_cpu_caps()->has_avx2 && bit_size == 32 && index_bit_size == 32 && int_bld->type.length == 8) {
2218       /* freeze `src` in case inactive invocations contain poison */
2219       src = LLVMBuildFreeze(builder, src, "");
2220       result[0] = lp_build_intrinsic_binary(builder, "llvm.x86.avx2.permd", int_bld->vec_type, src, index);
2221    } else {
2222       LLVMValueRef res_store = lp_build_alloca(gallivm, int_bld->vec_type, "");
2223       struct lp_build_loop_state loop_state;
2224       lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2225 
2226       LLVMValueRef index_value = LLVMBuildExtractElement(builder, index, loop_state.counter, "");
2227 
2228       LLVMValueRef src_value = LLVMBuildExtractElement(builder, src, index_value, "");
2229       /* freeze `src_value` in case an out-of-bounds index or an index into an
2230        * inactive invocation results in poison
2231        */
2232       src_value = LLVMBuildFreeze(builder, src_value, "");
2233 
2234       LLVMValueRef res = LLVMBuildLoad(builder, res_store, "");
2235       res = LLVMBuildInsertElement(builder, res, src_value, loop_state.counter, "");
2236       LLVMBuildStore(builder, res, res_store);
2237 
2238       lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2239                              NULL, LLVMIntUGE);
2240 
2241       result[0] = LLVMBuildLoad(builder, res_store, "");
2242    }
2243 }
2244 #endif
2245 
emit_reduce(struct lp_build_nir_context * bld_base,LLVMValueRef src,nir_intrinsic_instr * instr,LLVMValueRef result[4])2246 static void emit_reduce(struct lp_build_nir_context *bld_base, LLVMValueRef src,
2247                         nir_intrinsic_instr *instr, LLVMValueRef result[4])
2248 {
2249    struct gallivm_state *gallivm = bld_base->base.gallivm;
2250    LLVMBuilderRef builder = gallivm->builder;
2251    uint32_t bit_size = nir_src_bit_size(instr->src[0]);
2252    /* can't use llvm reduction intrinsics because of exec_mask */
2253    LLVMValueRef exec_mask = mask_vec(bld_base);
2254    struct lp_build_loop_state loop_state;
2255    nir_op reduction_op = nir_intrinsic_reduction_op(instr);
2256 
2257    LLVMValueRef res_store = NULL;
2258    LLVMValueRef scan_store;
2259    struct lp_build_context *int_bld = get_int_bld(bld_base, true, bit_size);
2260 
2261    if (instr->intrinsic != nir_intrinsic_reduce)
2262       res_store = lp_build_alloca(gallivm, int_bld->vec_type, "");
2263 
2264    scan_store = lp_build_alloca(gallivm, int_bld->elem_type, "");
2265 
2266    struct lp_build_context elem_bld;
2267    bool is_flt = reduction_op == nir_op_fadd ||
2268       reduction_op == nir_op_fmul ||
2269       reduction_op == nir_op_fmin ||
2270       reduction_op == nir_op_fmax;
2271    bool is_unsigned = reduction_op == nir_op_umin ||
2272       reduction_op == nir_op_umax;
2273 
2274    struct lp_build_context *vec_bld = is_flt ? get_flt_bld(bld_base, bit_size) :
2275       get_int_bld(bld_base, is_unsigned, bit_size);
2276 
2277    lp_build_context_init(&elem_bld, gallivm, lp_elem_type(vec_bld->type));
2278 
2279    LLVMValueRef store_val = NULL;
2280    /*
2281     * Put the identity value for the operation into the storage
2282     */
2283    switch (reduction_op) {
2284    case nir_op_fmin: {
2285       LLVMValueRef flt_max = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), INFINITY) :
2286          (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), INFINITY) : lp_build_const_float(gallivm, INFINITY));
2287       store_val = LLVMBuildBitCast(builder, flt_max, int_bld->elem_type, "");
2288       break;
2289    }
2290    case nir_op_fmax: {
2291       LLVMValueRef flt_min = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), -INFINITY) :
2292          (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), -INFINITY) : lp_build_const_float(gallivm, -INFINITY));
2293       store_val = LLVMBuildBitCast(builder, flt_min, int_bld->elem_type, "");
2294       break;
2295    }
2296    case nir_op_fmul: {
2297       LLVMValueRef flt_one = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), 1.0) :
2298          (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), 1.0) : lp_build_const_float(gallivm, 1.0));
2299       store_val = LLVMBuildBitCast(builder, flt_one, int_bld->elem_type, "");
2300       break;
2301    }
2302    case nir_op_umin:
2303       switch (bit_size) {
2304       case 8:
2305          store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), UINT8_MAX, 0);
2306          break;
2307       case 16:
2308          store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), UINT16_MAX, 0);
2309          break;
2310       case 32:
2311       default:
2312          store_val  = lp_build_const_int32(gallivm, UINT_MAX);
2313          break;
2314       case 64:
2315          store_val  = lp_build_const_int64(gallivm, UINT64_MAX);
2316          break;
2317       }
2318       break;
2319    case nir_op_imin:
2320       switch (bit_size) {
2321       case 8:
2322          store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), INT8_MAX, 0);
2323          break;
2324       case 16:
2325          store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), INT16_MAX, 0);
2326          break;
2327       case 32:
2328       default:
2329          store_val  = lp_build_const_int32(gallivm, INT_MAX);
2330          break;
2331       case 64:
2332          store_val  = lp_build_const_int64(gallivm, INT64_MAX);
2333          break;
2334       }
2335       break;
2336    case nir_op_imax:
2337       switch (bit_size) {
2338       case 8:
2339          store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), INT8_MIN, 0);
2340          break;
2341       case 16:
2342          store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), INT16_MIN, 0);
2343          break;
2344       case 32:
2345       default:
2346          store_val  = lp_build_const_int32(gallivm, INT_MIN);
2347          break;
2348       case 64:
2349          store_val  = lp_build_const_int64(gallivm, INT64_MIN);
2350          break;
2351       }
2352       break;
2353    case nir_op_imul:
2354       switch (bit_size) {
2355       case 8:
2356          store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 1, 0);
2357          break;
2358       case 16:
2359          store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 1, 0);
2360          break;
2361       case 32:
2362       default:
2363          store_val  = lp_build_const_int32(gallivm, 1);
2364          break;
2365       case 64:
2366          store_val  = lp_build_const_int64(gallivm, 1);
2367          break;
2368       }
2369       break;
2370    case nir_op_iand:
2371       switch (bit_size) {
2372       case 8:
2373          store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0xff, 0);
2374          break;
2375       case 16:
2376          store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0xffff, 0);
2377          break;
2378       case 32:
2379       default:
2380          store_val  = lp_build_const_int32(gallivm, 0xffffffff);
2381          break;
2382       case 64:
2383          store_val  = lp_build_const_int64(gallivm, 0xffffffffffffffffLL);
2384          break;
2385       }
2386       break;
2387    default:
2388       break;
2389    }
2390    if (store_val)
2391       LLVMBuildStore(builder, store_val, scan_store);
2392 
2393    LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
2394 
2395    lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2396 
2397    struct lp_build_if_state ifthen;
2398    LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
2399    lp_build_if(&ifthen, gallivm, if_cond);
2400    LLVMValueRef value = LLVMBuildExtractElement(gallivm->builder, src, loop_state.counter, "");
2401 
2402    LLVMValueRef res = NULL;
2403    LLVMValueRef scan_val = LLVMBuildLoad2(gallivm->builder, int_bld->elem_type, scan_store, "");
2404    if (instr->intrinsic != nir_intrinsic_reduce)
2405       res = LLVMBuildLoad2(gallivm->builder, int_bld->vec_type, res_store, "");
2406 
2407    if (instr->intrinsic == nir_intrinsic_exclusive_scan)
2408       res = LLVMBuildInsertElement(builder, res, scan_val, loop_state.counter, "");
2409 
2410    if (is_flt) {
2411       scan_val = LLVMBuildBitCast(builder, scan_val, elem_bld.elem_type, "");
2412       value = LLVMBuildBitCast(builder, value, elem_bld.elem_type, "");
2413    }
2414    switch (reduction_op) {
2415    case nir_op_fadd:
2416    case nir_op_iadd:
2417       scan_val = lp_build_add(&elem_bld, value, scan_val);
2418       break;
2419    case nir_op_fmul:
2420    case nir_op_imul:
2421       scan_val = lp_build_mul(&elem_bld, value, scan_val);
2422       break;
2423    case nir_op_imin:
2424    case nir_op_umin:
2425    case nir_op_fmin:
2426       scan_val = lp_build_min(&elem_bld, value, scan_val);
2427       break;
2428    case nir_op_imax:
2429    case nir_op_umax:
2430    case nir_op_fmax:
2431       scan_val = lp_build_max(&elem_bld, value, scan_val);
2432       break;
2433    case nir_op_iand:
2434       scan_val = lp_build_and(&elem_bld, value, scan_val);
2435       break;
2436    case nir_op_ior:
2437       scan_val = lp_build_or(&elem_bld, value, scan_val);
2438       break;
2439    case nir_op_ixor:
2440       scan_val = lp_build_xor(&elem_bld, value, scan_val);
2441       break;
2442    default:
2443       assert(0);
2444       break;
2445    }
2446    if (is_flt)
2447       scan_val = LLVMBuildBitCast(builder, scan_val, int_bld->elem_type, "");
2448    LLVMBuildStore(builder, scan_val, scan_store);
2449 
2450    if (instr->intrinsic == nir_intrinsic_inclusive_scan) {
2451       res = LLVMBuildInsertElement(builder, res, scan_val, loop_state.counter, "");
2452    }
2453 
2454    if (instr->intrinsic != nir_intrinsic_reduce)
2455       LLVMBuildStore(builder, res, res_store);
2456    lp_build_endif(&ifthen);
2457 
2458    lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2459                           NULL, LLVMIntUGE);
2460    if (instr->intrinsic == nir_intrinsic_reduce)
2461       result[0] = lp_build_broadcast_scalar(int_bld, LLVMBuildLoad2(builder, int_bld->elem_type, scan_store, ""));
2462    else
2463       result[0] = LLVMBuildLoad2(builder, int_bld->vec_type, res_store, "");
2464 }
2465 
emit_read_invocation(struct lp_build_nir_context * bld_base,LLVMValueRef src,unsigned bit_size,LLVMValueRef invoc,LLVMValueRef result[4])2466 static void emit_read_invocation(struct lp_build_nir_context *bld_base,
2467                                  LLVMValueRef src,
2468                                  unsigned bit_size,
2469                                  LLVMValueRef invoc,
2470                                  LLVMValueRef result[4])
2471 {
2472    struct gallivm_state *gallivm = bld_base->base.gallivm;
2473    LLVMBuilderRef builder = gallivm->builder;
2474    LLVMValueRef idx;
2475    struct lp_build_context *uint_bld = get_int_bld(bld_base, true, bit_size);
2476 
2477    /* have to find the first active invocation */
2478    LLVMValueRef exec_mask = mask_vec(bld_base);
2479    struct lp_build_loop_state loop_state;
2480    LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
2481    LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
2482    lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length));
2483 
2484    LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
2485    struct lp_build_if_state ifthen;
2486 
2487    lp_build_if(&ifthen, gallivm, if_cond);
2488    LLVMValueRef store_val = loop_state.counter;
2489    if (invoc)
2490       store_val = LLVMBuildExtractElement(gallivm->builder, invoc, loop_state.counter, "");
2491    LLVMBuildStore(builder, store_val, res_store);
2492    lp_build_endif(&ifthen);
2493 
2494    lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, -1),
2495                           lp_build_const_int32(gallivm, -1), LLVMIntEQ);
2496    idx = LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, res_store, "");
2497 
2498    LLVMValueRef value = LLVMBuildExtractElement(gallivm->builder,
2499                                                 src, idx, "");
2500    result[0] = lp_build_broadcast_scalar(uint_bld, value);
2501 }
2502 
2503 static void
emit_interp_at(struct lp_build_nir_context * bld_base,unsigned num_components,nir_variable * var,bool centroid,bool sample,unsigned const_index,LLVMValueRef indir_index,LLVMValueRef offsets[2],LLVMValueRef dst[4])2504 emit_interp_at(struct lp_build_nir_context *bld_base,
2505                unsigned num_components,
2506                nir_variable *var,
2507                bool centroid,
2508                bool sample,
2509                unsigned const_index,
2510                LLVMValueRef indir_index,
2511                LLVMValueRef offsets[2],
2512                LLVMValueRef dst[4])
2513 {
2514    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2515 
2516    for (unsigned i = 0; i < num_components; i++) {
2517       dst[i] = bld->fs_iface->interp_fn(bld->fs_iface, &bld_base->base,
2518                                         const_index + var->data.driver_location, i + var->data.location_frac,
2519                                         centroid, sample, indir_index, offsets);
2520    }
2521 }
2522 
get_scratch_thread_offsets(struct gallivm_state * gallivm,struct lp_type type,unsigned scratch_size)2523 static LLVMValueRef get_scratch_thread_offsets(struct gallivm_state *gallivm,
2524                                                struct lp_type type,
2525                                                unsigned scratch_size)
2526 {
2527    LLVMTypeRef elem_type = lp_build_int_elem_type(gallivm, type);
2528    LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
2529    unsigned i;
2530 
2531    if (type.length == 1)
2532       return LLVMConstInt(elem_type, 0, 0);
2533 
2534    for (i = 0; i < type.length; ++i)
2535       elems[i] = LLVMConstInt(elem_type, scratch_size * i, 0);
2536 
2537    return LLVMConstVector(elems, type.length);
2538 }
2539 
2540 static void
emit_load_scratch(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,LLVMValueRef offset,LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])2541 emit_load_scratch(struct lp_build_nir_context *bld_base,
2542                   unsigned nc, unsigned bit_size,
2543                   LLVMValueRef offset,
2544                   LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
2545 {
2546    struct gallivm_state * gallivm = bld_base->base.gallivm;
2547    LLVMBuilderRef builder = gallivm->builder;
2548    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2549    struct lp_build_context *uint_bld = &bld_base->uint_bld;
2550    struct lp_build_context *load_bld;
2551    LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);;
2552    uint32_t shift_val = bit_size_to_shift_size(bit_size);
2553    LLVMValueRef exec_mask = mask_vec(bld_base);
2554 
2555    load_bld = get_int_bld(bld_base, true, bit_size);
2556 
2557    offset = lp_build_add(uint_bld, offset, thread_offsets);
2558    offset = lp_build_shr_imm(uint_bld, offset, shift_val);
2559    for (unsigned c = 0; c < nc; c++) {
2560       LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
2561 
2562       LLVMValueRef result = lp_build_alloca(gallivm, load_bld->vec_type, "");
2563       struct lp_build_loop_state loop_state;
2564       lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2565 
2566       struct lp_build_if_state ifthen;
2567       LLVMValueRef cond, temp_res;
2568 
2569       loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
2570                                            loop_state.counter, "");
2571       cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
2572       cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
2573 
2574       lp_build_if(&ifthen, gallivm, cond);
2575       LLVMValueRef scalar;
2576       LLVMValueRef ptr2 = LLVMBuildBitCast(builder, bld->scratch_ptr, LLVMPointerType(load_bld->elem_type, 0), "");
2577       scalar = lp_build_pointer_get(builder, ptr2, loop_index);
2578 
2579       temp_res = LLVMBuildLoad2(builder, load_bld->vec_type, result, "");
2580       temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
2581       LLVMBuildStore(builder, temp_res, result);
2582       lp_build_else(&ifthen);
2583       temp_res = LLVMBuildLoad2(builder, load_bld->vec_type, result, "");
2584       LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size);
2585       temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");
2586       LLVMBuildStore(builder, temp_res, result);
2587       lp_build_endif(&ifthen);
2588       lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
2589                                 NULL, LLVMIntUGE);
2590       outval[c] = LLVMBuildLoad2(gallivm->builder, load_bld->vec_type, result, "");
2591    }
2592 }
2593 
2594 static void
emit_store_scratch(struct lp_build_nir_context * bld_base,unsigned writemask,unsigned nc,unsigned bit_size,LLVMValueRef offset,LLVMValueRef dst)2595 emit_store_scratch(struct lp_build_nir_context *bld_base,
2596                    unsigned writemask, unsigned nc,
2597                    unsigned bit_size, LLVMValueRef offset,
2598                    LLVMValueRef dst)
2599 {
2600    struct gallivm_state * gallivm = bld_base->base.gallivm;
2601    LLVMBuilderRef builder = gallivm->builder;
2602    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2603    struct lp_build_context *uint_bld = &bld_base->uint_bld;
2604    struct lp_build_context *store_bld;
2605    LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);;
2606    uint32_t shift_val = bit_size_to_shift_size(bit_size);
2607    store_bld = get_int_bld(bld_base, true, bit_size);
2608 
2609    LLVMValueRef exec_mask = mask_vec(bld_base);
2610    offset = lp_build_add(uint_bld, offset, thread_offsets);
2611    offset = lp_build_shr_imm(uint_bld, offset, shift_val);
2612 
2613    for (unsigned c = 0; c < nc; c++) {
2614       if (!(writemask & (1u << c)))
2615          continue;
2616       LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
2617       LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
2618 
2619       struct lp_build_loop_state loop_state;
2620       lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2621 
2622       LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
2623                                                        loop_state.counter, "");
2624       value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, "");
2625 
2626       struct lp_build_if_state ifthen;
2627       LLVMValueRef cond;
2628 
2629       loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
2630                                                         loop_state.counter, "");
2631 
2632       cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
2633       cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
2634       lp_build_if(&ifthen, gallivm, cond);
2635 
2636       LLVMValueRef ptr2 = LLVMBuildBitCast(builder, bld->scratch_ptr, LLVMPointerType(store_bld->elem_type, 0), "");
2637       lp_build_pointer_set(builder, ptr2, loop_index, value_ptr);
2638 
2639       lp_build_endif(&ifthen);
2640       lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
2641                              NULL, LLVMIntUGE);
2642    }
2643 }
2644 
lp_build_nir_soa(struct gallivm_state * gallivm,struct nir_shader * shader,const struct lp_build_tgsi_params * params,LLVMValueRef (* outputs)[4])2645 void lp_build_nir_soa(struct gallivm_state *gallivm,
2646                       struct nir_shader *shader,
2647                       const struct lp_build_tgsi_params *params,
2648                       LLVMValueRef (*outputs)[4])
2649 {
2650    struct lp_build_nir_soa_context bld;
2651    const struct lp_type type = params->type;
2652    struct lp_type res_type;
2653 
2654    assert(type.length <= LP_MAX_VECTOR_LENGTH);
2655    memset(&res_type, 0, sizeof res_type);
2656    res_type.width = type.width;
2657    res_type.length = type.length;
2658    res_type.sign = 1;
2659 
2660    /* Setup build context */
2661    memset(&bld, 0, sizeof bld);
2662    lp_build_context_init(&bld.bld_base.base, gallivm, type);
2663    lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
2664    lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
2665    lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
2666    lp_build_context_init(&bld.uint_elem_bld, gallivm, lp_elem_type(lp_uint_type(type)));
2667    {
2668       struct lp_type dbl_type;
2669       dbl_type = type;
2670       dbl_type.width *= 2;
2671       lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
2672    }
2673    {
2674       struct lp_type half_type;
2675       half_type = type;
2676       half_type.width /= 2;
2677       lp_build_context_init(&bld.bld_base.half_bld, gallivm, half_type);
2678    }
2679    {
2680       struct lp_type uint64_type;
2681       uint64_type = lp_uint_type(type);
2682       uint64_type.width *= 2;
2683       lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
2684    }
2685    {
2686       struct lp_type int64_type;
2687       int64_type = lp_int_type(type);
2688       int64_type.width *= 2;
2689       lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
2690    }
2691    {
2692       struct lp_type uint16_type;
2693       uint16_type = lp_uint_type(type);
2694       uint16_type.width /= 2;
2695       lp_build_context_init(&bld.bld_base.uint16_bld, gallivm, uint16_type);
2696    }
2697    {
2698       struct lp_type int16_type;
2699       int16_type = lp_int_type(type);
2700       int16_type.width /= 2;
2701       lp_build_context_init(&bld.bld_base.int16_bld, gallivm, int16_type);
2702    }
2703    {
2704       struct lp_type uint8_type;
2705       uint8_type = lp_uint_type(type);
2706       uint8_type.width /= 4;
2707       lp_build_context_init(&bld.bld_base.uint8_bld, gallivm, uint8_type);
2708    }
2709    {
2710       struct lp_type int8_type;
2711       int8_type = lp_int_type(type);
2712       int8_type.width /= 4;
2713       lp_build_context_init(&bld.bld_base.int8_bld, gallivm, int8_type);
2714    }
2715    bld.bld_base.load_var = emit_load_var;
2716    bld.bld_base.store_var = emit_store_var;
2717    bld.bld_base.load_reg = emit_load_reg;
2718    bld.bld_base.store_reg = emit_store_reg;
2719    bld.bld_base.emit_var_decl = emit_var_decl;
2720    bld.bld_base.load_ubo = emit_load_ubo;
2721    bld.bld_base.load_kernel_arg = emit_load_kernel_arg;
2722    bld.bld_base.load_global = emit_load_global;
2723    bld.bld_base.store_global = emit_store_global;
2724    bld.bld_base.atomic_global = emit_atomic_global;
2725    bld.bld_base.tex = emit_tex;
2726    bld.bld_base.tex_size = emit_tex_size;
2727    bld.bld_base.bgnloop = bgnloop;
2728    bld.bld_base.endloop = endloop;
2729    bld.bld_base.if_cond = if_cond;
2730    bld.bld_base.else_stmt = else_stmt;
2731    bld.bld_base.endif_stmt = endif_stmt;
2732    bld.bld_base.break_stmt = break_stmt;
2733    bld.bld_base.continue_stmt = continue_stmt;
2734    bld.bld_base.sysval_intrin = emit_sysval_intrin;
2735    bld.bld_base.discard = discard;
2736    bld.bld_base.emit_vertex = emit_vertex;
2737    bld.bld_base.end_primitive = end_primitive;
2738    bld.bld_base.load_mem = emit_load_mem;
2739    bld.bld_base.store_mem = emit_store_mem;
2740    bld.bld_base.get_ssbo_size = emit_get_ssbo_size;
2741    bld.bld_base.atomic_mem = emit_atomic_mem;
2742    bld.bld_base.barrier = emit_barrier;
2743    bld.bld_base.image_op = emit_image_op;
2744    bld.bld_base.image_size = emit_image_size;
2745    bld.bld_base.vote = emit_vote;
2746    bld.bld_base.elect = emit_elect;
2747    bld.bld_base.reduce = emit_reduce;
2748    bld.bld_base.ballot = emit_ballot;
2749 #if LLVM_VERSION_MAJOR >= 10
2750    bld.bld_base.shuffle = emit_shuffle;
2751 #endif
2752    bld.bld_base.read_invocation = emit_read_invocation;
2753    bld.bld_base.helper_invocation = emit_helper_invocation;
2754    bld.bld_base.interp_at = emit_interp_at;
2755    bld.bld_base.load_scratch = emit_load_scratch;
2756    bld.bld_base.store_scratch = emit_store_scratch;
2757    bld.bld_base.load_const = emit_load_const;
2758 
2759    bld.mask = params->mask;
2760    bld.inputs = params->inputs;
2761    bld.outputs = outputs;
2762    bld.consts_ptr = params->consts_ptr;
2763    bld.const_sizes_ptr = params->const_sizes_ptr;
2764    bld.ssbo_ptr = params->ssbo_ptr;
2765    bld.ssbo_sizes_ptr = params->ssbo_sizes_ptr;
2766    bld.sampler = params->sampler;
2767 //   bld.bld_base.info = params->info;
2768 
2769    bld.context_ptr = params->context_ptr;
2770    bld.thread_data_ptr = params->thread_data_ptr;
2771    bld.bld_base.aniso_filter_table = params->aniso_filter_table;
2772    bld.image = params->image;
2773    bld.shared_ptr = params->shared_ptr;
2774    bld.coro = params->coro;
2775    bld.kernel_args_ptr = params->kernel_args;
2776    bld.indirects = 0;
2777    if (params->info->indirect_files & (1 << TGSI_FILE_INPUT))
2778       bld.indirects |= nir_var_shader_in;
2779 
2780    bld.gs_iface = params->gs_iface;
2781    bld.tcs_iface = params->tcs_iface;
2782    bld.tes_iface = params->tes_iface;
2783    bld.fs_iface = params->fs_iface;
2784    if (bld.gs_iface) {
2785       struct lp_build_context *uint_bld = &bld.bld_base.uint_bld;
2786 
2787       bld.gs_vertex_streams = params->gs_vertex_streams;
2788       bld.max_output_vertices_vec = lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
2789                                                            shader->info.gs.vertices_out);
2790       for (int i = 0; i < params->gs_vertex_streams; i++) {
2791          bld.emitted_prims_vec_ptr[i] =
2792             lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_prims_ptr");
2793          bld.emitted_vertices_vec_ptr[i] =
2794             lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_vertices_ptr");
2795          bld.total_emitted_vertices_vec_ptr[i] =
2796             lp_build_alloca(gallivm, uint_bld->vec_type, "total_emitted_vertices_ptr");
2797       }
2798    }
2799    lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
2800 
2801    bld.system_values = *params->system_values;
2802 
2803    bld.bld_base.shader = shader;
2804 
2805    bld.scratch_size = ALIGN(shader->scratch_size, 8);
2806    if (shader->scratch_size) {
2807       bld.scratch_ptr = lp_build_array_alloca(gallivm,
2808                                               LLVMInt8TypeInContext(gallivm->context),
2809                                               lp_build_const_int32(gallivm, bld.scratch_size * type.length),
2810                                               "scratch");
2811    }
2812 
2813    emit_prologue(&bld);
2814    lp_build_nir_llvm(&bld.bld_base, shader);
2815 
2816    if (bld.gs_iface) {
2817       LLVMBuilderRef builder = bld.bld_base.base.gallivm->builder;
2818       LLVMValueRef total_emitted_vertices_vec;
2819       LLVMValueRef emitted_prims_vec;
2820 
2821       for (int i = 0; i < params->gs_vertex_streams; i++) {
2822          end_primitive_masked(&bld.bld_base, lp_build_mask_value(bld.mask), i);
2823 
2824          total_emitted_vertices_vec =
2825             LLVMBuildLoad2(builder, bld.bld_base.uint_bld.vec_type, bld.total_emitted_vertices_vec_ptr[i], "");
2826 
2827          emitted_prims_vec =
2828             LLVMBuildLoad2(builder, bld.bld_base.uint_bld.vec_type, bld.emitted_prims_vec_ptr[i], "");
2829          bld.gs_iface->gs_epilogue(bld.gs_iface,
2830                                    total_emitted_vertices_vec,
2831                                    emitted_prims_vec, i);
2832       }
2833    }
2834    lp_exec_mask_fini(&bld.exec_mask);
2835 }
2836