• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2019 Red Hat.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included
14  * in all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  *
24  **************************************************************************/
25 
26 #include "lp_bld_nir.h"
27 #include "lp_bld_init.h"
28 #include "lp_bld_flow.h"
29 #include "lp_bld_logic.h"
30 #include "lp_bld_gather.h"
31 #include "lp_bld_const.h"
32 #include "lp_bld_struct.h"
33 #include "lp_bld_arit.h"
34 #include "lp_bld_bitarit.h"
35 #include "lp_bld_coro.h"
36 #include "lp_bld_printf.h"
37 #include "util/u_math.h"
38 
bit_size_to_shift_size(int bit_size)39 static int bit_size_to_shift_size(int bit_size)
40 {
41    switch (bit_size) {
42    case 64:
43       return 3;
44    default:
45    case 32:
46       return 2;
47    case 16:
48       return 1;
49    case 8:
50       return 0;
51    }
52 }
53 
54 /*
55  * combine the execution mask if there is one with the current mask.
56  */
57 static LLVMValueRef
mask_vec(struct lp_build_nir_context * bld_base)58 mask_vec(struct lp_build_nir_context *bld_base)
59 {
60    struct lp_build_nir_soa_context * bld = (struct lp_build_nir_soa_context *)bld_base;
61    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
62    struct lp_exec_mask *exec_mask = &bld->exec_mask;
63    LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
64    if (!exec_mask->has_mask) {
65       return bld_mask;
66    }
67    if (!bld_mask)
68       return exec_mask->exec_mask;
69    return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
70                        exec_mask->exec_mask, "");
71 }
72 
73 static LLVMValueRef
emit_fetch_64bit(struct lp_build_nir_context * bld_base,LLVMValueRef input,LLVMValueRef input2)74 emit_fetch_64bit(
75    struct lp_build_nir_context * bld_base,
76    LLVMValueRef input,
77    LLVMValueRef input2)
78 {
79    struct gallivm_state *gallivm = bld_base->base.gallivm;
80    LLVMBuilderRef builder = gallivm->builder;
81    LLVMValueRef res;
82    int i;
83    LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
84    int len = bld_base->base.type.length * 2;
85    assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
86 
87    for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
88 #if UTIL_ARCH_LITTLE_ENDIAN
89       shuffles[i] = lp_build_const_int32(gallivm, i / 2);
90       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
91 #else
92       shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
93       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
94 #endif
95    }
96    res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
97 
98    return LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
99 }
100 
101 static void
emit_store_64bit_split(struct lp_build_nir_context * bld_base,LLVMValueRef value,LLVMValueRef split_values[2])102 emit_store_64bit_split(struct lp_build_nir_context *bld_base,
103                        LLVMValueRef value,
104                        LLVMValueRef split_values[2])
105 {
106    struct gallivm_state *gallivm = bld_base->base.gallivm;
107    LLVMBuilderRef builder = gallivm->builder;
108    unsigned i;
109    LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
110    LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
111    int len = bld_base->base.type.length * 2;
112 
113    value = LLVMBuildBitCast(gallivm->builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), len), "");
114    for (i = 0; i < bld_base->base.type.length; i++) {
115 #if UTIL_ARCH_LITTLE_ENDIAN
116       shuffles[i] = lp_build_const_int32(gallivm, i * 2);
117       shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
118 #else
119       shuffles[i] = lp_build_const_int32(gallivm, i * 2 + 1);
120       shuffles2[i] = lp_build_const_int32(gallivm, i * 2);
121 #endif
122    }
123 
124    split_values[0] = LLVMBuildShuffleVector(builder, value,
125                                  LLVMGetUndef(LLVMTypeOf(value)),
126                                  LLVMConstVector(shuffles,
127                                                  bld_base->base.type.length),
128                                  "");
129    split_values[1] = LLVMBuildShuffleVector(builder, value,
130                                   LLVMGetUndef(LLVMTypeOf(value)),
131                                   LLVMConstVector(shuffles2,
132                                                   bld_base->base.type.length),
133                                   "");
134 }
135 
136 static void
emit_store_64bit_chan(struct lp_build_nir_context * bld_base,LLVMValueRef chan_ptr,LLVMValueRef chan_ptr2,LLVMValueRef value)137 emit_store_64bit_chan(struct lp_build_nir_context *bld_base,
138                       LLVMValueRef chan_ptr,
139                       LLVMValueRef chan_ptr2,
140                       LLVMValueRef value)
141 {
142    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
143    struct lp_build_context *float_bld = &bld_base->base;
144    LLVMValueRef split_vals[2];
145 
146    emit_store_64bit_split(bld_base, value, split_vals);
147 
148    lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[0], chan_ptr);
149    lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[1], chan_ptr2);
150 }
151 
152 static LLVMValueRef
get_soa_array_offsets(struct lp_build_context * uint_bld,LLVMValueRef indirect_index,int num_components,unsigned chan_index,bool need_perelement_offset)153 get_soa_array_offsets(struct lp_build_context *uint_bld,
154                       LLVMValueRef indirect_index,
155                       int num_components,
156                       unsigned chan_index,
157                       bool need_perelement_offset)
158 {
159    struct gallivm_state *gallivm = uint_bld->gallivm;
160    LLVMValueRef chan_vec =
161       lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
162    LLVMValueRef length_vec =
163       lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
164    LLVMValueRef index_vec;
165 
166    /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
167    index_vec = lp_build_mul(uint_bld, indirect_index, lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, num_components));
168    index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
169    index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
170 
171    if (need_perelement_offset) {
172       LLVMValueRef pixel_offsets;
173       unsigned i;
174      /* build pixel offset vector: {0, 1, 2, 3, ...} */
175       pixel_offsets = uint_bld->undef;
176       for (i = 0; i < uint_bld->type.length; i++) {
177          LLVMValueRef ii = lp_build_const_int32(gallivm, i);
178          pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
179                                                 ii, ii, "");
180       }
181       index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
182    }
183    return index_vec;
184 }
185 
186 static LLVMValueRef
build_gather(struct lp_build_nir_context * bld_base,struct lp_build_context * bld,LLVMValueRef base_ptr,LLVMValueRef indexes,LLVMValueRef overflow_mask,LLVMValueRef indexes2)187 build_gather(struct lp_build_nir_context *bld_base,
188              struct lp_build_context *bld,
189              LLVMValueRef base_ptr,
190              LLVMValueRef indexes,
191              LLVMValueRef overflow_mask,
192              LLVMValueRef indexes2)
193 {
194    struct gallivm_state *gallivm = bld_base->base.gallivm;
195    LLVMBuilderRef builder = gallivm->builder;
196    struct lp_build_context *uint_bld = &bld_base->uint_bld;
197    LLVMValueRef res;
198    unsigned i;
199 
200    if (indexes2)
201       res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
202    else
203       res = bld->undef;
204    /*
205     * overflow_mask is a vector telling us which channels
206     * in the vector overflowed. We use the overflow behavior for
207     * constant buffers which is defined as:
208     * Out of bounds access to constant buffer returns 0 in all
209     * components. Out of bounds behavior is always with respect
210     * to the size of the buffer bound at that slot.
211     */
212 
213    if (overflow_mask) {
214       /*
215        * We avoid per-element control flow here (also due to llvm going crazy,
216        * though I suspect it's better anyway since overflow is likely rare).
217        * Note that since we still fetch from buffers even if num_elements was
218        * zero (in this case we'll fetch from index zero) the jit func callers
219        * MUST provide valid fake constant buffers of size 4x32 (the values do
220        * not matter), otherwise we'd still need (not per element though)
221        * control flow.
222        */
223       indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
224       if (indexes2)
225          indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
226    }
227 
228    /*
229     * Loop over elements of index_vec, load scalar value, insert it into 'res'.
230     */
231    for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
232       LLVMValueRef si, di;
233       LLVMValueRef index;
234       LLVMValueRef scalar_ptr, scalar;
235 
236       di = lp_build_const_int32(gallivm, i);
237       if (indexes2)
238          si = lp_build_const_int32(gallivm, i >> 1);
239       else
240          si = di;
241 
242       if (indexes2 && (i & 1)) {
243          index = LLVMBuildExtractElement(builder,
244                                          indexes2, si, "");
245       } else {
246          index = LLVMBuildExtractElement(builder,
247                                          indexes, si, "");
248       }
249       scalar_ptr = LLVMBuildGEP(builder, base_ptr,
250                                 &index, 1, "gather_ptr");
251       scalar = LLVMBuildLoad(builder, scalar_ptr, "");
252 
253       res = LLVMBuildInsertElement(builder, res, scalar, di, "");
254    }
255 
256    if (overflow_mask) {
257       if (indexes2) {
258          res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
259          overflow_mask = LLVMBuildSExt(builder, overflow_mask,
260                                        bld_base->dbl_bld.int_vec_type, "");
261          res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
262                                bld_base->dbl_bld.zero, res);
263       } else
264          res = lp_build_select(bld, overflow_mask, bld->zero, res);
265    }
266 
267    return res;
268 }
269 
270 /**
271  * Scatter/store vector.
272  */
273 static void
emit_mask_scatter(struct lp_build_nir_soa_context * bld,LLVMValueRef base_ptr,LLVMValueRef indexes,LLVMValueRef values,struct lp_exec_mask * mask)274 emit_mask_scatter(struct lp_build_nir_soa_context *bld,
275                   LLVMValueRef base_ptr,
276                   LLVMValueRef indexes,
277                   LLVMValueRef values,
278                   struct lp_exec_mask *mask)
279 {
280    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
281    LLVMBuilderRef builder = gallivm->builder;
282    unsigned i;
283    LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;
284 
285    /*
286     * Loop over elements of index_vec, store scalar value.
287     */
288    for (i = 0; i < bld->bld_base.base.type.length; i++) {
289       LLVMValueRef ii = lp_build_const_int32(gallivm, i);
290       LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
291       LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
292       LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
293       LLVMValueRef scalar_pred = pred ?
294          LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
295 
296       if (0)
297          lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
298                          ii, val, index, scalar_ptr);
299 
300       if (scalar_pred) {
301          LLVMValueRef real_val, dst_val;
302          dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
303          scalar_pred = LLVMBuildTrunc(builder, scalar_pred, LLVMInt1TypeInContext(gallivm->context), "");
304          real_val = LLVMBuildSelect(builder, scalar_pred, val, dst_val, "");
305          LLVMBuildStore(builder, real_val, scalar_ptr);
306       }
307       else {
308          LLVMBuildStore(builder, val, scalar_ptr);
309       }
310    }
311 }
312 
emit_load_var(struct lp_build_nir_context * bld_base,nir_variable_mode deref_mode,unsigned num_components,unsigned bit_size,nir_variable * var,unsigned vertex_index,LLVMValueRef indir_vertex_index,unsigned const_index,LLVMValueRef indir_index,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])313 static void emit_load_var(struct lp_build_nir_context *bld_base,
314                            nir_variable_mode deref_mode,
315                            unsigned num_components,
316                            unsigned bit_size,
317                            nir_variable *var,
318                            unsigned vertex_index,
319                            LLVMValueRef indir_vertex_index,
320                            unsigned const_index,
321                            LLVMValueRef indir_index,
322                            LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
323 {
324    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
325    struct gallivm_state *gallivm = bld_base->base.gallivm;
326    int dmul = bit_size == 64 ? 2 : 1;
327    unsigned location = var->data.driver_location;
328    unsigned location_frac = var->data.location_frac;
329 
330    if (!var->data.compact && !indir_index)
331       location += const_index;
332    else if (var->data.compact) {
333       location += const_index / 4;
334       location_frac += const_index % 4;
335       const_index = 0;
336    }
337    switch (deref_mode) {
338    case nir_var_shader_in:
339       for (unsigned i = 0; i < num_components; i++) {
340          int idx = (i * dmul) + location_frac;
341          int comp_loc = location;
342 
343          if (bit_size == 64 && idx >= 4) {
344             comp_loc++;
345             idx = idx % 4;
346          }
347 
348          if (bld->gs_iface) {
349             LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
350             LLVMValueRef attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
351             LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
352             LLVMValueRef result2;
353 
354             result[i] = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
355                                                    false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
356             if (bit_size == 64) {
357                LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
358                result2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
359                                                     false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
360                result[i] = emit_fetch_64bit(bld_base, result[i], result2);
361             }
362          } else if (bld->tes_iface) {
363             LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
364             LLVMValueRef attrib_index_val;
365             LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
366             LLVMValueRef result2;
367 
368             if (indir_index) {
369                if (var->data.compact) {
370                   swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));
371                   attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
372                } else
373                   attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
374             } else
375                attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
376 
377             if (var->data.patch) {
378                result[i] = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
379                                                              indir_index ? true : false, attrib_index_val, swizzle_index_val);
380                if (bit_size == 64) {
381                   LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
382                   result2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
383                                                               indir_index ? true : false, attrib_index_val, swizzle_index_val);
384                   result[i] = emit_fetch_64bit(bld_base, result[i], result2);
385                }
386             }
387             else {
388                result[i] = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
389                                                               indir_vertex_index ? true : false,
390                                                               indir_vertex_index ? indir_vertex_index : vertex_index_val,
391                                                               (indir_index && !var->data.compact) ? true : false, attrib_index_val,
392                                                               (indir_index && var->data.compact) ? true : false, swizzle_index_val);
393                if (bit_size == 64) {
394                   LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
395                   result2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
396                                                                indir_vertex_index ? true : false,
397                                                                indir_vertex_index ? indir_vertex_index : vertex_index_val,
398                                                                indir_index ? true : false, attrib_index_val, false, swizzle_index_val);
399                   result[i] = emit_fetch_64bit(bld_base, result[i], result2);
400                }
401             }
402          } else if (bld->tcs_iface) {
403             LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
404             LLVMValueRef attrib_index_val;
405             LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
406 
407             if (indir_index) {
408                if (var->data.compact) {
409                   swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));
410                   attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
411                } else
412                   attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
413             } else
414                attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
415             result[i] = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
416                                                          indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
417                                                          (indir_index && !var->data.compact) ? true : false, attrib_index_val,
418                                                          (indir_index && var->data.compact) ? true : false, swizzle_index_val);
419             if (bit_size == 64) {
420                LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
421                LLVMValueRef result2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
422                                                                        indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
423                                                                        indir_index ? true : false, attrib_index_val,
424                                                                        false, swizzle_index_val);
425                result[i] = emit_fetch_64bit(bld_base, result[i], result2);
426             }
427          } else {
428             if (indir_index) {
429                LLVMValueRef attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
430                LLVMValueRef index_vec = get_soa_array_offsets(&bld_base->uint_bld,
431                                                               attrib_index_val, 4, idx,
432                                                               TRUE);
433                LLVMValueRef index_vec2 = NULL;
434                LLVMTypeRef fptr_type;
435                LLVMValueRef inputs_array;
436                fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
437                inputs_array = LLVMBuildBitCast(gallivm->builder, bld->inputs_array, fptr_type, "");
438 
439                if (bit_size == 64)
440                   index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
441                                                      indir_index, 4, idx + 1, TRUE);
442 
443                /* Gather values from the input register array */
444                result[i] = build_gather(bld_base, &bld_base->base, inputs_array, index_vec, NULL, index_vec2);
445             } else {
446                if (bld->indirects & nir_var_shader_in) {
447                   LLVMValueRef lindex = lp_build_const_int32(gallivm,
448                                                              comp_loc * 4 + idx);
449                   LLVMValueRef input_ptr = lp_build_pointer_get(gallivm->builder,
450                                                              bld->inputs_array, lindex);
451                   if (bit_size == 64) {
452                      LLVMValueRef lindex2 = lp_build_const_int32(gallivm,
453                                                                  comp_loc * 4 + (idx + 1));
454                      LLVMValueRef input_ptr2 = lp_build_pointer_get(gallivm->builder,
455                                                                     bld->inputs_array, lindex2);
456                      result[i] = emit_fetch_64bit(bld_base, input_ptr, input_ptr2);
457                   } else {
458                      result[i] = input_ptr;
459                   }
460                } else {
461                   if (bit_size == 64) {
462                      LLVMValueRef tmp[2];
463                      tmp[0] = bld->inputs[comp_loc][idx];
464                      tmp[1] = bld->inputs[comp_loc][idx + 1];
465                      result[i] = emit_fetch_64bit(bld_base, tmp[0], tmp[1]);
466                   } else {
467                      result[i] = bld->inputs[comp_loc][idx];
468                   }
469                }
470             }
471          }
472       }
473       break;
474    case nir_var_shader_out:
475       if (bld->fs_iface && bld->fs_iface->fb_fetch) {
476          bld->fs_iface->fb_fetch(bld->fs_iface, &bld_base->base, var->data.location, result);
477          return;
478       }
479       for (unsigned i = 0; i < num_components; i++) {
480          int idx = (i * dmul) + location_frac;
481          if (bld->tcs_iface) {
482             LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
483             LLVMValueRef attrib_index_val;
484             LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
485 
486             if (indir_index)
487                attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
488             else
489                attrib_index_val = lp_build_const_int32(gallivm, location);
490 
491             result[i] = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
492                                                           indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
493                                                           (indir_index && !var->data.compact) ? true : false, attrib_index_val,
494                                                           (indir_index && var->data.compact) ? true : false, swizzle_index_val, 0);
495             if (bit_size == 64) {
496                LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
497                LLVMValueRef result2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
498                                                                         indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
499                                                                         indir_index ? true : false, attrib_index_val,
500                                                                         false, swizzle_index_val, 0);
501                result[i] = emit_fetch_64bit(bld_base, result[i], result2);
502             }
503          }
504       }
505       break;
506    default:
507       break;
508    }
509 }
510 
emit_store_chan(struct lp_build_nir_context * bld_base,nir_variable_mode deref_mode,unsigned bit_size,unsigned location,unsigned comp,unsigned chan,LLVMValueRef dst)511 static void emit_store_chan(struct lp_build_nir_context *bld_base,
512                             nir_variable_mode deref_mode,
513                             unsigned bit_size,
514                             unsigned location, unsigned comp,
515                             unsigned chan,
516                             LLVMValueRef dst)
517 {
518    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
519    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
520    struct lp_build_context *float_bld = &bld_base->base;
521 
522    if (bit_size == 64) {
523       chan *= 2;
524       chan += comp;
525       if (chan >= 4) {
526          chan -= 4;
527          location++;
528       }
529       emit_store_64bit_chan(bld_base, bld->outputs[location][chan],
530                             bld->outputs[location][chan + 1], dst);
531    } else {
532       dst = LLVMBuildBitCast(builder, dst, float_bld->vec_type, "");
533       lp_exec_mask_store(&bld->exec_mask, float_bld, dst,
534                          bld->outputs[location][chan + comp]);
535    }
536 }
537 
emit_store_tcs_chan(struct lp_build_nir_context * bld_base,bool is_compact,unsigned bit_size,unsigned location,unsigned const_index,LLVMValueRef indir_vertex_index,LLVMValueRef indir_index,unsigned comp,unsigned chan,LLVMValueRef chan_val)538 static void emit_store_tcs_chan(struct lp_build_nir_context *bld_base,
539                                 bool is_compact,
540                                 unsigned bit_size,
541                                 unsigned location,
542                                 unsigned const_index,
543                                 LLVMValueRef indir_vertex_index,
544                                 LLVMValueRef indir_index,
545                                 unsigned comp,
546                                 unsigned chan,
547                                 LLVMValueRef chan_val)
548 {
549    struct gallivm_state *gallivm = bld_base->base.gallivm;
550    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
551    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
552    unsigned swizzle = chan;
553    if (bit_size == 64) {
554       swizzle *= 2;
555       swizzle += comp;
556       if (swizzle >= 4) {
557          swizzle -= 4;
558          location++;
559       }
560    } else
561       swizzle += comp;
562    LLVMValueRef attrib_index_val;
563    LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, swizzle);
564 
565    if (indir_index) {
566       if (is_compact) {
567          swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, swizzle));
568          attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
569       } else
570          attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, location));
571    } else
572       attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
573    if (bit_size == 64) {
574       LLVMValueRef split_vals[2];
575       LLVMValueRef swizzle_index_val2 = lp_build_const_int32(gallivm, swizzle + 1);
576       emit_store_64bit_split(bld_base, chan_val, split_vals);
577       bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
578                                         indir_vertex_index ? true : false,
579                                         indir_vertex_index,
580                                         indir_index ? true : false,
581                                         attrib_index_val,
582                                         false, swizzle_index_val,
583                                         split_vals[0], mask_vec(bld_base));
584       bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
585                                         indir_vertex_index ? true : false,
586                                         indir_vertex_index,
587                                         indir_index ? true : false,
588                                         attrib_index_val,
589                                         false, swizzle_index_val2,
590                                         split_vals[1], mask_vec(bld_base));
591    } else {
592       chan_val = LLVMBuildBitCast(builder, chan_val, bld_base->base.vec_type, "");
593       bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
594                                         indir_vertex_index ? true : false,
595                                         indir_vertex_index,
596                                         indir_index && !is_compact ? true : false,
597                                         attrib_index_val,
598                                         indir_index && is_compact ? true : false,
599                                         swizzle_index_val,
600                                         chan_val, mask_vec(bld_base));
601    }
602 }
603 
emit_store_var(struct lp_build_nir_context * bld_base,nir_variable_mode deref_mode,unsigned num_components,unsigned bit_size,nir_variable * var,unsigned writemask,LLVMValueRef indir_vertex_index,unsigned const_index,LLVMValueRef indir_index,LLVMValueRef dst)604 static void emit_store_var(struct lp_build_nir_context *bld_base,
605                            nir_variable_mode deref_mode,
606                            unsigned num_components,
607                            unsigned bit_size,
608                            nir_variable *var,
609                            unsigned writemask,
610                            LLVMValueRef indir_vertex_index,
611                            unsigned const_index,
612                            LLVMValueRef indir_index,
613                            LLVMValueRef dst)
614 {
615    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
616    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
617    switch (deref_mode) {
618    case nir_var_shader_out: {
619       unsigned location = var->data.driver_location;
620       unsigned comp = var->data.location_frac;
621       if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
622          if (var->data.location == FRAG_RESULT_STENCIL)
623             comp = 1;
624          else if (var->data.location == FRAG_RESULT_DEPTH)
625             comp = 2;
626       }
627 
628       if (var->data.compact) {
629          location += const_index / 4;
630          comp += const_index % 4;
631          const_index = 0;
632       }
633 
634       for (unsigned chan = 0; chan < num_components; chan++) {
635          if (writemask & (1u << chan)) {
636             LLVMValueRef chan_val = (num_components == 1) ? dst : LLVMBuildExtractValue(builder, dst, chan, "");
637             if (bld->tcs_iface) {
638                emit_store_tcs_chan(bld_base, var->data.compact, bit_size, location, const_index, indir_vertex_index, indir_index, comp, chan, chan_val);
639             } else
640                emit_store_chan(bld_base, deref_mode, bit_size, location + const_index, comp, chan, chan_val);
641          }
642       }
643       break;
644    }
645    default:
646       break;
647    }
648 }
649 
emit_load_reg(struct lp_build_nir_context * bld_base,struct lp_build_context * reg_bld,const nir_reg_src * reg,LLVMValueRef indir_src,LLVMValueRef reg_storage)650 static LLVMValueRef emit_load_reg(struct lp_build_nir_context *bld_base,
651                                   struct lp_build_context *reg_bld,
652                                   const nir_reg_src *reg,
653                                   LLVMValueRef indir_src,
654                                   LLVMValueRef reg_storage)
655 {
656    struct gallivm_state *gallivm = bld_base->base.gallivm;
657    LLVMBuilderRef builder = gallivm->builder;
658    int nc = reg->reg->num_components;
659    LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS] = { NULL };
660    struct lp_build_context *uint_bld = &bld_base->uint_bld;
661    if (reg->reg->num_array_elems) {
662       LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, reg->base_offset);
663       if (reg->indirect) {
664          LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, reg->reg->num_array_elems - 1);
665          indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, "");
666          indirect_val = lp_build_min(uint_bld, indirect_val, max_index);
667       }
668       reg_storage = LLVMBuildBitCast(builder, reg_storage, LLVMPointerType(reg_bld->elem_type, 0), "");
669       for (unsigned i = 0; i < nc; i++) {
670          LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, TRUE);
671          vals[i] = build_gather(bld_base, reg_bld, reg_storage, indirect_offset, NULL, NULL);
672       }
673    } else {
674       for (unsigned i = 0; i < nc; i++) {
675          LLVMValueRef this_storage = nc == 1 ? reg_storage : lp_build_array_get_ptr(gallivm, reg_storage,
676                                                                                     lp_build_const_int32(gallivm, i));
677          vals[i] = LLVMBuildLoad(builder, this_storage, "");
678       }
679    }
680    return nc == 1 ? vals[0] : lp_nir_array_build_gather_values(builder, vals, nc);
681 }
682 
emit_store_reg(struct lp_build_nir_context * bld_base,struct lp_build_context * reg_bld,const nir_reg_dest * reg,unsigned writemask,LLVMValueRef indir_src,LLVMValueRef reg_storage,LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS])683 static void emit_store_reg(struct lp_build_nir_context *bld_base,
684                            struct lp_build_context *reg_bld,
685                            const nir_reg_dest *reg,
686                            unsigned writemask,
687                            LLVMValueRef indir_src,
688                            LLVMValueRef reg_storage,
689                            LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS])
690 {
691    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
692    struct gallivm_state *gallivm = bld_base->base.gallivm;
693    LLVMBuilderRef builder = gallivm->builder;
694    struct lp_build_context *uint_bld = &bld_base->uint_bld;
695    int nc = reg->reg->num_components;
696    if (reg->reg->num_array_elems > 0) {
697       LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, reg->base_offset);
698       if (reg->indirect) {
699          LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, reg->reg->num_array_elems - 1);
700          indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, "");
701          indirect_val = lp_build_min(uint_bld, indirect_val, max_index);
702       }
703       reg_storage = LLVMBuildBitCast(builder, reg_storage, LLVMPointerType(reg_bld->elem_type, 0), "");
704       for (unsigned i = 0; i < nc; i++) {
705          if (!(writemask & (1 << i)))
706             continue;
707          LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, TRUE);
708          dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, "");
709          emit_mask_scatter(bld, reg_storage, indirect_offset, dst[i], &bld->exec_mask);
710       }
711       return;
712    }
713 
714    for (unsigned i = 0; i < nc; i++) {
715       LLVMValueRef this_storage = nc == 1 ? reg_storage : lp_build_array_get_ptr(gallivm, reg_storage,
716                                                          lp_build_const_int32(gallivm, i));
717       dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, "");
718       lp_exec_mask_store(&bld->exec_mask, reg_bld, dst[i], this_storage);
719    }
720 }
721 
emit_load_kernel_arg(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,unsigned offset_bit_size,bool offset_is_uniform,LLVMValueRef offset,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])722 static void emit_load_kernel_arg(struct lp_build_nir_context *bld_base,
723                                  unsigned nc,
724                                  unsigned bit_size,
725                                  unsigned offset_bit_size,
726                                  bool offset_is_uniform,
727                                  LLVMValueRef offset,
728                                  LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
729 {
730    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
731    struct gallivm_state *gallivm = bld_base->base.gallivm;
732    LLVMBuilderRef builder = gallivm->builder;
733    struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size);
734    LLVMValueRef kernel_args_ptr = bld->kernel_args_ptr;
735    unsigned size_shift = bit_size_to_shift_size(bit_size);
736    struct lp_build_context *bld_offset = get_int_bld(bld_base, true, offset_bit_size);
737    if (size_shift)
738       offset = lp_build_shr(bld_offset, offset, lp_build_const_int_vec(gallivm, bld_offset->type, size_shift));
739 
740    LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0);
741    kernel_args_ptr = LLVMBuildBitCast(builder, kernel_args_ptr, ptr_type, "");
742 
743    if (offset_is_uniform) {
744       offset = LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), "");
745 
746       for (unsigned c = 0; c < nc; c++) {
747          LLVMValueRef this_offset = LLVMBuildAdd(builder, offset, offset_bit_size == 64 ? lp_build_const_int64(gallivm, c) : lp_build_const_int32(gallivm, c), "");
748 
749          LLVMValueRef scalar = lp_build_pointer_get(builder, kernel_args_ptr, this_offset);
750          result[c] = lp_build_broadcast_scalar(bld_broad, scalar);
751       }
752    }
753 }
754 
global_addr_to_ptr(struct gallivm_state * gallivm,LLVMValueRef addr_ptr,unsigned bit_size)755 static LLVMValueRef global_addr_to_ptr(struct gallivm_state *gallivm, LLVMValueRef addr_ptr, unsigned bit_size)
756 {
757    LLVMBuilderRef builder = gallivm->builder;
758    switch (bit_size) {
759    case 8:
760       addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");
761       break;
762    case 16:
763       addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt16TypeInContext(gallivm->context), 0), "");
764       break;
765    case 32:
766    default:
767       addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), "");
768       break;
769    case 64:
770       addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0), "");
771       break;
772    }
773    return addr_ptr;
774 }
775 
emit_load_global(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,unsigned addr_bit_size,LLVMValueRef addr,LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])776 static void emit_load_global(struct lp_build_nir_context *bld_base,
777                              unsigned nc,
778                              unsigned bit_size,
779                              unsigned addr_bit_size,
780                              LLVMValueRef addr,
781                              LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
782 {
783    struct gallivm_state *gallivm = bld_base->base.gallivm;
784    LLVMBuilderRef builder = gallivm->builder;
785    struct lp_build_context *uint_bld = &bld_base->uint_bld;
786    struct lp_build_context *res_bld;
787 
788    res_bld = get_int_bld(bld_base, true, bit_size);
789 
790    for (unsigned c = 0; c < nc; c++) {
791       LLVMValueRef result = lp_build_alloca(gallivm, res_bld->vec_type, "");
792       LLVMValueRef exec_mask = mask_vec(bld_base);
793       struct lp_build_loop_state loop_state;
794       lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
795 
796       struct lp_build_if_state ifthen;
797       LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
798       cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
799       lp_build_if(&ifthen, gallivm, cond);
800 
801       LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
802                                                       loop_state.counter, "");
803       addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);
804 
805       LLVMValueRef value_ptr = lp_build_pointer_get(builder, addr_ptr, lp_build_const_int32(gallivm, c));
806 
807       LLVMValueRef temp_res;
808       temp_res = LLVMBuildLoad(builder, result, "");
809       temp_res = LLVMBuildInsertElement(builder, temp_res, value_ptr, loop_state.counter, "");
810       LLVMBuildStore(builder, temp_res, result);
811       lp_build_endif(&ifthen);
812       lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
813                              NULL, LLVMIntUGE);
814       outval[c] = LLVMBuildLoad(builder, result, "");
815    }
816 }
817 
emit_store_global(struct lp_build_nir_context * bld_base,unsigned writemask,unsigned nc,unsigned bit_size,unsigned addr_bit_size,LLVMValueRef addr,LLVMValueRef dst)818 static void emit_store_global(struct lp_build_nir_context *bld_base,
819                               unsigned writemask,
820                               unsigned nc, unsigned bit_size,
821                               unsigned addr_bit_size,
822                               LLVMValueRef addr,
823                               LLVMValueRef dst)
824 {
825    struct gallivm_state *gallivm = bld_base->base.gallivm;
826    LLVMBuilderRef builder = gallivm->builder;
827    struct lp_build_context *uint_bld = &bld_base->uint_bld;
828 
829    for (unsigned c = 0; c < nc; c++) {
830       if (!(writemask & (1u << c)))
831          continue;
832       LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
833 
834       LLVMValueRef exec_mask = mask_vec(bld_base);
835       struct lp_build_loop_state loop_state;
836       lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
837       LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
838                                                        loop_state.counter, "");
839 
840       LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
841                                                       loop_state.counter, "");
842       addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);
843       switch (bit_size) {
844       case 8:
845          value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt8TypeInContext(gallivm->context), "");
846          break;
847       case 16:
848          value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt16TypeInContext(gallivm->context), "");
849          break;
850       case 32:
851          value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt32TypeInContext(gallivm->context), "");
852          break;
853       case 64:
854          value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt64TypeInContext(gallivm->context), "");
855          break;
856       default:
857          break;
858       }
859       struct lp_build_if_state ifthen;
860 
861       LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
862       cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
863       lp_build_if(&ifthen, gallivm, cond);
864       lp_build_pointer_set(builder, addr_ptr, lp_build_const_int32(gallivm, c), value_ptr);
865       lp_build_endif(&ifthen);
866       lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
867                              NULL, LLVMIntUGE);
868    }
869 }
870 
emit_atomic_global(struct lp_build_nir_context * bld_base,nir_intrinsic_op nir_op,unsigned addr_bit_size,unsigned val_bit_size,LLVMValueRef addr,LLVMValueRef val,LLVMValueRef val2,LLVMValueRef * result)871 static void emit_atomic_global(struct lp_build_nir_context *bld_base,
872                                nir_intrinsic_op nir_op,
873                                unsigned addr_bit_size,
874                                unsigned val_bit_size,
875                                LLVMValueRef addr,
876                                LLVMValueRef val, LLVMValueRef val2,
877                                LLVMValueRef *result)
878 {
879    struct gallivm_state *gallivm = bld_base->base.gallivm;
880    LLVMBuilderRef builder = gallivm->builder;
881    struct lp_build_context *uint_bld = &bld_base->uint_bld;
882    struct lp_build_context *atom_bld = get_int_bld(bld_base, true, val_bit_size);
883    LLVMValueRef atom_res = lp_build_alloca(gallivm,
884                                            LLVMTypeOf(val), "");
885    LLVMValueRef exec_mask = mask_vec(bld_base);
886    struct lp_build_loop_state loop_state;
887    lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
888 
889    LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
890                                                     loop_state.counter, "");
891 
892    LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
893                                                    loop_state.counter, "");
894    addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, 32);
895    struct lp_build_if_state ifthen;
896    LLVMValueRef cond, temp_res;
897    LLVMValueRef scalar;
898    cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
899    cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
900    lp_build_if(&ifthen, gallivm, cond);
901 
902    addr_ptr = LLVMBuildBitCast(gallivm->builder, addr_ptr, LLVMPointerType(LLVMTypeOf(value_ptr), 0), "");
903    if (nir_op == nir_intrinsic_global_atomic_comp_swap) {
904       LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
905                                                          loop_state.counter, "");
906       cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atom_bld->elem_type, "");
907       scalar = LLVMBuildAtomicCmpXchg(builder, addr_ptr, value_ptr,
908                                       cas_src_ptr,
909                                       LLVMAtomicOrderingSequentiallyConsistent,
910                                       LLVMAtomicOrderingSequentiallyConsistent,
911                                       false);
912       scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
913    } else {
914       LLVMAtomicRMWBinOp op;
915       switch (nir_op) {
916       case nir_intrinsic_global_atomic_add:
917          op = LLVMAtomicRMWBinOpAdd;
918          break;
919       case nir_intrinsic_global_atomic_exchange:
920 
921          op = LLVMAtomicRMWBinOpXchg;
922          break;
923       case nir_intrinsic_global_atomic_and:
924          op = LLVMAtomicRMWBinOpAnd;
925          break;
926       case nir_intrinsic_global_atomic_or:
927          op = LLVMAtomicRMWBinOpOr;
928          break;
929       case nir_intrinsic_global_atomic_xor:
930          op = LLVMAtomicRMWBinOpXor;
931          break;
932       case nir_intrinsic_global_atomic_umin:
933          op = LLVMAtomicRMWBinOpUMin;
934          break;
935       case nir_intrinsic_global_atomic_umax:
936          op = LLVMAtomicRMWBinOpUMax;
937          break;
938       case nir_intrinsic_global_atomic_imin:
939          op = LLVMAtomicRMWBinOpMin;
940          break;
941       case nir_intrinsic_global_atomic_imax:
942          op = LLVMAtomicRMWBinOpMax;
943          break;
944       default:
945          unreachable("unknown atomic op");
946       }
947 
948       scalar = LLVMBuildAtomicRMW(builder, op,
949                                   addr_ptr, value_ptr,
950                                   LLVMAtomicOrderingSequentiallyConsistent,
951                                   false);
952    }
953    temp_res = LLVMBuildLoad(builder, atom_res, "");
954    temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
955    LLVMBuildStore(builder, temp_res, atom_res);
956    lp_build_else(&ifthen);
957    temp_res = LLVMBuildLoad(builder, atom_res, "");
958    bool is_float = LLVMTypeOf(val) == bld_base->base.vec_type;
959    LLVMValueRef zero_val;
960    if (is_float) {
961       if (val_bit_size == 64)
962          zero_val = lp_build_const_double(gallivm, 0);
963       else
964          zero_val = lp_build_const_float(gallivm, 0);
965    } else {
966       if (val_bit_size == 64)
967          zero_val = lp_build_const_int64(gallivm, 0);
968       else
969          zero_val = lp_build_const_int32(gallivm, 0);
970    }
971 
972    temp_res = LLVMBuildInsertElement(builder, temp_res, zero_val, loop_state.counter, "");
973    LLVMBuildStore(builder, temp_res, atom_res);
974    lp_build_endif(&ifthen);
975    lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
976                           NULL, LLVMIntUGE);
977    *result = LLVMBuildLoad(builder, atom_res, "");
978 }
979 
emit_load_ubo(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,bool offset_is_uniform,LLVMValueRef index,LLVMValueRef offset,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])980 static void emit_load_ubo(struct lp_build_nir_context *bld_base,
981                           unsigned nc,
982                           unsigned bit_size,
983                           bool offset_is_uniform,
984                           LLVMValueRef index,
985                           LLVMValueRef offset,
986                           LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
987 {
988    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
989    struct gallivm_state *gallivm = bld_base->base.gallivm;
990    LLVMBuilderRef builder = gallivm->builder;
991    struct lp_build_context *uint_bld = &bld_base->uint_bld;
992    struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size);
993    LLVMValueRef consts_ptr = lp_build_array_get(gallivm, bld->consts_ptr, index);
994    unsigned size_shift = bit_size_to_shift_size(bit_size);
995    if (size_shift)
996       offset = lp_build_shr(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, size_shift));
997 
998    LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0);
999    consts_ptr = LLVMBuildBitCast(builder, consts_ptr, ptr_type, "");
1000 
1001    if (offset_is_uniform) {
1002       offset = LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), "");
1003 
1004       for (unsigned c = 0; c < nc; c++) {
1005          LLVMValueRef this_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");
1006 
1007          LLVMValueRef scalar = lp_build_pointer_get(builder, consts_ptr, this_offset);
1008          result[c] = lp_build_broadcast_scalar(bld_broad, scalar);
1009       }
1010    } else {
1011       LLVMValueRef overflow_mask;
1012       LLVMValueRef num_consts = lp_build_array_get(gallivm, bld->const_sizes_ptr, index);
1013 
1014       num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
1015       if (bit_size == 64)
1016          num_consts = lp_build_shr_imm(uint_bld, num_consts, 1);
1017       else if (bit_size == 16)
1018          num_consts = lp_build_shl_imm(uint_bld, num_consts, 1);
1019       else if (bit_size == 8)
1020          num_consts = lp_build_shl_imm(uint_bld, num_consts, 2);
1021 
1022       for (unsigned c = 0; c < nc; c++) {
1023          LLVMValueRef this_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
1024          overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
1025                                           this_offset, num_consts);
1026          result[c] = build_gather(bld_base, bld_broad, consts_ptr, this_offset, overflow_mask, NULL);
1027       }
1028    }
1029 }
1030 
1031 
emit_load_mem(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,LLVMValueRef index,LLVMValueRef offset,LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])1032 static void emit_load_mem(struct lp_build_nir_context *bld_base,
1033                           unsigned nc,
1034                           unsigned bit_size,
1035                           LLVMValueRef index,
1036                           LLVMValueRef offset,
1037                           LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
1038 {
1039    struct gallivm_state *gallivm = bld_base->base.gallivm;
1040    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1041    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1042    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1043    LLVMValueRef ssbo_limit = NULL;
1044    struct lp_build_context *load_bld;
1045    uint32_t shift_val = bit_size_to_shift_size(bit_size);
1046 
1047    load_bld = get_int_bld(bld_base, true, bit_size);
1048 
1049    offset = LLVMBuildAShr(gallivm->builder, offset, lp_build_const_int_vec(gallivm, uint_bld->type, shift_val), "");
1050 
1051    /* although the index is dynamically uniform that doesn't count if exec mask isn't set, so read the one-by-one */
1052 
1053    LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
1054    for (unsigned c = 0; c < nc; c++)
1055       result[c] = lp_build_alloca(gallivm, load_bld->vec_type, "");
1056 
1057    LLVMValueRef exec_mask = mask_vec(bld_base);
1058    LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
1059    struct lp_build_loop_state loop_state;
1060    lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1061    LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
1062    LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, "");
1063 
1064    struct lp_build_if_state exec_ifthen;
1065    lp_build_if(&exec_ifthen, gallivm, loop_cond);
1066 
1067    LLVMValueRef mem_ptr;
1068 
1069    if (index) {
1070       LLVMValueRef ssbo_idx = LLVMBuildExtractElement(gallivm->builder, index, loop_state.counter, "");
1071       LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, ssbo_idx);
1072       LLVMValueRef ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, ssbo_idx);
1073       ssbo_limit = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, shift_val), "");
1074       mem_ptr = ssbo_ptr;
1075    } else
1076       mem_ptr = bld->shared_ptr;
1077 
1078    for (unsigned c = 0; c < nc; c++) {
1079       LLVMValueRef loop_index = LLVMBuildAdd(builder, loop_offset, lp_build_const_int32(gallivm, c), "");
1080       LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1);
1081       if (ssbo_limit) {
1082          LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_index, ssbo_limit);
1083          do_fetch = LLVMBuildAnd(builder, do_fetch, ssbo_oob_cmp, "");
1084       }
1085 
1086       struct lp_build_if_state ifthen;
1087       LLVMValueRef fetch_cond, temp_res;
1088 
1089       fetch_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_fetch, lp_build_const_int32(gallivm, 0), "");
1090 
1091       lp_build_if(&ifthen, gallivm, fetch_cond);
1092       LLVMValueRef scalar;
1093       if (bit_size != 32) {
1094          LLVMValueRef mem_ptr2 = LLVMBuildBitCast(builder, mem_ptr, LLVMPointerType(load_bld->elem_type, 0), "");
1095          scalar = lp_build_pointer_get(builder, mem_ptr2, loop_index);
1096       } else
1097          scalar = lp_build_pointer_get(builder, mem_ptr, loop_index);
1098 
1099       temp_res = LLVMBuildLoad(builder, result[c], "");
1100       temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
1101       LLVMBuildStore(builder, temp_res, result[c]);
1102       lp_build_else(&ifthen);
1103       temp_res = LLVMBuildLoad(builder, result[c], "");
1104       LLVMValueRef zero;
1105       if (bit_size == 64)
1106          zero = LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 0, 0);
1107       else if (bit_size == 16)
1108          zero = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0, 0);
1109       else if (bit_size == 8)
1110          zero = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0, 0);
1111       else
1112          zero = lp_build_const_int32(gallivm, 0);
1113       temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");
1114       LLVMBuildStore(builder, temp_res, result[c]);
1115       lp_build_endif(&ifthen);
1116    }
1117 
1118    lp_build_endif(&exec_ifthen);
1119    lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
1120                           NULL, LLVMIntUGE);
1121    for (unsigned c = 0; c < nc; c++)
1122       outval[c] = LLVMBuildLoad(gallivm->builder, result[c], "");
1123 
1124 }
1125 
emit_store_mem(struct lp_build_nir_context * bld_base,unsigned writemask,unsigned nc,unsigned bit_size,LLVMValueRef index,LLVMValueRef offset,LLVMValueRef dst)1126 static void emit_store_mem(struct lp_build_nir_context *bld_base,
1127                            unsigned writemask,
1128                            unsigned nc,
1129                            unsigned bit_size,
1130                            LLVMValueRef index,
1131                            LLVMValueRef offset,
1132                            LLVMValueRef dst)
1133 {
1134    struct gallivm_state *gallivm = bld_base->base.gallivm;
1135    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1136    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1137    LLVMValueRef mem_ptr;
1138    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1139    LLVMValueRef ssbo_limit = NULL;
1140    struct lp_build_context *store_bld;
1141    uint32_t shift_val = bit_size_to_shift_size(bit_size);
1142    store_bld = get_int_bld(bld_base, true, bit_size);
1143 
1144    offset = lp_build_shr_imm(uint_bld, offset, shift_val);
1145 
1146    LLVMValueRef exec_mask = mask_vec(bld_base);
1147    LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
1148    struct lp_build_loop_state loop_state;
1149    lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1150    LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
1151    LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, "");
1152 
1153    struct lp_build_if_state exec_ifthen;
1154    lp_build_if(&exec_ifthen, gallivm, loop_cond);
1155 
1156    if (index) {
1157       LLVMValueRef ssbo_idx = LLVMBuildExtractElement(gallivm->builder, index, loop_state.counter, "");
1158       LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, ssbo_idx);
1159       LLVMValueRef ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, ssbo_idx);
1160       ssbo_limit = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, shift_val), "");
1161       mem_ptr = ssbo_ptr;
1162    } else
1163       mem_ptr = bld->shared_ptr;
1164 
1165    for (unsigned c = 0; c < nc; c++) {
1166       if (!(writemask & (1u << c)))
1167          continue;
1168       LLVMValueRef loop_index = LLVMBuildAdd(builder, loop_offset, lp_build_const_int32(gallivm, c), "");
1169       LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
1170       LLVMValueRef do_store = lp_build_const_int32(gallivm, -1);
1171 
1172       if (ssbo_limit) {
1173          LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_index, ssbo_limit);
1174          do_store = LLVMBuildAnd(builder, do_store, ssbo_oob_cmp, "");
1175       }
1176 
1177       LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
1178                                                        loop_state.counter, "");
1179       value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, "");
1180       struct lp_build_if_state ifthen;
1181       LLVMValueRef store_cond;
1182 
1183       store_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_store, lp_build_const_int32(gallivm, 0), "");
1184       lp_build_if(&ifthen, gallivm, store_cond);
1185       if (bit_size != 32) {
1186          LLVMValueRef mem_ptr2 = LLVMBuildBitCast(builder, mem_ptr, LLVMPointerType(store_bld->elem_type, 0), "");
1187          lp_build_pointer_set(builder, mem_ptr2, loop_index, value_ptr);
1188       } else
1189          lp_build_pointer_set(builder, mem_ptr, loop_index, value_ptr);
1190       lp_build_endif(&ifthen);
1191    }
1192 
1193    lp_build_endif(&exec_ifthen);
1194    lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
1195                              NULL, LLVMIntUGE);
1196 
1197 }
1198 
emit_atomic_mem(struct lp_build_nir_context * bld_base,nir_intrinsic_op nir_op,uint32_t bit_size,LLVMValueRef index,LLVMValueRef offset,LLVMValueRef val,LLVMValueRef val2,LLVMValueRef * result)1199 static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
1200                             nir_intrinsic_op nir_op,
1201                             uint32_t bit_size,
1202                             LLVMValueRef index, LLVMValueRef offset,
1203                             LLVMValueRef val, LLVMValueRef val2,
1204                             LLVMValueRef *result)
1205 {
1206    struct gallivm_state *gallivm = bld_base->base.gallivm;
1207    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1208    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1209    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1210    LLVMValueRef ssbo_limit = NULL;
1211    uint32_t shift_val = bit_size_to_shift_size(bit_size);
1212    struct lp_build_context *atomic_bld = get_int_bld(bld_base, true, bit_size);
1213 
1214    offset = lp_build_shr_imm(uint_bld, offset, shift_val);
1215    LLVMValueRef atom_res = lp_build_alloca(gallivm,
1216                                            atomic_bld->vec_type, "");
1217 
1218    LLVMValueRef exec_mask = mask_vec(bld_base);
1219    LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
1220    struct lp_build_loop_state loop_state;
1221    lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1222    LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
1223    LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, "");
1224 
1225    struct lp_build_if_state exec_ifthen;
1226    lp_build_if(&exec_ifthen, gallivm, loop_cond);
1227 
1228    LLVMValueRef mem_ptr;
1229    if (index) {
1230       LLVMValueRef ssbo_idx = LLVMBuildExtractElement(gallivm->builder, index, loop_state.counter, "");
1231       LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, ssbo_idx);
1232       LLVMValueRef ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, ssbo_idx);
1233       ssbo_limit = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, shift_val), "");
1234       mem_ptr = ssbo_ptr;
1235    } else
1236       mem_ptr = bld->shared_ptr;
1237 
1238    LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1);
1239    if (ssbo_limit) {
1240       LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_offset, ssbo_limit);
1241       do_fetch = LLVMBuildAnd(builder, do_fetch, ssbo_oob_cmp, "");
1242    }
1243 
1244    LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
1245                                                     loop_state.counter, "");
1246    value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, atomic_bld->elem_type, "");
1247 
1248    LLVMValueRef scalar_ptr;
1249    if (bit_size != 32) {
1250       LLVMValueRef mem_ptr2 = LLVMBuildBitCast(builder, mem_ptr, LLVMPointerType(atomic_bld->elem_type, 0), "");
1251       scalar_ptr = LLVMBuildGEP(builder, mem_ptr2, &loop_offset, 1, "");
1252    } else
1253       scalar_ptr = LLVMBuildGEP(builder, mem_ptr, &loop_offset, 1, "");
1254 
1255    struct lp_build_if_state ifthen;
1256    LLVMValueRef inner_cond, temp_res;
1257    LLVMValueRef scalar;
1258 
1259    inner_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_fetch, lp_build_const_int32(gallivm, 0), "");
1260    lp_build_if(&ifthen, gallivm, inner_cond);
1261 
1262    if (nir_op == nir_intrinsic_ssbo_atomic_comp_swap || nir_op == nir_intrinsic_shared_atomic_comp_swap) {
1263       LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
1264                                                          loop_state.counter, "");
1265       cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atomic_bld->elem_type, "");
1266       scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
1267                                       cas_src_ptr,
1268                                       LLVMAtomicOrderingSequentiallyConsistent,
1269                                       LLVMAtomicOrderingSequentiallyConsistent,
1270                                       false);
1271       scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
1272    } else {
1273       LLVMAtomicRMWBinOp op;
1274 
1275       switch (nir_op) {
1276       case nir_intrinsic_shared_atomic_add:
1277       case nir_intrinsic_ssbo_atomic_add:
1278          op = LLVMAtomicRMWBinOpAdd;
1279          break;
1280       case nir_intrinsic_shared_atomic_exchange:
1281       case nir_intrinsic_ssbo_atomic_exchange:
1282          op = LLVMAtomicRMWBinOpXchg;
1283          break;
1284       case nir_intrinsic_shared_atomic_and:
1285       case nir_intrinsic_ssbo_atomic_and:
1286          op = LLVMAtomicRMWBinOpAnd;
1287          break;
1288       case nir_intrinsic_shared_atomic_or:
1289       case nir_intrinsic_ssbo_atomic_or:
1290          op = LLVMAtomicRMWBinOpOr;
1291          break;
1292       case nir_intrinsic_shared_atomic_xor:
1293       case nir_intrinsic_ssbo_atomic_xor:
1294          op = LLVMAtomicRMWBinOpXor;
1295          break;
1296       case nir_intrinsic_shared_atomic_umin:
1297       case nir_intrinsic_ssbo_atomic_umin:
1298          op = LLVMAtomicRMWBinOpUMin;
1299          break;
1300       case nir_intrinsic_shared_atomic_umax:
1301       case nir_intrinsic_ssbo_atomic_umax:
1302          op = LLVMAtomicRMWBinOpUMax;
1303          break;
1304       case nir_intrinsic_ssbo_atomic_imin:
1305       case nir_intrinsic_shared_atomic_imin:
1306          op = LLVMAtomicRMWBinOpMin;
1307          break;
1308       case nir_intrinsic_ssbo_atomic_imax:
1309       case nir_intrinsic_shared_atomic_imax:
1310          op = LLVMAtomicRMWBinOpMax;
1311          break;
1312       default:
1313          unreachable("unknown atomic op");
1314       }
1315       scalar = LLVMBuildAtomicRMW(builder, op,
1316                                   scalar_ptr, value_ptr,
1317                                   LLVMAtomicOrderingSequentiallyConsistent,
1318                                   false);
1319    }
1320    temp_res = LLVMBuildLoad(builder, atom_res, "");
1321    temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
1322    LLVMBuildStore(builder, temp_res, atom_res);
1323    lp_build_else(&ifthen);
1324    temp_res = LLVMBuildLoad(builder, atom_res, "");
1325    LLVMValueRef zero = bit_size == 64 ? lp_build_const_int64(gallivm, 0) : lp_build_const_int32(gallivm, 0);
1326    temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");
1327    LLVMBuildStore(builder, temp_res, atom_res);
1328    lp_build_endif(&ifthen);
1329 
1330    lp_build_endif(&exec_ifthen);
1331    lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
1332                           NULL, LLVMIntUGE);
1333    *result = LLVMBuildLoad(builder, atom_res, "");
1334 }
1335 
emit_barrier(struct lp_build_nir_context * bld_base)1336 static void emit_barrier(struct lp_build_nir_context *bld_base)
1337 {
1338    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1339    struct gallivm_state * gallivm = bld_base->base.gallivm;
1340 
1341    LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");
1342 
1343    lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);
1344    LLVMPositionBuilderAtEnd(gallivm->builder, resume);
1345 }
1346 
emit_get_ssbo_size(struct lp_build_nir_context * bld_base,LLVMValueRef index)1347 static LLVMValueRef emit_get_ssbo_size(struct lp_build_nir_context *bld_base,
1348                                        LLVMValueRef index)
1349 {
1350    struct gallivm_state *gallivm = bld_base->base.gallivm;
1351    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1352    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1353    struct lp_build_context *bld_broad = &bld_base->uint_bld;
1354    LLVMValueRef size_ptr = lp_build_array_get(bld_base->base.gallivm, bld->ssbo_sizes_ptr,
1355                                               LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));
1356    return lp_build_broadcast_scalar(bld_broad, size_ptr);
1357 }
1358 
emit_image_op(struct lp_build_nir_context * bld_base,struct lp_img_params * params)1359 static void emit_image_op(struct lp_build_nir_context *bld_base,
1360                           struct lp_img_params *params)
1361 {
1362    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1363    struct gallivm_state *gallivm = bld_base->base.gallivm;
1364 
1365    params->type = bld_base->base.type;
1366    params->context_ptr = bld->context_ptr;
1367    params->thread_data_ptr = bld->thread_data_ptr;
1368    params->exec_mask = mask_vec(bld_base);
1369 
1370    if (params->image_index_offset)
1371       params->image_index_offset = LLVMBuildExtractElement(gallivm->builder, params->image_index_offset,
1372                                                            lp_build_const_int32(gallivm, 0), "");
1373 
1374    bld->image->emit_op(bld->image,
1375                        bld->bld_base.base.gallivm,
1376                        params);
1377 
1378 }
1379 
emit_image_size(struct lp_build_nir_context * bld_base,struct lp_sampler_size_query_params * params)1380 static void emit_image_size(struct lp_build_nir_context *bld_base,
1381                             struct lp_sampler_size_query_params *params)
1382 {
1383    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1384    struct gallivm_state *gallivm = bld_base->base.gallivm;
1385 
1386    params->int_type = bld_base->int_bld.type;
1387    params->context_ptr = bld->context_ptr;
1388 
1389    if (params->texture_unit_offset)
1390       params->texture_unit_offset = LLVMBuildExtractElement(gallivm->builder, params->texture_unit_offset,
1391                                                             lp_build_const_int32(gallivm, 0), "");
1392    bld->image->emit_size_query(bld->image,
1393                                bld->bld_base.base.gallivm,
1394                                params);
1395 
1396 }
1397 
init_var_slots(struct lp_build_nir_context * bld_base,nir_variable * var,unsigned sc)1398 static void init_var_slots(struct lp_build_nir_context *bld_base,
1399                            nir_variable *var, unsigned sc)
1400 {
1401    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1402    unsigned slots = glsl_count_attribute_slots(var->type, false) * 4;
1403 
1404    if (!bld->outputs)
1405      return;
1406    for (unsigned comp = sc; comp < slots + sc; comp++) {
1407       unsigned this_loc = var->data.driver_location + (comp / 4);
1408       unsigned this_chan = comp % 4;
1409 
1410       if (!bld->outputs[this_loc][this_chan])
1411          bld->outputs[this_loc][this_chan] = lp_build_alloca(bld_base->base.gallivm,
1412                                                              bld_base->base.vec_type, "output");
1413    }
1414 }
1415 
emit_var_decl(struct lp_build_nir_context * bld_base,nir_variable * var)1416 static void emit_var_decl(struct lp_build_nir_context *bld_base,
1417                           nir_variable *var)
1418 {
1419    unsigned sc = var->data.location_frac;
1420    switch (var->data.mode) {
1421    case nir_var_shader_out: {
1422       if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
1423          if (var->data.location == FRAG_RESULT_STENCIL)
1424             sc = 1;
1425          else if (var->data.location == FRAG_RESULT_DEPTH)
1426             sc = 2;
1427       }
1428       init_var_slots(bld_base, var, sc);
1429       break;
1430    }
1431    default:
1432       break;
1433    }
1434 }
1435 
emit_tex(struct lp_build_nir_context * bld_base,struct lp_sampler_params * params)1436 static void emit_tex(struct lp_build_nir_context *bld_base,
1437                      struct lp_sampler_params *params)
1438 {
1439    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1440    struct gallivm_state *gallivm = bld_base->base.gallivm;
1441 
1442    params->type = bld_base->base.type;
1443    params->context_ptr = bld->context_ptr;
1444    params->thread_data_ptr = bld->thread_data_ptr;
1445 
1446    if (params->texture_index_offset && bld_base->shader->info.stage != MESA_SHADER_FRAGMENT) {
1447       /* this is horrible but this can be dynamic */
1448       LLVMValueRef coords[5];
1449       LLVMValueRef *orig_texel_ptr;
1450       struct lp_build_context *uint_bld = &bld_base->uint_bld;
1451       LLVMValueRef result[4] = { LLVMGetUndef(bld_base->base.vec_type),
1452                                  LLVMGetUndef(bld_base->base.vec_type),
1453                                  LLVMGetUndef(bld_base->base.vec_type),
1454                                  LLVMGetUndef(bld_base->base.vec_type) };
1455       LLVMValueRef texel[4], orig_offset, orig_lod;
1456       unsigned i;
1457       orig_texel_ptr = params->texel;
1458       orig_lod = params->lod;
1459       for (i = 0; i < 5; i++) {
1460          coords[i] = params->coords[i];
1461       }
1462       orig_offset = params->texture_index_offset;
1463 
1464       for (unsigned v = 0; v < uint_bld->type.length; v++) {
1465          LLVMValueRef idx = lp_build_const_int32(gallivm, v);
1466          LLVMValueRef new_coords[5];
1467          for (i = 0; i < 5; i++) {
1468             new_coords[i] = LLVMBuildExtractElement(gallivm->builder,
1469                                                     coords[i], idx, "");
1470          }
1471          params->coords = new_coords;
1472          params->texture_index_offset = LLVMBuildExtractElement(gallivm->builder,
1473                                                                 orig_offset,
1474                                                                 idx, "");
1475          params->type = lp_elem_type(bld_base->base.type);
1476 
1477          if (orig_lod)
1478             params->lod = LLVMBuildExtractElement(gallivm->builder, orig_lod, idx, "");
1479          params->texel = texel;
1480          bld->sampler->emit_tex_sample(bld->sampler,
1481                                        gallivm,
1482                                        params);
1483 
1484          for (i = 0; i < 4; i++) {
1485             result[i] = LLVMBuildInsertElement(gallivm->builder, result[i], texel[i], idx, "");
1486          }
1487       }
1488       for (i = 0; i < 4; i++) {
1489          orig_texel_ptr[i] = result[i];
1490       }
1491       return;
1492    }
1493 
1494    if (params->texture_index_offset)
1495       params->texture_index_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder,
1496                                                              params->texture_index_offset,
1497                                                              lp_build_const_int32(bld_base->base.gallivm, 0), "");
1498 
1499    params->type = bld_base->base.type;
1500    bld->sampler->emit_tex_sample(bld->sampler,
1501                                  bld->bld_base.base.gallivm,
1502                                  params);
1503 }
1504 
emit_tex_size(struct lp_build_nir_context * bld_base,struct lp_sampler_size_query_params * params)1505 static void emit_tex_size(struct lp_build_nir_context *bld_base,
1506                           struct lp_sampler_size_query_params *params)
1507 {
1508    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1509 
1510    params->int_type = bld_base->int_bld.type;
1511    params->context_ptr = bld->context_ptr;
1512 
1513    if (params->texture_unit_offset)
1514       params->texture_unit_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder,
1515                                                              params->texture_unit_offset,
1516                                                              lp_build_const_int32(bld_base->base.gallivm, 0), "");
1517    bld->sampler->emit_size_query(bld->sampler,
1518                                  bld->bld_base.base.gallivm,
1519                                  params);
1520 }
1521 
emit_sysval_intrin(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1522 static void emit_sysval_intrin(struct lp_build_nir_context *bld_base,
1523                                nir_intrinsic_instr *instr,
1524                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1525 {
1526    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1527    struct gallivm_state *gallivm = bld_base->base.gallivm;
1528    struct lp_build_context *bld_broad = get_int_bld(bld_base, true, instr->dest.ssa.bit_size);
1529    switch (instr->intrinsic) {
1530    case nir_intrinsic_load_instance_id:
1531       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1532       break;
1533    case nir_intrinsic_load_base_instance:
1534       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.base_instance);
1535       break;
1536    case nir_intrinsic_load_base_vertex:
1537       result[0] = bld->system_values.basevertex;
1538       break;
1539    case nir_intrinsic_load_first_vertex:
1540       result[0] = bld->system_values.firstvertex;
1541       break;
1542    case nir_intrinsic_load_vertex_id:
1543       result[0] = bld->system_values.vertex_id;
1544       break;
1545    case nir_intrinsic_load_primitive_id:
1546       result[0] = bld->system_values.prim_id;
1547       break;
1548    case nir_intrinsic_load_workgroup_id: {
1549       LLVMValueRef tmp[3];
1550       for (unsigned i = 0; i < 3; i++) {
1551          tmp[i] = LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_id, lp_build_const_int32(gallivm, i), "");
1552          if (instr->dest.ssa.bit_size == 64)
1553             tmp[i] = LLVMBuildZExt(gallivm->builder, tmp[i], bld_base->uint64_bld.elem_type, "");
1554          result[i] = lp_build_broadcast_scalar(bld_broad, tmp[i]);
1555       }
1556       break;
1557    }
1558    case nir_intrinsic_load_local_invocation_id:
1559       for (unsigned i = 0; i < 3; i++)
1560          result[i] = LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, i, "");
1561       break;
1562    case nir_intrinsic_load_local_invocation_index: {
1563       LLVMValueRef tmp, tmp2;
1564       tmp = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, 1), ""));
1565       tmp2 = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, 0), ""));
1566       tmp = lp_build_mul(&bld_base->uint_bld, tmp, tmp2);
1567       tmp = lp_build_mul(&bld_base->uint_bld, tmp, LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, 2, ""));
1568 
1569       tmp2 = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, 0), ""));
1570       tmp2 = lp_build_mul(&bld_base->uint_bld, tmp2, LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, 1, ""));
1571       tmp = lp_build_add(&bld_base->uint_bld, tmp, tmp2);
1572       tmp = lp_build_add(&bld_base->uint_bld, tmp, LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, 0, ""));
1573       result[0] = tmp;
1574       break;
1575    }
1576    case nir_intrinsic_load_num_workgroups: {
1577       LLVMValueRef tmp[3];
1578       for (unsigned i = 0; i < 3; i++) {
1579          tmp[i] = LLVMBuildExtractElement(gallivm->builder, bld->system_values.grid_size, lp_build_const_int32(gallivm, i), "");
1580          if (instr->dest.ssa.bit_size == 64)
1581             tmp[i] = LLVMBuildZExt(gallivm->builder, tmp[i], bld_base->uint64_bld.elem_type, "");
1582          result[i] = lp_build_broadcast_scalar(bld_broad, tmp[i]);
1583       }
1584       break;
1585    }
1586    case nir_intrinsic_load_invocation_id:
1587       if (bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL)
1588          result[0] = bld->system_values.invocation_id;
1589       else
1590          result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
1591       break;
1592    case nir_intrinsic_load_front_face:
1593       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);
1594       break;
1595    case nir_intrinsic_load_draw_id:
1596       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.draw_id);
1597       break;
1598    default:
1599       break;
1600    case nir_intrinsic_load_workgroup_size:
1601      for (unsigned i = 0; i < 3; i++)
1602        result[i] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, i), ""));
1603      break;
1604    case nir_intrinsic_load_work_dim:
1605       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.work_dim);
1606       break;
1607    case nir_intrinsic_load_tess_coord:
1608       for (unsigned i = 0; i < 3; i++) {
1609 	 result[i] = LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_coord, i, "");
1610       }
1611       break;
1612    case nir_intrinsic_load_tess_level_outer:
1613       for (unsigned i = 0; i < 4; i++)
1614          result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_outer, i, ""));
1615       break;
1616    case nir_intrinsic_load_tess_level_inner:
1617       for (unsigned i = 0; i < 2; i++)
1618          result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_inner, i, ""));
1619       break;
1620    case nir_intrinsic_load_patch_vertices_in:
1621       result[0] = bld->system_values.vertices_in;
1622       break;
1623    case nir_intrinsic_load_sample_id:
1624       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.sample_id);
1625       break;
1626    case nir_intrinsic_load_sample_pos:
1627       for (unsigned i = 0; i < 2; i++) {
1628          LLVMValueRef idx = LLVMBuildMul(gallivm->builder, bld->system_values.sample_id, lp_build_const_int32(gallivm, 2), "");
1629          idx = LLVMBuildAdd(gallivm->builder, idx, lp_build_const_int32(gallivm, i), "");
1630          LLVMValueRef val = lp_build_array_get(gallivm, bld->system_values.sample_pos, idx);
1631          result[i] = lp_build_broadcast_scalar(&bld_base->base, val);
1632       }
1633       break;
1634    case nir_intrinsic_load_sample_mask_in:
1635       result[0] = bld->system_values.sample_mask_in;
1636       break;
1637    case nir_intrinsic_load_view_index:
1638       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.view_index);
1639       break;
1640    case nir_intrinsic_load_subgroup_invocation: {
1641       LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
1642       for(unsigned i = 0; i < bld->bld_base.base.type.length; ++i)
1643          elems[i] = lp_build_const_int32(gallivm, i);
1644       result[0] = LLVMConstVector(elems, bld->bld_base.base.type.length);
1645       break;
1646    }
1647    case nir_intrinsic_load_subgroup_id:
1648       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.subgroup_id);
1649       break;
1650    case nir_intrinsic_load_num_subgroups:
1651       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.num_subgroups);
1652       break;
1653    }
1654 }
1655 
emit_helper_invocation(struct lp_build_nir_context * bld_base,LLVMValueRef * dst)1656 static void emit_helper_invocation(struct lp_build_nir_context *bld_base,
1657                                    LLVMValueRef *dst)
1658 {
1659    struct gallivm_state *gallivm = bld_base->base.gallivm;
1660    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1661    *dst = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, mask_vec(bld_base), lp_build_const_int_vec(gallivm, uint_bld->type, -1));
1662 }
1663 
bgnloop(struct lp_build_nir_context * bld_base)1664 static void bgnloop(struct lp_build_nir_context *bld_base)
1665 {
1666    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1667    lp_exec_bgnloop(&bld->exec_mask, true);
1668 }
1669 
endloop(struct lp_build_nir_context * bld_base)1670 static void endloop(struct lp_build_nir_context *bld_base)
1671 {
1672    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1673    lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
1674 }
1675 
if_cond(struct lp_build_nir_context * bld_base,LLVMValueRef cond)1676 static void if_cond(struct lp_build_nir_context *bld_base, LLVMValueRef cond)
1677 {
1678    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1679    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1680    lp_exec_mask_cond_push(&bld->exec_mask, LLVMBuildBitCast(builder, cond, bld_base->base.int_vec_type, ""));
1681 }
1682 
else_stmt(struct lp_build_nir_context * bld_base)1683 static void else_stmt(struct lp_build_nir_context *bld_base)
1684 {
1685    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1686    lp_exec_mask_cond_invert(&bld->exec_mask);
1687 }
1688 
endif_stmt(struct lp_build_nir_context * bld_base)1689 static void endif_stmt(struct lp_build_nir_context *bld_base)
1690 {
1691    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1692    lp_exec_mask_cond_pop(&bld->exec_mask);
1693 }
1694 
break_stmt(struct lp_build_nir_context * bld_base)1695 static void break_stmt(struct lp_build_nir_context *bld_base)
1696 {
1697    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1698 
1699    lp_exec_break(&bld->exec_mask, NULL, false);
1700 }
1701 
continue_stmt(struct lp_build_nir_context * bld_base)1702 static void continue_stmt(struct lp_build_nir_context *bld_base)
1703 {
1704    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1705    lp_exec_continue(&bld->exec_mask);
1706 }
1707 
discard(struct lp_build_nir_context * bld_base,LLVMValueRef cond)1708 static void discard(struct lp_build_nir_context *bld_base, LLVMValueRef cond)
1709 {
1710    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1711    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1712    LLVMValueRef mask;
1713 
1714    if (!cond) {
1715       if (bld->exec_mask.has_mask) {
1716          mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
1717       } else {
1718          mask = LLVMConstNull(bld->bld_base.base.int_vec_type);
1719       }
1720    } else {
1721       mask = LLVMBuildNot(builder, cond, "");
1722       if (bld->exec_mask.has_mask) {
1723          LLVMValueRef invmask;
1724          invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
1725          mask = LLVMBuildOr(builder, mask, invmask, "");
1726       }
1727    }
1728    lp_build_mask_update(bld->mask, mask);
1729 }
1730 
1731 static void
increment_vec_ptr_by_mask(struct lp_build_nir_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)1732 increment_vec_ptr_by_mask(struct lp_build_nir_context * bld_base,
1733                           LLVMValueRef ptr,
1734                           LLVMValueRef mask)
1735 {
1736    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1737    LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
1738 
1739    current_vec = LLVMBuildSub(builder, current_vec, mask, "");
1740 
1741    LLVMBuildStore(builder, current_vec, ptr);
1742 }
1743 
1744 static void
clear_uint_vec_ptr_from_mask(struct lp_build_nir_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)1745 clear_uint_vec_ptr_from_mask(struct lp_build_nir_context * bld_base,
1746                              LLVMValueRef ptr,
1747                              LLVMValueRef mask)
1748 {
1749    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1750    LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
1751 
1752    current_vec = lp_build_select(&bld_base->uint_bld,
1753                                  mask,
1754                                  bld_base->uint_bld.zero,
1755                                  current_vec);
1756 
1757    LLVMBuildStore(builder, current_vec, ptr);
1758 }
1759 
1760 static LLVMValueRef
clamp_mask_to_max_output_vertices(struct lp_build_nir_soa_context * bld,LLVMValueRef current_mask_vec,LLVMValueRef total_emitted_vertices_vec)1761 clamp_mask_to_max_output_vertices(struct lp_build_nir_soa_context * bld,
1762                                   LLVMValueRef current_mask_vec,
1763                                   LLVMValueRef total_emitted_vertices_vec)
1764 {
1765    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1766    struct lp_build_context *int_bld = &bld->bld_base.int_bld;
1767    LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
1768                                             total_emitted_vertices_vec,
1769                                             bld->max_output_vertices_vec);
1770 
1771    return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
1772 }
1773 
emit_vertex(struct lp_build_nir_context * bld_base,uint32_t stream_id)1774 static void emit_vertex(struct lp_build_nir_context *bld_base, uint32_t stream_id)
1775 {
1776    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1777    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1778 
1779    if (stream_id >= bld->gs_vertex_streams)
1780       return;
1781    assert(bld->gs_iface->emit_vertex);
1782    LLVMValueRef total_emitted_vertices_vec =
1783       LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr[stream_id], "");
1784    LLVMValueRef mask = mask_vec(bld_base);
1785    mask = clamp_mask_to_max_output_vertices(bld, mask,
1786                                             total_emitted_vertices_vec);
1787    bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
1788                               bld->outputs,
1789                               total_emitted_vertices_vec,
1790                               mask,
1791                               lp_build_const_int_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, stream_id));
1792 
1793    increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
1794                              mask);
1795    increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr[stream_id],
1796                              mask);
1797 }
1798 
1799 static void
end_primitive_masked(struct lp_build_nir_context * bld_base,LLVMValueRef mask,uint32_t stream_id)1800 end_primitive_masked(struct lp_build_nir_context * bld_base,
1801                      LLVMValueRef mask, uint32_t stream_id)
1802 {
1803    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1804    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1805 
1806    if (stream_id >= bld->gs_vertex_streams)
1807       return;
1808    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1809    LLVMValueRef emitted_vertices_vec =
1810       LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr[stream_id], "");
1811    LLVMValueRef emitted_prims_vec =
1812       LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr[stream_id], "");
1813    LLVMValueRef total_emitted_vertices_vec =
1814       LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr[stream_id], "");
1815 
1816    LLVMValueRef emitted_mask = lp_build_cmp(uint_bld,
1817                                             PIPE_FUNC_NOTEQUAL,
1818                                             emitted_vertices_vec,
1819                                             uint_bld->zero);
1820    mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
1821    bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
1822 				total_emitted_vertices_vec,
1823 				emitted_vertices_vec, emitted_prims_vec, mask, stream_id);
1824    increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr[stream_id],
1825                              mask);
1826    clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
1827                                 mask);
1828 }
1829 
end_primitive(struct lp_build_nir_context * bld_base,uint32_t stream_id)1830 static void end_primitive(struct lp_build_nir_context *bld_base, uint32_t stream_id)
1831 {
1832    ASSERTED struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1833 
1834    assert(bld->gs_iface->end_primitive);
1835 
1836    LLVMValueRef mask = mask_vec(bld_base);
1837    end_primitive_masked(bld_base, mask, stream_id);
1838 }
1839 
1840 static void
emit_prologue(struct lp_build_nir_soa_context * bld)1841 emit_prologue(struct lp_build_nir_soa_context *bld)
1842 {
1843    struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
1844    if (bld->indirects & nir_var_shader_in && !bld->gs_iface && !bld->tcs_iface && !bld->tes_iface) {
1845       uint32_t num_inputs = util_bitcount64(bld->bld_base.shader->info.inputs_read);
1846       unsigned index, chan;
1847       LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
1848       LLVMValueRef array_size = lp_build_const_int32(gallivm, num_inputs * 4);
1849       bld->inputs_array = lp_build_array_alloca(gallivm,
1850                                                vec_type, array_size,
1851                                                "input_array");
1852 
1853       for (index = 0; index < num_inputs; ++index) {
1854          for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
1855             LLVMValueRef lindex =
1856                lp_build_const_int32(gallivm, index * 4 + chan);
1857             LLVMValueRef input_ptr =
1858                LLVMBuildGEP(gallivm->builder, bld->inputs_array,
1859                             &lindex, 1, "");
1860             LLVMValueRef value = bld->inputs[index][chan];
1861             if (value)
1862                LLVMBuildStore(gallivm->builder, value, input_ptr);
1863          }
1864       }
1865    }
1866 }
1867 
emit_vote(struct lp_build_nir_context * bld_base,LLVMValueRef src,nir_intrinsic_instr * instr,LLVMValueRef result[4])1868 static void emit_vote(struct lp_build_nir_context *bld_base, LLVMValueRef src,
1869                       nir_intrinsic_instr *instr, LLVMValueRef result[4])
1870 {
1871    struct gallivm_state * gallivm = bld_base->base.gallivm;
1872    LLVMBuilderRef builder = gallivm->builder;
1873    uint32_t bit_size = nir_src_bit_size(instr->src[0]);
1874    LLVMValueRef exec_mask = mask_vec(bld_base);
1875    struct lp_build_loop_state loop_state;
1876    LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
1877 
1878    LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->uint_bld.elem_type, "");
1879    LLVMValueRef eq_store = lp_build_alloca(gallivm, get_int_bld(bld_base, true, bit_size)->elem_type, "");
1880    LLVMValueRef init_val = NULL;
1881    if (instr->intrinsic == nir_intrinsic_vote_ieq ||
1882        instr->intrinsic == nir_intrinsic_vote_feq) {
1883       /* for equal we unfortunately have to loop and find the first valid one. */
1884       lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1885       LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
1886 
1887       struct lp_build_if_state ifthen;
1888       lp_build_if(&ifthen, gallivm, if_cond);
1889       LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
1890                                                        loop_state.counter, "");
1891       LLVMBuildStore(builder, value_ptr, eq_store);
1892       LLVMBuildStore(builder, lp_build_const_int32(gallivm, -1), res_store);
1893       lp_build_endif(&ifthen);
1894       lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
1895                              NULL, LLVMIntUGE);
1896       init_val = LLVMBuildLoad(builder, eq_store, "");
1897    } else {
1898       LLVMBuildStore(builder, lp_build_const_int32(gallivm, instr->intrinsic == nir_intrinsic_vote_any ? 0 : -1), res_store);
1899    }
1900 
1901    LLVMValueRef res;
1902    lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1903    LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
1904                                                        loop_state.counter, "");
1905    struct lp_build_if_state ifthen;
1906    LLVMValueRef if_cond;
1907    if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
1908 
1909    lp_build_if(&ifthen, gallivm, if_cond);
1910    res = LLVMBuildLoad(builder, res_store, "");
1911 
1912    if (instr->intrinsic == nir_intrinsic_vote_feq) {
1913       struct lp_build_context *flt_bld = get_flt_bld(bld_base, bit_size);
1914       LLVMValueRef tmp = LLVMBuildFCmp(builder, LLVMRealUEQ,
1915                                        LLVMBuildBitCast(builder, init_val, flt_bld->elem_type, ""),
1916                                        LLVMBuildBitCast(builder, value_ptr, flt_bld->elem_type, ""), "");
1917       tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, "");
1918       res = LLVMBuildAnd(builder, res, tmp, "");
1919    } else if (instr->intrinsic == nir_intrinsic_vote_ieq) {
1920       LLVMValueRef tmp = LLVMBuildICmp(builder, LLVMIntEQ, init_val, value_ptr, "");
1921       tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, "");
1922       res = LLVMBuildAnd(builder, res, tmp, "");
1923    } else if (instr->intrinsic == nir_intrinsic_vote_any)
1924       res = LLVMBuildOr(builder, res, value_ptr, "");
1925    else
1926       res = LLVMBuildAnd(builder, res, value_ptr, "");
1927    LLVMBuildStore(builder, res, res_store);
1928    lp_build_endif(&ifthen);
1929    lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
1930                           NULL, LLVMIntUGE);
1931    result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildLoad(builder, res_store, ""));
1932 }
1933 
emit_ballot(struct lp_build_nir_context * bld_base,LLVMValueRef src,nir_intrinsic_instr * instr,LLVMValueRef result[4])1934 static void emit_ballot(struct lp_build_nir_context *bld_base, LLVMValueRef src, nir_intrinsic_instr *instr, LLVMValueRef result[4])
1935 {
1936    struct gallivm_state * gallivm = bld_base->base.gallivm;
1937    LLVMBuilderRef builder = gallivm->builder;
1938    LLVMValueRef exec_mask = mask_vec(bld_base);
1939    struct lp_build_loop_state loop_state;
1940    src = LLVMBuildAnd(builder, src, exec_mask, "");
1941    LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
1942    LLVMValueRef res;
1943    lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1944    LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
1945                                                     loop_state.counter, "");
1946    res = LLVMBuildLoad(builder, res_store, "");
1947    res = LLVMBuildOr(builder,
1948                      res,
1949                      LLVMBuildAnd(builder, value_ptr, LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1), loop_state.counter, ""), ""), "");
1950    LLVMBuildStore(builder, res, res_store);
1951 
1952    lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
1953                           NULL, LLVMIntUGE);
1954    result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildLoad(builder, res_store, ""));
1955 }
1956 
emit_elect(struct lp_build_nir_context * bld_base,LLVMValueRef result[4])1957 static void emit_elect(struct lp_build_nir_context *bld_base, LLVMValueRef result[4])
1958 {
1959    struct gallivm_state *gallivm = bld_base->base.gallivm;
1960    LLVMBuilderRef builder = gallivm->builder;
1961    LLVMValueRef exec_mask = mask_vec(bld_base);
1962    struct lp_build_loop_state loop_state;
1963 
1964    LLVMValueRef idx_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
1965    LLVMValueRef found_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
1966    lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1967    LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, exec_mask,
1968                                                     loop_state.counter, "");
1969    LLVMValueRef cond = LLVMBuildICmp(gallivm->builder,
1970                                      LLVMIntEQ,
1971                                      value_ptr,
1972                                      lp_build_const_int32(gallivm, -1), "");
1973    LLVMValueRef cond2 = LLVMBuildICmp(gallivm->builder,
1974                                       LLVMIntEQ,
1975                                       LLVMBuildLoad(builder, found_store, ""),
1976                                       lp_build_const_int32(gallivm, 0), "");
1977 
1978    cond = LLVMBuildAnd(builder, cond, cond2, "");
1979    struct lp_build_if_state ifthen;
1980    lp_build_if(&ifthen, gallivm, cond);
1981    LLVMBuildStore(builder, lp_build_const_int32(gallivm, 1), found_store);
1982    LLVMBuildStore(builder, loop_state.counter, idx_store);
1983    lp_build_endif(&ifthen);
1984    lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
1985                           NULL, LLVMIntUGE);
1986 
1987    result[0] = LLVMBuildInsertElement(builder, bld_base->uint_bld.zero,
1988                                       lp_build_const_int32(gallivm, -1),
1989                                       LLVMBuildLoad(builder, idx_store, ""),
1990                                       "");
1991 }
1992 
emit_reduce(struct lp_build_nir_context * bld_base,LLVMValueRef src,nir_intrinsic_instr * instr,LLVMValueRef result[4])1993 static void emit_reduce(struct lp_build_nir_context *bld_base, LLVMValueRef src,
1994                         nir_intrinsic_instr *instr, LLVMValueRef result[4])
1995 {
1996    struct gallivm_state *gallivm = bld_base->base.gallivm;
1997    LLVMBuilderRef builder = gallivm->builder;
1998    uint32_t bit_size = nir_src_bit_size(instr->src[0]);
1999    /* can't use llvm reduction intrinsics because of exec_mask */
2000    LLVMValueRef exec_mask = mask_vec(bld_base);
2001    struct lp_build_loop_state loop_state;
2002    nir_op reduction_op = nir_intrinsic_reduction_op(instr);
2003 
2004    LLVMValueRef res_store = NULL;
2005    LLVMValueRef scan_store;
2006    struct lp_build_context *int_bld = get_int_bld(bld_base, true, bit_size);
2007 
2008    if (instr->intrinsic != nir_intrinsic_reduce)
2009       res_store = lp_build_alloca(gallivm, int_bld->vec_type, "");
2010 
2011    scan_store = lp_build_alloca(gallivm, int_bld->elem_type, "");
2012 
2013    struct lp_build_context elem_bld;
2014    bool is_flt = reduction_op == nir_op_fadd ||
2015       reduction_op == nir_op_fmul ||
2016       reduction_op == nir_op_fmin ||
2017       reduction_op == nir_op_fmax;
2018    bool is_unsigned = reduction_op == nir_op_umin ||
2019       reduction_op == nir_op_umax;
2020 
2021    struct lp_build_context *vec_bld = is_flt ? get_flt_bld(bld_base, bit_size) :
2022       get_int_bld(bld_base, is_unsigned, bit_size);
2023 
2024    lp_build_context_init(&elem_bld, gallivm, lp_elem_type(vec_bld->type));
2025 
2026    LLVMValueRef store_val = NULL;
2027    /*
2028     * Put the identity value for the operation into the storage
2029     */
2030    switch (reduction_op) {
2031    case nir_op_fmin: {
2032       LLVMValueRef flt_max = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), INFINITY) :
2033          (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), INFINITY) : lp_build_const_float(gallivm, INFINITY));
2034       store_val = LLVMBuildBitCast(builder, flt_max, int_bld->elem_type, "");
2035       break;
2036    }
2037    case nir_op_fmax: {
2038       LLVMValueRef flt_min = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), -INFINITY) :
2039          (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), -INFINITY) : lp_build_const_float(gallivm, -INFINITY));
2040       store_val = LLVMBuildBitCast(builder, flt_min, int_bld->elem_type, "");
2041       break;
2042    }
2043    case nir_op_fmul: {
2044       LLVMValueRef flt_one = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), 1.0) :
2045          (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), 1.0) : lp_build_const_float(gallivm, 1.0));
2046       store_val = LLVMBuildBitCast(builder, flt_one, int_bld->elem_type, "");
2047       break;
2048    }
2049    case nir_op_umin:
2050       switch (bit_size) {
2051       case 8:
2052          store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), UINT8_MAX, 0);
2053          break;
2054       case 16:
2055          store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), UINT16_MAX, 0);
2056          break;
2057       case 32:
2058       default:
2059          store_val  = lp_build_const_int32(gallivm, UINT_MAX);
2060          break;
2061       case 64:
2062          store_val  = lp_build_const_int64(gallivm, UINT64_MAX);
2063          break;
2064       }
2065       break;
2066    case nir_op_imin:
2067       switch (bit_size) {
2068       case 8:
2069          store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), INT8_MAX, 0);
2070          break;
2071       case 16:
2072          store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), INT16_MAX, 0);
2073          break;
2074       case 32:
2075       default:
2076          store_val  = lp_build_const_int32(gallivm, INT_MAX);
2077          break;
2078       case 64:
2079          store_val  = lp_build_const_int64(gallivm, INT64_MAX);
2080          break;
2081       }
2082       break;
2083    case nir_op_imax:
2084       switch (bit_size) {
2085       case 8:
2086          store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), INT8_MIN, 0);
2087          break;
2088       case 16:
2089          store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), INT16_MIN, 0);
2090          break;
2091       case 32:
2092       default:
2093          store_val  = lp_build_const_int32(gallivm, INT_MIN);
2094          break;
2095       case 64:
2096          store_val  = lp_build_const_int64(gallivm, INT64_MIN);
2097          break;
2098       }
2099       break;
2100    case nir_op_imul:
2101       switch (bit_size) {
2102       case 8:
2103          store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 1, 0);
2104          break;
2105       case 16:
2106          store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 1, 0);
2107          break;
2108       case 32:
2109       default:
2110          store_val  = lp_build_const_int32(gallivm, 1);
2111          break;
2112       case 64:
2113          store_val  = lp_build_const_int64(gallivm, 1);
2114          break;
2115       }
2116       break;
2117    case nir_op_iand:
2118       switch (bit_size) {
2119       case 8:
2120          store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0xff, 0);
2121          break;
2122       case 16:
2123          store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0xffff, 0);
2124          break;
2125       case 32:
2126       default:
2127          store_val  = lp_build_const_int32(gallivm, 0xffffffff);
2128          break;
2129       case 64:
2130          store_val  = lp_build_const_int64(gallivm, 0xffffffffffffffffLL);
2131          break;
2132       }
2133       break;
2134    default:
2135       break;
2136    }
2137    if (store_val)
2138       LLVMBuildStore(builder, store_val, scan_store);
2139 
2140    LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
2141 
2142    lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2143 
2144    struct lp_build_if_state ifthen;
2145    LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
2146    lp_build_if(&ifthen, gallivm, if_cond);
2147    LLVMValueRef value = LLVMBuildExtractElement(gallivm->builder, src, loop_state.counter, "");
2148 
2149    LLVMValueRef res = NULL;
2150    LLVMValueRef scan_val = LLVMBuildLoad(gallivm->builder, scan_store, "");
2151    if (instr->intrinsic != nir_intrinsic_reduce)
2152       res = LLVMBuildLoad(gallivm->builder, res_store, "");
2153 
2154    if (instr->intrinsic == nir_intrinsic_exclusive_scan)
2155       res = LLVMBuildInsertElement(builder, res, scan_val, loop_state.counter, "");
2156 
2157    if (is_flt) {
2158       scan_val = LLVMBuildBitCast(builder, scan_val, elem_bld.elem_type, "");
2159       value = LLVMBuildBitCast(builder, value, elem_bld.elem_type, "");
2160    }
2161    switch (reduction_op) {
2162    case nir_op_fadd:
2163    case nir_op_iadd:
2164       scan_val = lp_build_add(&elem_bld, value, scan_val);
2165       break;
2166    case nir_op_fmul:
2167    case nir_op_imul:
2168       scan_val = lp_build_mul(&elem_bld, value, scan_val);
2169       break;
2170    case nir_op_imin:
2171    case nir_op_umin:
2172    case nir_op_fmin:
2173       scan_val = lp_build_min(&elem_bld, value, scan_val);
2174       break;
2175    case nir_op_imax:
2176    case nir_op_umax:
2177    case nir_op_fmax:
2178       scan_val = lp_build_max(&elem_bld, value, scan_val);
2179       break;
2180    case nir_op_iand:
2181       scan_val = lp_build_and(&elem_bld, value, scan_val);
2182       break;
2183    case nir_op_ior:
2184       scan_val = lp_build_or(&elem_bld, value, scan_val);
2185       break;
2186    case nir_op_ixor:
2187       scan_val = lp_build_xor(&elem_bld, value, scan_val);
2188       break;
2189    default:
2190       assert(0);
2191       break;
2192    }
2193    if (is_flt)
2194       scan_val = LLVMBuildBitCast(builder, scan_val, int_bld->elem_type, "");
2195    LLVMBuildStore(builder, scan_val, scan_store);
2196 
2197    if (instr->intrinsic == nir_intrinsic_inclusive_scan) {
2198       res = LLVMBuildInsertElement(builder, res, scan_val, loop_state.counter, "");
2199    }
2200 
2201    if (instr->intrinsic != nir_intrinsic_reduce)
2202       LLVMBuildStore(builder, res, res_store);
2203    lp_build_endif(&ifthen);
2204 
2205    lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2206                           NULL, LLVMIntUGE);
2207    if (instr->intrinsic == nir_intrinsic_reduce)
2208       result[0] = lp_build_broadcast_scalar(int_bld, LLVMBuildLoad(builder, scan_store, ""));
2209    else
2210       result[0] = LLVMBuildLoad(builder, res_store, "");
2211 }
2212 
emit_read_invocation(struct lp_build_nir_context * bld_base,LLVMValueRef src,unsigned bit_size,LLVMValueRef invoc,LLVMValueRef result[4])2213 static void emit_read_invocation(struct lp_build_nir_context *bld_base,
2214                                  LLVMValueRef src,
2215                                  unsigned bit_size,
2216                                  LLVMValueRef invoc,
2217                                  LLVMValueRef result[4])
2218 {
2219    struct gallivm_state *gallivm = bld_base->base.gallivm;
2220    LLVMBuilderRef builder = gallivm->builder;
2221    LLVMValueRef idx;
2222    struct lp_build_context *uint_bld = get_int_bld(bld_base, true, bit_size);
2223 
2224    /* have to find the first active invocation */
2225    LLVMValueRef exec_mask = mask_vec(bld_base);
2226    struct lp_build_loop_state loop_state;
2227    LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
2228    LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
2229    lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length));
2230 
2231    LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
2232    struct lp_build_if_state ifthen;
2233 
2234    lp_build_if(&ifthen, gallivm, if_cond);
2235    LLVMValueRef store_val = loop_state.counter;
2236    if (invoc)
2237       store_val = LLVMBuildExtractElement(gallivm->builder, invoc, loop_state.counter, "");
2238    LLVMBuildStore(builder, store_val, res_store);
2239    lp_build_endif(&ifthen);
2240 
2241    lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, -1),
2242                           lp_build_const_int32(gallivm, -1), LLVMIntEQ);
2243    idx = LLVMBuildLoad(builder, res_store, "");
2244 
2245    LLVMValueRef value = LLVMBuildExtractElement(gallivm->builder,
2246                                                 src, idx, "");
2247    result[0] = lp_build_broadcast_scalar(uint_bld, value);
2248 }
2249 
2250 static void
emit_interp_at(struct lp_build_nir_context * bld_base,unsigned num_components,nir_variable * var,bool centroid,bool sample,unsigned const_index,LLVMValueRef indir_index,LLVMValueRef offsets[2],LLVMValueRef dst[4])2251 emit_interp_at(struct lp_build_nir_context *bld_base,
2252                unsigned num_components,
2253                nir_variable *var,
2254                bool centroid,
2255                bool sample,
2256                unsigned const_index,
2257                LLVMValueRef indir_index,
2258                LLVMValueRef offsets[2],
2259                LLVMValueRef dst[4])
2260 {
2261    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2262 
2263    for (unsigned i = 0; i < num_components; i++) {
2264       dst[i] = bld->fs_iface->interp_fn(bld->fs_iface, &bld_base->base,
2265                                         const_index + var->data.driver_location, i + var->data.location_frac,
2266                                         centroid, sample, indir_index, offsets);
2267    }
2268 }
2269 
get_scratch_thread_offsets(struct gallivm_state * gallivm,struct lp_type type,unsigned scratch_size)2270 static LLVMValueRef get_scratch_thread_offsets(struct gallivm_state *gallivm,
2271                                                struct lp_type type,
2272                                                unsigned scratch_size)
2273 {
2274    LLVMTypeRef elem_type = lp_build_int_elem_type(gallivm, type);
2275    LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
2276    unsigned i;
2277 
2278    if (type.length == 1)
2279       return LLVMConstInt(elem_type, 0, 0);
2280 
2281    for (i = 0; i < type.length; ++i)
2282       elems[i] = LLVMConstInt(elem_type, scratch_size * i, 0);
2283 
2284    return LLVMConstVector(elems, type.length);
2285 }
2286 
2287 static void
emit_load_scratch(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,LLVMValueRef offset,LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])2288 emit_load_scratch(struct lp_build_nir_context *bld_base,
2289                   unsigned nc, unsigned bit_size,
2290                   LLVMValueRef offset,
2291                   LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
2292 {
2293    struct gallivm_state * gallivm = bld_base->base.gallivm;
2294    LLVMBuilderRef builder = gallivm->builder;
2295    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2296    struct lp_build_context *uint_bld = &bld_base->uint_bld;
2297    struct lp_build_context *load_bld;
2298    LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);;
2299    uint32_t shift_val = bit_size_to_shift_size(bit_size);
2300 
2301    load_bld = get_int_bld(bld_base, true, bit_size);
2302 
2303    offset = lp_build_add(uint_bld, offset, thread_offsets);
2304    offset = lp_build_shr_imm(uint_bld, offset, shift_val);
2305    for (unsigned c = 0; c < nc; c++) {
2306       LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
2307       LLVMValueRef exec_mask = mask_vec(bld_base);
2308 
2309       LLVMValueRef result = lp_build_alloca(gallivm, load_bld->vec_type, "");
2310       struct lp_build_loop_state loop_state;
2311       lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2312 
2313       struct lp_build_if_state ifthen;
2314       LLVMValueRef cond, temp_res;
2315 
2316       loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
2317                                            loop_state.counter, "");
2318       cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
2319       cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
2320 
2321       lp_build_if(&ifthen, gallivm, cond);
2322       LLVMValueRef scalar;
2323       LLVMValueRef ptr2 = LLVMBuildBitCast(builder, bld->scratch_ptr, LLVMPointerType(load_bld->elem_type, 0), "");
2324       scalar = lp_build_pointer_get(builder, ptr2, loop_index);
2325 
2326       temp_res = LLVMBuildLoad(builder, result, "");
2327       temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
2328       LLVMBuildStore(builder, temp_res, result);
2329       lp_build_else(&ifthen);
2330       temp_res = LLVMBuildLoad(builder, result, "");
2331       LLVMValueRef zero;
2332       if (bit_size == 64)
2333          zero = LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 0, 0);
2334       else if (bit_size == 16)
2335          zero = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0, 0);
2336       else if (bit_size == 8)
2337          zero = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0, 0);
2338       else
2339          zero = lp_build_const_int32(gallivm, 0);
2340       temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");
2341       LLVMBuildStore(builder, temp_res, result);
2342       lp_build_endif(&ifthen);
2343       lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
2344                                 NULL, LLVMIntUGE);
2345       outval[c] = LLVMBuildLoad(gallivm->builder, result, "");
2346    }
2347 }
2348 
2349 static void
emit_store_scratch(struct lp_build_nir_context * bld_base,unsigned writemask,unsigned nc,unsigned bit_size,LLVMValueRef offset,LLVMValueRef dst)2350 emit_store_scratch(struct lp_build_nir_context *bld_base,
2351                    unsigned writemask, unsigned nc,
2352                    unsigned bit_size, LLVMValueRef offset,
2353                    LLVMValueRef dst)
2354 {
2355    struct gallivm_state * gallivm = bld_base->base.gallivm;
2356    LLVMBuilderRef builder = gallivm->builder;
2357    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2358    struct lp_build_context *uint_bld = &bld_base->uint_bld;
2359    struct lp_build_context *store_bld;
2360    LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);;
2361    uint32_t shift_val = bit_size_to_shift_size(bit_size);
2362    store_bld = get_int_bld(bld_base, true, bit_size);
2363 
2364    LLVMValueRef exec_mask = mask_vec(bld_base);
2365    offset = lp_build_add(uint_bld, offset, thread_offsets);
2366    offset = lp_build_shr_imm(uint_bld, offset, shift_val);
2367 
2368    for (unsigned c = 0; c < nc; c++) {
2369       if (!(writemask & (1u << c)))
2370          continue;
2371       LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
2372       LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
2373 
2374       struct lp_build_loop_state loop_state;
2375       lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2376 
2377       LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
2378                                                        loop_state.counter, "");
2379       value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, "");
2380 
2381       struct lp_build_if_state ifthen;
2382       LLVMValueRef cond;
2383 
2384       loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
2385                                                         loop_state.counter, "");
2386 
2387       cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
2388       cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
2389       lp_build_if(&ifthen, gallivm, cond);
2390 
2391       LLVMValueRef ptr2 = LLVMBuildBitCast(builder, bld->scratch_ptr, LLVMPointerType(store_bld->elem_type, 0), "");
2392       lp_build_pointer_set(builder, ptr2, loop_index, value_ptr);
2393 
2394       lp_build_endif(&ifthen);
2395       lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
2396                              NULL, LLVMIntUGE);
2397    }
2398 }
2399 
lp_build_nir_soa(struct gallivm_state * gallivm,struct nir_shader * shader,const struct lp_build_tgsi_params * params,LLVMValueRef (* outputs)[4])2400 void lp_build_nir_soa(struct gallivm_state *gallivm,
2401                       struct nir_shader *shader,
2402                       const struct lp_build_tgsi_params *params,
2403                       LLVMValueRef (*outputs)[4])
2404 {
2405    struct lp_build_nir_soa_context bld;
2406    struct lp_type type = params->type;
2407    struct lp_type res_type;
2408 
2409    assert(type.length <= LP_MAX_VECTOR_LENGTH);
2410    memset(&res_type, 0, sizeof res_type);
2411    res_type.width = type.width;
2412    res_type.length = type.length;
2413    res_type.sign = 1;
2414 
2415    /* Setup build context */
2416    memset(&bld, 0, sizeof bld);
2417    lp_build_context_init(&bld.bld_base.base, gallivm, type);
2418    lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
2419    lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
2420    lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
2421    lp_build_context_init(&bld.uint_elem_bld, gallivm, lp_elem_type(lp_uint_type(type)));
2422    {
2423       struct lp_type dbl_type;
2424       dbl_type = type;
2425       dbl_type.width *= 2;
2426       lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
2427    }
2428    {
2429       struct lp_type half_type;
2430       half_type = type;
2431       half_type.width /= 2;
2432       lp_build_context_init(&bld.bld_base.half_bld, gallivm, half_type);
2433    }
2434    {
2435       struct lp_type uint64_type;
2436       uint64_type = lp_uint_type(type);
2437       uint64_type.width *= 2;
2438       lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
2439    }
2440    {
2441       struct lp_type int64_type;
2442       int64_type = lp_int_type(type);
2443       int64_type.width *= 2;
2444       lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
2445    }
2446    {
2447       struct lp_type uint16_type;
2448       uint16_type = lp_uint_type(type);
2449       uint16_type.width /= 2;
2450       lp_build_context_init(&bld.bld_base.uint16_bld, gallivm, uint16_type);
2451    }
2452    {
2453       struct lp_type int16_type;
2454       int16_type = lp_int_type(type);
2455       int16_type.width /= 2;
2456       lp_build_context_init(&bld.bld_base.int16_bld, gallivm, int16_type);
2457    }
2458    {
2459       struct lp_type uint8_type;
2460       uint8_type = lp_uint_type(type);
2461       uint8_type.width /= 4;
2462       lp_build_context_init(&bld.bld_base.uint8_bld, gallivm, uint8_type);
2463    }
2464    {
2465       struct lp_type int8_type;
2466       int8_type = lp_int_type(type);
2467       int8_type.width /= 4;
2468       lp_build_context_init(&bld.bld_base.int8_bld, gallivm, int8_type);
2469    }
2470    bld.bld_base.load_var = emit_load_var;
2471    bld.bld_base.store_var = emit_store_var;
2472    bld.bld_base.load_reg = emit_load_reg;
2473    bld.bld_base.store_reg = emit_store_reg;
2474    bld.bld_base.emit_var_decl = emit_var_decl;
2475    bld.bld_base.load_ubo = emit_load_ubo;
2476    bld.bld_base.load_kernel_arg = emit_load_kernel_arg;
2477    bld.bld_base.load_global = emit_load_global;
2478    bld.bld_base.store_global = emit_store_global;
2479    bld.bld_base.atomic_global = emit_atomic_global;
2480    bld.bld_base.tex = emit_tex;
2481    bld.bld_base.tex_size = emit_tex_size;
2482    bld.bld_base.bgnloop = bgnloop;
2483    bld.bld_base.endloop = endloop;
2484    bld.bld_base.if_cond = if_cond;
2485    bld.bld_base.else_stmt = else_stmt;
2486    bld.bld_base.endif_stmt = endif_stmt;
2487    bld.bld_base.break_stmt = break_stmt;
2488    bld.bld_base.continue_stmt = continue_stmt;
2489    bld.bld_base.sysval_intrin = emit_sysval_intrin;
2490    bld.bld_base.discard = discard;
2491    bld.bld_base.emit_vertex = emit_vertex;
2492    bld.bld_base.end_primitive = end_primitive;
2493    bld.bld_base.load_mem = emit_load_mem;
2494    bld.bld_base.store_mem = emit_store_mem;
2495    bld.bld_base.get_ssbo_size = emit_get_ssbo_size;
2496    bld.bld_base.atomic_mem = emit_atomic_mem;
2497    bld.bld_base.barrier = emit_barrier;
2498    bld.bld_base.image_op = emit_image_op;
2499    bld.bld_base.image_size = emit_image_size;
2500    bld.bld_base.vote = emit_vote;
2501    bld.bld_base.elect = emit_elect;
2502    bld.bld_base.reduce = emit_reduce;
2503    bld.bld_base.ballot = emit_ballot;
2504    bld.bld_base.read_invocation = emit_read_invocation;
2505    bld.bld_base.helper_invocation = emit_helper_invocation;
2506    bld.bld_base.interp_at = emit_interp_at;
2507    bld.bld_base.load_scratch = emit_load_scratch;
2508    bld.bld_base.store_scratch = emit_store_scratch;
2509 
2510    bld.mask = params->mask;
2511    bld.inputs = params->inputs;
2512    bld.outputs = outputs;
2513    bld.consts_ptr = params->consts_ptr;
2514    bld.const_sizes_ptr = params->const_sizes_ptr;
2515    bld.ssbo_ptr = params->ssbo_ptr;
2516    bld.ssbo_sizes_ptr = params->ssbo_sizes_ptr;
2517    bld.sampler = params->sampler;
2518 //   bld.bld_base.info = params->info;
2519 
2520    bld.context_ptr = params->context_ptr;
2521    bld.thread_data_ptr = params->thread_data_ptr;
2522    bld.bld_base.aniso_filter_table = params->aniso_filter_table;
2523    bld.image = params->image;
2524    bld.shared_ptr = params->shared_ptr;
2525    bld.coro = params->coro;
2526    bld.kernel_args_ptr = params->kernel_args;
2527    bld.indirects = 0;
2528    if (params->info->indirect_files & (1 << TGSI_FILE_INPUT))
2529       bld.indirects |= nir_var_shader_in;
2530 
2531    bld.gs_iface = params->gs_iface;
2532    bld.tcs_iface = params->tcs_iface;
2533    bld.tes_iface = params->tes_iface;
2534    bld.fs_iface = params->fs_iface;
2535    if (bld.gs_iface) {
2536       struct lp_build_context *uint_bld = &bld.bld_base.uint_bld;
2537 
2538       bld.gs_vertex_streams = params->gs_vertex_streams;
2539       bld.max_output_vertices_vec = lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
2540                                                            shader->info.gs.vertices_out);
2541       for (int i = 0; i < params->gs_vertex_streams; i++) {
2542          bld.emitted_prims_vec_ptr[i] =
2543             lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_prims_ptr");
2544          bld.emitted_vertices_vec_ptr[i] =
2545             lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_vertices_ptr");
2546          bld.total_emitted_vertices_vec_ptr[i] =
2547             lp_build_alloca(gallivm, uint_bld->vec_type, "total_emitted_vertices_ptr");
2548       }
2549    }
2550    lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
2551 
2552    bld.system_values = *params->system_values;
2553 
2554    bld.bld_base.shader = shader;
2555 
2556    if (shader->scratch_size) {
2557       bld.scratch_ptr = lp_build_array_alloca(gallivm,
2558                                               LLVMInt8TypeInContext(gallivm->context),
2559                                               lp_build_const_int32(gallivm, shader->scratch_size * type.length),
2560                                               "scratch");
2561    }
2562    bld.scratch_size = shader->scratch_size;
2563    emit_prologue(&bld);
2564    lp_build_nir_llvm(&bld.bld_base, shader);
2565 
2566    if (bld.gs_iface) {
2567       LLVMBuilderRef builder = bld.bld_base.base.gallivm->builder;
2568       LLVMValueRef total_emitted_vertices_vec;
2569       LLVMValueRef emitted_prims_vec;
2570 
2571       for (int i = 0; i < params->gs_vertex_streams; i++) {
2572          end_primitive_masked(&bld.bld_base, lp_build_mask_value(bld.mask), i);
2573 
2574          total_emitted_vertices_vec =
2575             LLVMBuildLoad(builder, bld.total_emitted_vertices_vec_ptr[i], "");
2576 
2577          emitted_prims_vec =
2578             LLVMBuildLoad(builder, bld.emitted_prims_vec_ptr[i], "");
2579          bld.gs_iface->gs_epilogue(bld.gs_iface,
2580                                    total_emitted_vertices_vec,
2581                                    emitted_prims_vec, i);
2582       }
2583    }
2584    lp_exec_mask_fini(&bld.exec_mask);
2585 }
2586