1 /**************************************************************************
2 *
3 * Copyright 2019 Red Hat.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **************************************************************************/
25
26 #include "lp_bld_nir.h"
27 #include "lp_bld_init.h"
28 #include "lp_bld_flow.h"
29 #include "lp_bld_logic.h"
30 #include "lp_bld_gather.h"
31 #include "lp_bld_const.h"
32 #include "lp_bld_struct.h"
33 #include "lp_bld_jit_types.h"
34 #include "lp_bld_arit.h"
35 #include "lp_bld_bitarit.h"
36 #include "lp_bld_coro.h"
37 #include "lp_bld_printf.h"
38 #include "lp_bld_intr.h"
39 #include "util/u_cpu_detect.h"
40 #include "util/u_math.h"
41
/* Return log2 of the byte size of a value of the given bit width, i.e. the
 * shift amount that converts an element index into a byte offset.  Any
 * unrecognized width is treated as 32-bit.
 */
static int bit_size_to_shift_size(int bit_size)
{
   if (bit_size == 64)
      return 3;
   if (bit_size == 16)
      return 1;
   if (bit_size == 8)
      return 0;
   /* 32-bit, and the fallback for anything unexpected. */
   return 2;
}
56
57 /*
58 * combine the execution mask if there is one with the current mask.
59 */
60 static LLVMValueRef
mask_vec(struct lp_build_nir_context * bld_base)61 mask_vec(struct lp_build_nir_context *bld_base)
62 {
63 struct lp_build_nir_soa_context * bld = (struct lp_build_nir_soa_context *)bld_base;
64 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
65 struct lp_exec_mask *exec_mask = &bld->exec_mask;
66 LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
67 if (!exec_mask->has_mask) {
68 return bld_mask;
69 }
70 if (!bld_mask)
71 return exec_mask->exec_mask;
72 return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
73 exec_mask->exec_mask, "");
74 }
75
76 static bool
invocation_0_must_be_active(struct lp_build_nir_context * bld_base)77 invocation_0_must_be_active(struct lp_build_nir_context *bld_base)
78 {
79 struct lp_build_nir_soa_context * bld = (struct lp_build_nir_soa_context *)bld_base;
80
81 /* Fragment shaders may dispatch with invocation 0 inactive. All other
82 * stages have invocation 0 active at the top. (See
83 * lp_build_tgsi_params.mask setup in draw_llvm.c and lp_state_*.c)
84 */
85 if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT)
86 return false;
87
88 /* If we're in some control flow right now, then invocation 0 may be
89 * disabled.
90 */
91 if (bld->exec_mask.has_mask)
92 return false;
93
94 return true;
95 }
96
97 /**
98 * Returns a scalar value of the first active invocation in the exec_mask.
99 *
100 * Note that gallivm doesn't generally jump when exec_mask is 0 (such as if/else
101 * branches thare are all false, or portions of a loop after a break/continue
102 * has ended the last invocation that had been active in the loop). In that
103 * case, we return a 0 value so that unconditional LLVMBuildExtractElement of
104 * the first_active_invocation (such as in memory loads, texture unit index
105 * lookups, etc) will use a valid index
106 */
first_active_invocation(struct lp_build_nir_context * bld_base)107 static LLVMValueRef first_active_invocation(struct lp_build_nir_context *bld_base)
108 {
109 struct gallivm_state *gallivm = bld_base->base.gallivm;
110 LLVMBuilderRef builder = gallivm->builder;
111 struct lp_build_context *uint_bld = &bld_base->uint_bld;
112
113 if (invocation_0_must_be_active(bld_base))
114 return lp_build_const_int32(gallivm, 0);
115
116 LLVMValueRef exec_mask = mask_vec(bld_base);
117
118 LLVMValueRef bitmask = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "exec_bitvec");
119 /* Turn it from N x i1 to iN, then extend it up to i32 so we can use a single
120 * cttz intrinsic -- I assume the compiler will drop the extend if there are
121 * smaller instructions available, since we have is_zero_poison.
122 */
123 bitmask = LLVMBuildBitCast(builder, bitmask, LLVMIntTypeInContext(gallivm->context, uint_bld->type.length), "exec_bitmask");
124 bitmask = LLVMBuildZExt(builder, bitmask, bld_base->int_bld.elem_type, "");
125
126 LLVMValueRef any_active = LLVMBuildICmp(builder, LLVMIntNE, bitmask, lp_build_const_int32(gallivm, 0), "any_active");
127
128 LLVMValueRef first_active = lp_build_intrinsic_binary(builder, "llvm.cttz.i32", bld_base->int_bld.elem_type, bitmask,
129 LLVMConstInt(LLVMInt1TypeInContext(gallivm->context), false, false));
130
131 return LLVMBuildSelect(builder, any_active, first_active, lp_build_const_int32(gallivm, 0), "first_active_or_0");
132 }
133
134 static LLVMValueRef
lp_build_zero_bits(struct gallivm_state * gallivm,int bit_size,bool is_float)135 lp_build_zero_bits(struct gallivm_state *gallivm, int bit_size, bool is_float)
136 {
137 if (bit_size == 64)
138 return LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 0, 0);
139 else if (bit_size == 16)
140 return LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0, 0);
141 else if (bit_size == 8)
142 return LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0, 0);
143 else
144 return is_float ? lp_build_const_float(gallivm, 0) : lp_build_const_int32(gallivm, 0);
145 }
146
147 static LLVMValueRef
emit_fetch_64bit(struct lp_build_nir_context * bld_base,LLVMValueRef input,LLVMValueRef input2)148 emit_fetch_64bit(
149 struct lp_build_nir_context * bld_base,
150 LLVMValueRef input,
151 LLVMValueRef input2)
152 {
153 struct gallivm_state *gallivm = bld_base->base.gallivm;
154 LLVMBuilderRef builder = gallivm->builder;
155 LLVMValueRef res;
156 int i;
157 LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
158 int len = bld_base->base.type.length * 2;
159 assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
160
161 for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
162 #if UTIL_ARCH_LITTLE_ENDIAN
163 shuffles[i] = lp_build_const_int32(gallivm, i / 2);
164 shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
165 #else
166 shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
167 shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
168 #endif
169 }
170 res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
171
172 return LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
173 }
174
175 static void
emit_store_64bit_split(struct lp_build_nir_context * bld_base,LLVMValueRef value,LLVMValueRef split_values[2])176 emit_store_64bit_split(struct lp_build_nir_context *bld_base,
177 LLVMValueRef value,
178 LLVMValueRef split_values[2])
179 {
180 struct gallivm_state *gallivm = bld_base->base.gallivm;
181 LLVMBuilderRef builder = gallivm->builder;
182 unsigned i;
183 LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
184 LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
185 int len = bld_base->base.type.length * 2;
186
187 value = LLVMBuildBitCast(gallivm->builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), len), "");
188 for (i = 0; i < bld_base->base.type.length; i++) {
189 #if UTIL_ARCH_LITTLE_ENDIAN
190 shuffles[i] = lp_build_const_int32(gallivm, i * 2);
191 shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
192 #else
193 shuffles[i] = lp_build_const_int32(gallivm, i * 2 + 1);
194 shuffles2[i] = lp_build_const_int32(gallivm, i * 2);
195 #endif
196 }
197
198 split_values[0] = LLVMBuildShuffleVector(builder, value,
199 LLVMGetUndef(LLVMTypeOf(value)),
200 LLVMConstVector(shuffles,
201 bld_base->base.type.length),
202 "");
203 split_values[1] = LLVMBuildShuffleVector(builder, value,
204 LLVMGetUndef(LLVMTypeOf(value)),
205 LLVMConstVector(shuffles2,
206 bld_base->base.type.length),
207 "");
208 }
209
210 static void
emit_store_64bit_chan(struct lp_build_nir_context * bld_base,LLVMValueRef chan_ptr,LLVMValueRef chan_ptr2,LLVMValueRef value)211 emit_store_64bit_chan(struct lp_build_nir_context *bld_base,
212 LLVMValueRef chan_ptr,
213 LLVMValueRef chan_ptr2,
214 LLVMValueRef value)
215 {
216 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
217 struct lp_build_context *float_bld = &bld_base->base;
218 LLVMValueRef split_vals[2];
219
220 emit_store_64bit_split(bld_base, value, split_vals);
221
222 lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[0], chan_ptr);
223 lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[1], chan_ptr2);
224 }
225
226 static LLVMValueRef
get_soa_array_offsets(struct lp_build_context * uint_bld,LLVMValueRef indirect_index,int num_components,unsigned chan_index,bool need_perelement_offset)227 get_soa_array_offsets(struct lp_build_context *uint_bld,
228 LLVMValueRef indirect_index,
229 int num_components,
230 unsigned chan_index,
231 bool need_perelement_offset)
232 {
233 struct gallivm_state *gallivm = uint_bld->gallivm;
234 LLVMValueRef chan_vec =
235 lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
236 LLVMValueRef length_vec =
237 lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
238 LLVMValueRef index_vec;
239
240 /* index_vec = (indirect_index * num_components + chan_index) * length + offsets */
241 index_vec = lp_build_mul(uint_bld, indirect_index, lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, num_components));
242 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
243 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
244
245 if (need_perelement_offset) {
246 LLVMValueRef pixel_offsets;
247 unsigned i;
248 /* build pixel offset vector: {0, 1, 2, 3, ...} */
249 pixel_offsets = uint_bld->undef;
250 for (i = 0; i < uint_bld->type.length; i++) {
251 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
252 pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
253 ii, ii, "");
254 }
255 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
256 }
257 return index_vec;
258 }
259
/**
 * Gather one scalar of 'base_type' per lane from base_ptr[indexes[lane]]
 * and assemble the results into a vector.
 *
 * When 'indexes2' is non-NULL a double-width result is built: even result
 * elements come from 'indexes' and odd ones from 'indexes2' (used to
 * assemble 64-bit values out of two 32-bit fetches).
 *
 * 'overflow_mask' (optional) marks out-of-bounds lanes; those lanes fetch
 * from index 0 instead and are zeroed in the final result.
 */
static LLVMValueRef
build_gather(struct lp_build_nir_context *bld_base,
             struct lp_build_context *bld,
             LLVMTypeRef base_type,
             LLVMValueRef base_ptr,
             LLVMValueRef indexes,
             LLVMValueRef overflow_mask,
             LLVMValueRef indexes2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMValueRef res;
   unsigned i;

   if (indexes2)
      /* Double-width result: 2x the native vector length, as floats. */
      res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
   else
      res = bld->undef;
   /*
    * overflow_mask is a vector telling us which channels
    * in the vector overflowed. We use the overflow behavior for
    * constant buffers which is defined as:
    * Out of bounds access to constant buffer returns 0 in all
    * components. Out of bounds behavior is always with respect
    * to the size of the buffer bound at that slot.
    */

   if (overflow_mask) {
      /*
       * We avoid per-element control flow here (also due to llvm going crazy,
       * though I suspect it's better anyway since overflow is likely rare).
       * Note that since we still fetch from buffers even if num_elements was
       * zero (in this case we'll fetch from index zero) the jit func callers
       * MUST provide valid fake constant buffers of size 4x32 (the values do
       * not matter), otherwise we'd still need (not per element though)
       * control flow.
       */
      indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
      if (indexes2)
         indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
   }

   /*
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    */
   for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
      LLVMValueRef si, di;
      LLVMValueRef index;
      LLVMValueRef scalar_ptr, scalar;

      /* di = destination element; si = source lane.  When interleaving,
       * each source lane feeds two consecutive destination elements. */
      di = lp_build_const_int32(gallivm, i);
      if (indexes2)
         si = lp_build_const_int32(gallivm, i >> 1);
      else
         si = di;

      /* Odd elements read from the second index vector. */
      if (indexes2 && (i & 1)) {
         index = LLVMBuildExtractElement(builder,
                                         indexes2, si, "");
      } else {
         index = LLVMBuildExtractElement(builder,
                                         indexes, si, "");
      }

      scalar_ptr = LLVMBuildGEP2(builder, base_type, base_ptr, &index, 1, "gather_ptr");
      scalar = LLVMBuildLoad2(builder, base_type, scalar_ptr, "");

      res = LLVMBuildInsertElement(builder, res, scalar, di, "");
   }

   if (overflow_mask) {
      if (indexes2) {
         /* Widen the i1-per-lane mask to 64-bit lanes before selecting. */
         res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
         overflow_mask = LLVMBuildSExt(builder, overflow_mask,
                                       bld_base->dbl_bld.int_vec_type, "");
         res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
                               bld_base->dbl_bld.zero, res);
      } else
         res = lp_build_select(bld, overflow_mask, bld->zero, res);
   }

   return res;
}
344
345 /**
346 * Scatter/store vector.
347 */
348 static void
emit_mask_scatter(struct lp_build_nir_soa_context * bld,LLVMValueRef base_ptr,LLVMValueRef indexes,LLVMValueRef values,struct lp_exec_mask * mask)349 emit_mask_scatter(struct lp_build_nir_soa_context *bld,
350 LLVMValueRef base_ptr,
351 LLVMValueRef indexes,
352 LLVMValueRef values,
353 struct lp_exec_mask *mask)
354 {
355 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
356 LLVMBuilderRef builder = gallivm->builder;
357 unsigned i;
358 LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;
359
360 /*
361 * Loop over elements of index_vec, store scalar value.
362 */
363 for (i = 0; i < bld->bld_base.base.type.length; i++) {
364 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
365 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
366 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
367 LLVMValueRef scalar_ptr = LLVMBuildGEP2(builder, LLVMTypeOf(val), base_ptr, &index, 1, "scatter_ptr");
368 LLVMValueRef scalar_pred = pred ?
369 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
370
371 if (0)
372 lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
373 ii, val, index, scalar_ptr);
374
375 if (scalar_pred) {
376 LLVMValueRef real_val, dst_val;
377 dst_val = LLVMBuildLoad2(builder, LLVMTypeOf(val), scalar_ptr, "");
378 scalar_pred = LLVMBuildTrunc(builder, scalar_pred, LLVMInt1TypeInContext(gallivm->context), "");
379 real_val = LLVMBuildSelect(builder, scalar_pred, val, dst_val, "");
380 LLVMBuildStore(builder, real_val, scalar_ptr);
381 }
382 else {
383 LLVMBuildStore(builder, val, scalar_ptr);
384 }
385 }
386 }
387
/**
 * Load 'num_components' components of a variable into 'result'.
 *
 * Shader inputs are fetched through the stage interface when one is bound
 * (GS, TES, TCS); otherwise they come from the inputs array (indirectly via
 * a gather, or directly from bld->inputs).  Shader outputs can be read back
 * for TCS, and for fragment framebuffer fetch.  64-bit values are assembled
 * from two adjacent 32-bit slots with emit_fetch_64bit().
 */
static void emit_load_var(struct lp_build_nir_context *bld_base,
                          nir_variable_mode deref_mode,
                          unsigned num_components,
                          unsigned bit_size,
                          nir_variable *var,
                          unsigned vertex_index,
                          LLVMValueRef indir_vertex_index,
                          unsigned const_index,
                          LLVMValueRef indir_index,
                          LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   /* 64-bit components occupy two 32-bit channels each. */
   int dmul = bit_size == 64 ? 2 : 1;
   unsigned location = var->data.driver_location;
   unsigned location_frac = var->data.location_frac;

   /* Fold the constant part of the deref into location (and, for compact
    * arrays, into the component within the location). */
   if (!var->data.compact && !indir_index)
      location += const_index;
   else if (var->data.compact) {
      location += const_index / 4;
      location_frac += const_index % 4;
      const_index = 0;
   }
   switch (deref_mode) {
   case nir_var_shader_in:
      for (unsigned i = 0; i < num_components; i++) {
         int idx = (i * dmul) + location_frac;
         int comp_loc = location;

         /* 64-bit components may spill past channel w into the next slot. */
         if (bit_size == 64 && idx >= 4) {
            comp_loc++;
            idx = idx % 4;
         }

         if (bld->gs_iface) {
            LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
            LLVMValueRef attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
            LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
            LLVMValueRef result2;

            result[i] = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
                                                   false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
            if (bit_size == 64) {
               /* Fetch the high half from the adjacent channel. */
               LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
               result2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
                                                    false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
               result[i] = emit_fetch_64bit(bld_base, result[i], result2);
            }
         } else if (bld->tes_iface) {
            LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
            LLVMValueRef attrib_index_val;
            LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
            LLVMValueRef result2;

            /* Compact arrays are indexed indirectly per-component, regular
             * arrays per-location. */
            if (indir_index) {
               if (var->data.compact) {
                  swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));
                  attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
               } else
                  attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
            } else
               attrib_index_val = lp_build_const_int32(gallivm, comp_loc);

            if (var->data.patch) {
               result[i] = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
                                                             indir_index ? true : false, attrib_index_val, swizzle_index_val);
               if (bit_size == 64) {
                  LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
                  result2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
                                                              indir_index ? true : false, attrib_index_val, swizzle_index_val);
                  result[i] = emit_fetch_64bit(bld_base, result[i], result2);
               }
            }
            else {
               result[i] = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
                                                              indir_vertex_index ? true : false,
                                                              indir_vertex_index ? indir_vertex_index : vertex_index_val,
                                                              (indir_index && !var->data.compact) ? true : false, attrib_index_val,
                                                              (indir_index && var->data.compact) ? true : false, swizzle_index_val);
               if (bit_size == 64) {
                  LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
                  result2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
                                                               indir_vertex_index ? true : false,
                                                               indir_vertex_index ? indir_vertex_index : vertex_index_val,
                                                               indir_index ? true : false, attrib_index_val, false, swizzle_index_val);
                  result[i] = emit_fetch_64bit(bld_base, result[i], result2);
               }
            }
         } else if (bld->tcs_iface) {
            LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
            LLVMValueRef attrib_index_val;
            LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);

            if (indir_index) {
               if (var->data.compact) {
                  swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));
                  attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
               } else
                  attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
            } else
               attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
            result[i] = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
                                                         indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
                                                         (indir_index && !var->data.compact) ? true : false, attrib_index_val,
                                                         (indir_index && var->data.compact) ? true : false, swizzle_index_val);
            if (bit_size == 64) {
               LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
               LLVMValueRef result2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
                                                                       indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
                                                                       indir_index ? true : false, attrib_index_val,
                                                                       false, swizzle_index_val);
               result[i] = emit_fetch_64bit(bld_base, result[i], result2);
            }
         } else {
            if (indir_index) {
               LLVMValueRef attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
               LLVMValueRef index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                                              attrib_index_val, 4, idx,
                                                              true);
               LLVMValueRef index_vec2 = NULL;
               LLVMTypeRef scalar_type = LLVMFloatTypeInContext(gallivm->context);
               LLVMValueRef inputs_array = LLVMBuildBitCast(gallivm->builder, bld->inputs_array, LLVMPointerType(scalar_type, 0), "");

               /* NOTE(review): the high half is offset from indir_index while
                * the low half uses attrib_index_val (indir_index + comp_loc);
                * confirm this asymmetry is intentional. */
               if (bit_size == 64)
                  index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                                     indir_index, 4, idx + 1, true);

               /* Gather values from the input register array */
               result[i] = build_gather(bld_base, &bld_base->base, scalar_type, inputs_array, index_vec, NULL, index_vec2);
            } else {
               if (bld->indirects & nir_var_shader_in) {
                  /* Inputs live in memory; load directly at a constant slot. */
                  LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                                             comp_loc * 4 + idx);
                  LLVMValueRef input_ptr = lp_build_pointer_get2(gallivm->builder,
                                                                 bld->bld_base.base.vec_type,
                                                                 bld->inputs_array, lindex);
                  if (bit_size == 64) {
                     LLVMValueRef lindex2 = lp_build_const_int32(gallivm,
                                                                 comp_loc * 4 + (idx + 1));
                     LLVMValueRef input_ptr2 = lp_build_pointer_get2(gallivm->builder,
                                                                     bld->bld_base.base.vec_type,
                                                                     bld->inputs_array, lindex2);
                     result[i] = emit_fetch_64bit(bld_base, input_ptr, input_ptr2);
                  } else {
                     result[i] = input_ptr;
                  }
               } else {
                  /* Inputs are plain SSA values. */
                  if (bit_size == 64) {
                     LLVMValueRef tmp[2];
                     tmp[0] = bld->inputs[comp_loc][idx];
                     tmp[1] = bld->inputs[comp_loc][idx + 1];
                     result[i] = emit_fetch_64bit(bld_base, tmp[0], tmp[1]);
                  } else {
                     result[i] = bld->inputs[comp_loc][idx];
                  }
               }
            }
         }
      }
      break;
   case nir_var_shader_out:
      /* Fragment shader framebuffer fetch. */
      if (bld->fs_iface && bld->fs_iface->fb_fetch) {
         bld->fs_iface->fb_fetch(bld->fs_iface, &bld_base->base, var->data.location, result);
         return;
      }
      /* Otherwise output read-back is only supported through the TCS
       * interface; other stages leave 'result' untouched. */
      for (unsigned i = 0; i < num_components; i++) {
         int idx = (i * dmul) + location_frac;
         if (bld->tcs_iface) {
            LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
            LLVMValueRef attrib_index_val;
            LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);

            if (indir_index)
               attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
            else
               attrib_index_val = lp_build_const_int32(gallivm, location);

            result[i] = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
                                                          indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
                                                          (indir_index && !var->data.compact) ? true : false, attrib_index_val,
                                                          (indir_index && var->data.compact) ? true : false, swizzle_index_val, 0);
            if (bit_size == 64) {
               LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
               LLVMValueRef result2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
                                                                        indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
                                                                        indir_index ? true : false, attrib_index_val,
                                                                        false, swizzle_index_val, 0);
               result[i] = emit_fetch_64bit(bld_base, result[i], result2);
            }
         }
      }
      break;
   default:
      break;
   }
}
585
/* Store one channel of a (non-TCS/mesh) shader output, applying the
 * execution mask.  64-bit channels are split across two adjacent 32-bit
 * output slots, wrapping into the next location past channel w.
 */
static void emit_store_chan(struct lp_build_nir_context *bld_base,
                            nir_variable_mode deref_mode,
                            unsigned bit_size,
                            unsigned location, unsigned comp,
                            unsigned chan,
                            LLVMValueRef dst)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *float_bld = &bld_base->base;

   if (bit_size != 64) {
      dst = LLVMBuildBitCast(builder, dst, float_bld->vec_type, "");
      lp_exec_mask_store(&bld->exec_mask, float_bld, dst,
                         bld->outputs[location][chan + comp]);
      return;
   }

   /* 64-bit: each channel occupies two 32-bit slots. */
   chan = chan * 2 + comp;
   if (chan >= 4) {
      chan -= 4;
      location++;
   }
   emit_store_64bit_chan(bld_base, bld->outputs[location][chan],
                         bld->outputs[location][chan + 1], dst);
}
612
/**
 * Store one output channel through the TCS (or mesh, when bound) stage
 * interface, honouring indirect indexing and the combined execution mask.
 * 64-bit channels are split into two 32-bit halves stored at adjacent
 * swizzle slots.
 */
static void emit_store_tcs_chan(struct lp_build_nir_context *bld_base,
                                bool is_compact,
                                unsigned bit_size,
                                unsigned location,
                                unsigned const_index,
                                LLVMValueRef indir_vertex_index,
                                LLVMValueRef indir_index,
                                unsigned comp,
                                unsigned chan,
                                LLVMValueRef chan_val)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   unsigned swizzle = chan;
   /* 64-bit channels take two 32-bit slots; wrap into the next location
    * past channel w. */
   if (bit_size == 64) {
      swizzle *= 2;
      swizzle += comp;
      if (swizzle >= 4) {
         swizzle -= 4;
         location++;
      }
   } else
      swizzle += comp;
   LLVMValueRef attrib_index_val;
   LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, swizzle);

   /* Compact arrays are indirectly indexed per component; regular arrays
    * per location. */
   if (indir_index) {
      if (is_compact) {
         swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, swizzle));
         attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
      } else
         attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, location));
   } else
      attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
   LLVMValueRef exec_mask = mask_vec(bld_base);
   if (bit_size == 64) {
      /* Split into low/high 32-bit halves; store each at its own swizzle. */
      LLVMValueRef split_vals[2];
      LLVMValueRef swizzle_index_val2 = lp_build_const_int32(gallivm, swizzle + 1);
      emit_store_64bit_split(bld_base, chan_val, split_vals);
      if (bld->mesh_iface) {
         bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
                                            indir_vertex_index ? true : false,
                                            indir_vertex_index,
                                            indir_index ? true : false,
                                            attrib_index_val,
                                            false, swizzle_index_val,
                                            split_vals[0], exec_mask);
         bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
                                            indir_vertex_index ? true : false,
                                            indir_vertex_index,
                                            indir_index ? true : false,
                                            attrib_index_val,
                                            false, swizzle_index_val2,
                                            split_vals[1], exec_mask);
      } else {
         bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
                                           indir_vertex_index ? true : false,
                                           indir_vertex_index,
                                           indir_index ? true : false,
                                           attrib_index_val,
                                           false, swizzle_index_val,
                                           split_vals[0], exec_mask);
         bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
                                           indir_vertex_index ? true : false,
                                           indir_vertex_index,
                                           indir_index ? true : false,
                                           attrib_index_val,
                                           false, swizzle_index_val2,
                                           split_vals[1], exec_mask);
      }
   } else {
      chan_val = LLVMBuildBitCast(builder, chan_val, bld_base->base.vec_type, "");
      if (bld->mesh_iface) {
         bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
                                            indir_vertex_index ? true : false,
                                            indir_vertex_index,
                                            indir_index && !is_compact ? true : false,
                                            attrib_index_val,
                                            indir_index && is_compact ? true : false,
                                            swizzle_index_val,
                                            chan_val, exec_mask);
      } else {
         bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
                                           indir_vertex_index ? true : false,
                                           indir_vertex_index,
                                           indir_index && !is_compact ? true : false,
                                           attrib_index_val,
                                           indir_index && is_compact ? true : false,
                                           swizzle_index_val,
                                           chan_val, exec_mask);
      }
   }
}
707
/**
 * Store one output channel through the mesh stage interface, honouring
 * indirect indexing and the combined execution mask.  Like the TCS path,
 * but note the 64-bit case folds const_index into the swizzle rather than
 * into the attribute index.
 */
static void emit_store_mesh_chan(struct lp_build_nir_context *bld_base,
                                 bool is_compact,
                                 unsigned bit_size,
                                 unsigned location,
                                 unsigned const_index,
                                 LLVMValueRef indir_vertex_index,
                                 LLVMValueRef indir_index,
                                 unsigned comp,
                                 unsigned chan,
                                 LLVMValueRef chan_val)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   unsigned swizzle = chan;
   /* 64-bit channels take two 32-bit slots; wrap past channel w. */
   if (bit_size == 64) {
      swizzle += const_index;
      swizzle *= 2;
      swizzle += comp;
      if (swizzle >= 4) {
         swizzle -= 4;
         location++;
      }
   } else
      swizzle += comp;
   LLVMValueRef attrib_index_val;
   LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, swizzle);

   /* Compact arrays are indirectly indexed per component; regular arrays
    * per location. */
   if (indir_index) {
      if (is_compact) {
         swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, swizzle));
         attrib_index_val = lp_build_const_int32(gallivm, location);
      } else
         attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, location));
   } else
      attrib_index_val = lp_build_const_int32(gallivm, location + const_index);
   LLVMValueRef exec_mask = mask_vec(bld_base);
   if (bit_size == 64) {
      /* Split into low/high 32-bit halves; store each at its own swizzle. */
      LLVMValueRef split_vals[2];
      LLVMValueRef swizzle_index_val2 = lp_build_const_int32(gallivm, swizzle + 1);
      emit_store_64bit_split(bld_base, chan_val, split_vals);
      bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
                                         indir_vertex_index ? true : false,
                                         indir_vertex_index,
                                         indir_index ? true : false,
                                         attrib_index_val,
                                         false, swizzle_index_val,
                                         split_vals[0], exec_mask);
      bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
                                         indir_vertex_index ? true : false,
                                         indir_vertex_index,
                                         indir_index ? true : false,
                                         attrib_index_val,
                                         false, swizzle_index_val2,
                                         split_vals[1], exec_mask);
   } else {
      chan_val = LLVMBuildBitCast(builder, chan_val, bld_base->base.vec_type, "");
      bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
                                         indir_vertex_index ? true : false,
                                         indir_vertex_index,
                                         indir_index && !is_compact ? true : false,
                                         attrib_index_val,
                                         indir_index && is_compact ? true : false,
                                         swizzle_index_val,
                                         chan_val, exec_mask);
   }
}
775
/* Store the written components of a variable.  Only shader outputs are
 * supported; the per-channel store is dispatched to the mesh, TCS, or
 * plain output path.
 */
static void emit_store_var(struct lp_build_nir_context *bld_base,
                           nir_variable_mode deref_mode,
                           unsigned num_components,
                           unsigned bit_size,
                           nir_variable *var,
                           unsigned writemask,
                           LLVMValueRef indir_vertex_index,
                           unsigned const_index,
                           LLVMValueRef indir_index,
                           LLVMValueRef dst)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (deref_mode != nir_var_shader_out)
      return;

   unsigned location = var->data.driver_location;
   unsigned comp = var->data.location_frac;

   /* Fragment depth/stencil results live in fixed components. */
   if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
      if (var->data.location == FRAG_RESULT_STENCIL)
         comp = 1;
      else if (var->data.location == FRAG_RESULT_DEPTH)
         comp = 2;
   }

   /* Fold the constant index of compact arrays into location/component. */
   if (var->data.compact) {
      location += const_index / 4;
      comp += const_index % 4;
      const_index = 0;
   }

   for (unsigned chan = 0; chan < num_components; chan++) {
      if (!(writemask & (1u << chan)))
         continue;

      LLVMValueRef chan_val = (num_components == 1) ? dst :
         LLVMBuildExtractValue(builder, dst, chan, "");

      if (bld->mesh_iface) {
         emit_store_mesh_chan(bld_base, var->data.compact, bit_size, location,
                              const_index, indir_vertex_index, indir_index,
                              comp, chan, chan_val);
      } else if (bld->tcs_iface) {
         emit_store_tcs_chan(bld_base, var->data.compact, bit_size, location,
                             const_index, indir_vertex_index, indir_index,
                             comp, chan, chan_val);
      } else {
         emit_store_chan(bld_base, deref_mode, bit_size,
                         location + const_index, comp, chan, chan_val);
      }
   }
}
823
824 /**
825 * Returns the address of the given constant array index and channel in a
826 * nir register.
827 */
reg_chan_pointer(struct lp_build_nir_context * bld_base,struct lp_build_context * reg_bld,const nir_intrinsic_instr * decl,LLVMValueRef reg_storage,int array_index,int chan)828 static LLVMValueRef reg_chan_pointer(struct lp_build_nir_context *bld_base,
829 struct lp_build_context *reg_bld,
830 const nir_intrinsic_instr *decl,
831 LLVMValueRef reg_storage,
832 int array_index, int chan)
833 {
834 struct gallivm_state *gallivm = bld_base->base.gallivm;
835 int nc = nir_intrinsic_num_components(decl);
836 int num_array_elems = nir_intrinsic_num_array_elems(decl);
837
838 LLVMTypeRef chan_type = reg_bld->vec_type;
839 if (nc > 1)
840 chan_type = LLVMArrayType(chan_type, nc);
841
842 if (num_array_elems > 0) {
843 LLVMTypeRef array_type = LLVMArrayType(chan_type, num_array_elems);
844 reg_storage = lp_build_array_get_ptr2(gallivm, array_type, reg_storage,
845 lp_build_const_int32(gallivm, array_index));
846 }
847 if (nc > 1) {
848 reg_storage = lp_build_array_get_ptr2(gallivm, chan_type, reg_storage,
849 lp_build_const_int32(gallivm, chan));
850 }
851
852 return reg_storage;
853 }
854
emit_load_reg(struct lp_build_nir_context * bld_base,struct lp_build_context * reg_bld,const nir_intrinsic_instr * decl,unsigned base,LLVMValueRef indir_src,LLVMValueRef reg_storage)855 static LLVMValueRef emit_load_reg(struct lp_build_nir_context *bld_base,
856 struct lp_build_context *reg_bld,
857 const nir_intrinsic_instr *decl,
858 unsigned base,
859 LLVMValueRef indir_src,
860 LLVMValueRef reg_storage)
861 {
862 struct gallivm_state *gallivm = bld_base->base.gallivm;
863 LLVMBuilderRef builder = gallivm->builder;
864 int nc = nir_intrinsic_num_components(decl);
865 int num_array_elems = nir_intrinsic_num_array_elems(decl);
866 LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS] = { NULL };
867 struct lp_build_context *uint_bld = &bld_base->uint_bld;
868 if (indir_src != NULL) {
869 LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, base);
870 LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, num_array_elems - 1);
871 indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, "");
872 indirect_val = lp_build_min(uint_bld, indirect_val, max_index);
873 reg_storage = LLVMBuildBitCast(builder, reg_storage, LLVMPointerType(reg_bld->elem_type, 0), "");
874 for (unsigned i = 0; i < nc; i++) {
875 LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, true);
876 vals[i] = build_gather(bld_base, reg_bld, reg_bld->elem_type, reg_storage, indirect_offset, NULL, NULL);
877 }
878 } else {
879 for (unsigned i = 0; i < nc; i++) {
880 vals[i] = LLVMBuildLoad2(builder, reg_bld->vec_type,
881 reg_chan_pointer(bld_base, reg_bld, decl, reg_storage,
882 base, i), "");
883 }
884 }
885 return nc == 1 ? vals[0] : lp_nir_array_build_gather_values(builder, vals, nc);
886 }
887
emit_store_reg(struct lp_build_nir_context * bld_base,struct lp_build_context * reg_bld,const nir_intrinsic_instr * decl,unsigned writemask,unsigned base,LLVMValueRef indir_src,LLVMValueRef reg_storage,LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS])888 static void emit_store_reg(struct lp_build_nir_context *bld_base,
889 struct lp_build_context *reg_bld,
890 const nir_intrinsic_instr *decl,
891 unsigned writemask,
892 unsigned base,
893 LLVMValueRef indir_src,
894 LLVMValueRef reg_storage,
895 LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS])
896 {
897 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
898 struct gallivm_state *gallivm = bld_base->base.gallivm;
899 LLVMBuilderRef builder = gallivm->builder;
900 struct lp_build_context *uint_bld = &bld_base->uint_bld;
901 int nc = nir_intrinsic_num_components(decl);
902 int num_array_elems = nir_intrinsic_num_array_elems(decl);
903 if (indir_src != NULL) {
904 LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, base);
905 LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, num_array_elems - 1);
906 indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, "");
907 indirect_val = lp_build_min(uint_bld, indirect_val, max_index);
908 reg_storage = LLVMBuildBitCast(builder, reg_storage, LLVMPointerType(reg_bld->elem_type, 0), "");
909 for (unsigned i = 0; i < nc; i++) {
910 if (!(writemask & (1 << i)))
911 continue;
912 LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, true);
913 dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, "");
914 emit_mask_scatter(bld, reg_storage, indirect_offset, dst[i], &bld->exec_mask);
915 }
916 return;
917 }
918
919 for (unsigned i = 0; i < nc; i++) {
920 if (!(writemask & (1 << i)))
921 continue;
922 dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, "");
923 lp_exec_mask_store(&bld->exec_mask, reg_bld, dst[i],
924 reg_chan_pointer(bld_base, reg_bld, decl, reg_storage,
925 base, i));
926 }
927 }
928
emit_load_kernel_arg(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,unsigned offset_bit_size,bool offset_is_uniform,LLVMValueRef offset,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])929 static void emit_load_kernel_arg(struct lp_build_nir_context *bld_base,
930 unsigned nc,
931 unsigned bit_size,
932 unsigned offset_bit_size,
933 bool offset_is_uniform,
934 LLVMValueRef offset,
935 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
936 {
937 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
938 struct gallivm_state *gallivm = bld_base->base.gallivm;
939 LLVMBuilderRef builder = gallivm->builder;
940 struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size);
941 LLVMValueRef kernel_args_ptr = bld->kernel_args_ptr;
942 unsigned size_shift = bit_size_to_shift_size(bit_size);
943 struct lp_build_context *bld_offset = get_int_bld(bld_base, true, offset_bit_size);
944 if (size_shift)
945 offset = lp_build_shr(bld_offset, offset, lp_build_const_int_vec(gallivm, bld_offset->type, size_shift));
946
947 LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0);
948 kernel_args_ptr = LLVMBuildBitCast(builder, kernel_args_ptr, ptr_type, "");
949
950 if (offset_is_uniform) {
951 offset = LLVMBuildExtractElement(builder, offset, first_active_invocation(bld_base), "");
952
953 for (unsigned c = 0; c < nc; c++) {
954 LLVMValueRef this_offset = LLVMBuildAdd(builder, offset, offset_bit_size == 64 ? lp_build_const_int64(gallivm, c) : lp_build_const_int32(gallivm, c), "");
955
956 LLVMValueRef scalar = lp_build_pointer_get2(builder, bld_broad->elem_type, kernel_args_ptr, this_offset);
957 result[c] = lp_build_broadcast_scalar(bld_broad, scalar);
958 }
959 } else {
960 unreachable("load_kernel_arg must have a uniform offset.");
961 }
962 }
963
global_addr_to_ptr(struct gallivm_state * gallivm,LLVMValueRef addr_ptr,unsigned bit_size)964 static LLVMValueRef global_addr_to_ptr(struct gallivm_state *gallivm, LLVMValueRef addr_ptr, unsigned bit_size)
965 {
966 LLVMBuilderRef builder = gallivm->builder;
967 switch (bit_size) {
968 case 8:
969 addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");
970 break;
971 case 16:
972 addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt16TypeInContext(gallivm->context), 0), "");
973 break;
974 case 32:
975 default:
976 addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), "");
977 break;
978 case 64:
979 addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0), "");
980 break;
981 }
982 return addr_ptr;
983 }
984
global_addr_to_ptr_vec(struct gallivm_state * gallivm,LLVMValueRef addr_ptr,unsigned length,unsigned bit_size)985 static LLVMValueRef global_addr_to_ptr_vec(struct gallivm_state *gallivm, LLVMValueRef addr_ptr, unsigned length, unsigned bit_size)
986 {
987 LLVMBuilderRef builder = gallivm->builder;
988 switch (bit_size) {
989 case 8:
990 addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMVectorType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), length), "");
991 break;
992 case 16:
993 addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMVectorType(LLVMPointerType(LLVMInt16TypeInContext(gallivm->context), 0), length), "");
994 break;
995 case 32:
996 default:
997 addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMVectorType(LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), length), "");
998 break;
999 case 64:
1000 addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMVectorType(LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0), length), "");
1001 break;
1002 }
1003 return addr_ptr;
1004 }
1005
lp_vec_add_offset_ptr(struct lp_build_nir_context * bld_base,unsigned bit_size,LLVMValueRef ptr,LLVMValueRef offset)1006 static LLVMValueRef lp_vec_add_offset_ptr(struct lp_build_nir_context *bld_base,
1007 unsigned bit_size,
1008 LLVMValueRef ptr,
1009 LLVMValueRef offset)
1010 {
1011 unsigned pointer_size = 8 * sizeof(void *);
1012 struct gallivm_state *gallivm = bld_base->base.gallivm;
1013 LLVMBuilderRef builder = gallivm->builder;
1014 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1015 struct lp_build_context *ptr_bld = get_int_bld(bld_base, true, pointer_size);
1016 LLVMValueRef result = LLVMBuildPtrToInt(builder, ptr, ptr_bld->vec_type, "");
1017 if (pointer_size == 64)
1018 offset = LLVMBuildZExt(builder, offset, ptr_bld->vec_type, "");
1019 result = LLVMBuildAdd(builder, offset, result, "");
1020 return global_addr_to_ptr_vec(gallivm, result, uint_bld->type.length, bit_size);
1021 }
1022
emit_load_global(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,unsigned addr_bit_size,bool offset_is_uniform,LLVMValueRef addr,LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])1023 static void emit_load_global(struct lp_build_nir_context *bld_base,
1024 unsigned nc,
1025 unsigned bit_size,
1026 unsigned addr_bit_size,
1027 bool offset_is_uniform,
1028 LLVMValueRef addr,
1029 LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
1030 {
1031 struct gallivm_state *gallivm = bld_base->base.gallivm;
1032 LLVMBuilderRef builder = gallivm->builder;
1033 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1034 struct lp_build_context *res_bld;
1035 LLVMValueRef exec_mask = mask_vec(bld_base);
1036
1037 res_bld = get_int_bld(bld_base, true, bit_size);
1038
1039 /* Note, we don't use first_active_invocation here, since we aren't
1040 * guaranteed that there is actually an active invocation.
1041 */
1042 if (offset_is_uniform && invocation_0_must_be_active(bld_base)) {
1043 /* If the offset is uniform, then use the address from invocation 0 to
1044 * load, and broadcast to all invocations.
1045 */
1046 LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
1047 lp_build_const_int32(gallivm, 0), "");
1048 addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);
1049
1050 for (unsigned c = 0; c < nc; c++) {
1051 LLVMValueRef scalar = lp_build_pointer_get2(builder, res_bld->elem_type,
1052 addr_ptr, lp_build_const_int32(gallivm, c));
1053 outval[c] = lp_build_broadcast_scalar(res_bld, scalar);
1054 }
1055 return;
1056 }
1057
1058 for (unsigned c = 0; c < nc; c++) {
1059 LLVMValueRef chan_offset = lp_build_const_int_vec(gallivm, uint_bld->type, c * (bit_size / 8));
1060
1061 outval[c] = lp_build_masked_gather(gallivm, res_bld->type.length,
1062 bit_size,
1063 res_bld->vec_type,
1064 lp_vec_add_offset_ptr(bld_base, bit_size, addr, chan_offset),
1065 exec_mask);
1066 outval[c] = LLVMBuildBitCast(builder, outval[c], res_bld->vec_type, "");
1067 }
1068 }
1069
emit_store_global(struct lp_build_nir_context * bld_base,unsigned writemask,unsigned nc,unsigned bit_size,unsigned addr_bit_size,LLVMValueRef addr,LLVMValueRef dst)1070 static void emit_store_global(struct lp_build_nir_context *bld_base,
1071 unsigned writemask,
1072 unsigned nc, unsigned bit_size,
1073 unsigned addr_bit_size,
1074 LLVMValueRef addr,
1075 LLVMValueRef dst)
1076 {
1077 struct gallivm_state *gallivm = bld_base->base.gallivm;
1078 LLVMBuilderRef builder = gallivm->builder;
1079 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1080 LLVMValueRef exec_mask = mask_vec(bld_base);
1081
1082 for (unsigned c = 0; c < nc; c++) {
1083 if (!(writemask & (1u << c)))
1084 continue;
1085 LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
1086 LLVMValueRef chan_offset = lp_build_const_int_vec(gallivm, uint_bld->type, c * (bit_size / 8));
1087
1088 struct lp_build_context *out_bld = get_int_bld(bld_base, false, bit_size);
1089 val = LLVMBuildBitCast(builder, val, out_bld->vec_type, "");
1090 lp_build_masked_scatter(gallivm, out_bld->type.length, bit_size,
1091 lp_vec_add_offset_ptr(bld_base, bit_size,
1092 addr, chan_offset),
1093 val, exec_mask);
1094 }
1095 }
1096
emit_atomic_global(struct lp_build_nir_context * bld_base,nir_atomic_op nir_op,unsigned addr_bit_size,unsigned val_bit_size,LLVMValueRef addr,LLVMValueRef val,LLVMValueRef val2,LLVMValueRef * result)1097 static void emit_atomic_global(struct lp_build_nir_context *bld_base,
1098 nir_atomic_op nir_op,
1099 unsigned addr_bit_size,
1100 unsigned val_bit_size,
1101 LLVMValueRef addr,
1102 LLVMValueRef val, LLVMValueRef val2,
1103 LLVMValueRef *result)
1104 {
1105 struct gallivm_state *gallivm = bld_base->base.gallivm;
1106 LLVMBuilderRef builder = gallivm->builder;
1107 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1108 bool is_flt = nir_atomic_op_type(nir_op) == nir_type_float;
1109 struct lp_build_context *atom_bld = is_flt ? get_flt_bld(bld_base, val_bit_size) : get_int_bld(bld_base, true, val_bit_size);
1110 if (is_flt)
1111 val = LLVMBuildBitCast(builder, val, atom_bld->vec_type, "");
1112
1113 LLVMValueRef atom_res = lp_build_alloca(gallivm,
1114 atom_bld->vec_type, "");
1115 LLVMValueRef exec_mask = mask_vec(bld_base);
1116 struct lp_build_loop_state loop_state;
1117 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1118
1119 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
1120 loop_state.counter, "");
1121 value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, atom_bld->elem_type, "");
1122
1123 LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
1124 loop_state.counter, "");
1125 addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, 32);
1126 struct lp_build_if_state ifthen;
1127 LLVMValueRef cond, temp_res;
1128 LLVMValueRef scalar;
1129 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
1130 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
1131 lp_build_if(&ifthen, gallivm, cond);
1132
1133 addr_ptr = LLVMBuildBitCast(gallivm->builder, addr_ptr, LLVMPointerType(LLVMTypeOf(value_ptr), 0), "");
1134 if (val2 != NULL /* compare-and-swap */) {
1135 LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
1136 loop_state.counter, "");
1137 cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atom_bld->elem_type, "");
1138 scalar = LLVMBuildAtomicCmpXchg(builder, addr_ptr, value_ptr,
1139 cas_src_ptr,
1140 LLVMAtomicOrderingSequentiallyConsistent,
1141 LLVMAtomicOrderingSequentiallyConsistent,
1142 false);
1143 scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
1144 } else {
1145 scalar = LLVMBuildAtomicRMW(builder, lp_translate_atomic_op(nir_op),
1146 addr_ptr, value_ptr,
1147 LLVMAtomicOrderingSequentiallyConsistent,
1148 false);
1149 }
1150 temp_res = LLVMBuildLoad2(builder, atom_bld->vec_type, atom_res, "");
1151 temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
1152 LLVMBuildStore(builder, temp_res, atom_res);
1153 lp_build_else(&ifthen);
1154 temp_res = LLVMBuildLoad2(builder, atom_bld->vec_type, atom_res, "");
1155 LLVMValueRef zero_val = lp_build_zero_bits(gallivm, val_bit_size, is_flt);
1156 temp_res = LLVMBuildInsertElement(builder, temp_res, zero_val, loop_state.counter, "");
1157 LLVMBuildStore(builder, temp_res, atom_res);
1158 lp_build_endif(&ifthen);
1159 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
1160 NULL, LLVMIntUGE);
1161 *result = LLVMBuildLoad2(builder, LLVMTypeOf(val), atom_res, "");
1162 }
1163
1164 /* Returns a boolean for whether the offset is in range of the given limit for
1165 * SSBO/UBO dereferences.
1166 */
1167 static LLVMValueRef
lp_offset_in_range(struct lp_build_nir_context * bld_base,LLVMValueRef offset,LLVMValueRef limit)1168 lp_offset_in_range(struct lp_build_nir_context *bld_base,
1169 LLVMValueRef offset,
1170 LLVMValueRef limit)
1171 {
1172 struct gallivm_state *gallivm = bld_base->base.gallivm;
1173 LLVMBuilderRef builder = gallivm->builder;
1174
1175 LLVMValueRef fetch_extent = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, 1), "");
1176 LLVMValueRef fetch_in_bounds = LLVMBuildICmp(gallivm->builder, LLVMIntUGE, limit, fetch_extent, "");
1177 LLVMValueRef fetch_non_negative = LLVMBuildICmp(gallivm->builder, LLVMIntSGE, offset, lp_build_const_int32(gallivm, 0), "");
1178 return LLVMBuildAnd(gallivm->builder, fetch_in_bounds, fetch_non_negative, "");
1179 }
1180
1181 static LLVMValueRef
build_resource_to_scalar(struct lp_build_nir_context * bld_base,LLVMValueRef resource)1182 build_resource_to_scalar(struct lp_build_nir_context *bld_base, LLVMValueRef resource)
1183 {
1184 struct gallivm_state *gallivm = bld_base->base.gallivm;
1185
1186 LLVMValueRef invocation = first_active_invocation(bld_base);
1187
1188 LLVMValueRef set = LLVMBuildExtractValue(gallivm->builder, resource, 0, "");
1189 set = LLVMBuildExtractElement(gallivm->builder, set, invocation, "");
1190
1191 LLVMValueRef binding = LLVMBuildExtractValue(gallivm->builder, resource, 1, "");
1192 binding = LLVMBuildExtractElement(gallivm->builder, binding, invocation, "");
1193
1194 LLVMValueRef components[2] = { set, binding };
1195 return lp_nir_array_build_gather_values(gallivm->builder, components, 2);
1196 }
1197
emit_load_ubo(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,bool offset_is_uniform,LLVMValueRef index,LLVMValueRef offset,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1198 static void emit_load_ubo(struct lp_build_nir_context *bld_base,
1199 unsigned nc,
1200 unsigned bit_size,
1201 bool offset_is_uniform,
1202 LLVMValueRef index,
1203 LLVMValueRef offset,
1204 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1205 {
1206 if (LLVMGetTypeKind(LLVMTypeOf(index)) == LLVMArrayTypeKind)
1207 index = build_resource_to_scalar(bld_base, index);
1208
1209 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1210 struct gallivm_state *gallivm = bld_base->base.gallivm;
1211 LLVMBuilderRef builder = gallivm->builder;
1212 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1213 struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size);
1214 LLVMValueRef consts_ptr = lp_llvm_buffer_base(gallivm, bld->consts_ptr, index, LP_MAX_TGSI_CONST_BUFFERS);
1215 LLVMValueRef num_consts = lp_llvm_buffer_num_elements(gallivm, bld->consts_ptr, index, LP_MAX_TGSI_CONST_BUFFERS);
1216 unsigned size_shift = bit_size_to_shift_size(bit_size);
1217 if (size_shift)
1218 offset = lp_build_shr(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, size_shift));
1219
1220 LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0);
1221 consts_ptr = LLVMBuildBitCast(builder, consts_ptr, ptr_type, "");
1222
1223 if (offset_is_uniform) {
1224 offset = LLVMBuildExtractElement(builder, offset, first_active_invocation(bld_base), "");
1225 struct lp_build_context *load_bld = get_int_bld(bld_base, true, bit_size);
1226 switch (bit_size) {
1227 case 8:
1228 num_consts = LLVMBuildShl(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 2), "");
1229 break;
1230 case 16:
1231 num_consts = LLVMBuildShl(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 1), "");
1232 break;
1233 case 64:
1234 num_consts = LLVMBuildLShr(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 1), "");
1235 break;
1236 default: break;
1237 }
1238 for (unsigned c = 0; c < nc; c++) {
1239 LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");
1240
1241 LLVMValueRef scalar;
1242 /* If loading outside the UBO, we need to skip the load and read 0 instead. */
1243 LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, false);
1244 LLVMValueRef res_store = lp_build_alloca(gallivm, LLVMTypeOf(zero), "");
1245 LLVMBuildStore(builder, zero, res_store);
1246
1247 struct lp_build_if_state ifthen;
1248 lp_build_if(&ifthen, gallivm, lp_offset_in_range(bld_base, chan_offset, num_consts));
1249 LLVMBuildStore(builder, lp_build_pointer_get2(builder, bld_broad->elem_type,
1250 consts_ptr, chan_offset), res_store);
1251 lp_build_endif(&ifthen);
1252
1253 scalar = LLVMBuildLoad2(builder, LLVMTypeOf(zero), res_store, "");
1254
1255 result[c] = lp_build_broadcast_scalar(load_bld, scalar);
1256 }
1257 } else {
1258 LLVMValueRef overflow_mask;
1259
1260 num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
1261 if (bit_size == 64)
1262 num_consts = lp_build_shr_imm(uint_bld, num_consts, 1);
1263 else if (bit_size == 16)
1264 num_consts = lp_build_shl_imm(uint_bld, num_consts, 1);
1265 else if (bit_size == 8)
1266 num_consts = lp_build_shl_imm(uint_bld, num_consts, 2);
1267
1268 for (unsigned c = 0; c < nc; c++) {
1269 LLVMValueRef this_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
1270 overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
1271 this_offset, num_consts);
1272 result[c] = build_gather(bld_base, bld_broad, bld_broad->elem_type, consts_ptr, this_offset, overflow_mask, NULL);
1273 }
1274 }
1275 }
1276
1277 static void
emit_load_const(struct lp_build_nir_context * bld_base,const nir_load_const_instr * instr,LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])1278 emit_load_const(struct lp_build_nir_context *bld_base,
1279 const nir_load_const_instr *instr,
1280 LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
1281 {
1282 struct lp_build_context *int_bld = get_int_bld(bld_base, true, instr->def.bit_size);
1283 const unsigned bits = instr->def.bit_size;
1284
1285 for (unsigned i = 0; i < instr->def.num_components; i++) {
1286 outval[i] = lp_build_const_int_vec(bld_base->base.gallivm, int_bld->type,
1287 bits == 32 ? instr->value[i].u32
1288 : instr->value[i].u64);
1289 }
1290 for (unsigned i = instr->def.num_components; i < NIR_MAX_VEC_COMPONENTS; i++) {
1291 outval[i] = NULL;
1292 }
1293 }
1294
1295 /**
1296 * Get the base address of SSBO[@index] for the @invocation channel, returning
1297 * the address and also the bounds (in units of the bit_size).
1298 */
1299 static LLVMValueRef
ssbo_base_pointer(struct lp_build_nir_context * bld_base,unsigned bit_size,LLVMValueRef index,LLVMValueRef invocation,LLVMValueRef * bounds)1300 ssbo_base_pointer(struct lp_build_nir_context *bld_base,
1301 unsigned bit_size,
1302 LLVMValueRef index, LLVMValueRef invocation, LLVMValueRef *bounds)
1303 {
1304 struct gallivm_state *gallivm = bld_base->base.gallivm;
1305 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1306 uint32_t shift_val = bit_size_to_shift_size(bit_size);
1307
1308 LLVMValueRef ssbo_idx;
1309 LLVMValueRef buffers;
1310 uint32_t buffers_limit;
1311 if (LLVMGetTypeKind(LLVMTypeOf(index)) == LLVMArrayTypeKind) {
1312 LLVMValueRef set = LLVMBuildExtractValue(gallivm->builder, index, 0, "");
1313 set = LLVMBuildExtractElement(gallivm->builder, set, invocation, "");
1314
1315 LLVMValueRef binding = LLVMBuildExtractValue(gallivm->builder, index, 1, "");
1316 binding = LLVMBuildExtractElement(gallivm->builder, binding, invocation, "");
1317
1318 LLVMValueRef components[2] = { set, binding };
1319 ssbo_idx = lp_nir_array_build_gather_values(gallivm->builder, components, 2);
1320
1321 buffers = bld->consts_ptr;
1322 buffers_limit = LP_MAX_TGSI_CONST_BUFFERS;
1323 } else {
1324 ssbo_idx = LLVMBuildExtractElement(gallivm->builder, index, invocation, "");
1325
1326 buffers = bld->ssbo_ptr;
1327 buffers_limit = LP_MAX_TGSI_SHADER_BUFFERS;
1328 }
1329
1330 LLVMValueRef ssbo_size_ptr = lp_llvm_buffer_num_elements(gallivm, buffers, ssbo_idx, buffers_limit);
1331 LLVMValueRef ssbo_ptr = lp_llvm_buffer_base(gallivm, buffers, ssbo_idx, buffers_limit);
1332 if (bounds)
1333 *bounds = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, shift_val), "");
1334
1335 return ssbo_ptr;
1336 }
1337
/*
 * Return the base pointer for a memory access, typed for @mem_bld's
 * element type.
 *
 * With @index set, this is an SSBO access and *@bounds receives the
 * buffer size in elements; otherwise it is shared memory (or the task
 * payload when @payload is set) and *@bounds is NULL, meaning no bounds
 * checking is performed by callers.
 */
static LLVMValueRef
mem_access_base_pointer(struct lp_build_nir_context *bld_base,
                        struct lp_build_context *mem_bld,
                        unsigned bit_size, bool payload,
                        LLVMValueRef index, LLVMValueRef invocation, LLVMValueRef *bounds)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMValueRef ptr;

   if (index) {
      ptr = ssbo_base_pointer(bld_base, bit_size, index, invocation, bounds);
   } else {
      if (payload) {
         /* NOTE(review): skips a 12-byte prefix of the payload buffer —
          * presumably a driver-internal header; confirm against the
          * payload layout definition.
          */
         ptr = bld->payload_ptr;
         ptr = LLVMBuildPtrToInt(gallivm->builder, ptr, bld_base->int64_bld.elem_type, "");
         ptr = LLVMBuildAdd(gallivm->builder, ptr, lp_build_const_int64(gallivm, 12), "");
         ptr = LLVMBuildIntToPtr(gallivm->builder, ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), "");
      }
      else
         ptr = bld->shared_ptr;
      *bounds = NULL;
   }

   /* Cast it to the pointer type of the access this instruction is doing. */
   if (bit_size == 32 && !mem_bld->type.floating)
      return ptr;
   else
      return LLVMBuildBitCast(gallivm->builder, ptr, LLVMPointerType(mem_bld->elem_type, 0), "");
}
1368
emit_load_mem(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,bool index_and_offset_are_uniform,bool payload,LLVMValueRef index,LLVMValueRef offset,LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])1369 static void emit_load_mem(struct lp_build_nir_context *bld_base,
1370 unsigned nc,
1371 unsigned bit_size,
1372 bool index_and_offset_are_uniform,
1373 bool payload,
1374 LLVMValueRef index,
1375 LLVMValueRef offset,
1376 LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
1377 {
1378 struct gallivm_state *gallivm = bld_base->base.gallivm;
1379 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1380 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1381 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1382 struct lp_build_context *load_bld;
1383 uint32_t shift_val = bit_size_to_shift_size(bit_size);
1384
1385 load_bld = get_int_bld(bld_base, true, bit_size);
1386
1387 offset = LLVMBuildAShr(gallivm->builder, offset, lp_build_const_int_vec(gallivm, uint_bld->type, shift_val), "");
1388
1389 /* If the address is uniform, then use the address from the first active
1390 * invocation 0 to load, and broadcast to all invocations. We can't do
1391 * computed first active invocation for shared accesses (index == NULL),
1392 * though, since those don't do bounds checking and we could use an invalid
1393 * offset if exec_mask == 0.
1394 */
1395 if (index_and_offset_are_uniform && (invocation_0_must_be_active(bld_base) || index)) {
1396 LLVMValueRef ssbo_limit;
1397 LLVMValueRef first_active = first_active_invocation(bld_base);
1398 LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, payload, index,
1399 first_active, &ssbo_limit);
1400
1401 offset = LLVMBuildExtractElement(gallivm->builder, offset, first_active, "");
1402
1403 for (unsigned c = 0; c < nc; c++) {
1404 LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");
1405
1406 LLVMValueRef scalar;
1407 /* If loading outside the SSBO, we need to skip the load and read 0 instead. */
1408 if (ssbo_limit) {
1409 LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, false);
1410 LLVMValueRef res_store = lp_build_alloca(gallivm, LLVMTypeOf(zero), "");
1411 LLVMBuildStore(builder, zero, res_store);
1412
1413 struct lp_build_if_state ifthen;
1414 lp_build_if(&ifthen, gallivm, lp_offset_in_range(bld_base, chan_offset, ssbo_limit));
1415 LLVMBuildStore(builder, lp_build_pointer_get2(builder, load_bld->elem_type, mem_ptr, chan_offset), res_store);
1416 lp_build_endif(&ifthen);
1417
1418 scalar = LLVMBuildLoad2(builder, LLVMTypeOf(zero), res_store, "");
1419 } else {
1420 scalar = lp_build_pointer_get2(builder, load_bld->elem_type, mem_ptr, chan_offset);
1421 }
1422
1423 outval[c] = lp_build_broadcast_scalar(load_bld, scalar);
1424 }
1425 return;
1426 }
1427
1428 /* although the index is dynamically uniform that doesn't count if exec mask isn't set, so read the one-by-one */
1429
1430 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
1431 for (unsigned c = 0; c < nc; c++)
1432 result[c] = lp_build_alloca(gallivm, load_bld->vec_type, "");
1433
1434 LLVMValueRef exec_mask = mask_vec(bld_base);
1435
1436 /* mask the offset to prevent invalid reads */
1437 offset = LLVMBuildAnd(gallivm->builder, offset, exec_mask, "");
1438
1439 for (unsigned i = 0; i < uint_bld->type.length; i++) {
1440 LLVMValueRef counter = lp_build_const_int32(gallivm, i);
1441
1442 LLVMValueRef ssbo_limit;
1443 LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, payload, index,
1444 counter, &ssbo_limit);
1445
1446 LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, counter, "");
1447
1448 for (unsigned c = 0; c < nc; c++) {
1449 LLVMValueRef loop_index = LLVMBuildAdd(builder, loop_offset, lp_build_const_int32(gallivm, c), "");
1450 LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1);
1451 if (ssbo_limit) {
1452 LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_index, ssbo_limit);
1453 do_fetch = LLVMBuildAnd(builder, do_fetch, ssbo_oob_cmp, "");
1454 }
1455
1456 struct lp_build_if_state ifthen;
1457 LLVMValueRef fetch_cond, temp_res;
1458
1459 fetch_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_fetch, lp_build_const_int32(gallivm, 0), "");
1460
1461 lp_build_if(&ifthen, gallivm, fetch_cond);
1462 LLVMValueRef scalar = lp_build_pointer_get2(builder, load_bld->elem_type, mem_ptr, loop_index);
1463
1464 temp_res = LLVMBuildLoad2(builder, load_bld->vec_type, result[c], "");
1465 temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, counter, "");
1466 LLVMBuildStore(builder, temp_res, result[c]);
1467 lp_build_else(&ifthen);
1468 temp_res = LLVMBuildLoad2(builder, load_bld->vec_type, result[c], "");
1469 LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, false);
1470 temp_res = LLVMBuildInsertElement(builder, temp_res, zero, counter, "");
1471 LLVMBuildStore(builder, temp_res, result[c]);
1472 lp_build_endif(&ifthen);
1473 }
1474 }
1475 for (unsigned c = 0; c < nc; c++)
1476 outval[c] = LLVMBuildLoad2(gallivm->builder, load_bld->vec_type, result[c], "");
1477
1478 }
1479
/*
 * Store a (possibly multi-component) vector value to SSBO / shared / task
 * payload memory.
 *
 * writemask: bitmask of components of dst to actually store.
 * nc:        number of components; dst is a plain vector when nc == 1,
 *            otherwise an LLVM struct of per-component vectors.
 * bit_size:  component width in bits (8/16/32/64).
 * index_and_offset_are_uniform: true when index/offset are dynamically
 *            uniform, enabling the scalar fast path below.
 * payload:   select the task/mesh payload buffer instead of an SSBO
 *            (forwarded to mem_access_base_pointer()).
 * index:     per-lane buffer index vector.
 * offset:    per-lane byte offset vector (converted to an element index).
 */
static void emit_store_mem(struct lp_build_nir_context *bld_base,
                           unsigned writemask,
                           unsigned nc,
                           unsigned bit_size,
                           bool index_and_offset_are_uniform,
                           bool payload,
                           LLVMValueRef index,
                           LLVMValueRef offset,
                           LLVMValueRef dst)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *store_bld;
   uint32_t shift_val = bit_size_to_shift_size(bit_size);
   store_bld = get_int_bld(bld_base, true, bit_size);

   /* Convert the byte offset into an element index for this bit size. */
   offset = lp_build_shr_imm(uint_bld, offset, shift_val);

   /* If the address is uniform, then just store the value from the first
    * channel instead of making LLVM unroll the invocation loop. Note that we
    * don't use first_active_uniform(), since we aren't guaranteed that there is
    * actually an active invocation.
    */
   if (index_and_offset_are_uniform && invocation_0_must_be_active(bld_base)) {
      LLVMValueRef ssbo_limit;
      LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, store_bld, bit_size, payload, index,
                                                     lp_build_const_int32(gallivm, 0), &ssbo_limit);

      /* Uniform offset: use invocation 0's element index for all stores. */
      offset = LLVMBuildExtractElement(gallivm->builder, offset, lp_build_const_int32(gallivm, 0), "");

      for (unsigned c = 0; c < nc; c++) {
         if (!(writemask & (1u << c)))
            continue;

         /* Pick out invocation 0's value. */
         LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
         LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
                                                          lp_build_const_int32(gallivm, 0), "");
         value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, "");

         LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");

         /* If storing outside the SSBO, we need to skip the store instead. */
         if (ssbo_limit) {
            struct lp_build_if_state ifthen;
            lp_build_if(&ifthen, gallivm, lp_offset_in_range(bld_base, chan_offset, ssbo_limit));
            lp_build_pointer_set(builder, mem_ptr, chan_offset, value_ptr);
            lp_build_endif(&ifthen);
         } else {
            lp_build_pointer_set(builder, mem_ptr, chan_offset, value_ptr);
         }
      }
      return;
   }

   /* Divergent path: scalarize over the SIMD lanes, storing each lane's
    * value under an exec-mask guard so inactive lanes emit nothing.
    */
   LLVMValueRef exec_mask = mask_vec(bld_base);
   LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
   for (unsigned i = 0; i < uint_bld->type.length; i++) {
      LLVMValueRef counter = lp_build_const_int32(gallivm, i);
      LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, counter, "");

      struct lp_build_if_state exec_ifthen;
      lp_build_if(&exec_ifthen, gallivm, loop_cond);

      LLVMValueRef ssbo_limit;
      LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, store_bld, bit_size, payload, index,
                                                     counter, &ssbo_limit);

      LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, counter, "");

      for (unsigned c = 0; c < nc; c++) {
         if (!(writemask & (1u << c)))
            continue;
         LLVMValueRef loop_index = LLVMBuildAdd(builder, loop_offset, lp_build_const_int32(gallivm, c), "");
         LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
         /* do_store starts as all-ones and gets ANDed with the bounds check. */
         LLVMValueRef do_store = lp_build_const_int32(gallivm, -1);

         /* ssbo_limit is only non-NULL for bounds-checked buffer access. */
         if (ssbo_limit) {
            LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_index, ssbo_limit);
            do_store = LLVMBuildAnd(builder, do_store, ssbo_oob_cmp, "");
         }

         LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
                                                          counter, "");
         value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, "");
         struct lp_build_if_state ifthen;
         LLVMValueRef store_cond;

         /* Only perform the store when the index is in range. */
         store_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_store, lp_build_const_int32(gallivm, 0), "");
         lp_build_if(&ifthen, gallivm, store_cond);
         lp_build_pointer_set(builder, mem_ptr, loop_index, value_ptr);
         lp_build_endif(&ifthen);
      }

      lp_build_endif(&exec_ifthen);
   }
}
1579
1580
/*
 * Emit an atomic op on SSBO / shared / task-payload memory.
 *
 * Atomics are inherently scalar, so this loops over the SIMD lanes and
 * performs one LLVM atomic per active, in-bounds lane.  val2 != NULL selects
 * compare-and-swap (val = comparator, val2 = new value); otherwise an
 * atomicrmw with the translated nir_op is emitted.  The per-lane old values
 * are gathered into *result; inactive or out-of-bounds lanes yield zero.
 */
static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
                            nir_atomic_op nir_op,
                            uint32_t bit_size,
                            bool payload,
                            LLVMValueRef index, LLVMValueRef offset,
                            LLVMValueRef val, LLVMValueRef val2,
                            LLVMValueRef *result)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   uint32_t shift_val = bit_size_to_shift_size(bit_size);
   bool is_float = nir_atomic_op_type(nir_op) == nir_type_float;
   /* Float atomics operate on the float build context so the element type
    * of the atomicrmw matches. */
   struct lp_build_context *atomic_bld = is_float ? get_flt_bld(bld_base, bit_size) : get_int_bld(bld_base, true, bit_size);

   /* Convert the byte offset into an element index. */
   offset = lp_build_shr_imm(uint_bld, offset, shift_val);
   /* Scratch slot used to accumulate the per-lane results. */
   LLVMValueRef atom_res = lp_build_alloca(gallivm,
                                           atomic_bld->vec_type, "");

   LLVMValueRef exec_mask = mask_vec(bld_base);
   LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
   for (unsigned i = 0; i < uint_bld->type.length; i++) {
      LLVMValueRef counter = lp_build_const_int32(gallivm, i);
      LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, counter, "");

      /* Skip the atomic entirely for inactive lanes. */
      struct lp_build_if_state exec_ifthen;
      lp_build_if(&exec_ifthen, gallivm, loop_cond);

      LLVMValueRef ssbo_limit;
      LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, atomic_bld, bit_size, payload, index,
                                                     counter, &ssbo_limit);

      LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, counter, "");

      /* do_fetch starts all-ones and is ANDed with the bounds check
       * (ssbo_limit is only non-NULL for bounds-checked buffers). */
      LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1);
      if (ssbo_limit) {
         LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_offset, ssbo_limit);
         do_fetch = LLVMBuildAnd(builder, do_fetch, ssbo_oob_cmp, "");
      }

      LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
                                                       counter, "");
      value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, atomic_bld->elem_type, "");

      LLVMValueRef scalar_ptr = LLVMBuildGEP2(builder, atomic_bld->elem_type, mem_ptr, &loop_offset, 1, "");

      struct lp_build_if_state ifthen;
      LLVMValueRef inner_cond, temp_res;
      LLVMValueRef scalar;

      inner_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_fetch, lp_build_const_int32(gallivm, 0), "");
      lp_build_if(&ifthen, gallivm, inner_cond);

      if (val2 != NULL) {
         /* Compare-and-swap: value_ptr is the comparator, val2 the new value;
          * the returned aggregate's field 0 is the old memory value. */
         LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
                                                            counter, "");
         cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atomic_bld->elem_type, "");
         scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
                                         cas_src_ptr,
                                         LLVMAtomicOrderingSequentiallyConsistent,
                                         LLVMAtomicOrderingSequentiallyConsistent,
                                         false);
         scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
      } else {
         scalar = LLVMBuildAtomicRMW(builder, lp_translate_atomic_op(nir_op),
                                     scalar_ptr, value_ptr,
                                     LLVMAtomicOrderingSequentiallyConsistent,
                                     false);
      }
      /* Insert the old value into this lane of the accumulated result. */
      temp_res = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, "");
      temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, counter, "");
      LLVMBuildStore(builder, temp_res, atom_res);
      lp_build_else(&ifthen);
      /* Out-of-bounds lanes read back as zero. */
      temp_res = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, "");
      LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, is_float);
      temp_res = LLVMBuildInsertElement(builder, temp_res, zero, counter, "");
      LLVMBuildStore(builder, temp_res, atom_res);
      lp_build_endif(&ifthen);

      lp_build_endif(&exec_ifthen);
   }
   *result = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, "");
}
1665
emit_barrier(struct lp_build_nir_context * bld_base)1666 static void emit_barrier(struct lp_build_nir_context *bld_base)
1667 {
1668 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1669 struct gallivm_state * gallivm = bld_base->base.gallivm;
1670
1671 LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");
1672
1673 lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);
1674 LLVMPositionBuilderAtEnd(gallivm->builder, resume);
1675 }
1676
/*
 * Return the size in bytes of the SSBO selected by "index", broadcast to a
 * full uint vector.  The base pointer itself is discarded; only the size
 * out-parameter of ssbo_base_pointer() is used.
 */
static LLVMValueRef emit_get_ssbo_size(struct lp_build_nir_context *bld_base,
                                       LLVMValueRef index)
{
   LLVMValueRef num_bytes;

   ssbo_base_pointer(bld_base, 8, index, first_active_invocation(bld_base),
                     &num_bytes);

   return lp_build_broadcast_scalar(&bld_base->uint_bld, num_bytes);
}
1687
emit_image_op(struct lp_build_nir_context * bld_base,struct lp_img_params * params)1688 static void emit_image_op(struct lp_build_nir_context *bld_base,
1689 struct lp_img_params *params)
1690 {
1691 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1692 struct gallivm_state *gallivm = bld_base->base.gallivm;
1693
1694 params->type = bld_base->base.type;
1695 params->resources_type = bld->resources_type;
1696 params->resources_ptr = bld->resources_ptr;
1697 params->thread_data_type = bld->thread_data_type;
1698 params->thread_data_ptr = bld->thread_data_ptr;
1699 params->exec_mask = mask_vec(bld_base);
1700
1701 if (params->image_index_offset)
1702 params->image_index_offset = LLVMBuildExtractElement(gallivm->builder, params->image_index_offset,
1703 first_active_invocation(bld_base), "");
1704
1705 if (params->resource)
1706 params->resource = build_resource_to_scalar(bld_base, params->resource);
1707
1708 bld->image->emit_op(bld->image,
1709 bld->bld_base.base.gallivm,
1710 params);
1711
1712 }
1713
emit_image_size(struct lp_build_nir_context * bld_base,struct lp_sampler_size_query_params * params)1714 static void emit_image_size(struct lp_build_nir_context *bld_base,
1715 struct lp_sampler_size_query_params *params)
1716 {
1717 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1718 struct gallivm_state *gallivm = bld_base->base.gallivm;
1719
1720 params->int_type = bld_base->int_bld.type;
1721 params->resources_type = bld->resources_type;
1722 params->resources_ptr = bld->resources_ptr;
1723 if (params->texture_unit_offset)
1724 params->texture_unit_offset = LLVMBuildExtractElement(gallivm->builder, params->texture_unit_offset,
1725 first_active_invocation(bld_base), "");
1726 bld->image->emit_size_query(bld->image,
1727 bld->bld_base.base.gallivm,
1728 params);
1729
1730 }
1731
/*
 * Allocate output storage for a shader-out variable.  Each attribute slot
 * gets four channels; allocation starts at component "sc" and only fills
 * channels that don't already have an alloca.
 */
static void init_var_slots(struct lp_build_nir_context *bld_base,
                           nir_variable *var, unsigned sc)
{
   struct lp_build_nir_soa_context *soa = (struct lp_build_nir_soa_context *)bld_base;
   const unsigned total_comps = glsl_count_attribute_slots(var->type, false) * 4;

   /* Nothing to do for stages without an output array. */
   if (!soa->outputs)
      return;

   for (unsigned comp = sc; comp < total_comps + sc; comp++) {
      const unsigned loc = var->data.driver_location + comp / 4;
      const unsigned chan = comp % 4;

      if (!soa->outputs[loc][chan]) {
         soa->outputs[loc][chan] = lp_build_alloca(bld_base->base.gallivm,
                                                   bld_base->base.vec_type,
                                                   "output");
      }
   }
}
1749
/*
 * Handle a NIR variable declaration.  Only shader outputs need work here:
 * their storage slots are allocated up front.  Fragment depth/stencil
 * results live at fixed channel offsets within their slot.
 */
static void emit_var_decl(struct lp_build_nir_context *bld_base,
                          nir_variable *var)
{
   if (var->data.mode != nir_var_shader_out)
      return;

   unsigned start_comp = var->data.location_frac;
   if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
      if (var->data.location == FRAG_RESULT_STENCIL)
         start_comp = 1;
      else if (var->data.location == FRAG_RESULT_DEPTH)
         start_comp = 2;
   }

   init_var_slots(bld_base, var, start_comp);
}
1769
/*
 * Lower a NIR texture sample to the sampler backend.
 *
 * Normally the whole SIMD vector is sampled at once.  The exception is a
 * dynamic texture index outside the fragment stage: there the sample is
 * scalarized, calling the backend once per lane with scalar coords/lod and
 * re-assembling the per-lane texels into vectors.
 */
static void emit_tex(struct lp_build_nir_context *bld_base,
                     struct lp_sampler_params *params)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;

   params->type = bld_base->base.type;
   params->resources_type = bld->resources_type;
   params->resources_ptr = bld->resources_ptr;
   params->thread_data_type = bld->thread_data_type;
   params->thread_data_ptr = bld->thread_data_ptr;
   params->exec_mask = mask_vec(bld_base);

   if (params->texture_index_offset && bld_base->shader->info.stage != MESA_SHADER_FRAGMENT) {
      /* this is horrible but this can be dynamic */
      LLVMValueRef coords[5];
      LLVMValueRef *orig_texel_ptr;
      struct lp_build_context *uint_bld = &bld_base->uint_bld;
      /* Per-channel result vectors, filled in lane by lane below. */
      LLVMValueRef result[4] = { LLVMGetUndef(bld_base->base.vec_type),
                                 LLVMGetUndef(bld_base->base.vec_type),
                                 LLVMGetUndef(bld_base->base.vec_type),
                                 LLVMGetUndef(bld_base->base.vec_type) };
      LLVMValueRef texel[4], orig_offset, orig_lod;
      unsigned i;
      /* Save the caller's vector-valued fields; params is mutated per lane. */
      orig_texel_ptr = params->texel;
      orig_lod = params->lod;
      for (i = 0; i < 5; i++) {
         coords[i] = params->coords[i];
      }
      orig_offset = params->texture_index_offset;

      for (unsigned v = 0; v < uint_bld->type.length; v++) {
         LLVMValueRef idx = lp_build_const_int32(gallivm, v);
         LLVMValueRef new_coords[5];
         /* Extract this lane's scalar coordinates. */
         for (i = 0; i < 5; i++) {
            new_coords[i] = LLVMBuildExtractElement(gallivm->builder,
                                                    coords[i], idx, "");
         }
         params->coords = new_coords;
         params->texture_index_offset = LLVMBuildExtractElement(gallivm->builder,
                                                                orig_offset,
                                                                idx, "");
         /* The backend is invoked with the scalar (element) type here. */
         params->type = lp_elem_type(bld_base->base.type);

         if (orig_lod)
            params->lod = LLVMBuildExtractElement(gallivm->builder, orig_lod, idx, "");
         params->texel = texel;
         bld->sampler->emit_tex_sample(bld->sampler,
                                       gallivm,
                                       params);

         /* Scatter the scalar texel back into this lane of the results. */
         for (i = 0; i < 4; i++) {
            result[i] = LLVMBuildInsertElement(gallivm->builder, result[i], texel[i], idx, "");
         }
      }
      for (i = 0; i < 4; i++) {
         orig_texel_ptr[i] = result[i];
      }
      return;
   }

   /* Uniform dynamic index: scalarize from the first active lane. */
   if (params->texture_index_offset) {
      params->texture_index_offset = LLVMBuildExtractElement(gallivm->builder, params->texture_index_offset,
                                                             first_active_invocation(bld_base), "");
   }

   if (params->texture_resource)
      params->texture_resource = build_resource_to_scalar(bld_base, params->texture_resource);

   if (params->sampler_resource)
      params->sampler_resource = build_resource_to_scalar(bld_base, params->sampler_resource);

   /* Restore the vector type (the scalarized path above may not be taken,
    * but params->type is re-set unconditionally for clarity). */
   params->type = bld_base->base.type;
   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 params);
}
1847
emit_tex_size(struct lp_build_nir_context * bld_base,struct lp_sampler_size_query_params * params)1848 static void emit_tex_size(struct lp_build_nir_context *bld_base,
1849 struct lp_sampler_size_query_params *params)
1850 {
1851 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1852
1853 params->int_type = bld_base->int_bld.type;
1854 params->resources_type = bld->resources_type;
1855 params->resources_ptr = bld->resources_ptr;
1856 if (params->texture_unit_offset)
1857 params->texture_unit_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder,
1858 params->texture_unit_offset,
1859 lp_build_const_int32(bld_base->base.gallivm, 0), "");
1860
1861 params->exec_mask = mask_vec(bld_base);
1862 if (params->resource)
1863 params->resource = build_resource_to_scalar(bld_base, params->resource);
1864
1865 bld->sampler->emit_size_query(bld->sampler,
1866 bld->bld_base.base.gallivm,
1867 params);
1868 }
1869
get_local_invocation_index(struct lp_build_nir_soa_context * bld)1870 static LLVMValueRef get_local_invocation_index(struct lp_build_nir_soa_context *bld)
1871 {
1872 struct lp_build_nir_context *bld_base = &bld->bld_base;
1873 LLVMValueRef tmp, tmp2;
1874
1875 tmp = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.block_size[1]);
1876 tmp2 = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.block_size[0]);
1877 tmp = lp_build_mul(&bld_base->uint_bld, tmp, tmp2);
1878 tmp = lp_build_mul(&bld_base->uint_bld, tmp, bld->system_values.thread_id[2]);
1879
1880 tmp2 = lp_build_mul(&bld_base->uint_bld, tmp2, bld->system_values.thread_id[1]);
1881 tmp = lp_build_add(&bld_base->uint_bld, tmp, tmp2);
1882 tmp = lp_build_add(&bld_base->uint_bld, tmp, bld->system_values.thread_id[0]);
1883 return tmp;
1884 }
1885
/*
 * Lower a system-value intrinsic to the corresponding stored system value,
 * broadcasting scalar values to full vectors where needed.  Values that are
 * already per-lane vectors (thread_id, vertex_id, ...) are returned as-is.
 * result[] receives one vector per destination component.
 */
static void emit_sysval_intrin(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr,
                               LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   /* Integer build context matching the destination bit size. */
   struct lp_build_context *bld_broad = get_int_bld(bld_base, true, instr->def.bit_size);
   switch (instr->intrinsic) {
   case nir_intrinsic_load_instance_id:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
      break;
   case nir_intrinsic_load_base_instance:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.base_instance);
      break;
   case nir_intrinsic_load_base_vertex:
      result[0] = bld->system_values.basevertex;
      break;
   case nir_intrinsic_load_first_vertex:
      result[0] = bld->system_values.firstvertex;
      break;
   case nir_intrinsic_load_vertex_id:
      result[0] = bld->system_values.vertex_id;
      break;
   case nir_intrinsic_load_primitive_id:
      result[0] = bld->system_values.prim_id;
      break;
   case nir_intrinsic_load_workgroup_id: {
      LLVMValueRef tmp[3];
      for (unsigned i = 0; i < 3; i++) {
         tmp[i] = bld->system_values.block_id[i];
         result[i] = lp_build_broadcast_scalar(bld_broad, tmp[i]);
      }
      break;
   }
   case nir_intrinsic_load_local_invocation_id:
      /* Already a per-lane vector per axis. */
      for (unsigned i = 0; i < 3; i++)
         result[i] = bld->system_values.thread_id[i];
      break;
   case nir_intrinsic_load_local_invocation_index:
      result[0] = get_local_invocation_index(bld);
      break;
   case nir_intrinsic_load_num_workgroups: {
      LLVMValueRef tmp[3];
      for (unsigned i = 0; i < 3; i++) {
         tmp[i] = bld->system_values.grid_size[i];
         result[i] = lp_build_broadcast_scalar(bld_broad, tmp[i]);
      }
      break;
   }
   case nir_intrinsic_load_invocation_id:
      /* TCS invocation ids are already vectors; other stages store a scalar. */
      if (bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL)
         result[0] = bld->system_values.invocation_id;
      else
         result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
      break;
   case nir_intrinsic_load_front_face:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);
      break;
   case nir_intrinsic_load_draw_id:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.draw_id);
      break;
   default:
      /* Unhandled intrinsics leave result[] untouched.  Note: the cases
       * below this default are still reachable via their own labels. */
      break;
   case nir_intrinsic_load_workgroup_size:
      for (unsigned i = 0; i < 3; i++)
         result[i] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.block_size[i]);
      break;
   case nir_intrinsic_load_work_dim:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.work_dim);
      break;
   case nir_intrinsic_load_tess_coord:
      for (unsigned i = 0; i < 3; i++) {
         result[i] = LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_coord, i, "");
      }
      break;
   case nir_intrinsic_load_tess_level_outer:
      for (unsigned i = 0; i < 4; i++)
         result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_outer, i, ""));
      break;
   case nir_intrinsic_load_tess_level_inner:
      for (unsigned i = 0; i < 2; i++)
         result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_inner, i, ""));
      break;
   case nir_intrinsic_load_patch_vertices_in:
      result[0] = bld->system_values.vertices_in;
      break;
   case nir_intrinsic_load_sample_id:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.sample_id);
      break;
   case nir_intrinsic_load_sample_pos:
      /* sample_pos is an array of (x, y) pairs indexed by sample id. */
      for (unsigned i = 0; i < 2; i++) {
         LLVMValueRef idx = LLVMBuildMul(gallivm->builder, bld->system_values.sample_id, lp_build_const_int32(gallivm, 2), "");
         idx = LLVMBuildAdd(gallivm->builder, idx, lp_build_const_int32(gallivm, i), "");
         LLVMValueRef val = lp_build_array_get2(gallivm, bld->system_values.sample_pos_type,
                                                bld->system_values.sample_pos, idx);
         result[i] = lp_build_broadcast_scalar(&bld_base->base, val);
      }
      break;
   case nir_intrinsic_load_sample_mask_in:
      result[0] = bld->system_values.sample_mask_in;
      break;
   case nir_intrinsic_load_view_index:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.view_index);
      break;
   case nir_intrinsic_load_subgroup_invocation: {
      /* Constant vector <0, 1, 2, ...> of lane indices. */
      LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
      for(unsigned i = 0; i < bld->bld_base.base.type.length; ++i)
         elems[i] = lp_build_const_int32(gallivm, i);
      result[0] = LLVMConstVector(elems, bld->bld_base.base.type.length);
      break;
   }
   case nir_intrinsic_load_subgroup_id:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.subgroup_id);
      break;
   case nir_intrinsic_load_num_subgroups:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.num_subgroups);
      break;
   }
}
2005
/*
 * A helper invocation is any lane whose exec-mask bit is not fully set:
 * compare the current mask against all-ones and return the per-lane result.
 */
static void emit_helper_invocation(struct lp_build_nir_context *bld_base,
                                   LLVMValueRef *dst)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMValueRef all_ones = lp_build_const_int_vec(gallivm, uint_bld->type, -1);

   *dst = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, mask_vec(bld_base), all_ones);
}
2013
bgnloop(struct lp_build_nir_context * bld_base)2014 static void bgnloop(struct lp_build_nir_context *bld_base)
2015 {
2016 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2017 lp_exec_bgnloop(&bld->exec_mask, true);
2018 }
2019
endloop(struct lp_build_nir_context * bld_base)2020 static void endloop(struct lp_build_nir_context *bld_base)
2021 {
2022 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2023 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask, bld->mask);
2024 }
2025
/* Begin a NIR if: push the (bitcast) condition onto the exec-mask stack. */
static void if_cond(struct lp_build_nir_context *bld_base, LLVMValueRef cond)
{
   struct lp_build_nir_soa_context *soa = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMValueRef cond_vec = LLVMBuildBitCast(builder, cond, bld_base->base.int_vec_type, "");

   lp_exec_mask_cond_push(&soa->exec_mask, cond_vec);
}
2032
else_stmt(struct lp_build_nir_context * bld_base)2033 static void else_stmt(struct lp_build_nir_context *bld_base)
2034 {
2035 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2036 lp_exec_mask_cond_invert(&bld->exec_mask);
2037 }
2038
endif_stmt(struct lp_build_nir_context * bld_base)2039 static void endif_stmt(struct lp_build_nir_context *bld_base)
2040 {
2041 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2042 lp_exec_mask_cond_pop(&bld->exec_mask);
2043 }
2044
break_stmt(struct lp_build_nir_context * bld_base)2045 static void break_stmt(struct lp_build_nir_context *bld_base)
2046 {
2047 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2048
2049 lp_exec_break(&bld->exec_mask, NULL, false);
2050 }
2051
continue_stmt(struct lp_build_nir_context * bld_base)2052 static void continue_stmt(struct lp_build_nir_context *bld_base)
2053 {
2054 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2055 lp_exec_continue(&bld->exec_mask);
2056 }
2057
/*
 * Fragment discard (kill).  Builds a survival mask and folds it into the
 * fragment mask: with a condition, lanes where cond is set are killed but
 * currently-inactive lanes are kept alive; without one, every active lane
 * is killed (all-zero mask when there is no exec mask at all).
 */
static void discard(struct lp_build_nir_context *bld_base, LLVMValueRef cond)
{
   struct lp_build_nir_soa_context *soa = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = soa->bld_base.base.gallivm->builder;
   LLVMValueRef survive_mask;

   if (cond) {
      survive_mask = LLVMBuildNot(builder, cond, "");
      if (soa->exec_mask.has_mask) {
         /* Inactive lanes must not be killed: OR their inverse back in. */
         LLVMValueRef inactive = LLVMBuildNot(builder, soa->exec_mask.exec_mask, "kilp");
         survive_mask = LLVMBuildOr(builder, survive_mask, inactive, "");
      }
   } else {
      /* Unconditional kill of all currently-active lanes. */
      survive_mask = soa->exec_mask.has_mask
         ? LLVMBuildNot(builder, soa->exec_mask.exec_mask, "kilp")
         : LLVMConstNull(soa->bld_base.base.int_vec_type);
   }

   lp_build_mask_update(soa->mask, survive_mask);
}
2080
2081 static void
increment_vec_ptr_by_mask(struct lp_build_nir_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)2082 increment_vec_ptr_by_mask(struct lp_build_nir_context * bld_base,
2083 LLVMValueRef ptr,
2084 LLVMValueRef mask)
2085 {
2086 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2087 LLVMValueRef current_vec = LLVMBuildLoad2(builder, LLVMTypeOf(mask), ptr, "");
2088
2089 current_vec = LLVMBuildSub(builder, current_vec, mask, "");
2090
2091 LLVMBuildStore(builder, current_vec, ptr);
2092 }
2093
2094 static void
clear_uint_vec_ptr_from_mask(struct lp_build_nir_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)2095 clear_uint_vec_ptr_from_mask(struct lp_build_nir_context * bld_base,
2096 LLVMValueRef ptr,
2097 LLVMValueRef mask)
2098 {
2099 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2100 LLVMValueRef current_vec = LLVMBuildLoad2(builder, bld_base->uint_bld.vec_type, ptr, "");
2101
2102 current_vec = lp_build_select(&bld_base->uint_bld,
2103 mask,
2104 bld_base->uint_bld.zero,
2105 current_vec);
2106
2107 LLVMBuildStore(builder, current_vec, ptr);
2108 }
2109
2110 static LLVMValueRef
clamp_mask_to_max_output_vertices(struct lp_build_nir_soa_context * bld,LLVMValueRef current_mask_vec,LLVMValueRef total_emitted_vertices_vec)2111 clamp_mask_to_max_output_vertices(struct lp_build_nir_soa_context * bld,
2112 LLVMValueRef current_mask_vec,
2113 LLVMValueRef total_emitted_vertices_vec)
2114 {
2115 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2116 struct lp_build_context *int_bld = &bld->bld_base.int_bld;
2117 LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
2118 total_emitted_vertices_vec,
2119 bld->max_output_vertices_vec);
2120
2121 return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
2122 }
2123
/*
 * Emit one GS vertex on the given stream for every active lane, then bump
 * the per-primitive and total vertex counters for those lanes.
 */
static void emit_vertex(struct lp_build_nir_context *bld_base, uint32_t stream_id)
{
   struct lp_build_nir_soa_context *soa = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = soa->bld_base.base.gallivm->builder;

   /* Streams beyond what the shader uses are ignored. */
   if (stream_id >= soa->gs_vertex_streams)
      return;

   assert(soa->gs_iface->emit_vertex);

   LLVMValueRef total_emitted =
      LLVMBuildLoad2(builder, soa->bld_base.uint_bld.vec_type,
                     soa->total_emitted_vertices_vec_ptr[stream_id], "");
   /* Never emit past the shader's declared max output vertices. */
   LLVMValueRef mask =
      clamp_mask_to_max_output_vertices(soa, mask_vec(bld_base), total_emitted);

   soa->gs_iface->emit_vertex(soa->gs_iface, &soa->bld_base.base,
                              soa->outputs,
                              total_emitted,
                              mask,
                              lp_build_const_int_vec(soa->bld_base.base.gallivm,
                                                     soa->bld_base.base.type,
                                                     stream_id));

   increment_vec_ptr_by_mask(bld_base, soa->emitted_vertices_vec_ptr[stream_id],
                             mask);
   increment_vec_ptr_by_mask(bld_base, soa->total_emitted_vertices_vec_ptr[stream_id],
                             mask);
}
2148
/*
 * Finish the current primitive on a GS stream for the lanes in "mask".
 * Lanes that have not emitted any vertex since the last end-primitive are
 * filtered out; for the rest, the primitive counter is incremented and the
 * per-primitive vertex counter reset to zero.
 */
static void
end_primitive_masked(struct lp_build_nir_context * bld_base,
                     LLVMValueRef mask, uint32_t stream_id)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   /* Streams the shader doesn't use are silently ignored. */
   if (stream_id >= bld->gs_vertex_streams)
      return;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMValueRef emitted_vertices_vec =
      LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->emitted_vertices_vec_ptr[stream_id], "");
   LLVMValueRef emitted_prims_vec =
      LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->emitted_prims_vec_ptr[stream_id], "");
   LLVMValueRef total_emitted_vertices_vec =
      LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->total_emitted_vertices_vec_ptr[stream_id], "");

   /* Only lanes that emitted at least one vertex actually end a primitive. */
   LLVMValueRef emitted_mask = lp_build_cmp(uint_bld,
                                            PIPE_FUNC_NOTEQUAL,
                                            emitted_vertices_vec,
                                            uint_bld->zero);
   mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
   bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
                                total_emitted_vertices_vec,
                                emitted_vertices_vec, emitted_prims_vec, mask, stream_id);
   /* One more primitive for each masked lane... */
   increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr[stream_id],
                             mask);
   /* ...and restart that lane's per-primitive vertex count. */
   clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
                                mask);
}
2179
/* End the current GS primitive for all currently-active lanes. */
static void end_primitive(struct lp_build_nir_context *bld_base, uint32_t stream_id)
{
   ASSERTED struct lp_build_nir_soa_context *soa = (struct lp_build_nir_soa_context *)bld_base;

   assert(soa->gs_iface->end_primitive);

   end_primitive_masked(bld_base, mask_vec(bld_base), stream_id);
}
2189
2190 static void
emit_prologue(struct lp_build_nir_soa_context * bld)2191 emit_prologue(struct lp_build_nir_soa_context *bld)
2192 {
2193 struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
2194 if (bld->indirects & nir_var_shader_in && !bld->gs_iface && !bld->tcs_iface && !bld->tes_iface) {
2195 uint32_t num_inputs = bld->num_inputs;
2196 /* If this is an indirect case, the number of inputs should not be 0 */
2197 assert(num_inputs > 0);
2198
2199 unsigned index, chan;
2200 LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2201 LLVMValueRef array_size = lp_build_const_int32(gallivm, num_inputs * 4);
2202 bld->inputs_array = lp_build_array_alloca(gallivm,
2203 vec_type, array_size,
2204 "input_array");
2205
2206 for (index = 0; index < num_inputs; ++index) {
2207 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
2208 LLVMValueRef lindex =
2209 lp_build_const_int32(gallivm, index * 4 + chan);
2210 LLVMValueRef input_ptr =
2211 LLVMBuildGEP2(gallivm->builder, vec_type, bld->inputs_array, &lindex, 1, "");
2212 LLVMValueRef value = bld->inputs[index][chan];
2213 if (value)
2214 LLVMBuildStore(gallivm->builder, value, input_ptr);
2215 }
2216 }
2217 }
2218 }
2219
/* nir_intrinsic_vote_{any,all,ieq,feq}: reduce a per-lane value across all
 * active lanes and broadcast the resulting boolean (0 / ~0) to every lane.
 *
 * For ieq/feq a first scalar loop picks a reference value from the active
 * lanes; a second loop compares/accumulates each active lane against it.
 */
static void emit_vote(struct lp_build_nir_context *bld_base, LLVMValueRef src,
                      nir_intrinsic_instr *instr, LLVMValueRef result[4])
{
   struct gallivm_state * gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   uint32_t bit_size = nir_src_bit_size(instr->src[0]);
   LLVMValueRef exec_mask = mask_vec(bld_base);
   struct lp_build_loop_state loop_state;
   /* Per-lane "is active" condition derived from the exec mask. */
   LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");

   /* res_store accumulates the vote result; eq_store holds the reference
    * value used by the ieq/feq comparisons.
    */
   LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->uint_bld.elem_type, "");
   LLVMValueRef eq_store = lp_build_alloca(gallivm, get_int_bld(bld_base, true, bit_size)->elem_type, "");
   LLVMValueRef init_val = NULL;
   if (instr->intrinsic == nir_intrinsic_vote_ieq ||
       instr->intrinsic == nir_intrinsic_vote_feq) {
      /* for equal we unfortunately have to loop and find the first valid one. */
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
      LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");

      struct lp_build_if_state ifthen;
      lp_build_if(&ifthen, gallivm, if_cond);
      LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
                                                       loop_state.counter, "");
      LLVMBuildStore(builder, value_ptr, eq_store);
      LLVMBuildStore(builder, lp_build_const_int32(gallivm, -1), res_store);
      lp_build_endif(&ifthen);
      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                             NULL, LLVMIntUGE);
      init_val = LLVMBuildLoad2(builder, get_int_bld(bld_base, true, bit_size)->elem_type, eq_store, "");
   } else {
      /* any starts false (0), all starts true (~0). */
      LLVMBuildStore(builder, lp_build_const_int32(gallivm, instr->intrinsic == nir_intrinsic_vote_any ? 0 : -1), res_store);
   }

   /* Main pass: fold every active lane's value into the accumulator. */
   LLVMValueRef res;
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
   LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
                                                    loop_state.counter, "");
   struct lp_build_if_state ifthen;
   LLVMValueRef if_cond;
   if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");

   lp_build_if(&ifthen, gallivm, if_cond);
   res = LLVMBuildLoad2(builder, bld_base->uint_bld.elem_type, res_store, "");

   if (instr->intrinsic == nir_intrinsic_vote_feq) {
      /* Compare against the reference value; bitcast since values are
       * carried around as integers of the same width.
       */
      struct lp_build_context *flt_bld = get_flt_bld(bld_base, bit_size);
      LLVMValueRef tmp = LLVMBuildFCmp(builder, LLVMRealUEQ,
                                       LLVMBuildBitCast(builder, init_val, flt_bld->elem_type, ""),
                                       LLVMBuildBitCast(builder, value_ptr, flt_bld->elem_type, ""), "");
      tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, "");
      res = LLVMBuildAnd(builder, res, tmp, "");
   } else if (instr->intrinsic == nir_intrinsic_vote_ieq) {
      LLVMValueRef tmp = LLVMBuildICmp(builder, LLVMIntEQ, init_val, value_ptr, "");
      tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, "");
      res = LLVMBuildAnd(builder, res, tmp, "");
   } else if (instr->intrinsic == nir_intrinsic_vote_any)
      res = LLVMBuildOr(builder, res, value_ptr, "");
   else
      res = LLVMBuildAnd(builder, res, value_ptr, "");
   LLVMBuildStore(builder, res, res_store);
   lp_build_endif(&ifthen);
   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                          NULL, LLVMIntUGE);
   /* Broadcast the scalar verdict back to a full vector. */
   result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld,
                                         LLVMBuildLoad2(builder, bld_base->uint_bld.elem_type, res_store, ""));
}
2286
/* nir_intrinsic_ballot: build a bitmask with bit i set when lane i is both
 * active and has a true source value; broadcast it to all lanes.
 */
static void emit_ballot(struct lp_build_nir_context *bld_base, LLVMValueRef src, nir_intrinsic_instr *instr, LLVMValueRef result[4])
{
   struct gallivm_state * gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;

   /* Lanes outside the exec mask contribute no ballot bit. */
   src = LLVMBuildAnd(builder, src, mask_vec(bld_base), "");

   /* Scalar accumulator for the bits (alloca starts zeroed). */
   LLVMValueRef bits_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");

   struct lp_build_loop_state loop_state;
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
   LLVMValueRef lane_val = LLVMBuildExtractElement(gallivm->builder, src,
                                                   loop_state.counter, "");
   LLVMValueRef lane_bit = LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1),
                                        loop_state.counter, "");
   LLVMValueRef acc = LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, bits_store, "");
   acc = LLVMBuildOr(builder, acc,
                     LLVMBuildAnd(builder, lane_val, lane_bit, ""), "");
   LLVMBuildStore(builder, acc, bits_store);

   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                          NULL, LLVMIntUGE);

   result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld,
                                         LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, bits_store, ""));
}
2310
/* nir_intrinsic_elect: produce a vector that is ~0 in exactly one lane —
 * the first active lane of the exec mask — and 0 everywhere else.
 */
static void emit_elect(struct lp_build_nir_context *bld_base, LLVMValueRef result[4])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef exec_mask = mask_vec(bld_base);
   struct lp_build_loop_state loop_state;

   /* idx_store: index of the elected lane; found_store: flag that a lane
    * has already been chosen. NOTE(review): relies on lp_build_alloca
    * zero-initializing both slots — confirm in lp_bld_flow.
    */
   LLVMValueRef idx_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
   LLVMValueRef found_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
   LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, exec_mask,
                                                    loop_state.counter, "");
   /* Lane is active when its exec-mask element is all-ones... */
   LLVMValueRef cond = LLVMBuildICmp(gallivm->builder,
                                     LLVMIntEQ,
                                     value_ptr,
                                     lp_build_const_int32(gallivm, -1), "");
   /* ...and no earlier lane has been elected yet. */
   LLVMValueRef cond2 = LLVMBuildICmp(gallivm->builder,
                                      LLVMIntEQ,
                                      LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, found_store, ""),
                                      lp_build_const_int32(gallivm, 0), "");

   cond = LLVMBuildAnd(builder, cond, cond2, "");
   struct lp_build_if_state ifthen;
   lp_build_if(&ifthen, gallivm, cond);
   LLVMBuildStore(builder, lp_build_const_int32(gallivm, 1), found_store);
   LLVMBuildStore(builder, loop_state.counter, idx_store);
   lp_build_endif(&ifthen);
   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                          NULL, LLVMIntUGE);

   /* Set ~0 in the elected lane (lane 0 if no lane was active). */
   result[0] = LLVMBuildInsertElement(builder, bld_base->uint_bld.zero,
                                      lp_build_const_int32(gallivm, -1),
                                      LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, idx_store, ""),
                                      "");
}
2346
2347 #if LLVM_VERSION_MAJOR >= 10
/* nir_intrinsic_shuffle: each lane reads src from the lane named by index.
 * Fast path uses the AVX2 vpermd intrinsic for 8-wide 32-bit vectors;
 * otherwise a scalar loop gathers one element per lane.
 */
static void emit_shuffle(struct lp_build_nir_context *bld_base, LLVMValueRef src, LLVMValueRef index,
                         nir_intrinsic_instr *instr, LLVMValueRef result[4])
{
   assert(instr->intrinsic == nir_intrinsic_shuffle);

   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   uint32_t bit_size = nir_src_bit_size(instr->src[0]);
   uint32_t index_bit_size = nir_src_bit_size(instr->src[1]);
   struct lp_build_context *int_bld = get_int_bld(bld_base, true, bit_size);

   if (util_get_cpu_caps()->has_avx2 && bit_size == 32 && index_bit_size == 32 && int_bld->type.length == 8) {
      /* freeze `src` in case inactive invocations contain poison */
      src = LLVMBuildFreeze(builder, src, "");
      result[0] = lp_build_intrinsic_binary(builder, "llvm.x86.avx2.permd", int_bld->vec_type, src, index);
   } else {
      /* Generic path: loop over the lanes, pick src[index[lane]] for each. */
      LLVMValueRef res_store = lp_build_alloca(gallivm, int_bld->vec_type, "");
      struct lp_build_loop_state loop_state;
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));

      LLVMValueRef index_value = LLVMBuildExtractElement(builder, index, loop_state.counter, "");

      LLVMValueRef src_value = LLVMBuildExtractElement(builder, src, index_value, "");
      /* freeze `src_value` in case an out-of-bounds index or an index into an
       * inactive invocation results in poison
       */
      src_value = LLVMBuildFreeze(builder, src_value, "");

      LLVMValueRef res = LLVMBuildLoad2(builder, int_bld->vec_type, res_store, "");
      res = LLVMBuildInsertElement(builder, res, src_value, loop_state.counter, "");
      LLVMBuildStore(builder, res, res_store);

      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                             NULL, LLVMIntUGE);

      result[0] = LLVMBuildLoad2(builder, int_bld->vec_type, res_store, "");
   }
}
2386 #endif
2387
/* nir_intrinsic_reduce / inclusive_scan / exclusive_scan.
 *
 * LLVM's vector-reduction intrinsics cannot honor the execution mask, so
 * this emits a scalar loop over the lanes: scan_store carries the running
 * scalar accumulator (seeded with the operation's identity value) and, for
 * the scan variants, res_store accumulates the per-lane results.  Float
 * values are carried around bit-cast to integers of the same width.
 */
static void emit_reduce(struct lp_build_nir_context *bld_base, LLVMValueRef src,
                        nir_intrinsic_instr *instr, LLVMValueRef result[4])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   uint32_t bit_size = nir_src_bit_size(instr->src[0]);
   /* can't use llvm reduction intrinsics because of exec_mask */
   LLVMValueRef exec_mask = mask_vec(bld_base);
   struct lp_build_loop_state loop_state;
   nir_op reduction_op = nir_intrinsic_reduction_op(instr);

   LLVMValueRef res_store = NULL;
   LLVMValueRef scan_store;
   struct lp_build_context *int_bld = get_int_bld(bld_base, true, bit_size);

   /* Only the scan variants need a per-lane result vector. */
   if (instr->intrinsic != nir_intrinsic_reduce)
      res_store = lp_build_alloca(gallivm, int_bld->vec_type, "");

   scan_store = lp_build_alloca(gallivm, int_bld->elem_type, "");

   struct lp_build_context elem_bld;
   bool is_flt = reduction_op == nir_op_fadd ||
      reduction_op == nir_op_fmul ||
      reduction_op == nir_op_fmin ||
      reduction_op == nir_op_fmax;
   bool is_unsigned = reduction_op == nir_op_umin ||
      reduction_op == nir_op_umax;

   /* Scalar builder matching the operation's element type (float or int of
    * the right signedness) for the actual arithmetic.
    */
   struct lp_build_context *vec_bld = is_flt ? get_flt_bld(bld_base, bit_size) :
      get_int_bld(bld_base, is_unsigned, bit_size);

   lp_build_context_init(&elem_bld, gallivm, lp_elem_type(vec_bld->type));

   LLVMValueRef store_val = NULL;
   /*
    * Put the identity value for the operation into the storage
    */
   switch (reduction_op) {
   case nir_op_fmin: {
      LLVMValueRef flt_max = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), INFINITY) :
         (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), INFINITY) : lp_build_const_float(gallivm, INFINITY));
      store_val = LLVMBuildBitCast(builder, flt_max, int_bld->elem_type, "");
      break;
   }
   case nir_op_fmax: {
      LLVMValueRef flt_min = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), -INFINITY) :
         (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), -INFINITY) : lp_build_const_float(gallivm, -INFINITY));
      store_val = LLVMBuildBitCast(builder, flt_min, int_bld->elem_type, "");
      break;
   }
   case nir_op_fmul: {
      LLVMValueRef flt_one = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), 1.0) :
         (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), 1.0) : lp_build_const_float(gallivm, 1.0));
      store_val = LLVMBuildBitCast(builder, flt_one, int_bld->elem_type, "");
      break;
   }
   case nir_op_umin:
      switch (bit_size) {
      case 8:
         store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), UINT8_MAX, 0);
         break;
      case 16:
         store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), UINT16_MAX, 0);
         break;
      case 32:
      default:
         store_val = lp_build_const_int32(gallivm, UINT_MAX);
         break;
      case 64:
         store_val = lp_build_const_int64(gallivm, UINT64_MAX);
         break;
      }
      break;
   case nir_op_imin:
      switch (bit_size) {
      case 8:
         store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), INT8_MAX, 0);
         break;
      case 16:
         store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), INT16_MAX, 0);
         break;
      case 32:
      default:
         store_val = lp_build_const_int32(gallivm, INT_MAX);
         break;
      case 64:
         store_val = lp_build_const_int64(gallivm, INT64_MAX);
         break;
      }
      break;
   case nir_op_imax:
      switch (bit_size) {
      case 8:
         store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), INT8_MIN, 0);
         break;
      case 16:
         store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), INT16_MIN, 0);
         break;
      case 32:
      default:
         store_val = lp_build_const_int32(gallivm, INT_MIN);
         break;
      case 64:
         store_val = lp_build_const_int64(gallivm, INT64_MIN);
         break;
      }
      break;
   case nir_op_imul:
      switch (bit_size) {
      case 8:
         store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 1, 0);
         break;
      case 16:
         store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 1, 0);
         break;
      case 32:
      default:
         store_val = lp_build_const_int32(gallivm, 1);
         break;
      case 64:
         store_val = lp_build_const_int64(gallivm, 1);
         break;
      }
      break;
   case nir_op_iand:
      switch (bit_size) {
      case 8:
         store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0xff, 0);
         break;
      case 16:
         store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0xffff, 0);
         break;
      case 32:
      default:
         store_val = lp_build_const_int32(gallivm, 0xffffffff);
         break;
      case 64:
         store_val = lp_build_const_int64(gallivm, 0xffffffffffffffffLL);
         break;
      }
      break;
   default:
      /* add/or/xor: identity is 0 — NOTE(review): assumes lp_build_alloca
       * zero-initializes scan_store, so no explicit store is needed.
       */
      break;
   }
   if (store_val)
      LLVMBuildStore(builder, store_val, scan_store);

   LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");

   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));

   /* Only active lanes participate in the reduction. */
   struct lp_build_if_state ifthen;
   LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
   lp_build_if(&ifthen, gallivm, if_cond);
   LLVMValueRef value = LLVMBuildExtractElement(gallivm->builder, src, loop_state.counter, "");

   LLVMValueRef res = NULL;
   LLVMValueRef scan_val = LLVMBuildLoad2(gallivm->builder, int_bld->elem_type, scan_store, "");
   if (instr->intrinsic != nir_intrinsic_reduce)
      res = LLVMBuildLoad2(gallivm->builder, int_bld->vec_type, res_store, "");

   /* exclusive scan stores the accumulator BEFORE folding in this lane. */
   if (instr->intrinsic == nir_intrinsic_exclusive_scan)
      res = LLVMBuildInsertElement(builder, res, scan_val, loop_state.counter, "");

   if (is_flt) {
      scan_val = LLVMBuildBitCast(builder, scan_val, elem_bld.elem_type, "");
      value = LLVMBuildBitCast(builder, value, elem_bld.elem_type, "");
   }
   switch (reduction_op) {
   case nir_op_fadd:
   case nir_op_iadd:
      scan_val = lp_build_add(&elem_bld, value, scan_val);
      break;
   case nir_op_fmul:
   case nir_op_imul:
      scan_val = lp_build_mul(&elem_bld, value, scan_val);
      break;
   case nir_op_imin:
   case nir_op_umin:
   case nir_op_fmin:
      scan_val = lp_build_min(&elem_bld, value, scan_val);
      break;
   case nir_op_imax:
   case nir_op_umax:
   case nir_op_fmax:
      scan_val = lp_build_max(&elem_bld, value, scan_val);
      break;
   case nir_op_iand:
      scan_val = lp_build_and(&elem_bld, value, scan_val);
      break;
   case nir_op_ior:
      scan_val = lp_build_or(&elem_bld, value, scan_val);
      break;
   case nir_op_ixor:
      scan_val = lp_build_xor(&elem_bld, value, scan_val);
      break;
   default:
      assert(0);
      break;
   }
   if (is_flt)
      scan_val = LLVMBuildBitCast(builder, scan_val, int_bld->elem_type, "");
   LLVMBuildStore(builder, scan_val, scan_store);

   /* inclusive scan stores the accumulator AFTER folding in this lane. */
   if (instr->intrinsic == nir_intrinsic_inclusive_scan) {
      res = LLVMBuildInsertElement(builder, res, scan_val, loop_state.counter, "");
   }

   if (instr->intrinsic != nir_intrinsic_reduce)
      LLVMBuildStore(builder, res, res_store);
   lp_build_endif(&ifthen);

   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                          NULL, LLVMIntUGE);
   /* reduce broadcasts the final scalar; scans return the per-lane vector. */
   if (instr->intrinsic == nir_intrinsic_reduce)
      result[0] = lp_build_broadcast_scalar(int_bld, LLVMBuildLoad2(builder, int_bld->elem_type, scan_store, ""));
   else
      result[0] = LLVMBuildLoad2(builder, int_bld->vec_type, res_store, "");
}
2607
/* readInvocation / readFirstInvocation: pick one lane's value of `src`
 * and broadcast it across the whole vector.
 */
static void emit_read_invocation(struct lp_build_nir_context *bld_base,
                                 LLVMValueRef src,
                                 unsigned bit_size,
                                 LLVMValueRef invoc,
                                 LLVMValueRef result[4])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_context *uint_bld = get_int_bld(bld_base, true, bit_size);

   /* Default lane: the first active invocation (readFirstInvocation). */
   LLVMValueRef lane = first_active_invocation(bld_base);

   /* If we're emitting readInvocation() (as opposed to readFirstInvocation),
    * use the first active channel to pull the invocation index number out of
    * the invocation arg.
    */
   if (invoc)
      lane = LLVMBuildExtractElement(gallivm->builder, invoc, lane, "");

   LLVMValueRef value = LLVMBuildExtractElement(gallivm->builder, src, lane, "");
   result[0] = lp_build_broadcast_scalar(uint_bld, value);
}
2629
2630 static void
emit_interp_at(struct lp_build_nir_context * bld_base,unsigned num_components,nir_variable * var,bool centroid,bool sample,unsigned const_index,LLVMValueRef indir_index,LLVMValueRef offsets[2],LLVMValueRef dst[4])2631 emit_interp_at(struct lp_build_nir_context *bld_base,
2632 unsigned num_components,
2633 nir_variable *var,
2634 bool centroid,
2635 bool sample,
2636 unsigned const_index,
2637 LLVMValueRef indir_index,
2638 LLVMValueRef offsets[2],
2639 LLVMValueRef dst[4])
2640 {
2641 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2642
2643 for (unsigned i = 0; i < num_components; i++) {
2644 dst[i] = bld->fs_iface->interp_fn(bld->fs_iface, &bld_base->base,
2645 const_index + var->data.driver_location, i + var->data.location_frac,
2646 centroid, sample, indir_index, offsets);
2647 }
2648 }
2649
2650 static void
emit_set_vertex_and_primitive_count(struct lp_build_nir_context * bld_base,LLVMValueRef vert_count,LLVMValueRef prim_count)2651 emit_set_vertex_and_primitive_count(struct lp_build_nir_context *bld_base,
2652 LLVMValueRef vert_count,
2653 LLVMValueRef prim_count)
2654 {
2655 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2656 struct gallivm_state *gallivm = bld_base->base.gallivm;
2657 assert(bld->mesh_iface);
2658 LLVMValueRef idx = first_active_invocation(bld_base);
2659
2660 LLVMValueRef vcount = LLVMBuildExtractElement(gallivm->builder,
2661 vert_count, idx, "");
2662 LLVMValueRef pcount = LLVMBuildExtractElement(gallivm->builder,
2663 prim_count, idx, "");
2664
2665 bld->mesh_iface->emit_vertex_and_primitive_count(bld->mesh_iface, &bld_base->base, vcount, pcount);
2666 }
2667
2668 static void
emit_launch_mesh_workgroups(struct lp_build_nir_context * bld_base,LLVMValueRef launch_grid)2669 emit_launch_mesh_workgroups(struct lp_build_nir_context *bld_base,
2670 LLVMValueRef launch_grid)
2671 {
2672 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2673 struct gallivm_state *gallivm = bld_base->base.gallivm;
2674 LLVMTypeRef vec_type = LLVMArrayType(LLVMInt32TypeInContext(gallivm->context), 3);
2675
2676 LLVMValueRef local_invoc_idx = get_local_invocation_index(bld);
2677
2678 vec_type = LLVMPointerType(vec_type, 0);
2679
2680 local_invoc_idx = LLVMBuildExtractElement(gallivm->builder, local_invoc_idx, lp_build_const_int32(gallivm, 0), "");
2681 LLVMValueRef if_cond = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, local_invoc_idx, lp_build_const_int32(gallivm, 0), "");
2682 struct lp_build_if_state ifthen;
2683 lp_build_if(&ifthen, gallivm, if_cond);
2684 LLVMValueRef ptr = bld->payload_ptr;
2685 ptr = LLVMBuildPtrToInt(gallivm->builder, ptr, bld_base->int64_bld.elem_type, "");
2686 for (unsigned i = 0; i < 3; i++) {
2687 LLVMValueRef lg = LLVMBuildExtractValue(gallivm->builder, launch_grid, i, "");
2688 lg = LLVMBuildExtractElement(gallivm->builder, lg, lp_build_const_int32(gallivm, 0), "");
2689 LLVMValueRef this_ptr = LLVMBuildIntToPtr(gallivm->builder, ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), "");
2690 LLVMBuildStore(gallivm->builder, lg, this_ptr);
2691 ptr = LLVMBuildAdd(gallivm->builder, ptr, lp_build_const_int64(gallivm, 4), "");
2692 }
2693 lp_build_endif(&ifthen);
2694 }
2695
2696 static void
emit_call(struct lp_build_nir_context * bld_base,struct lp_build_fn * fn,int num_args,LLVMValueRef * args)2697 emit_call(struct lp_build_nir_context *bld_base,
2698 struct lp_build_fn *fn,
2699 int num_args,
2700 LLVMValueRef *args)
2701 {
2702 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2703
2704 args[0] = mask_vec(bld_base);
2705 args[1] = bld->call_context_ptr;
2706 LLVMBuildCall2(bld_base->base.gallivm->builder, fn->fn_type, fn->fn, args, num_args, "");
2707 }
2708
get_scratch_thread_offsets(struct gallivm_state * gallivm,struct lp_type type,unsigned scratch_size)2709 static LLVMValueRef get_scratch_thread_offsets(struct gallivm_state *gallivm,
2710 struct lp_type type,
2711 unsigned scratch_size)
2712 {
2713 LLVMTypeRef elem_type = lp_build_int_elem_type(gallivm, type);
2714 LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
2715 unsigned i;
2716
2717 if (type.length == 1)
2718 return LLVMConstInt(elem_type, 0, 0);
2719
2720 for (i = 0; i < type.length; ++i)
2721 elems[i] = LLVMConstInt(elem_type, scratch_size * i, 0);
2722
2723 return LLVMConstVector(elems, type.length);
2724 }
2725
/* Load nc channels of bit_size data from per-thread scratch memory.
 *
 * Scratch is a single allocation in which each SIMD lane owns a
 * scratch_size-byte slice, so the per-lane byte offset is biased by
 * lane * scratch_size before the gather. The gather is masked by the
 * exec mask so inactive lanes cannot fault.
 */
static void
emit_load_scratch(struct lp_build_nir_context *bld_base,
                  unsigned nc, unsigned bit_size,
                  LLVMValueRef offset,
                  LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state * gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *load_bld;
   LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);
   LLVMValueRef exec_mask = mask_vec(bld_base);
   /* Splat the scalar scratch base pointer into a vector of i8*. */
   LLVMValueRef scratch_ptr_vec = lp_build_broadcast(gallivm,
                                                     LLVMVectorType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), uint_bld->type.length),
                                                     bld->scratch_ptr);
   load_bld = get_int_bld(bld_base, true, bit_size);

   offset = lp_build_add(uint_bld, offset, thread_offsets);

   for (unsigned c = 0; c < nc; c++) {
      /* Channels are packed consecutively, bit_size / 8 bytes apart. */
      LLVMValueRef chan_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c * (bit_size / 8)));

      outval[c] = lp_build_masked_gather(gallivm, load_bld->type.length, bit_size,
                                         load_bld->vec_type,
                                         lp_vec_add_offset_ptr(bld_base, bit_size,
                                                               scratch_ptr_vec,
                                                               chan_offset),
                                         exec_mask);
      outval[c] = LLVMBuildBitCast(builder, outval[c], load_bld->vec_type, "");
   }
}
2758
/* Store the channels of `dst` selected by `writemask` to per-thread
 * scratch memory.
 *
 * Mirrors emit_load_scratch: the per-lane byte offset is biased by
 * lane * scratch_size, and the scatter is masked by the exec mask so
 * inactive lanes write nothing.
 */
static void
emit_store_scratch(struct lp_build_nir_context *bld_base,
                   unsigned writemask, unsigned nc,
                   unsigned bit_size, LLVMValueRef offset,
                   LLVMValueRef dst)
{
   struct gallivm_state * gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *store_bld;
   LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);
   /* Splat the scalar scratch base pointer into a vector of i8*. */
   LLVMValueRef scratch_ptr_vec = lp_build_broadcast(gallivm,
                                                     LLVMVectorType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), uint_bld->type.length),
                                                     bld->scratch_ptr);
   store_bld = get_int_bld(bld_base, true, bit_size);

   LLVMValueRef exec_mask = mask_vec(bld_base);
   offset = lp_build_add(uint_bld, offset, thread_offsets);

   for (unsigned c = 0; c < nc; c++) {
      if (!(writemask & (1u << c)))
         continue;
      /* Multi-component values arrive as an aggregate; extract channel c. */
      LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");

      /* Channels are packed consecutively, bit_size / 8 bytes apart. */
      LLVMValueRef chan_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c * (bit_size / 8)));

      val = LLVMBuildBitCast(builder, val, store_bld->vec_type, "");

      lp_build_masked_scatter(gallivm, store_bld->type.length, bit_size,
                              lp_vec_add_offset_ptr(bld_base, bit_size,
                                                    scratch_ptr_vec, chan_offset),
                              val, exec_mask);
   }
}
2794
2795 static void
emit_clock(struct lp_build_nir_context * bld_base,LLVMValueRef dst[4])2796 emit_clock(struct lp_build_nir_context *bld_base,
2797 LLVMValueRef dst[4])
2798 {
2799 struct gallivm_state *gallivm = bld_base->base.gallivm;
2800 LLVMBuilderRef builder = gallivm->builder;
2801 struct lp_build_context *uint_bld = get_int_bld(bld_base, true, 32);
2802
2803 lp_init_clock_hook(gallivm);
2804
2805 LLVMTypeRef get_time_type = LLVMFunctionType(LLVMInt64TypeInContext(gallivm->context), NULL, 0, 1);
2806 LLVMValueRef result = LLVMBuildCall2(builder, get_time_type, gallivm->get_time_hook, NULL, 0, "");
2807
2808 LLVMValueRef hi = LLVMBuildShl(builder, result, lp_build_const_int64(gallivm, 32), "");
2809 hi = LLVMBuildTrunc(builder, hi, uint_bld->elem_type, "");
2810 LLVMValueRef lo = LLVMBuildTrunc(builder, result, uint_bld->elem_type, "");
2811 dst[0] = lp_build_broadcast_scalar(uint_bld, lo);
2812 dst[1] = lp_build_broadcast_scalar(uint_bld, hi);
2813 }
2814
2815 LLVMTypeRef
lp_build_cs_func_call_context(struct gallivm_state * gallivm,int length,LLVMTypeRef context_type,LLVMTypeRef resources_type)2816 lp_build_cs_func_call_context(struct gallivm_state *gallivm, int length,
2817 LLVMTypeRef context_type, LLVMTypeRef resources_type)
2818 {
2819 LLVMTypeRef args[LP_NIR_CALL_CONTEXT_MAX_ARGS];
2820
2821 args[LP_NIR_CALL_CONTEXT_CONTEXT] = LLVMPointerType(context_type, 0);
2822 args[LP_NIR_CALL_CONTEXT_RESOURCES] = LLVMPointerType(resources_type, 0);
2823 args[LP_NIR_CALL_CONTEXT_SHARED] = LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0); /* shared_ptr */
2824 args[LP_NIR_CALL_CONTEXT_SCRATCH] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); /* scratch ptr */
2825 args[LP_NIR_CALL_CONTEXT_WORK_DIM] = LLVMInt32TypeInContext(gallivm->context); /* work_dim */
2826 args[LP_NIR_CALL_CONTEXT_THREAD_ID_0] = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), length); /* system_values.thread_id[0] */
2827 args[LP_NIR_CALL_CONTEXT_THREAD_ID_1] = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), length); /* system_values.thread_id[1] */
2828 args[LP_NIR_CALL_CONTEXT_THREAD_ID_2] = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), length); /* system_values.thread_id[2] */
2829 args[LP_NIR_CALL_CONTEXT_BLOCK_ID_0] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_id[0] */
2830 args[LP_NIR_CALL_CONTEXT_BLOCK_ID_1] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_id[1] */
2831 args[LP_NIR_CALL_CONTEXT_BLOCK_ID_2] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_id[2] */
2832
2833 args[LP_NIR_CALL_CONTEXT_GRID_SIZE_0] = LLVMInt32TypeInContext(gallivm->context); /* system_values.grid_size[0] */
2834 args[LP_NIR_CALL_CONTEXT_GRID_SIZE_1] = LLVMInt32TypeInContext(gallivm->context); /* system_values.grid_size[1] */
2835 args[LP_NIR_CALL_CONTEXT_GRID_SIZE_2] = LLVMInt32TypeInContext(gallivm->context); /* system_values.grid_size[2] */
2836 args[LP_NIR_CALL_CONTEXT_BLOCK_SIZE_0] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_size[0] */
2837 args[LP_NIR_CALL_CONTEXT_BLOCK_SIZE_1] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_size[1] */
2838 args[LP_NIR_CALL_CONTEXT_BLOCK_SIZE_2] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_size[2] */
2839
2840 LLVMTypeRef stype = LLVMStructTypeInContext(gallivm->context, args, LP_NIR_CALL_CONTEXT_MAX_ARGS, 0);
2841 return stype;
2842 }
2843
/* Populate the call-context struct (layout defined by
 * lp_build_cs_func_call_context) with the current shader's pointers and
 * system values, and store it to a fresh alloca. emit_call later passes
 * this alloca as the callee's second argument.
 */
static void
build_call_context(struct lp_build_nir_soa_context *bld)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   bld->call_context_ptr = lp_build_alloca(gallivm, bld->call_context_type, "callcontext");
   LLVMValueRef call_context = LLVMGetUndef(bld->call_context_type);
   call_context = LLVMBuildInsertValue(gallivm->builder,
                                       call_context, bld->context_ptr, LP_NIR_CALL_CONTEXT_CONTEXT, "");
   call_context = LLVMBuildInsertValue(gallivm->builder,
                                       call_context, bld->resources_ptr, LP_NIR_CALL_CONTEXT_RESOURCES, "");
   /* Shared and scratch pointers are optional; insert a null pointer when
    * the current shader has none so the struct is fully defined.
    */
   if (bld->shared_ptr) {
      call_context = LLVMBuildInsertValue(gallivm->builder,
                                          call_context, bld->shared_ptr, LP_NIR_CALL_CONTEXT_SHARED, "");
   } else {
      call_context = LLVMBuildInsertValue(gallivm->builder, call_context,
                                          LLVMConstNull(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0)),
                                          LP_NIR_CALL_CONTEXT_SHARED, "");
   }
   if (bld->scratch_ptr) {
      call_context = LLVMBuildInsertValue(gallivm->builder,
                                          call_context, bld->scratch_ptr, LP_NIR_CALL_CONTEXT_SCRATCH, "");
   } else {
      call_context = LLVMBuildInsertValue(gallivm->builder, call_context,
                                          LLVMConstNull(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0)),
                                          LP_NIR_CALL_CONTEXT_SCRATCH, "");
   }
   /* System values, in the order of the LP_NIR_CALL_CONTEXT_* indices. */
   call_context = LLVMBuildInsertValue(gallivm->builder,
                                       call_context, bld->system_values.work_dim, LP_NIR_CALL_CONTEXT_WORK_DIM, "");
   call_context = LLVMBuildInsertValue(gallivm->builder,
                                       call_context, bld->system_values.thread_id[0], LP_NIR_CALL_CONTEXT_THREAD_ID_0, "");
   call_context = LLVMBuildInsertValue(gallivm->builder,
                                       call_context, bld->system_values.thread_id[1], LP_NIR_CALL_CONTEXT_THREAD_ID_1, "");
   call_context = LLVMBuildInsertValue(gallivm->builder,
                                       call_context, bld->system_values.thread_id[2], LP_NIR_CALL_CONTEXT_THREAD_ID_2, "");
   call_context = LLVMBuildInsertValue(gallivm->builder,
                                       call_context, bld->system_values.block_id[0], LP_NIR_CALL_CONTEXT_BLOCK_ID_0, "");
   call_context = LLVMBuildInsertValue(gallivm->builder,
                                       call_context, bld->system_values.block_id[1], LP_NIR_CALL_CONTEXT_BLOCK_ID_1, "");
   call_context = LLVMBuildInsertValue(gallivm->builder,
                                       call_context, bld->system_values.block_id[2], LP_NIR_CALL_CONTEXT_BLOCK_ID_2, "");
   call_context = LLVMBuildInsertValue(gallivm->builder,
                                       call_context, bld->system_values.grid_size[0], LP_NIR_CALL_CONTEXT_GRID_SIZE_0, "");
   call_context = LLVMBuildInsertValue(gallivm->builder,
                                       call_context, bld->system_values.grid_size[1], LP_NIR_CALL_CONTEXT_GRID_SIZE_1, "");
   call_context = LLVMBuildInsertValue(gallivm->builder,
                                       call_context, bld->system_values.grid_size[2], LP_NIR_CALL_CONTEXT_GRID_SIZE_2, "");
   call_context = LLVMBuildInsertValue(gallivm->builder,
                                       call_context, bld->system_values.block_size[0], LP_NIR_CALL_CONTEXT_BLOCK_SIZE_0, "");
   call_context = LLVMBuildInsertValue(gallivm->builder,
                                       call_context, bld->system_values.block_size[1], LP_NIR_CALL_CONTEXT_BLOCK_SIZE_1, "");
   call_context = LLVMBuildInsertValue(gallivm->builder,
                                       call_context, bld->system_values.block_size[2], LP_NIR_CALL_CONTEXT_BLOCK_SIZE_2, "");
   LLVMBuildStore(gallivm->builder, call_context, bld->call_context_ptr);
}
2898
/**
 * Translate one NIR function into SOA (structure-of-arrays) LLVM IR.
 *
 * Initializes a lp_build_nir_soa_context — per-bit-width build
 * contexts, the emit-callback table consumed by lp_build_nir_llvm(),
 * and all the state carried over from \p params — then walks the
 * shader body.  For geometry shaders it also emits the per-stream
 * epilogue after the body.
 *
 * \param gallivm  gallivm state (LLVM context/module/builder).
 * \param shader   the NIR shader being compiled; expected to have had
 *                 lp_build_nir_prepasses() applied (see
 *                 lp_build_nir_soa()).
 * \param impl     the nir_function_impl to translate.
 * \param params   translation parameters (type, context pointers,
 *                 system values, stage interface vtables, ...).
 * \param outputs  per-output, per-channel output storage.
 */
void lp_build_nir_soa_func(struct gallivm_state *gallivm,
                           struct nir_shader *shader,
                           nir_function_impl *impl,
                           const struct lp_build_tgsi_params *params,
                           LLVMValueRef (*outputs)[4])
{
   struct lp_build_nir_soa_context bld;
   const struct lp_type type = params->type;
   struct lp_type res_type;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   /* Signed variant of the base type, same width/length. */
   memset(&res_type, 0, sizeof res_type);
   res_type.width = type.width;
   res_type.length = type.length;
   res_type.sign = 1;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.bld_base.base, gallivm, type);
   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
   lp_build_context_init(&bld.uint_elem_bld, gallivm, lp_elem_type(lp_uint_type(type)));
   /* Derived build contexts: same vector length as the base type, with
    * the element width scaled to 2x (dbl/64-bit), 1/2x (half/16-bit)
    * and 1/4x (8-bit) of the base width. */
   {
      struct lp_type dbl_type;
      dbl_type = type;
      dbl_type.width *= 2;
      lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
   }
   {
      struct lp_type half_type;
      half_type = type;
      half_type.width /= 2;
      lp_build_context_init(&bld.bld_base.half_bld, gallivm, half_type);
   }
   {
      struct lp_type uint64_type;
      uint64_type = lp_uint_type(type);
      uint64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
   }
   {
      struct lp_type int64_type;
      int64_type = lp_int_type(type);
      int64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
   }
   {
      struct lp_type uint16_type;
      uint16_type = lp_uint_type(type);
      uint16_type.width /= 2;
      lp_build_context_init(&bld.bld_base.uint16_bld, gallivm, uint16_type);
   }
   {
      struct lp_type int16_type;
      int16_type = lp_int_type(type);
      int16_type.width /= 2;
      lp_build_context_init(&bld.bld_base.int16_bld, gallivm, int16_type);
   }
   {
      struct lp_type uint8_type;
      uint8_type = lp_uint_type(type);
      uint8_type.width /= 4;
      lp_build_context_init(&bld.bld_base.uint8_bld, gallivm, uint8_type);
   }
   {
      struct lp_type int8_type;
      int8_type = lp_int_type(type);
      int8_type.width /= 4;
      lp_build_context_init(&bld.bld_base.int8_bld, gallivm, int8_type);
   }
   /* Emit-callback table consumed by the generic NIR->LLVM walker. */
   bld.bld_base.load_var = emit_load_var;
   bld.bld_base.store_var = emit_store_var;
   bld.bld_base.load_reg = emit_load_reg;
   bld.bld_base.store_reg = emit_store_reg;
   bld.bld_base.emit_var_decl = emit_var_decl;
   bld.bld_base.load_ubo = emit_load_ubo;
   bld.bld_base.load_kernel_arg = emit_load_kernel_arg;
   bld.bld_base.load_global = emit_load_global;
   bld.bld_base.store_global = emit_store_global;
   bld.bld_base.atomic_global = emit_atomic_global;
   bld.bld_base.tex = emit_tex;
   bld.bld_base.tex_size = emit_tex_size;
   bld.bld_base.bgnloop = bgnloop;
   bld.bld_base.endloop = endloop;
   bld.bld_base.if_cond = if_cond;
   bld.bld_base.else_stmt = else_stmt;
   bld.bld_base.endif_stmt = endif_stmt;
   bld.bld_base.break_stmt = break_stmt;
   bld.bld_base.continue_stmt = continue_stmt;
   bld.bld_base.sysval_intrin = emit_sysval_intrin;
   bld.bld_base.discard = discard;
   bld.bld_base.emit_vertex = emit_vertex;
   bld.bld_base.end_primitive = end_primitive;
   bld.bld_base.load_mem = emit_load_mem;
   bld.bld_base.store_mem = emit_store_mem;
   bld.bld_base.get_ssbo_size = emit_get_ssbo_size;
   bld.bld_base.atomic_mem = emit_atomic_mem;
   bld.bld_base.barrier = emit_barrier;
   bld.bld_base.image_op = emit_image_op;
   bld.bld_base.image_size = emit_image_size;
   bld.bld_base.vote = emit_vote;
   bld.bld_base.elect = emit_elect;
   bld.bld_base.reduce = emit_reduce;
   bld.bld_base.ballot = emit_ballot;
#if LLVM_VERSION_MAJOR >= 10
   bld.bld_base.shuffle = emit_shuffle;
#endif
   bld.bld_base.read_invocation = emit_read_invocation;
   bld.bld_base.helper_invocation = emit_helper_invocation;
   bld.bld_base.interp_at = emit_interp_at;
   bld.bld_base.call = emit_call;
   bld.bld_base.load_scratch = emit_load_scratch;
   bld.bld_base.store_scratch = emit_store_scratch;
   bld.bld_base.load_const = emit_load_const;
   bld.bld_base.clock = emit_clock;
   bld.bld_base.set_vertex_and_primitive_count = emit_set_vertex_and_primitive_count;
   bld.bld_base.launch_mesh_workgroups = emit_launch_mesh_workgroups;

   /* Copy over caller-supplied state (JIT context/resources pointers,
    * inputs/outputs, sampler and image state, etc). */
   bld.bld_base.fns = params->fns;
   bld.bld_base.func = params->current_func;
   bld.mask = params->mask;
   bld.inputs = params->inputs;
   bld.outputs = outputs;
   bld.consts_ptr = params->consts_ptr;
   bld.ssbo_ptr = params->ssbo_ptr;
   bld.sampler = params->sampler;

   bld.context_type = params->context_type;
   bld.context_ptr = params->context_ptr;
   bld.resources_type = params->resources_type;
   bld.resources_ptr = params->resources_ptr;
   bld.thread_data_type = params->thread_data_type;
   bld.thread_data_ptr = params->thread_data_ptr;
   bld.bld_base.aniso_filter_table = params->aniso_filter_table;
   bld.image = params->image;
   bld.shared_ptr = params->shared_ptr;
   bld.payload_ptr = params->payload_ptr;
   bld.coro = params->coro;
   bld.kernel_args_ptr = params->kernel_args;
   bld.num_inputs = params->num_inputs;
   bld.indirects = 0;
   if (shader->info.inputs_read_indirectly)
      bld.indirects |= nir_var_shader_in;

   /* Stage interface vtables; only the one for the current stage is
    * expected to be non-NULL. */
   bld.gs_iface = params->gs_iface;
   bld.tcs_iface = params->tcs_iface;
   bld.tes_iface = params->tes_iface;
   bld.fs_iface = params->fs_iface;
   bld.mesh_iface = params->mesh_iface;
   if (bld.gs_iface) {
      struct lp_build_context *uint_bld = &bld.bld_base.uint_bld;

      /* Per-stream counters for emitted primitives/vertices, updated
       * as the GS body runs and consumed by the epilogue below. */
      bld.gs_vertex_streams = params->gs_vertex_streams;
      bld.max_output_vertices_vec = lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
                                                           shader->info.gs.vertices_out);
      for (int i = 0; i < params->gs_vertex_streams; i++) {
         bld.emitted_prims_vec_ptr[i] =
            lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_prims_ptr");
         bld.emitted_vertices_vec_ptr[i] =
            lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_vertices_ptr");
         bld.total_emitted_vertices_vec_ptr[i] =
            lp_build_alloca(gallivm, uint_bld->vec_type, "total_emitted_vertices_ptr");
      }
   }
   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);

   if (params->system_values)
      bld.system_values = *params->system_values;

   bld.bld_base.shader = shader;

   /* Scratch: use the caller's buffer if given, otherwise allocate
    * scratch_size bytes per SIMD lane (hence the * type.length). */
   bld.scratch_size = ALIGN(shader->scratch_size, 8);
   if (params->scratch_ptr)
      bld.scratch_ptr = params->scratch_ptr;
   else if (shader->scratch_size) {
      bld.scratch_ptr = lp_build_array_alloca(gallivm,
                                              LLVMInt8TypeInContext(gallivm->context),
                                              lp_build_const_int32(gallivm, bld.scratch_size * type.length),
                                              "scratch");
   }

   /* Compute kernels: build (or adopt) the call-context struct passed
    * to called functions. */
   if (shader->info.stage == MESA_SHADER_KERNEL) {
      bld.call_context_type = lp_build_cs_func_call_context(gallivm, type.length, bld.context_type, bld.resources_type);
      if (!params->call_context_ptr) {
         build_call_context(&bld);
      } else
         bld.call_context_ptr = params->call_context_ptr;
   }

   emit_prologue(&bld);
   lp_build_nir_llvm(&bld.bld_base, shader, impl);

   /* GS epilogue: close any pending primitive on each stream and hand
    * the final vertex/primitive counts to the interface. */
   if (bld.gs_iface) {
      LLVMBuilderRef builder = bld.bld_base.base.gallivm->builder;
      LLVMValueRef total_emitted_vertices_vec;
      LLVMValueRef emitted_prims_vec;

      for (int i = 0; i < params->gs_vertex_streams; i++) {
         end_primitive_masked(&bld.bld_base, lp_build_mask_value(bld.mask), i);

         total_emitted_vertices_vec =
            LLVMBuildLoad2(builder, bld.bld_base.uint_bld.vec_type, bld.total_emitted_vertices_vec_ptr[i], "");

         emitted_prims_vec =
            LLVMBuildLoad2(builder, bld.bld_base.uint_bld.vec_type, bld.emitted_prims_vec_ptr[i], "");
         bld.gs_iface->gs_epilogue(bld.gs_iface,
                                   total_emitted_vertices_vec,
                                   emitted_prims_vec, i);
      }
   }
   lp_exec_mask_fini(&bld.exec_mask);
}
3112
/*
 * Public entry point: run the common NIR lowering prepasses, then
 * translate the shader's entry point with lp_build_nir_soa_func().
 */
void lp_build_nir_soa(struct gallivm_state *gallivm,
                      struct nir_shader *shader,
                      const struct lp_build_tgsi_params *params,
                      LLVMValueRef (*outputs)[4])
{
   nir_function_impl *entrypoint;

   lp_build_nir_prepasses(shader);
   entrypoint = nir_shader_get_entrypoint(shader);
   lp_build_nir_soa_func(gallivm, shader, entrypoint, params, outputs);
}
3123