1 /**************************************************************************
2 *
3 * Copyright 2019 Red Hat.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **************************************************************************/
25
26 #include "lp_bld_nir.h"
27 #include "lp_bld_init.h"
28 #include "lp_bld_flow.h"
29 #include "lp_bld_logic.h"
30 #include "lp_bld_gather.h"
31 #include "lp_bld_const.h"
32 #include "lp_bld_struct.h"
33 #include "lp_bld_jit_types.h"
34 #include "lp_bld_arit.h"
35 #include "lp_bld_bitarit.h"
36 #include "lp_bld_coro.h"
37 #include "lp_bld_printf.h"
38 #include "lp_bld_intr.h"
39 #include "util/u_cpu_detect.h"
40 #include "util/u_math.h"
41
/* Map a value's bit width to log2 of its byte size, i.e. the shift amount
 * used to scale an element index into a byte offset.  Unknown widths are
 * treated as 32-bit.
 */
static int bit_size_to_shift_size(int bit_size)
{
   if (bit_size == 64)
      return 3;
   if (bit_size == 16)
      return 1;
   if (bit_size == 8)
      return 0;
   /* 32-bit, and the fallback for any unexpected width. */
   return 2;
}
56
57 /*
58 * Combine the global mask if there is one with the current execution mask.
59 */
60 static LLVMValueRef
mask_vec(struct lp_build_nir_context * bld_base)61 mask_vec(struct lp_build_nir_context *bld_base)
62 {
63 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
64
65 struct lp_exec_mask *exec_mask = &bld->exec_mask;
66 LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
67 if (!exec_mask->has_mask)
68 return bld_mask;
69 if (!bld_mask)
70 return exec_mask->exec_mask;
71
72 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
73 return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
74 exec_mask->exec_mask, "");
75 }
76
77 /*
78 * Use the execution mask if there is one, otherwise don't mask (ignore global mask).
79 * This allows helper invocations to run, which are necessary for correct derivatives.
80 */
81 static LLVMValueRef
mask_vec_with_helpers(struct lp_build_nir_context * bld_base)82 mask_vec_with_helpers(struct lp_build_nir_context *bld_base)
83 {
84 if (bld_base->shader->info.stage != MESA_SHADER_FRAGMENT)
85 return mask_vec(bld_base); /* No helper invocations needed. */
86
87 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
88
89 struct lp_exec_mask *exec_mask = &bld->exec_mask;
90 if (exec_mask->has_mask)
91 return exec_mask->exec_mask;
92
93 return lp_build_const_int_vec(bld_base->base.gallivm,
94 bld_base->uint_bld.type, -1);
95 }
96
97 static bool
invocation_0_must_be_active(struct lp_build_nir_context * bld_base)98 invocation_0_must_be_active(struct lp_build_nir_context *bld_base)
99 {
100 struct lp_build_nir_soa_context * bld = (struct lp_build_nir_soa_context *)bld_base;
101
102 /* Fragment shaders may dispatch with invocation 0 inactive. All other
103 * stages have invocation 0 active at the top. (See
104 * lp_build_tgsi_params.mask setup in draw_llvm.c and lp_state_*.c)
105 */
106 if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT)
107 return false;
108
109 /* If we're in some control flow right now, then invocation 0 may be
110 * disabled.
111 */
112 if (bld->exec_mask.has_mask)
113 return false;
114
115 return true;
116 }
117
118 /**
119 * Returns a scalar value of the first active invocation in the exec_mask.
120 *
121 * Note that gallivm doesn't generally jump when exec_mask is 0 (such as if/else
 * branches that are all false, or portions of a loop after a break/continue
123 * has ended the last invocation that had been active in the loop). In that
124 * case, we return a 0 value so that unconditional LLVMBuildExtractElement of
125 * the first_active_invocation (such as in memory loads, texture unit index
126 * lookups, etc) will use a valid index
127 */
static LLVMValueRef first_active_invocation(struct lp_build_nir_context *bld_base)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;

   /* Fast path: lane 0 is known to be live, no mask math needed. */
   if (invocation_0_must_be_active(bld_base))
      return lp_build_const_int32(gallivm, 0);

   LLVMValueRef exec_mask = mask_vec(bld_base);

   /* Compare each lane against 0 to get an <N x i1> active-lane vector. */
   LLVMValueRef bitmask = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "exec_bitvec");
   /* Turn it from N x i1 to iN, then extend it up to i32 so we can use a single
    * cttz intrinsic -- I assume the compiler will drop the extend if there are
    * smaller instructions available, since we have is_zero_poison.
    */
   bitmask = LLVMBuildBitCast(builder, bitmask, LLVMIntTypeInContext(gallivm->context, uint_bld->type.length), "exec_bitmask");
   bitmask = LLVMBuildZExt(builder, bitmask, bld_base->int_bld.elem_type, "");

   /* Any bit set at all?  Used below to pick the 0 fallback. */
   LLVMValueRef any_active = LLVMBuildICmp(builder, LLVMIntNE, bitmask, lp_build_const_int32(gallivm, 0), "any_active");

   /* llvm.cttz.i32 with is_zero_poison = false (second operand): counting
    * trailing zeros of the bitmask yields the first set lane.
    */
   LLVMValueRef first_active = lp_build_intrinsic_binary(builder, "llvm.cttz.i32", bld_base->int_bld.elem_type, bitmask,
                                                         LLVMConstInt(LLVMInt1TypeInContext(gallivm->context), false, false));

   /* All lanes dead: return 0 so callers can still extract a valid index. */
   return LLVMBuildSelect(builder, any_active, first_active, lp_build_const_int32(gallivm, 0), "first_active_or_0");
}
154
155 static LLVMValueRef
lp_build_zero_bits(struct gallivm_state * gallivm,int bit_size,bool is_float)156 lp_build_zero_bits(struct gallivm_state *gallivm, int bit_size, bool is_float)
157 {
158 if (bit_size == 64)
159 return LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 0, 0);
160 else if (bit_size == 16)
161 return LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0, 0);
162 else if (bit_size == 8)
163 return LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0, 0);
164 else
165 return is_float ? lp_build_const_float(gallivm, 0) : lp_build_const_int32(gallivm, 0);
166 }
167
168 static LLVMValueRef
emit_fetch_64bit(struct lp_build_nir_context * bld_base,LLVMValueRef input,LLVMValueRef input2)169 emit_fetch_64bit(
170 struct lp_build_nir_context * bld_base,
171 LLVMValueRef input,
172 LLVMValueRef input2)
173 {
174 struct gallivm_state *gallivm = bld_base->base.gallivm;
175 LLVMBuilderRef builder = gallivm->builder;
176 LLVMValueRef res;
177 int i;
178 LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
179 int len = bld_base->base.type.length * 2;
180 assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
181
182 for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
183 #if UTIL_ARCH_LITTLE_ENDIAN
184 shuffles[i] = lp_build_const_int32(gallivm, i / 2);
185 shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
186 #else
187 shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
188 shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
189 #endif
190 }
191 res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
192
193 return LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
194 }
195
/* Split a vector of 64-bit values into its two 32-bit half vectors
 * (the inverse of emit_fetch_64bit): split_values[0] receives the
 * low-memory-address half of each element, split_values[1] the other.
 */
static void
emit_store_64bit_split(struct lp_build_nir_context *bld_base,
                       LLVMValueRef value,
                       LLVMValueRef split_values[2])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   unsigned i;
   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
   LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
   int len = bld_base->base.type.length * 2;

   /* View the 64-bit vector as 2*length floats so single shuffles can
    * pick out the interleaved halves.
    */
   value = LLVMBuildBitCast(gallivm->builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), len), "");
   for (i = 0; i < bld_base->base.type.length; i++) {
#if UTIL_ARCH_LITTLE_ENDIAN
      shuffles[i] = lp_build_const_int32(gallivm, i * 2);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
#else
      shuffles[i] = lp_build_const_int32(gallivm, i * 2 + 1);
      shuffles2[i] = lp_build_const_int32(gallivm, i * 2);
#endif
   }

   split_values[0] = LLVMBuildShuffleVector(builder, value,
                                            LLVMGetUndef(LLVMTypeOf(value)),
                                            LLVMConstVector(shuffles,
                                                            bld_base->base.type.length),
                                            "");
   split_values[1] = LLVMBuildShuffleVector(builder, value,
                                            LLVMGetUndef(LLVMTypeOf(value)),
                                            LLVMConstVector(shuffles2,
                                                            bld_base->base.type.length),
                                            "");
}
230
231 static void
emit_store_64bit_chan(struct lp_build_nir_context * bld_base,LLVMValueRef chan_ptr,LLVMValueRef chan_ptr2,LLVMValueRef value)232 emit_store_64bit_chan(struct lp_build_nir_context *bld_base,
233 LLVMValueRef chan_ptr,
234 LLVMValueRef chan_ptr2,
235 LLVMValueRef value)
236 {
237 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
238 struct lp_build_context *float_bld = &bld_base->base;
239 LLVMValueRef split_vals[2];
240
241 emit_store_64bit_split(bld_base, value, split_vals);
242
243 lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[0], chan_ptr);
244 lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[1], chan_ptr2);
245 }
246
247 static LLVMValueRef
get_soa_array_offsets(struct lp_build_context * uint_bld,LLVMValueRef indirect_index,int num_components,unsigned chan_index,bool need_perelement_offset)248 get_soa_array_offsets(struct lp_build_context *uint_bld,
249 LLVMValueRef indirect_index,
250 int num_components,
251 unsigned chan_index,
252 bool need_perelement_offset)
253 {
254 struct gallivm_state *gallivm = uint_bld->gallivm;
255 LLVMValueRef chan_vec =
256 lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
257 LLVMValueRef length_vec =
258 lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
259 LLVMValueRef index_vec;
260
261 /* index_vec = (indirect_index * num_components + chan_index) * length + offsets */
262 index_vec = lp_build_mul(uint_bld, indirect_index, lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, num_components));
263 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
264 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
265
266 if (need_perelement_offset) {
267 LLVMValueRef pixel_offsets;
268 unsigned i;
269 /* build pixel offset vector: {0, 1, 2, 3, ...} */
270 pixel_offsets = uint_bld->undef;
271 for (i = 0; i < uint_bld->type.length; i++) {
272 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
273 pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
274 ii, ii, "");
275 }
276 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
277 }
278 return index_vec;
279 }
280
/**
 * Gather scalar values from base_ptr at the per-lane indices in 'indexes'
 * and assemble them into one result vector.
 *
 * When 'indexes2' is non-NULL the gather produces a double-width float
 * vector (for 64-bit data): even result lanes load via 'indexes', odd lanes
 * via 'indexes2', and the pair is bitcast to the double vector type at the
 * end of the overflow path.  'overflow_mask' lanes, if given, are forced to
 * index 0 for the load and their results zeroed afterwards.
 */
static LLVMValueRef
build_gather(struct lp_build_nir_context *bld_base,
             struct lp_build_context *bld,
             LLVMTypeRef base_type,
             LLVMValueRef base_ptr,
             LLVMValueRef indexes,
             LLVMValueRef overflow_mask,
             LLVMValueRef indexes2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMValueRef res;
   unsigned i;

   if (indexes2)
      res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
   else
      res = bld->undef;
   /*
    * overflow_mask is a vector telling us which channels
    * in the vector overflowed. We use the overflow behavior for
    * constant buffers which is defined as:
    * Out of bounds access to constant buffer returns 0 in all
    * components. Out of bounds behavior is always with respect
    * to the size of the buffer bound at that slot.
    */

   if (overflow_mask) {
      /*
       * We avoid per-element control flow here (also due to llvm going crazy,
       * though I suspect it's better anyway since overflow is likely rare).
       * Note that since we still fetch from buffers even if num_elements was
       * zero (in this case we'll fetch from index zero) the jit func callers
       * MUST provide valid fake constant buffers of size 4x32 (the values do
       * not matter), otherwise we'd still need (not per element though)
       * control flow.
       */
      indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
      if (indexes2)
         indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
   }

   /*
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    */
   for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
      LLVMValueRef si, di;
      LLVMValueRef index;
      LLVMValueRef scalar_ptr, scalar;

      /* di: destination lane; si: source lane in the index vector.  With
       * indexes2, each index-vector lane feeds two destination lanes.
       */
      di = lp_build_const_int32(gallivm, i);
      if (indexes2)
         si = lp_build_const_int32(gallivm, i >> 1);
      else
         si = di;

      /* Odd lanes pull their index from indexes2 (64-bit second half). */
      if (indexes2 && (i & 1)) {
         index = LLVMBuildExtractElement(builder,
                                         indexes2, si, "");
      } else {
         index = LLVMBuildExtractElement(builder,
                                         indexes, si, "");
      }

      scalar_ptr = LLVMBuildGEP2(builder, base_type, base_ptr, &index, 1, "gather_ptr");
      scalar = LLVMBuildLoad2(builder, base_type, scalar_ptr, "");

      res = LLVMBuildInsertElement(builder, res, scalar, di, "");
   }

   if (overflow_mask) {
      if (indexes2) {
         /* Zero out overflowed lanes at double granularity. */
         res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
         overflow_mask = LLVMBuildSExt(builder, overflow_mask,
                                       bld_base->dbl_bld.int_vec_type, "");
         res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
                               bld_base->dbl_bld.zero, res);
      } else
         res = lp_build_select(bld, overflow_mask, bld->zero, res);
   }

   return res;
}
365
366 /**
367 * Scatter/store vector.
368 */
/* Scatter the lanes of 'values' to base_ptr at the per-lane 'indexes'.
 * Lanes whose mask bit is clear keep the destination's previous contents
 * (implemented as load + select + store, not a skipped store).
 */
static void
emit_mask_scatter(struct lp_build_nir_soa_context *bld,
                  LLVMValueRef base_ptr,
                  LLVMValueRef indexes,
                  LLVMValueRef values,
                  struct lp_exec_mask *mask)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   unsigned i;
   /* No predicate when no control-flow mask is active: store every lane. */
   LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;

   /*
    * Loop over elements of index_vec, store scalar value.
    */
   for (i = 0; i < bld->bld_base.base.type.length; i++) {
      LLVMValueRef ii = lp_build_const_int32(gallivm, i);
      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
      LLVMValueRef scalar_ptr = LLVMBuildGEP2(builder, LLVMTypeOf(val), base_ptr, &index, 1, "scatter_ptr");
      LLVMValueRef scalar_pred = pred ?
         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;

      /* Debug aid, disabled by default. */
      if (0)
         lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
                         ii, val, index, scalar_ptr);

      if (scalar_pred) {
         LLVMValueRef real_val, dst_val;
         /* Masked lane: read-modify-write so inactive lanes are preserved. */
         dst_val = LLVMBuildLoad2(builder, LLVMTypeOf(val), scalar_ptr, "");
         scalar_pred = LLVMBuildTrunc(builder, scalar_pred, LLVMInt1TypeInContext(gallivm->context), "");
         real_val = LLVMBuildSelect(builder, scalar_pred, val, dst_val, "");
         LLVMBuildStore(builder, real_val, scalar_ptr);
      }
      else {
         LLVMBuildStore(builder, val, scalar_ptr);
      }
   }
}
408
/**
 * Load a NIR variable (shader input or output) into per-channel SoA vectors.
 *
 * Writes result[0..num_components-1].  64-bit components fetch two
 * consecutive 32-bit slots and combine them with emit_fetch_64bit().
 * Inputs dispatch on the bound stage interface (gs/tes/tcs) and otherwise
 * read the inputs array (indirect addressing) or the cached input vectors.
 * Outputs go through the tcs interface or plain loads of the output regs.
 */
static void emit_load_var(struct lp_build_nir_context *bld_base,
                          nir_variable_mode deref_mode,
                          unsigned num_components,
                          unsigned bit_size,
                          nir_variable *var,
                          unsigned vertex_index,
                          LLVMValueRef indir_vertex_index,
                          unsigned const_index,
                          LLVMValueRef indir_index,
                          LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   /* 64-bit components span two 32-bit channel slots. */
   int dmul = bit_size == 64 ? 2 : 1;
   unsigned location = var->data.driver_location;
   unsigned location_frac = var->data.location_frac;

   /* Fold the constant offset into location/location_frac.  Compact
    * variables pack 4 scalars per location, so split the offset.
    */
   if (!var->data.compact && !indir_index)
      location += const_index;
   else if (var->data.compact) {
      location += const_index / 4;
      location_frac += const_index % 4;
      const_index = 0;
   }
   switch (deref_mode) {
   case nir_var_shader_in:
      for (unsigned i = 0; i < num_components; i++) {
         int idx = (i * dmul) + location_frac;
         int comp_loc = location;

         /* 64-bit channels past component 3 wrap into the next location. */
         if (bit_size == 64 && idx >= 4) {
            comp_loc++;
            idx = idx % 4;
         }

         if (bld->gs_iface) {
            /* Geometry shader: fetch per-vertex input via the gs interface. */
            LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
            LLVMValueRef attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
            LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
            LLVMValueRef result2;

            result[i] = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
                                                   false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
            if (bit_size == 64) {
               LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
               result2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
                                                    false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
               result[i] = emit_fetch_64bit(bld_base, result[i], result2);
            }
         } else if (bld->tes_iface) {
            /* Tess eval: patch inputs vs. per-vertex inputs use different
             * interface entry points; compact indirects index the swizzle,
             * non-compact indirects index the attribute.
             */
            LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
            LLVMValueRef attrib_index_val;
            LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
            LLVMValueRef result2;

            if (indir_index) {
               if (var->data.compact) {
                  swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));
                  attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
               } else
                  attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
            } else
               attrib_index_val = lp_build_const_int32(gallivm, comp_loc);

            if (var->data.patch) {
               result[i] = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
                                                             indir_index ? true : false, attrib_index_val, swizzle_index_val);
               if (bit_size == 64) {
                  LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
                  result2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
                                                              indir_index ? true : false, attrib_index_val, swizzle_index_val);
                  result[i] = emit_fetch_64bit(bld_base, result[i], result2);
               }
            }
            else {
               result[i] = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
                                                              indir_vertex_index ? true : false,
                                                              indir_vertex_index ? indir_vertex_index : vertex_index_val,
                                                              (indir_index && !var->data.compact) ? true : false, attrib_index_val,
                                                              (indir_index && var->data.compact) ? true : false, swizzle_index_val);
               if (bit_size == 64) {
                  LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
                  result2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
                                                               indir_vertex_index ? true : false,
                                                               indir_vertex_index ? indir_vertex_index : vertex_index_val,
                                                               indir_index ? true : false, attrib_index_val, false, swizzle_index_val);
                  result[i] = emit_fetch_64bit(bld_base, result[i], result2);
               }
            }
         } else if (bld->tcs_iface) {
            /* Tess control: same compact/indirect index split as tes above. */
            LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
            LLVMValueRef attrib_index_val;
            LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);

            if (indir_index) {
               if (var->data.compact) {
                  swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));
                  attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
               } else
                  attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
            } else
               attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
            result[i] = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
                                                         indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
                                                         (indir_index && !var->data.compact) ? true : false, attrib_index_val,
                                                         (indir_index && var->data.compact) ? true : false, swizzle_index_val);
            if (bit_size == 64) {
               LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
               LLVMValueRef result2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
                                                                       indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
                                                                       indir_index ? true : false, attrib_index_val,
                                                                       false, swizzle_index_val);
               result[i] = emit_fetch_64bit(bld_base, result[i], result2);
            }
         } else {
            /* No stage interface (e.g. vs/fs paths): read the inputs. */
            if (indir_index) {
               LLVMValueRef attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
               LLVMValueRef index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                                              attrib_index_val, 4, idx,
                                                              true);
               LLVMValueRef index_vec2 = NULL;
               LLVMTypeRef scalar_type = LLVMFloatTypeInContext(gallivm->context);
               LLVMValueRef inputs_array = LLVMBuildBitCast(gallivm->builder, bld->inputs_array, LLVMPointerType(scalar_type, 0), "");

               if (bit_size == 64)
                  index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                                     indir_index, 4, idx + 1, true);

               /* Gather values from the input register array */
               result[i] = build_gather(bld_base, &bld_base->base, scalar_type, inputs_array, index_vec, NULL, index_vec2);
            } else {
               if (bld->indirects & nir_var_shader_in) {
                  /* Inputs were spilled to an array (because some input is
                   * indirectly addressed); load the vector from it.
                   */
                  LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                                             comp_loc * 4 + idx);
                  LLVMValueRef input_ptr = lp_build_pointer_get2(gallivm->builder,
                                                                 bld->bld_base.base.vec_type,
                                                                 bld->inputs_array, lindex);
                  if (bit_size == 64) {
                     LLVMValueRef lindex2 = lp_build_const_int32(gallivm,
                                                                 comp_loc * 4 + (idx + 1));
                     LLVMValueRef input_ptr2 = lp_build_pointer_get2(gallivm->builder,
                                                                     bld->bld_base.base.vec_type,
                                                                     bld->inputs_array, lindex2);
                     result[i] = emit_fetch_64bit(bld_base, input_ptr, input_ptr2);
                  } else {
                     result[i] = input_ptr;
                  }
               } else {
                  /* Direct access: inputs are cached as SSA vectors. */
                  if (bit_size == 64) {
                     LLVMValueRef tmp[2];
                     tmp[0] = bld->inputs[comp_loc][idx];
                     tmp[1] = bld->inputs[comp_loc][idx + 1];
                     result[i] = emit_fetch_64bit(bld_base, tmp[0], tmp[1]);
                  } else {
                     result[i] = bld->inputs[comp_loc][idx];
                  }
               }
            }
         }
      }
      break;
   case nir_var_shader_out:
      /* Framebuffer fetch reads the current render target contents. */
      if (var->data.fb_fetch_output && bld->fs_iface && bld->fs_iface->fb_fetch) {
         bld->fs_iface->fb_fetch(bld->fs_iface, &bld_base->base, var->data.location, result);
         break;
      }

      for (unsigned i = 0; i < num_components; i++) {
         int idx = (i * dmul) + location_frac;
         int comp_loc = location;
         if (bit_size == 64 && idx >= 4) {
            comp_loc++;
            idx = idx % 4;
         }

         if (bld->tcs_iface) {
            LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
            LLVMValueRef attrib_index_val;
            LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);

            if (indir_index)
               attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
            else
               attrib_index_val = lp_build_const_int32(gallivm, comp_loc);

            result[i] = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
                                                          indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
                                                          (indir_index && !var->data.compact) ? true : false, attrib_index_val,
                                                          (indir_index && var->data.compact) ? true : false, swizzle_index_val, 0);
            if (bit_size == 64) {
               LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
               LLVMValueRef result2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
                                                                        indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
                                                                        indir_index ? true : false, attrib_index_val,
                                                                        false, swizzle_index_val, 0);
               result[i] = emit_fetch_64bit(bld_base, result[i], result2);
            }
         } else {
            /* Output variable behave like private variables during the shader execution.
               GLSL 4.60 spec, section 4.3.6.
               Vulkan 1.3 spec, Helper Invocations */
            if (bit_size == 64) {
               result[i] = emit_fetch_64bit(bld_base,
                                            LLVMBuildLoad2(gallivm->builder, bld_base->base.vec_type, bld->outputs[comp_loc][idx], "output0_ptr"),
                                            LLVMBuildLoad2(gallivm->builder, bld_base->base.vec_type, bld->outputs[comp_loc][idx + 1], "output1_ptr"));
            } else {
               result[i] = LLVMBuildLoad2(gallivm->builder, bld_base->base.vec_type, bld->outputs[comp_loc][idx], "output_ptr");
            }
         }
      }
      break;
   default:
      break;
   }
}
624
/* Store one channel of a NIR value to the output registers, honoring the
 * execution mask.  64-bit values occupy two consecutive 32-bit slots and
 * wrap into the next location past component 3.
 */
static void emit_store_chan(struct lp_build_nir_context *bld_base,
                            nir_variable_mode deref_mode,
                            unsigned bit_size,
                            unsigned location, unsigned comp,
                            unsigned chan,
                            LLVMValueRef dst)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *float_bld = &bld_base->base;

   if (bit_size != 64) {
      /* 32-bit: bitcast to float and perform a masked store. */
      LLVMValueRef as_float = LLVMBuildBitCast(builder, dst, float_bld->vec_type, "");
      lp_exec_mask_store(&bld->exec_mask, float_bld, as_float,
                         bld->outputs[location][chan + comp]);
      return;
   }

   /* 64-bit: compute the starting 32-bit slot and wrap if needed. */
   unsigned slot = chan * 2 + comp;
   if (slot >= 4) {
      slot -= 4;
      location++;
   }
   emit_store_64bit_chan(bld_base, bld->outputs[location][slot],
                         bld->outputs[location][slot + 1], dst);
}
651
/**
 * Store one channel of a TCS (or mesh) output through the stage interface.
 *
 * The swizzle (32-bit slot index) is derived from chan/comp, doubling and
 * wrapping for 64-bit values.  Compact variables apply the indirect index
 * to the swizzle; non-compact ones apply it to the attribute index.  The
 * store is masked with the combined execution mask.
 */
static void emit_store_tcs_chan(struct lp_build_nir_context *bld_base,
                                bool is_compact,
                                unsigned bit_size,
                                unsigned location,
                                unsigned const_index,
                                LLVMValueRef indir_vertex_index,
                                LLVMValueRef indir_index,
                                unsigned comp,
                                unsigned chan,
                                LLVMValueRef chan_val)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   unsigned swizzle = chan;
   if (bit_size == 64) {
      /* 64-bit: each channel covers two 32-bit slots; wrap past comp 3. */
      swizzle *= 2;
      swizzle += comp;
      if (swizzle >= 4) {
         swizzle -= 4;
         location++;
      }
   } else
      swizzle += comp;
   LLVMValueRef attrib_index_val;
   LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, swizzle);

   if (indir_index) {
      if (is_compact) {
         /* Compact: indirect index selects the swizzle slot. */
         swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, swizzle));
         attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
      } else
         attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, location));
   } else
      attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
   LLVMValueRef exec_mask = mask_vec(bld_base);
   if (bit_size == 64) {
      /* Split into the two 32-bit halves and store each separately. */
      LLVMValueRef split_vals[2];
      LLVMValueRef swizzle_index_val2 = lp_build_const_int32(gallivm, swizzle + 1);
      emit_store_64bit_split(bld_base, chan_val, split_vals);
      if (bld->mesh_iface) {
         bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
                                            indir_vertex_index ? true : false,
                                            indir_vertex_index,
                                            indir_index ? true : false,
                                            attrib_index_val,
                                            false, swizzle_index_val,
                                            split_vals[0], exec_mask);
         bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
                                            indir_vertex_index ? true : false,
                                            indir_vertex_index,
                                            indir_index ? true : false,
                                            attrib_index_val,
                                            false, swizzle_index_val2,
                                            split_vals[1], exec_mask);
      } else {
         bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
                                           indir_vertex_index ? true : false,
                                           indir_vertex_index,
                                           indir_index ? true : false,
                                           attrib_index_val,
                                           false, swizzle_index_val,
                                           split_vals[0], exec_mask);
         bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
                                           indir_vertex_index ? true : false,
                                           indir_vertex_index,
                                           indir_index ? true : false,
                                           attrib_index_val,
                                           false, swizzle_index_val2,
                                           split_vals[1], exec_mask);
      }
   } else {
      chan_val = LLVMBuildBitCast(builder, chan_val, bld_base->base.vec_type, "");
      if (bld->mesh_iface) {
         bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
                                            indir_vertex_index ? true : false,
                                            indir_vertex_index,
                                            indir_index && !is_compact ? true : false,
                                            attrib_index_val,
                                            indir_index && is_compact ? true : false,
                                            swizzle_index_val,
                                            chan_val, exec_mask);
      } else {
         bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
                                           indir_vertex_index ? true : false,
                                           indir_vertex_index,
                                           indir_index && !is_compact ? true : false,
                                           attrib_index_val,
                                           indir_index && is_compact ? true : false,
                                           swizzle_index_val,
                                           chan_val, exec_mask);
      }
   }
}
746
/**
 * Store one channel of a mesh shader output through the mesh interface.
 *
 * Like emit_store_tcs_chan, but mesh-only, and with a slightly different
 * 64-bit swizzle computation: const_index is folded into the channel
 * before doubling.  The store is masked with the combined execution mask.
 */
static void emit_store_mesh_chan(struct lp_build_nir_context *bld_base,
                                 bool is_compact,
                                 unsigned bit_size,
                                 unsigned location,
                                 unsigned const_index,
                                 LLVMValueRef indir_vertex_index,
                                 LLVMValueRef indir_index,
                                 unsigned comp,
                                 unsigned chan,
                                 LLVMValueRef chan_val)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   unsigned swizzle = chan;
   if (bit_size == 64) {
      /* 64-bit: fold the constant offset in, then double; wrap past 3. */
      swizzle += const_index;
      swizzle *= 2;
      swizzle += comp;
      if (swizzle >= 4) {
         swizzle -= 4;
         location++;
      }
   } else
      swizzle += comp;
   LLVMValueRef attrib_index_val;
   LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, swizzle);

   if (indir_index) {
      if (is_compact) {
         /* Compact: indirect index selects the swizzle slot. */
         swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, swizzle));
         attrib_index_val = lp_build_const_int32(gallivm, location);
      } else
         attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, location));
   } else
      attrib_index_val = lp_build_const_int32(gallivm, location + const_index);
   LLVMValueRef exec_mask = mask_vec(bld_base);
   if (bit_size == 64) {
      /* Split into the two 32-bit halves and store each separately. */
      LLVMValueRef split_vals[2];
      LLVMValueRef swizzle_index_val2 = lp_build_const_int32(gallivm, swizzle + 1);
      emit_store_64bit_split(bld_base, chan_val, split_vals);
      bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
                                         indir_vertex_index ? true : false,
                                         indir_vertex_index,
                                         indir_index ? true : false,
                                         attrib_index_val,
                                         false, swizzle_index_val,
                                         split_vals[0], exec_mask);
      bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
                                         indir_vertex_index ? true : false,
                                         indir_vertex_index,
                                         indir_index ? true : false,
                                         attrib_index_val,
                                         false, swizzle_index_val2,
                                         split_vals[1], exec_mask);
   } else {
      chan_val = LLVMBuildBitCast(builder, chan_val, bld_base->base.vec_type, "");
      bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
                                         indir_vertex_index ? true : false,
                                         indir_vertex_index,
                                         indir_index && !is_compact ? true : false,
                                         attrib_index_val,
                                         indir_index && is_compact ? true : false,
                                         swizzle_index_val,
                                         chan_val, exec_mask);
   }
}
814
/* Store to a NIR variable.  Only shader outputs are handled; all other
 * variable modes are silently ignored.  Dispatches each written channel
 * to the mesh, TCS, or generic per-channel store path.
 */
static void emit_store_var(struct lp_build_nir_context *bld_base,
                           nir_variable_mode deref_mode,
                           unsigned num_components,
                           unsigned bit_size,
                           nir_variable *var,
                           unsigned writemask,
                           LLVMValueRef indir_vertex_index,
                           unsigned const_index,
                           LLVMValueRef indir_index,
                           LLVMValueRef dst)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (deref_mode != nir_var_shader_out)
      return;

   unsigned location = var->data.driver_location;
   unsigned comp = var->data.location_frac;

   /* Fragment depth/stencil results live in fixed channels of the slot. */
   if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
      if (var->data.location == FRAG_RESULT_STENCIL)
         comp = 1;
      else if (var->data.location == FRAG_RESULT_DEPTH)
         comp = 2;
   }

   /* Compact variables are indexed by scalar element: fold the constant
    * index into the slot/channel pair. */
   if (var->data.compact) {
      location += const_index / 4;
      comp += const_index % 4;
      const_index = 0;
   }

   for (unsigned c = 0; c < num_components; c++) {
      if (!(writemask & (1u << c)))
         continue;

      LLVMValueRef value = (num_components == 1)
         ? dst : LLVMBuildExtractValue(builder, dst, c, "");

      if (bld->mesh_iface)
         emit_store_mesh_chan(bld_base, var->data.compact, bit_size, location,
                              const_index, indir_vertex_index, indir_index,
                              comp, c, value);
      else if (bld->tcs_iface)
         emit_store_tcs_chan(bld_base, var->data.compact, bit_size, location,
                             const_index, indir_vertex_index, indir_index,
                             comp, c, value);
      else
         emit_store_chan(bld_base, deref_mode, bit_size,
                         location + const_index, comp, c, value);
   }
}
862
863 /**
864 * Returns the address of the given constant array index and channel in a
865 * nir register.
866 */
reg_chan_pointer(struct lp_build_nir_context * bld_base,struct lp_build_context * reg_bld,const nir_intrinsic_instr * decl,LLVMValueRef reg_storage,int array_index,int chan)867 static LLVMValueRef reg_chan_pointer(struct lp_build_nir_context *bld_base,
868 struct lp_build_context *reg_bld,
869 const nir_intrinsic_instr *decl,
870 LLVMValueRef reg_storage,
871 int array_index, int chan)
872 {
873 struct gallivm_state *gallivm = bld_base->base.gallivm;
874 int nc = nir_intrinsic_num_components(decl);
875 int num_array_elems = nir_intrinsic_num_array_elems(decl);
876
877 LLVMTypeRef chan_type = reg_bld->vec_type;
878 if (nc > 1)
879 chan_type = LLVMArrayType(chan_type, nc);
880
881 if (num_array_elems > 0) {
882 LLVMTypeRef array_type = LLVMArrayType(chan_type, num_array_elems);
883 reg_storage = lp_build_array_get_ptr2(gallivm, array_type, reg_storage,
884 lp_build_const_int32(gallivm, array_index));
885 }
886 if (nc > 1) {
887 reg_storage = lp_build_array_get_ptr2(gallivm, chan_type, reg_storage,
888 lp_build_const_int32(gallivm, chan));
889 }
890
891 return reg_storage;
892 }
893
/* Load a NIR register's value, either through a per-lane indirect index
 * (gathered element by element) or through a constant array index
 * (plain loads via reg_chan_pointer()).
 */
static LLVMValueRef emit_load_reg(struct lp_build_nir_context *bld_base,
                                  struct lp_build_context *reg_bld,
                                  const nir_intrinsic_instr *decl,
                                  unsigned base,
                                  LLVMValueRef indir_src,
                                  LLVMValueRef reg_storage)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   const int nc = nir_intrinsic_num_components(decl);
   const int array_len = nir_intrinsic_num_array_elems(decl);
   LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS] = { NULL };

   if (indir_src != NULL) {
      /* Per-lane index: add the base, clamp into the array, then gather
       * each component lane by lane from the scalar storage. */
      LLVMValueRef index =
         LLVMBuildAdd(builder,
                      lp_build_const_int_vec(gallivm, uint_bld->type, base),
                      indir_src, "");
      index = lp_build_min(uint_bld, index,
                           lp_build_const_int_vec(gallivm, uint_bld->type,
                                                  array_len - 1));
      reg_storage = LLVMBuildBitCast(builder, reg_storage,
                                     LLVMPointerType(reg_bld->elem_type, 0), "");
      for (int c = 0; c < nc; c++) {
         LLVMValueRef offsets = get_soa_array_offsets(uint_bld, index, nc, c, true);
         vals[c] = build_gather(bld_base, reg_bld, reg_bld->elem_type,
                                reg_storage, offsets, NULL, NULL);
      }
   } else {
      /* Constant index: direct loads through channel pointers. */
      for (int c = 0; c < nc; c++) {
         vals[c] = LLVMBuildLoad2(builder, reg_bld->vec_type,
                                  reg_chan_pointer(bld_base, reg_bld, decl,
                                                   reg_storage, base, c), "");
      }
   }

   return nc == 1 ? vals[0] : lp_nir_array_build_gather_values(builder, vals, nc);
}
926
/* Store values into a NIR register, honoring the write mask and the
 * current execution mask.  A per-lane indirect index scatters element by
 * element; a constant index uses masked stores through channel pointers.
 */
static void emit_store_reg(struct lp_build_nir_context *bld_base,
                           struct lp_build_context *reg_bld,
                           const nir_intrinsic_instr *decl,
                           unsigned writemask,
                           unsigned base,
                           LLVMValueRef indir_src,
                           LLVMValueRef reg_storage,
                           LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS])
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   const int nc = nir_intrinsic_num_components(decl);
   const int array_len = nir_intrinsic_num_array_elems(decl);

   if (indir_src != NULL) {
      /* Per-lane index: add the base and clamp out-of-bounds indices to
       * the last array element, then scatter active lanes. */
      LLVMValueRef index =
         LLVMBuildAdd(builder,
                      lp_build_const_int_vec(gallivm, uint_bld->type, base),
                      indir_src, "");
      index = lp_build_min(uint_bld, index,
                           lp_build_const_int_vec(gallivm, uint_bld->type,
                                                  array_len - 1));
      reg_storage = LLVMBuildBitCast(builder, reg_storage,
                                     LLVMPointerType(reg_bld->elem_type, 0), "");
      for (int c = 0; c < nc; c++) {
         if (!(writemask & (1u << c)))
            continue;
         LLVMValueRef offsets = get_soa_array_offsets(uint_bld, index, nc, c, true);
         dst[c] = LLVMBuildBitCast(builder, dst[c], reg_bld->vec_type, "");
         emit_mask_scatter(bld, reg_storage, offsets, dst[c], &bld->exec_mask);
      }
      return;
   }

   for (int c = 0; c < nc; c++) {
      if (!(writemask & (1u << c)))
         continue;
      dst[c] = LLVMBuildBitCast(builder, dst[c], reg_bld->vec_type, "");
      lp_exec_mask_store(&bld->exec_mask, reg_bld, dst[c],
                         reg_chan_pointer(bld_base, reg_bld, decl,
                                          reg_storage, base, c));
   }
}
967
/* Load components from the kernel argument buffer.
 *
 * Only uniform offsets are supported: the offset is taken from the first
 * active invocation, each scalar is loaded once, and the result is
 * broadcast to all lanes.
 */
static void emit_load_kernel_arg(struct lp_build_nir_context *bld_base,
                                 unsigned nc,
                                 unsigned bit_size,
                                 unsigned offset_bit_size,
                                 bool offset_is_uniform,
                                 LLVMValueRef offset,
                                 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size);
   struct lp_build_context *bld_offset = get_int_bld(bld_base, true, offset_bit_size);
   LLVMValueRef args_ptr = bld->kernel_args_ptr;

   /* Convert the byte offset into an index of bit_size-sized elements. */
   unsigned size_shift = bit_size_to_shift_size(bit_size);
   if (size_shift)
      offset = lp_build_shr(bld_offset, offset,
                            lp_build_const_int_vec(gallivm, bld_offset->type,
                                                   size_shift));

   args_ptr = LLVMBuildBitCast(builder, args_ptr,
                               LLVMPointerType(bld_broad->elem_type, 0), "");

   if (!offset_is_uniform)
      unreachable("load_kernel_arg must have a uniform offset.");

   offset = LLVMBuildExtractElement(builder, offset,
                                    first_active_invocation(bld_base), "");

   for (unsigned c = 0; c < nc; c++) {
      /* The element index constant must match the offset's bit width. */
      LLVMValueRef elem_idx =
         LLVMBuildAdd(builder, offset,
                      offset_bit_size == 64 ? lp_build_const_int64(gallivm, c)
                                            : lp_build_const_int32(gallivm, c),
                      "");
      LLVMValueRef scalar = lp_build_pointer_get2(builder, bld_broad->elem_type,
                                                  args_ptr, elem_idx);
      result[c] = lp_build_broadcast_scalar(bld_broad, scalar);
   }
}
1002
/* Cast a scalar integer address to a pointer to an integer of the given
 * bit size.  Any size other than 8/16/64 is treated as 32 bits.
 */
static LLVMValueRef global_addr_to_ptr(struct gallivm_state *gallivm, LLVMValueRef addr_ptr, unsigned bit_size)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef elem_type;

   switch (bit_size) {
   case 8:
      elem_type = LLVMInt8TypeInContext(gallivm->context);
      break;
   case 16:
      elem_type = LLVMInt16TypeInContext(gallivm->context);
      break;
   case 64:
      elem_type = LLVMInt64TypeInContext(gallivm->context);
      break;
   case 32:
   default:
      elem_type = LLVMInt32TypeInContext(gallivm->context);
      break;
   }

   return LLVMBuildIntToPtr(builder, addr_ptr,
                            LLVMPointerType(elem_type, 0), "");
}
1023
/* Cast a vector of integer addresses to a vector of @length pointers to
 * integers of the given bit size.  Any size other than 8/16/64 is
 * treated as 32 bits.
 */
static LLVMValueRef global_addr_to_ptr_vec(struct gallivm_state *gallivm, LLVMValueRef addr_ptr, unsigned length, unsigned bit_size)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef elem_type;

   switch (bit_size) {
   case 8:
      elem_type = LLVMInt8TypeInContext(gallivm->context);
      break;
   case 16:
      elem_type = LLVMInt16TypeInContext(gallivm->context);
      break;
   case 64:
      elem_type = LLVMInt64TypeInContext(gallivm->context);
      break;
   case 32:
   default:
      elem_type = LLVMInt32TypeInContext(gallivm->context);
      break;
   }

   return LLVMBuildIntToPtr(builder, addr_ptr,
                            LLVMVectorType(LLVMPointerType(elem_type, 0),
                                           length), "");
}
1044
/* Add a per-lane byte offset to a vector of pointers, yielding a vector
 * of pointers to bit_size-sized elements.
 */
static LLVMValueRef lp_vec_add_offset_ptr(struct lp_build_nir_context *bld_base,
                                          unsigned bit_size,
                                          LLVMValueRef ptr,
                                          LLVMValueRef offset)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   const unsigned ptr_bits = 8 * sizeof(void *);
   struct lp_build_context *ptr_bld = get_int_bld(bld_base, true, ptr_bits);

   /* Do the arithmetic in pointer-width integers. */
   LLVMValueRef addr = LLVMBuildPtrToInt(builder, ptr, ptr_bld->vec_type, "");
   /* On 64-bit hosts the 32-bit offset must be widened before the add. */
   if (ptr_bits == 64)
      offset = LLVMBuildZExt(builder, offset, ptr_bld->vec_type, "");
   addr = LLVMBuildAdd(builder, offset, addr, "");

   return global_addr_to_ptr_vec(gallivm, addr,
                                 bld_base->uint_bld.type.length, bit_size);
}
1061
/* Load @nc components of @bit_size bits each from per-lane global
 * addresses in @addr.
 *
 * With a uniform address and a guaranteed-active lane 0, a single scalar
 * load per component is emitted and broadcast; otherwise a masked gather
 * is used so inactive lanes never dereference their (possibly invalid)
 * pointers.
 */
static void emit_load_global(struct lp_build_nir_context *bld_base,
                             unsigned nc,
                             unsigned bit_size,
                             unsigned addr_bit_size,
                             bool offset_is_uniform,
                             LLVMValueRef addr,
                             LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *res_bld;

   /* Loads are performed as integers of the access size and bitcast at
    * the end. */
   res_bld = get_int_bld(bld_base, true, bit_size);

   /* Note, we don't use first_active_invocation here, since we aren't
    * guaranteed that there is actually an active invocation.
    */
   if (offset_is_uniform && invocation_0_must_be_active(bld_base)) {
      /* If the offset is uniform, then use the address from invocation 0 to
       * load, and broadcast to all invocations.
       */
      LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
                                                      lp_build_const_int32(gallivm, 0), "");
      addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);

      for (unsigned c = 0; c < nc; c++) {
         /* Components are contiguous: element index c from the base. */
         LLVMValueRef scalar = lp_build_pointer_get2(builder, res_bld->elem_type,
                                                     addr_ptr, lp_build_const_int32(gallivm, c));
         outval[c] = lp_build_broadcast_scalar(res_bld, scalar);
      }
      return;
   }

   /* Divergent path: gather lane by lane, masked by execution (helper
    * invocations included). */
   LLVMValueRef mask = mask_vec_with_helpers(bld_base);
   for (unsigned c = 0; c < nc; c++) {
      /* Per-component byte offset added to every lane's base address. */
      LLVMValueRef chan_offset = lp_build_const_int_vec(gallivm, uint_bld->type, c * (bit_size / 8));

      outval[c] = lp_build_masked_gather(gallivm, res_bld->type.length, bit_size, res_bld->vec_type,
                                         lp_vec_add_offset_ptr(bld_base, bit_size, addr, chan_offset),
                                         mask);
      outval[c] = LLVMBuildBitCast(builder, outval[c], res_bld->vec_type, "");
   }
}
1106
/* Store the written components of @dst to per-lane global addresses,
 * predicated on the execution mask so inactive lanes never write.
 */
static void emit_store_global(struct lp_build_nir_context *bld_base,
                              unsigned writemask,
                              unsigned nc, unsigned bit_size,
                              unsigned addr_bit_size,
                              LLVMValueRef addr,
                              LLVMValueRef dst)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *store_bld = get_int_bld(bld_base, false, bit_size);
   LLVMValueRef exec_mask = mask_vec(bld_base);

   for (unsigned c = 0; c < nc; c++) {
      if (!(writemask & (1u << c)))
         continue;

      /* Pull out this component and reinterpret it as an integer vector
       * of the access size. */
      LLVMValueRef val =
         (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
      val = LLVMBuildBitCast(builder, val, store_bld->vec_type, "");

      /* Each component lives bit_size/8 bytes further along. */
      LLVMValueRef chan_offset =
         lp_build_const_int_vec(gallivm, uint_bld->type, c * (bit_size / 8));

      lp_build_masked_scatter(gallivm, store_bld->type.length, bit_size,
                              lp_vec_add_offset_ptr(bld_base, bit_size,
                                                    addr, chan_offset),
                              val, exec_mask);
   }
}
1133
/* Emit a global-memory atomic (RMW or, when @val2 is non-NULL,
 * compare-and-swap).
 *
 * LLVM's atomic instructions are scalar, so this loops over all SIMD
 * lanes and performs the atomic only for lanes whose execution-mask bit
 * is set; inactive lanes contribute zero to the result vector, which is
 * returned in *result.
 */
static void emit_atomic_global(struct lp_build_nir_context *bld_base,
                               nir_atomic_op nir_op,
                               unsigned addr_bit_size,
                               unsigned val_bit_size,
                               LLVMValueRef addr,
                               LLVMValueRef val, LLVMValueRef val2,
                               LLVMValueRef *result)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   /* Float atomics are built through the float context so the scalar op
    * sees floating-point operands. */
   bool is_flt = nir_atomic_op_type(nir_op) == nir_type_float;
   struct lp_build_context *atom_bld = is_flt ? get_flt_bld(bld_base, val_bit_size) : get_int_bld(bld_base, true, val_bit_size);
   if (is_flt)
      val = LLVMBuildBitCast(builder, val, atom_bld->vec_type, "");

   /* The per-lane results are accumulated into this alloca. */
   LLVMValueRef atom_res = lp_build_alloca(gallivm,
                                           atom_bld->vec_type, "");
   LLVMValueRef exec_mask = mask_vec(bld_base);
   struct lp_build_loop_state loop_state;
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));

   /* This lane's operand value (a scalar, despite the _ptr name). */
   LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
                                                    loop_state.counter, "");
   value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, atom_bld->elem_type, "");

   LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
                                                   loop_state.counter, "");
   /* Built as an i32* first; re-cast to the operand's type further down. */
   addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, 32);
   struct lp_build_if_state ifthen;
   LLVMValueRef cond, temp_res;
   LLVMValueRef scalar;
   /* Only perform the atomic for lanes with a set execution-mask bit. */
   cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
   cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
   lp_build_if(&ifthen, gallivm, cond);

   addr_ptr = LLVMBuildBitCast(gallivm->builder, addr_ptr, LLVMPointerType(LLVMTypeOf(value_ptr), 0), "");
   if (val2 != NULL /* compare-and-swap */) {
      LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
                                                         loop_state.counter, "");
      cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atom_bld->elem_type, "");
      scalar = LLVMBuildAtomicCmpXchg(builder, addr_ptr, value_ptr,
                                      cas_src_ptr,
                                      LLVMAtomicOrderingSequentiallyConsistent,
                                      LLVMAtomicOrderingSequentiallyConsistent,
                                      false);
      /* cmpxchg yields {old value, success}; keep the old value. */
      scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
   } else {
      scalar = LLVMBuildAtomicRMW(builder, lp_translate_atomic_op(nir_op),
                                  addr_ptr, value_ptr,
                                  LLVMAtomicOrderingSequentiallyConsistent,
                                  false);
   }
   /* Insert this lane's old memory value into the result vector. */
   temp_res = LLVMBuildLoad2(builder, atom_bld->vec_type, atom_res, "");
   temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
   LLVMBuildStore(builder, temp_res, atom_res);
   lp_build_else(&ifthen);
   /* Inactive lanes get zero in the result. */
   temp_res = LLVMBuildLoad2(builder, atom_bld->vec_type, atom_res, "");
   LLVMValueRef zero_val = lp_build_zero_bits(gallivm, val_bit_size, is_flt);
   temp_res = LLVMBuildInsertElement(builder, temp_res, zero_val, loop_state.counter, "");
   LLVMBuildStore(builder, temp_res, atom_res);
   lp_build_endif(&ifthen);
   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
                          NULL, LLVMIntUGE);
   *result = LLVMBuildLoad2(builder, LLVMTypeOf(val), atom_res, "");
}
1200
1201 /* Returns a boolean for whether the offset is in range of the given limit for
1202 * SSBO/UBO dereferences.
1203 */
1204 static LLVMValueRef
lp_offset_in_range(struct lp_build_nir_context * bld_base,LLVMValueRef offset,LLVMValueRef limit)1205 lp_offset_in_range(struct lp_build_nir_context *bld_base,
1206 LLVMValueRef offset,
1207 LLVMValueRef limit)
1208 {
1209 struct gallivm_state *gallivm = bld_base->base.gallivm;
1210 LLVMBuilderRef builder = gallivm->builder;
1211
1212 LLVMValueRef fetch_extent = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, 1), "");
1213 LLVMValueRef fetch_in_bounds = LLVMBuildICmp(gallivm->builder, LLVMIntUGE, limit, fetch_extent, "");
1214 LLVMValueRef fetch_non_negative = LLVMBuildICmp(gallivm->builder, LLVMIntSGE, offset, lp_build_const_int32(gallivm, 0), "");
1215 return LLVMBuildAnd(gallivm->builder, fetch_in_bounds, fetch_non_negative, "");
1216 }
1217
1218 static LLVMValueRef
build_resource_to_scalar(struct lp_build_nir_context * bld_base,LLVMValueRef resource)1219 build_resource_to_scalar(struct lp_build_nir_context *bld_base, LLVMValueRef resource)
1220 {
1221 struct gallivm_state *gallivm = bld_base->base.gallivm;
1222
1223 LLVMValueRef invocation = first_active_invocation(bld_base);
1224
1225 LLVMValueRef set = LLVMBuildExtractValue(gallivm->builder, resource, 0, "");
1226 set = LLVMBuildExtractElement(gallivm->builder, set, invocation, "");
1227
1228 LLVMValueRef binding = LLVMBuildExtractValue(gallivm->builder, resource, 1, "");
1229 binding = LLVMBuildExtractElement(gallivm->builder, binding, invocation, "");
1230
1231 LLVMValueRef components[2] = { set, binding };
1232 return lp_nir_array_build_gather_values(gallivm->builder, components, 2);
1233 }
1234
/* Load @nc components of @bit_size bits from UBO @index at byte @offset.
 *
 * The uniform-offset path loads each scalar once (predicated on being
 * inside the buffer, reading 0 otherwise) and broadcasts it; the
 * divergent path emits a bounds-masked gather per component.
 */
static void emit_load_ubo(struct lp_build_nir_context *bld_base,
                          unsigned nc,
                          unsigned bit_size,
                          bool offset_is_uniform,
                          LLVMValueRef index,
                          LLVMValueRef offset,
                          LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   /* A Vulkan-style (set, binding) resource struct is first reduced to
    * scalars from the first active lane. */
   if (LLVMGetTypeKind(LLVMTypeOf(index)) == LLVMArrayTypeKind)
      index = build_resource_to_scalar(bld_base, index);

   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size);
   LLVMValueRef consts_ptr = lp_llvm_buffer_base(gallivm, bld->consts_ptr, index, LP_MAX_TGSI_CONST_BUFFERS);
   LLVMValueRef num_consts = lp_llvm_buffer_num_elements(gallivm, bld->consts_ptr, index, LP_MAX_TGSI_CONST_BUFFERS);
   /* Convert the byte offset to an index of bit_size-sized elements. */
   unsigned size_shift = bit_size_to_shift_size(bit_size);
   if (size_shift)
      offset = lp_build_shr(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, size_shift));

   LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0);
   consts_ptr = LLVMBuildBitCast(builder, consts_ptr, ptr_type, "");

   if (offset_is_uniform) {
      offset = LLVMBuildExtractElement(builder, offset, first_active_invocation(bld_base), "");
      struct lp_build_context *load_bld = get_int_bld(bld_base, true, bit_size);
      /* Rescale the element count to units of this access's bit_size
       * (num_consts appears to be kept in 32-bit units — note the
       * matching shifts in the divergent path below; confirm against
       * lp_llvm_buffer_num_elements). */
      switch (bit_size) {
      case 8:
         num_consts = LLVMBuildShl(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 2), "");
         break;
      case 16:
         num_consts = LLVMBuildShl(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 1), "");
         break;
      case 64:
         num_consts = LLVMBuildLShr(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 1), "");
         break;
      default: break;
      }
      for (unsigned c = 0; c < nc; c++) {
         LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");

         LLVMValueRef scalar;
         /* If loading outside the UBO, we need to skip the load and read 0 instead. */
         LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, false);
         LLVMValueRef res_store = lp_build_alloca(gallivm, LLVMTypeOf(zero), "");
         LLVMBuildStore(builder, zero, res_store);

         struct lp_build_if_state ifthen;
         lp_build_if(&ifthen, gallivm, lp_offset_in_range(bld_base, chan_offset, num_consts));
         LLVMBuildStore(builder, lp_build_pointer_get2(builder, bld_broad->elem_type,
                                                       consts_ptr, chan_offset), res_store);
         lp_build_endif(&ifthen);

         scalar = LLVMBuildLoad2(builder, LLVMTypeOf(zero), res_store, "");

         result[c] = lp_build_broadcast_scalar(load_bld, scalar);
      }
   } else {
      LLVMValueRef overflow_mask;

      /* Same element-count rescaling as above, on the splatted vector. */
      num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
      if (bit_size == 64)
         num_consts = lp_build_shr_imm(uint_bld, num_consts, 1);
      else if (bit_size == 16)
         num_consts = lp_build_shl_imm(uint_bld, num_consts, 1);
      else if (bit_size == 8)
         num_consts = lp_build_shl_imm(uint_bld, num_consts, 2);

      for (unsigned c = 0; c < nc; c++) {
         LLVMValueRef this_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
         /* Lanes reading past the buffer end fetch zero through the
          * gather's overflow mask. */
         overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
                                          this_offset, num_consts);
         result[c] = build_gather(bld_base, bld_broad, bld_broad->elem_type, consts_ptr, this_offset, overflow_mask, NULL);
      }
   }
}
1313
1314 static void
emit_load_const(struct lp_build_nir_context * bld_base,const nir_load_const_instr * instr,LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])1315 emit_load_const(struct lp_build_nir_context *bld_base,
1316 const nir_load_const_instr *instr,
1317 LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
1318 {
1319 struct lp_build_context *int_bld = get_int_bld(bld_base, true, instr->def.bit_size);
1320 const unsigned bits = instr->def.bit_size;
1321
1322 for (unsigned i = 0; i < instr->def.num_components; i++) {
1323 outval[i] = lp_build_const_int_vec(bld_base->base.gallivm, int_bld->type,
1324 bits == 32 ? instr->value[i].u32
1325 : instr->value[i].u64);
1326 }
1327 for (unsigned i = instr->def.num_components; i < NIR_MAX_VEC_COMPONENTS; i++) {
1328 outval[i] = NULL;
1329 }
1330 }
1331
1332 /**
1333 * Get the base address of SSBO[@index] for the @invocation channel, returning
1334 * the address and also the bounds (in units of the bit_size).
1335 */
1336 static LLVMValueRef
ssbo_base_pointer(struct lp_build_nir_context * bld_base,unsigned bit_size,LLVMValueRef index,LLVMValueRef invocation,LLVMValueRef * bounds)1337 ssbo_base_pointer(struct lp_build_nir_context *bld_base,
1338 unsigned bit_size,
1339 LLVMValueRef index, LLVMValueRef invocation, LLVMValueRef *bounds)
1340 {
1341 struct gallivm_state *gallivm = bld_base->base.gallivm;
1342 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1343 uint32_t shift_val = bit_size_to_shift_size(bit_size);
1344
1345 LLVMValueRef ssbo_idx;
1346 LLVMValueRef buffers;
1347 uint32_t buffers_limit;
1348 if (LLVMGetTypeKind(LLVMTypeOf(index)) == LLVMArrayTypeKind) {
1349 LLVMValueRef set = LLVMBuildExtractValue(gallivm->builder, index, 0, "");
1350 set = LLVMBuildExtractElement(gallivm->builder, set, invocation, "");
1351
1352 LLVMValueRef binding = LLVMBuildExtractValue(gallivm->builder, index, 1, "");
1353 binding = LLVMBuildExtractElement(gallivm->builder, binding, invocation, "");
1354
1355 LLVMValueRef components[2] = { set, binding };
1356 ssbo_idx = lp_nir_array_build_gather_values(gallivm->builder, components, 2);
1357
1358 buffers = bld->consts_ptr;
1359 buffers_limit = LP_MAX_TGSI_CONST_BUFFERS;
1360 } else {
1361 ssbo_idx = LLVMBuildExtractElement(gallivm->builder, index, invocation, "");
1362
1363 buffers = bld->ssbo_ptr;
1364 buffers_limit = LP_MAX_TGSI_SHADER_BUFFERS;
1365 }
1366
1367 LLVMValueRef ssbo_size_ptr = lp_llvm_buffer_num_elements(gallivm, buffers, ssbo_idx, buffers_limit);
1368 LLVMValueRef ssbo_ptr = lp_llvm_buffer_base(gallivm, buffers, ssbo_idx, buffers_limit);
1369 if (bounds)
1370 *bounds = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, shift_val), "");
1371
1372 return ssbo_ptr;
1373 }
1374
/* Resolve the base pointer for an SSBO / shared-memory / task-payload
 * access, cast to @mem_bld's element type.
 *
 * With @index set this is an SSBO access and *bounds receives the buffer
 * size in units of @bit_size; otherwise shared memory (or, for @payload,
 * the payload buffer) is used and *bounds is set to NULL, meaning no
 * bounds checking.  NOTE(review): the shared/payload path writes *bounds
 * unconditionally, so callers must always pass a valid pointer here —
 * confirm no caller passes NULL.
 */
static LLVMValueRef
mem_access_base_pointer(struct lp_build_nir_context *bld_base,
                        struct lp_build_context *mem_bld,
                        unsigned bit_size, bool payload,
                        LLVMValueRef index, LLVMValueRef invocation, LLVMValueRef *bounds)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMValueRef ptr;

   if (index) {
      ptr = ssbo_base_pointer(bld_base, bit_size, index, invocation, bounds);
   } else {
      if (payload) {
         /* Advance past the first 12 bytes of the payload buffer
          * (presumably a fixed header — confirm against the task/mesh
          * payload layout). */
         ptr = bld->payload_ptr;
         ptr = LLVMBuildPtrToInt(gallivm->builder, ptr, bld_base->int64_bld.elem_type, "");
         ptr = LLVMBuildAdd(gallivm->builder, ptr, lp_build_const_int64(gallivm, 12), "");
         ptr = LLVMBuildIntToPtr(gallivm->builder, ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), "");
      }
      else
         ptr = bld->shared_ptr;
      *bounds = NULL;
   }

   /* Cast it to the pointer type of the access this instruction is doing. */
   if (bit_size == 32 && !mem_bld->type.floating)
      return ptr;
   else
      return LLVMBuildBitCast(gallivm->builder, ptr, LLVMPointerType(mem_bld->elem_type, 0), "");
}
1405
emit_load_mem(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,bool index_and_offset_are_uniform,bool payload,LLVMValueRef index,LLVMValueRef offset,LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])1406 static void emit_load_mem(struct lp_build_nir_context *bld_base,
1407 unsigned nc,
1408 unsigned bit_size,
1409 bool index_and_offset_are_uniform,
1410 bool payload,
1411 LLVMValueRef index,
1412 LLVMValueRef offset,
1413 LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
1414 {
1415 struct gallivm_state *gallivm = bld_base->base.gallivm;
1416 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1417 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1418 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1419 struct lp_build_context *load_bld;
1420 uint32_t shift_val = bit_size_to_shift_size(bit_size);
1421
1422 load_bld = get_int_bld(bld_base, true, bit_size);
1423
1424 offset = LLVMBuildAShr(gallivm->builder, offset, lp_build_const_int_vec(gallivm, uint_bld->type, shift_val), "");
1425
1426 /* If the address is uniform, then use the address from the first active
1427 * invocation 0 to load, and broadcast to all invocations. We can't do
1428 * computed first active invocation for shared accesses (index == NULL),
1429 * though, since those don't do bounds checking and we could use an invalid
1430 * offset if exec_mask == 0.
1431 */
1432 if (index_and_offset_are_uniform && (invocation_0_must_be_active(bld_base) || index)) {
1433 LLVMValueRef ssbo_limit;
1434 LLVMValueRef first_active = first_active_invocation(bld_base);
1435 LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, payload, index,
1436 first_active, &ssbo_limit);
1437
1438 offset = LLVMBuildExtractElement(gallivm->builder, offset, first_active, "");
1439
1440 for (unsigned c = 0; c < nc; c++) {
1441 LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");
1442
1443 LLVMValueRef scalar;
1444 /* If loading outside the SSBO, we need to skip the load and read 0 instead. */
1445 if (ssbo_limit) {
1446 LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, false);
1447 LLVMValueRef res_store = lp_build_alloca(gallivm, LLVMTypeOf(zero), "");
1448 LLVMBuildStore(builder, zero, res_store);
1449
1450 struct lp_build_if_state ifthen;
1451 lp_build_if(&ifthen, gallivm, lp_offset_in_range(bld_base, chan_offset, ssbo_limit));
1452 LLVMBuildStore(builder, lp_build_pointer_get2(builder, load_bld->elem_type, mem_ptr, chan_offset), res_store);
1453 lp_build_endif(&ifthen);
1454
1455 scalar = LLVMBuildLoad2(builder, LLVMTypeOf(zero), res_store, "");
1456 } else {
1457 scalar = lp_build_pointer_get2(builder, load_bld->elem_type, mem_ptr, chan_offset);
1458 }
1459
1460 outval[c] = lp_build_broadcast_scalar(load_bld, scalar);
1461 }
1462 return;
1463 }
1464
1465 /* although the index is dynamically uniform that doesn't count if exec mask isn't set, so read the one-by-one */
1466
1467 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
1468 for (unsigned c = 0; c < nc; c++)
1469 result[c] = lp_build_alloca(gallivm, load_bld->vec_type, "");
1470
1471 LLVMValueRef gather_mask = mask_vec_with_helpers(bld_base);
1472 LLVMValueRef gather_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, gather_mask, uint_bld->zero, "");
1473 for (unsigned i = 0; i < uint_bld->type.length; i++) {
1474 LLVMValueRef counter = lp_build_const_int32(gallivm, i);
1475 LLVMValueRef element_gather_cond = LLVMBuildExtractElement(gallivm->builder, gather_cond, counter, "");
1476
1477 struct lp_build_if_state if_gather_element;
1478 lp_build_if(&if_gather_element, gallivm, element_gather_cond);
1479
1480 LLVMValueRef ssbo_limit;
1481 LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, payload, index,
1482 counter, &ssbo_limit);
1483
1484 LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, counter, "");
1485
1486 for (unsigned c = 0; c < nc; c++) {
1487 LLVMValueRef loop_index = LLVMBuildAdd(builder, loop_offset, lp_build_const_int32(gallivm, c), "");
1488 LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1);
1489 if (ssbo_limit) {
1490 LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_index, ssbo_limit);
1491 do_fetch = LLVMBuildAnd(builder, do_fetch, ssbo_oob_cmp, "");
1492 }
1493
1494 struct lp_build_if_state ifthen;
1495 LLVMValueRef fetch_cond, temp_res;
1496
1497 fetch_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_fetch, lp_build_const_int32(gallivm, 0), "");
1498
1499 lp_build_if(&ifthen, gallivm, fetch_cond);
1500 LLVMValueRef scalar = lp_build_pointer_get2(builder, load_bld->elem_type, mem_ptr, loop_index);
1501
1502 temp_res = LLVMBuildLoad2(builder, load_bld->vec_type, result[c], "");
1503 temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, counter, "");
1504 LLVMBuildStore(builder, temp_res, result[c]);
1505 lp_build_else(&ifthen);
1506 temp_res = LLVMBuildLoad2(builder, load_bld->vec_type, result[c], "");
1507 LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, false);
1508 temp_res = LLVMBuildInsertElement(builder, temp_res, zero, counter, "");
1509 LLVMBuildStore(builder, temp_res, result[c]);
1510 lp_build_endif(&ifthen);
1511 }
1512
1513 lp_build_endif(&if_gather_element);
1514 }
1515 for (unsigned c = 0; c < nc; c++)
1516 outval[c] = LLVMBuildLoad2(gallivm->builder, load_bld->vec_type, result[c], "");
1517
1518 }
1519
/* Store up to `nc` components of `bit_size`-wide values to SSBO (index !=
 * NULL), or to shared/task-payload memory (index == NULL, `payload` selects
 * which).  `offset` is a per-invocation byte-offset vector; only channels
 * present in `writemask` are written.  Components of one invocation are laid
 * out contiguously (channel c at element offset + c).
 */
static void emit_store_mem(struct lp_build_nir_context *bld_base,
                           unsigned writemask,
                           unsigned nc,
                           unsigned bit_size,
                           bool index_and_offset_are_uniform,
                           bool payload,
                           LLVMValueRef index,
                           LLVMValueRef offset,
                           LLVMValueRef dst)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *store_bld;
   uint32_t shift_val = bit_size_to_shift_size(bit_size);
   store_bld = get_int_bld(bld_base, true, bit_size);

   /* Convert the byte offset into an element index of the store type. */
   offset = lp_build_shr_imm(uint_bld, offset, shift_val);

   /* If the address is uniform, then just store the value from the first
    * channel instead of making LLVM unroll the invocation loop. Note that we
    * don't use first_active_uniform(), since we aren't guaranteed that there is
    * actually an active invocation.
    */
   if (index_and_offset_are_uniform && invocation_0_must_be_active(bld_base)) {
      LLVMValueRef ssbo_limit;
      /* Base pointer for invocation 0; ssbo_limit is non-NULL only for
       * bounds-checked (SSBO) accesses.
       */
      LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, store_bld, bit_size, payload, index,
                                                     lp_build_const_int32(gallivm, 0), &ssbo_limit);

      offset = LLVMBuildExtractElement(gallivm->builder, offset, lp_build_const_int32(gallivm, 0), "");

      for (unsigned c = 0; c < nc; c++) {
         if (!(writemask & (1u << c)))
            continue;

         /* Pick out invocation 0's value. */
         LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
         LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
                                                          lp_build_const_int32(gallivm, 0), "");
         value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, "");

         LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");

         /* If storing outside the SSBO, we need to skip the store instead. */
         if (ssbo_limit) {
            struct lp_build_if_state ifthen;
            lp_build_if(&ifthen, gallivm, lp_offset_in_range(bld_base, chan_offset, ssbo_limit));
            lp_build_pointer_set(builder, mem_ptr, chan_offset, value_ptr);
            lp_build_endif(&ifthen);
         } else {
            lp_build_pointer_set(builder, mem_ptr, chan_offset, value_ptr);
         }
      }
      return;
   }

   /* Non-uniform path: loop over every SIMD lane, storing only where the
    * execution mask is set and (for SSBOs) the offset is in bounds.
    */
   LLVMValueRef exec_mask = mask_vec(bld_base);
   LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
   for (unsigned i = 0; i < uint_bld->type.length; i++) {
      LLVMValueRef counter = lp_build_const_int32(gallivm, i);
      LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, counter, "");

      /* Skip the whole lane if its exec bit is clear. */
      struct lp_build_if_state exec_ifthen;
      lp_build_if(&exec_ifthen, gallivm, loop_cond);

      LLVMValueRef ssbo_limit;
      LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, store_bld, bit_size, payload, index,
                                                     counter, &ssbo_limit);

      LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, counter, "");

      for (unsigned c = 0; c < nc; c++) {
         if (!(writemask & (1u << c)))
            continue;
         LLVMValueRef loop_index = LLVMBuildAdd(builder, loop_offset, lp_build_const_int32(gallivm, c), "");
         LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
         /* do_store starts all-ones and is ANDed with the bounds check. */
         LLVMValueRef do_store = lp_build_const_int32(gallivm, -1);

         if (ssbo_limit) {
            LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_index, ssbo_limit);
            do_store = LLVMBuildAnd(builder, do_store, ssbo_oob_cmp, "");
         }

         LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
                                                          counter, "");
         value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, "");
         struct lp_build_if_state ifthen;
         LLVMValueRef store_cond;

         store_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_store, lp_build_const_int32(gallivm, 0), "");
         lp_build_if(&ifthen, gallivm, store_cond);
         lp_build_pointer_set(builder, mem_ptr, loop_index, value_ptr);
         lp_build_endif(&ifthen);
      }

      lp_build_endif(&exec_ifthen);
   }
}
1619
1620
/* Perform a per-lane atomic op on SSBO (index != NULL) or shared/payload
 * memory.  When val2 is non-NULL the op is a compare-and-swap (val is the
 * comparator, val2 the new value); otherwise nir_op is translated to an LLVM
 * atomicrmw.  Inactive or out-of-bounds lanes contribute 0 to *result.
 */
static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
                            nir_atomic_op nir_op,
                            uint32_t bit_size,
                            bool payload,
                            LLVMValueRef index, LLVMValueRef offset,
                            LLVMValueRef val, LLVMValueRef val2,
                            LLVMValueRef *result)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   uint32_t shift_val = bit_size_to_shift_size(bit_size);
   bool is_float = nir_atomic_op_type(nir_op) == nir_type_float;
   /* Float atomics need a float build context so bitcasts/zero have the
    * right element type; everything else uses the signed-int context.
    */
   struct lp_build_context *atomic_bld = is_float ? get_flt_bld(bld_base, bit_size) : get_int_bld(bld_base, true, bit_size);

   /* Byte offset -> element index of the atomic type. */
   offset = lp_build_shr_imm(uint_bld, offset, shift_val);
   /* Accumulate per-lane results in an alloca, since lanes are processed
    * one at a time inside control flow.
    */
   LLVMValueRef atom_res = lp_build_alloca(gallivm,
                                           atomic_bld->vec_type, "");

   LLVMValueRef exec_mask = mask_vec(bld_base);
   LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
   for (unsigned i = 0; i < uint_bld->type.length; i++) {
      LLVMValueRef counter = lp_build_const_int32(gallivm, i);
      LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, counter, "");

      /* Only execute the atomic for lanes with their exec bit set. */
      struct lp_build_if_state exec_ifthen;
      lp_build_if(&exec_ifthen, gallivm, loop_cond);

      LLVMValueRef ssbo_limit;
      LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, atomic_bld, bit_size, payload, index,
                                                     counter, &ssbo_limit);

      LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, counter, "");

      /* do_fetch starts all-ones; ANDed with the SSBO bounds check. */
      LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1);
      if (ssbo_limit) {
         LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_offset, ssbo_limit);
         do_fetch = LLVMBuildAnd(builder, do_fetch, ssbo_oob_cmp, "");
      }

      LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
                                                       counter, "");
      value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, atomic_bld->elem_type, "");

      LLVMValueRef scalar_ptr = LLVMBuildGEP2(builder, atomic_bld->elem_type, mem_ptr, &loop_offset, 1, "");

      struct lp_build_if_state ifthen;
      LLVMValueRef inner_cond, temp_res;
      LLVMValueRef scalar;

      inner_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_fetch, lp_build_const_int32(gallivm, 0), "");
      lp_build_if(&ifthen, gallivm, inner_cond);

      if (val2 != NULL) {
         /* Compare-and-swap: result is the {old value, success} pair; we
          * keep only the old value (field 0).
          */
         LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
                                                            counter, "");
         cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atomic_bld->elem_type, "");
         scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
                                         cas_src_ptr,
                                         LLVMAtomicOrderingSequentiallyConsistent,
                                         LLVMAtomicOrderingSequentiallyConsistent,
                                         false);
         scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
      } else {
         scalar = LLVMBuildAtomicRMW(builder, lp_translate_atomic_op(nir_op),
                                     scalar_ptr, value_ptr,
                                     LLVMAtomicOrderingSequentiallyConsistent,
                                     false);
      }
      temp_res = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, "");
      temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, counter, "");
      LLVMBuildStore(builder, temp_res, atom_res);
      lp_build_else(&ifthen);
      /* Out-of-bounds lane: write 0 into this result lane instead. */
      temp_res = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, "");
      LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, is_float);
      temp_res = LLVMBuildInsertElement(builder, temp_res, zero, counter, "");
      LLVMBuildStore(builder, temp_res, atom_res);
      lp_build_endif(&ifthen);

      lp_build_endif(&exec_ifthen);
   }
   *result = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, "");
}
1705
emit_barrier(struct lp_build_nir_context * bld_base)1706 static void emit_barrier(struct lp_build_nir_context *bld_base)
1707 {
1708 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1709 struct gallivm_state * gallivm = bld_base->base.gallivm;
1710
1711 LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");
1712
1713 lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);
1714 LLVMPositionBuilderAtEnd(gallivm->builder, resume);
1715 }
1716
/* Return the size of SSBO `index`, broadcast across all invocations.
 * ssbo_base_pointer() reports the size (queried with 8-bit elements, i.e.
 * in bytes) through its out-parameter; the pointer itself is unused here.
 */
static LLVMValueRef emit_get_ssbo_size(struct lp_build_nir_context *bld_base,
                                       LLVMValueRef index)
{
   LLVMValueRef num_bytes;
   ssbo_base_pointer(bld_base, 8, index, first_active_invocation(bld_base), &num_bytes);

   return lp_build_broadcast_scalar(&bld_base->uint_bld, num_bytes);
}
1727
emit_image_op(struct lp_build_nir_context * bld_base,struct lp_img_params * params)1728 static void emit_image_op(struct lp_build_nir_context *bld_base,
1729 struct lp_img_params *params)
1730 {
1731 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1732 struct gallivm_state *gallivm = bld_base->base.gallivm;
1733
1734 params->type = bld_base->base.type;
1735 params->resources_type = bld->resources_type;
1736 params->resources_ptr = bld->resources_ptr;
1737 params->thread_data_type = bld->thread_data_type;
1738 params->thread_data_ptr = bld->thread_data_ptr;
1739 params->exec_mask = mask_vec(bld_base);
1740
1741 if (params->image_index_offset)
1742 params->image_index_offset = LLVMBuildExtractElement(gallivm->builder, params->image_index_offset,
1743 first_active_invocation(bld_base), "");
1744
1745 if (params->resource)
1746 params->resource = build_resource_to_scalar(bld_base, params->resource);
1747
1748 bld->image->emit_op(bld->image,
1749 bld->bld_base.base.gallivm,
1750 params);
1751
1752 }
1753
emit_image_size(struct lp_build_nir_context * bld_base,struct lp_sampler_size_query_params * params)1754 static void emit_image_size(struct lp_build_nir_context *bld_base,
1755 struct lp_sampler_size_query_params *params)
1756 {
1757 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1758 struct gallivm_state *gallivm = bld_base->base.gallivm;
1759
1760 params->int_type = bld_base->int_bld.type;
1761 params->resources_type = bld->resources_type;
1762 params->resources_ptr = bld->resources_ptr;
1763 if (params->texture_unit_offset)
1764 params->texture_unit_offset = LLVMBuildExtractElement(gallivm->builder, params->texture_unit_offset,
1765 first_active_invocation(bld_base), "");
1766 bld->image->emit_size_query(bld->image,
1767 bld->bld_base.base.gallivm,
1768 params);
1769
1770 }
1771
/* Allocate output allocas for every component covered by `var`, starting at
 * component `sc`.  Each attribute slot holds 4 components; already-allocated
 * channels are left alone.
 */
static void init_var_slots(struct lp_build_nir_context *bld_base,
                           nir_variable *var, unsigned sc)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;

   if (!bld->outputs)
      return;

   const unsigned num_comps = glsl_count_attribute_slots(var->type, false) * 4;
   for (unsigned comp = sc; comp < num_comps + sc; comp++) {
      const unsigned loc = var->data.driver_location + comp / 4;
      const unsigned chan = comp % 4;

      if (!bld->outputs[loc][chan]) {
         bld->outputs[loc][chan] = lp_build_alloca(bld_base->base.gallivm,
                                                   bld_base->base.vec_type,
                                                   "output");
      }
   }
}
1789
/* Declare storage for a shader variable.  Only shader outputs need slots;
 * fragment depth/stencil results are forced to their fixed channels.
 */
static void emit_var_decl(struct lp_build_nir_context *bld_base,
                          nir_variable *var)
{
   if (var->data.mode != nir_var_shader_out)
      return;

   unsigned start_comp = var->data.location_frac;
   if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
      if (var->data.location == FRAG_RESULT_STENCIL)
         start_comp = 1;
      else if (var->data.location == FRAG_RESULT_DEPTH)
         start_comp = 2;
   }

   init_var_slots(bld_base, var, start_comp);
}
1809
/* Emit a texture sample.  Normally the whole SIMD vector is sampled at once,
 * but a dynamic (non-fragment) texture index forces a scalar per-lane loop,
 * since each lane may address a different texture.
 */
static void emit_tex(struct lp_build_nir_context *bld_base,
                     struct lp_sampler_params *params)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;

   params->type = bld_base->base.type;
   params->resources_type = bld->resources_type;
   params->resources_ptr = bld->resources_ptr;
   params->thread_data_type = bld->thread_data_type;
   params->thread_data_ptr = bld->thread_data_ptr;
   params->exec_mask = mask_vec(bld_base);

   if (params->texture_index_offset && bld_base->shader->info.stage != MESA_SHADER_FRAGMENT) {
      /* this is horrible but this can be dynamic */
      LLVMValueRef coords[5];
      LLVMValueRef *orig_texel_ptr;
      struct lp_build_context *uint_bld = &bld_base->uint_bld;
      LLVMValueRef result[4] = { LLVMGetUndef(bld_base->base.vec_type),
                                 LLVMGetUndef(bld_base->base.vec_type),
                                 LLVMGetUndef(bld_base->base.vec_type),
                                 LLVMGetUndef(bld_base->base.vec_type) };
      LLVMValueRef texel[4], orig_offset, orig_lod;
      unsigned i;
      /* Save the vector inputs; params is mutated per lane below and the
       * originals are needed to extract each lane's scalar values.
       */
      orig_texel_ptr = params->texel;
      orig_lod = params->lod;
      for (i = 0; i < 5; i++) {
         coords[i] = params->coords[i];
      }
      orig_offset = params->texture_index_offset;

      for (unsigned v = 0; v < uint_bld->type.length; v++) {
         LLVMValueRef idx = lp_build_const_int32(gallivm, v);
         LLVMValueRef new_coords[5];
         for (i = 0; i < 5; i++) {
            new_coords[i] = LLVMBuildExtractElement(gallivm->builder,
                                                    coords[i], idx, "");
         }
         params->coords = new_coords;
         params->texture_index_offset = LLVMBuildExtractElement(gallivm->builder,
                                                                orig_offset,
                                                                idx, "");
         /* Sample with a scalar type; each lane produces one texel. */
         params->type = lp_elem_type(bld_base->base.type);

         if (orig_lod)
            params->lod = LLVMBuildExtractElement(gallivm->builder, orig_lod, idx, "");
         params->texel = texel;
         bld->sampler->emit_tex_sample(bld->sampler,
                                       gallivm,
                                       params);

         /* Re-assemble the per-lane scalars into the result vectors. */
         for (i = 0; i < 4; i++) {
            result[i] = LLVMBuildInsertElement(gallivm->builder, result[i], texel[i], idx, "");
         }
      }
      for (i = 0; i < 4; i++) {
         orig_texel_ptr[i] = result[i];
      }
      return;
   }

   /* Uniform dynamic index: scalarize from the first active lane. */
   if (params->texture_index_offset) {
      params->texture_index_offset = LLVMBuildExtractElement(gallivm->builder, params->texture_index_offset,
                                                             first_active_invocation(bld_base), "");
   }

   if (params->texture_resource)
      params->texture_resource = build_resource_to_scalar(bld_base, params->texture_resource);

   if (params->sampler_resource)
      params->sampler_resource = build_resource_to_scalar(bld_base, params->sampler_resource);

   /* Restore the vector type (the per-lane path above may have changed it
    * on an earlier call through shared params).
    */
   params->type = bld_base->base.type;
   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 params);
}
1887
/* Forward a texture size/levels query to the sampler backend after filling
 * in common state.
 */
static void emit_tex_size(struct lp_build_nir_context *bld_base,
                          struct lp_sampler_size_query_params *params)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;

   params->int_type = bld_base->int_bld.type;
   params->resources_type = bld->resources_type;
   params->resources_ptr = bld->resources_ptr;
   /* NOTE(review): unlike emit_image_size()/emit_tex(), this scalarizes a
    * dynamic texture index from element 0 rather than from
    * first_active_invocation() — confirm that is intended.
    */
   if (params->texture_unit_offset)
      params->texture_unit_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder,
                                                            params->texture_unit_offset,
                                                            lp_build_const_int32(bld_base->base.gallivm, 0), "");

   params->exec_mask = mask_vec(bld_base);
   if (params->resource)
      params->resource = build_resource_to_scalar(bld_base, params->resource);

   bld->sampler->emit_size_query(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 params);
}
1909
get_local_invocation_index(struct lp_build_nir_soa_context * bld)1910 static LLVMValueRef get_local_invocation_index(struct lp_build_nir_soa_context *bld)
1911 {
1912 struct lp_build_nir_context *bld_base = &bld->bld_base;
1913 LLVMValueRef tmp, tmp2;
1914
1915 tmp = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.block_size[1]);
1916 tmp2 = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.block_size[0]);
1917 tmp = lp_build_mul(&bld_base->uint_bld, tmp, tmp2);
1918 tmp = lp_build_mul(&bld_base->uint_bld, tmp, bld->system_values.thread_id[2]);
1919
1920 tmp2 = lp_build_mul(&bld_base->uint_bld, tmp2, bld->system_values.thread_id[1]);
1921 tmp = lp_build_add(&bld_base->uint_bld, tmp, tmp2);
1922 tmp = lp_build_add(&bld_base->uint_bld, tmp, bld->system_values.thread_id[0]);
1923 return tmp;
1924 }
1925
/* Lower a NIR system-value load intrinsic to LLVM values, mostly by
 * broadcasting the precomputed scalars stashed in bld->system_values.
 * Unhandled intrinsics fall through the (mid-switch) default and leave
 * `result` untouched.
 */
static void emit_sysval_intrin(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr,
                               LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   /* Integer build context matching the destination's bit size, used by the
    * workgroup-id / num-workgroups broadcasts below.
    */
   struct lp_build_context *bld_broad = get_int_bld(bld_base, true, instr->def.bit_size);
   switch (instr->intrinsic) {
   case nir_intrinsic_load_instance_id:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
      break;
   case nir_intrinsic_load_base_instance:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.base_instance);
      break;
   case nir_intrinsic_load_base_vertex:
      result[0] = bld->system_values.basevertex;
      break;
   case nir_intrinsic_load_first_vertex:
      result[0] = bld->system_values.firstvertex;
      break;
   case nir_intrinsic_load_vertex_id:
      result[0] = bld->system_values.vertex_id;
      break;
   case nir_intrinsic_load_primitive_id:
      result[0] = bld->system_values.prim_id;
      break;
   case nir_intrinsic_load_workgroup_id: {
      LLVMValueRef tmp[3];
      for (unsigned i = 0; i < 3; i++) {
         tmp[i] = bld->system_values.block_id[i];
         result[i] = lp_build_broadcast_scalar(bld_broad, tmp[i]);
      }
      break;
   }
   case nir_intrinsic_load_local_invocation_id:
      /* thread_id[] is already a per-lane vector; no broadcast needed. */
      for (unsigned i = 0; i < 3; i++)
         result[i] = bld->system_values.thread_id[i];
      break;
   case nir_intrinsic_load_local_invocation_index:
      result[0] = get_local_invocation_index(bld);
      break;
   case nir_intrinsic_load_num_workgroups: {
      LLVMValueRef tmp[3];
      for (unsigned i = 0; i < 3; i++) {
         tmp[i] = bld->system_values.grid_size[i];
         result[i] = lp_build_broadcast_scalar(bld_broad, tmp[i]);
      }
      break;
   }
   case nir_intrinsic_load_invocation_id:
      /* TCS already stores a vector invocation id; other stages store a
       * scalar that must be broadcast.
       */
      if (bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL)
         result[0] = bld->system_values.invocation_id;
      else
         result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
      break;
   case nir_intrinsic_load_front_face:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);
      break;
   case nir_intrinsic_load_draw_id:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.draw_id);
      break;
   default:
      break;
   case nir_intrinsic_load_workgroup_size:
      for (unsigned i = 0; i < 3; i++)
         result[i] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.block_size[i]);
      break;
   case nir_intrinsic_load_work_dim:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.work_dim);
      break;
   case nir_intrinsic_load_tess_coord:
      for (unsigned i = 0; i < 3; i++) {
         result[i] = LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_coord, i, "");
      }
      break;
   case nir_intrinsic_load_tess_level_outer:
      for (unsigned i = 0; i < 4; i++)
         result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_outer, i, ""));
      break;
   case nir_intrinsic_load_tess_level_inner:
      for (unsigned i = 0; i < 2; i++)
         result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_inner, i, ""));
      break;
   case nir_intrinsic_load_patch_vertices_in:
      result[0] = bld->system_values.vertices_in;
      break;
   case nir_intrinsic_load_sample_id:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.sample_id);
      break;
   case nir_intrinsic_load_sample_pos:
      /* sample_pos is a flat array of (x, y) pairs indexed by sample id. */
      for (unsigned i = 0; i < 2; i++) {
         LLVMValueRef idx = LLVMBuildMul(gallivm->builder, bld->system_values.sample_id, lp_build_const_int32(gallivm, 2), "");
         idx = LLVMBuildAdd(gallivm->builder, idx, lp_build_const_int32(gallivm, i), "");
         LLVMValueRef val = lp_build_array_get2(gallivm, bld->system_values.sample_pos_type,
                                                bld->system_values.sample_pos, idx);
         result[i] = lp_build_broadcast_scalar(&bld_base->base, val);
      }
      break;
   case nir_intrinsic_load_sample_mask_in:
      result[0] = bld->system_values.sample_mask_in;
      break;
   case nir_intrinsic_load_view_index:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.view_index);
      break;
   case nir_intrinsic_load_subgroup_invocation: {
      /* Constant vector <0, 1, 2, ...>: each lane is its own index. */
      LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
      for(unsigned i = 0; i < bld->bld_base.base.type.length; ++i)
         elems[i] = lp_build_const_int32(gallivm, i);
      result[0] = LLVMConstVector(elems, bld->bld_base.base.type.length);
      break;
   }
   case nir_intrinsic_load_subgroup_id:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.subgroup_id);
      break;
   case nir_intrinsic_load_num_subgroups:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.num_subgroups);
      break;
   }
}
2045
/* Produce a per-lane boolean that is true for helper invocations: any lane
 * whose execution mask is not fully set (all-ones).
 */
static void emit_helper_invocation(struct lp_build_nir_context *bld_base,
                                   LLVMValueRef *dst)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;

   LLVMValueRef all_ones = lp_build_const_int_vec(gallivm, uint_bld->type, -1);
   *dst = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, mask_vec(bld_base), all_ones);
}
2053
bgnloop(struct lp_build_nir_context * bld_base)2054 static void bgnloop(struct lp_build_nir_context *bld_base)
2055 {
2056 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2057 lp_exec_bgnloop(&bld->exec_mask, true);
2058 }
2059
endloop(struct lp_build_nir_context * bld_base)2060 static void endloop(struct lp_build_nir_context *bld_base)
2061 {
2062 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2063 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask, bld->mask);
2064 }
2065
/* Open a real LLVM if-block that skips a (non-flattened) branch body when
 * no invocation is active, so fully-masked code is not executed at all.
 * Paired with lp_build_skip_branch_end().
 */
static void lp_build_skip_branch(struct lp_build_nir_context *bld_base, bool flatten)
{
   if (flatten)
      return;

   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;

   LLVMValueRef exec_mask = mask_vec_with_helpers(bld_base);

   /* Reduce the per-lane mask to one scalar "any lane active" condition:
    * compare each lane against zero, bitcast the <N x i1> result to a single
    * iN integer, widen it, and test that integer against zero.
    */
   LLVMValueRef bitmask = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
   bitmask = LLVMBuildBitCast(builder, bitmask, LLVMIntTypeInContext(gallivm->context, bld_base->uint_bld.type.length), "");
   bitmask = LLVMBuildZExt(builder, bitmask, bld_base->int_bld.elem_type, "");

   LLVMValueRef any_active = LLVMBuildICmp(builder, LLVMIntNE, bitmask, lp_build_const_int32(gallivm, 0), "any_active");

   assert(bld_base->if_stack_size < LP_MAX_TGSI_NESTING);
   lp_build_if(&bld_base->if_stack[bld_base->if_stack_size], gallivm, any_active);
   bld_base->if_stack_size++;
}
2086
/* Close the if-block opened by lp_build_skip_branch().  Flattened branches
 * never opened one, so there is nothing to close.
 */
static void lp_build_skip_branch_end(struct lp_build_nir_context *bld_base, bool flatten)
{
   if (flatten)
      return;

   assert(bld_base->if_stack_size);
   lp_build_endif(&bld_base->if_stack[--bld_base->if_stack_size]);
}
2096
/* Begin an `if`: push the condition onto the execution-mask stack, then
 * optionally emit a real branch skipping the body when all lanes are off.
 */
static void if_cond(struct lp_build_nir_context *bld_base, LLVMValueRef cond, bool flatten)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;

   LLVMValueRef cond_mask =
      LLVMBuildBitCast(builder, cond, bld_base->base.int_vec_type, "");
   lp_exec_mask_cond_push(&bld->exec_mask, cond_mask);

   lp_build_skip_branch(bld_base, flatten);
}
2107
/* Switch from the then-side to the else-side: close the then skip-branch,
 * invert the condition mask, and open a skip-branch for the else body.
 */
static void else_stmt(struct lp_build_nir_context *bld_base, bool flatten_then, bool flatten_else)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;

   lp_build_skip_branch_end(bld_base, flatten_then);
   lp_exec_mask_cond_invert(&bld->exec_mask);
   lp_build_skip_branch(bld_base, flatten_else);
}
2117
/* End an `if`: close the active skip-branch and pop the condition off the
 * execution-mask stack.
 */
static void endif_stmt(struct lp_build_nir_context *bld_base, bool flatten)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;

   lp_build_skip_branch_end(bld_base, flatten);
   lp_exec_mask_cond_pop(&bld->exec_mask);
}
2125
break_stmt(struct lp_build_nir_context * bld_base)2126 static void break_stmt(struct lp_build_nir_context *bld_base)
2127 {
2128 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2129
2130 lp_exec_break(&bld->exec_mask, NULL, false);
2131 }
2132
continue_stmt(struct lp_build_nir_context * bld_base)2133 static void continue_stmt(struct lp_build_nir_context *bld_base)
2134 {
2135 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2136 lp_exec_continue(&bld->exec_mask);
2137 }
2138
/* Kill fragments.  `cond` is a per-lane mask of lanes to discard (NULL means
 * discard unconditionally).  The computed mask has all-ones for surviving
 * lanes and zeros for killed ones, and is folded into the fragment mask.
 */
static void discard(struct lp_build_nir_context *bld_base, LLVMValueRef cond)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   LLVMValueRef kill_mask;

   if (cond) {
      /* Survive where the condition is false... */
      kill_mask = LLVMBuildNot(builder, cond, "");
      /* ...or where the lane wasn't executing this discard at all. */
      if (bld->exec_mask.has_mask) {
         LLVMValueRef inactive = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
         kill_mask = LLVMBuildOr(builder, kill_mask, inactive, "");
      }
   } else {
      /* Unconditional discard: only non-executing lanes survive. */
      kill_mask = bld->exec_mask.has_mask
         ? LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp")
         : LLVMConstNull(bld->bld_base.base.int_vec_type);
   }

   lp_build_mask_update(bld->mask, kill_mask);
}
2161
2162 static void
increment_vec_ptr_by_mask(struct lp_build_nir_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)2163 increment_vec_ptr_by_mask(struct lp_build_nir_context * bld_base,
2164 LLVMValueRef ptr,
2165 LLVMValueRef mask)
2166 {
2167 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2168 LLVMValueRef current_vec = LLVMBuildLoad2(builder, LLVMTypeOf(mask), ptr, "");
2169
2170 current_vec = LLVMBuildSub(builder, current_vec, mask, "");
2171
2172 LLVMBuildStore(builder, current_vec, ptr);
2173 }
2174
2175 static void
clear_uint_vec_ptr_from_mask(struct lp_build_nir_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)2176 clear_uint_vec_ptr_from_mask(struct lp_build_nir_context * bld_base,
2177 LLVMValueRef ptr,
2178 LLVMValueRef mask)
2179 {
2180 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2181 LLVMValueRef current_vec = LLVMBuildLoad2(builder, bld_base->uint_bld.vec_type, ptr, "");
2182
2183 current_vec = lp_build_select(&bld_base->uint_bld,
2184 mask,
2185 bld_base->uint_bld.zero,
2186 current_vec);
2187
2188 LLVMBuildStore(builder, current_vec, ptr);
2189 }
2190
2191 static LLVMValueRef
clamp_mask_to_max_output_vertices(struct lp_build_nir_soa_context * bld,LLVMValueRef current_mask_vec,LLVMValueRef total_emitted_vertices_vec)2192 clamp_mask_to_max_output_vertices(struct lp_build_nir_soa_context * bld,
2193 LLVMValueRef current_mask_vec,
2194 LLVMValueRef total_emitted_vertices_vec)
2195 {
2196 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2197 struct lp_build_context *int_bld = &bld->bld_base.int_bld;
2198 LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
2199 total_emitted_vertices_vec,
2200 bld->max_output_vertices_vec);
2201
2202 return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
2203 }
2204
/* GS EmitVertex() hook: hands the current output registers to the geometry
 * shader interface for @stream_id and bumps the per-lane vertex counters.
 *
 * Lanes that already reached max_output_vertices are masked off before the
 * emit so the interface never receives more vertices than declared.
 */
static void emit_vertex(struct lp_build_nir_context *bld_base, uint32_t stream_id)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   /* Writes to streams the shader doesn't produce are dropped. */
   if (stream_id >= bld->gs_vertex_streams)
      return;
   assert(bld->gs_iface->emit_vertex);
   LLVMValueRef total_emitted_vertices_vec =
      LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->total_emitted_vertices_vec_ptr[stream_id], "");
   /* Current exec mask, limited to lanes still below the vertex limit. */
   LLVMValueRef mask = mask_vec(bld_base);
   mask = clamp_mask_to_max_output_vertices(bld, mask,
                                            total_emitted_vertices_vec);
   bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
                              bld->outputs,
                              total_emitted_vertices_vec,
                              mask,
                              lp_build_const_int_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, stream_id));

   /* Advance both the per-primitive and the total vertex counters for the
    * lanes that actually emitted. */
   increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
                             mask);
   increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr[stream_id],
                             mask);
}
2229
/* Ends the current GS output primitive on @stream_id for the lanes in @mask.
 *
 * Lanes that emitted no vertex since the previous end-of-primitive are
 * filtered out (ending an empty primitive is a no-op).  For the remaining
 * lanes the primitive counter is incremented and the per-primitive vertex
 * counter is reset to zero.
 */
static void
end_primitive_masked(struct lp_build_nir_context * bld_base,
                     LLVMValueRef mask, uint32_t stream_id)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   /* Streams the shader doesn't produce are ignored. */
   if (stream_id >= bld->gs_vertex_streams)
      return;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMValueRef emitted_vertices_vec =
      LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->emitted_vertices_vec_ptr[stream_id], "");
   LLVMValueRef emitted_prims_vec =
      LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->emitted_prims_vec_ptr[stream_id], "");
   LLVMValueRef total_emitted_vertices_vec =
      LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->total_emitted_vertices_vec_ptr[stream_id], "");

   /* Only lanes with at least one vertex in the open primitive take part. */
   LLVMValueRef emitted_mask = lp_build_cmp(uint_bld,
                                            PIPE_FUNC_NOTEQUAL,
                                            emitted_vertices_vec,
                                            uint_bld->zero);
   mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
   bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
                                total_emitted_vertices_vec,
                                emitted_vertices_vec, emitted_prims_vec, mask, stream_id);
   increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr[stream_id],
                             mask);
   /* Start counting vertices of the next primitive from zero. */
   clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
                                mask);
}
2260
/* GS EndPrimitive() hook: ends the open primitive for all currently
 * active lanes on @stream_id.
 */
static void end_primitive(struct lp_build_nir_context *bld_base, uint32_t stream_id)
{
   ASSERTED struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;

   assert(bld->gs_iface->end_primitive);

   end_primitive_masked(bld_base, mask_vec(bld_base), stream_id);
}
2270
2271 static void
emit_prologue(struct lp_build_nir_soa_context * bld)2272 emit_prologue(struct lp_build_nir_soa_context *bld)
2273 {
2274 struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
2275 if (bld->indirects & nir_var_shader_in && !bld->gs_iface && !bld->tcs_iface && !bld->tes_iface) {
2276 uint32_t num_inputs = bld->num_inputs;
2277 /* If this is an indirect case, the number of inputs should not be 0 */
2278 assert(num_inputs > 0);
2279
2280 unsigned index, chan;
2281 LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2282 LLVMValueRef array_size = lp_build_const_int32(gallivm, num_inputs * 4);
2283 bld->inputs_array = lp_build_array_alloca(gallivm,
2284 vec_type, array_size,
2285 "input_array");
2286
2287 for (index = 0; index < num_inputs; ++index) {
2288 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
2289 LLVMValueRef lindex =
2290 lp_build_const_int32(gallivm, index * 4 + chan);
2291 LLVMValueRef input_ptr =
2292 LLVMBuildGEP2(gallivm->builder, vec_type, bld->inputs_array, &lindex, 1, "");
2293 LLVMValueRef value = bld->inputs[index][chan];
2294 if (value)
2295 LLVMBuildStore(gallivm->builder, value, input_ptr);
2296 }
2297 }
2298 }
2299 }
2300
/* Implements the subgroup vote intrinsics (vote_any / vote_all /
 * vote_ieq / vote_feq) by looping over the lanes of the SoA vector.
 *
 * result[0] receives a uniform vector: ~0 where the vote passed, 0 where
 * it failed, broadcast to all lanes.  Inactive lanes (per the current
 * exec mask) are skipped entirely.
 */
static void emit_vote(struct lp_build_nir_context *bld_base, LLVMValueRef src,
                      nir_intrinsic_instr *instr, LLVMValueRef result[4])
{
   struct gallivm_state * gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   uint32_t bit_size = nir_src_bit_size(instr->src[0]);
   LLVMValueRef exec_mask = mask_vec(bld_base);
   struct lp_build_loop_state loop_state;
   /* Per-lane "is this lane active" condition, tested inside each loop. */
   LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");

   /* res_store: running vote result; eq_store: reference value for eq votes.
    * NOTE(review): res_store relies on lp_build_alloca's initial contents
    * when no active lane stores to it — presumably zero-initialized; see
    * lp_build_alloca. */
   LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->uint_bld.elem_type, "");
   LLVMValueRef eq_store = lp_build_alloca(gallivm, get_int_bld(bld_base, true, bit_size)->elem_type, "");
   LLVMValueRef init_val = NULL;
   if (instr->intrinsic == nir_intrinsic_vote_ieq ||
       instr->intrinsic == nir_intrinsic_vote_feq) {
      /* for equal we unfortunately have to loop and find the first valid one. */
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
      LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");

      struct lp_build_if_state ifthen;
      lp_build_if(&ifthen, gallivm, if_cond);
      LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
                                                       loop_state.counter, "");
      /* Last active lane wins, which is fine: eq votes only compare against
       * one active lane's value, and res starts as "all equal" (-1). */
      LLVMBuildStore(builder, value_ptr, eq_store);
      LLVMBuildStore(builder, lp_build_const_int32(gallivm, -1), res_store);
      lp_build_endif(&ifthen);
      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                             NULL, LLVMIntUGE);
      init_val = LLVMBuildLoad2(builder, get_int_bld(bld_base, true, bit_size)->elem_type, eq_store, "");
   } else {
      /* any: start false (0); all: start true (-1). */
      LLVMBuildStore(builder, lp_build_const_int32(gallivm, instr->intrinsic == nir_intrinsic_vote_any ? 0 : -1), res_store);
   }

   /* Main loop: fold each active lane's value into the running result. */
   LLVMValueRef res;
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
   LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
                                                    loop_state.counter, "");
   struct lp_build_if_state ifthen;
   LLVMValueRef if_cond;
   if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");

   lp_build_if(&ifthen, gallivm, if_cond);
   res = LLVMBuildLoad2(builder, bld_base->uint_bld.elem_type, res_store, "");

   if (instr->intrinsic == nir_intrinsic_vote_feq) {
      /* Float-eq compares bit-cast values with unordered-equal. */
      struct lp_build_context *flt_bld = get_flt_bld(bld_base, bit_size);
      LLVMValueRef tmp = LLVMBuildFCmp(builder, LLVMRealUEQ,
                                       LLVMBuildBitCast(builder, init_val, flt_bld->elem_type, ""),
                                       LLVMBuildBitCast(builder, value_ptr, flt_bld->elem_type, ""), "");
      tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, "");
      res = LLVMBuildAnd(builder, res, tmp, "");
   } else if (instr->intrinsic == nir_intrinsic_vote_ieq) {
      LLVMValueRef tmp = LLVMBuildICmp(builder, LLVMIntEQ, init_val, value_ptr, "");
      tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, "");
      res = LLVMBuildAnd(builder, res, tmp, "");
   } else if (instr->intrinsic == nir_intrinsic_vote_any)
      res = LLVMBuildOr(builder, res, value_ptr, "");
   else
      res = LLVMBuildAnd(builder, res, value_ptr, "");
   LLVMBuildStore(builder, res, res_store);
   lp_build_endif(&ifthen);
   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                          NULL, LLVMIntUGE);
   /* Vote result is uniform: broadcast the scalar to all lanes. */
   result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld,
                                         LLVMBuildLoad2(builder, bld_base->uint_bld.elem_type, res_store, ""));
}
2367
/* Implements subgroup ballot: builds a scalar bitmask with bit i set when
 * lane i is active and its src value is true, then broadcasts it.
 *
 * NOTE(review): lane i contributes `src[i] & (1 << i)`, so the active-lane
 * values are assumed to be all-ones/all-zero masks — any true lane has bit
 * i set in its own value.
 */
static void emit_ballot(struct lp_build_nir_context *bld_base, LLVMValueRef src, nir_intrinsic_instr *instr, LLVMValueRef result[4])
{
   struct gallivm_state * gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef exec_mask = mask_vec(bld_base);
   struct lp_build_loop_state loop_state;
   /* Inactive lanes contribute 0. */
   src = LLVMBuildAnd(builder, src, exec_mask, "");
   /* Accumulator for the ballot bits.  NOTE(review): relies on
    * lp_build_alloca's initial contents being zero — confirm. */
   LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
   LLVMValueRef res;
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
   LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
                                                    loop_state.counter, "");
   res = LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, res_store, "");
   /* OR in bit `counter` when this lane's value has that bit set. */
   res = LLVMBuildOr(builder,
                     res,
                     LLVMBuildAnd(builder, value_ptr, LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1), loop_state.counter, ""), ""), "");
   LLVMBuildStore(builder, res, res_store);

   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                          NULL, LLVMIntUGE);
   /* Ballot result is uniform across lanes. */
   result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld,
                                         LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, res_store, ""));
}
2391
/* Implements subgroup elect: result[0] is a vector that is ~0 in exactly
 * one lane — the first active one — and 0 everywhere else.
 */
static void emit_elect(struct lp_build_nir_context *bld_base, LLVMValueRef result[4])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef exec_mask = mask_vec(bld_base);
   struct lp_build_loop_state loop_state;

   /* idx_store: index of the first active lane; found_store: flag so only
    * the first match wins.  NOTE(review): both rely on lp_build_alloca's
    * initial contents being zero — confirm. */
   LLVMValueRef idx_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
   LLVMValueRef found_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
   LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, exec_mask,
                                                    loop_state.counter, "");
   /* cond: this lane is active (exec mask lane == -1)... */
   LLVMValueRef cond = LLVMBuildICmp(gallivm->builder,
                                     LLVMIntEQ,
                                     value_ptr,
                                     lp_build_const_int32(gallivm, -1), "");
   /* ...and no earlier lane was already chosen. */
   LLVMValueRef cond2 = LLVMBuildICmp(gallivm->builder,
                                      LLVMIntEQ,
                                      LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, found_store, ""),
                                      lp_build_const_int32(gallivm, 0), "");

   cond = LLVMBuildAnd(builder, cond, cond2, "");
   struct lp_build_if_state ifthen;
   lp_build_if(&ifthen, gallivm, cond);
   LLVMBuildStore(builder, lp_build_const_int32(gallivm, 1), found_store);
   LLVMBuildStore(builder, loop_state.counter, idx_store);
   lp_build_endif(&ifthen);
   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                          NULL, LLVMIntUGE);

   /* All-zero vector with -1 inserted at the elected lane's index. */
   result[0] = LLVMBuildInsertElement(builder, bld_base->uint_bld.zero,
                                      lp_build_const_int32(gallivm, -1),
                                      LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, idx_store, ""),
                                      "");
}
2427
2428 #if LLVM_VERSION_MAJOR >= 10
/* Implements nir_intrinsic_shuffle: result lane i = src[index[i]].
 *
 * Uses the AVX2 vpermd intrinsic when the vector shape matches (8 x 32-bit
 * with 32-bit indices); otherwise falls back to a scalar extract/insert
 * loop over the lanes.
 */
static void emit_shuffle(struct lp_build_nir_context *bld_base, LLVMValueRef src, LLVMValueRef index,
                         nir_intrinsic_instr *instr, LLVMValueRef result[4])
{
   assert(instr->intrinsic == nir_intrinsic_shuffle);

   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   uint32_t bit_size = nir_src_bit_size(instr->src[0]);
   uint32_t index_bit_size = nir_src_bit_size(instr->src[1]);
   struct lp_build_context *int_bld = get_int_bld(bld_base, true, bit_size);

   if (util_get_cpu_caps()->has_avx2 && bit_size == 32 && index_bit_size == 32 && int_bld->type.length == 8) {
      /* freeze `src` in case inactive invocations contain poison */
      src = LLVMBuildFreeze(builder, src, "");
      result[0] = lp_build_intrinsic_binary(builder, "llvm.x86.avx2.permd", int_bld->vec_type, src, index);
   } else {
      /* Fallback: gather one lane per loop iteration. */
      LLVMValueRef res_store = lp_build_alloca(gallivm, int_bld->vec_type, "");
      struct lp_build_loop_state loop_state;
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));

      LLVMValueRef index_value = LLVMBuildExtractElement(builder, index, loop_state.counter, "");

      LLVMValueRef src_value = LLVMBuildExtractElement(builder, src, index_value, "");
      /* freeze `src_value` in case an out-of-bounds index or an index into an
       * inactive invocation results in poison
       */
      src_value = LLVMBuildFreeze(builder, src_value, "");

      LLVMValueRef res = LLVMBuildLoad2(builder, int_bld->vec_type, res_store, "");
      res = LLVMBuildInsertElement(builder, res, src_value, loop_state.counter, "");
      LLVMBuildStore(builder, res, res_store);

      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                             NULL, LLVMIntUGE);

      result[0] = LLVMBuildLoad2(builder, int_bld->vec_type, res_store, "");
   }
}
2467 #endif
2468
/* Implements nir_intrinsic_reduce / inclusive_scan / exclusive_scan by
 * scanning the SoA lanes in order with an unrolled per-lane loop.
 *
 * scan_store holds the running scalar accumulator (as raw integer bits;
 * float ops bit-cast in and out), res_store the vector result under
 * construction.  Inactive lanes (exec mask) are skipped.  For clustered
 * reductions the accumulator is flushed to element i/cluster_size and
 * re-seeded with the identity at each cluster boundary, and the final
 * result is a shuffle that broadcasts each cluster's value across its
 * lanes.
 */
static void emit_reduce(struct lp_build_nir_context *bld_base, LLVMValueRef src,
                        nir_intrinsic_instr *instr, LLVMValueRef result[4])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   uint32_t bit_size = nir_src_bit_size(instr->src[0]);
   /* can't use llvm reduction intrinsics because of exec_mask */
   LLVMValueRef exec_mask = mask_vec(bld_base);
   nir_op reduction_op = nir_intrinsic_reduction_op(instr);

   uint32_t cluster_size = 0;

   if (instr->intrinsic == nir_intrinsic_reduce)
      cluster_size = nir_intrinsic_cluster_size(instr);

   /* cluster_size 0 (or scans) means "whole subgroup". */
   if (cluster_size == 0)
      cluster_size = bld_base->int_bld.type.length;

   LLVMValueRef res_store = NULL;
   LLVMValueRef scan_store;
   struct lp_build_context *int_bld = get_int_bld(bld_base, true, bit_size);

   res_store = lp_build_alloca(gallivm, int_bld->vec_type, "");
   scan_store = lp_build_alloca(gallivm, int_bld->elem_type, "");

   struct lp_build_context elem_bld;
   bool is_flt = reduction_op == nir_op_fadd ||
      reduction_op == nir_op_fmul ||
      reduction_op == nir_op_fmin ||
      reduction_op == nir_op_fmax;
   bool is_unsigned = reduction_op == nir_op_umin ||
      reduction_op == nir_op_umax;

   struct lp_build_context *vec_bld = is_flt ? get_flt_bld(bld_base, bit_size) :
      get_int_bld(bld_base, is_unsigned, bit_size);

   /* Scalar build context matching one element of the vector type. */
   lp_build_context_init(&elem_bld, gallivm, lp_elem_type(vec_bld->type));

   LLVMValueRef store_val = NULL;
   /*
    * Put the identity value for the operation into the storage
    */
   switch (reduction_op) {
   case nir_op_fmin: {
      /* Identity for min is +inf; stored bit-cast to the integer type. */
      LLVMValueRef flt_max = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), INFINITY) :
         (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), INFINITY) : lp_build_const_float(gallivm, INFINITY));
      store_val = LLVMBuildBitCast(builder, flt_max, int_bld->elem_type, "");
      break;
   }
   case nir_op_fmax: {
      LLVMValueRef flt_min = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), -INFINITY) :
         (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), -INFINITY) : lp_build_const_float(gallivm, -INFINITY));
      store_val = LLVMBuildBitCast(builder, flt_min, int_bld->elem_type, "");
      break;
   }
   case nir_op_fmul: {
      LLVMValueRef flt_one = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), 1.0) :
         (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), 1.0) : lp_build_const_float(gallivm, 1.0));
      store_val = LLVMBuildBitCast(builder, flt_one, int_bld->elem_type, "");
      break;
   }
   case nir_op_umin:
      switch (bit_size) {
      case 8:
         store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), UINT8_MAX, 0);
         break;
      case 16:
         store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), UINT16_MAX, 0);
         break;
      case 32:
      default:
         store_val = lp_build_const_int32(gallivm, UINT_MAX);
         break;
      case 64:
         store_val = lp_build_const_int64(gallivm, UINT64_MAX);
         break;
      }
      break;
   case nir_op_imin:
      switch (bit_size) {
      case 8:
         store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), INT8_MAX, 0);
         break;
      case 16:
         store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), INT16_MAX, 0);
         break;
      case 32:
      default:
         store_val = lp_build_const_int32(gallivm, INT_MAX);
         break;
      case 64:
         store_val = lp_build_const_int64(gallivm, INT64_MAX);
         break;
      }
      break;
   case nir_op_imax:
      switch (bit_size) {
      case 8:
         store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), INT8_MIN, 0);
         break;
      case 16:
         store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), INT16_MIN, 0);
         break;
      case 32:
      default:
         store_val = lp_build_const_int32(gallivm, INT_MIN);
         break;
      case 64:
         store_val = lp_build_const_int64(gallivm, INT64_MIN);
         break;
      }
      break;
   case nir_op_imul:
      switch (bit_size) {
      case 8:
         store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 1, 0);
         break;
      case 16:
         store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 1, 0);
         break;
      case 32:
      default:
         store_val = lp_build_const_int32(gallivm, 1);
         break;
      case 64:
         store_val = lp_build_const_int64(gallivm, 1);
         break;
      }
      break;
   case nir_op_iand:
      switch (bit_size) {
      case 8:
         store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0xff, 0);
         break;
      case 16:
         store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0xffff, 0);
         break;
      case 32:
      default:
         store_val = lp_build_const_int32(gallivm, 0xffffffff);
         break;
      case 64:
         store_val = lp_build_const_int64(gallivm, 0xffffffffffffffffLL);
         break;
      }
      break;
   default:
      /* iadd/ior/ixor: identity is 0, which is what the alloca provides —
       * NOTE(review): presumably lp_build_alloca zero-initializes; confirm. */
      break;
   }
   if (store_val)
      LLVMBuildStore(builder, store_val, scan_store);

   LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");

   /* Unrolled per-lane scan, guarded so inactive lanes are skipped. */
   for (uint32_t i = 0; i < bld_base->uint_bld.type.length; i++) {
      LLVMValueRef counter = lp_build_const_int32(gallivm, i);

      struct lp_build_if_state ifthen;
      LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, counter, "");
      lp_build_if(&ifthen, gallivm, if_cond);

      LLVMValueRef value = LLVMBuildExtractElement(gallivm->builder, src, counter, "");

      LLVMValueRef res = NULL;
      LLVMValueRef scan_val = LLVMBuildLoad2(gallivm->builder, int_bld->elem_type, scan_store, "");

      if (instr->intrinsic != nir_intrinsic_reduce)
         res = LLVMBuildLoad2(gallivm->builder, int_bld->vec_type, res_store, "");

      /* Exclusive scan writes the accumulator BEFORE folding in this lane. */
      if (instr->intrinsic == nir_intrinsic_exclusive_scan)
         res = LLVMBuildInsertElement(builder, res, scan_val, counter, "");

      if (is_flt) {
         scan_val = LLVMBuildBitCast(builder, scan_val, elem_bld.elem_type, "");
         value = LLVMBuildBitCast(builder, value, elem_bld.elem_type, "");
      }

      switch (reduction_op) {
      case nir_op_fadd:
      case nir_op_iadd:
         scan_val = lp_build_add(&elem_bld, value, scan_val);
         break;
      case nir_op_fmul:
      case nir_op_imul:
         scan_val = lp_build_mul(&elem_bld, value, scan_val);
         break;
      case nir_op_imin:
      case nir_op_umin:
      case nir_op_fmin:
         scan_val = lp_build_min(&elem_bld, value, scan_val);
         break;
      case nir_op_imax:
      case nir_op_umax:
      case nir_op_fmax:
         scan_val = lp_build_max(&elem_bld, value, scan_val);
         break;
      case nir_op_iand:
         scan_val = lp_build_and(&elem_bld, value, scan_val);
         break;
      case nir_op_ior:
         scan_val = lp_build_or(&elem_bld, value, scan_val);
         break;
      case nir_op_ixor:
         scan_val = lp_build_xor(&elem_bld, value, scan_val);
         break;
      default:
         assert(0);
         break;
      }

      if (is_flt)
         scan_val = LLVMBuildBitCast(builder, scan_val, int_bld->elem_type, "");
      LLVMBuildStore(builder, scan_val, scan_store);

      /* Inclusive scan writes the accumulator AFTER folding in this lane. */
      if (instr->intrinsic == nir_intrinsic_inclusive_scan)
         res = LLVMBuildInsertElement(builder, res, scan_val, counter, "");

      if (instr->intrinsic != nir_intrinsic_reduce)
         LLVMBuildStore(builder, res, res_store);

      lp_build_endif(&ifthen);

      /* Cluster boundary (unconditional code): flush the accumulator into
       * element i/cluster_size and reset it to the identity. */
      if (instr->intrinsic == nir_intrinsic_reduce && (i % cluster_size) == (cluster_size - 1)) {
         res = LLVMBuildLoad2(gallivm->builder, int_bld->vec_type, res_store, "");
         scan_val = LLVMBuildLoad2(gallivm->builder, int_bld->elem_type, scan_store, "");

         if (store_val)
            LLVMBuildStore(builder, store_val, scan_store);
         else
            LLVMBuildStore(builder, LLVMConstNull(int_bld->elem_type), scan_store);

         LLVMValueRef cluster_index = lp_build_const_int32(gallivm, i / cluster_size);
         res = LLVMBuildInsertElement(builder, res, scan_val, cluster_index, "");

         LLVMBuildStore(builder, res, res_store);
      }
   }

   LLVMValueRef res = LLVMBuildLoad2(gallivm->builder, int_bld->vec_type, res_store, "");

   if (instr->intrinsic == nir_intrinsic_reduce) {
      /* Broadcast each cluster's reduced value to all lanes of the cluster. */
      LLVMValueRef swizzle[LP_MAX_VECTOR_LENGTH];
      for (uint32_t i = 0; i < bld_base->int_bld.type.length; i++)
         swizzle[i] = lp_build_const_int32(gallivm, i / cluster_size);

      LLVMValueRef undef = LLVMGetUndef(int_bld->vec_type);
      result[0] = LLVMBuildShuffleVector(
         builder, res, undef, LLVMConstVector(swizzle, bld_base->int_bld.type.length), "");
   } else {
      result[0] = res;
   }

}
2722
/* Implements readInvocation()/readFirstInvocation(): extracts one lane's
 * value from @src and broadcasts it to all lanes of result[0].
 *
 * @invoc is NULL for readFirstInvocation; otherwise it holds the per-lane
 * requested invocation index.
 */
static void emit_read_invocation(struct lp_build_nir_context *bld_base,
                                 LLVMValueRef src,
                                 unsigned bit_size,
                                 LLVMValueRef invoc,
                                 LLVMValueRef result[4])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef lane = first_active_invocation(bld_base);
   struct lp_build_context *uint_bld = get_int_bld(bld_base, true, bit_size);

   /* readInvocation(): pull the requested index out of @invoc using the
    * first active lane. */
   if (invoc)
      lane = LLVMBuildExtractElement(builder, invoc, lane, "");

   LLVMValueRef scalar = LLVMBuildExtractElement(builder, src, lane, "");
   result[0] = lp_build_broadcast_scalar(uint_bld, scalar);
}
2744
2745 static void
emit_interp_at(struct lp_build_nir_context * bld_base,unsigned num_components,nir_variable * var,bool centroid,bool sample,unsigned const_index,LLVMValueRef indir_index,LLVMValueRef offsets[2],LLVMValueRef dst[4])2746 emit_interp_at(struct lp_build_nir_context *bld_base,
2747 unsigned num_components,
2748 nir_variable *var,
2749 bool centroid,
2750 bool sample,
2751 unsigned const_index,
2752 LLVMValueRef indir_index,
2753 LLVMValueRef offsets[2],
2754 LLVMValueRef dst[4])
2755 {
2756 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2757
2758 for (unsigned i = 0; i < num_components; i++) {
2759 dst[i] = bld->fs_iface->interp_fn(bld->fs_iface, &bld_base->base,
2760 const_index + var->data.driver_location, i + var->data.location_frac,
2761 centroid, sample, indir_index, offsets);
2762 }
2763 }
2764
2765 static void
emit_set_vertex_and_primitive_count(struct lp_build_nir_context * bld_base,LLVMValueRef vert_count,LLVMValueRef prim_count)2766 emit_set_vertex_and_primitive_count(struct lp_build_nir_context *bld_base,
2767 LLVMValueRef vert_count,
2768 LLVMValueRef prim_count)
2769 {
2770 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2771 struct gallivm_state *gallivm = bld_base->base.gallivm;
2772 assert(bld->mesh_iface);
2773 LLVMValueRef idx = first_active_invocation(bld_base);
2774
2775 LLVMValueRef vcount = LLVMBuildExtractElement(gallivm->builder,
2776 vert_count, idx, "");
2777 LLVMValueRef pcount = LLVMBuildExtractElement(gallivm->builder,
2778 prim_count, idx, "");
2779
2780 bld->mesh_iface->emit_vertex_and_primitive_count(bld->mesh_iface, &bld_base->base, vcount, pcount);
2781 }
2782
2783 static void
emit_launch_mesh_workgroups(struct lp_build_nir_context * bld_base,LLVMValueRef launch_grid)2784 emit_launch_mesh_workgroups(struct lp_build_nir_context *bld_base,
2785 LLVMValueRef launch_grid)
2786 {
2787 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2788 struct gallivm_state *gallivm = bld_base->base.gallivm;
2789 LLVMTypeRef vec_type = LLVMArrayType(LLVMInt32TypeInContext(gallivm->context), 3);
2790
2791 LLVMValueRef local_invoc_idx = get_local_invocation_index(bld);
2792
2793 vec_type = LLVMPointerType(vec_type, 0);
2794
2795 local_invoc_idx = LLVMBuildExtractElement(gallivm->builder, local_invoc_idx, lp_build_const_int32(gallivm, 0), "");
2796 LLVMValueRef if_cond = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, local_invoc_idx, lp_build_const_int32(gallivm, 0), "");
2797 struct lp_build_if_state ifthen;
2798 lp_build_if(&ifthen, gallivm, if_cond);
2799 LLVMValueRef ptr = bld->payload_ptr;
2800 ptr = LLVMBuildPtrToInt(gallivm->builder, ptr, bld_base->int64_bld.elem_type, "");
2801 for (unsigned i = 0; i < 3; i++) {
2802 LLVMValueRef lg = LLVMBuildExtractValue(gallivm->builder, launch_grid, i, "");
2803 lg = LLVMBuildExtractElement(gallivm->builder, lg, lp_build_const_int32(gallivm, 0), "");
2804 LLVMValueRef this_ptr = LLVMBuildIntToPtr(gallivm->builder, ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), "");
2805 LLVMBuildStore(gallivm->builder, lg, this_ptr);
2806 ptr = LLVMBuildAdd(gallivm->builder, ptr, lp_build_const_int64(gallivm, 4), "");
2807 }
2808 lp_build_endif(&ifthen);
2809 }
2810
2811 static void
emit_call(struct lp_build_nir_context * bld_base,struct lp_build_fn * fn,int num_args,LLVMValueRef * args)2812 emit_call(struct lp_build_nir_context *bld_base,
2813 struct lp_build_fn *fn,
2814 int num_args,
2815 LLVMValueRef *args)
2816 {
2817 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2818
2819 args[0] = mask_vec(bld_base);
2820 args[1] = bld->call_context_ptr;
2821 LLVMBuildCall2(bld_base->base.gallivm->builder, fn->fn_type, fn->fn, args, num_args, "");
2822 }
2823
get_scratch_thread_offsets(struct gallivm_state * gallivm,struct lp_type type,unsigned scratch_size)2824 static LLVMValueRef get_scratch_thread_offsets(struct gallivm_state *gallivm,
2825 struct lp_type type,
2826 unsigned scratch_size)
2827 {
2828 LLVMTypeRef elem_type = lp_build_int_elem_type(gallivm, type);
2829 LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
2830 unsigned i;
2831
2832 if (type.length == 1)
2833 return LLVMConstInt(elem_type, 0, 0);
2834
2835 for (i = 0; i < type.length; ++i)
2836 elems[i] = LLVMConstInt(elem_type, scratch_size * i, 0);
2837
2838 return LLVMConstVector(elems, type.length);
2839 }
2840
/* Loads @nc components of @bit_size bits from per-lane scratch memory at
 * vector byte offset @offset, via a masked gather so inactive lanes don't
 * touch memory.  Components are laid out contiguously (bit_size/8 bytes
 * apart) after each lane's base offset.
 */
static void
emit_load_scratch(struct lp_build_nir_context *bld_base,
                  unsigned nc, unsigned bit_size,
                  LLVMValueRef offset,
                  LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state * gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *load_bld;
   /* Per-lane base offsets: lane i starts at i * scratch_size bytes. */
   LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);
   LLVMValueRef exec_mask = mask_vec(bld_base);
   /* Splat the scalar scratch base pointer into a pointer vector. */
   LLVMValueRef scratch_ptr_vec = lp_build_broadcast(gallivm,
                                                     LLVMVectorType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), uint_bld->type.length),
                                                     bld->scratch_ptr);
   load_bld = get_int_bld(bld_base, true, bit_size);

   offset = lp_build_add(uint_bld, offset, thread_offsets);

   for (unsigned c = 0; c < nc; c++) {
      LLVMValueRef chan_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c * (bit_size / 8)));

      outval[c] = lp_build_masked_gather(gallivm, load_bld->type.length, bit_size,
                                         load_bld->vec_type,
                                         lp_vec_add_offset_ptr(bld_base, bit_size,
                                                               scratch_ptr_vec,
                                                               chan_offset),
                                         exec_mask);
      outval[c] = LLVMBuildBitCast(builder, outval[c], load_bld->vec_type, "");
   }
}
2873
/* Stores the components of @dst selected by @writemask to per-lane scratch
 * memory at vector byte offset @offset, via a masked scatter so inactive
 * lanes don't write.  Layout mirrors emit_load_scratch: each lane's slice
 * starts at lane * scratch_size, components bit_size/8 bytes apart.
 */
static void
emit_store_scratch(struct lp_build_nir_context *bld_base,
                   unsigned writemask, unsigned nc,
                   unsigned bit_size, LLVMValueRef offset,
                   LLVMValueRef dst)
{
   struct gallivm_state * gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *store_bld;
   LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);
   /* Splat the scalar scratch base pointer into a pointer vector. */
   LLVMValueRef scratch_ptr_vec = lp_build_broadcast(gallivm,
                                                     LLVMVectorType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), uint_bld->type.length),
                                                     bld->scratch_ptr);
   store_bld = get_int_bld(bld_base, true, bit_size);

   LLVMValueRef exec_mask = mask_vec(bld_base);
   offset = lp_build_add(uint_bld, offset, thread_offsets);

   for (unsigned c = 0; c < nc; c++) {
      if (!(writemask & (1u << c)))
         continue;
      /* Multi-component sources arrive as an aggregate; scalars as-is. */
      LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");

      LLVMValueRef chan_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c * (bit_size / 8)));

      val = LLVMBuildBitCast(builder, val, store_bld->vec_type, "");

      lp_build_masked_scatter(gallivm, store_bld->type.length, bit_size,
                              lp_vec_add_offset_ptr(bld_base, bit_size,
                                                    scratch_ptr_vec, chan_offset),
                              val, exec_mask);
   }
}
2909
2910 static void
emit_clock(struct lp_build_nir_context * bld_base,LLVMValueRef dst[4])2911 emit_clock(struct lp_build_nir_context *bld_base,
2912 LLVMValueRef dst[4])
2913 {
2914 struct gallivm_state *gallivm = bld_base->base.gallivm;
2915 LLVMBuilderRef builder = gallivm->builder;
2916 struct lp_build_context *uint_bld = get_int_bld(bld_base, true, 32);
2917
2918 lp_init_clock_hook(gallivm);
2919
2920 LLVMTypeRef get_time_type = LLVMFunctionType(LLVMInt64TypeInContext(gallivm->context), NULL, 0, 1);
2921 LLVMValueRef result = LLVMBuildCall2(builder, get_time_type, gallivm->get_time_hook, NULL, 0, "");
2922
2923 LLVMValueRef hi = LLVMBuildShl(builder, result, lp_build_const_int64(gallivm, 32), "");
2924 hi = LLVMBuildTrunc(builder, hi, uint_bld->elem_type, "");
2925 LLVMValueRef lo = LLVMBuildTrunc(builder, result, uint_bld->elem_type, "");
2926 dst[0] = lp_build_broadcast_scalar(uint_bld, lo);
2927 dst[1] = lp_build_broadcast_scalar(uint_bld, hi);
2928 }
2929
2930 LLVMTypeRef
lp_build_cs_func_call_context(struct gallivm_state * gallivm,int length,LLVMTypeRef context_type,LLVMTypeRef resources_type)2931 lp_build_cs_func_call_context(struct gallivm_state *gallivm, int length,
2932 LLVMTypeRef context_type, LLVMTypeRef resources_type)
2933 {
2934 LLVMTypeRef args[LP_NIR_CALL_CONTEXT_MAX_ARGS];
2935
2936 args[LP_NIR_CALL_CONTEXT_CONTEXT] = LLVMPointerType(context_type, 0);
2937 args[LP_NIR_CALL_CONTEXT_RESOURCES] = LLVMPointerType(resources_type, 0);
2938 args[LP_NIR_CALL_CONTEXT_SHARED] = LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0); /* shared_ptr */
2939 args[LP_NIR_CALL_CONTEXT_SCRATCH] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); /* scratch ptr */
2940 args[LP_NIR_CALL_CONTEXT_WORK_DIM] = LLVMInt32TypeInContext(gallivm->context); /* work_dim */
2941 args[LP_NIR_CALL_CONTEXT_THREAD_ID_0] = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), length); /* system_values.thread_id[0] */
2942 args[LP_NIR_CALL_CONTEXT_THREAD_ID_1] = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), length); /* system_values.thread_id[1] */
2943 args[LP_NIR_CALL_CONTEXT_THREAD_ID_2] = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), length); /* system_values.thread_id[2] */
2944 args[LP_NIR_CALL_CONTEXT_BLOCK_ID_0] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_id[0] */
2945 args[LP_NIR_CALL_CONTEXT_BLOCK_ID_1] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_id[1] */
2946 args[LP_NIR_CALL_CONTEXT_BLOCK_ID_2] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_id[2] */
2947
2948 args[LP_NIR_CALL_CONTEXT_GRID_SIZE_0] = LLVMInt32TypeInContext(gallivm->context); /* system_values.grid_size[0] */
2949 args[LP_NIR_CALL_CONTEXT_GRID_SIZE_1] = LLVMInt32TypeInContext(gallivm->context); /* system_values.grid_size[1] */
2950 args[LP_NIR_CALL_CONTEXT_GRID_SIZE_2] = LLVMInt32TypeInContext(gallivm->context); /* system_values.grid_size[2] */
2951 args[LP_NIR_CALL_CONTEXT_BLOCK_SIZE_0] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_size[0] */
2952 args[LP_NIR_CALL_CONTEXT_BLOCK_SIZE_1] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_size[1] */
2953 args[LP_NIR_CALL_CONTEXT_BLOCK_SIZE_2] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_size[2] */
2954
2955 LLVMTypeRef stype = LLVMStructTypeInContext(gallivm->context, args, LP_NIR_CALL_CONTEXT_MAX_ARGS, 0);
2956 return stype;
2957 }
2958
2959 static void
build_call_context(struct lp_build_nir_soa_context * bld)2960 build_call_context(struct lp_build_nir_soa_context *bld)
2961 {
2962 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2963 bld->call_context_ptr = lp_build_alloca(gallivm, bld->call_context_type, "callcontext");
2964 LLVMValueRef call_context = LLVMGetUndef(bld->call_context_type);
2965 call_context = LLVMBuildInsertValue(gallivm->builder,
2966 call_context, bld->context_ptr, LP_NIR_CALL_CONTEXT_CONTEXT, "");
2967 call_context = LLVMBuildInsertValue(gallivm->builder,
2968 call_context, bld->resources_ptr, LP_NIR_CALL_CONTEXT_RESOURCES, "");
2969 if (bld->shared_ptr) {
2970 call_context = LLVMBuildInsertValue(gallivm->builder,
2971 call_context, bld->shared_ptr, LP_NIR_CALL_CONTEXT_SHARED, "");
2972 } else {
2973 call_context = LLVMBuildInsertValue(gallivm->builder, call_context,
2974 LLVMConstNull(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0)),
2975 LP_NIR_CALL_CONTEXT_SHARED, "");
2976 }
2977 if (bld->scratch_ptr) {
2978 call_context = LLVMBuildInsertValue(gallivm->builder,
2979 call_context, bld->scratch_ptr, LP_NIR_CALL_CONTEXT_SCRATCH, "");
2980 } else {
2981 call_context = LLVMBuildInsertValue(gallivm->builder, call_context,
2982 LLVMConstNull(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0)),
2983 LP_NIR_CALL_CONTEXT_SCRATCH, "");
2984 }
2985 call_context = LLVMBuildInsertValue(gallivm->builder,
2986 call_context, bld->system_values.work_dim, LP_NIR_CALL_CONTEXT_WORK_DIM, "");
2987 call_context = LLVMBuildInsertValue(gallivm->builder,
2988 call_context, bld->system_values.thread_id[0], LP_NIR_CALL_CONTEXT_THREAD_ID_0, "");
2989 call_context = LLVMBuildInsertValue(gallivm->builder,
2990 call_context, bld->system_values.thread_id[1], LP_NIR_CALL_CONTEXT_THREAD_ID_1, "");
2991 call_context = LLVMBuildInsertValue(gallivm->builder,
2992 call_context, bld->system_values.thread_id[2], LP_NIR_CALL_CONTEXT_THREAD_ID_2, "");
2993 call_context = LLVMBuildInsertValue(gallivm->builder,
2994 call_context, bld->system_values.block_id[0], LP_NIR_CALL_CONTEXT_BLOCK_ID_0, "");
2995 call_context = LLVMBuildInsertValue(gallivm->builder,
2996 call_context, bld->system_values.block_id[1], LP_NIR_CALL_CONTEXT_BLOCK_ID_1, "");
2997 call_context = LLVMBuildInsertValue(gallivm->builder,
2998 call_context, bld->system_values.block_id[2], LP_NIR_CALL_CONTEXT_BLOCK_ID_2, "");
2999 call_context = LLVMBuildInsertValue(gallivm->builder,
3000 call_context, bld->system_values.grid_size[0], LP_NIR_CALL_CONTEXT_GRID_SIZE_0, "");
3001 call_context = LLVMBuildInsertValue(gallivm->builder,
3002 call_context, bld->system_values.grid_size[1], LP_NIR_CALL_CONTEXT_GRID_SIZE_1, "");
3003 call_context = LLVMBuildInsertValue(gallivm->builder,
3004 call_context, bld->system_values.grid_size[2], LP_NIR_CALL_CONTEXT_GRID_SIZE_2, "");
3005 call_context = LLVMBuildInsertValue(gallivm->builder,
3006 call_context, bld->system_values.block_size[0], LP_NIR_CALL_CONTEXT_BLOCK_SIZE_0, "");
3007 call_context = LLVMBuildInsertValue(gallivm->builder,
3008 call_context, bld->system_values.block_size[1], LP_NIR_CALL_CONTEXT_BLOCK_SIZE_1, "");
3009 call_context = LLVMBuildInsertValue(gallivm->builder,
3010 call_context, bld->system_values.block_size[2], LP_NIR_CALL_CONTEXT_BLOCK_SIZE_2, "");
3011 LLVMBuildStore(gallivm->builder, call_context, bld->call_context_ptr);
3012 }
3013
/* Generate SOA (structure-of-arrays) LLVM IR for one NIR function impl.
 *
 * This is the main entry point of the SOA backend: it fills in a
 * lp_build_nir_soa_context — per-bit-width build contexts, the table of
 * emit callbacks, and all interface state from 'params' — then walks the
 * NIR via lp_build_nir_llvm().  Shader outputs are returned through
 * 'outputs' (one LLVMValueRef per component).
 *
 * NOTE(review): initialization order below is deliberate (exec mask after
 * the GS allocas, scratch after system values); keep it when editing.
 */
void lp_build_nir_soa_func(struct gallivm_state *gallivm,
                           struct nir_shader *shader,
                           nir_function_impl *impl,
                           const struct lp_build_tgsi_params *params,
                           LLVMValueRef (*outputs)[4])
{
   struct lp_build_nir_soa_context bld;
   const struct lp_type type = params->type;
   struct lp_type res_type;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   memset(&res_type, 0, sizeof res_type);
   res_type.width = type.width;
   res_type.length = type.length;
   res_type.sign = 1;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
   lp_build_context_init(&bld.uint_elem_bld, gallivm, lp_elem_type(lp_uint_type(type)));
   /* FP32 context: honor the shader's float_controls execution modes so
    * optimizations preserve signed zero / NaN where required. */
   {
      struct lp_type float_type = type;
      float_type.signed_zero_preserve =
         !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP32);
      float_type.nan_preserve =
         !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_NAN_PRESERVE_FP32);
      lp_build_context_init(&bld.bld_base.base, gallivm, float_type);
   }
   /* FP64 context (double width of the base type). */
   {
      struct lp_type dbl_type;
      dbl_type = type;
      dbl_type.width *= 2;
      dbl_type.signed_zero_preserve =
         !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP64);
      dbl_type.nan_preserve =
         !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_NAN_PRESERVE_FP64);
      lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
   }
   /* FP16 context (half width of the base type). */
   {
      struct lp_type half_type;
      half_type = type;
      half_type.width /= 2;
      half_type.signed_zero_preserve =
         !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP16);
      half_type.nan_preserve =
         !!(shader->info.float_controls_execution_mode & FLOAT_CONTROLS_NAN_PRESERVE_FP16);
      lp_build_context_init(&bld.bld_base.half_bld, gallivm, half_type);
   }
   /* Integer contexts for 64/16/8-bit, signed and unsigned, derived by
    * scaling the base type's width. */
   {
      struct lp_type uint64_type;
      uint64_type = lp_uint_type(type);
      uint64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
   }
   {
      struct lp_type int64_type;
      int64_type = lp_int_type(type);
      int64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
   }
   {
      struct lp_type uint16_type;
      uint16_type = lp_uint_type(type);
      uint16_type.width /= 2;
      lp_build_context_init(&bld.bld_base.uint16_bld, gallivm, uint16_type);
   }
   {
      struct lp_type int16_type;
      int16_type = lp_int_type(type);
      int16_type.width /= 2;
      lp_build_context_init(&bld.bld_base.int16_bld, gallivm, int16_type);
   }
   {
      struct lp_type uint8_type;
      uint8_type = lp_uint_type(type);
      uint8_type.width /= 4;
      lp_build_context_init(&bld.bld_base.uint8_bld, gallivm, uint8_type);
   }
   {
      struct lp_type int8_type;
      int8_type = lp_int_type(type);
      int8_type.width /= 4;
      lp_build_context_init(&bld.bld_base.int8_bld, gallivm, int8_type);
   }
   /* Wire up the emit callbacks consumed by the generic NIR walker
    * (lp_build_nir_llvm) to this file's SOA implementations. */
   bld.bld_base.load_var = emit_load_var;
   bld.bld_base.store_var = emit_store_var;
   bld.bld_base.load_reg = emit_load_reg;
   bld.bld_base.store_reg = emit_store_reg;
   bld.bld_base.emit_var_decl = emit_var_decl;
   bld.bld_base.load_ubo = emit_load_ubo;
   bld.bld_base.load_kernel_arg = emit_load_kernel_arg;
   bld.bld_base.load_global = emit_load_global;
   bld.bld_base.store_global = emit_store_global;
   bld.bld_base.atomic_global = emit_atomic_global;
   bld.bld_base.tex = emit_tex;
   bld.bld_base.tex_size = emit_tex_size;
   bld.bld_base.bgnloop = bgnloop;
   bld.bld_base.endloop = endloop;
   bld.bld_base.if_cond = if_cond;
   bld.bld_base.else_stmt = else_stmt;
   bld.bld_base.endif_stmt = endif_stmt;
   bld.bld_base.break_stmt = break_stmt;
   bld.bld_base.continue_stmt = continue_stmt;
   bld.bld_base.sysval_intrin = emit_sysval_intrin;
   bld.bld_base.discard = discard;
   bld.bld_base.emit_vertex = emit_vertex;
   bld.bld_base.end_primitive = end_primitive;
   bld.bld_base.load_mem = emit_load_mem;
   bld.bld_base.store_mem = emit_store_mem;
   bld.bld_base.get_ssbo_size = emit_get_ssbo_size;
   bld.bld_base.atomic_mem = emit_atomic_mem;
   bld.bld_base.barrier = emit_barrier;
   bld.bld_base.image_op = emit_image_op;
   bld.bld_base.image_size = emit_image_size;
   bld.bld_base.vote = emit_vote;
   bld.bld_base.elect = emit_elect;
   bld.bld_base.reduce = emit_reduce;
   bld.bld_base.ballot = emit_ballot;
#if LLVM_VERSION_MAJOR >= 10
   bld.bld_base.shuffle = emit_shuffle;
#endif
   bld.bld_base.read_invocation = emit_read_invocation;
   bld.bld_base.helper_invocation = emit_helper_invocation;
   bld.bld_base.interp_at = emit_interp_at;
   bld.bld_base.call = emit_call;
   bld.bld_base.load_scratch = emit_load_scratch;
   bld.bld_base.store_scratch = emit_store_scratch;
   bld.bld_base.load_const = emit_load_const;
   bld.bld_base.clock = emit_clock;
   bld.bld_base.set_vertex_and_primitive_count = emit_set_vertex_and_primitive_count;
   bld.bld_base.launch_mesh_workgroups = emit_launch_mesh_workgroups;

   /* Copy the caller-provided interface state into the build context. */
   bld.bld_base.fns = params->fns;
   bld.bld_base.func = params->current_func;
   bld.mask = params->mask;
   bld.inputs = params->inputs;
   bld.outputs = outputs;
   bld.consts_ptr = params->consts_ptr;
   bld.ssbo_ptr = params->ssbo_ptr;
   bld.sampler = params->sampler;

   bld.context_type = params->context_type;
   bld.context_ptr = params->context_ptr;
   bld.resources_type = params->resources_type;
   bld.resources_ptr = params->resources_ptr;
   bld.thread_data_type = params->thread_data_type;
   bld.thread_data_ptr = params->thread_data_ptr;
   bld.image = params->image;
   bld.shared_ptr = params->shared_ptr;
   bld.payload_ptr = params->payload_ptr;
   bld.coro = params->coro;
   bld.kernel_args_ptr = params->kernel_args;
   bld.num_inputs = params->num_inputs;
   bld.indirects = 0;
   if (shader->info.inputs_read_indirectly)
      bld.indirects |= nir_var_shader_in;

   /* Per-stage interfaces; only one of these is normally non-NULL. */
   bld.gs_iface = params->gs_iface;
   bld.tcs_iface = params->tcs_iface;
   bld.tes_iface = params->tes_iface;
   bld.fs_iface = params->fs_iface;
   bld.mesh_iface = params->mesh_iface;
   if (bld.gs_iface) {
      struct lp_build_context *uint_bld = &bld.bld_base.uint_bld;

      /* Per-stream counters for emitted vertices/primitives, consulted by
       * emit_vertex/end_primitive and the GS epilogue below. */
      bld.gs_vertex_streams = params->gs_vertex_streams;
      bld.max_output_vertices_vec = lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
                                                           shader->info.gs.vertices_out);
      for (int i = 0; i < params->gs_vertex_streams; i++) {
         bld.emitted_prims_vec_ptr[i] =
            lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_prims_ptr");
         bld.emitted_vertices_vec_ptr[i] =
            lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_vertices_ptr");
         bld.total_emitted_vertices_vec_ptr[i] =
            lp_build_alloca(gallivm, uint_bld->vec_type, "total_emitted_vertices_ptr");
      }
   }
   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);

   if (params->system_values)
      bld.system_values = *params->system_values;

   bld.bld_base.shader = shader;

   /* Scratch: reuse the caller's buffer if given, otherwise allocate one
    * slice per SIMD lane on the stack (size rounded up to 8 bytes). */
   bld.scratch_size = ALIGN(shader->scratch_size, 8);
   if (params->scratch_ptr)
      bld.scratch_ptr = params->scratch_ptr;
   else if (shader->scratch_size) {
      bld.scratch_ptr = lp_build_array_alloca(gallivm,
                                              LLVMInt8TypeInContext(gallivm->context),
                                              lp_build_const_int32(gallivm, bld.scratch_size * type.length),
                                              "scratch");
   }

   /* Multi-function shaders pass state between functions through a
    * call-context struct; build it unless the caller already did. */
   if (!exec_list_is_singular(&shader->functions)) {
      bld.call_context_type = lp_build_cs_func_call_context(gallivm, type.length, bld.context_type, bld.resources_type);
      if (!params->call_context_ptr) {
         build_call_context(&bld);
      } else
         bld.call_context_ptr = params->call_context_ptr;
   }

   emit_prologue(&bld);
   lp_build_nir_llvm(&bld.bld_base, shader, impl);

   /* GS epilogue: flush any pending primitive per stream and report the
    * final vertex/primitive counts back through the interface. */
   if (bld.gs_iface) {
      LLVMBuilderRef builder = bld.bld_base.base.gallivm->builder;
      LLVMValueRef total_emitted_vertices_vec;
      LLVMValueRef emitted_prims_vec;

      for (int i = 0; i < params->gs_vertex_streams; i++) {
         end_primitive_masked(&bld.bld_base, lp_build_mask_value(bld.mask), i);

         total_emitted_vertices_vec =
            LLVMBuildLoad2(builder, bld.bld_base.uint_bld.vec_type, bld.total_emitted_vertices_vec_ptr[i], "");

         emitted_prims_vec =
            LLVMBuildLoad2(builder, bld.bld_base.uint_bld.vec_type, bld.emitted_prims_vec_ptr[i], "");
         bld.gs_iface->gs_epilogue(bld.gs_iface,
                                   total_emitted_vertices_vec,
                                   emitted_prims_vec, i);
      }
   }
   lp_exec_mask_fini(&bld.exec_mask);
}
3241
/* Convenience wrapper: run the common NIR lowering pre-passes, then
 * generate SOA LLVM IR for the shader's entrypoint function.
 */
void lp_build_nir_soa(struct gallivm_state *gallivm,
                      struct nir_shader *shader,
                      const struct lp_build_tgsi_params *params,
                      LLVMValueRef (*outputs)[4])
{
   lp_build_nir_prepasses(shader);

   nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader);
   lp_build_nir_soa_func(gallivm, shader, entrypoint, params, outputs);
}
3252