1 /**************************************************************************
2 *
3 * Copyright 2019 Red Hat.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **************************************************************************/
25
26 #include "lp_bld_nir.h"
27 #include "lp_bld_init.h"
28 #include "lp_bld_flow.h"
29 #include "lp_bld_logic.h"
30 #include "lp_bld_gather.h"
31 #include "lp_bld_const.h"
32 #include "lp_bld_struct.h"
33 #include "lp_bld_arit.h"
34 #include "lp_bld_bitarit.h"
35 #include "lp_bld_coro.h"
36 #include "lp_bld_printf.h"
37 #include "lp_bld_intr.h"
38 #include "util/u_cpu_detect.h"
39 #include "util/u_math.h"
40
bit_size_to_shift_size(int bit_size)41 static int bit_size_to_shift_size(int bit_size)
42 {
43 switch (bit_size) {
44 case 64:
45 return 3;
46 default:
47 case 32:
48 return 2;
49 case 16:
50 return 1;
51 case 8:
52 return 0;
53 }
54 }
55
56 /*
57 * combine the execution mask if there is one with the current mask.
58 */
59 static LLVMValueRef
mask_vec(struct lp_build_nir_context * bld_base)60 mask_vec(struct lp_build_nir_context *bld_base)
61 {
62 struct lp_build_nir_soa_context * bld = (struct lp_build_nir_soa_context *)bld_base;
63 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
64 struct lp_exec_mask *exec_mask = &bld->exec_mask;
65 LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
66 if (!exec_mask->has_mask) {
67 return bld_mask;
68 }
69 if (!bld_mask)
70 return exec_mask->exec_mask;
71 return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
72 exec_mask->exec_mask, "");
73 }
74
75 static bool
invocation_0_must_be_active(struct lp_build_nir_context * bld_base)76 invocation_0_must_be_active(struct lp_build_nir_context *bld_base)
77 {
78 struct lp_build_nir_soa_context * bld = (struct lp_build_nir_soa_context *)bld_base;
79
80 /* Fragment shaders may dispatch with invocation 0 inactive. All other
81 * stages have invocation 0 active at the top. (See
82 * lp_build_tgsi_params.mask setup in draw_llvm.c and lp_state_*.c)
83 */
84 if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT)
85 return false;
86
87 /* If we're in some control flow right now, then invocation 0 may be
88 * disabled.
89 */
90 if (bld->exec_mask.has_mask)
91 return false;
92
93 return true;
94 }
95
96 static LLVMValueRef
lp_build_zero_bits(struct gallivm_state * gallivm,int bit_size)97 lp_build_zero_bits(struct gallivm_state *gallivm, int bit_size)
98 {
99 if (bit_size == 64)
100 return LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 0, 0);
101 else if (bit_size == 16)
102 return LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0, 0);
103 else if (bit_size == 8)
104 return LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0, 0);
105 else
106 return lp_build_const_int32(gallivm, 0);
107 }
108
109 static LLVMValueRef
emit_fetch_64bit(struct lp_build_nir_context * bld_base,LLVMValueRef input,LLVMValueRef input2)110 emit_fetch_64bit(
111 struct lp_build_nir_context * bld_base,
112 LLVMValueRef input,
113 LLVMValueRef input2)
114 {
115 struct gallivm_state *gallivm = bld_base->base.gallivm;
116 LLVMBuilderRef builder = gallivm->builder;
117 LLVMValueRef res;
118 int i;
119 LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
120 int len = bld_base->base.type.length * 2;
121 assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
122
123 for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
124 #if UTIL_ARCH_LITTLE_ENDIAN
125 shuffles[i] = lp_build_const_int32(gallivm, i / 2);
126 shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
127 #else
128 shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
129 shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
130 #endif
131 }
132 res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
133
134 return LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
135 }
136
137 static void
emit_store_64bit_split(struct lp_build_nir_context * bld_base,LLVMValueRef value,LLVMValueRef split_values[2])138 emit_store_64bit_split(struct lp_build_nir_context *bld_base,
139 LLVMValueRef value,
140 LLVMValueRef split_values[2])
141 {
142 struct gallivm_state *gallivm = bld_base->base.gallivm;
143 LLVMBuilderRef builder = gallivm->builder;
144 unsigned i;
145 LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
146 LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
147 int len = bld_base->base.type.length * 2;
148
149 value = LLVMBuildBitCast(gallivm->builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), len), "");
150 for (i = 0; i < bld_base->base.type.length; i++) {
151 #if UTIL_ARCH_LITTLE_ENDIAN
152 shuffles[i] = lp_build_const_int32(gallivm, i * 2);
153 shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
154 #else
155 shuffles[i] = lp_build_const_int32(gallivm, i * 2 + 1);
156 shuffles2[i] = lp_build_const_int32(gallivm, i * 2);
157 #endif
158 }
159
160 split_values[0] = LLVMBuildShuffleVector(builder, value,
161 LLVMGetUndef(LLVMTypeOf(value)),
162 LLVMConstVector(shuffles,
163 bld_base->base.type.length),
164 "");
165 split_values[1] = LLVMBuildShuffleVector(builder, value,
166 LLVMGetUndef(LLVMTypeOf(value)),
167 LLVMConstVector(shuffles2,
168 bld_base->base.type.length),
169 "");
170 }
171
172 static void
emit_store_64bit_chan(struct lp_build_nir_context * bld_base,LLVMValueRef chan_ptr,LLVMValueRef chan_ptr2,LLVMValueRef value)173 emit_store_64bit_chan(struct lp_build_nir_context *bld_base,
174 LLVMValueRef chan_ptr,
175 LLVMValueRef chan_ptr2,
176 LLVMValueRef value)
177 {
178 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
179 struct lp_build_context *float_bld = &bld_base->base;
180 LLVMValueRef split_vals[2];
181
182 emit_store_64bit_split(bld_base, value, split_vals);
183
184 lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[0], chan_ptr);
185 lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[1], chan_ptr2);
186 }
187
188 static LLVMValueRef
get_soa_array_offsets(struct lp_build_context * uint_bld,LLVMValueRef indirect_index,int num_components,unsigned chan_index,bool need_perelement_offset)189 get_soa_array_offsets(struct lp_build_context *uint_bld,
190 LLVMValueRef indirect_index,
191 int num_components,
192 unsigned chan_index,
193 bool need_perelement_offset)
194 {
195 struct gallivm_state *gallivm = uint_bld->gallivm;
196 LLVMValueRef chan_vec =
197 lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
198 LLVMValueRef length_vec =
199 lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
200 LLVMValueRef index_vec;
201
202 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
203 index_vec = lp_build_mul(uint_bld, indirect_index, lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, num_components));
204 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
205 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
206
207 if (need_perelement_offset) {
208 LLVMValueRef pixel_offsets;
209 unsigned i;
210 /* build pixel offset vector: {0, 1, 2, 3, ...} */
211 pixel_offsets = uint_bld->undef;
212 for (i = 0; i < uint_bld->type.length; i++) {
213 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
214 pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
215 ii, ii, "");
216 }
217 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
218 }
219 return index_vec;
220 }
221
222 static LLVMValueRef
build_gather(struct lp_build_nir_context * bld_base,struct lp_build_context * bld,LLVMValueRef base_ptr,LLVMValueRef indexes,LLVMValueRef overflow_mask,LLVMValueRef indexes2)223 build_gather(struct lp_build_nir_context *bld_base,
224 struct lp_build_context *bld,
225 LLVMValueRef base_ptr,
226 LLVMValueRef indexes,
227 LLVMValueRef overflow_mask,
228 LLVMValueRef indexes2)
229 {
230 struct gallivm_state *gallivm = bld_base->base.gallivm;
231 LLVMBuilderRef builder = gallivm->builder;
232 struct lp_build_context *uint_bld = &bld_base->uint_bld;
233 LLVMValueRef res;
234 unsigned i;
235
236 if (indexes2)
237 res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
238 else
239 res = bld->undef;
240 /*
241 * overflow_mask is a vector telling us which channels
242 * in the vector overflowed. We use the overflow behavior for
243 * constant buffers which is defined as:
244 * Out of bounds access to constant buffer returns 0 in all
245 * components. Out of bounds behavior is always with respect
246 * to the size of the buffer bound at that slot.
247 */
248
249 if (overflow_mask) {
250 /*
251 * We avoid per-element control flow here (also due to llvm going crazy,
252 * though I suspect it's better anyway since overflow is likely rare).
253 * Note that since we still fetch from buffers even if num_elements was
254 * zero (in this case we'll fetch from index zero) the jit func callers
255 * MUST provide valid fake constant buffers of size 4x32 (the values do
256 * not matter), otherwise we'd still need (not per element though)
257 * control flow.
258 */
259 indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
260 if (indexes2)
261 indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
262 }
263
264 /*
265 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
266 */
267 for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
268 LLVMValueRef si, di;
269 LLVMValueRef index;
270 LLVMValueRef scalar_ptr, scalar;
271
272 di = lp_build_const_int32(gallivm, i);
273 if (indexes2)
274 si = lp_build_const_int32(gallivm, i >> 1);
275 else
276 si = di;
277
278 if (indexes2 && (i & 1)) {
279 index = LLVMBuildExtractElement(builder,
280 indexes2, si, "");
281 } else {
282 index = LLVMBuildExtractElement(builder,
283 indexes, si, "");
284 }
285
286 scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "gather_ptr");
287 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
288
289 res = LLVMBuildInsertElement(builder, res, scalar, di, "");
290 }
291
292 if (overflow_mask) {
293 if (indexes2) {
294 res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
295 overflow_mask = LLVMBuildSExt(builder, overflow_mask,
296 bld_base->dbl_bld.int_vec_type, "");
297 res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
298 bld_base->dbl_bld.zero, res);
299 } else
300 res = lp_build_select(bld, overflow_mask, bld->zero, res);
301 }
302
303 return res;
304 }
305
306 /**
307 * Scatter/store vector.
308 */
309 static void
emit_mask_scatter(struct lp_build_nir_soa_context * bld,LLVMValueRef base_ptr,LLVMValueRef indexes,LLVMValueRef values,struct lp_exec_mask * mask)310 emit_mask_scatter(struct lp_build_nir_soa_context *bld,
311 LLVMValueRef base_ptr,
312 LLVMValueRef indexes,
313 LLVMValueRef values,
314 struct lp_exec_mask *mask)
315 {
316 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
317 LLVMBuilderRef builder = gallivm->builder;
318 unsigned i;
319 LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;
320
321 /*
322 * Loop over elements of index_vec, store scalar value.
323 */
324 for (i = 0; i < bld->bld_base.base.type.length; i++) {
325 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
326 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
327 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
328 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
329 LLVMValueRef scalar_pred = pred ?
330 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
331
332 if (0)
333 lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
334 ii, val, index, scalar_ptr);
335
336 if (scalar_pred) {
337 LLVMValueRef real_val, dst_val;
338 dst_val = LLVMBuildLoad2(builder, LLVMTypeOf(val), scalar_ptr, "");
339 scalar_pred = LLVMBuildTrunc(builder, scalar_pred, LLVMInt1TypeInContext(gallivm->context), "");
340 real_val = LLVMBuildSelect(builder, scalar_pred, val, dst_val, "");
341 LLVMBuildStore(builder, real_val, scalar_ptr);
342 }
343 else {
344 LLVMBuildStore(builder, val, scalar_ptr);
345 }
346 }
347 }
348
emit_load_var(struct lp_build_nir_context * bld_base,nir_variable_mode deref_mode,unsigned num_components,unsigned bit_size,nir_variable * var,unsigned vertex_index,LLVMValueRef indir_vertex_index,unsigned const_index,LLVMValueRef indir_index,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])349 static void emit_load_var(struct lp_build_nir_context *bld_base,
350 nir_variable_mode deref_mode,
351 unsigned num_components,
352 unsigned bit_size,
353 nir_variable *var,
354 unsigned vertex_index,
355 LLVMValueRef indir_vertex_index,
356 unsigned const_index,
357 LLVMValueRef indir_index,
358 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
359 {
360 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
361 struct gallivm_state *gallivm = bld_base->base.gallivm;
362 int dmul = bit_size == 64 ? 2 : 1;
363 unsigned location = var->data.driver_location;
364 unsigned location_frac = var->data.location_frac;
365
366 if (!var->data.compact && !indir_index)
367 location += const_index;
368 else if (var->data.compact) {
369 location += const_index / 4;
370 location_frac += const_index % 4;
371 const_index = 0;
372 }
373 switch (deref_mode) {
374 case nir_var_shader_in:
375 for (unsigned i = 0; i < num_components; i++) {
376 int idx = (i * dmul) + location_frac;
377 int comp_loc = location;
378
379 if (bit_size == 64 && idx >= 4) {
380 comp_loc++;
381 idx = idx % 4;
382 }
383
384 if (bld->gs_iface) {
385 LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
386 LLVMValueRef attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
387 LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
388 LLVMValueRef result2;
389
390 result[i] = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
391 false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
392 if (bit_size == 64) {
393 LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
394 result2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
395 false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
396 result[i] = emit_fetch_64bit(bld_base, result[i], result2);
397 }
398 } else if (bld->tes_iface) {
399 LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
400 LLVMValueRef attrib_index_val;
401 LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
402 LLVMValueRef result2;
403
404 if (indir_index) {
405 if (var->data.compact) {
406 swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));
407 attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
408 } else
409 attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
410 } else
411 attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
412
413 if (var->data.patch) {
414 result[i] = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
415 indir_index ? true : false, attrib_index_val, swizzle_index_val);
416 if (bit_size == 64) {
417 LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
418 result2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
419 indir_index ? true : false, attrib_index_val, swizzle_index_val);
420 result[i] = emit_fetch_64bit(bld_base, result[i], result2);
421 }
422 }
423 else {
424 result[i] = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
425 indir_vertex_index ? true : false,
426 indir_vertex_index ? indir_vertex_index : vertex_index_val,
427 (indir_index && !var->data.compact) ? true : false, attrib_index_val,
428 (indir_index && var->data.compact) ? true : false, swizzle_index_val);
429 if (bit_size == 64) {
430 LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
431 result2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
432 indir_vertex_index ? true : false,
433 indir_vertex_index ? indir_vertex_index : vertex_index_val,
434 indir_index ? true : false, attrib_index_val, false, swizzle_index_val);
435 result[i] = emit_fetch_64bit(bld_base, result[i], result2);
436 }
437 }
438 } else if (bld->tcs_iface) {
439 LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
440 LLVMValueRef attrib_index_val;
441 LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
442
443 if (indir_index) {
444 if (var->data.compact) {
445 swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));
446 attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
447 } else
448 attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
449 } else
450 attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
451 result[i] = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
452 indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
453 (indir_index && !var->data.compact) ? true : false, attrib_index_val,
454 (indir_index && var->data.compact) ? true : false, swizzle_index_val);
455 if (bit_size == 64) {
456 LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
457 LLVMValueRef result2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
458 indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
459 indir_index ? true : false, attrib_index_val,
460 false, swizzle_index_val);
461 result[i] = emit_fetch_64bit(bld_base, result[i], result2);
462 }
463 } else {
464 if (indir_index) {
465 LLVMValueRef attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
466 LLVMValueRef index_vec = get_soa_array_offsets(&bld_base->uint_bld,
467 attrib_index_val, 4, idx,
468 TRUE);
469 LLVMValueRef index_vec2 = NULL;
470 LLVMTypeRef fptr_type;
471 LLVMValueRef inputs_array;
472 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
473 inputs_array = LLVMBuildBitCast(gallivm->builder, bld->inputs_array, fptr_type, "");
474
475 if (bit_size == 64)
476 index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
477 indir_index, 4, idx + 1, TRUE);
478
479 /* Gather values from the input register array */
480 result[i] = build_gather(bld_base, &bld_base->base, inputs_array, index_vec, NULL, index_vec2);
481 } else {
482 if (bld->indirects & nir_var_shader_in) {
483 LLVMValueRef lindex = lp_build_const_int32(gallivm,
484 comp_loc * 4 + idx);
485 LLVMValueRef input_ptr = lp_build_pointer_get(gallivm->builder,
486 bld->inputs_array, lindex);
487 if (bit_size == 64) {
488 LLVMValueRef lindex2 = lp_build_const_int32(gallivm,
489 comp_loc * 4 + (idx + 1));
490 LLVMValueRef input_ptr2 = lp_build_pointer_get(gallivm->builder,
491 bld->inputs_array, lindex2);
492 result[i] = emit_fetch_64bit(bld_base, input_ptr, input_ptr2);
493 } else {
494 result[i] = input_ptr;
495 }
496 } else {
497 if (bit_size == 64) {
498 LLVMValueRef tmp[2];
499 tmp[0] = bld->inputs[comp_loc][idx];
500 tmp[1] = bld->inputs[comp_loc][idx + 1];
501 result[i] = emit_fetch_64bit(bld_base, tmp[0], tmp[1]);
502 } else {
503 result[i] = bld->inputs[comp_loc][idx];
504 }
505 }
506 }
507 }
508 }
509 break;
510 case nir_var_shader_out:
511 if (bld->fs_iface && bld->fs_iface->fb_fetch) {
512 bld->fs_iface->fb_fetch(bld->fs_iface, &bld_base->base, var->data.location, result);
513 return;
514 }
515 for (unsigned i = 0; i < num_components; i++) {
516 int idx = (i * dmul) + location_frac;
517 if (bld->tcs_iface) {
518 LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
519 LLVMValueRef attrib_index_val;
520 LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
521
522 if (indir_index)
523 attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
524 else
525 attrib_index_val = lp_build_const_int32(gallivm, location);
526
527 result[i] = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
528 indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
529 (indir_index && !var->data.compact) ? true : false, attrib_index_val,
530 (indir_index && var->data.compact) ? true : false, swizzle_index_val, 0);
531 if (bit_size == 64) {
532 LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
533 LLVMValueRef result2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
534 indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
535 indir_index ? true : false, attrib_index_val,
536 false, swizzle_index_val, 0);
537 result[i] = emit_fetch_64bit(bld_base, result[i], result2);
538 }
539 }
540 }
541 break;
542 default:
543 break;
544 }
545 }
546
emit_store_chan(struct lp_build_nir_context * bld_base,nir_variable_mode deref_mode,unsigned bit_size,unsigned location,unsigned comp,unsigned chan,LLVMValueRef dst)547 static void emit_store_chan(struct lp_build_nir_context *bld_base,
548 nir_variable_mode deref_mode,
549 unsigned bit_size,
550 unsigned location, unsigned comp,
551 unsigned chan,
552 LLVMValueRef dst)
553 {
554 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
555 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
556 struct lp_build_context *float_bld = &bld_base->base;
557
558 if (bit_size == 64) {
559 chan *= 2;
560 chan += comp;
561 if (chan >= 4) {
562 chan -= 4;
563 location++;
564 }
565 emit_store_64bit_chan(bld_base, bld->outputs[location][chan],
566 bld->outputs[location][chan + 1], dst);
567 } else {
568 dst = LLVMBuildBitCast(builder, dst, float_bld->vec_type, "");
569 lp_exec_mask_store(&bld->exec_mask, float_bld, dst,
570 bld->outputs[location][chan + comp]);
571 }
572 }
573
emit_store_tcs_chan(struct lp_build_nir_context * bld_base,bool is_compact,unsigned bit_size,unsigned location,unsigned const_index,LLVMValueRef indir_vertex_index,LLVMValueRef indir_index,unsigned comp,unsigned chan,LLVMValueRef chan_val)574 static void emit_store_tcs_chan(struct lp_build_nir_context *bld_base,
575 bool is_compact,
576 unsigned bit_size,
577 unsigned location,
578 unsigned const_index,
579 LLVMValueRef indir_vertex_index,
580 LLVMValueRef indir_index,
581 unsigned comp,
582 unsigned chan,
583 LLVMValueRef chan_val)
584 {
585 struct gallivm_state *gallivm = bld_base->base.gallivm;
586 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
587 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
588 unsigned swizzle = chan;
589 if (bit_size == 64) {
590 swizzle *= 2;
591 swizzle += comp;
592 if (swizzle >= 4) {
593 swizzle -= 4;
594 location++;
595 }
596 } else
597 swizzle += comp;
598 LLVMValueRef attrib_index_val;
599 LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, swizzle);
600
601 if (indir_index) {
602 if (is_compact) {
603 swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, swizzle));
604 attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
605 } else
606 attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, location));
607 } else
608 attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
609 LLVMValueRef exec_mask = mask_vec(bld_base);
610 if (bit_size == 64) {
611 LLVMValueRef split_vals[2];
612 LLVMValueRef swizzle_index_val2 = lp_build_const_int32(gallivm, swizzle + 1);
613 emit_store_64bit_split(bld_base, chan_val, split_vals);
614 bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
615 indir_vertex_index ? true : false,
616 indir_vertex_index,
617 indir_index ? true : false,
618 attrib_index_val,
619 false, swizzle_index_val,
620 split_vals[0], exec_mask);
621 bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
622 indir_vertex_index ? true : false,
623 indir_vertex_index,
624 indir_index ? true : false,
625 attrib_index_val,
626 false, swizzle_index_val2,
627 split_vals[1], exec_mask);
628 } else {
629 chan_val = LLVMBuildBitCast(builder, chan_val, bld_base->base.vec_type, "");
630 bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
631 indir_vertex_index ? true : false,
632 indir_vertex_index,
633 indir_index && !is_compact ? true : false,
634 attrib_index_val,
635 indir_index && is_compact ? true : false,
636 swizzle_index_val,
637 chan_val, exec_mask);
638 }
639 }
640
emit_store_var(struct lp_build_nir_context * bld_base,nir_variable_mode deref_mode,unsigned num_components,unsigned bit_size,nir_variable * var,unsigned writemask,LLVMValueRef indir_vertex_index,unsigned const_index,LLVMValueRef indir_index,LLVMValueRef dst)641 static void emit_store_var(struct lp_build_nir_context *bld_base,
642 nir_variable_mode deref_mode,
643 unsigned num_components,
644 unsigned bit_size,
645 nir_variable *var,
646 unsigned writemask,
647 LLVMValueRef indir_vertex_index,
648 unsigned const_index,
649 LLVMValueRef indir_index,
650 LLVMValueRef dst)
651 {
652 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
653 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
654 switch (deref_mode) {
655 case nir_var_shader_out: {
656 unsigned location = var->data.driver_location;
657 unsigned comp = var->data.location_frac;
658 if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
659 if (var->data.location == FRAG_RESULT_STENCIL)
660 comp = 1;
661 else if (var->data.location == FRAG_RESULT_DEPTH)
662 comp = 2;
663 }
664
665 if (var->data.compact) {
666 location += const_index / 4;
667 comp += const_index % 4;
668 const_index = 0;
669 }
670
671 for (unsigned chan = 0; chan < num_components; chan++) {
672 if (writemask & (1u << chan)) {
673 LLVMValueRef chan_val = (num_components == 1) ? dst : LLVMBuildExtractValue(builder, dst, chan, "");
674 if (bld->tcs_iface) {
675 emit_store_tcs_chan(bld_base, var->data.compact, bit_size, location, const_index, indir_vertex_index, indir_index, comp, chan, chan_val);
676 } else
677 emit_store_chan(bld_base, deref_mode, bit_size, location + const_index, comp, chan, chan_val);
678 }
679 }
680 break;
681 }
682 default:
683 break;
684 }
685 }
686
emit_load_reg(struct lp_build_nir_context * bld_base,struct lp_build_context * reg_bld,const nir_reg_src * reg,LLVMValueRef indir_src,LLVMValueRef reg_storage)687 static LLVMValueRef emit_load_reg(struct lp_build_nir_context *bld_base,
688 struct lp_build_context *reg_bld,
689 const nir_reg_src *reg,
690 LLVMValueRef indir_src,
691 LLVMValueRef reg_storage)
692 {
693 struct gallivm_state *gallivm = bld_base->base.gallivm;
694 LLVMBuilderRef builder = gallivm->builder;
695 int nc = reg->reg->num_components;
696 LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS] = { NULL };
697 struct lp_build_context *uint_bld = &bld_base->uint_bld;
698 if (reg->reg->num_array_elems) {
699 LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, reg->base_offset);
700 if (reg->indirect) {
701 LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, reg->reg->num_array_elems - 1);
702 indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, "");
703 indirect_val = lp_build_min(uint_bld, indirect_val, max_index);
704 }
705 reg_storage = LLVMBuildBitCast(builder, reg_storage, LLVMPointerType(reg_bld->elem_type, 0), "");
706 for (unsigned i = 0; i < nc; i++) {
707 LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, TRUE);
708 vals[i] = build_gather(bld_base, reg_bld, reg_storage, indirect_offset, NULL, NULL);
709 }
710 } else {
711 for (unsigned i = 0; i < nc; i++) {
712 LLVMValueRef this_storage = nc == 1 ? reg_storage : lp_build_array_get_ptr(gallivm, reg_storage,
713 lp_build_const_int32(gallivm, i));
714 vals[i] = LLVMBuildLoad(builder, this_storage, "");
715 }
716 }
717 return nc == 1 ? vals[0] : lp_nir_array_build_gather_values(builder, vals, nc);
718 }
719
emit_store_reg(struct lp_build_nir_context * bld_base,struct lp_build_context * reg_bld,const nir_reg_dest * reg,unsigned writemask,LLVMValueRef indir_src,LLVMValueRef reg_storage,LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS])720 static void emit_store_reg(struct lp_build_nir_context *bld_base,
721 struct lp_build_context *reg_bld,
722 const nir_reg_dest *reg,
723 unsigned writemask,
724 LLVMValueRef indir_src,
725 LLVMValueRef reg_storage,
726 LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS])
727 {
728 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
729 struct gallivm_state *gallivm = bld_base->base.gallivm;
730 LLVMBuilderRef builder = gallivm->builder;
731 struct lp_build_context *uint_bld = &bld_base->uint_bld;
732 int nc = reg->reg->num_components;
733 if (reg->reg->num_array_elems > 0) {
734 LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, reg->base_offset);
735 if (reg->indirect) {
736 LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, reg->reg->num_array_elems - 1);
737 indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, "");
738 indirect_val = lp_build_min(uint_bld, indirect_val, max_index);
739 }
740 reg_storage = LLVMBuildBitCast(builder, reg_storage, LLVMPointerType(reg_bld->elem_type, 0), "");
741 for (unsigned i = 0; i < nc; i++) {
742 if (!(writemask & (1 << i)))
743 continue;
744 LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, TRUE);
745 dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, "");
746 emit_mask_scatter(bld, reg_storage, indirect_offset, dst[i], &bld->exec_mask);
747 }
748 return;
749 }
750
751 for (unsigned i = 0; i < nc; i++) {
752 LLVMValueRef this_storage = nc == 1 ? reg_storage : lp_build_array_get_ptr(gallivm, reg_storage,
753 lp_build_const_int32(gallivm, i));
754 dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, "");
755 lp_exec_mask_store(&bld->exec_mask, reg_bld, dst[i], this_storage);
756 }
757 }
758
emit_load_kernel_arg(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,unsigned offset_bit_size,bool offset_is_uniform,LLVMValueRef offset,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])759 static void emit_load_kernel_arg(struct lp_build_nir_context *bld_base,
760 unsigned nc,
761 unsigned bit_size,
762 unsigned offset_bit_size,
763 bool offset_is_uniform,
764 LLVMValueRef offset,
765 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
766 {
767 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
768 struct gallivm_state *gallivm = bld_base->base.gallivm;
769 LLVMBuilderRef builder = gallivm->builder;
770 struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size);
771 LLVMValueRef kernel_args_ptr = bld->kernel_args_ptr;
772 unsigned size_shift = bit_size_to_shift_size(bit_size);
773 struct lp_build_context *bld_offset = get_int_bld(bld_base, true, offset_bit_size);
774 if (size_shift)
775 offset = lp_build_shr(bld_offset, offset, lp_build_const_int_vec(gallivm, bld_offset->type, size_shift));
776
777 LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0);
778 kernel_args_ptr = LLVMBuildBitCast(builder, kernel_args_ptr, ptr_type, "");
779
780 if (!invocation_0_must_be_active(bld_base)) {
781 mesa_logw_once("Treating load_kernel_arg in control flow as uniform, results may be incorrect.");
782 }
783
784 if (offset_is_uniform) {
785 offset = LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), "");
786
787 for (unsigned c = 0; c < nc; c++) {
788 LLVMValueRef this_offset = LLVMBuildAdd(builder, offset, offset_bit_size == 64 ? lp_build_const_int64(gallivm, c) : lp_build_const_int32(gallivm, c), "");
789
790 LLVMValueRef scalar = lp_build_pointer_get(builder, kernel_args_ptr, this_offset);
791 result[c] = lp_build_broadcast_scalar(bld_broad, scalar);
792 }
793 }
794 }
795
global_addr_to_ptr(struct gallivm_state * gallivm,LLVMValueRef addr_ptr,unsigned bit_size)796 static LLVMValueRef global_addr_to_ptr(struct gallivm_state *gallivm, LLVMValueRef addr_ptr, unsigned bit_size)
797 {
798 LLVMBuilderRef builder = gallivm->builder;
799 switch (bit_size) {
800 case 8:
801 addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");
802 break;
803 case 16:
804 addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt16TypeInContext(gallivm->context), 0), "");
805 break;
806 case 32:
807 default:
808 addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), "");
809 break;
810 case 64:
811 addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0), "");
812 break;
813 }
814 return addr_ptr;
815 }
816
emit_load_global(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,unsigned addr_bit_size,bool offset_is_uniform,LLVMValueRef addr,LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])817 static void emit_load_global(struct lp_build_nir_context *bld_base,
818 unsigned nc,
819 unsigned bit_size,
820 unsigned addr_bit_size,
821 bool offset_is_uniform,
822 LLVMValueRef addr,
823 LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
824 {
825 struct gallivm_state *gallivm = bld_base->base.gallivm;
826 LLVMBuilderRef builder = gallivm->builder;
827 struct lp_build_context *uint_bld = &bld_base->uint_bld;
828 struct lp_build_context *res_bld;
829 LLVMValueRef exec_mask = mask_vec(bld_base);
830
831 res_bld = get_int_bld(bld_base, true, bit_size);
832
833 if (offset_is_uniform && invocation_0_must_be_active(bld_base)) {
834 /* If the offset is uniform, then use the address from invocation 0 to
835 * load, and broadcast to all invocations.
836 */
837 LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
838 lp_build_const_int32(gallivm, 0), "");
839 addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);
840
841 for (unsigned c = 0; c < nc; c++) {
842 LLVMValueRef scalar = lp_build_pointer_get(builder, addr_ptr, lp_build_const_int32(gallivm, c));
843 outval[c] = lp_build_broadcast_scalar(res_bld, scalar);
844 }
845 return;
846 }
847
848 for (unsigned c = 0; c < nc; c++) {
849 LLVMValueRef result = lp_build_alloca(gallivm, res_bld->vec_type, "");
850 struct lp_build_loop_state loop_state;
851 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
852
853 struct lp_build_if_state ifthen;
854 LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
855 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
856 lp_build_if(&ifthen, gallivm, cond);
857
858 LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
859 loop_state.counter, "");
860 addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);
861
862 LLVMValueRef value_ptr = lp_build_pointer_get(builder, addr_ptr, lp_build_const_int32(gallivm, c));
863
864 LLVMValueRef temp_res;
865 temp_res = LLVMBuildLoad2(builder, res_bld->vec_type, result, "");
866 temp_res = LLVMBuildInsertElement(builder, temp_res, value_ptr, loop_state.counter, "");
867 LLVMBuildStore(builder, temp_res, result);
868 lp_build_endif(&ifthen);
869 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
870 NULL, LLVMIntUGE);
871 outval[c] = LLVMBuildLoad2(builder, res_bld->vec_type, result, "");
872 }
873 }
874
emit_store_global(struct lp_build_nir_context * bld_base,unsigned writemask,unsigned nc,unsigned bit_size,unsigned addr_bit_size,LLVMValueRef addr,LLVMValueRef dst)875 static void emit_store_global(struct lp_build_nir_context *bld_base,
876 unsigned writemask,
877 unsigned nc, unsigned bit_size,
878 unsigned addr_bit_size,
879 LLVMValueRef addr,
880 LLVMValueRef dst)
881 {
882 struct gallivm_state *gallivm = bld_base->base.gallivm;
883 LLVMBuilderRef builder = gallivm->builder;
884 struct lp_build_context *uint_bld = &bld_base->uint_bld;
885 LLVMValueRef exec_mask = mask_vec(bld_base);
886
887 for (unsigned c = 0; c < nc; c++) {
888 if (!(writemask & (1u << c)))
889 continue;
890 LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
891
892 struct lp_build_loop_state loop_state;
893 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
894 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
895 loop_state.counter, "");
896
897 LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
898 loop_state.counter, "");
899 addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);
900 switch (bit_size) {
901 case 8:
902 value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt8TypeInContext(gallivm->context), "");
903 break;
904 case 16:
905 value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt16TypeInContext(gallivm->context), "");
906 break;
907 case 32:
908 value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt32TypeInContext(gallivm->context), "");
909 break;
910 case 64:
911 value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt64TypeInContext(gallivm->context), "");
912 break;
913 default:
914 break;
915 }
916 struct lp_build_if_state ifthen;
917
918 LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
919 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
920 lp_build_if(&ifthen, gallivm, cond);
921 lp_build_pointer_set(builder, addr_ptr, lp_build_const_int32(gallivm, c), value_ptr);
922 lp_build_endif(&ifthen);
923 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
924 NULL, LLVMIntUGE);
925 }
926 }
927
emit_atomic_global(struct lp_build_nir_context * bld_base,nir_intrinsic_op nir_op,unsigned addr_bit_size,unsigned val_bit_size,LLVMValueRef addr,LLVMValueRef val,LLVMValueRef val2,LLVMValueRef * result)928 static void emit_atomic_global(struct lp_build_nir_context *bld_base,
929 nir_intrinsic_op nir_op,
930 unsigned addr_bit_size,
931 unsigned val_bit_size,
932 LLVMValueRef addr,
933 LLVMValueRef val, LLVMValueRef val2,
934 LLVMValueRef *result)
935 {
936 struct gallivm_state *gallivm = bld_base->base.gallivm;
937 LLVMBuilderRef builder = gallivm->builder;
938 struct lp_build_context *uint_bld = &bld_base->uint_bld;
939 struct lp_build_context *atom_bld = get_int_bld(bld_base, true, val_bit_size);
940 LLVMValueRef atom_res = lp_build_alloca(gallivm,
941 LLVMTypeOf(val), "");
942 LLVMValueRef exec_mask = mask_vec(bld_base);
943 struct lp_build_loop_state loop_state;
944 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
945
946 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
947 loop_state.counter, "");
948
949 LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
950 loop_state.counter, "");
951 addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, 32);
952 struct lp_build_if_state ifthen;
953 LLVMValueRef cond, temp_res;
954 LLVMValueRef scalar;
955 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
956 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
957 lp_build_if(&ifthen, gallivm, cond);
958
959 addr_ptr = LLVMBuildBitCast(gallivm->builder, addr_ptr, LLVMPointerType(LLVMTypeOf(value_ptr), 0), "");
960 if (nir_op == nir_intrinsic_global_atomic_comp_swap) {
961 LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
962 loop_state.counter, "");
963 cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atom_bld->elem_type, "");
964 scalar = LLVMBuildAtomicCmpXchg(builder, addr_ptr, value_ptr,
965 cas_src_ptr,
966 LLVMAtomicOrderingSequentiallyConsistent,
967 LLVMAtomicOrderingSequentiallyConsistent,
968 false);
969 scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
970 } else {
971 LLVMAtomicRMWBinOp op;
972 switch (nir_op) {
973 case nir_intrinsic_global_atomic_add:
974 op = LLVMAtomicRMWBinOpAdd;
975 break;
976 case nir_intrinsic_global_atomic_exchange:
977
978 op = LLVMAtomicRMWBinOpXchg;
979 break;
980 case nir_intrinsic_global_atomic_and:
981 op = LLVMAtomicRMWBinOpAnd;
982 break;
983 case nir_intrinsic_global_atomic_or:
984 op = LLVMAtomicRMWBinOpOr;
985 break;
986 case nir_intrinsic_global_atomic_xor:
987 op = LLVMAtomicRMWBinOpXor;
988 break;
989 case nir_intrinsic_global_atomic_umin:
990 op = LLVMAtomicRMWBinOpUMin;
991 break;
992 case nir_intrinsic_global_atomic_umax:
993 op = LLVMAtomicRMWBinOpUMax;
994 break;
995 case nir_intrinsic_global_atomic_imin:
996 op = LLVMAtomicRMWBinOpMin;
997 break;
998 case nir_intrinsic_global_atomic_imax:
999 op = LLVMAtomicRMWBinOpMax;
1000 break;
1001 default:
1002 unreachable("unknown atomic op");
1003 }
1004
1005 scalar = LLVMBuildAtomicRMW(builder, op,
1006 addr_ptr, value_ptr,
1007 LLVMAtomicOrderingSequentiallyConsistent,
1008 false);
1009 }
1010 temp_res = LLVMBuildLoad2(builder, LLVMTypeOf(val), atom_res, "");
1011 temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
1012 LLVMBuildStore(builder, temp_res, atom_res);
1013 lp_build_else(&ifthen);
1014 temp_res = LLVMBuildLoad2(builder, LLVMTypeOf(val), atom_res, "");
1015 bool is_float = LLVMTypeOf(val) == bld_base->base.vec_type;
1016 LLVMValueRef zero_val;
1017 if (is_float) {
1018 if (val_bit_size == 64)
1019 zero_val = lp_build_const_double(gallivm, 0);
1020 else
1021 zero_val = lp_build_const_float(gallivm, 0);
1022 } else {
1023 if (val_bit_size == 64)
1024 zero_val = lp_build_const_int64(gallivm, 0);
1025 else
1026 zero_val = lp_build_const_int32(gallivm, 0);
1027 }
1028
1029 temp_res = LLVMBuildInsertElement(builder, temp_res, zero_val, loop_state.counter, "");
1030 LLVMBuildStore(builder, temp_res, atom_res);
1031 lp_build_endif(&ifthen);
1032 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
1033 NULL, LLVMIntUGE);
1034 *result = LLVMBuildLoad2(builder, LLVMTypeOf(val), atom_res, "");
1035 }
1036
1037 /* Returns a boolean for whether the offset is in range of the given limit for
1038 * SSBO/UBO dereferences.
1039 */
1040 static LLVMValueRef
lp_offset_in_range(struct lp_build_nir_context * bld_base,LLVMValueRef offset,LLVMValueRef limit)1041 lp_offset_in_range(struct lp_build_nir_context *bld_base,
1042 LLVMValueRef offset,
1043 LLVMValueRef limit)
1044 {
1045 struct gallivm_state *gallivm = bld_base->base.gallivm;
1046 LLVMBuilderRef builder = gallivm->builder;
1047
1048 LLVMValueRef fetch_extent = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, 1), "");
1049 LLVMValueRef fetch_in_bounds = LLVMBuildICmp(gallivm->builder, LLVMIntUGE, limit, fetch_extent, "");
1050 LLVMValueRef fetch_non_negative = LLVMBuildICmp(gallivm->builder, LLVMIntSGE, offset, lp_build_const_int32(gallivm, 0), "");
1051 return LLVMBuildAnd(gallivm->builder, fetch_in_bounds, fetch_non_negative, "");
1052 }
1053
emit_load_ubo(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,bool offset_is_uniform,LLVMValueRef index,LLVMValueRef offset,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1054 static void emit_load_ubo(struct lp_build_nir_context *bld_base,
1055 unsigned nc,
1056 unsigned bit_size,
1057 bool offset_is_uniform,
1058 LLVMValueRef index,
1059 LLVMValueRef offset,
1060 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1061 {
1062 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1063 struct gallivm_state *gallivm = bld_base->base.gallivm;
1064 LLVMBuilderRef builder = gallivm->builder;
1065 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1066 struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size);
1067 LLVMValueRef consts_ptr = lp_build_array_get(gallivm, bld->consts_ptr, index);
1068 LLVMValueRef num_consts = lp_build_array_get(gallivm, bld->const_sizes_ptr, index);
1069 unsigned size_shift = bit_size_to_shift_size(bit_size);
1070 if (size_shift)
1071 offset = lp_build_shr(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, size_shift));
1072
1073 LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0);
1074 consts_ptr = LLVMBuildBitCast(builder, consts_ptr, ptr_type, "");
1075
1076 if (offset_is_uniform && invocation_0_must_be_active(bld_base)) {
1077 offset = LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), "");
1078 struct lp_build_context *load_bld = get_int_bld(bld_base, true, bit_size);
1079 switch (bit_size) {
1080 case 8:
1081 num_consts = LLVMBuildShl(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 2), "");
1082 break;
1083 case 16:
1084 num_consts = LLVMBuildShl(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 1), "");
1085 break;
1086 case 64:
1087 num_consts = LLVMBuildLShr(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 1), "");
1088 break;
1089 default: break;
1090 }
1091 for (unsigned c = 0; c < nc; c++) {
1092 LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");
1093
1094 LLVMValueRef scalar;
1095 /* If loading outside the UBO, we need to skip the load and read 0 instead. */
1096 LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size);
1097 LLVMValueRef res_store = lp_build_alloca(gallivm, LLVMTypeOf(zero), "");
1098 LLVMBuildStore(builder, zero, res_store);
1099
1100 struct lp_build_if_state ifthen;
1101 lp_build_if(&ifthen, gallivm, lp_offset_in_range(bld_base, chan_offset, num_consts));
1102 LLVMBuildStore(builder, lp_build_pointer_get(builder, consts_ptr, chan_offset), res_store);
1103 lp_build_endif(&ifthen);
1104
1105 scalar = LLVMBuildLoad(builder, res_store, "");
1106
1107 result[c] = lp_build_broadcast_scalar(load_bld, scalar);
1108 }
1109 } else {
1110 LLVMValueRef overflow_mask;
1111
1112 num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
1113 if (bit_size == 64)
1114 num_consts = lp_build_shr_imm(uint_bld, num_consts, 1);
1115 else if (bit_size == 16)
1116 num_consts = lp_build_shl_imm(uint_bld, num_consts, 1);
1117 else if (bit_size == 8)
1118 num_consts = lp_build_shl_imm(uint_bld, num_consts, 2);
1119
1120 for (unsigned c = 0; c < nc; c++) {
1121 LLVMValueRef this_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
1122 overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
1123 this_offset, num_consts);
1124 result[c] = build_gather(bld_base, bld_broad, consts_ptr, this_offset, overflow_mask, NULL);
1125 }
1126 }
1127 }
1128
1129 static void
emit_load_const(struct lp_build_nir_context * bld_base,const nir_load_const_instr * instr,LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])1130 emit_load_const(struct lp_build_nir_context *bld_base,
1131 const nir_load_const_instr *instr,
1132 LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
1133 {
1134 struct lp_build_context *int_bld = get_int_bld(bld_base, true, instr->def.bit_size);
1135 const unsigned bits = instr->def.bit_size;
1136
1137 for (unsigned i = 0; i < instr->def.num_components; i++) {
1138 outval[i] = lp_build_const_int_vec(bld_base->base.gallivm, int_bld->type,
1139 bits == 32 ? instr->value[i].u32
1140 : instr->value[i].u64);
1141 }
1142 for (unsigned i = instr->def.num_components; i < NIR_MAX_VEC_COMPONENTS; i++) {
1143 outval[i] = NULL;
1144 }
1145 }
1146
1147 /**
1148 * Get the base address of SSBO[@index] for the @invocation channel, returning
1149 * the address and also the bounds (in units of the bit_size).
1150 */
1151 static LLVMValueRef
ssbo_base_pointer(struct lp_build_nir_context * bld_base,unsigned bit_size,LLVMValueRef index,LLVMValueRef invocation,LLVMValueRef * bounds)1152 ssbo_base_pointer(struct lp_build_nir_context *bld_base,
1153 unsigned bit_size,
1154 LLVMValueRef index, LLVMValueRef invocation, LLVMValueRef *bounds)
1155 {
1156 struct gallivm_state *gallivm = bld_base->base.gallivm;
1157 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1158 uint32_t shift_val = bit_size_to_shift_size(bit_size);
1159
1160 LLVMValueRef ssbo_idx = LLVMBuildExtractElement(gallivm->builder, index, invocation, "");
1161 LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, ssbo_idx);
1162 LLVMValueRef ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, ssbo_idx);
1163 if (bounds)
1164 *bounds = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, shift_val), "");
1165
1166 return ssbo_ptr;
1167 }
1168
1169 static LLVMValueRef
mem_access_base_pointer(struct lp_build_nir_context * bld_base,struct lp_build_context * mem_bld,unsigned bit_size,LLVMValueRef index,LLVMValueRef invocation,LLVMValueRef * bounds)1170 mem_access_base_pointer(struct lp_build_nir_context *bld_base,
1171 struct lp_build_context *mem_bld,
1172 unsigned bit_size,
1173 LLVMValueRef index, LLVMValueRef invocation, LLVMValueRef *bounds)
1174 {
1175 struct gallivm_state *gallivm = bld_base->base.gallivm;
1176 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1177 LLVMValueRef ptr;
1178
1179 if (index) {
1180 ptr = ssbo_base_pointer(bld_base, bit_size, index, invocation, bounds);
1181 } else {
1182 ptr = bld->shared_ptr;
1183 *bounds = NULL;
1184 }
1185
1186 /* Cast it to the pointer type of the access this instruction is doing. */
1187 if (bit_size == 32)
1188 return ptr;
1189 else
1190 return LLVMBuildBitCast(gallivm->builder, ptr, LLVMPointerType(mem_bld->elem_type, 0), "");
1191 }
1192
emit_load_mem(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,bool index_and_offset_are_uniform,LLVMValueRef index,LLVMValueRef offset,LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])1193 static void emit_load_mem(struct lp_build_nir_context *bld_base,
1194 unsigned nc,
1195 unsigned bit_size,
1196 bool index_and_offset_are_uniform,
1197 LLVMValueRef index,
1198 LLVMValueRef offset,
1199 LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
1200 {
1201 struct gallivm_state *gallivm = bld_base->base.gallivm;
1202 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1203 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1204 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1205 struct lp_build_context *load_bld;
1206 uint32_t shift_val = bit_size_to_shift_size(bit_size);
1207
1208 load_bld = get_int_bld(bld_base, true, bit_size);
1209
1210 offset = LLVMBuildAShr(gallivm->builder, offset, lp_build_const_int_vec(gallivm, uint_bld->type, shift_val), "");
1211
1212 /* If the address is uniform, then use the address from invocation 0 to load,
1213 * and broadcast to all invocations.
1214 */
1215 if (index_and_offset_are_uniform && invocation_0_must_be_active(bld_base)) {
1216 LLVMValueRef ssbo_limit;
1217 LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, index,
1218 lp_build_const_int32(gallivm, 0), &ssbo_limit);
1219
1220 offset = LLVMBuildExtractElement(gallivm->builder, offset, lp_build_const_int32(gallivm, 0), "");
1221
1222 for (unsigned c = 0; c < nc; c++) {
1223 LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");
1224
1225 LLVMValueRef scalar;
1226 /* If loading outside the SSBO, we need to skip the load and read 0 instead. */
1227 if (ssbo_limit) {
1228 LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size);
1229 LLVMValueRef res_store = lp_build_alloca(gallivm, LLVMTypeOf(zero), "");
1230 LLVMBuildStore(builder, zero, res_store);
1231
1232 struct lp_build_if_state ifthen;
1233 lp_build_if(&ifthen, gallivm, lp_offset_in_range(bld_base, chan_offset, ssbo_limit));
1234 LLVMBuildStore(builder, lp_build_pointer_get(builder, mem_ptr, chan_offset), res_store);
1235 lp_build_endif(&ifthen);
1236
1237 scalar = LLVMBuildLoad2(builder, LLVMTypeOf(zero), res_store, "");
1238 } else {
1239 scalar = lp_build_pointer_get(builder, mem_ptr, chan_offset);
1240 }
1241
1242 outval[c] = lp_build_broadcast_scalar(load_bld, scalar);
1243 }
1244 return;
1245 }
1246
1247 /* although the index is dynamically uniform that doesn't count if exec mask isn't set, so read the one-by-one */
1248
1249 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
1250 for (unsigned c = 0; c < nc; c++)
1251 result[c] = lp_build_alloca(gallivm, load_bld->vec_type, "");
1252
1253 LLVMValueRef exec_mask = mask_vec(bld_base);
1254 LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
1255 struct lp_build_loop_state loop_state;
1256 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1257 LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
1258 LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, "");
1259
1260 struct lp_build_if_state exec_ifthen;
1261 lp_build_if(&exec_ifthen, gallivm, loop_cond);
1262
1263 LLVMValueRef ssbo_limit;
1264 LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, index,
1265 loop_state.counter, &ssbo_limit);
1266
1267 for (unsigned c = 0; c < nc; c++) {
1268 LLVMValueRef loop_index = LLVMBuildAdd(builder, loop_offset, lp_build_const_int32(gallivm, c), "");
1269 LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1);
1270 if (ssbo_limit) {
1271 LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_index, ssbo_limit);
1272 do_fetch = LLVMBuildAnd(builder, do_fetch, ssbo_oob_cmp, "");
1273 }
1274
1275 struct lp_build_if_state ifthen;
1276 LLVMValueRef fetch_cond, temp_res;
1277
1278 fetch_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_fetch, lp_build_const_int32(gallivm, 0), "");
1279
1280 lp_build_if(&ifthen, gallivm, fetch_cond);
1281 LLVMValueRef scalar = lp_build_pointer_get(builder, mem_ptr, loop_index);
1282
1283 temp_res = LLVMBuildLoad2(builder, load_bld->vec_type, result[c], "");
1284 temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
1285 LLVMBuildStore(builder, temp_res, result[c]);
1286 lp_build_else(&ifthen);
1287 temp_res = LLVMBuildLoad2(builder, load_bld->vec_type, result[c], "");
1288 LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size);
1289 temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");
1290 LLVMBuildStore(builder, temp_res, result[c]);
1291 lp_build_endif(&ifthen);
1292 }
1293
1294 lp_build_endif(&exec_ifthen);
1295 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
1296 NULL, LLVMIntUGE);
1297 for (unsigned c = 0; c < nc; c++)
1298 outval[c] = LLVMBuildLoad2(gallivm->builder, load_bld->vec_type, result[c], "");
1299
1300 }
1301
emit_store_mem(struct lp_build_nir_context * bld_base,unsigned writemask,unsigned nc,unsigned bit_size,bool index_and_offset_are_uniform,LLVMValueRef index,LLVMValueRef offset,LLVMValueRef dst)1302 static void emit_store_mem(struct lp_build_nir_context *bld_base,
1303 unsigned writemask,
1304 unsigned nc,
1305 unsigned bit_size,
1306 bool index_and_offset_are_uniform,
1307 LLVMValueRef index,
1308 LLVMValueRef offset,
1309 LLVMValueRef dst)
1310 {
1311 struct gallivm_state *gallivm = bld_base->base.gallivm;
1312 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1313 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1314 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1315 struct lp_build_context *store_bld;
1316 uint32_t shift_val = bit_size_to_shift_size(bit_size);
1317 store_bld = get_int_bld(bld_base, true, bit_size);
1318
1319 offset = lp_build_shr_imm(uint_bld, offset, shift_val);
1320
1321 /* If the address is uniform, then just store the value from the first
1322 * channel instead of making LLVM unroll the invocation loop.
1323 */
1324 if (index_and_offset_are_uniform && invocation_0_must_be_active(bld_base)) {
1325 LLVMValueRef ssbo_limit;
1326 LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, store_bld, bit_size, index,
1327 lp_build_const_int32(gallivm, 0), &ssbo_limit);
1328
1329 offset = LLVMBuildExtractElement(gallivm->builder, offset, lp_build_const_int32(gallivm, 0), "");
1330
1331 for (unsigned c = 0; c < nc; c++) {
1332 if (!(writemask & (1u << c)))
1333 continue;
1334
1335 /* Pick out invocation 0's value. */
1336 LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
1337 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
1338 lp_build_const_int32(gallivm, 0), "");
1339 value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, "");
1340
1341 LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");
1342
1343 /* If storing outside the SSBO, we need to skip the store instead. */
1344 if (ssbo_limit) {
1345 struct lp_build_if_state ifthen;
1346 lp_build_if(&ifthen, gallivm, lp_offset_in_range(bld_base, chan_offset, ssbo_limit));
1347 lp_build_pointer_set(builder, mem_ptr, chan_offset, value_ptr);
1348 lp_build_endif(&ifthen);
1349 } else {
1350 lp_build_pointer_set(builder, mem_ptr, chan_offset, value_ptr);
1351 }
1352 }
1353 return;
1354 }
1355
1356 LLVMValueRef exec_mask = mask_vec(bld_base);
1357 LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
1358 struct lp_build_loop_state loop_state;
1359 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1360 LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
1361 LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, "");
1362
1363 struct lp_build_if_state exec_ifthen;
1364 lp_build_if(&exec_ifthen, gallivm, loop_cond);
1365
1366 LLVMValueRef ssbo_limit;
1367 LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, store_bld, bit_size, index,
1368 loop_state.counter, &ssbo_limit);
1369
1370 for (unsigned c = 0; c < nc; c++) {
1371 if (!(writemask & (1u << c)))
1372 continue;
1373 LLVMValueRef loop_index = LLVMBuildAdd(builder, loop_offset, lp_build_const_int32(gallivm, c), "");
1374 LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
1375 LLVMValueRef do_store = lp_build_const_int32(gallivm, -1);
1376
1377 if (ssbo_limit) {
1378 LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_index, ssbo_limit);
1379 do_store = LLVMBuildAnd(builder, do_store, ssbo_oob_cmp, "");
1380 }
1381
1382 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
1383 loop_state.counter, "");
1384 value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, "");
1385 struct lp_build_if_state ifthen;
1386 LLVMValueRef store_cond;
1387
1388 store_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_store, lp_build_const_int32(gallivm, 0), "");
1389 lp_build_if(&ifthen, gallivm, store_cond);
1390 lp_build_pointer_set(builder, mem_ptr, loop_index, value_ptr);
1391 lp_build_endif(&ifthen);
1392 }
1393
1394 lp_build_endif(&exec_ifthen);
1395 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
1396 NULL, LLVMIntUGE);
1397
1398 }
1399
emit_atomic_mem(struct lp_build_nir_context * bld_base,nir_intrinsic_op nir_op,uint32_t bit_size,LLVMValueRef index,LLVMValueRef offset,LLVMValueRef val,LLVMValueRef val2,LLVMValueRef * result)1400 static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
1401 nir_intrinsic_op nir_op,
1402 uint32_t bit_size,
1403 LLVMValueRef index, LLVMValueRef offset,
1404 LLVMValueRef val, LLVMValueRef val2,
1405 LLVMValueRef *result)
1406 {
1407 struct gallivm_state *gallivm = bld_base->base.gallivm;
1408 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1409 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1410 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1411 uint32_t shift_val = bit_size_to_shift_size(bit_size);
1412 struct lp_build_context *atomic_bld = get_int_bld(bld_base, true, bit_size);
1413
1414 offset = lp_build_shr_imm(uint_bld, offset, shift_val);
1415 LLVMValueRef atom_res = lp_build_alloca(gallivm,
1416 atomic_bld->vec_type, "");
1417
1418 LLVMValueRef exec_mask = mask_vec(bld_base);
1419 LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
1420 struct lp_build_loop_state loop_state;
1421 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1422 LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
1423 LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, "");
1424
1425 struct lp_build_if_state exec_ifthen;
1426 lp_build_if(&exec_ifthen, gallivm, loop_cond);
1427
1428 LLVMValueRef ssbo_limit;
1429 LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, atomic_bld, bit_size, index,
1430 loop_state.counter, &ssbo_limit);
1431
1432 LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1);
1433 if (ssbo_limit) {
1434 LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_offset, ssbo_limit);
1435 do_fetch = LLVMBuildAnd(builder, do_fetch, ssbo_oob_cmp, "");
1436 }
1437
1438 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
1439 loop_state.counter, "");
1440 value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, atomic_bld->elem_type, "");
1441
1442 LLVMValueRef scalar_ptr = LLVMBuildGEP2(builder, atomic_bld->elem_type, mem_ptr, &loop_offset, 1, "");
1443
1444 struct lp_build_if_state ifthen;
1445 LLVMValueRef inner_cond, temp_res;
1446 LLVMValueRef scalar;
1447
1448 inner_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_fetch, lp_build_const_int32(gallivm, 0), "");
1449 lp_build_if(&ifthen, gallivm, inner_cond);
1450
1451 if (nir_op == nir_intrinsic_ssbo_atomic_comp_swap || nir_op == nir_intrinsic_shared_atomic_comp_swap) {
1452 LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
1453 loop_state.counter, "");
1454 cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atomic_bld->elem_type, "");
1455 scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
1456 cas_src_ptr,
1457 LLVMAtomicOrderingSequentiallyConsistent,
1458 LLVMAtomicOrderingSequentiallyConsistent,
1459 false);
1460 scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
1461 } else {
1462 LLVMAtomicRMWBinOp op;
1463
1464 switch (nir_op) {
1465 case nir_intrinsic_shared_atomic_add:
1466 case nir_intrinsic_ssbo_atomic_add:
1467 op = LLVMAtomicRMWBinOpAdd;
1468 break;
1469 case nir_intrinsic_shared_atomic_exchange:
1470 case nir_intrinsic_ssbo_atomic_exchange:
1471 op = LLVMAtomicRMWBinOpXchg;
1472 break;
1473 case nir_intrinsic_shared_atomic_and:
1474 case nir_intrinsic_ssbo_atomic_and:
1475 op = LLVMAtomicRMWBinOpAnd;
1476 break;
1477 case nir_intrinsic_shared_atomic_or:
1478 case nir_intrinsic_ssbo_atomic_or:
1479 op = LLVMAtomicRMWBinOpOr;
1480 break;
1481 case nir_intrinsic_shared_atomic_xor:
1482 case nir_intrinsic_ssbo_atomic_xor:
1483 op = LLVMAtomicRMWBinOpXor;
1484 break;
1485 case nir_intrinsic_shared_atomic_umin:
1486 case nir_intrinsic_ssbo_atomic_umin:
1487 op = LLVMAtomicRMWBinOpUMin;
1488 break;
1489 case nir_intrinsic_shared_atomic_umax:
1490 case nir_intrinsic_ssbo_atomic_umax:
1491 op = LLVMAtomicRMWBinOpUMax;
1492 break;
1493 case nir_intrinsic_ssbo_atomic_imin:
1494 case nir_intrinsic_shared_atomic_imin:
1495 op = LLVMAtomicRMWBinOpMin;
1496 break;
1497 case nir_intrinsic_ssbo_atomic_imax:
1498 case nir_intrinsic_shared_atomic_imax:
1499 op = LLVMAtomicRMWBinOpMax;
1500 break;
1501 default:
1502 unreachable("unknown atomic op");
1503 }
1504 scalar = LLVMBuildAtomicRMW(builder, op,
1505 scalar_ptr, value_ptr,
1506 LLVMAtomicOrderingSequentiallyConsistent,
1507 false);
1508 }
1509 temp_res = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, "");
1510 temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
1511 LLVMBuildStore(builder, temp_res, atom_res);
1512 lp_build_else(&ifthen);
1513 temp_res = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, "");
1514 LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size);
1515 temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");
1516 LLVMBuildStore(builder, temp_res, atom_res);
1517 lp_build_endif(&ifthen);
1518
1519 lp_build_endif(&exec_ifthen);
1520 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
1521 NULL, LLVMIntUGE);
1522 *result = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, "");
1523 }
1524
emit_barrier(struct lp_build_nir_context * bld_base)1525 static void emit_barrier(struct lp_build_nir_context *bld_base)
1526 {
1527 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1528 struct gallivm_state * gallivm = bld_base->base.gallivm;
1529
1530 LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");
1531
1532 lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);
1533 LLVMPositionBuilderAtEnd(gallivm->builder, resume);
1534 }
1535
emit_get_ssbo_size(struct lp_build_nir_context * bld_base,LLVMValueRef index)1536 static LLVMValueRef emit_get_ssbo_size(struct lp_build_nir_context *bld_base,
1537 LLVMValueRef index)
1538 {
1539 struct gallivm_state *gallivm = bld_base->base.gallivm;
1540 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1541 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1542 struct lp_build_context *bld_broad = &bld_base->uint_bld;
1543 LLVMValueRef size_ptr = lp_build_array_get(bld_base->base.gallivm, bld->ssbo_sizes_ptr,
1544 LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));
1545 return lp_build_broadcast_scalar(bld_broad, size_ptr);
1546 }
1547
emit_image_op(struct lp_build_nir_context * bld_base,struct lp_img_params * params)1548 static void emit_image_op(struct lp_build_nir_context *bld_base,
1549 struct lp_img_params *params)
1550 {
1551 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1552 struct gallivm_state *gallivm = bld_base->base.gallivm;
1553
1554 params->type = bld_base->base.type;
1555 params->context_ptr = bld->context_ptr;
1556 params->thread_data_ptr = bld->thread_data_ptr;
1557 params->exec_mask = mask_vec(bld_base);
1558
1559 if (params->image_index_offset)
1560 params->image_index_offset = LLVMBuildExtractElement(gallivm->builder, params->image_index_offset,
1561 lp_build_const_int32(gallivm, 0), "");
1562
1563 bld->image->emit_op(bld->image,
1564 bld->bld_base.base.gallivm,
1565 params);
1566
1567 }
1568
emit_image_size(struct lp_build_nir_context * bld_base,struct lp_sampler_size_query_params * params)1569 static void emit_image_size(struct lp_build_nir_context *bld_base,
1570 struct lp_sampler_size_query_params *params)
1571 {
1572 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1573 struct gallivm_state *gallivm = bld_base->base.gallivm;
1574
1575 params->int_type = bld_base->int_bld.type;
1576 params->context_ptr = bld->context_ptr;
1577
1578 if (params->texture_unit_offset)
1579 params->texture_unit_offset = LLVMBuildExtractElement(gallivm->builder, params->texture_unit_offset,
1580 lp_build_const_int32(gallivm, 0), "");
1581 bld->image->emit_size_query(bld->image,
1582 bld->bld_base.base.gallivm,
1583 params);
1584
1585 }
1586
init_var_slots(struct lp_build_nir_context * bld_base,nir_variable * var,unsigned sc)1587 static void init_var_slots(struct lp_build_nir_context *bld_base,
1588 nir_variable *var, unsigned sc)
1589 {
1590 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1591 unsigned slots = glsl_count_attribute_slots(var->type, false) * 4;
1592
1593 if (!bld->outputs)
1594 return;
1595 for (unsigned comp = sc; comp < slots + sc; comp++) {
1596 unsigned this_loc = var->data.driver_location + (comp / 4);
1597 unsigned this_chan = comp % 4;
1598
1599 if (!bld->outputs[this_loc][this_chan])
1600 bld->outputs[this_loc][this_chan] = lp_build_alloca(bld_base->base.gallivm,
1601 bld_base->base.vec_type, "output");
1602 }
1603 }
1604
emit_var_decl(struct lp_build_nir_context * bld_base,nir_variable * var)1605 static void emit_var_decl(struct lp_build_nir_context *bld_base,
1606 nir_variable *var)
1607 {
1608 unsigned sc = var->data.location_frac;
1609 switch (var->data.mode) {
1610 case nir_var_shader_out: {
1611 if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
1612 if (var->data.location == FRAG_RESULT_STENCIL)
1613 sc = 1;
1614 else if (var->data.location == FRAG_RESULT_DEPTH)
1615 sc = 2;
1616 }
1617 init_var_slots(bld_base, var, sc);
1618 break;
1619 }
1620 default:
1621 break;
1622 }
1623 }
1624
emit_tex(struct lp_build_nir_context * bld_base,struct lp_sampler_params * params)1625 static void emit_tex(struct lp_build_nir_context *bld_base,
1626 struct lp_sampler_params *params)
1627 {
1628 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1629 struct gallivm_state *gallivm = bld_base->base.gallivm;
1630 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1631
1632 params->type = bld_base->base.type;
1633 params->context_ptr = bld->context_ptr;
1634 params->thread_data_ptr = bld->thread_data_ptr;
1635
1636 if (params->texture_index_offset && bld_base->shader->info.stage != MESA_SHADER_FRAGMENT) {
1637 /* this is horrible but this can be dynamic */
1638 LLVMValueRef coords[5];
1639 LLVMValueRef *orig_texel_ptr;
1640 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1641 LLVMValueRef result[4] = { LLVMGetUndef(bld_base->base.vec_type),
1642 LLVMGetUndef(bld_base->base.vec_type),
1643 LLVMGetUndef(bld_base->base.vec_type),
1644 LLVMGetUndef(bld_base->base.vec_type) };
1645 LLVMValueRef texel[4], orig_offset, orig_lod;
1646 unsigned i;
1647 orig_texel_ptr = params->texel;
1648 orig_lod = params->lod;
1649 for (i = 0; i < 5; i++) {
1650 coords[i] = params->coords[i];
1651 }
1652 orig_offset = params->texture_index_offset;
1653
1654 for (unsigned v = 0; v < uint_bld->type.length; v++) {
1655 LLVMValueRef idx = lp_build_const_int32(gallivm, v);
1656 LLVMValueRef new_coords[5];
1657 for (i = 0; i < 5; i++) {
1658 new_coords[i] = LLVMBuildExtractElement(gallivm->builder,
1659 coords[i], idx, "");
1660 }
1661 params->coords = new_coords;
1662 params->texture_index_offset = LLVMBuildExtractElement(gallivm->builder,
1663 orig_offset,
1664 idx, "");
1665 params->type = lp_elem_type(bld_base->base.type);
1666
1667 if (orig_lod)
1668 params->lod = LLVMBuildExtractElement(gallivm->builder, orig_lod, idx, "");
1669 params->texel = texel;
1670 bld->sampler->emit_tex_sample(bld->sampler,
1671 gallivm,
1672 params);
1673
1674 for (i = 0; i < 4; i++) {
1675 result[i] = LLVMBuildInsertElement(gallivm->builder, result[i], texel[i], idx, "");
1676 }
1677 }
1678 for (i = 0; i < 4; i++) {
1679 orig_texel_ptr[i] = result[i];
1680 }
1681 return;
1682 }
1683
1684 if (params->texture_index_offset) {
1685 struct lp_build_loop_state loop_state;
1686 LLVMValueRef exec_mask = mask_vec(bld_base);
1687 LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
1688 LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->uint_bld.elem_type, "");
1689 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1690 LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
1691
1692 struct lp_build_if_state ifthen;
1693 lp_build_if(&ifthen, gallivm, if_cond);
1694 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, params->texture_index_offset,
1695 loop_state.counter, "");
1696 LLVMBuildStore(builder, value_ptr, res_store);
1697 lp_build_endif(&ifthen);
1698 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
1699 NULL, LLVMIntUGE);
1700 LLVMValueRef idx_val = LLVMBuildLoad2(builder, bld_base->uint_bld.elem_type, res_store, "");
1701 params->texture_index_offset = idx_val;
1702 }
1703
1704 params->type = bld_base->base.type;
1705 bld->sampler->emit_tex_sample(bld->sampler,
1706 bld->bld_base.base.gallivm,
1707 params);
1708 }
1709
emit_tex_size(struct lp_build_nir_context * bld_base,struct lp_sampler_size_query_params * params)1710 static void emit_tex_size(struct lp_build_nir_context *bld_base,
1711 struct lp_sampler_size_query_params *params)
1712 {
1713 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1714
1715 params->int_type = bld_base->int_bld.type;
1716 params->context_ptr = bld->context_ptr;
1717
1718 if (params->texture_unit_offset)
1719 params->texture_unit_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder,
1720 params->texture_unit_offset,
1721 lp_build_const_int32(bld_base->base.gallivm, 0), "");
1722 bld->sampler->emit_size_query(bld->sampler,
1723 bld->bld_base.base.gallivm,
1724 params);
1725 }
1726
emit_sysval_intrin(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1727 static void emit_sysval_intrin(struct lp_build_nir_context *bld_base,
1728 nir_intrinsic_instr *instr,
1729 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1730 {
1731 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1732 struct gallivm_state *gallivm = bld_base->base.gallivm;
1733 struct lp_build_context *bld_broad = get_int_bld(bld_base, true, instr->dest.ssa.bit_size);
1734 switch (instr->intrinsic) {
1735 case nir_intrinsic_load_instance_id:
1736 result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1737 break;
1738 case nir_intrinsic_load_base_instance:
1739 result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.base_instance);
1740 break;
1741 case nir_intrinsic_load_base_vertex:
1742 result[0] = bld->system_values.basevertex;
1743 break;
1744 case nir_intrinsic_load_first_vertex:
1745 result[0] = bld->system_values.firstvertex;
1746 break;
1747 case nir_intrinsic_load_vertex_id:
1748 result[0] = bld->system_values.vertex_id;
1749 break;
1750 case nir_intrinsic_load_primitive_id:
1751 result[0] = bld->system_values.prim_id;
1752 break;
1753 case nir_intrinsic_load_workgroup_id: {
1754 LLVMValueRef tmp[3];
1755 for (unsigned i = 0; i < 3; i++) {
1756 tmp[i] = LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_id, lp_build_const_int32(gallivm, i), "");
1757 if (instr->dest.ssa.bit_size == 64)
1758 tmp[i] = LLVMBuildZExt(gallivm->builder, tmp[i], bld_base->uint64_bld.elem_type, "");
1759 result[i] = lp_build_broadcast_scalar(bld_broad, tmp[i]);
1760 }
1761 break;
1762 }
1763 case nir_intrinsic_load_local_invocation_id:
1764 for (unsigned i = 0; i < 3; i++)
1765 result[i] = LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, i, "");
1766 break;
1767 case nir_intrinsic_load_local_invocation_index: {
1768 LLVMValueRef tmp, tmp2;
1769 tmp = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, 1), ""));
1770 tmp2 = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, 0), ""));
1771 tmp = lp_build_mul(&bld_base->uint_bld, tmp, tmp2);
1772 tmp = lp_build_mul(&bld_base->uint_bld, tmp, LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, 2, ""));
1773
1774 tmp2 = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, 0), ""));
1775 tmp2 = lp_build_mul(&bld_base->uint_bld, tmp2, LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, 1, ""));
1776 tmp = lp_build_add(&bld_base->uint_bld, tmp, tmp2);
1777 tmp = lp_build_add(&bld_base->uint_bld, tmp, LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, 0, ""));
1778 result[0] = tmp;
1779 break;
1780 }
1781 case nir_intrinsic_load_num_workgroups: {
1782 LLVMValueRef tmp[3];
1783 for (unsigned i = 0; i < 3; i++) {
1784 tmp[i] = LLVMBuildExtractElement(gallivm->builder, bld->system_values.grid_size, lp_build_const_int32(gallivm, i), "");
1785 if (instr->dest.ssa.bit_size == 64)
1786 tmp[i] = LLVMBuildZExt(gallivm->builder, tmp[i], bld_base->uint64_bld.elem_type, "");
1787 result[i] = lp_build_broadcast_scalar(bld_broad, tmp[i]);
1788 }
1789 break;
1790 }
1791 case nir_intrinsic_load_invocation_id:
1792 if (bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL)
1793 result[0] = bld->system_values.invocation_id;
1794 else
1795 result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
1796 break;
1797 case nir_intrinsic_load_front_face:
1798 result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);
1799 break;
1800 case nir_intrinsic_load_draw_id:
1801 result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.draw_id);
1802 break;
1803 default:
1804 break;
1805 case nir_intrinsic_load_workgroup_size:
1806 for (unsigned i = 0; i < 3; i++)
1807 result[i] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, i), ""));
1808 break;
1809 case nir_intrinsic_load_work_dim:
1810 result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.work_dim);
1811 break;
1812 case nir_intrinsic_load_tess_coord:
1813 for (unsigned i = 0; i < 3; i++) {
1814 result[i] = LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_coord, i, "");
1815 }
1816 break;
1817 case nir_intrinsic_load_tess_level_outer:
1818 for (unsigned i = 0; i < 4; i++)
1819 result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_outer, i, ""));
1820 break;
1821 case nir_intrinsic_load_tess_level_inner:
1822 for (unsigned i = 0; i < 2; i++)
1823 result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_inner, i, ""));
1824 break;
1825 case nir_intrinsic_load_patch_vertices_in:
1826 result[0] = bld->system_values.vertices_in;
1827 break;
1828 case nir_intrinsic_load_sample_id:
1829 result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.sample_id);
1830 break;
1831 case nir_intrinsic_load_sample_pos:
1832 for (unsigned i = 0; i < 2; i++) {
1833 LLVMValueRef idx = LLVMBuildMul(gallivm->builder, bld->system_values.sample_id, lp_build_const_int32(gallivm, 2), "");
1834 idx = LLVMBuildAdd(gallivm->builder, idx, lp_build_const_int32(gallivm, i), "");
1835 LLVMValueRef val = lp_build_array_get(gallivm, bld->system_values.sample_pos, idx);
1836 result[i] = lp_build_broadcast_scalar(&bld_base->base, val);
1837 }
1838 break;
1839 case nir_intrinsic_load_sample_mask_in:
1840 result[0] = bld->system_values.sample_mask_in;
1841 break;
1842 case nir_intrinsic_load_view_index:
1843 result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.view_index);
1844 break;
1845 case nir_intrinsic_load_subgroup_invocation: {
1846 LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
1847 for(unsigned i = 0; i < bld->bld_base.base.type.length; ++i)
1848 elems[i] = lp_build_const_int32(gallivm, i);
1849 result[0] = LLVMConstVector(elems, bld->bld_base.base.type.length);
1850 break;
1851 }
1852 case nir_intrinsic_load_subgroup_id:
1853 result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.subgroup_id);
1854 break;
1855 case nir_intrinsic_load_num_subgroups:
1856 result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.num_subgroups);
1857 break;
1858 }
1859 }
1860
emit_helper_invocation(struct lp_build_nir_context * bld_base,LLVMValueRef * dst)1861 static void emit_helper_invocation(struct lp_build_nir_context *bld_base,
1862 LLVMValueRef *dst)
1863 {
1864 struct gallivm_state *gallivm = bld_base->base.gallivm;
1865 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1866 *dst = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, mask_vec(bld_base), lp_build_const_int_vec(gallivm, uint_bld->type, -1));
1867 }
1868
bgnloop(struct lp_build_nir_context * bld_base)1869 static void bgnloop(struct lp_build_nir_context *bld_base)
1870 {
1871 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1872 lp_exec_bgnloop(&bld->exec_mask, true);
1873 }
1874
endloop(struct lp_build_nir_context * bld_base)1875 static void endloop(struct lp_build_nir_context *bld_base)
1876 {
1877 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1878 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
1879 }
1880
if_cond(struct lp_build_nir_context * bld_base,LLVMValueRef cond)1881 static void if_cond(struct lp_build_nir_context *bld_base, LLVMValueRef cond)
1882 {
1883 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1884 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1885 lp_exec_mask_cond_push(&bld->exec_mask, LLVMBuildBitCast(builder, cond, bld_base->base.int_vec_type, ""));
1886 }
1887
else_stmt(struct lp_build_nir_context * bld_base)1888 static void else_stmt(struct lp_build_nir_context *bld_base)
1889 {
1890 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1891 lp_exec_mask_cond_invert(&bld->exec_mask);
1892 }
1893
endif_stmt(struct lp_build_nir_context * bld_base)1894 static void endif_stmt(struct lp_build_nir_context *bld_base)
1895 {
1896 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1897 lp_exec_mask_cond_pop(&bld->exec_mask);
1898 }
1899
break_stmt(struct lp_build_nir_context * bld_base)1900 static void break_stmt(struct lp_build_nir_context *bld_base)
1901 {
1902 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1903
1904 lp_exec_break(&bld->exec_mask, NULL, false);
1905 }
1906
continue_stmt(struct lp_build_nir_context * bld_base)1907 static void continue_stmt(struct lp_build_nir_context *bld_base)
1908 {
1909 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1910 lp_exec_continue(&bld->exec_mask);
1911 }
1912
discard(struct lp_build_nir_context * bld_base,LLVMValueRef cond)1913 static void discard(struct lp_build_nir_context *bld_base, LLVMValueRef cond)
1914 {
1915 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1916 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1917 LLVMValueRef mask;
1918
1919 if (!cond) {
1920 if (bld->exec_mask.has_mask) {
1921 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
1922 } else {
1923 mask = LLVMConstNull(bld->bld_base.base.int_vec_type);
1924 }
1925 } else {
1926 mask = LLVMBuildNot(builder, cond, "");
1927 if (bld->exec_mask.has_mask) {
1928 LLVMValueRef invmask;
1929 invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
1930 mask = LLVMBuildOr(builder, mask, invmask, "");
1931 }
1932 }
1933 lp_build_mask_update(bld->mask, mask);
1934 }
1935
1936 static void
increment_vec_ptr_by_mask(struct lp_build_nir_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)1937 increment_vec_ptr_by_mask(struct lp_build_nir_context * bld_base,
1938 LLVMValueRef ptr,
1939 LLVMValueRef mask)
1940 {
1941 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1942 LLVMValueRef current_vec = LLVMBuildLoad2(builder, LLVMTypeOf(mask), ptr, "");
1943
1944 current_vec = LLVMBuildSub(builder, current_vec, mask, "");
1945
1946 LLVMBuildStore(builder, current_vec, ptr);
1947 }
1948
1949 static void
clear_uint_vec_ptr_from_mask(struct lp_build_nir_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)1950 clear_uint_vec_ptr_from_mask(struct lp_build_nir_context * bld_base,
1951 LLVMValueRef ptr,
1952 LLVMValueRef mask)
1953 {
1954 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1955 LLVMValueRef current_vec = LLVMBuildLoad2(builder, bld_base->uint_bld.vec_type, ptr, "");
1956
1957 current_vec = lp_build_select(&bld_base->uint_bld,
1958 mask,
1959 bld_base->uint_bld.zero,
1960 current_vec);
1961
1962 LLVMBuildStore(builder, current_vec, ptr);
1963 }
1964
1965 static LLVMValueRef
clamp_mask_to_max_output_vertices(struct lp_build_nir_soa_context * bld,LLVMValueRef current_mask_vec,LLVMValueRef total_emitted_vertices_vec)1966 clamp_mask_to_max_output_vertices(struct lp_build_nir_soa_context * bld,
1967 LLVMValueRef current_mask_vec,
1968 LLVMValueRef total_emitted_vertices_vec)
1969 {
1970 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1971 struct lp_build_context *int_bld = &bld->bld_base.int_bld;
1972 LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
1973 total_emitted_vertices_vec,
1974 bld->max_output_vertices_vec);
1975
1976 return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
1977 }
1978
emit_vertex(struct lp_build_nir_context * bld_base,uint32_t stream_id)1979 static void emit_vertex(struct lp_build_nir_context *bld_base, uint32_t stream_id)
1980 {
1981 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1982 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1983
1984 if (stream_id >= bld->gs_vertex_streams)
1985 return;
1986 assert(bld->gs_iface->emit_vertex);
1987 LLVMValueRef total_emitted_vertices_vec =
1988 LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->total_emitted_vertices_vec_ptr[stream_id], "");
1989 LLVMValueRef mask = mask_vec(bld_base);
1990 mask = clamp_mask_to_max_output_vertices(bld, mask,
1991 total_emitted_vertices_vec);
1992 bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
1993 bld->outputs,
1994 total_emitted_vertices_vec,
1995 mask,
1996 lp_build_const_int_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, stream_id));
1997
1998 increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
1999 mask);
2000 increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr[stream_id],
2001 mask);
2002 }
2003
2004 static void
end_primitive_masked(struct lp_build_nir_context * bld_base,LLVMValueRef mask,uint32_t stream_id)2005 end_primitive_masked(struct lp_build_nir_context * bld_base,
2006 LLVMValueRef mask, uint32_t stream_id)
2007 {
2008 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2009 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2010
2011 if (stream_id >= bld->gs_vertex_streams)
2012 return;
2013 struct lp_build_context *uint_bld = &bld_base->uint_bld;
2014 LLVMValueRef emitted_vertices_vec =
2015 LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->emitted_vertices_vec_ptr[stream_id], "");
2016 LLVMValueRef emitted_prims_vec =
2017 LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->emitted_prims_vec_ptr[stream_id], "");
2018 LLVMValueRef total_emitted_vertices_vec =
2019 LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->total_emitted_vertices_vec_ptr[stream_id], "");
2020
2021 LLVMValueRef emitted_mask = lp_build_cmp(uint_bld,
2022 PIPE_FUNC_NOTEQUAL,
2023 emitted_vertices_vec,
2024 uint_bld->zero);
2025 mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
2026 bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
2027 total_emitted_vertices_vec,
2028 emitted_vertices_vec, emitted_prims_vec, mask, stream_id);
2029 increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr[stream_id],
2030 mask);
2031 clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
2032 mask);
2033 }
2034
end_primitive(struct lp_build_nir_context * bld_base,uint32_t stream_id)2035 static void end_primitive(struct lp_build_nir_context *bld_base, uint32_t stream_id)
2036 {
2037 ASSERTED struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2038
2039 assert(bld->gs_iface->end_primitive);
2040
2041 LLVMValueRef mask = mask_vec(bld_base);
2042 end_primitive_masked(bld_base, mask, stream_id);
2043 }
2044
2045 static void
emit_prologue(struct lp_build_nir_soa_context * bld)2046 emit_prologue(struct lp_build_nir_soa_context *bld)
2047 {
2048 struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
2049 if (bld->indirects & nir_var_shader_in && !bld->gs_iface && !bld->tcs_iface && !bld->tes_iface) {
2050 uint32_t num_inputs = util_bitcount64(bld->bld_base.shader->info.inputs_read);
2051 unsigned index, chan;
2052 LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2053 LLVMValueRef array_size = lp_build_const_int32(gallivm, num_inputs * 4);
2054 bld->inputs_array = lp_build_array_alloca(gallivm,
2055 vec_type, array_size,
2056 "input_array");
2057
2058 for (index = 0; index < num_inputs; ++index) {
2059 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
2060 LLVMValueRef lindex =
2061 lp_build_const_int32(gallivm, index * 4 + chan);
2062 LLVMValueRef input_ptr =
2063 LLVMBuildGEP2(gallivm->builder, vec_type, bld->inputs_array, &lindex, 1, "");
2064 LLVMValueRef value = bld->inputs[index][chan];
2065 if (value)
2066 LLVMBuildStore(gallivm->builder, value, input_ptr);
2067 }
2068 }
2069 }
2070 }
2071
emit_vote(struct lp_build_nir_context * bld_base,LLVMValueRef src,nir_intrinsic_instr * instr,LLVMValueRef result[4])2072 static void emit_vote(struct lp_build_nir_context *bld_base, LLVMValueRef src,
2073 nir_intrinsic_instr *instr, LLVMValueRef result[4])
2074 {
2075 struct gallivm_state * gallivm = bld_base->base.gallivm;
2076 LLVMBuilderRef builder = gallivm->builder;
2077 uint32_t bit_size = nir_src_bit_size(instr->src[0]);
2078 LLVMValueRef exec_mask = mask_vec(bld_base);
2079 struct lp_build_loop_state loop_state;
2080 LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
2081
2082 LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->uint_bld.elem_type, "");
2083 LLVMValueRef eq_store = lp_build_alloca(gallivm, get_int_bld(bld_base, true, bit_size)->elem_type, "");
2084 LLVMValueRef init_val = NULL;
2085 if (instr->intrinsic == nir_intrinsic_vote_ieq ||
2086 instr->intrinsic == nir_intrinsic_vote_feq) {
2087 /* for equal we unfortunately have to loop and find the first valid one. */
2088 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2089 LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
2090
2091 struct lp_build_if_state ifthen;
2092 lp_build_if(&ifthen, gallivm, if_cond);
2093 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
2094 loop_state.counter, "");
2095 LLVMBuildStore(builder, value_ptr, eq_store);
2096 LLVMBuildStore(builder, lp_build_const_int32(gallivm, -1), res_store);
2097 lp_build_endif(&ifthen);
2098 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2099 NULL, LLVMIntUGE);
2100 init_val = LLVMBuildLoad2(builder, get_int_bld(bld_base, true, bit_size)->elem_type, eq_store, "");
2101 } else {
2102 LLVMBuildStore(builder, lp_build_const_int32(gallivm, instr->intrinsic == nir_intrinsic_vote_any ? 0 : -1), res_store);
2103 }
2104
2105 LLVMValueRef res;
2106 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2107 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
2108 loop_state.counter, "");
2109 struct lp_build_if_state ifthen;
2110 LLVMValueRef if_cond;
2111 if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
2112
2113 lp_build_if(&ifthen, gallivm, if_cond);
2114 res = LLVMBuildLoad2(builder, bld_base->uint_bld.elem_type, res_store, "");
2115
2116 if (instr->intrinsic == nir_intrinsic_vote_feq) {
2117 struct lp_build_context *flt_bld = get_flt_bld(bld_base, bit_size);
2118 LLVMValueRef tmp = LLVMBuildFCmp(builder, LLVMRealUEQ,
2119 LLVMBuildBitCast(builder, init_val, flt_bld->elem_type, ""),
2120 LLVMBuildBitCast(builder, value_ptr, flt_bld->elem_type, ""), "");
2121 tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, "");
2122 res = LLVMBuildAnd(builder, res, tmp, "");
2123 } else if (instr->intrinsic == nir_intrinsic_vote_ieq) {
2124 LLVMValueRef tmp = LLVMBuildICmp(builder, LLVMIntEQ, init_val, value_ptr, "");
2125 tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, "");
2126 res = LLVMBuildAnd(builder, res, tmp, "");
2127 } else if (instr->intrinsic == nir_intrinsic_vote_any)
2128 res = LLVMBuildOr(builder, res, value_ptr, "");
2129 else
2130 res = LLVMBuildAnd(builder, res, value_ptr, "");
2131 LLVMBuildStore(builder, res, res_store);
2132 lp_build_endif(&ifthen);
2133 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2134 NULL, LLVMIntUGE);
2135 result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld,
2136 LLVMBuildLoad2(builder, bld_base->uint_bld.elem_type, res_store, ""));
2137 }
2138
emit_ballot(struct lp_build_nir_context * bld_base,LLVMValueRef src,nir_intrinsic_instr * instr,LLVMValueRef result[4])2139 static void emit_ballot(struct lp_build_nir_context *bld_base, LLVMValueRef src, nir_intrinsic_instr *instr, LLVMValueRef result[4])
2140 {
2141 struct gallivm_state * gallivm = bld_base->base.gallivm;
2142 LLVMBuilderRef builder = gallivm->builder;
2143 LLVMValueRef exec_mask = mask_vec(bld_base);
2144 struct lp_build_loop_state loop_state;
2145 src = LLVMBuildAnd(builder, src, exec_mask, "");
2146 LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
2147 LLVMValueRef res;
2148 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2149 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
2150 loop_state.counter, "");
2151 res = LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, res_store, "");
2152 res = LLVMBuildOr(builder,
2153 res,
2154 LLVMBuildAnd(builder, value_ptr, LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1), loop_state.counter, ""), ""), "");
2155 LLVMBuildStore(builder, res, res_store);
2156
2157 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2158 NULL, LLVMIntUGE);
2159 result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld,
2160 LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, res_store, ""));
2161 }
2162
emit_elect(struct lp_build_nir_context * bld_base,LLVMValueRef result[4])2163 static void emit_elect(struct lp_build_nir_context *bld_base, LLVMValueRef result[4])
2164 {
2165 struct gallivm_state *gallivm = bld_base->base.gallivm;
2166 LLVMBuilderRef builder = gallivm->builder;
2167 LLVMValueRef exec_mask = mask_vec(bld_base);
2168 struct lp_build_loop_state loop_state;
2169
2170 LLVMValueRef idx_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
2171 LLVMValueRef found_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
2172 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2173 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, exec_mask,
2174 loop_state.counter, "");
2175 LLVMValueRef cond = LLVMBuildICmp(gallivm->builder,
2176 LLVMIntEQ,
2177 value_ptr,
2178 lp_build_const_int32(gallivm, -1), "");
2179 LLVMValueRef cond2 = LLVMBuildICmp(gallivm->builder,
2180 LLVMIntEQ,
2181 LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, found_store, ""),
2182 lp_build_const_int32(gallivm, 0), "");
2183
2184 cond = LLVMBuildAnd(builder, cond, cond2, "");
2185 struct lp_build_if_state ifthen;
2186 lp_build_if(&ifthen, gallivm, cond);
2187 LLVMBuildStore(builder, lp_build_const_int32(gallivm, 1), found_store);
2188 LLVMBuildStore(builder, loop_state.counter, idx_store);
2189 lp_build_endif(&ifthen);
2190 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2191 NULL, LLVMIntUGE);
2192
2193 result[0] = LLVMBuildInsertElement(builder, bld_base->uint_bld.zero,
2194 lp_build_const_int32(gallivm, -1),
2195 LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, idx_store, ""),
2196 "");
2197 }
2198
2199 #if LLVM_VERSION_MAJOR >= 10
emit_shuffle(struct lp_build_nir_context * bld_base,LLVMValueRef src,LLVMValueRef index,nir_intrinsic_instr * instr,LLVMValueRef result[4])2200 static void emit_shuffle(struct lp_build_nir_context *bld_base, LLVMValueRef src, LLVMValueRef index,
2201 nir_intrinsic_instr *instr, LLVMValueRef result[4])
2202 {
2203 assert(instr->intrinsic == nir_intrinsic_shuffle);
2204
2205 struct gallivm_state *gallivm = bld_base->base.gallivm;
2206 LLVMBuilderRef builder = gallivm->builder;
2207 uint32_t bit_size = nir_src_bit_size(instr->src[0]);
2208 uint32_t index_bit_size = nir_src_bit_size(instr->src[1]);
2209 struct lp_build_context *int_bld = get_int_bld(bld_base, true, bit_size);
2210
2211 bool index_is_constant_data = LLVMIsAConstantAggregateZero(index) || LLVMIsAConstantDataSequential(index) || LLVMIsAUndefValue(index);
2212
2213 if (index_is_constant_data) {
2214 /* freeze `src` in case inactive invocations contain poison */
2215 src = LLVMBuildFreeze(builder, src, "");
2216 result[0] = LLVMBuildShuffleVector(builder, src, LLVMGetUndef(LLVMTypeOf(src)), index, "");
2217 } else if (util_get_cpu_caps()->has_avx2 && bit_size == 32 && index_bit_size == 32 && int_bld->type.length == 8) {
2218 /* freeze `src` in case inactive invocations contain poison */
2219 src = LLVMBuildFreeze(builder, src, "");
2220 result[0] = lp_build_intrinsic_binary(builder, "llvm.x86.avx2.permd", int_bld->vec_type, src, index);
2221 } else {
2222 LLVMValueRef res_store = lp_build_alloca(gallivm, int_bld->vec_type, "");
2223 struct lp_build_loop_state loop_state;
2224 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2225
2226 LLVMValueRef index_value = LLVMBuildExtractElement(builder, index, loop_state.counter, "");
2227
2228 LLVMValueRef src_value = LLVMBuildExtractElement(builder, src, index_value, "");
2229 /* freeze `src_value` in case an out-of-bounds index or an index into an
2230 * inactive invocation results in poison
2231 */
2232 src_value = LLVMBuildFreeze(builder, src_value, "");
2233
2234 LLVMValueRef res = LLVMBuildLoad(builder, res_store, "");
2235 res = LLVMBuildInsertElement(builder, res, src_value, loop_state.counter, "");
2236 LLVMBuildStore(builder, res, res_store);
2237
2238 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2239 NULL, LLVMIntUGE);
2240
2241 result[0] = LLVMBuildLoad(builder, res_store, "");
2242 }
2243 }
2244 #endif
2245
emit_reduce(struct lp_build_nir_context * bld_base,LLVMValueRef src,nir_intrinsic_instr * instr,LLVMValueRef result[4])2246 static void emit_reduce(struct lp_build_nir_context *bld_base, LLVMValueRef src,
2247 nir_intrinsic_instr *instr, LLVMValueRef result[4])
2248 {
2249 struct gallivm_state *gallivm = bld_base->base.gallivm;
2250 LLVMBuilderRef builder = gallivm->builder;
2251 uint32_t bit_size = nir_src_bit_size(instr->src[0]);
2252 /* can't use llvm reduction intrinsics because of exec_mask */
2253 LLVMValueRef exec_mask = mask_vec(bld_base);
2254 struct lp_build_loop_state loop_state;
2255 nir_op reduction_op = nir_intrinsic_reduction_op(instr);
2256
2257 LLVMValueRef res_store = NULL;
2258 LLVMValueRef scan_store;
2259 struct lp_build_context *int_bld = get_int_bld(bld_base, true, bit_size);
2260
2261 if (instr->intrinsic != nir_intrinsic_reduce)
2262 res_store = lp_build_alloca(gallivm, int_bld->vec_type, "");
2263
2264 scan_store = lp_build_alloca(gallivm, int_bld->elem_type, "");
2265
2266 struct lp_build_context elem_bld;
2267 bool is_flt = reduction_op == nir_op_fadd ||
2268 reduction_op == nir_op_fmul ||
2269 reduction_op == nir_op_fmin ||
2270 reduction_op == nir_op_fmax;
2271 bool is_unsigned = reduction_op == nir_op_umin ||
2272 reduction_op == nir_op_umax;
2273
2274 struct lp_build_context *vec_bld = is_flt ? get_flt_bld(bld_base, bit_size) :
2275 get_int_bld(bld_base, is_unsigned, bit_size);
2276
2277 lp_build_context_init(&elem_bld, gallivm, lp_elem_type(vec_bld->type));
2278
2279 LLVMValueRef store_val = NULL;
2280 /*
2281 * Put the identity value for the operation into the storage
2282 */
2283 switch (reduction_op) {
2284 case nir_op_fmin: {
2285 LLVMValueRef flt_max = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), INFINITY) :
2286 (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), INFINITY) : lp_build_const_float(gallivm, INFINITY));
2287 store_val = LLVMBuildBitCast(builder, flt_max, int_bld->elem_type, "");
2288 break;
2289 }
2290 case nir_op_fmax: {
2291 LLVMValueRef flt_min = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), -INFINITY) :
2292 (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), -INFINITY) : lp_build_const_float(gallivm, -INFINITY));
2293 store_val = LLVMBuildBitCast(builder, flt_min, int_bld->elem_type, "");
2294 break;
2295 }
2296 case nir_op_fmul: {
2297 LLVMValueRef flt_one = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), 1.0) :
2298 (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), 1.0) : lp_build_const_float(gallivm, 1.0));
2299 store_val = LLVMBuildBitCast(builder, flt_one, int_bld->elem_type, "");
2300 break;
2301 }
2302 case nir_op_umin:
2303 switch (bit_size) {
2304 case 8:
2305 store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), UINT8_MAX, 0);
2306 break;
2307 case 16:
2308 store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), UINT16_MAX, 0);
2309 break;
2310 case 32:
2311 default:
2312 store_val = lp_build_const_int32(gallivm, UINT_MAX);
2313 break;
2314 case 64:
2315 store_val = lp_build_const_int64(gallivm, UINT64_MAX);
2316 break;
2317 }
2318 break;
2319 case nir_op_imin:
2320 switch (bit_size) {
2321 case 8:
2322 store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), INT8_MAX, 0);
2323 break;
2324 case 16:
2325 store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), INT16_MAX, 0);
2326 break;
2327 case 32:
2328 default:
2329 store_val = lp_build_const_int32(gallivm, INT_MAX);
2330 break;
2331 case 64:
2332 store_val = lp_build_const_int64(gallivm, INT64_MAX);
2333 break;
2334 }
2335 break;
2336 case nir_op_imax:
2337 switch (bit_size) {
2338 case 8:
2339 store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), INT8_MIN, 0);
2340 break;
2341 case 16:
2342 store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), INT16_MIN, 0);
2343 break;
2344 case 32:
2345 default:
2346 store_val = lp_build_const_int32(gallivm, INT_MIN);
2347 break;
2348 case 64:
2349 store_val = lp_build_const_int64(gallivm, INT64_MIN);
2350 break;
2351 }
2352 break;
2353 case nir_op_imul:
2354 switch (bit_size) {
2355 case 8:
2356 store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 1, 0);
2357 break;
2358 case 16:
2359 store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 1, 0);
2360 break;
2361 case 32:
2362 default:
2363 store_val = lp_build_const_int32(gallivm, 1);
2364 break;
2365 case 64:
2366 store_val = lp_build_const_int64(gallivm, 1);
2367 break;
2368 }
2369 break;
2370 case nir_op_iand:
2371 switch (bit_size) {
2372 case 8:
2373 store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0xff, 0);
2374 break;
2375 case 16:
2376 store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0xffff, 0);
2377 break;
2378 case 32:
2379 default:
2380 store_val = lp_build_const_int32(gallivm, 0xffffffff);
2381 break;
2382 case 64:
2383 store_val = lp_build_const_int64(gallivm, 0xffffffffffffffffLL);
2384 break;
2385 }
2386 break;
2387 default:
2388 break;
2389 }
2390 if (store_val)
2391 LLVMBuildStore(builder, store_val, scan_store);
2392
2393 LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
2394
2395 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2396
2397 struct lp_build_if_state ifthen;
2398 LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
2399 lp_build_if(&ifthen, gallivm, if_cond);
2400 LLVMValueRef value = LLVMBuildExtractElement(gallivm->builder, src, loop_state.counter, "");
2401
2402 LLVMValueRef res = NULL;
2403 LLVMValueRef scan_val = LLVMBuildLoad2(gallivm->builder, int_bld->elem_type, scan_store, "");
2404 if (instr->intrinsic != nir_intrinsic_reduce)
2405 res = LLVMBuildLoad2(gallivm->builder, int_bld->vec_type, res_store, "");
2406
2407 if (instr->intrinsic == nir_intrinsic_exclusive_scan)
2408 res = LLVMBuildInsertElement(builder, res, scan_val, loop_state.counter, "");
2409
2410 if (is_flt) {
2411 scan_val = LLVMBuildBitCast(builder, scan_val, elem_bld.elem_type, "");
2412 value = LLVMBuildBitCast(builder, value, elem_bld.elem_type, "");
2413 }
2414 switch (reduction_op) {
2415 case nir_op_fadd:
2416 case nir_op_iadd:
2417 scan_val = lp_build_add(&elem_bld, value, scan_val);
2418 break;
2419 case nir_op_fmul:
2420 case nir_op_imul:
2421 scan_val = lp_build_mul(&elem_bld, value, scan_val);
2422 break;
2423 case nir_op_imin:
2424 case nir_op_umin:
2425 case nir_op_fmin:
2426 scan_val = lp_build_min(&elem_bld, value, scan_val);
2427 break;
2428 case nir_op_imax:
2429 case nir_op_umax:
2430 case nir_op_fmax:
2431 scan_val = lp_build_max(&elem_bld, value, scan_val);
2432 break;
2433 case nir_op_iand:
2434 scan_val = lp_build_and(&elem_bld, value, scan_val);
2435 break;
2436 case nir_op_ior:
2437 scan_val = lp_build_or(&elem_bld, value, scan_val);
2438 break;
2439 case nir_op_ixor:
2440 scan_val = lp_build_xor(&elem_bld, value, scan_val);
2441 break;
2442 default:
2443 assert(0);
2444 break;
2445 }
2446 if (is_flt)
2447 scan_val = LLVMBuildBitCast(builder, scan_val, int_bld->elem_type, "");
2448 LLVMBuildStore(builder, scan_val, scan_store);
2449
2450 if (instr->intrinsic == nir_intrinsic_inclusive_scan) {
2451 res = LLVMBuildInsertElement(builder, res, scan_val, loop_state.counter, "");
2452 }
2453
2454 if (instr->intrinsic != nir_intrinsic_reduce)
2455 LLVMBuildStore(builder, res, res_store);
2456 lp_build_endif(&ifthen);
2457
2458 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2459 NULL, LLVMIntUGE);
2460 if (instr->intrinsic == nir_intrinsic_reduce)
2461 result[0] = lp_build_broadcast_scalar(int_bld, LLVMBuildLoad2(builder, int_bld->elem_type, scan_store, ""));
2462 else
2463 result[0] = LLVMBuildLoad2(builder, int_bld->vec_type, res_store, "");
2464 }
2465
emit_read_invocation(struct lp_build_nir_context * bld_base,LLVMValueRef src,unsigned bit_size,LLVMValueRef invoc,LLVMValueRef result[4])2466 static void emit_read_invocation(struct lp_build_nir_context *bld_base,
2467 LLVMValueRef src,
2468 unsigned bit_size,
2469 LLVMValueRef invoc,
2470 LLVMValueRef result[4])
2471 {
2472 struct gallivm_state *gallivm = bld_base->base.gallivm;
2473 LLVMBuilderRef builder = gallivm->builder;
2474 LLVMValueRef idx;
2475 struct lp_build_context *uint_bld = get_int_bld(bld_base, true, bit_size);
2476
2477 /* have to find the first active invocation */
2478 LLVMValueRef exec_mask = mask_vec(bld_base);
2479 struct lp_build_loop_state loop_state;
2480 LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
2481 LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
2482 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length));
2483
2484 LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
2485 struct lp_build_if_state ifthen;
2486
2487 lp_build_if(&ifthen, gallivm, if_cond);
2488 LLVMValueRef store_val = loop_state.counter;
2489 if (invoc)
2490 store_val = LLVMBuildExtractElement(gallivm->builder, invoc, loop_state.counter, "");
2491 LLVMBuildStore(builder, store_val, res_store);
2492 lp_build_endif(&ifthen);
2493
2494 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, -1),
2495 lp_build_const_int32(gallivm, -1), LLVMIntEQ);
2496 idx = LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, res_store, "");
2497
2498 LLVMValueRef value = LLVMBuildExtractElement(gallivm->builder,
2499 src, idx, "");
2500 result[0] = lp_build_broadcast_scalar(uint_bld, value);
2501 }
2502
2503 static void
emit_interp_at(struct lp_build_nir_context * bld_base,unsigned num_components,nir_variable * var,bool centroid,bool sample,unsigned const_index,LLVMValueRef indir_index,LLVMValueRef offsets[2],LLVMValueRef dst[4])2504 emit_interp_at(struct lp_build_nir_context *bld_base,
2505 unsigned num_components,
2506 nir_variable *var,
2507 bool centroid,
2508 bool sample,
2509 unsigned const_index,
2510 LLVMValueRef indir_index,
2511 LLVMValueRef offsets[2],
2512 LLVMValueRef dst[4])
2513 {
2514 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2515
2516 for (unsigned i = 0; i < num_components; i++) {
2517 dst[i] = bld->fs_iface->interp_fn(bld->fs_iface, &bld_base->base,
2518 const_index + var->data.driver_location, i + var->data.location_frac,
2519 centroid, sample, indir_index, offsets);
2520 }
2521 }
2522
get_scratch_thread_offsets(struct gallivm_state * gallivm,struct lp_type type,unsigned scratch_size)2523 static LLVMValueRef get_scratch_thread_offsets(struct gallivm_state *gallivm,
2524 struct lp_type type,
2525 unsigned scratch_size)
2526 {
2527 LLVMTypeRef elem_type = lp_build_int_elem_type(gallivm, type);
2528 LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
2529 unsigned i;
2530
2531 if (type.length == 1)
2532 return LLVMConstInt(elem_type, 0, 0);
2533
2534 for (i = 0; i < type.length; ++i)
2535 elems[i] = LLVMConstInt(elem_type, scratch_size * i, 0);
2536
2537 return LLVMConstVector(elems, type.length);
2538 }
2539
2540 static void
emit_load_scratch(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,LLVMValueRef offset,LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])2541 emit_load_scratch(struct lp_build_nir_context *bld_base,
2542 unsigned nc, unsigned bit_size,
2543 LLVMValueRef offset,
2544 LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
2545 {
2546 struct gallivm_state * gallivm = bld_base->base.gallivm;
2547 LLVMBuilderRef builder = gallivm->builder;
2548 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2549 struct lp_build_context *uint_bld = &bld_base->uint_bld;
2550 struct lp_build_context *load_bld;
2551 LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);;
2552 uint32_t shift_val = bit_size_to_shift_size(bit_size);
2553 LLVMValueRef exec_mask = mask_vec(bld_base);
2554
2555 load_bld = get_int_bld(bld_base, true, bit_size);
2556
2557 offset = lp_build_add(uint_bld, offset, thread_offsets);
2558 offset = lp_build_shr_imm(uint_bld, offset, shift_val);
2559 for (unsigned c = 0; c < nc; c++) {
2560 LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
2561
2562 LLVMValueRef result = lp_build_alloca(gallivm, load_bld->vec_type, "");
2563 struct lp_build_loop_state loop_state;
2564 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2565
2566 struct lp_build_if_state ifthen;
2567 LLVMValueRef cond, temp_res;
2568
2569 loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
2570 loop_state.counter, "");
2571 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
2572 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
2573
2574 lp_build_if(&ifthen, gallivm, cond);
2575 LLVMValueRef scalar;
2576 LLVMValueRef ptr2 = LLVMBuildBitCast(builder, bld->scratch_ptr, LLVMPointerType(load_bld->elem_type, 0), "");
2577 scalar = lp_build_pointer_get(builder, ptr2, loop_index);
2578
2579 temp_res = LLVMBuildLoad2(builder, load_bld->vec_type, result, "");
2580 temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
2581 LLVMBuildStore(builder, temp_res, result);
2582 lp_build_else(&ifthen);
2583 temp_res = LLVMBuildLoad2(builder, load_bld->vec_type, result, "");
2584 LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size);
2585 temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");
2586 LLVMBuildStore(builder, temp_res, result);
2587 lp_build_endif(&ifthen);
2588 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
2589 NULL, LLVMIntUGE);
2590 outval[c] = LLVMBuildLoad2(gallivm->builder, load_bld->vec_type, result, "");
2591 }
2592 }
2593
2594 static void
emit_store_scratch(struct lp_build_nir_context * bld_base,unsigned writemask,unsigned nc,unsigned bit_size,LLVMValueRef offset,LLVMValueRef dst)2595 emit_store_scratch(struct lp_build_nir_context *bld_base,
2596 unsigned writemask, unsigned nc,
2597 unsigned bit_size, LLVMValueRef offset,
2598 LLVMValueRef dst)
2599 {
2600 struct gallivm_state * gallivm = bld_base->base.gallivm;
2601 LLVMBuilderRef builder = gallivm->builder;
2602 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2603 struct lp_build_context *uint_bld = &bld_base->uint_bld;
2604 struct lp_build_context *store_bld;
2605 LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);;
2606 uint32_t shift_val = bit_size_to_shift_size(bit_size);
2607 store_bld = get_int_bld(bld_base, true, bit_size);
2608
2609 LLVMValueRef exec_mask = mask_vec(bld_base);
2610 offset = lp_build_add(uint_bld, offset, thread_offsets);
2611 offset = lp_build_shr_imm(uint_bld, offset, shift_val);
2612
2613 for (unsigned c = 0; c < nc; c++) {
2614 if (!(writemask & (1u << c)))
2615 continue;
2616 LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
2617 LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
2618
2619 struct lp_build_loop_state loop_state;
2620 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2621
2622 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
2623 loop_state.counter, "");
2624 value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, "");
2625
2626 struct lp_build_if_state ifthen;
2627 LLVMValueRef cond;
2628
2629 loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
2630 loop_state.counter, "");
2631
2632 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
2633 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
2634 lp_build_if(&ifthen, gallivm, cond);
2635
2636 LLVMValueRef ptr2 = LLVMBuildBitCast(builder, bld->scratch_ptr, LLVMPointerType(store_bld->elem_type, 0), "");
2637 lp_build_pointer_set(builder, ptr2, loop_index, value_ptr);
2638
2639 lp_build_endif(&ifthen);
2640 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
2641 NULL, LLVMIntUGE);
2642 }
2643 }
2644
lp_build_nir_soa(struct gallivm_state * gallivm,struct nir_shader * shader,const struct lp_build_tgsi_params * params,LLVMValueRef (* outputs)[4])2645 void lp_build_nir_soa(struct gallivm_state *gallivm,
2646 struct nir_shader *shader,
2647 const struct lp_build_tgsi_params *params,
2648 LLVMValueRef (*outputs)[4])
2649 {
2650 struct lp_build_nir_soa_context bld;
2651 const struct lp_type type = params->type;
2652 struct lp_type res_type;
2653
2654 assert(type.length <= LP_MAX_VECTOR_LENGTH);
2655 memset(&res_type, 0, sizeof res_type);
2656 res_type.width = type.width;
2657 res_type.length = type.length;
2658 res_type.sign = 1;
2659
2660 /* Setup build context */
2661 memset(&bld, 0, sizeof bld);
2662 lp_build_context_init(&bld.bld_base.base, gallivm, type);
2663 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
2664 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
2665 lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
2666 lp_build_context_init(&bld.uint_elem_bld, gallivm, lp_elem_type(lp_uint_type(type)));
2667 {
2668 struct lp_type dbl_type;
2669 dbl_type = type;
2670 dbl_type.width *= 2;
2671 lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
2672 }
2673 {
2674 struct lp_type half_type;
2675 half_type = type;
2676 half_type.width /= 2;
2677 lp_build_context_init(&bld.bld_base.half_bld, gallivm, half_type);
2678 }
2679 {
2680 struct lp_type uint64_type;
2681 uint64_type = lp_uint_type(type);
2682 uint64_type.width *= 2;
2683 lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
2684 }
2685 {
2686 struct lp_type int64_type;
2687 int64_type = lp_int_type(type);
2688 int64_type.width *= 2;
2689 lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
2690 }
2691 {
2692 struct lp_type uint16_type;
2693 uint16_type = lp_uint_type(type);
2694 uint16_type.width /= 2;
2695 lp_build_context_init(&bld.bld_base.uint16_bld, gallivm, uint16_type);
2696 }
2697 {
2698 struct lp_type int16_type;
2699 int16_type = lp_int_type(type);
2700 int16_type.width /= 2;
2701 lp_build_context_init(&bld.bld_base.int16_bld, gallivm, int16_type);
2702 }
2703 {
2704 struct lp_type uint8_type;
2705 uint8_type = lp_uint_type(type);
2706 uint8_type.width /= 4;
2707 lp_build_context_init(&bld.bld_base.uint8_bld, gallivm, uint8_type);
2708 }
2709 {
2710 struct lp_type int8_type;
2711 int8_type = lp_int_type(type);
2712 int8_type.width /= 4;
2713 lp_build_context_init(&bld.bld_base.int8_bld, gallivm, int8_type);
2714 }
2715 bld.bld_base.load_var = emit_load_var;
2716 bld.bld_base.store_var = emit_store_var;
2717 bld.bld_base.load_reg = emit_load_reg;
2718 bld.bld_base.store_reg = emit_store_reg;
2719 bld.bld_base.emit_var_decl = emit_var_decl;
2720 bld.bld_base.load_ubo = emit_load_ubo;
2721 bld.bld_base.load_kernel_arg = emit_load_kernel_arg;
2722 bld.bld_base.load_global = emit_load_global;
2723 bld.bld_base.store_global = emit_store_global;
2724 bld.bld_base.atomic_global = emit_atomic_global;
2725 bld.bld_base.tex = emit_tex;
2726 bld.bld_base.tex_size = emit_tex_size;
2727 bld.bld_base.bgnloop = bgnloop;
2728 bld.bld_base.endloop = endloop;
2729 bld.bld_base.if_cond = if_cond;
2730 bld.bld_base.else_stmt = else_stmt;
2731 bld.bld_base.endif_stmt = endif_stmt;
2732 bld.bld_base.break_stmt = break_stmt;
2733 bld.bld_base.continue_stmt = continue_stmt;
2734 bld.bld_base.sysval_intrin = emit_sysval_intrin;
2735 bld.bld_base.discard = discard;
2736 bld.bld_base.emit_vertex = emit_vertex;
2737 bld.bld_base.end_primitive = end_primitive;
2738 bld.bld_base.load_mem = emit_load_mem;
2739 bld.bld_base.store_mem = emit_store_mem;
2740 bld.bld_base.get_ssbo_size = emit_get_ssbo_size;
2741 bld.bld_base.atomic_mem = emit_atomic_mem;
2742 bld.bld_base.barrier = emit_barrier;
2743 bld.bld_base.image_op = emit_image_op;
2744 bld.bld_base.image_size = emit_image_size;
2745 bld.bld_base.vote = emit_vote;
2746 bld.bld_base.elect = emit_elect;
2747 bld.bld_base.reduce = emit_reduce;
2748 bld.bld_base.ballot = emit_ballot;
2749 #if LLVM_VERSION_MAJOR >= 10
2750 bld.bld_base.shuffle = emit_shuffle;
2751 #endif
2752 bld.bld_base.read_invocation = emit_read_invocation;
2753 bld.bld_base.helper_invocation = emit_helper_invocation;
2754 bld.bld_base.interp_at = emit_interp_at;
2755 bld.bld_base.load_scratch = emit_load_scratch;
2756 bld.bld_base.store_scratch = emit_store_scratch;
2757 bld.bld_base.load_const = emit_load_const;
2758
2759 bld.mask = params->mask;
2760 bld.inputs = params->inputs;
2761 bld.outputs = outputs;
2762 bld.consts_ptr = params->consts_ptr;
2763 bld.const_sizes_ptr = params->const_sizes_ptr;
2764 bld.ssbo_ptr = params->ssbo_ptr;
2765 bld.ssbo_sizes_ptr = params->ssbo_sizes_ptr;
2766 bld.sampler = params->sampler;
2767 // bld.bld_base.info = params->info;
2768
2769 bld.context_ptr = params->context_ptr;
2770 bld.thread_data_ptr = params->thread_data_ptr;
2771 bld.bld_base.aniso_filter_table = params->aniso_filter_table;
2772 bld.image = params->image;
2773 bld.shared_ptr = params->shared_ptr;
2774 bld.coro = params->coro;
2775 bld.kernel_args_ptr = params->kernel_args;
2776 bld.indirects = 0;
2777 if (params->info->indirect_files & (1 << TGSI_FILE_INPUT))
2778 bld.indirects |= nir_var_shader_in;
2779
2780 bld.gs_iface = params->gs_iface;
2781 bld.tcs_iface = params->tcs_iface;
2782 bld.tes_iface = params->tes_iface;
2783 bld.fs_iface = params->fs_iface;
2784 if (bld.gs_iface) {
2785 struct lp_build_context *uint_bld = &bld.bld_base.uint_bld;
2786
2787 bld.gs_vertex_streams = params->gs_vertex_streams;
2788 bld.max_output_vertices_vec = lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
2789 shader->info.gs.vertices_out);
2790 for (int i = 0; i < params->gs_vertex_streams; i++) {
2791 bld.emitted_prims_vec_ptr[i] =
2792 lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_prims_ptr");
2793 bld.emitted_vertices_vec_ptr[i] =
2794 lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_vertices_ptr");
2795 bld.total_emitted_vertices_vec_ptr[i] =
2796 lp_build_alloca(gallivm, uint_bld->vec_type, "total_emitted_vertices_ptr");
2797 }
2798 }
2799 lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
2800
2801 bld.system_values = *params->system_values;
2802
2803 bld.bld_base.shader = shader;
2804
2805 bld.scratch_size = ALIGN(shader->scratch_size, 8);
2806 if (shader->scratch_size) {
2807 bld.scratch_ptr = lp_build_array_alloca(gallivm,
2808 LLVMInt8TypeInContext(gallivm->context),
2809 lp_build_const_int32(gallivm, bld.scratch_size * type.length),
2810 "scratch");
2811 }
2812
2813 emit_prologue(&bld);
2814 lp_build_nir_llvm(&bld.bld_base, shader);
2815
2816 if (bld.gs_iface) {
2817 LLVMBuilderRef builder = bld.bld_base.base.gallivm->builder;
2818 LLVMValueRef total_emitted_vertices_vec;
2819 LLVMValueRef emitted_prims_vec;
2820
2821 for (int i = 0; i < params->gs_vertex_streams; i++) {
2822 end_primitive_masked(&bld.bld_base, lp_build_mask_value(bld.mask), i);
2823
2824 total_emitted_vertices_vec =
2825 LLVMBuildLoad2(builder, bld.bld_base.uint_bld.vec_type, bld.total_emitted_vertices_vec_ptr[i], "");
2826
2827 emitted_prims_vec =
2828 LLVMBuildLoad2(builder, bld.bld_base.uint_bld.vec_type, bld.emitted_prims_vec_ptr[i], "");
2829 bld.gs_iface->gs_epilogue(bld.gs_iface,
2830 total_emitted_vertices_vec,
2831 emitted_prims_vec, i);
2832 }
2833 }
2834 lp_exec_mask_fini(&bld.exec_mask);
2835 }
2836