1 /**************************************************************************
2 *
3 * Copyright 2019 Red Hat.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **************************************************************************/
25
26 #include "lp_bld_nir.h"
27 #include "lp_bld_init.h"
28 #include "lp_bld_flow.h"
29 #include "lp_bld_logic.h"
30 #include "lp_bld_gather.h"
31 #include "lp_bld_const.h"
32 #include "lp_bld_struct.h"
33 #include "lp_bld_arit.h"
34 #include "lp_bld_bitarit.h"
35 #include "lp_bld_coro.h"
36 #include "lp_bld_printf.h"
37 #include "util/u_math.h"
38
/*
 * Return log2 of the byte width for a given bit width (i.e. the shift
 * needed to turn a byte offset into an element index).  Unrecognized
 * widths are treated as 32-bit.
 */
static int bit_size_to_shift_size(int bit_size)
{
   switch (bit_size) {
   case 8:
      return 0;
   case 16:
      return 1;
   case 64:
      return 3;
   case 32:
   default:
      return 2;
   }
}
53
54 /*
55 * combine the execution mask if there is one with the current mask.
56 */
57 static LLVMValueRef
mask_vec(struct lp_build_nir_context * bld_base)58 mask_vec(struct lp_build_nir_context *bld_base)
59 {
60 struct lp_build_nir_soa_context * bld = (struct lp_build_nir_soa_context *)bld_base;
61 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
62 struct lp_exec_mask *exec_mask = &bld->exec_mask;
63 LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
64 if (!exec_mask->has_mask) {
65 return bld_mask;
66 }
67 if (!bld_mask)
68 return exec_mask->exec_mask;
69 return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
70 exec_mask->exec_mask, "");
71 }
72
/*
 * Combine two 32-bit SoA registers (low and high dwords) into one
 * 64-bit SoA value by interleaving their lanes, then bitcast the
 * result to the double-precision vector type.
 */
static LLVMValueRef
emit_fetch_64bit(
   struct lp_build_nir_context * bld_base,
   LLVMValueRef input,
   LLVMValueRef input2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   int i;
   LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
   int len = bld_base->base.type.length * 2;
   assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));

   for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
      /* Pair lane i/2 of 'input' with lane i/2 of 'input2'; the dword
       * order inside each 64-bit element depends on endianness. */
#if UTIL_ARCH_LITTLE_ENDIAN
      shuffles[i] = lp_build_const_int32(gallivm, i / 2);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
#else
      shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
#endif
   }
   res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");

   return LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
}
100
/*
 * Split a 64-bit SoA vector into two 32-bit SoA vectors containing the
 * low and high dwords of each element (order adjusted for endianness).
 * The halves are returned in split_values[0] / split_values[1].
 */
static void
emit_store_64bit_split(struct lp_build_nir_context *bld_base,
                       LLVMValueRef value,
                       LLVMValueRef split_values[2])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   unsigned i;
   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
   LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
   int len = bld_base->base.type.length * 2;

   /* Reinterpret the 64-bit vector as twice as many 32-bit floats. */
   value = LLVMBuildBitCast(gallivm->builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), len), "");
   for (i = 0; i < bld_base->base.type.length; i++) {
#if UTIL_ARCH_LITTLE_ENDIAN
      shuffles[i] = lp_build_const_int32(gallivm, i * 2);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
#else
      shuffles[i] = lp_build_const_int32(gallivm, i * 2 + 1);
      shuffles2[i] = lp_build_const_int32(gallivm, i * 2);
#endif
   }

   split_values[0] = LLVMBuildShuffleVector(builder, value,
                                            LLVMGetUndef(LLVMTypeOf(value)),
                                            LLVMConstVector(shuffles,
                                                            bld_base->base.type.length),
                                            "");
   split_values[1] = LLVMBuildShuffleVector(builder, value,
                                            LLVMGetUndef(LLVMTypeOf(value)),
                                            LLVMConstVector(shuffles2,
                                                            bld_base->base.type.length),
                                            "");
}
135
136 static void
emit_store_64bit_chan(struct lp_build_nir_context * bld_base,LLVMValueRef chan_ptr,LLVMValueRef chan_ptr2,LLVMValueRef value)137 emit_store_64bit_chan(struct lp_build_nir_context *bld_base,
138 LLVMValueRef chan_ptr,
139 LLVMValueRef chan_ptr2,
140 LLVMValueRef value)
141 {
142 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
143 struct lp_build_context *float_bld = &bld_base->base;
144 LLVMValueRef split_vals[2];
145
146 emit_store_64bit_split(bld_base, value, split_vals);
147
148 lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[0], chan_ptr);
149 lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[1], chan_ptr2);
150 }
151
/*
 * Build per-lane element offsets into an SoA-layout register array:
 *   (indirect_index * num_components + chan_index) * vector_length
 * optionally plus a {0,1,2,...} per-lane offset so each SIMD lane
 * addresses its own slot within the channel.
 */
static LLVMValueRef
get_soa_array_offsets(struct lp_build_context *uint_bld,
                      LLVMValueRef indirect_index,
                      int num_components,
                      unsigned chan_index,
                      bool need_perelement_offset)
{
   struct gallivm_state *gallivm = uint_bld->gallivm;
   LLVMValueRef chan_vec =
      lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
   LLVMValueRef length_vec =
      lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
   LLVMValueRef index_vec;

   /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
   index_vec = lp_build_mul(uint_bld, indirect_index, lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, num_components));
   index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
   index_vec = lp_build_mul(uint_bld, index_vec, length_vec);

   if (need_perelement_offset) {
      LLVMValueRef pixel_offsets;
      unsigned i;
      /* build pixel offset vector: {0, 1, 2, 3, ...} */
      pixel_offsets = uint_bld->undef;
      for (i = 0; i < uint_bld->type.length; i++) {
         LLVMValueRef ii = lp_build_const_int32(gallivm, i);
         pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
                                                ii, ii, "");
      }
      index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
   }
   return index_vec;
}
185
/*
 * Gather scalar values from base_ptr at per-lane 'indexes' into a vector.
 *
 * If indexes2 is non-NULL the result is a double-width float vector whose
 * even lanes come from 'indexes' and odd lanes from 'indexes2' (used for
 * 64-bit loads, later bitcast by the caller).  overflow_mask marks lanes
 * that are out of bounds: those lanes fetch from index 0 and are zeroed
 * in the result afterwards.
 */
static LLVMValueRef
build_gather(struct lp_build_nir_context *bld_base,
             struct lp_build_context *bld,
             LLVMValueRef base_ptr,
             LLVMValueRef indexes,
             LLVMValueRef overflow_mask,
             LLVMValueRef indexes2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMValueRef res;
   unsigned i;

   if (indexes2)
      res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
   else
      res = bld->undef;
   /*
    * overflow_mask is a vector telling us which channels
    * in the vector overflowed. We use the overflow behavior for
    * constant buffers which is defined as:
    * Out of bounds access to constant buffer returns 0 in all
    * components. Out of bounds behavior is always with respect
    * to the size of the buffer bound at that slot.
    */

   if (overflow_mask) {
      /*
       * We avoid per-element control flow here (also due to llvm going crazy,
       * though I suspect it's better anyway since overflow is likely rare).
       * Note that since we still fetch from buffers even if num_elements was
       * zero (in this case we'll fetch from index zero) the jit func callers
       * MUST provide valid fake constant buffers of size 4x32 (the values do
       * not matter), otherwise we'd still need (not per element though)
       * control flow.
       */
      indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
      if (indexes2)
         indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
   }

   /*
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    */
   for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
      LLVMValueRef si, di;
      LLVMValueRef index;
      LLVMValueRef scalar_ptr, scalar;

      di = lp_build_const_int32(gallivm, i);
      if (indexes2)
         si = lp_build_const_int32(gallivm, i >> 1);
      else
         si = di;

      /* Odd destination lanes read from indexes2 (the high dwords). */
      if (indexes2 && (i & 1)) {
         index = LLVMBuildExtractElement(builder,
                                         indexes2, si, "");
      } else {
         index = LLVMBuildExtractElement(builder,
                                         indexes, si, "");
      }
      scalar_ptr = LLVMBuildGEP(builder, base_ptr,
                                &index, 1, "gather_ptr");
      scalar = LLVMBuildLoad(builder, scalar_ptr, "");

      res = LLVMBuildInsertElement(builder, res, scalar, di, "");
   }

   if (overflow_mask) {
      if (indexes2) {
         /* 64-bit path: select in the double type so mask lanes line up
          * with the (half as many) 64-bit result lanes. */
         res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
         overflow_mask = LLVMBuildSExt(builder, overflow_mask,
                                       bld_base->dbl_bld.int_vec_type, "");
         res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
                               bld_base->dbl_bld.zero, res);
      } else
         res = lp_build_select(bld, overflow_mask, bld->zero, res);
   }

   return res;
}
269
270 /**
271 * Scatter/store vector.
272 */
273 static void
emit_mask_scatter(struct lp_build_nir_soa_context * bld,LLVMValueRef base_ptr,LLVMValueRef indexes,LLVMValueRef values,struct lp_exec_mask * mask)274 emit_mask_scatter(struct lp_build_nir_soa_context *bld,
275 LLVMValueRef base_ptr,
276 LLVMValueRef indexes,
277 LLVMValueRef values,
278 struct lp_exec_mask *mask)
279 {
280 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
281 LLVMBuilderRef builder = gallivm->builder;
282 unsigned i;
283 LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;
284
285 /*
286 * Loop over elements of index_vec, store scalar value.
287 */
288 for (i = 0; i < bld->bld_base.base.type.length; i++) {
289 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
290 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
291 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
292 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
293 LLVMValueRef scalar_pred = pred ?
294 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
295
296 if (0)
297 lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
298 ii, val, index, scalar_ptr);
299
300 if (scalar_pred) {
301 LLVMValueRef real_val, dst_val;
302 dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
303 scalar_pred = LLVMBuildTrunc(builder, scalar_pred, LLVMInt1TypeInContext(gallivm->context), "");
304 real_val = LLVMBuildSelect(builder, scalar_pred, val, dst_val, "");
305 LLVMBuildStore(builder, real_val, scalar_ptr);
306 }
307 else {
308 LLVMBuildStore(builder, val, scalar_ptr);
309 }
310 }
311 }
312
/*
 * Load a shader input/output variable into per-channel SoA values.
 *
 * deref_mode selects inputs vs outputs.  vertex_index/indir_vertex_index
 * address the per-vertex dimension for gs/tcs/tes stages; const_index and
 * indir_index address the attribute (location) dimension.  64-bit loads
 * fetch two adjacent 32-bit components and combine them with
 * emit_fetch_64bit().  Unhandled modes leave result[] untouched.
 */
static void emit_load_var(struct lp_build_nir_context *bld_base,
                          nir_variable_mode deref_mode,
                          unsigned num_components,
                          unsigned bit_size,
                          nir_variable *var,
                          unsigned vertex_index,
                          LLVMValueRef indir_vertex_index,
                          unsigned const_index,
                          LLVMValueRef indir_index,
                          LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   int dmul = bit_size == 64 ? 2 : 1;   /* 64-bit values occupy two 32-bit slots */
   unsigned location = var->data.driver_location;
   unsigned location_frac = var->data.location_frac;

   if (!var->data.compact && !indir_index)
      location += const_index;
   else if (var->data.compact) {
      /* Compact arrays (e.g. clip distances) pack 4 scalars per location. */
      location += const_index / 4;
      location_frac += const_index % 4;
      const_index = 0;
   }
   switch (deref_mode) {
   case nir_var_shader_in:
      for (unsigned i = 0; i < num_components; i++) {
         int idx = (i * dmul) + location_frac;
         int comp_loc = location;

         /* A 64-bit component past slot 3 spills into the next location. */
         if (bit_size == 64 && idx >= 4) {
            comp_loc++;
            idx = idx % 4;
         }

         if (bld->gs_iface) {
            /* Geometry shader: fetch through the GS input interface. */
            LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
            LLVMValueRef attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
            LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
            LLVMValueRef result2;

            result[i] = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
                                                   false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
            if (bit_size == 64) {
               LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
               result2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
                                                    false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
               result[i] = emit_fetch_64bit(bld_base, result[i], result2);
            }
         } else if (bld->tes_iface) {
            /* Tess eval: patch inputs vs per-vertex inputs use different hooks. */
            LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
            LLVMValueRef attrib_index_val;
            LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
            LLVMValueRef result2;

            if (indir_index) {
               if (var->data.compact) {
                  /* Compact arrays are indirect in the swizzle dimension. */
                  swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));
                  attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
               } else
                  attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
            } else
               attrib_index_val = lp_build_const_int32(gallivm, comp_loc);

            if (var->data.patch) {
               result[i] = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
                                                             indir_index ? true : false, attrib_index_val, swizzle_index_val);
               if (bit_size == 64) {
                  LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
                  result2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
                                                              indir_index ? true : false, attrib_index_val, swizzle_index_val);
                  result[i] = emit_fetch_64bit(bld_base, result[i], result2);
               }
            }
            else {
               result[i] = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
                                                              indir_vertex_index ? true : false,
                                                              indir_vertex_index ? indir_vertex_index : vertex_index_val,
                                                              (indir_index && !var->data.compact) ? true : false, attrib_index_val,
                                                              (indir_index && var->data.compact) ? true : false, swizzle_index_val);
               if (bit_size == 64) {
                  LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
                  result2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
                                                               indir_vertex_index ? true : false,
                                                               indir_vertex_index ? indir_vertex_index : vertex_index_val,
                                                               indir_index ? true : false, attrib_index_val, false, swizzle_index_val);
                  result[i] = emit_fetch_64bit(bld_base, result[i], result2);
               }
            }
         } else if (bld->tcs_iface) {
            /* Tess control: fetch through the TCS input interface. */
            LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
            LLVMValueRef attrib_index_val;
            LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);

            if (indir_index) {
               if (var->data.compact) {
                  swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));
                  attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
               } else
                  attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
            } else
               attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
            result[i] = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
                                                         indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
                                                         (indir_index && !var->data.compact) ? true : false, attrib_index_val,
                                                         (indir_index && var->data.compact) ? true : false, swizzle_index_val);
            if (bit_size == 64) {
               LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
               LLVMValueRef result2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
                                                                       indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
                                                                       indir_index ? true : false, attrib_index_val,
                                                                       false, swizzle_index_val);
               result[i] = emit_fetch_64bit(bld_base, result[i], result2);
            }
         } else {
            /* Fragment/vertex style inputs read from the SoA inputs array. */
            if (indir_index) {
               LLVMValueRef attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
               LLVMValueRef index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                                              attrib_index_val, 4, idx,
                                                              TRUE);
               LLVMValueRef index_vec2 = NULL;
               LLVMTypeRef fptr_type;
               LLVMValueRef inputs_array;
               fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
               inputs_array = LLVMBuildBitCast(gallivm->builder, bld->inputs_array, fptr_type, "");

               /* NOTE(review): the first gather uses attrib_index_val
                * (indir_index + comp_loc) but this one passes the raw
                * indir_index, dropping comp_loc — verify this is intended
                * for the 64-bit high-dword offsets. */
               if (bit_size == 64)
                  index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                                     indir_index, 4, idx + 1, TRUE);

               /* Gather values from the input register array */
               result[i] = build_gather(bld_base, &bld_base->base, inputs_array, index_vec, NULL, index_vec2);
            } else {
               if (bld->indirects & nir_var_shader_in) {
                  /* Inputs live in memory (indirectly addressable). */
                  LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                                             comp_loc * 4 + idx);
                  LLVMValueRef input_ptr = lp_build_pointer_get(gallivm->builder,
                                                                bld->inputs_array, lindex);
                  if (bit_size == 64) {
                     LLVMValueRef lindex2 = lp_build_const_int32(gallivm,
                                                                 comp_loc * 4 + (idx + 1));
                     LLVMValueRef input_ptr2 = lp_build_pointer_get(gallivm->builder,
                                                                    bld->inputs_array, lindex2);
                     result[i] = emit_fetch_64bit(bld_base, input_ptr, input_ptr2);
                  } else {
                     result[i] = input_ptr;
                  }
               } else {
                  /* Inputs are plain SSA values. */
                  if (bit_size == 64) {
                     LLVMValueRef tmp[2];
                     tmp[0] = bld->inputs[comp_loc][idx];
                     tmp[1] = bld->inputs[comp_loc][idx + 1];
                     result[i] = emit_fetch_64bit(bld_base, tmp[0], tmp[1]);
                  } else {
                     result[i] = bld->inputs[comp_loc][idx];
                  }
               }
            }
         }
      }
      break;
   case nir_var_shader_out:
      /* Framebuffer fetch (reading the current render target value). */
      if (bld->fs_iface && bld->fs_iface->fb_fetch) {
         bld->fs_iface->fb_fetch(bld->fs_iface, &bld_base->base, var->data.location, result);
         return;
      }
      /* Otherwise only TCS output reads are handled here. */
      for (unsigned i = 0; i < num_components; i++) {
         int idx = (i * dmul) + location_frac;
         if (bld->tcs_iface) {
            LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
            LLVMValueRef attrib_index_val;
            LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);

            if (indir_index)
               attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
            else
               attrib_index_val = lp_build_const_int32(gallivm, location);

            result[i] = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
                                                          indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
                                                          (indir_index && !var->data.compact) ? true : false, attrib_index_val,
                                                          (indir_index && var->data.compact) ? true : false, swizzle_index_val, 0);
            if (bit_size == 64) {
               LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
               LLVMValueRef result2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
                                                                        indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
                                                                        indir_index ? true : false, attrib_index_val,
                                                                        false, swizzle_index_val, 0);
               result[i] = emit_fetch_64bit(bld_base, result[i], result2);
            }
         }
      }
      break;
   default:
      break;
   }
}
510
/*
 * Store one channel of a shader output.  64-bit values are split across
 * two 32-bit output slots (wrapping into the next location past slot 3);
 * 32-bit values are bitcast to float and stored under the exec mask.
 */
static void emit_store_chan(struct lp_build_nir_context *bld_base,
                            nir_variable_mode deref_mode,
                            unsigned bit_size,
                            unsigned location, unsigned comp,
                            unsigned chan,
                            LLVMValueRef dst)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct lp_build_context *float_bld = &bld_base->base;

   if (bit_size != 64) {
      LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
      LLVMValueRef casted = LLVMBuildBitCast(builder, dst, float_bld->vec_type, "");
      lp_exec_mask_store(&bld->exec_mask, float_bld, casted,
                         bld->outputs[location][chan + comp]);
      return;
   }

   /* A 64-bit channel occupies two consecutive 32-bit slots. */
   unsigned slot = chan * 2 + comp;
   if (slot >= 4) {
      slot -= 4;
      location++;
   }
   emit_store_64bit_chan(bld_base, bld->outputs[location][slot],
                         bld->outputs[location][slot + 1], dst);
}
537
/*
 * Store one channel of a TCS output through the tcs_iface callback.
 *
 * Derives the (attrib, swizzle) pair from location/comp/chan — 64-bit
 * values take two 32-bit slots and may wrap into the next location —
 * and handles compact/indirect addressing.  All stores are predicated
 * by mask_vec().
 */
static void emit_store_tcs_chan(struct lp_build_nir_context *bld_base,
                                bool is_compact,
                                unsigned bit_size,
                                unsigned location,
                                unsigned const_index,
                                LLVMValueRef indir_vertex_index,
                                LLVMValueRef indir_index,
                                unsigned comp,
                                unsigned chan,
                                LLVMValueRef chan_val)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   unsigned swizzle = chan;
   if (bit_size == 64) {
      /* Two 32-bit slots per channel; overflow into the next location. */
      swizzle *= 2;
      swizzle += comp;
      if (swizzle >= 4) {
         swizzle -= 4;
         location++;
      }
   } else
      swizzle += comp;
   LLVMValueRef attrib_index_val;
   LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, swizzle);

   if (indir_index) {
      if (is_compact) {
         /* Compact arrays are indirect in the swizzle dimension. */
         swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, swizzle));
         attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
      } else
         attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, location));
   } else
      attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
   if (bit_size == 64) {
      /* Split into low/high dwords and store each half to its own slot. */
      LLVMValueRef split_vals[2];
      LLVMValueRef swizzle_index_val2 = lp_build_const_int32(gallivm, swizzle + 1);
      emit_store_64bit_split(bld_base, chan_val, split_vals);
      bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
                                        indir_vertex_index ? true : false,
                                        indir_vertex_index,
                                        indir_index ? true : false,
                                        attrib_index_val,
                                        false, swizzle_index_val,
                                        split_vals[0], mask_vec(bld_base));
      bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
                                        indir_vertex_index ? true : false,
                                        indir_vertex_index,
                                        indir_index ? true : false,
                                        attrib_index_val,
                                        false, swizzle_index_val2,
                                        split_vals[1], mask_vec(bld_base));
   } else {
      chan_val = LLVMBuildBitCast(builder, chan_val, bld_base->base.vec_type, "");
      bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
                                        indir_vertex_index ? true : false,
                                        indir_vertex_index,
                                        indir_index && !is_compact ? true : false,
                                        attrib_index_val,
                                        indir_index && is_compact ? true : false,
                                        swizzle_index_val,
                                        chan_val, mask_vec(bld_base));
   }
}
603
/*
 * Store to a shader output variable, one channel at a time per writemask.
 *
 * Fragment depth/stencil outputs are remapped to fixed components of
 * their output slot; compact variables fold const_index into
 * location/comp.  TCS outputs route through emit_store_tcs_chan(),
 * everything else through emit_store_chan().  Only nir_var_shader_out
 * is handled; other modes are ignored.
 */
static void emit_store_var(struct lp_build_nir_context *bld_base,
                           nir_variable_mode deref_mode,
                           unsigned num_components,
                           unsigned bit_size,
                           nir_variable *var,
                           unsigned writemask,
                           LLVMValueRef indir_vertex_index,
                           unsigned const_index,
                           LLVMValueRef indir_index,
                           LLVMValueRef dst)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   switch (deref_mode) {
   case nir_var_shader_out: {
      unsigned location = var->data.driver_location;
      unsigned comp = var->data.location_frac;
      if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
         /* Depth/stencil live in fixed components of the output slot. */
         if (var->data.location == FRAG_RESULT_STENCIL)
            comp = 1;
         else if (var->data.location == FRAG_RESULT_DEPTH)
            comp = 2;
      }

      if (var->data.compact) {
         location += const_index / 4;
         comp += const_index % 4;
         const_index = 0;
      }

      for (unsigned chan = 0; chan < num_components; chan++) {
         if (writemask & (1u << chan)) {
            /* vec1 values are passed directly, not as an aggregate. */
            LLVMValueRef chan_val = (num_components == 1) ? dst : LLVMBuildExtractValue(builder, dst, chan, "");
            if (bld->tcs_iface) {
               emit_store_tcs_chan(bld_base, var->data.compact, bit_size, location, const_index, indir_vertex_index, indir_index, comp, chan, chan_val);
            } else
               emit_store_chan(bld_base, deref_mode, bit_size, location + const_index, comp, chan, chan_val);
         }
      }
      break;
   }
   default:
      break;
   }
}
649
/*
 * Load a NIR register.  Plain registers load directly from their alloca
 * storage; register arrays gather per-lane through SoA offsets, with the
 * indirect index clamped to the last array element.  Returns a single
 * value for vec1, otherwise an aggregate of per-component values.
 */
static LLVMValueRef emit_load_reg(struct lp_build_nir_context *bld_base,
                                  struct lp_build_context *reg_bld,
                                  const nir_reg_src *reg,
                                  LLVMValueRef indir_src,
                                  LLVMValueRef reg_storage)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   int nc = reg->reg->num_components;
   LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS] = { NULL };
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   if (reg->reg->num_array_elems) {
      LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, reg->base_offset);
      if (reg->indirect) {
         /* Clamp out-of-range indirect indices to the last element. */
         LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, reg->reg->num_array_elems - 1);
         indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, "");
         indirect_val = lp_build_min(uint_bld, indirect_val, max_index);
      }
      reg_storage = LLVMBuildBitCast(builder, reg_storage, LLVMPointerType(reg_bld->elem_type, 0), "");
      for (unsigned i = 0; i < nc; i++) {
         LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, TRUE);
         vals[i] = build_gather(bld_base, reg_bld, reg_storage, indirect_offset, NULL, NULL);
      }
   } else {
      for (unsigned i = 0; i < nc; i++) {
         LLVMValueRef this_storage = nc == 1 ? reg_storage : lp_build_array_get_ptr(gallivm, reg_storage,
                                                                                    lp_build_const_int32(gallivm, i));
         vals[i] = LLVMBuildLoad(builder, this_storage, "");
      }
   }
   return nc == 1 ? vals[0] : lp_nir_array_build_gather_values(builder, vals, nc);
}
682
/*
 * Store to a NIR register, honoring the component writemask.
 *
 * Register arrays scatter per-lane through SoA offsets (indirect index
 * clamped to the last element) with the exec mask applied; plain
 * registers store each component under the exec mask via
 * lp_exec_mask_store().
 */
static void emit_store_reg(struct lp_build_nir_context *bld_base,
                           struct lp_build_context *reg_bld,
                           const nir_reg_dest *reg,
                           unsigned writemask,
                           LLVMValueRef indir_src,
                           LLVMValueRef reg_storage,
                           LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS])
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   int nc = reg->reg->num_components;
   if (reg->reg->num_array_elems > 0) {
      LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, reg->base_offset);
      if (reg->indirect) {
         /* Clamp out-of-range indirect indices to the last element. */
         LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, reg->reg->num_array_elems - 1);
         indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, "");
         indirect_val = lp_build_min(uint_bld, indirect_val, max_index);
      }
      reg_storage = LLVMBuildBitCast(builder, reg_storage, LLVMPointerType(reg_bld->elem_type, 0), "");
      for (unsigned i = 0; i < nc; i++) {
         if (!(writemask & (1 << i)))
            continue;
         LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, TRUE);
         dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, "");
         emit_mask_scatter(bld, reg_storage, indirect_offset, dst[i], &bld->exec_mask);
      }
      return;
   }

   /* Non-array register: masked store of each written component.
    * NOTE(review): the writemask is not consulted on this path — all nc
    * components are stored; verify callers always pass a full mask here. */
   for (unsigned i = 0; i < nc; i++) {
      LLVMValueRef this_storage = nc == 1 ? reg_storage : lp_build_array_get_ptr(gallivm, reg_storage,
                                                                                 lp_build_const_int32(gallivm, i));
      dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, "");
      lp_exec_mask_store(&bld->exec_mask, reg_bld, dst[i], this_storage);
   }
}
721
/*
 * Load kernel arguments from the kernel-args buffer and broadcast them
 * to SIMD vectors.
 *
 * The incoming byte offset is shifted down to an element index for the
 * target bit size.  NOTE(review): only the offset_is_uniform path fills
 * in result[]; non-uniform offsets leave it untouched — presumably the
 * callers guarantee uniformity here, confirm at the call sites.
 */
static void emit_load_kernel_arg(struct lp_build_nir_context *bld_base,
                                 unsigned nc,
                                 unsigned bit_size,
                                 unsigned offset_bit_size,
                                 bool offset_is_uniform,
                                 LLVMValueRef offset,
                                 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size);
   LLVMValueRef kernel_args_ptr = bld->kernel_args_ptr;
   unsigned size_shift = bit_size_to_shift_size(bit_size);
   struct lp_build_context *bld_offset = get_int_bld(bld_base, true, offset_bit_size);
   /* Convert byte offset to element index (no-op for 8-bit args). */
   if (size_shift)
      offset = lp_build_shr(bld_offset, offset, lp_build_const_int_vec(gallivm, bld_offset->type, size_shift));

   LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0);
   kernel_args_ptr = LLVMBuildBitCast(builder, kernel_args_ptr, ptr_type, "");

   if (offset_is_uniform) {
      /* All lanes share the offset: load once per component, broadcast. */
      offset = LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), "");

      for (unsigned c = 0; c < nc; c++) {
         LLVMValueRef this_offset = LLVMBuildAdd(builder, offset, offset_bit_size == 64 ? lp_build_const_int64(gallivm, c) : lp_build_const_int32(gallivm, c), "");

         LLVMValueRef scalar = lp_build_pointer_get(builder, kernel_args_ptr, this_offset);
         result[c] = lp_build_broadcast_scalar(bld_broad, scalar);
      }
   }
}
754
/*
 * Cast an integer global address to a pointer to an integer of the given
 * bit width.  Unrecognized widths fall back to a 32-bit element type.
 */
static LLVMValueRef global_addr_to_ptr(struct gallivm_state *gallivm, LLVMValueRef addr_ptr, unsigned bit_size)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef elem_type;

   switch (bit_size) {
   case 8:
      elem_type = LLVMInt8TypeInContext(gallivm->context);
      break;
   case 16:
      elem_type = LLVMInt16TypeInContext(gallivm->context);
      break;
   case 64:
      elem_type = LLVMInt64TypeInContext(gallivm->context);
      break;
   case 32:
   default:
      elem_type = LLVMInt32TypeInContext(gallivm->context);
      break;
   }
   return LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(elem_type, 0), "");
}
775
/*
 * Load "nc" components through per-lane global addresses.  Every SIMD lane
 * can hold a different address, so the loads are scalarized: loop over the
 * lanes and only dereference for lanes whose exec-mask bit is set (an
 * inactive lane's address may be garbage and could fault).  Inactive lanes
 * keep whatever the result alloca initially contained.
 *
 * NOTE(review): addr_bit_size is currently unused in this function.
 */
static void emit_load_global(struct lp_build_nir_context *bld_base,
                             unsigned nc,
                             unsigned bit_size,
                             unsigned addr_bit_size,
                             LLVMValueRef addr,
                             LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *res_bld;

   res_bld = get_int_bld(bld_base, true, bit_size);

   for (unsigned c = 0; c < nc; c++) {
      /* Stack temporary that accumulates the per-lane scalar loads. */
      LLVMValueRef result = lp_build_alloca(gallivm, res_bld->vec_type, "");
      LLVMValueRef exec_mask = mask_vec(bld_base);
      struct lp_build_loop_state loop_state;
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));

      /* Guard the load on this lane's exec-mask bit. */
      struct lp_build_if_state ifthen;
      LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
      cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
      lp_build_if(&ifthen, gallivm, cond);

      LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
                                                      loop_state.counter, "");
      addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);

      /* Component c lives c elements past the base address. */
      LLVMValueRef value_ptr = lp_build_pointer_get(builder, addr_ptr, lp_build_const_int32(gallivm, c));

      /* Insert the loaded scalar into this lane of the result vector. */
      LLVMValueRef temp_res;
      temp_res = LLVMBuildLoad(builder, result, "");
      temp_res = LLVMBuildInsertElement(builder, temp_res, value_ptr, loop_state.counter, "");
      LLVMBuildStore(builder, temp_res, result);
      lp_build_endif(&ifthen);
      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
                             NULL, LLVMIntUGE);
      outval[c] = LLVMBuildLoad(builder, result, "");
   }
}
817
/*
 * Store up to "nc" components (selected by writemask) through per-lane
 * global addresses.  As with emit_load_global, each lane may hold a
 * distinct address, so stores are scalarized and guarded on the exec mask.
 *
 * NOTE(review): addr_bit_size is currently unused in this function.
 */
static void emit_store_global(struct lp_build_nir_context *bld_base,
                              unsigned writemask,
                              unsigned nc, unsigned bit_size,
                              unsigned addr_bit_size,
                              LLVMValueRef addr,
                              LLVMValueRef dst)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;

   for (unsigned c = 0; c < nc; c++) {
      if (!(writemask & (1u << c)))
         continue;
      /* A single-component "vector" is passed directly, not as a struct. */
      LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");

      LLVMValueRef exec_mask = mask_vec(bld_base);
      struct lp_build_loop_state loop_state;
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
      LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
                                                       loop_state.counter, "");

      LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
                                                      loop_state.counter, "");
      addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);
      /* Reinterpret the scalar as the integer type matching bit_size so the
       * store's value type agrees with the pointee type. */
      switch (bit_size) {
      case 8:
         value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt8TypeInContext(gallivm->context), "");
         break;
      case 16:
         value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt16TypeInContext(gallivm->context), "");
         break;
      case 32:
         value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt32TypeInContext(gallivm->context), "");
         break;
      case 64:
         value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt64TypeInContext(gallivm->context), "");
         break;
      default:
         break;
      }
      /* Only active lanes may write; inactive lanes' addresses may fault. */
      struct lp_build_if_state ifthen;

      LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
      cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
      lp_build_if(&ifthen, gallivm, cond);
      lp_build_pointer_set(builder, addr_ptr, lp_build_const_int32(gallivm, c), value_ptr);
      lp_build_endif(&ifthen);
      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
                             NULL, LLVMIntUGE);
   }
}
870
/*
 * Perform a global-memory atomic (RMW or compare-and-swap) per active lane.
 * The operation is scalarized: one sequentially-consistent atomic per lane,
 * guarded by the exec mask.  Inactive lanes get 0 inserted into the result
 * so the output vector is fully defined.
 *
 * NOTE(review): global_addr_to_ptr is called with a hardcoded width of 32,
 * but the pointer is immediately bitcast to point at LLVMTypeOf(value_ptr),
 * so the initial pointee width is irrelevant.
 * NOTE(review): addr_bit_size is currently unused in this function.
 */
static void emit_atomic_global(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_op nir_op,
                               unsigned addr_bit_size,
                               unsigned val_bit_size,
                               LLVMValueRef addr,
                               LLVMValueRef val, LLVMValueRef val2,
                               LLVMValueRef *result)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *atom_bld = get_int_bld(bld_base, true, val_bit_size);
   /* Accumulates the per-lane "old value" results. */
   LLVMValueRef atom_res = lp_build_alloca(gallivm,
                                           LLVMTypeOf(val), "");
   LLVMValueRef exec_mask = mask_vec(bld_base);
   struct lp_build_loop_state loop_state;
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));

   LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
                                                    loop_state.counter, "");

   LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
                                                   loop_state.counter, "");
   addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, 32);
   struct lp_build_if_state ifthen;
   LLVMValueRef cond, temp_res;
   LLVMValueRef scalar;
   /* Only active lanes execute the atomic. */
   cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
   cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
   lp_build_if(&ifthen, gallivm, cond);

   /* Make the pointee type match the operand's scalar type. */
   addr_ptr = LLVMBuildBitCast(gallivm->builder, addr_ptr, LLVMPointerType(LLVMTypeOf(value_ptr), 0), "");
   if (nir_op == nir_intrinsic_global_atomic_comp_swap) {
      /* cmpxchg: val is the expected value, val2 the replacement. */
      LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
                                                         loop_state.counter, "");
      cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atom_bld->elem_type, "");
      scalar = LLVMBuildAtomicCmpXchg(builder, addr_ptr, value_ptr,
                                      cas_src_ptr,
                                      LLVMAtomicOrderingSequentiallyConsistent,
                                      LLVMAtomicOrderingSequentiallyConsistent,
                                      false);
      /* cmpxchg yields {old value, success}; keep only the old value. */
      scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
   } else {
      LLVMAtomicRMWBinOp op;
      switch (nir_op) {
      case nir_intrinsic_global_atomic_add:
         op = LLVMAtomicRMWBinOpAdd;
         break;
      case nir_intrinsic_global_atomic_exchange:

         op = LLVMAtomicRMWBinOpXchg;
         break;
      case nir_intrinsic_global_atomic_and:
         op = LLVMAtomicRMWBinOpAnd;
         break;
      case nir_intrinsic_global_atomic_or:
         op = LLVMAtomicRMWBinOpOr;
         break;
      case nir_intrinsic_global_atomic_xor:
         op = LLVMAtomicRMWBinOpXor;
         break;
      case nir_intrinsic_global_atomic_umin:
         op = LLVMAtomicRMWBinOpUMin;
         break;
      case nir_intrinsic_global_atomic_umax:
         op = LLVMAtomicRMWBinOpUMax;
         break;
      case nir_intrinsic_global_atomic_imin:
         op = LLVMAtomicRMWBinOpMin;
         break;
      case nir_intrinsic_global_atomic_imax:
         op = LLVMAtomicRMWBinOpMax;
         break;
      default:
         unreachable("unknown atomic op");
      }

      scalar = LLVMBuildAtomicRMW(builder, op,
                                  addr_ptr, value_ptr,
                                  LLVMAtomicOrderingSequentiallyConsistent,
                                  false);
   }
   temp_res = LLVMBuildLoad(builder, atom_res, "");
   temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
   LLVMBuildStore(builder, temp_res, atom_res);
   lp_build_else(&ifthen);
   /* Inactive lane: insert a typed zero so the result is fully defined. */
   temp_res = LLVMBuildLoad(builder, atom_res, "");
   bool is_float = LLVMTypeOf(val) == bld_base->base.vec_type;
   LLVMValueRef zero_val;
   if (is_float) {
      if (val_bit_size == 64)
         zero_val = lp_build_const_double(gallivm, 0);
      else
         zero_val = lp_build_const_float(gallivm, 0);
   } else {
      if (val_bit_size == 64)
         zero_val = lp_build_const_int64(gallivm, 0);
      else
         zero_val = lp_build_const_int32(gallivm, 0);
   }

   temp_res = LLVMBuildInsertElement(builder, temp_res, zero_val, loop_state.counter, "");
   LLVMBuildStore(builder, temp_res, atom_res);
   lp_build_endif(&ifthen);
   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
                          NULL, LLVMIntUGE);
   *result = LLVMBuildLoad(builder, atom_res, "");
}
979
/*
 * Load "nc" components from UBO "index" at a per-lane byte offset.
 * Uniform offsets take a fast path (scalar load + broadcast); otherwise a
 * masked gather is emitted with out-of-bounds lanes forced to zero via
 * overflow_mask.
 */
static void emit_load_ubo(struct lp_build_nir_context *bld_base,
                          unsigned nc,
                          unsigned bit_size,
                          bool offset_is_uniform,
                          LLVMValueRef index,
                          LLVMValueRef offset,
                          LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size);
   LLVMValueRef consts_ptr = lp_build_array_get(gallivm, bld->consts_ptr, index);
   unsigned size_shift = bit_size_to_shift_size(bit_size);
   /* Byte offset -> element index (no-op for 8-bit elements). */
   if (size_shift)
      offset = lp_build_shr(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, size_shift));

   /* View the constant buffer as an array of bit_size-wide integers. */
   LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0);
   consts_ptr = LLVMBuildBitCast(builder, consts_ptr, ptr_type, "");

   if (offset_is_uniform) {
      /* All lanes agree: take lane 0, scalar-load each component, broadcast. */
      offset = LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), "");

      for (unsigned c = 0; c < nc; c++) {
         LLVMValueRef this_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");

         LLVMValueRef scalar = lp_build_pointer_get(builder, consts_ptr, this_offset);
         result[c] = lp_build_broadcast_scalar(bld_broad, scalar);
      }
   } else {
      LLVMValueRef overflow_mask;
      LLVMValueRef num_consts = lp_build_array_get(gallivm, bld->const_sizes_ptr, index);

      /* Rescale the buffer size to units of bit_size elements; the shifts
       * imply const_sizes_ptr stores 32-bit units — confirm at the setter. */
      num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
      if (bit_size == 64)
         num_consts = lp_build_shr_imm(uint_bld, num_consts, 1);
      else if (bit_size == 16)
         num_consts = lp_build_shl_imm(uint_bld, num_consts, 1);
      else if (bit_size == 8)
         num_consts = lp_build_shl_imm(uint_bld, num_consts, 2);

      for (unsigned c = 0; c < nc; c++) {
         LLVMValueRef this_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
         /* Lanes reading past the end of the buffer gather zero. */
         overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
                                          this_offset, num_consts);
         result[c] = build_gather(bld_base, bld_broad, consts_ptr, this_offset, overflow_mask, NULL);
      }
   }
}
1030
1031
/*
 * Load "nc" components from SSBO memory (index != NULL) or workgroup shared
 * memory (index == NULL) at a per-lane byte offset.
 *
 * The load is scalarized over the SIMD lanes and doubly guarded: the outer
 * if skips lanes with a clear exec-mask bit, and for SSBOs an inner if
 * zero-fills lanes whose element index is past the buffer's size.
 */
static void emit_load_mem(struct lp_build_nir_context *bld_base,
                          unsigned nc,
                          unsigned bit_size,
                          LLVMValueRef index,
                          LLVMValueRef offset,
                          LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMValueRef ssbo_limit = NULL;
   struct lp_build_context *load_bld;
   uint32_t shift_val = bit_size_to_shift_size(bit_size);

   load_bld = get_int_bld(bld_base, true, bit_size);

   /* Byte offset -> element index. */
   offset = LLVMBuildAShr(gallivm->builder, offset, lp_build_const_int_vec(gallivm, uint_bld->type, shift_val), "");

   /* although the index is dynamically uniform that doesn't count if exec mask isn't set, so read the one-by-one */

   /* Stack temporaries that accumulate the per-lane scalar loads. */
   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
   for (unsigned c = 0; c < nc; c++)
      result[c] = lp_build_alloca(gallivm, load_bld->vec_type, "");

   LLVMValueRef exec_mask = mask_vec(bld_base);
   LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
   struct lp_build_loop_state loop_state;
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
   LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
   LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, "");

   /* Skip the whole body for inactive lanes. */
   struct lp_build_if_state exec_ifthen;
   lp_build_if(&exec_ifthen, gallivm, loop_cond);

   LLVMValueRef mem_ptr;

   if (index) {
      /* SSBO: fetch this lane's buffer pointer and its size-derived limit
       * (size converted from bytes to elements of bit_size). */
      LLVMValueRef ssbo_idx = LLVMBuildExtractElement(gallivm->builder, index, loop_state.counter, "");
      LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, ssbo_idx);
      LLVMValueRef ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, ssbo_idx);
      ssbo_limit = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, shift_val), "");
      mem_ptr = ssbo_ptr;
   } else
      mem_ptr = bld->shared_ptr;

   for (unsigned c = 0; c < nc; c++) {
      LLVMValueRef loop_index = LLVMBuildAdd(builder, loop_offset, lp_build_const_int32(gallivm, c), "");
      /* do_fetch starts all-ones; ANDed with the bounds check for SSBOs. */
      LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1);
      if (ssbo_limit) {
         LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_index, ssbo_limit);
         do_fetch = LLVMBuildAnd(builder, do_fetch, ssbo_oob_cmp, "");
      }

      struct lp_build_if_state ifthen;
      LLVMValueRef fetch_cond, temp_res;

      fetch_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_fetch, lp_build_const_int32(gallivm, 0), "");

      lp_build_if(&ifthen, gallivm, fetch_cond);
      LLVMValueRef scalar;
      /* mem_ptr is a 32-bit pointer; recast for other element widths. */
      if (bit_size != 32) {
         LLVMValueRef mem_ptr2 = LLVMBuildBitCast(builder, mem_ptr, LLVMPointerType(load_bld->elem_type, 0), "");
         scalar = lp_build_pointer_get(builder, mem_ptr2, loop_index);
      } else
         scalar = lp_build_pointer_get(builder, mem_ptr, loop_index);

      temp_res = LLVMBuildLoad(builder, result[c], "");
      temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
      LLVMBuildStore(builder, temp_res, result[c]);
      lp_build_else(&ifthen);
      /* Out-of-bounds lane: robustness requires reading back zero. */
      temp_res = LLVMBuildLoad(builder, result[c], "");
      LLVMValueRef zero;
      if (bit_size == 64)
         zero = LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 0, 0);
      else if (bit_size == 16)
         zero = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0, 0);
      else if (bit_size == 8)
         zero = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0, 0);
      else
         zero = lp_build_const_int32(gallivm, 0);
      temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");
      LLVMBuildStore(builder, temp_res, result[c]);
      lp_build_endif(&ifthen);
   }

   lp_build_endif(&exec_ifthen);
   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
                          NULL, LLVMIntUGE);
   for (unsigned c = 0; c < nc; c++)
      outval[c] = LLVMBuildLoad(gallivm->builder, result[c], "");

}
1125
/*
 * Store writemask-selected components into SSBO memory (index != NULL) or
 * workgroup shared memory (index == NULL) at a per-lane byte offset.
 *
 * Scalarized over SIMD lanes with two guards: the outer if skips lanes with
 * a clear exec-mask bit, and for SSBOs an inner if drops stores whose
 * element index falls past the buffer's size (robust out-of-bounds write).
 */
static void emit_store_mem(struct lp_build_nir_context *bld_base,
                           unsigned writemask,
                           unsigned nc,
                           unsigned bit_size,
                           LLVMValueRef index,
                           LLVMValueRef offset,
                           LLVMValueRef dst)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   LLVMValueRef mem_ptr;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMValueRef ssbo_limit = NULL;
   struct lp_build_context *store_bld;
   uint32_t shift_val = bit_size_to_shift_size(bit_size);
   store_bld = get_int_bld(bld_base, true, bit_size);

   /* Byte offset -> element index. */
   offset = lp_build_shr_imm(uint_bld, offset, shift_val);

   LLVMValueRef exec_mask = mask_vec(bld_base);
   LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
   struct lp_build_loop_state loop_state;
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
   LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
   LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, "");

   /* Skip the whole body for inactive lanes. */
   struct lp_build_if_state exec_ifthen;
   lp_build_if(&exec_ifthen, gallivm, loop_cond);

   if (index) {
      /* SSBO: fetch this lane's buffer pointer and its size-derived limit
       * (size converted from bytes to elements of bit_size). */
      LLVMValueRef ssbo_idx = LLVMBuildExtractElement(gallivm->builder, index, loop_state.counter, "");
      LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, ssbo_idx);
      LLVMValueRef ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, ssbo_idx);
      ssbo_limit = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, shift_val), "");
      mem_ptr = ssbo_ptr;
   } else
      mem_ptr = bld->shared_ptr;

   for (unsigned c = 0; c < nc; c++) {
      if (!(writemask & (1u << c)))
         continue;
      LLVMValueRef loop_index = LLVMBuildAdd(builder, loop_offset, lp_build_const_int32(gallivm, c), "");
      /* A single-component "vector" is passed directly, not as a struct. */
      LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
      /* do_store starts all-ones; ANDed with the bounds check for SSBOs. */
      LLVMValueRef do_store = lp_build_const_int32(gallivm, -1);

      if (ssbo_limit) {
         LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_index, ssbo_limit);
         do_store = LLVMBuildAnd(builder, do_store, ssbo_oob_cmp, "");
      }

      LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
                                                       loop_state.counter, "");
      /* Reinterpret the scalar as an integer of the store width. */
      value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, "");
      struct lp_build_if_state ifthen;
      LLVMValueRef store_cond;

      store_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_store, lp_build_const_int32(gallivm, 0), "");
      lp_build_if(&ifthen, gallivm, store_cond);
      /* mem_ptr is a 32-bit pointer; recast for other element widths. */
      if (bit_size != 32) {
         LLVMValueRef mem_ptr2 = LLVMBuildBitCast(builder, mem_ptr, LLVMPointerType(store_bld->elem_type, 0), "");
         lp_build_pointer_set(builder, mem_ptr2, loop_index, value_ptr);
      } else
         lp_build_pointer_set(builder, mem_ptr, loop_index, value_ptr);
      lp_build_endif(&ifthen);
   }

   lp_build_endif(&exec_ifthen);
   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
                          NULL, LLVMIntUGE);

}
1198
/*
 * Perform an SSBO (index != NULL) or shared-memory (index == NULL) atomic
 * per active lane.  Scalarized like the other mem helpers: the outer if
 * skips inactive exec-mask lanes, and for SSBOs the inner if skips lanes
 * whose element offset is out of bounds, inserting 0 into the result for
 * those lanes instead.
 */
static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
                            nir_intrinsic_op nir_op,
                            uint32_t bit_size,
                            LLVMValueRef index, LLVMValueRef offset,
                            LLVMValueRef val, LLVMValueRef val2,
                            LLVMValueRef *result)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMValueRef ssbo_limit = NULL;
   uint32_t shift_val = bit_size_to_shift_size(bit_size);
   struct lp_build_context *atomic_bld = get_int_bld(bld_base, true, bit_size);

   /* Byte offset -> element index. */
   offset = lp_build_shr_imm(uint_bld, offset, shift_val);
   /* Accumulates the per-lane "old value" results. */
   LLVMValueRef atom_res = lp_build_alloca(gallivm,
                                           atomic_bld->vec_type, "");

   LLVMValueRef exec_mask = mask_vec(bld_base);
   LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
   struct lp_build_loop_state loop_state;
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
   LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
   LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, "");

   /* Skip the whole body for inactive lanes. */
   struct lp_build_if_state exec_ifthen;
   lp_build_if(&exec_ifthen, gallivm, loop_cond);

   LLVMValueRef mem_ptr;
   if (index) {
      /* SSBO: fetch this lane's buffer pointer and its size-derived limit
       * (size converted from bytes to elements of bit_size). */
      LLVMValueRef ssbo_idx = LLVMBuildExtractElement(gallivm->builder, index, loop_state.counter, "");
      LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, ssbo_idx);
      LLVMValueRef ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, ssbo_idx);
      ssbo_limit = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, shift_val), "");
      mem_ptr = ssbo_ptr;
   } else
      mem_ptr = bld->shared_ptr;

   /* do_fetch starts all-ones; ANDed with the bounds check for SSBOs. */
   LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1);
   if (ssbo_limit) {
      LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_offset, ssbo_limit);
      do_fetch = LLVMBuildAnd(builder, do_fetch, ssbo_oob_cmp, "");
   }

   LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
                                                    loop_state.counter, "");
   /* Reinterpret the operand as an integer of the atomic width. */
   value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, atomic_bld->elem_type, "");

   /* Atomics need the address itself (GEP), not a loaded value. */
   LLVMValueRef scalar_ptr;
   if (bit_size != 32) {
      LLVMValueRef mem_ptr2 = LLVMBuildBitCast(builder, mem_ptr, LLVMPointerType(atomic_bld->elem_type, 0), "");
      scalar_ptr = LLVMBuildGEP(builder, mem_ptr2, &loop_offset, 1, "");
   } else
      scalar_ptr = LLVMBuildGEP(builder, mem_ptr, &loop_offset, 1, "");

   struct lp_build_if_state ifthen;
   LLVMValueRef inner_cond, temp_res;
   LLVMValueRef scalar;

   inner_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_fetch, lp_build_const_int32(gallivm, 0), "");
   lp_build_if(&ifthen, gallivm, inner_cond);

   if (nir_op == nir_intrinsic_ssbo_atomic_comp_swap || nir_op == nir_intrinsic_shared_atomic_comp_swap) {
      /* cmpxchg: val is the expected value, val2 the replacement. */
      LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
                                                         loop_state.counter, "");
      cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atomic_bld->elem_type, "");
      scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
                                      cas_src_ptr,
                                      LLVMAtomicOrderingSequentiallyConsistent,
                                      LLVMAtomicOrderingSequentiallyConsistent,
                                      false);
      /* cmpxchg yields {old value, success}; keep only the old value. */
      scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
   } else {
      LLVMAtomicRMWBinOp op;

      switch (nir_op) {
      case nir_intrinsic_shared_atomic_add:
      case nir_intrinsic_ssbo_atomic_add:
         op = LLVMAtomicRMWBinOpAdd;
         break;
      case nir_intrinsic_shared_atomic_exchange:
      case nir_intrinsic_ssbo_atomic_exchange:
         op = LLVMAtomicRMWBinOpXchg;
         break;
      case nir_intrinsic_shared_atomic_and:
      case nir_intrinsic_ssbo_atomic_and:
         op = LLVMAtomicRMWBinOpAnd;
         break;
      case nir_intrinsic_shared_atomic_or:
      case nir_intrinsic_ssbo_atomic_or:
         op = LLVMAtomicRMWBinOpOr;
         break;
      case nir_intrinsic_shared_atomic_xor:
      case nir_intrinsic_ssbo_atomic_xor:
         op = LLVMAtomicRMWBinOpXor;
         break;
      case nir_intrinsic_shared_atomic_umin:
      case nir_intrinsic_ssbo_atomic_umin:
         op = LLVMAtomicRMWBinOpUMin;
         break;
      case nir_intrinsic_shared_atomic_umax:
      case nir_intrinsic_ssbo_atomic_umax:
         op = LLVMAtomicRMWBinOpUMax;
         break;
      case nir_intrinsic_ssbo_atomic_imin:
      case nir_intrinsic_shared_atomic_imin:
         op = LLVMAtomicRMWBinOpMin;
         break;
      case nir_intrinsic_ssbo_atomic_imax:
      case nir_intrinsic_shared_atomic_imax:
         op = LLVMAtomicRMWBinOpMax;
         break;
      default:
         unreachable("unknown atomic op");
      }
      scalar = LLVMBuildAtomicRMW(builder, op,
                                  scalar_ptr, value_ptr,
                                  LLVMAtomicOrderingSequentiallyConsistent,
                                  false);
   }
   temp_res = LLVMBuildLoad(builder, atom_res, "");
   temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
   LLVMBuildStore(builder, temp_res, atom_res);
   lp_build_else(&ifthen);
   /* Out-of-bounds lane: insert a typed zero into the result. */
   temp_res = LLVMBuildLoad(builder, atom_res, "");
   LLVMValueRef zero = bit_size == 64 ? lp_build_const_int64(gallivm, 0) : lp_build_const_int32(gallivm, 0);
   temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");
   LLVMBuildStore(builder, temp_res, atom_res);
   lp_build_endif(&ifthen);

   lp_build_endif(&exec_ifthen);
   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
                          NULL, LLVMIntUGE);
   *result = LLVMBuildLoad(builder, atom_res, "");
}
1335
emit_barrier(struct lp_build_nir_context * bld_base)1336 static void emit_barrier(struct lp_build_nir_context *bld_base)
1337 {
1338 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1339 struct gallivm_state * gallivm = bld_base->base.gallivm;
1340
1341 LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");
1342
1343 lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);
1344 LLVMPositionBuilderAtEnd(gallivm->builder, resume);
1345 }
1346
/*
 * Return the size of the indexed SSBO, broadcast across all SIMD lanes.
 * Only lane 0 of the index vector is consulted (it is dynamically uniform).
 */
static LLVMValueRef emit_get_ssbo_size(struct lp_build_nir_context *bld_base,
                                       LLVMValueRef index)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *bld_broad = &bld_base->uint_bld;
   LLVMValueRef first_index = LLVMBuildExtractElement(builder, index,
                                                      lp_build_const_int32(gallivm, 0), "");
   LLVMValueRef size = lp_build_array_get(bld_base->base.gallivm, bld->ssbo_sizes_ptr,
                                          first_index);
   return lp_build_broadcast_scalar(bld_broad, size);
}
1358
emit_image_op(struct lp_build_nir_context * bld_base,struct lp_img_params * params)1359 static void emit_image_op(struct lp_build_nir_context *bld_base,
1360 struct lp_img_params *params)
1361 {
1362 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1363 struct gallivm_state *gallivm = bld_base->base.gallivm;
1364
1365 params->type = bld_base->base.type;
1366 params->context_ptr = bld->context_ptr;
1367 params->thread_data_ptr = bld->thread_data_ptr;
1368 params->exec_mask = mask_vec(bld_base);
1369
1370 if (params->image_index_offset)
1371 params->image_index_offset = LLVMBuildExtractElement(gallivm->builder, params->image_index_offset,
1372 lp_build_const_int32(gallivm, 0), "");
1373
1374 bld->image->emit_op(bld->image,
1375 bld->bld_base.base.gallivm,
1376 params);
1377
1378 }
1379
emit_image_size(struct lp_build_nir_context * bld_base,struct lp_sampler_size_query_params * params)1380 static void emit_image_size(struct lp_build_nir_context *bld_base,
1381 struct lp_sampler_size_query_params *params)
1382 {
1383 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1384 struct gallivm_state *gallivm = bld_base->base.gallivm;
1385
1386 params->int_type = bld_base->int_bld.type;
1387 params->context_ptr = bld->context_ptr;
1388
1389 if (params->texture_unit_offset)
1390 params->texture_unit_offset = LLVMBuildExtractElement(gallivm->builder, params->texture_unit_offset,
1391 lp_build_const_int32(gallivm, 0), "");
1392 bld->image->emit_size_query(bld->image,
1393 bld->bld_base.base.gallivm,
1394 params);
1395
1396 }
1397
/*
 * Allocate output storage for every vec4 channel a variable occupies,
 * starting at component "sc" of its first slot.  Already-allocated channels
 * are left alone; a NULL outputs array means nothing to do.
 */
static void init_var_slots(struct lp_build_nir_context *bld_base,
                           nir_variable *var, unsigned sc)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   const unsigned n_comps = glsl_count_attribute_slots(var->type, false) * 4;

   if (!bld->outputs)
      return;

   for (unsigned i = 0; i < n_comps; i++) {
      /* Components wrap into successive vec4 slots past the base location. */
      unsigned loc = var->data.driver_location + (sc + i) / 4;
      unsigned chan = (sc + i) % 4;

      if (!bld->outputs[loc][chan])
         bld->outputs[loc][chan] = lp_build_alloca(bld_base->base.gallivm,
                                                   bld_base->base.vec_type, "output");
   }
}
1415
/*
 * Declare a NIR variable.  Only shader outputs need work here: their
 * per-channel storage is allocated up front via init_var_slots.
 */
static void emit_var_decl(struct lp_build_nir_context *bld_base,
                          nir_variable *var)
{
   unsigned start_comp = var->data.location_frac;

   if (var->data.mode != nir_var_shader_out)
      return;

   /* FS depth/stencil results occupy fixed channels of their slot. */
   if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
      if (var->data.location == FRAG_RESULT_STENCIL)
         start_comp = 1;
      else if (var->data.location == FRAG_RESULT_DEPTH)
         start_comp = 2;
   }

   init_var_slots(bld_base, var, start_comp);
}
1435
/*
 * Emit a texture sample.  Normally a single vectorized sample call; but if
 * the texture index is dynamic outside the fragment stage, the sample is
 * scalarized: one scalar sample per SIMD lane with that lane's coords, lod
 * and texture index, re-vectorizing the four texel channels afterwards.
 */
static void emit_tex(struct lp_build_nir_context *bld_base,
                     struct lp_sampler_params *params)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;

   params->type = bld_base->base.type;
   params->context_ptr = bld->context_ptr;
   params->thread_data_ptr = bld->thread_data_ptr;

   if (params->texture_index_offset && bld_base->shader->info.stage != MESA_SHADER_FRAGMENT) {
      /* this is horrible but this can be dynamic */
      LLVMValueRef coords[5];
      LLVMValueRef *orig_texel_ptr;
      struct lp_build_context *uint_bld = &bld_base->uint_bld;
      LLVMValueRef result[4] = { LLVMGetUndef(bld_base->base.vec_type),
                                 LLVMGetUndef(bld_base->base.vec_type),
                                 LLVMGetUndef(bld_base->base.vec_type),
                                 LLVMGetUndef(bld_base->base.vec_type) };
      LLVMValueRef texel[4], orig_offset, orig_lod;
      unsigned i;
      /* Save the vector-valued inputs before overwriting params per lane. */
      orig_texel_ptr = params->texel;
      orig_lod = params->lod;
      for (i = 0; i < 5; i++) {
         coords[i] = params->coords[i];
      }
      orig_offset = params->texture_index_offset;

      for (unsigned v = 0; v < uint_bld->type.length; v++) {
         LLVMValueRef idx = lp_build_const_int32(gallivm, v);
         LLVMValueRef new_coords[5];
         for (i = 0; i < 5; i++) {
            new_coords[i] = LLVMBuildExtractElement(gallivm->builder,
                                                    coords[i], idx, "");
         }
         params->coords = new_coords;
         params->texture_index_offset = LLVMBuildExtractElement(gallivm->builder,
                                                                orig_offset,
                                                                idx, "");
         /* Single-lane sample: switch to the scalar element type. */
         params->type = lp_elem_type(bld_base->base.type);

         if (orig_lod)
            params->lod = LLVMBuildExtractElement(gallivm->builder, orig_lod, idx, "");
         params->texel = texel;
         bld->sampler->emit_tex_sample(bld->sampler,
                                       gallivm,
                                       params);

         /* Scatter this lane's scalar texels back into the result vectors. */
         for (i = 0; i < 4; i++) {
            result[i] = LLVMBuildInsertElement(gallivm->builder, result[i], texel[i], idx, "");
         }
      }
      for (i = 0; i < 4; i++) {
         orig_texel_ptr[i] = result[i];
      }
      return;
   }

   /* Static or fragment-stage index: reduce a dynamic offset to lane 0. */
   if (params->texture_index_offset)
      params->texture_index_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder,
                                                             params->texture_index_offset,
                                                             lp_build_const_int32(bld_base->base.gallivm, 0), "");

   params->type = bld_base->base.type;
   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 params);
}
1504
emit_tex_size(struct lp_build_nir_context * bld_base,struct lp_sampler_size_query_params * params)1505 static void emit_tex_size(struct lp_build_nir_context *bld_base,
1506 struct lp_sampler_size_query_params *params)
1507 {
1508 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1509
1510 params->int_type = bld_base->int_bld.type;
1511 params->context_ptr = bld->context_ptr;
1512
1513 if (params->texture_unit_offset)
1514 params->texture_unit_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder,
1515 params->texture_unit_offset,
1516 lp_build_const_int32(bld_base->base.gallivm, 0), "");
1517 bld->sampler->emit_size_query(bld->sampler,
1518 bld->bld_base.base.gallivm,
1519 params);
1520 }
1521
/*
 * Load a NIR system-value intrinsic into `result`.
 *
 * Scalar system values held in bld->system_values are broadcast across all
 * SIMD lanes; values that are already per-lane vectors (e.g. vertex_id,
 * prim_id, basevertex) are returned as-is.  Depending on the intrinsic,
 * between 1 and 4 components of `result` are written.
 */
static void emit_sysval_intrin(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr,
                               LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   /* integer build context matching the destination bit size, used by the
    * intrinsics that can produce 64-bit results (workgroup id, num_workgroups) */
   struct lp_build_context *bld_broad = get_int_bld(bld_base, true, instr->dest.ssa.bit_size);
   switch (instr->intrinsic) {
   case nir_intrinsic_load_instance_id:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
      break;
   case nir_intrinsic_load_base_instance:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.base_instance);
      break;
   case nir_intrinsic_load_base_vertex:
      /* already a per-lane vector, no broadcast needed */
      result[0] = bld->system_values.basevertex;
      break;
   case nir_intrinsic_load_first_vertex:
      result[0] = bld->system_values.firstvertex;
      break;
   case nir_intrinsic_load_vertex_id:
      result[0] = bld->system_values.vertex_id;
      break;
   case nir_intrinsic_load_primitive_id:
      result[0] = bld->system_values.prim_id;
      break;
   case nir_intrinsic_load_workgroup_id: {
      LLVMValueRef tmp[3];
      for (unsigned i = 0; i < 3; i++) {
         tmp[i] = LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_id, lp_build_const_int32(gallivm, i), "");
         /* widen to 64-bit before broadcasting when the destination asks for it */
         if (instr->dest.ssa.bit_size == 64)
            tmp[i] = LLVMBuildZExt(gallivm->builder, tmp[i], bld_base->uint64_bld.elem_type, "");
         result[i] = lp_build_broadcast_scalar(bld_broad, tmp[i]);
      }
      break;
   }
   case nir_intrinsic_load_local_invocation_id:
      for (unsigned i = 0; i < 3; i++)
         result[i] = LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, i, "");
      break;
   case nir_intrinsic_load_local_invocation_index: {
      /* flatten: index = z * (size_x * size_y) + y * size_x + x */
      LLVMValueRef tmp, tmp2;
      tmp = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, 1), ""));
      tmp2 = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, 0), ""));
      tmp = lp_build_mul(&bld_base->uint_bld, tmp, tmp2);
      tmp = lp_build_mul(&bld_base->uint_bld, tmp, LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, 2, ""));

      tmp2 = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, 0), ""));
      tmp2 = lp_build_mul(&bld_base->uint_bld, tmp2, LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, 1, ""));
      tmp = lp_build_add(&bld_base->uint_bld, tmp, tmp2);
      tmp = lp_build_add(&bld_base->uint_bld, tmp, LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, 0, ""));
      result[0] = tmp;
      break;
   }
   case nir_intrinsic_load_num_workgroups: {
      LLVMValueRef tmp[3];
      for (unsigned i = 0; i < 3; i++) {
         tmp[i] = LLVMBuildExtractElement(gallivm->builder, bld->system_values.grid_size, lp_build_const_int32(gallivm, i), "");
         if (instr->dest.ssa.bit_size == 64)
            tmp[i] = LLVMBuildZExt(gallivm->builder, tmp[i], bld_base->uint64_bld.elem_type, "");
         result[i] = lp_build_broadcast_scalar(bld_broad, tmp[i]);
      }
      break;
   }
   case nir_intrinsic_load_invocation_id:
      /* TCS invocation id is already per-lane; other stages hold a scalar */
      if (bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL)
         result[0] = bld->system_values.invocation_id;
      else
         result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
      break;
   case nir_intrinsic_load_front_face:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);
      break;
   case nir_intrinsic_load_draw_id:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.draw_id);
      break;
   default:
      /* NOTE: case labels placed after `default:` below are still normal,
       * reachable members of this switch; unhandled intrinsics simply leave
       * `result` untouched. */
      break;
   case nir_intrinsic_load_workgroup_size:
      for (unsigned i = 0; i < 3; i++)
         result[i] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, i), ""));
      break;
   case nir_intrinsic_load_work_dim:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.work_dim);
      break;
   case nir_intrinsic_load_tess_coord:
      for (unsigned i = 0; i < 3; i++) {
         result[i] = LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_coord, i, "");
      }
      break;
   case nir_intrinsic_load_tess_level_outer:
      for (unsigned i = 0; i < 4; i++)
         result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_outer, i, ""));
      break;
   case nir_intrinsic_load_tess_level_inner:
      for (unsigned i = 0; i < 2; i++)
         result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_inner, i, ""));
      break;
   case nir_intrinsic_load_patch_vertices_in:
      result[0] = bld->system_values.vertices_in;
      break;
   case nir_intrinsic_load_sample_id:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.sample_id);
      break;
   case nir_intrinsic_load_sample_pos:
      /* sample_pos is laid out as a flat array of (x, y) pairs indexed by
       * sample id; fetch both components for the current sample */
      for (unsigned i = 0; i < 2; i++) {
         LLVMValueRef idx = LLVMBuildMul(gallivm->builder, bld->system_values.sample_id, lp_build_const_int32(gallivm, 2), "");
         idx = LLVMBuildAdd(gallivm->builder, idx, lp_build_const_int32(gallivm, i), "");
         LLVMValueRef val = lp_build_array_get(gallivm, bld->system_values.sample_pos, idx);
         result[i] = lp_build_broadcast_scalar(&bld_base->base, val);
      }
      break;
   case nir_intrinsic_load_sample_mask_in:
      result[0] = bld->system_values.sample_mask_in;
      break;
   case nir_intrinsic_load_view_index:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.view_index);
      break;
   case nir_intrinsic_load_subgroup_invocation: {
      /* constant vector <0, 1, 2, ... length-1>: each lane is its own index */
      LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
      for(unsigned i = 0; i < bld->bld_base.base.type.length; ++i)
         elems[i] = lp_build_const_int32(gallivm, i);
      result[0] = LLVMConstVector(elems, bld->bld_base.base.type.length);
      break;
   }
   case nir_intrinsic_load_subgroup_id:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.subgroup_id);
      break;
   case nir_intrinsic_load_num_subgroups:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.num_subgroups);
      break;
   }
}
1655
/*
 * Compute the gl_HelperInvocation value: a lane is a helper invocation
 * iff it is not fully live in the current execution mask (live lanes are
 * all-ones, i.e. -1).
 */
static void emit_helper_invocation(struct lp_build_nir_context *bld_base,
                                   LLVMValueRef *dst)
{
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef all_ones =
      lp_build_const_int_vec(gallivm, uint_bld->type, -1);

   *dst = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
                       mask_vec(bld_base), all_ones);
}
1663
bgnloop(struct lp_build_nir_context * bld_base)1664 static void bgnloop(struct lp_build_nir_context *bld_base)
1665 {
1666 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1667 lp_exec_bgnloop(&bld->exec_mask, true);
1668 }
1669
endloop(struct lp_build_nir_context * bld_base)1670 static void endloop(struct lp_build_nir_context *bld_base)
1671 {
1672 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1673 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
1674 }
1675
/*
 * Begin an if-statement: reinterpret the condition as an integer vector
 * and push it as a new conditional level on the execution-mask stack.
 */
static void if_cond(struct lp_build_nir_context *bld_base, LLVMValueRef cond)
{
   struct lp_build_nir_soa_context *bld =
      (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMValueRef cond_vec =
      LLVMBuildBitCast(builder, cond, bld_base->base.int_vec_type, "");

   lp_exec_mask_cond_push(&bld->exec_mask, cond_vec);
}
1682
else_stmt(struct lp_build_nir_context * bld_base)1683 static void else_stmt(struct lp_build_nir_context *bld_base)
1684 {
1685 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1686 lp_exec_mask_cond_invert(&bld->exec_mask);
1687 }
1688
endif_stmt(struct lp_build_nir_context * bld_base)1689 static void endif_stmt(struct lp_build_nir_context *bld_base)
1690 {
1691 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1692 lp_exec_mask_cond_pop(&bld->exec_mask);
1693 }
1694
break_stmt(struct lp_build_nir_context * bld_base)1695 static void break_stmt(struct lp_build_nir_context *bld_base)
1696 {
1697 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1698
1699 lp_exec_break(&bld->exec_mask, NULL, false);
1700 }
1701
continue_stmt(struct lp_build_nir_context * bld_base)1702 static void continue_stmt(struct lp_build_nir_context *bld_base)
1703 {
1704 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1705 lp_exec_continue(&bld->exec_mask);
1706 }
1707
/*
 * Emit a (possibly conditional) fragment discard.
 *
 * Computes a surviving-lanes mask and folds it into the fragment mask:
 * with a condition, lanes where cond is true are killed (restricted to
 * currently executing lanes); without one, every currently active lane is
 * killed.
 */
static void discard(struct lp_build_nir_context *bld_base, LLVMValueRef cond)
{
   struct lp_build_nir_soa_context *bld =
      (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   const bool has_exec = bld->exec_mask.has_mask;
   LLVMValueRef survive;

   if (cond) {
      /* Survivors are the lanes where cond is false... */
      survive = LLVMBuildNot(builder, cond, "");
      if (has_exec) {
         /* ...plus any lane not currently executing (those are untouched). */
         LLVMValueRef inactive =
            LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
         survive = LLVMBuildOr(builder, survive, inactive, "");
      }
   } else {
      /* Unconditional: only inactive lanes survive; with no exec mask at
       * all, nothing survives. */
      survive = has_exec
         ? LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp")
         : LLVMConstNull(bld->bld_base.base.int_vec_type);
   }

   lp_build_mask_update(bld->mask, survive);
}
1730
1731 static void
increment_vec_ptr_by_mask(struct lp_build_nir_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)1732 increment_vec_ptr_by_mask(struct lp_build_nir_context * bld_base,
1733 LLVMValueRef ptr,
1734 LLVMValueRef mask)
1735 {
1736 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1737 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
1738
1739 current_vec = LLVMBuildSub(builder, current_vec, mask, "");
1740
1741 LLVMBuildStore(builder, current_vec, ptr);
1742 }
1743
1744 static void
clear_uint_vec_ptr_from_mask(struct lp_build_nir_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)1745 clear_uint_vec_ptr_from_mask(struct lp_build_nir_context * bld_base,
1746 LLVMValueRef ptr,
1747 LLVMValueRef mask)
1748 {
1749 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1750 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
1751
1752 current_vec = lp_build_select(&bld_base->uint_bld,
1753 mask,
1754 bld_base->uint_bld.zero,
1755 current_vec);
1756
1757 LLVMBuildStore(builder, current_vec, ptr);
1758 }
1759
1760 static LLVMValueRef
clamp_mask_to_max_output_vertices(struct lp_build_nir_soa_context * bld,LLVMValueRef current_mask_vec,LLVMValueRef total_emitted_vertices_vec)1761 clamp_mask_to_max_output_vertices(struct lp_build_nir_soa_context * bld,
1762 LLVMValueRef current_mask_vec,
1763 LLVMValueRef total_emitted_vertices_vec)
1764 {
1765 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1766 struct lp_build_context *int_bld = &bld->bld_base.int_bld;
1767 LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
1768 total_emitted_vertices_vec,
1769 bld->max_output_vertices_vec);
1770
1771 return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
1772 }
1773
/*
 * Emit one GS vertex on the given vertex stream.
 *
 * Ignores streams beyond the shader's declared stream count.  The current
 * execution mask is clamped so lanes that already emitted the maximum
 * output vertex count are skipped, then the vertex is handed to the GS
 * interface and the per-stream vertex counters are bumped for the active
 * lanes.
 */
static void emit_vertex(struct lp_build_nir_context *bld_base, uint32_t stream_id)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (stream_id >= bld->gs_vertex_streams)
      return;
   assert(bld->gs_iface->emit_vertex);
   LLVMValueRef total_emitted_vertices_vec =
      LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr[stream_id], "");
   LLVMValueRef mask = mask_vec(bld_base);
   /* drop lanes that hit the max_vertices limit */
   mask = clamp_mask_to_max_output_vertices(bld, mask,
                                            total_emitted_vertices_vec);
   bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
                              bld->outputs,
                              total_emitted_vertices_vec,
                              mask,
                              lp_build_const_int_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, stream_id));

   /* bump both the per-primitive and total vertex counters in active lanes */
   increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
                             mask);
   increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr[stream_id],
                             mask);
}
1798
1799 static void
end_primitive_masked(struct lp_build_nir_context * bld_base,LLVMValueRef mask,uint32_t stream_id)1800 end_primitive_masked(struct lp_build_nir_context * bld_base,
1801 LLVMValueRef mask, uint32_t stream_id)
1802 {
1803 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1804 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1805
1806 if (stream_id >= bld->gs_vertex_streams)
1807 return;
1808 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1809 LLVMValueRef emitted_vertices_vec =
1810 LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr[stream_id], "");
1811 LLVMValueRef emitted_prims_vec =
1812 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr[stream_id], "");
1813 LLVMValueRef total_emitted_vertices_vec =
1814 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr[stream_id], "");
1815
1816 LLVMValueRef emitted_mask = lp_build_cmp(uint_bld,
1817 PIPE_FUNC_NOTEQUAL,
1818 emitted_vertices_vec,
1819 uint_bld->zero);
1820 mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
1821 bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
1822 total_emitted_vertices_vec,
1823 emitted_vertices_vec, emitted_prims_vec, mask, stream_id);
1824 increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr[stream_id],
1825 mask);
1826 clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
1827 mask);
1828 }
1829
/* EndPrimitive: close the current primitive for all active lanes. */
static void end_primitive(struct lp_build_nir_context *bld_base, uint32_t stream_id)
{
   ASSERTED struct lp_build_nir_soa_context *bld =
      (struct lp_build_nir_soa_context *)bld_base;

   assert(bld->gs_iface->end_primitive);

   end_primitive_masked(bld_base, mask_vec(bld_base), stream_id);
}
1839
1840 static void
emit_prologue(struct lp_build_nir_soa_context * bld)1841 emit_prologue(struct lp_build_nir_soa_context *bld)
1842 {
1843 struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
1844 if (bld->indirects & nir_var_shader_in && !bld->gs_iface && !bld->tcs_iface && !bld->tes_iface) {
1845 uint32_t num_inputs = util_bitcount64(bld->bld_base.shader->info.inputs_read);
1846 unsigned index, chan;
1847 LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
1848 LLVMValueRef array_size = lp_build_const_int32(gallivm, num_inputs * 4);
1849 bld->inputs_array = lp_build_array_alloca(gallivm,
1850 vec_type, array_size,
1851 "input_array");
1852
1853 for (index = 0; index < num_inputs; ++index) {
1854 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
1855 LLVMValueRef lindex =
1856 lp_build_const_int32(gallivm, index * 4 + chan);
1857 LLVMValueRef input_ptr =
1858 LLVMBuildGEP(gallivm->builder, bld->inputs_array,
1859 &lindex, 1, "");
1860 LLVMValueRef value = bld->inputs[index][chan];
1861 if (value)
1862 LLVMBuildStore(gallivm->builder, value, input_ptr);
1863 }
1864 }
1865 }
1866 }
1867
/*
 * Emit the subgroup vote intrinsics (vote_any / vote_all / vote_ieq /
 * vote_feq).
 *
 * The vote is computed with a runtime loop over the SIMD lanes, skipping
 * lanes that are inactive in the current execution mask.  For ieq/feq a
 * first loop picks a reference value from an active lane (later active
 * lanes overwrite earlier ones, which is fine: any active lane works as
 * the reference); the main loop then ANDs/ORs each active lane's
 * value/comparison into an accumulator.  The scalar outcome is broadcast
 * into result[0].
 */
static void emit_vote(struct lp_build_nir_context *bld_base, LLVMValueRef src,
                      nir_intrinsic_instr *instr, LLVMValueRef result[4])
{
   struct gallivm_state * gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   uint32_t bit_size = nir_src_bit_size(instr->src[0]);
   LLVMValueRef exec_mask = mask_vec(bld_base);
   struct lp_build_loop_state loop_state;
   /* per-lane "is active" predicate vector */
   LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");

   /* NOTE(review): res_store is read before any explicit store on the
    * eq paths when no lane is active — presumably lp_build_alloca
    * zero-initializes the slot; verify in lp_bld_flow. */
   LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->uint_bld.elem_type, "");
   LLVMValueRef eq_store = lp_build_alloca(gallivm, get_int_bld(bld_base, true, bit_size)->elem_type, "");
   LLVMValueRef init_val = NULL;
   if (instr->intrinsic == nir_intrinsic_vote_ieq ||
       instr->intrinsic == nir_intrinsic_vote_feq) {
      /* for equal we unfortunately have to loop and find the first valid one. */
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
      LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");

      struct lp_build_if_state ifthen;
      lp_build_if(&ifthen, gallivm, if_cond);
      LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
                                                       loop_state.counter, "");
      LLVMBuildStore(builder, value_ptr, eq_store);
      /* seed the accumulator to "all equal" once a reference lane exists */
      LLVMBuildStore(builder, lp_build_const_int32(gallivm, -1), res_store);
      lp_build_endif(&ifthen);
      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                             NULL, LLVMIntUGE);
      init_val = LLVMBuildLoad(builder, eq_store, "");
   } else {
      /* identity element: 0 for "any" (OR-reduce), -1 for "all" (AND-reduce) */
      LLVMBuildStore(builder, lp_build_const_int32(gallivm, instr->intrinsic == nir_intrinsic_vote_any ? 0 : -1), res_store);
   }

   LLVMValueRef res;
   /* loop_state is re-initialized here, so reusing the variable is safe */
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
   LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
                                                    loop_state.counter, "");
   struct lp_build_if_state ifthen;
   LLVMValueRef if_cond;
   if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");

   /* only fold in lanes that are active in the exec mask */
   lp_build_if(&ifthen, gallivm, if_cond);
   res = LLVMBuildLoad(builder, res_store, "");

   if (instr->intrinsic == nir_intrinsic_vote_feq) {
      /* float compare against the reference lane (unordered-equal) */
      struct lp_build_context *flt_bld = get_flt_bld(bld_base, bit_size);
      LLVMValueRef tmp = LLVMBuildFCmp(builder, LLVMRealUEQ,
                                       LLVMBuildBitCast(builder, init_val, flt_bld->elem_type, ""),
                                       LLVMBuildBitCast(builder, value_ptr, flt_bld->elem_type, ""), "");
      tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, "");
      res = LLVMBuildAnd(builder, res, tmp, "");
   } else if (instr->intrinsic == nir_intrinsic_vote_ieq) {
      LLVMValueRef tmp = LLVMBuildICmp(builder, LLVMIntEQ, init_val, value_ptr, "");
      tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, "");
      res = LLVMBuildAnd(builder, res, tmp, "");
   } else if (instr->intrinsic == nir_intrinsic_vote_any)
      res = LLVMBuildOr(builder, res, value_ptr, "");
   else
      res = LLVMBuildAnd(builder, res, value_ptr, "");
   LLVMBuildStore(builder, res, res_store);
   lp_build_endif(&ifthen);
   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                          NULL, LLVMIntUGE);
   result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildLoad(builder, res_store, ""));
}
1933
/*
 * Emit the subgroup ballot intrinsic.
 *
 * Builds a 32-bit lane bitmask: bit i is set iff lane i's src value is
 * true (all-ones from a comparison) and the lane is active in the exec
 * mask.  The scalar bitmask is broadcast into result[0].
 *
 * NOTE(review): the shift uses a 32-bit constant 1, so this presumably
 * assumes uint_bld.type.length <= 32 — confirm against the supported
 * vector widths.
 */
static void emit_ballot(struct lp_build_nir_context *bld_base, LLVMValueRef src, nir_intrinsic_instr *instr, LLVMValueRef result[4])
{
   struct gallivm_state * gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef exec_mask = mask_vec(bld_base);
   struct lp_build_loop_state loop_state;
   /* mask off inactive lanes up front */
   src = LLVMBuildAnd(builder, src, exec_mask, "");
   LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
   LLVMValueRef res;
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
   LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
                                                    loop_state.counter, "");
   res = LLVMBuildLoad(builder, res_store, "");
   /* value_ptr is all-ones for a true lane, so ANDing with (1 << i)
    * isolates exactly that lane's ballot bit */
   res = LLVMBuildOr(builder,
                     res,
                     LLVMBuildAnd(builder, value_ptr, LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1), loop_state.counter, ""), ""), "");
   LLVMBuildStore(builder, res, res_store);

   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                          NULL, LLVMIntUGE);
   result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildLoad(builder, res_store, ""));
}
1956
/*
 * Emit the subgroup elect intrinsic.
 *
 * Scans the lanes at runtime for the first one whose exec-mask entry is
 * all-ones (-1); a found_store flag ensures only the first match records
 * its index.  The result is a vector that is -1 in that elected lane and
 * 0 everywhere else.
 */
static void emit_elect(struct lp_build_nir_context *bld_base, LLVMValueRef result[4])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef exec_mask = mask_vec(bld_base);
   struct lp_build_loop_state loop_state;

   /* NOTE(review): both allocas are read before any explicit store —
    * presumably lp_build_alloca zero-initializes; verify in lp_bld_flow. */
   LLVMValueRef idx_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
   LLVMValueRef found_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
   LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, exec_mask,
                                                    loop_state.counter, "");
   /* lane is active iff its mask entry is all-ones */
   LLVMValueRef cond = LLVMBuildICmp(gallivm->builder,
                                     LLVMIntEQ,
                                     value_ptr,
                                     lp_build_const_int32(gallivm, -1), "");
   /* ...and no earlier lane has been elected yet */
   LLVMValueRef cond2 = LLVMBuildICmp(gallivm->builder,
                                      LLVMIntEQ,
                                      LLVMBuildLoad(builder, found_store, ""),
                                      lp_build_const_int32(gallivm, 0), "");

   cond = LLVMBuildAnd(builder, cond, cond2, "");
   struct lp_build_if_state ifthen;
   lp_build_if(&ifthen, gallivm, cond);
   LLVMBuildStore(builder, lp_build_const_int32(gallivm, 1), found_store);
   LLVMBuildStore(builder, loop_state.counter, idx_store);
   lp_build_endif(&ifthen);
   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                          NULL, LLVMIntUGE);

   /* set -1 in the elected lane, leave every other lane zero */
   result[0] = LLVMBuildInsertElement(builder, bld_base->uint_bld.zero,
                                      lp_build_const_int32(gallivm, -1),
                                      LLVMBuildLoad(builder, idx_store, ""),
                                      "");
}
1992
/*
 * Emit the subgroup reduce / inclusive_scan / exclusive_scan intrinsics.
 *
 * Cannot use LLVM's vector-reduction intrinsics because inactive lanes
 * (per the execution mask) must be skipped, so a runtime loop walks the
 * lanes in order, folding each active lane's value into a scalar
 * accumulator (scan_store) with the operation from
 * nir_intrinsic_reduction_op.  The accumulator starts at the operation's
 * identity value.  For reduce, the final scalar is broadcast; for scans,
 * a result vector (res_store) is built lane by lane — exclusive scans
 * record the accumulator *before* folding in the lane, inclusive scans
 * after.
 *
 * Values are kept bit-cast to integers in the allocas; float operations
 * bit-cast back and forth around the arithmetic.
 */
static void emit_reduce(struct lp_build_nir_context *bld_base, LLVMValueRef src,
                        nir_intrinsic_instr *instr, LLVMValueRef result[4])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   uint32_t bit_size = nir_src_bit_size(instr->src[0]);
   /* can't use llvm reduction intrinsics because of exec_mask */
   LLVMValueRef exec_mask = mask_vec(bld_base);
   struct lp_build_loop_state loop_state;
   nir_op reduction_op = nir_intrinsic_reduction_op(instr);

   LLVMValueRef res_store = NULL;
   LLVMValueRef scan_store;
   struct lp_build_context *int_bld = get_int_bld(bld_base, true, bit_size);

   /* scans need a full result vector; plain reduce only needs the scalar */
   if (instr->intrinsic != nir_intrinsic_reduce)
      res_store = lp_build_alloca(gallivm, int_bld->vec_type, "");

   scan_store = lp_build_alloca(gallivm, int_bld->elem_type, "");

   struct lp_build_context elem_bld;
   bool is_flt = reduction_op == nir_op_fadd ||
      reduction_op == nir_op_fmul ||
      reduction_op == nir_op_fmin ||
      reduction_op == nir_op_fmax;
   bool is_unsigned = reduction_op == nir_op_umin ||
      reduction_op == nir_op_umax;

   struct lp_build_context *vec_bld = is_flt ? get_flt_bld(bld_base, bit_size) :
      get_int_bld(bld_base, is_unsigned, bit_size);

   /* scalar (single-element) context for the per-lane arithmetic */
   lp_build_context_init(&elem_bld, gallivm, lp_elem_type(vec_bld->type));

   LLVMValueRef store_val = NULL;
   /*
    * Put the identity value for the operation into the storage
    */
   switch (reduction_op) {
   case nir_op_fmin: {
      /* identity for min is +inf (stored bit-cast as integer) */
      LLVMValueRef flt_max = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), INFINITY) :
         (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), INFINITY) : lp_build_const_float(gallivm, INFINITY));
      store_val = LLVMBuildBitCast(builder, flt_max, int_bld->elem_type, "");
      break;
   }
   case nir_op_fmax: {
      /* identity for max is -inf */
      LLVMValueRef flt_min = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), -INFINITY) :
         (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), -INFINITY) : lp_build_const_float(gallivm, -INFINITY));
      store_val = LLVMBuildBitCast(builder, flt_min, int_bld->elem_type, "");
      break;
   }
   case nir_op_fmul: {
      /* identity for multiply is 1.0 */
      LLVMValueRef flt_one = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), 1.0) :
         (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), 1.0) : lp_build_const_float(gallivm, 1.0));
      store_val = LLVMBuildBitCast(builder, flt_one, int_bld->elem_type, "");
      break;
   }
   case nir_op_umin:
      /* identity for unsigned min is the type's max value */
      switch (bit_size) {
      case 8:
         store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), UINT8_MAX, 0);
         break;
      case 16:
         store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), UINT16_MAX, 0);
         break;
      case 32:
      default:
         store_val = lp_build_const_int32(gallivm, UINT_MAX);
         break;
      case 64:
         store_val = lp_build_const_int64(gallivm, UINT64_MAX);
         break;
      }
      break;
   case nir_op_imin:
      /* identity for signed min is the type's max value */
      switch (bit_size) {
      case 8:
         store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), INT8_MAX, 0);
         break;
      case 16:
         store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), INT16_MAX, 0);
         break;
      case 32:
      default:
         store_val = lp_build_const_int32(gallivm, INT_MAX);
         break;
      case 64:
         store_val = lp_build_const_int64(gallivm, INT64_MAX);
         break;
      }
      break;
   case nir_op_imax:
      /* identity for signed max is the type's min value */
      switch (bit_size) {
      case 8:
         store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), INT8_MIN, 0);
         break;
      case 16:
         store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), INT16_MIN, 0);
         break;
      case 32:
      default:
         store_val = lp_build_const_int32(gallivm, INT_MIN);
         break;
      case 64:
         store_val = lp_build_const_int64(gallivm, INT64_MIN);
         break;
      }
      break;
   case nir_op_imul:
      /* identity for integer multiply is 1 */
      switch (bit_size) {
      case 8:
         store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 1, 0);
         break;
      case 16:
         store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 1, 0);
         break;
      case 32:
      default:
         store_val = lp_build_const_int32(gallivm, 1);
         break;
      case 64:
         store_val = lp_build_const_int64(gallivm, 1);
         break;
      }
      break;
   case nir_op_iand:
      /* identity for AND is all-ones */
      switch (bit_size) {
      case 8:
         store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0xff, 0);
         break;
      case 16:
         store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0xffff, 0);
         break;
      case 32:
      default:
         store_val = lp_build_const_int32(gallivm, 0xffffffff);
         break;
      case 64:
         store_val = lp_build_const_int64(gallivm, 0xffffffffffffffffLL);
         break;
      }
      break;
   default:
      /* add / or / xor: identity is 0, which the alloca already holds —
       * NOTE(review): this presumably relies on lp_build_alloca
       * zero-initializing; verify in lp_bld_flow */
      break;
   }
   if (store_val)
      LLVMBuildStore(builder, store_val, scan_store);

   /* per-lane "is active" predicate vector */
   LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");

   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));

   struct lp_build_if_state ifthen;
   LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
   lp_build_if(&ifthen, gallivm, if_cond);
   LLVMValueRef value = LLVMBuildExtractElement(gallivm->builder, src, loop_state.counter, "");

   LLVMValueRef res = NULL;
   LLVMValueRef scan_val = LLVMBuildLoad(gallivm->builder, scan_store, "");
   if (instr->intrinsic != nir_intrinsic_reduce)
      res = LLVMBuildLoad(gallivm->builder, res_store, "");

   /* exclusive scan: record the accumulator before folding this lane in */
   if (instr->intrinsic == nir_intrinsic_exclusive_scan)
      res = LLVMBuildInsertElement(builder, res, scan_val, loop_state.counter, "");

   if (is_flt) {
      scan_val = LLVMBuildBitCast(builder, scan_val, elem_bld.elem_type, "");
      value = LLVMBuildBitCast(builder, value, elem_bld.elem_type, "");
   }
   switch (reduction_op) {
   case nir_op_fadd:
   case nir_op_iadd:
      scan_val = lp_build_add(&elem_bld, value, scan_val);
      break;
   case nir_op_fmul:
   case nir_op_imul:
      scan_val = lp_build_mul(&elem_bld, value, scan_val);
      break;
   case nir_op_imin:
   case nir_op_umin:
   case nir_op_fmin:
      scan_val = lp_build_min(&elem_bld, value, scan_val);
      break;
   case nir_op_imax:
   case nir_op_umax:
   case nir_op_fmax:
      scan_val = lp_build_max(&elem_bld, value, scan_val);
      break;
   case nir_op_iand:
      scan_val = lp_build_and(&elem_bld, value, scan_val);
      break;
   case nir_op_ior:
      scan_val = lp_build_or(&elem_bld, value, scan_val);
      break;
   case nir_op_ixor:
      scan_val = lp_build_xor(&elem_bld, value, scan_val);
      break;
   default:
      assert(0);
      break;
   }
   if (is_flt)
      scan_val = LLVMBuildBitCast(builder, scan_val, int_bld->elem_type, "");
   LLVMBuildStore(builder, scan_val, scan_store);

   /* inclusive scan: record the accumulator after folding this lane in */
   if (instr->intrinsic == nir_intrinsic_inclusive_scan) {
      res = LLVMBuildInsertElement(builder, res, scan_val, loop_state.counter, "");
   }

   if (instr->intrinsic != nir_intrinsic_reduce)
      LLVMBuildStore(builder, res, res_store);
   lp_build_endif(&ifthen);

   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                          NULL, LLVMIntUGE);
   if (instr->intrinsic == nir_intrinsic_reduce)
      result[0] = lp_build_broadcast_scalar(int_bld, LLVMBuildLoad(builder, scan_store, ""));
   else
      result[0] = LLVMBuildLoad(builder, res_store, "");
}
2212
/*
 * Emit read_invocation / read_first_invocation.
 *
 * Loops over the lanes (counting down, step -1, until the counter hits
 * -1) storing the candidate index for every active lane — so the lowest
 * active lane's store lands last and wins.  Without `invoc` the elected
 * lane index itself is used (read_first_invocation); with `invoc`, that
 * lane's entry of the invocation vector is used instead.  The selected
 * lane's value from `src` is then broadcast to all lanes.
 */
static void emit_read_invocation(struct lp_build_nir_context *bld_base,
                                 LLVMValueRef src,
                                 unsigned bit_size,
                                 LLVMValueRef invoc,
                                 LLVMValueRef result[4])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef idx;
   struct lp_build_context *uint_bld = get_int_bld(bld_base, true, bit_size);

   /* have to find the first active invocation */
   LLVMValueRef exec_mask = mask_vec(bld_base);
   struct lp_build_loop_state loop_state;
   LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
   LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
   /* NOTE(review): the counter starts at type.length, so the first
    * iteration extracts one element past the last lane — looks like an
    * off-by-one unless lp_build_loop applies the step before the body;
    * verify against lp_build_loop_begin/lp_build_loop_end_cond semantics
    * in lp_bld_flow. */
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length));

   LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
   struct lp_build_if_state ifthen;

   lp_build_if(&ifthen, gallivm, if_cond);
   LLVMValueRef store_val = loop_state.counter;
   if (invoc)
      store_val = LLVMBuildExtractElement(gallivm->builder, invoc, loop_state.counter, "");
   LLVMBuildStore(builder, store_val, res_store);
   lp_build_endif(&ifthen);

   /* count down by -1 until the counter equals -1 */
   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, -1),
                          lp_build_const_int32(gallivm, -1), LLVMIntEQ);
   idx = LLVMBuildLoad(builder, res_store, "");

   LLVMValueRef value = LLVMBuildExtractElement(gallivm->builder,
                                                src, idx, "");
   result[0] = lp_build_broadcast_scalar(uint_bld, value);
}
2249
2250 static void
emit_interp_at(struct lp_build_nir_context * bld_base,unsigned num_components,nir_variable * var,bool centroid,bool sample,unsigned const_index,LLVMValueRef indir_index,LLVMValueRef offsets[2],LLVMValueRef dst[4])2251 emit_interp_at(struct lp_build_nir_context *bld_base,
2252 unsigned num_components,
2253 nir_variable *var,
2254 bool centroid,
2255 bool sample,
2256 unsigned const_index,
2257 LLVMValueRef indir_index,
2258 LLVMValueRef offsets[2],
2259 LLVMValueRef dst[4])
2260 {
2261 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2262
2263 for (unsigned i = 0; i < num_components; i++) {
2264 dst[i] = bld->fs_iface->interp_fn(bld->fs_iface, &bld_base->base,
2265 const_index + var->data.driver_location, i + var->data.location_frac,
2266 centroid, sample, indir_index, offsets);
2267 }
2268 }
2269
get_scratch_thread_offsets(struct gallivm_state * gallivm,struct lp_type type,unsigned scratch_size)2270 static LLVMValueRef get_scratch_thread_offsets(struct gallivm_state *gallivm,
2271 struct lp_type type,
2272 unsigned scratch_size)
2273 {
2274 LLVMTypeRef elem_type = lp_build_int_elem_type(gallivm, type);
2275 LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
2276 unsigned i;
2277
2278 if (type.length == 1)
2279 return LLVMConstInt(elem_type, 0, 0);
2280
2281 for (i = 0; i < type.length; ++i)
2282 elems[i] = LLVMConstInt(elem_type, scratch_size * i, 0);
2283
2284 return LLVMConstVector(elems, type.length);
2285 }
2286
2287 static void
/* Load nc components of bit_size bits from the per-thread scratch area.
 *
 * The scratch buffer is laid out as one scratch_size-byte slice per SIMD
 * lane (see get_scratch_thread_offsets).  Because each lane can carry a
 * different offset, the vector load is scalarized: an IR loop walks the
 * lanes, loads for active lanes and inserts 0 for inactive ones.
 *
 * Fixes vs. previous revision: stray ';;', and the per-bit-size zero
 * constant chain is replaced by LLVMConstNull on the element type, which
 * yields the correctly-sized zero for all of 8/16/32/64 bits.
 */
static void
emit_load_scratch(struct lp_build_nir_context *bld_base,
                  unsigned nc, unsigned bit_size,
                  LLVMValueRef offset,
                  LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state * gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *load_bld;
   LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);
   uint32_t shift_val = bit_size_to_shift_size(bit_size);

   load_bld = get_int_bld(bld_base, true, bit_size);

   /* Byte offset -> element index: add the per-lane slice base, then divide
    * by the element size.
    */
   offset = lp_build_add(uint_bld, offset, thread_offsets);
   offset = lp_build_shr_imm(uint_bld, offset, shift_val);
   for (unsigned c = 0; c < nc; c++) {
      /* Per-lane element indices for component c (consecutive elements). */
      LLVMValueRef chan_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
      LLVMValueRef exec_mask = mask_vec(bld_base);

      LLVMValueRef result = lp_build_alloca(gallivm, load_bld->vec_type, "");
      struct lp_build_loop_state loop_state;
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));

      struct lp_build_if_state ifthen;
      LLVMValueRef cond, temp_res;

      /* Scalarize this lane: its element index and its active bit. */
      LLVMValueRef loop_index = LLVMBuildExtractElement(gallivm->builder, chan_index,
                                                        loop_state.counter, "");
      cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
      cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");

      lp_build_if(&ifthen, gallivm, cond);
      LLVMValueRef scalar;
      LLVMValueRef ptr2 = LLVMBuildBitCast(builder, bld->scratch_ptr, LLVMPointerType(load_bld->elem_type, 0), "");
      scalar = lp_build_pointer_get(builder, ptr2, loop_index);

      temp_res = LLVMBuildLoad(builder, result, "");
      temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
      LLVMBuildStore(builder, temp_res, result);
      lp_build_else(&ifthen);
      /* Inactive lane: insert a zero of the element width. */
      temp_res = LLVMBuildLoad(builder, result, "");
      LLVMValueRef zero = LLVMConstNull(load_bld->elem_type);
      temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");
      LLVMBuildStore(builder, temp_res, result);
      lp_build_endif(&ifthen);
      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
                             NULL, LLVMIntUGE);
      outval[c] = LLVMBuildLoad(gallivm->builder, result, "");
   }
}
2348
2349 static void
emit_store_scratch(struct lp_build_nir_context * bld_base,unsigned writemask,unsigned nc,unsigned bit_size,LLVMValueRef offset,LLVMValueRef dst)2350 emit_store_scratch(struct lp_build_nir_context *bld_base,
2351 unsigned writemask, unsigned nc,
2352 unsigned bit_size, LLVMValueRef offset,
2353 LLVMValueRef dst)
2354 {
2355 struct gallivm_state * gallivm = bld_base->base.gallivm;
2356 LLVMBuilderRef builder = gallivm->builder;
2357 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2358 struct lp_build_context *uint_bld = &bld_base->uint_bld;
2359 struct lp_build_context *store_bld;
2360 LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);;
2361 uint32_t shift_val = bit_size_to_shift_size(bit_size);
2362 store_bld = get_int_bld(bld_base, true, bit_size);
2363
2364 LLVMValueRef exec_mask = mask_vec(bld_base);
2365 offset = lp_build_add(uint_bld, offset, thread_offsets);
2366 offset = lp_build_shr_imm(uint_bld, offset, shift_val);
2367
2368 for (unsigned c = 0; c < nc; c++) {
2369 if (!(writemask & (1u << c)))
2370 continue;
2371 LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
2372 LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
2373
2374 struct lp_build_loop_state loop_state;
2375 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2376
2377 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
2378 loop_state.counter, "");
2379 value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, "");
2380
2381 struct lp_build_if_state ifthen;
2382 LLVMValueRef cond;
2383
2384 loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
2385 loop_state.counter, "");
2386
2387 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
2388 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
2389 lp_build_if(&ifthen, gallivm, cond);
2390
2391 LLVMValueRef ptr2 = LLVMBuildBitCast(builder, bld->scratch_ptr, LLVMPointerType(store_bld->elem_type, 0), "");
2392 lp_build_pointer_set(builder, ptr2, loop_index, value_ptr);
2393
2394 lp_build_endif(&ifthen);
2395 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
2396 NULL, LLVMIntUGE);
2397 }
2398 }
2399
lp_build_nir_soa(struct gallivm_state * gallivm,struct nir_shader * shader,const struct lp_build_tgsi_params * params,LLVMValueRef (* outputs)[4])2400 void lp_build_nir_soa(struct gallivm_state *gallivm,
2401 struct nir_shader *shader,
2402 const struct lp_build_tgsi_params *params,
2403 LLVMValueRef (*outputs)[4])
2404 {
2405 struct lp_build_nir_soa_context bld;
2406 struct lp_type type = params->type;
2407 struct lp_type res_type;
2408
2409 assert(type.length <= LP_MAX_VECTOR_LENGTH);
2410 memset(&res_type, 0, sizeof res_type);
2411 res_type.width = type.width;
2412 res_type.length = type.length;
2413 res_type.sign = 1;
2414
2415 /* Setup build context */
2416 memset(&bld, 0, sizeof bld);
2417 lp_build_context_init(&bld.bld_base.base, gallivm, type);
2418 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
2419 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
2420 lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
2421 lp_build_context_init(&bld.uint_elem_bld, gallivm, lp_elem_type(lp_uint_type(type)));
2422 {
2423 struct lp_type dbl_type;
2424 dbl_type = type;
2425 dbl_type.width *= 2;
2426 lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
2427 }
2428 {
2429 struct lp_type half_type;
2430 half_type = type;
2431 half_type.width /= 2;
2432 lp_build_context_init(&bld.bld_base.half_bld, gallivm, half_type);
2433 }
2434 {
2435 struct lp_type uint64_type;
2436 uint64_type = lp_uint_type(type);
2437 uint64_type.width *= 2;
2438 lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
2439 }
2440 {
2441 struct lp_type int64_type;
2442 int64_type = lp_int_type(type);
2443 int64_type.width *= 2;
2444 lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
2445 }
2446 {
2447 struct lp_type uint16_type;
2448 uint16_type = lp_uint_type(type);
2449 uint16_type.width /= 2;
2450 lp_build_context_init(&bld.bld_base.uint16_bld, gallivm, uint16_type);
2451 }
2452 {
2453 struct lp_type int16_type;
2454 int16_type = lp_int_type(type);
2455 int16_type.width /= 2;
2456 lp_build_context_init(&bld.bld_base.int16_bld, gallivm, int16_type);
2457 }
2458 {
2459 struct lp_type uint8_type;
2460 uint8_type = lp_uint_type(type);
2461 uint8_type.width /= 4;
2462 lp_build_context_init(&bld.bld_base.uint8_bld, gallivm, uint8_type);
2463 }
2464 {
2465 struct lp_type int8_type;
2466 int8_type = lp_int_type(type);
2467 int8_type.width /= 4;
2468 lp_build_context_init(&bld.bld_base.int8_bld, gallivm, int8_type);
2469 }
2470 bld.bld_base.load_var = emit_load_var;
2471 bld.bld_base.store_var = emit_store_var;
2472 bld.bld_base.load_reg = emit_load_reg;
2473 bld.bld_base.store_reg = emit_store_reg;
2474 bld.bld_base.emit_var_decl = emit_var_decl;
2475 bld.bld_base.load_ubo = emit_load_ubo;
2476 bld.bld_base.load_kernel_arg = emit_load_kernel_arg;
2477 bld.bld_base.load_global = emit_load_global;
2478 bld.bld_base.store_global = emit_store_global;
2479 bld.bld_base.atomic_global = emit_atomic_global;
2480 bld.bld_base.tex = emit_tex;
2481 bld.bld_base.tex_size = emit_tex_size;
2482 bld.bld_base.bgnloop = bgnloop;
2483 bld.bld_base.endloop = endloop;
2484 bld.bld_base.if_cond = if_cond;
2485 bld.bld_base.else_stmt = else_stmt;
2486 bld.bld_base.endif_stmt = endif_stmt;
2487 bld.bld_base.break_stmt = break_stmt;
2488 bld.bld_base.continue_stmt = continue_stmt;
2489 bld.bld_base.sysval_intrin = emit_sysval_intrin;
2490 bld.bld_base.discard = discard;
2491 bld.bld_base.emit_vertex = emit_vertex;
2492 bld.bld_base.end_primitive = end_primitive;
2493 bld.bld_base.load_mem = emit_load_mem;
2494 bld.bld_base.store_mem = emit_store_mem;
2495 bld.bld_base.get_ssbo_size = emit_get_ssbo_size;
2496 bld.bld_base.atomic_mem = emit_atomic_mem;
2497 bld.bld_base.barrier = emit_barrier;
2498 bld.bld_base.image_op = emit_image_op;
2499 bld.bld_base.image_size = emit_image_size;
2500 bld.bld_base.vote = emit_vote;
2501 bld.bld_base.elect = emit_elect;
2502 bld.bld_base.reduce = emit_reduce;
2503 bld.bld_base.ballot = emit_ballot;
2504 bld.bld_base.read_invocation = emit_read_invocation;
2505 bld.bld_base.helper_invocation = emit_helper_invocation;
2506 bld.bld_base.interp_at = emit_interp_at;
2507 bld.bld_base.load_scratch = emit_load_scratch;
2508 bld.bld_base.store_scratch = emit_store_scratch;
2509
2510 bld.mask = params->mask;
2511 bld.inputs = params->inputs;
2512 bld.outputs = outputs;
2513 bld.consts_ptr = params->consts_ptr;
2514 bld.const_sizes_ptr = params->const_sizes_ptr;
2515 bld.ssbo_ptr = params->ssbo_ptr;
2516 bld.ssbo_sizes_ptr = params->ssbo_sizes_ptr;
2517 bld.sampler = params->sampler;
2518 // bld.bld_base.info = params->info;
2519
2520 bld.context_ptr = params->context_ptr;
2521 bld.thread_data_ptr = params->thread_data_ptr;
2522 bld.bld_base.aniso_filter_table = params->aniso_filter_table;
2523 bld.image = params->image;
2524 bld.shared_ptr = params->shared_ptr;
2525 bld.coro = params->coro;
2526 bld.kernel_args_ptr = params->kernel_args;
2527 bld.indirects = 0;
2528 if (params->info->indirect_files & (1 << TGSI_FILE_INPUT))
2529 bld.indirects |= nir_var_shader_in;
2530
2531 bld.gs_iface = params->gs_iface;
2532 bld.tcs_iface = params->tcs_iface;
2533 bld.tes_iface = params->tes_iface;
2534 bld.fs_iface = params->fs_iface;
2535 if (bld.gs_iface) {
2536 struct lp_build_context *uint_bld = &bld.bld_base.uint_bld;
2537
2538 bld.gs_vertex_streams = params->gs_vertex_streams;
2539 bld.max_output_vertices_vec = lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
2540 shader->info.gs.vertices_out);
2541 for (int i = 0; i < params->gs_vertex_streams; i++) {
2542 bld.emitted_prims_vec_ptr[i] =
2543 lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_prims_ptr");
2544 bld.emitted_vertices_vec_ptr[i] =
2545 lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_vertices_ptr");
2546 bld.total_emitted_vertices_vec_ptr[i] =
2547 lp_build_alloca(gallivm, uint_bld->vec_type, "total_emitted_vertices_ptr");
2548 }
2549 }
2550 lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
2551
2552 bld.system_values = *params->system_values;
2553
2554 bld.bld_base.shader = shader;
2555
2556 if (shader->scratch_size) {
2557 bld.scratch_ptr = lp_build_array_alloca(gallivm,
2558 LLVMInt8TypeInContext(gallivm->context),
2559 lp_build_const_int32(gallivm, shader->scratch_size * type.length),
2560 "scratch");
2561 }
2562 bld.scratch_size = shader->scratch_size;
2563 emit_prologue(&bld);
2564 lp_build_nir_llvm(&bld.bld_base, shader);
2565
2566 if (bld.gs_iface) {
2567 LLVMBuilderRef builder = bld.bld_base.base.gallivm->builder;
2568 LLVMValueRef total_emitted_vertices_vec;
2569 LLVMValueRef emitted_prims_vec;
2570
2571 for (int i = 0; i < params->gs_vertex_streams; i++) {
2572 end_primitive_masked(&bld.bld_base, lp_build_mask_value(bld.mask), i);
2573
2574 total_emitted_vertices_vec =
2575 LLVMBuildLoad(builder, bld.total_emitted_vertices_vec_ptr[i], "");
2576
2577 emitted_prims_vec =
2578 LLVMBuildLoad(builder, bld.emitted_prims_vec_ptr[i], "");
2579 bld.gs_iface->gs_epilogue(bld.gs_iface,
2580 total_emitted_vertices_vec,
2581 emitted_prims_vec, i);
2582 }
2583 }
2584 lp_exec_mask_fini(&bld.exec_mask);
2585 }
2586