/**************************************************************************
 *
 * Copyright 2019 Red Hat.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 **************************************************************************/

#include "lp_bld_nir.h"
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_const.h"
#include "lp_bld_conv.h"
#include "lp_bld_gather.h"
#include "lp_bld_logic.h"
#include "lp_bld_quad.h"
#include "lp_bld_flow.h"
#include "lp_bld_intr.h"
#include "lp_bld_struct.h"
#include "lp_bld_debug.h"
#include "lp_bld_printf.h"
#include "nir_deref.h"
#include "nir_search_helpers.h"


// Doing AOS (and linear) codegen?
static bool
is_aos(const struct lp_build_nir_context *bld_base)
{
   // AOS is used for vectors of uint8[16]
   return bld_base->base.type.length == 16 && bld_base->base.type.width == 8;
}


static void
visit_cf_list(struct lp_build_nir_context *bld_base,
              struct exec_list *list);


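/**
 * Bitcast a value to the vector type matching the given NIR ALU type
 * and bit size (e.g. nir_type_float / 32 -> the base float vec type).
 * 1-bit booleans share the 32-bit uint vector type; unhandled ALU types
 * are returned unchanged.
 */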
static LLVMValueRef
cast_type(struct lp_build_nir_context *bld_base, LLVMValueRef val,
          nir_alu_type alu_type, unsigned bit_size)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   switch (alu_type) {
   case nir_type_float:
      switch (bit_size) {
      case 16:
         return LLVMBuildBitCast(builder, val, bld_base->half_bld.vec_type, "");
      case 32:
         return LLVMBuildBitCast(builder, val, bld_base->base.vec_type, "");
      case 64:
         return LLVMBuildBitCast(builder, val, bld_base->dbl_bld.vec_type, "");
      default:
         assert(0);
         break;
      }
      break;
   case nir_type_int:
      switch (bit_size) {
      case 8:
         return LLVMBuildBitCast(builder, val, bld_base->int8_bld.vec_type, "");
      case 16:
         return LLVMBuildBitCast(builder, val, bld_base->int16_bld.vec_type, "");
      case 32:
         return LLVMBuildBitCast(builder, val, bld_base->int_bld.vec_type, "");
      case 64:
         return LLVMBuildBitCast(builder, val, bld_base->int64_bld.vec_type, "");
      default:
         assert(0);
         break;
      }
      break;
   case nir_type_uint:
      switch (bit_size) {
      case 8:
         return LLVMBuildBitCast(builder, val, bld_base->uint8_bld.vec_type, "");
      case 16:
         return LLVMBuildBitCast(builder, val, bld_base->uint16_bld.vec_type, "");
      case 1:
      case 32:
         return LLVMBuildBitCast(builder, val, bld_base->uint_bld.vec_type, "");
      case 64:
         return LLVMBuildBitCast(builder, val, bld_base->uint64_bld.vec_type, "");
      default:
         assert(0);
         break;
      }
      break;
   case nir_type_uint32:
      return LLVMBuildBitCast(builder, val, bld_base->uint_bld.vec_type, "");
   default:
      return val;
   }
   return NULL;
}


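/**
 * Map a GLSL sampler dimension (plus array flag) onto the corresponding
 * pipe texture target. Subpass inputs map to 2D arrays; anything
 * unrecognized falls back to PIPE_BUFFER.
 */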
static unsigned
glsl_sampler_to_pipe(int sampler_dim, bool is_array)
{
   unsigned pipe_target = PIPE_BUFFER;
   switch (sampler_dim) {
   case GLSL_SAMPLER_DIM_1D:
      pipe_target = is_array ? PIPE_TEXTURE_1D_ARRAY : PIPE_TEXTURE_1D;
      break;
   case GLSL_SAMPLER_DIM_2D:
      pipe_target = is_array ? PIPE_TEXTURE_2D_ARRAY : PIPE_TEXTURE_2D;
      break;
   case GLSL_SAMPLER_DIM_SUBPASS:
   case GLSL_SAMPLER_DIM_SUBPASS_MS:
      pipe_target = PIPE_TEXTURE_2D_ARRAY;
      break;
   case GLSL_SAMPLER_DIM_3D:
      pipe_target = PIPE_TEXTURE_3D;
      break;
   case GLSL_SAMPLER_DIM_MS:
      pipe_target = is_array ? PIPE_TEXTURE_2D_ARRAY : PIPE_TEXTURE_2D;
      break;
   case GLSL_SAMPLER_DIM_CUBE:
      pipe_target = is_array ? PIPE_TEXTURE_CUBE_ARRAY : PIPE_TEXTURE_CUBE;
      break;
   case GLSL_SAMPLER_DIM_RECT:
      pipe_target = PIPE_TEXTURE_RECT;
      break;
   case GLSL_SAMPLER_DIM_BUF:
      pipe_target = PIPE_BUFFER;
      break;
   default:
      break;
   }
   return pipe_target;
}


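/* Fetch the LLVM value previously recorded for an SSA def. */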
static LLVMValueRef get_ssa_src(struct lp_build_nir_context *bld_base, nir_ssa_def *ssa)
{
   return bld_base->ssa_defs[ssa->index];
}


static LLVMValueRef
get_src(struct lp_build_nir_context *bld_base, nir_src src);


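/**
 * Load a NIR register source. The register's backing storage is looked
 * up in the regs hash table; any indirect addressing is resolved by the
 * backend's load_reg callback.
 */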
static LLVMValueRef
get_reg_src(struct lp_build_nir_context *bld_base, nir_reg_src src)
{
   struct hash_entry *entry = _mesa_hash_table_search(bld_base->regs, src.reg);
   LLVMValueRef reg_storage = (LLVMValueRef)entry->data;
   struct lp_build_context *reg_bld = get_int_bld(bld_base, true, src.reg->bit_size);
   LLVMValueRef indir_src = NULL;
   if (src.indirect)
      indir_src = get_src(bld_base, *src.indirect);
   return bld_base->load_reg(bld_base, reg_bld, &src, indir_src, reg_storage);
}


static LLVMValueRef
get_src(struct lp_build_nir_context *bld_base, nir_src src)
{
   if (src.is_ssa)
      return get_ssa_src(bld_base, src.ssa);
   else
      return get_reg_src(bld_base, src.reg);
}


static void
assign_ssa(struct lp_build_nir_context *bld_base, int idx, LLVMValueRef ptr)
{
   bld_base->ssa_defs[idx] = ptr;
}


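/**
 * Record an instruction's results for later get_src lookups.
 * Multi-component values are packed into an LLVM array value, except in
 * AOS mode where the single vector in vals[0] already holds everything.
 */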
static void
assign_ssa_dest(struct lp_build_nir_context *bld_base, const nir_ssa_def *ssa,
                LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
{
   if ((ssa->num_components == 1 || is_aos(bld_base))) {
      assign_ssa(bld_base, ssa->index, vals[0]);
   } else {
      assign_ssa(bld_base, ssa->index,
                 lp_nir_array_build_gather_values(bld_base->base.gallivm->builder,
                                                  vals, ssa->num_components));
   }
}


static void
assign_reg(struct lp_build_nir_context *bld_base, const nir_reg_dest *reg,
           unsigned write_mask,
           LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
{
   struct hash_entry *entry = _mesa_hash_table_search(bld_base->regs, reg->reg);
   LLVMValueRef reg_storage = (LLVMValueRef)entry->data;
   struct lp_build_context *reg_bld = get_int_bld(bld_base, true, reg->reg->bit_size);
   LLVMValueRef indir_src = NULL;
   if (reg->indirect)
      indir_src = get_src(bld_base, *reg->indirect);
   bld_base->store_reg(bld_base, reg_bld, reg, write_mask ? write_mask : 0xf, indir_src, reg_storage, vals);
}


static void
assign_dest(struct lp_build_nir_context *bld_base,
            const nir_dest *dest,
            LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
{
   if (dest->is_ssa)
      assign_ssa_dest(bld_base, &dest->ssa, vals);
   else
      assign_reg(bld_base, &dest->reg, 0, vals);
}


static void
assign_alu_dest(struct lp_build_nir_context *bld_base,
                const nir_alu_dest *dest,
                LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
{
   if (dest->dest.is_ssa)
      assign_ssa_dest(bld_base, &dest->dest.ssa, vals);
   else
      assign_reg(bld_base, &dest->dest.reg, dest->write_mask, vals);
}


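/**
 * Compare an integer against zero and produce a 32-bit boolean vector
 * (~0 for true, 0 for false), sign-extending or truncating the compare
 * result for non-32-bit sources.
 */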
static LLVMValueRef
int_to_bool32(struct lp_build_nir_context *bld_base,
              uint32_t src_bit_size,
              bool is_unsigned,
              LLVMValueRef val)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_build_context *int_bld =
      get_int_bld(bld_base, is_unsigned, src_bit_size);
   LLVMValueRef result = lp_build_compare(bld_base->base.gallivm,
                                          int_bld->type, PIPE_FUNC_NOTEQUAL,
                                          val, int_bld->zero);
   if (src_bit_size == 16)
      result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
   else if (src_bit_size == 64)
      result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
   return result;
}


static LLVMValueRef
flt_to_bool32(struct lp_build_nir_context *bld_base,
              uint32_t src_bit_size,
              LLVMValueRef val)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size);
   LLVMValueRef result =
      lp_build_cmp(flt_bld, PIPE_FUNC_NOTEQUAL, val, flt_bld->zero);
   if (src_bit_size == 64)
      result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
   if (src_bit_size == 16)
      result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
   return result;
}


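/**
 * Float comparison yielding a 32-bit boolean vector. Every compare
 * except not-equal uses the ordered variant, so NaN operands yield
 * false there, while not-equal keeps the plain compare.
 */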
static LLVMValueRef
fcmp32(struct lp_build_nir_context *bld_base,
       enum pipe_compare_func compare,
       uint32_t src_bit_size,
       LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size);
   LLVMValueRef result;

   if (compare != PIPE_FUNC_NOTEQUAL)
      result = lp_build_cmp_ordered(flt_bld, compare, src[0], src[1]);
   else
      result = lp_build_cmp(flt_bld, compare, src[0], src[1]);
   if (src_bit_size == 64)
      result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
   else if (src_bit_size == 16)
      result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
   return result;
}


static LLVMValueRef
icmp32(struct lp_build_nir_context *bld_base,
       enum pipe_compare_func compare,
       bool is_unsigned,
       uint32_t src_bit_size,
       LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_build_context *i_bld =
      get_int_bld(bld_base, is_unsigned, src_bit_size);
   LLVMValueRef result = lp_build_cmp(i_bld, compare, src[0], src[1]);
   if (src_bit_size < 32)
      result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
   else if (src_bit_size == 64)
      result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
   return result;
}


/**
 * Get a source register value for an ALU instruction.
 * This is where swizzles are handled. There should be no negation
 * or absolute value modifiers.
 * num_components indicates the number of components needed in the
 * returned array or vector.
 */
static LLVMValueRef
get_alu_src(struct lp_build_nir_context *bld_base,
            nir_alu_src src,
            unsigned num_components)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef value = get_src(bld_base, src.src);
   bool need_swizzle = false;

   assert(value);

   if (is_aos(bld_base))
      return value;

   unsigned src_components = nir_src_num_components(src.src);
   for (unsigned i = 0; i < num_components; ++i) {
      assert(src.swizzle[i] < src_components);
      if (src.swizzle[i] != i)
         need_swizzle = true;
   }

   if (need_swizzle || num_components != src_components) {
      if (src_components > 1 && num_components == 1) {
         value = LLVMBuildExtractValue(gallivm->builder, value,
                                       src.swizzle[0], "");
      } else if (src_components == 1 && num_components > 1) {
         LLVMValueRef values[] = {value, value, value, value,
                                  value, value, value, value,
                                  value, value, value, value,
                                  value, value, value, value};
         value = lp_nir_array_build_gather_values(builder, values, num_components);
      } else {
         LLVMValueRef arr = LLVMGetUndef(LLVMArrayType(LLVMTypeOf(LLVMBuildExtractValue(builder, value, 0, "")), num_components));
         for (unsigned i = 0; i < num_components; i++)
            arr = LLVMBuildInsertValue(builder, arr, LLVMBuildExtractValue(builder, value, src.swizzle[i], ""), i, "");
         value = arr;
      }
   }
   assert(!src.negate);
   assert(!src.abs);
   return value;
}


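/**
 * Convert a 32-bit boolean (~0 or 0) to float by masking it with the
 * bit pattern of 1.0f, then widen or narrow to the requested bit size.
 */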
static LLVMValueRef
emit_b2f(struct lp_build_nir_context *bld_base,
         LLVMValueRef src0,
         unsigned bitsize)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMValueRef result =
      LLVMBuildAnd(builder, cast_type(bld_base, src0, nir_type_int, 32),
                   LLVMBuildBitCast(builder,
                                    lp_build_const_vec(bld_base->base.gallivm,
                                                       bld_base->base.type,
                                                       1.0),
                                    bld_base->int_bld.vec_type, ""),
                   "");
   result = LLVMBuildBitCast(builder, result, bld_base->base.vec_type, "");
   switch (bitsize) {
   case 16:
      result = LLVMBuildFPTrunc(builder, result,
                                bld_base->half_bld.vec_type, "");
      break;
   case 32:
      break;
   case 64:
      result = LLVMBuildFPExt(builder, result,
                              bld_base->dbl_bld.vec_type, "");
      break;
   default:
      unreachable("unsupported bit size.");
   }
   return result;
}


static LLVMValueRef
emit_b2i(struct lp_build_nir_context *bld_base,
         LLVMValueRef src0,
         unsigned bitsize)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMValueRef result = LLVMBuildAnd(builder,
                                      cast_type(bld_base, src0, nir_type_int, 32),
                                      lp_build_const_int_vec(bld_base->base.gallivm,
                                                             bld_base->base.type, 1), "");
   switch (bitsize) {
   case 8:
      return LLVMBuildTrunc(builder, result, bld_base->int8_bld.vec_type, "");
   case 16:
      return LLVMBuildTrunc(builder, result, bld_base->int16_bld.vec_type, "");
   case 32:
      return result;
   case 64:
      return LLVMBuildZExt(builder, result, bld_base->int64_bld.vec_type, "");
   default:
      unreachable("unsupported bit size.");
   }
}


static LLVMValueRef
emit_b32csel(struct lp_build_nir_context *bld_base,
             unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS],
             LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef sel = cast_type(bld_base, src[0], nir_type_int, 32);
   LLVMValueRef v = lp_build_compare(bld_base->base.gallivm, bld_base->int_bld.type, PIPE_FUNC_NOTEQUAL, sel, bld_base->int_bld.zero);
   struct lp_build_context *bld = get_int_bld(bld_base, false, src_bit_size[1]);
   return lp_build_select(bld, v, src[1], src[2]);
}


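/**
 * Extract the low (hi == false) or high (hi == true) 32 bits of each
 * 64-bit lane. The vector is bitcast to twice as many i32 elements and
 * every other element is shuffled out, with the element order chosen by
 * host endianness.
 */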
static LLVMValueRef
split_64bit(struct lp_build_nir_context *bld_base,
            LLVMValueRef src,
            bool hi)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
   LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
   int len = bld_base->base.type.length * 2;
   for (unsigned i = 0; i < bld_base->base.type.length; i++) {
#if UTIL_ARCH_LITTLE_ENDIAN
      shuffles[i] = lp_build_const_int32(gallivm, i * 2);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
#else
      shuffles[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2));
#endif
   }

   src = LLVMBuildBitCast(gallivm->builder, src,
                          LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), len), "");
   return LLVMBuildShuffleVector(gallivm->builder, src,
                                 LLVMGetUndef(LLVMTypeOf(src)),
                                 LLVMConstVector(hi ? shuffles2 : shuffles,
                                                 bld_base->base.type.length),
                                 "");
}


static LLVMValueRef
merge_64bit(struct lp_build_nir_context *bld_base,
            LLVMValueRef input,
            LLVMValueRef input2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   int i;
   LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
   int len = bld_base->base.type.length * 2;
   assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));

   for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
#if UTIL_ARCH_LITTLE_ENDIAN
      shuffles[i] = lp_build_const_int32(gallivm, i / 2);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
#else
      shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
#endif
   }
   return LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
}


static LLVMValueRef
split_16bit(struct lp_build_nir_context *bld_base,
            LLVMValueRef src,
            bool hi)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
   LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
   int len = bld_base->base.type.length * 2;
   for (unsigned i = 0; i < bld_base->base.type.length; i++) {
#if UTIL_ARCH_LITTLE_ENDIAN
      shuffles[i] = lp_build_const_int32(gallivm, i * 2);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
#else
      shuffles[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2));
#endif
   }

   src = LLVMBuildBitCast(gallivm->builder, src, LLVMVectorType(LLVMInt16TypeInContext(gallivm->context), len), "");
   return LLVMBuildShuffleVector(gallivm->builder, src,
                                 LLVMGetUndef(LLVMTypeOf(src)),
                                 LLVMConstVector(hi ? shuffles2 : shuffles,
                                                 bld_base->base.type.length),
                                 "");
}


static LLVMValueRef
merge_16bit(struct lp_build_nir_context *bld_base,
            LLVMValueRef input,
            LLVMValueRef input2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   int i;
   LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
   int len = bld_base->int16_bld.type.length * 2;
   assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));

   for (i = 0; i < bld_base->int_bld.type.length * 2; i+=2) {
#if UTIL_ARCH_LITTLE_ENDIAN
      shuffles[i] = lp_build_const_int32(gallivm, i / 2);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
#else
      shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
#endif
   }
   return LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
}


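/**
 * Sanitize the divisor for a signed integer division: INT_MIN / -1
 * overflows (and can raise SIGFPE), so lanes where the numerator is the
 * type's minimum and the divisor is -1 get their divisor replaced by 1.
 */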
static LLVMValueRef
get_signed_divisor(struct gallivm_state *gallivm,
                   struct lp_build_context *int_bld,
                   struct lp_build_context *mask_bld,
                   int src_bit_size,
                   LLVMValueRef src, LLVMValueRef divisor)
{
   LLVMBuilderRef builder = gallivm->builder;
   /* For signed divides SIGFPE can occur if the numerator is INT_MIN
      and the divisor is -1. */
   /* set mask if numerator == INT_MIN */
   long long min_val;
   switch (src_bit_size) {
   case 8:
      min_val = INT8_MIN;
      break;
   case 16:
      min_val = INT16_MIN;
      break;
   default:
   case 32:
      min_val = INT_MIN;
      break;
   case 64:
      min_val = INT64_MIN;
      break;
   }
   LLVMValueRef div_mask2 = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src,
                                         lp_build_const_int_vec(gallivm, int_bld->type, min_val));
   /* set another mask if divisor is -1 */
   LLVMValueRef div_mask3 = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, divisor,
                                         lp_build_const_int_vec(gallivm, int_bld->type, -1));
   div_mask2 = LLVMBuildAnd(builder, div_mask2, div_mask3, "");

   divisor = lp_build_select(mask_bld, div_mask2, int_bld->one, divisor);
   return divisor;
}


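/**
 * Integer division with defined divide-by-zero behavior: zero divisors
 * are turned into all-ones before the division, then the affected lanes
 * are patched up afterwards (0 for signed, ~0 for unsigned results).
 */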
static LLVMValueRef
do_int_divide(struct lp_build_nir_context *bld_base,
              bool is_unsigned, unsigned src_bit_size,
              LLVMValueRef src, LLVMValueRef src2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
   struct lp_build_context *mask_bld = get_int_bld(bld_base, true, src_bit_size);

   /* avoid divide by 0 by converting the divisor from 0 to -1 */
   LLVMValueRef div_mask = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src2,
                                        mask_bld->zero);

   LLVMValueRef divisor = LLVMBuildOr(builder, div_mask, src2, "");
   if (!is_unsigned) {
      divisor = get_signed_divisor(gallivm, int_bld, mask_bld,
                                   src_bit_size, src, divisor);
   }
   LLVMValueRef result = lp_build_div(int_bld, src, divisor);

   if (!is_unsigned) {
      LLVMValueRef not_div_mask = LLVMBuildNot(builder, div_mask, "");
      return LLVMBuildAnd(builder, not_div_mask, result, "");
   } else
      /* udiv by zero is guaranteed to return 0xffffffff, at least with
       * d3d10; may as well do the same for idiv */
      return LLVMBuildOr(builder, div_mask, result, "");
}


static LLVMValueRef
do_int_mod(struct lp_build_nir_context *bld_base,
           bool is_unsigned, unsigned src_bit_size,
           LLVMValueRef src, LLVMValueRef src2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
   struct lp_build_context *mask_bld = get_int_bld(bld_base, true, src_bit_size);
   LLVMValueRef div_mask = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src2,
                                        mask_bld->zero);
   LLVMValueRef divisor = LLVMBuildOr(builder,
                                      div_mask,
                                      src2, "");
   if (!is_unsigned) {
      divisor = get_signed_divisor(gallivm, int_bld, mask_bld,
                                   src_bit_size, src, divisor);
   }
   LLVMValueRef result = lp_build_mod(int_bld, src, divisor);
   return LLVMBuildOr(builder, div_mask, result, "");
}


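/**
 * Implement fquantize2f16: round-trip a 32-bit float through half
 * precision. Nonzero values whose magnitude ends up below the smallest
 * normal half float (0x38800000 as a float bit pattern) are flushed to
 * zero.
 */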
static LLVMValueRef
do_quantize_to_f16(struct lp_build_nir_context *bld_base,
                   LLVMValueRef src)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef result, cond, cond2, temp;

   result = LLVMBuildFPTrunc(builder, src, bld_base->half_bld.vec_type, "");
   result = LLVMBuildFPExt(builder, result, bld_base->base.vec_type, "");

   temp = lp_build_abs(get_flt_bld(bld_base, 32), result);
   cond = LLVMBuildFCmp(builder, LLVMRealOGT,
                        LLVMBuildBitCast(builder, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, 0x38800000), bld_base->base.vec_type, ""),
                        temp, "");
   cond2 = LLVMBuildFCmp(builder, LLVMRealONE, temp, bld_base->base.zero, "");
   cond = LLVMBuildAnd(builder, cond, cond2, "");
   result = LLVMBuildSelect(builder, cond, bld_base->base.zero, result, "");
   return result;
}


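/**
 * Emit LLVM IR for a single NIR ALU op. Sources arrive already swizzled
 * and are cast per nir_op_infos by the caller; most cases simply map to
 * the corresponding lp_build_* or LLVM builder helper.
 */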
static LLVMValueRef
do_alu_action(struct lp_build_nir_context *bld_base,
              const nir_alu_instr *instr,
              unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS],
              LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef result;

   switch (instr->op) {
   case nir_op_b2f16:
      result = emit_b2f(bld_base, src[0], 16);
      break;
   case nir_op_b2f32:
      result = emit_b2f(bld_base, src[0], 32);
      break;
   case nir_op_b2f64:
      result = emit_b2f(bld_base, src[0], 64);
      break;
   case nir_op_b2i8:
      result = emit_b2i(bld_base, src[0], 8);
      break;
   case nir_op_b2i16:
      result = emit_b2i(bld_base, src[0], 16);
      break;
   case nir_op_b2i32:
      result = emit_b2i(bld_base, src[0], 32);
      break;
   case nir_op_b2i64:
      result = emit_b2i(bld_base, src[0], 64);
      break;
   case nir_op_b32csel:
      result = emit_b32csel(bld_base, src_bit_size, src);
      break;
   case nir_op_bit_count:
      result = lp_build_popcount(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
      if (src_bit_size[0] < 32)
         result = LLVMBuildZExt(builder, result, bld_base->int_bld.vec_type, "");
      else if (src_bit_size[0] > 32)
         result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
      break;
   case nir_op_bitfield_select:
      result = lp_build_xor(&bld_base->uint_bld, src[2], lp_build_and(&bld_base->uint_bld, src[0], lp_build_xor(&bld_base->uint_bld, src[1], src[2])));
      break;
   case nir_op_bitfield_reverse:
      result = lp_build_bitfield_reverse(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
      break;
   case nir_op_f2b32:
      result = flt_to_bool32(bld_base, src_bit_size[0], src[0]);
      break;
   case nir_op_f2f16:
      if (src_bit_size[0] == 64)
         src[0] = LLVMBuildFPTrunc(builder, src[0],
                                   bld_base->base.vec_type, "");
      result = LLVMBuildFPTrunc(builder, src[0],
                                bld_base->half_bld.vec_type, "");
      break;
   case nir_op_f2f32:
      if (src_bit_size[0] < 32)
         result = LLVMBuildFPExt(builder, src[0],
                                 bld_base->base.vec_type, "");
      else
         result = LLVMBuildFPTrunc(builder, src[0],
                                   bld_base->base.vec_type, "");
      break;
   case nir_op_f2f64:
      result = LLVMBuildFPExt(builder, src[0],
                              bld_base->dbl_bld.vec_type, "");
      break;
   case nir_op_f2i8:
      result = LLVMBuildFPToSI(builder,
                               src[0],
                               bld_base->uint8_bld.vec_type, "");
      break;
   case nir_op_f2i16:
      result = LLVMBuildFPToSI(builder,
                               src[0],
                               bld_base->uint16_bld.vec_type, "");
      break;
   case nir_op_f2i32:
      result = LLVMBuildFPToSI(builder, src[0], bld_base->base.int_vec_type, "");
      break;
   case nir_op_f2u8:
      result = LLVMBuildFPToUI(builder,
                               src[0],
                               bld_base->uint8_bld.vec_type, "");
      break;
   case nir_op_f2u16:
      result = LLVMBuildFPToUI(builder,
                               src[0],
                               bld_base->uint16_bld.vec_type, "");
      break;
   case nir_op_f2u32:
      result = LLVMBuildFPToUI(builder,
                               src[0],
                               bld_base->base.int_vec_type, "");
      break;
   case nir_op_f2i64:
      result = LLVMBuildFPToSI(builder,
                               src[0],
                               bld_base->int64_bld.vec_type, "");
      break;
   case nir_op_f2u64:
      result = LLVMBuildFPToUI(builder,
                               src[0],
                               bld_base->uint64_bld.vec_type, "");
      break;
   case nir_op_fabs:
      result = lp_build_abs(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fadd:
      result = lp_build_add(get_flt_bld(bld_base, src_bit_size[0]),
                            src[0], src[1]);
      break;
   case nir_op_fceil:
      result = lp_build_ceil(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fcos:
      result = lp_build_cos(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fddx:
   case nir_op_fddx_coarse:
   case nir_op_fddx_fine:
      result = lp_build_ddx(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fddy:
   case nir_op_fddy_coarse:
   case nir_op_fddy_fine:
      result = lp_build_ddy(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fdiv:
      result = lp_build_div(get_flt_bld(bld_base, src_bit_size[0]),
                            src[0], src[1]);
      break;
   case nir_op_feq32:
      result = fcmp32(bld_base, PIPE_FUNC_EQUAL, src_bit_size[0], src);
      break;
   case nir_op_fexp2:
      result = lp_build_exp2(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_ffloor:
      result = lp_build_floor(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_ffma:
      result = lp_build_fmuladd(builder, src[0], src[1], src[2]);
      break;
   case nir_op_ffract: {
      struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size[0]);
      LLVMValueRef tmp = lp_build_floor(flt_bld, src[0]);
      result = lp_build_sub(flt_bld, src[0], tmp);
      break;
   }
   case nir_op_fge:
   case nir_op_fge32:
      result = fcmp32(bld_base, PIPE_FUNC_GEQUAL, src_bit_size[0], src);
      break;
   case nir_op_find_lsb: {
      struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
      result = lp_build_cttz(int_bld, src[0]);
      if (src_bit_size[0] < 32)
         result = LLVMBuildZExt(builder, result, bld_base->uint_bld.vec_type, "");
      else if (src_bit_size[0] > 32)
         result = LLVMBuildTrunc(builder, result, bld_base->uint_bld.vec_type, "");
      break;
   }
   case nir_op_fisfinite32:
      unreachable("Should have been lowered in nir_opt_algebraic_late.");
   case nir_op_flog2:
      result = lp_build_log2_safe(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_flt:
   case nir_op_flt32:
      result = fcmp32(bld_base, PIPE_FUNC_LESS, src_bit_size[0], src);
      break;
   case nir_op_fmax:
   case nir_op_fmin: {
      enum gallivm_nan_behavior minmax_nan;
      int first = 0;

      /* If one of the sources is known to be a number (i.e., not NaN), then
       * better code can be generated by passing that information along.
       */
      if (is_a_number(bld_base->range_ht, instr, 1,
                      0 /* unused num_components */,
                      NULL /* unused swizzle */)) {
         minmax_nan = GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN;
      } else if (is_a_number(bld_base->range_ht, instr, 0,
                             0 /* unused num_components */,
                             NULL /* unused swizzle */)) {
         first = 1;
         minmax_nan = GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN;
      } else {
         minmax_nan = GALLIVM_NAN_RETURN_OTHER;
      }

      if (instr->op == nir_op_fmin) {
         result = lp_build_min_ext(get_flt_bld(bld_base, src_bit_size[0]),
                                   src[first], src[1 - first], minmax_nan);
      } else {
         result = lp_build_max_ext(get_flt_bld(bld_base, src_bit_size[0]),
                                   src[first], src[1 - first], minmax_nan);
      }
      break;
   }
   case nir_op_fmod: {
      struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size[0]);
      result = lp_build_div(flt_bld, src[0], src[1]);
      result = lp_build_floor(flt_bld, result);
      result = lp_build_mul(flt_bld, src[1], result);
      result = lp_build_sub(flt_bld, src[0], result);
      break;
   }
   case nir_op_fmul:
      result = lp_build_mul(get_flt_bld(bld_base, src_bit_size[0]),
                            src[0], src[1]);
      break;
   case nir_op_fneu32:
      result = fcmp32(bld_base, PIPE_FUNC_NOTEQUAL, src_bit_size[0], src);
      break;
   case nir_op_fneg:
      result = lp_build_negate(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fpow:
      result = lp_build_pow(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1]);
      break;
   case nir_op_fquantize2f16:
      result = do_quantize_to_f16(bld_base, src[0]);
      break;
   case nir_op_frcp:
      result = lp_build_rcp(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fround_even:
      if (src_bit_size[0] == 16) {
         struct lp_build_context *bld = get_flt_bld(bld_base, 16);
         char intrinsic[64];
         lp_format_intrinsic(intrinsic, 64, "llvm.roundeven", bld->vec_type);
         result = lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, src[0]);
      } else {
         result = lp_build_round(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      }
      break;
   case nir_op_frsq:
      result = lp_build_rsqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fsat:
      result = lp_build_clamp_zero_one_nanzero(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fsign:
      result = lp_build_sgn(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fsin:
      result = lp_build_sin(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fsqrt:
      result = lp_build_sqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_ftrunc:
      result = lp_build_trunc(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_i2b32:
      result = int_to_bool32(bld_base, src_bit_size[0], false, src[0]);
      break;
   case nir_op_i2f16:
      result = LLVMBuildSIToFP(builder, src[0],
                               bld_base->half_bld.vec_type, "");
      break;
   case nir_op_i2f32:
      result = lp_build_int_to_float(&bld_base->base, src[0]);
      break;
   case nir_op_i2f64:
      result = lp_build_int_to_float(&bld_base->dbl_bld, src[0]);
      break;
   case nir_op_i2i8:
      result = LLVMBuildTrunc(builder, src[0], bld_base->int8_bld.vec_type, "");
      break;
   case nir_op_i2i16:
      if (src_bit_size[0] < 16)
         result = LLVMBuildSExt(builder, src[0], bld_base->int16_bld.vec_type, "");
      else
         result = LLVMBuildTrunc(builder, src[0], bld_base->int16_bld.vec_type, "");
      break;
   case nir_op_i2i32:
      if (src_bit_size[0] < 32)
         result = LLVMBuildSExt(builder, src[0], bld_base->int_bld.vec_type, "");
      else
         result = LLVMBuildTrunc(builder, src[0], bld_base->int_bld.vec_type, "");
      break;
   case nir_op_i2i64:
      result = LLVMBuildSExt(builder, src[0], bld_base->int64_bld.vec_type, "");
      break;
   case nir_op_iabs:
      result = lp_build_abs(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
      break;
   case nir_op_iadd:
      result = lp_build_add(get_int_bld(bld_base, false, src_bit_size[0]),
                            src[0], src[1]);
      break;
   case nir_op_iand:
      result = lp_build_and(get_int_bld(bld_base, false, src_bit_size[0]),
                            src[0], src[1]);
      break;
   case nir_op_idiv:
      result = do_int_divide(bld_base, false, src_bit_size[0], src[0], src[1]);
      break;
   case nir_op_ieq32:
      result = icmp32(bld_base, PIPE_FUNC_EQUAL, false, src_bit_size[0], src);
      break;
   case nir_op_ige32:
      result = icmp32(bld_base, PIPE_FUNC_GEQUAL, false, src_bit_size[0], src);
      break;
   case nir_op_ilt32:
      result = icmp32(bld_base, PIPE_FUNC_LESS, false, src_bit_size[0], src);
      break;
   case nir_op_imax:
      result = lp_build_max(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1]);
      break;
   case nir_op_imin:
      result = lp_build_min(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1]);
      break;
   case nir_op_imul:
   case nir_op_imul24:
      result = lp_build_mul(get_int_bld(bld_base, false, src_bit_size[0]),
                            src[0], src[1]);
      break;
   case nir_op_imul_high: {
      LLVMValueRef hi_bits;
      lp_build_mul_32_lohi(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1], &hi_bits);
      result = hi_bits;
      break;
   }
   case nir_op_ine32:
      result = icmp32(bld_base, PIPE_FUNC_NOTEQUAL, false, src_bit_size[0], src);
      break;
   case nir_op_ineg:
      result = lp_build_negate(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
      break;
   case nir_op_inot:
      result = lp_build_not(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
      break;
   case nir_op_ior:
      result = lp_build_or(get_int_bld(bld_base, false, src_bit_size[0]),
                           src[0], src[1]);
      break;
   case nir_op_imod:
   case nir_op_irem:
      result = do_int_mod(bld_base, false, src_bit_size[0], src[0], src[1]);
      break;
   case nir_op_ishl: {
      struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
      struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
      if (src_bit_size[0] == 64)
         src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
      if (src_bit_size[0] < 32)
         src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
      src[1] = lp_build_and(uint_bld, src[1], lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
      result = lp_build_shl(int_bld, src[0], src[1]);
      break;
   }
   case nir_op_ishr: {
      struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
      struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
      if (src_bit_size[0] == 64)
         src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
      if (src_bit_size[0] < 32)
         src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
      src[1] = lp_build_and(uint_bld, src[1], lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
      result = lp_build_shr(int_bld, src[0], src[1]);
      break;
   }
   case nir_op_isign:
      result = lp_build_sgn(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
      break;
   case nir_op_isub:
      result = lp_build_sub(get_int_bld(bld_base, false, src_bit_size[0]),
                            src[0], src[1]);
      break;
   case nir_op_ixor:
      result = lp_build_xor(get_int_bld(bld_base, false, src_bit_size[0]),
                            src[0], src[1]);
      break;
   case nir_op_mov:
      result = src[0];
      break;
   case nir_op_unpack_64_2x32_split_x:
      result = split_64bit(bld_base, src[0], false);
      break;
   case nir_op_unpack_64_2x32_split_y:
      result = split_64bit(bld_base, src[0], true);
      break;

   case nir_op_pack_32_2x16_split: {
      LLVMValueRef tmp = merge_16bit(bld_base, src[0], src[1]);
      result = LLVMBuildBitCast(builder, tmp, bld_base->base.vec_type, "");
      break;
   }
   case nir_op_unpack_32_2x16_split_x:
      result = split_16bit(bld_base, src[0], false);
      break;
   case nir_op_unpack_32_2x16_split_y:
      result = split_16bit(bld_base, src[0], true);
      break;
   case nir_op_pack_64_2x32_split: {
      LLVMValueRef tmp = merge_64bit(bld_base, src[0], src[1]);
      result = LLVMBuildBitCast(builder, tmp, bld_base->uint64_bld.vec_type, "");
      break;
   }
   case nir_op_pack_32_4x8_split: {
      LLVMValueRef tmp1 = merge_16bit(bld_base, src[0], src[1]);
      LLVMValueRef tmp2 = merge_16bit(bld_base, src[2], src[3]);
      tmp1 = LLVMBuildBitCast(builder, tmp1, bld_base->uint16_bld.vec_type, "");
      tmp2 = LLVMBuildBitCast(builder, tmp2, bld_base->uint16_bld.vec_type, "");
      LLVMValueRef tmp = merge_16bit(bld_base, tmp1, tmp2);
      result = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.vec_type, "");
      break;
   }
   case nir_op_u2f16:
      result = LLVMBuildUIToFP(builder, src[0],
                               bld_base->half_bld.vec_type, "");
      break;
   case nir_op_u2f32:
      result = LLVMBuildUIToFP(builder, src[0], bld_base->base.vec_type, "");
      break;
   case nir_op_u2f64:
      result = LLVMBuildUIToFP(builder, src[0], bld_base->dbl_bld.vec_type, "");
      break;
   case nir_op_u2u8:
      result = LLVMBuildTrunc(builder, src[0], bld_base->uint8_bld.vec_type, "");
      break;
   case nir_op_u2u16:
      if (src_bit_size[0] < 16)
         result = LLVMBuildZExt(builder, src[0], bld_base->uint16_bld.vec_type, "");
      else
         result = LLVMBuildTrunc(builder, src[0], bld_base->uint16_bld.vec_type, "");
      break;
   case nir_op_u2u32:
      if (src_bit_size[0] < 32)
         result = LLVMBuildZExt(builder, src[0], bld_base->uint_bld.vec_type, "");
      else
         result = LLVMBuildTrunc(builder, src[0], bld_base->uint_bld.vec_type, "");
      break;
   case nir_op_u2u64:
      result = LLVMBuildZExt(builder, src[0], bld_base->uint64_bld.vec_type, "");
      break;
   case nir_op_udiv:
      result = do_int_divide(bld_base, true, src_bit_size[0], src[0], src[1]);
      break;
   case nir_op_ufind_msb: {
      struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
      result = lp_build_ctlz(uint_bld, src[0]);
      result = lp_build_sub(uint_bld, lp_build_const_int_vec(gallivm, uint_bld->type, src_bit_size[0] - 1), result);
      if (src_bit_size[0] < 32)
         result = LLVMBuildZExt(builder, result, bld_base->uint_bld.vec_type, "");
      else
         result = LLVMBuildTrunc(builder, result, bld_base->uint_bld.vec_type, "");
      break;
   }
   case nir_op_uge32:
      result = icmp32(bld_base, PIPE_FUNC_GEQUAL, true, src_bit_size[0], src);
      break;
   case nir_op_ult32:
      result = icmp32(bld_base, PIPE_FUNC_LESS, true, src_bit_size[0], src);
      break;
   case nir_op_umax:
      result = lp_build_max(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1]);
      break;
   case nir_op_umin:
      result = lp_build_min(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1]);
      break;
   case nir_op_umod:
      result = do_int_mod(bld_base, true, src_bit_size[0], src[0], src[1]);
      break;
   case nir_op_umul_high: {
      LLVMValueRef hi_bits;
      lp_build_mul_32_lohi(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1], &hi_bits);
      result = hi_bits;
      break;
   }
   case nir_op_ushr: {
      struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
      if (src_bit_size[0] == 64)
         src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
      if (src_bit_size[0] < 32)
         src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
      src[1] = lp_build_and(uint_bld, src[1], lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
      result = lp_build_shr(uint_bld, src[0], src[1]);
      break;
   }
   case nir_op_bcsel: {
      LLVMTypeRef src1_type = LLVMTypeOf(src[1]);
      LLVMTypeRef src2_type = LLVMTypeOf(src[2]);

      if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
          LLVMGetTypeKind(src2_type) != LLVMPointerTypeKind) {
         src[2] = LLVMBuildIntToPtr(builder, src[2], src1_type, "");
      } else if (LLVMGetTypeKind(src2_type) == LLVMPointerTypeKind &&
                 LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) {
         src[1] = LLVMBuildIntToPtr(builder, src[1], src2_type, "");
      }

      for (int i = 1; i <= 2; i++) {
         LLVMTypeRef type = LLVMTypeOf(src[i]);
         if (LLVMGetTypeKind(type) == LLVMPointerTypeKind)
            break;
         src[i] = LLVMBuildBitCast(builder, src[i], get_int_bld(bld_base, true, src_bit_size[i])->vec_type, "");
      }
      return LLVMBuildSelect(builder, src[0], src[1], src[2], "");
   }
   default:
      assert(0);
      break;
   }
   return result;
}


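/**
 * Translate one NIR ALU instruction: fetch and size the sources, handle
 * the vecN / fsum / AOS special cases, otherwise emit the op once per
 * destination channel and store the per-channel results.
 */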
static void
visit_alu(struct lp_build_nir_context *bld_base,
          const nir_alu_instr *instr)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef src[NIR_MAX_VEC_COMPONENTS];
   unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS];
   const unsigned num_components = nir_dest_num_components(instr->dest.dest);
   unsigned src_components;

   switch (instr->op) {
   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
   case nir_op_vec8:
   case nir_op_vec16:
      src_components = 1;
      break;
   case nir_op_pack_half_2x16:
      src_components = 2;
      break;
   case nir_op_unpack_half_2x16:
      src_components = 1;
      break;
   case nir_op_cube_face_coord_amd:
   case nir_op_cube_face_index_amd:
      src_components = 3;
      break;
   case nir_op_fsum2:
   case nir_op_fsum3:
   case nir_op_fsum4:
      src_components = nir_op_infos[instr->op].input_sizes[0];
      break;
   default:
      src_components = num_components;
      break;
   }

   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
      src[i] = get_alu_src(bld_base, instr->src[i], src_components);
      src_bit_size[i] = nir_src_bit_size(instr->src[i].src);
   }

   if (instr->op == nir_op_mov &&
       is_aos(bld_base) &&
       !instr->dest.dest.is_ssa) {
      for (unsigned i = 0; i < 4; i++) {
         if (instr->dest.write_mask & (1 << i)) {
            assign_reg(bld_base, &instr->dest.dest.reg, (1 << i), src);
         }
      }
      return;
   }

   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
   if (instr->op == nir_op_vec4 ||
       instr->op == nir_op_vec3 ||
       instr->op == nir_op_vec2 ||
       instr->op == nir_op_vec8 ||
       instr->op == nir_op_vec16) {
      for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
         result[i] = cast_type(bld_base, src[i],
                               nir_op_infos[instr->op].input_types[i],
                               src_bit_size[i]);
      }
   } else if (instr->op == nir_op_fsum4 ||
              instr->op == nir_op_fsum3 ||
              instr->op == nir_op_fsum2) {
      for (unsigned c = 0; c < nir_op_infos[instr->op].input_sizes[0]; c++) {
         LLVMValueRef temp_chan = LLVMBuildExtractValue(gallivm->builder,
                                                        src[0], c, "");
         temp_chan = cast_type(bld_base, temp_chan,
                               nir_op_infos[instr->op].input_types[0],
                               src_bit_size[0]);
         result[0] = (c == 0) ? temp_chan
            : lp_build_add(get_flt_bld(bld_base, src_bit_size[0]),
                           result[0], temp_chan);
      }
   } else if (is_aos(bld_base)) {
      if (instr->op == nir_op_fmul) {
         if (LLVMIsConstant(src[0])) {
            src[0] = lp_nir_aos_conv_const(gallivm, src[0], 1);
         }
         if (LLVMIsConstant(src[1])) {
            src[1] = lp_nir_aos_conv_const(gallivm, src[1], 1);
         }
      }
      result[0] = do_alu_action(bld_base, instr, src_bit_size, src);
   } else {
      /* Loop for R,G,B,A channels */
      for (unsigned c = 0; c < num_components; c++) {
         LLVMValueRef src_chan[NIR_MAX_VEC_COMPONENTS];

         /* Loop over instruction operands */
         for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
            if (num_components > 1) {
               src_chan[i] = LLVMBuildExtractValue(gallivm->builder,
                                                   src[i], c, "");
            } else {
               src_chan[i] = src[i];
            }
            src_chan[i] = cast_type(bld_base, src_chan[i],
                                    nir_op_infos[instr->op].input_types[i],
                                    src_bit_size[i]);
         }
         result[c] = do_alu_action(bld_base, instr, src_bit_size, src_chan);
         result[c] = cast_type(bld_base, result[c],
                               nir_op_infos[instr->op].output_type,
                               nir_dest_bit_size(instr->dest.dest));
      }
   }
   assign_alu_dest(bld_base, &instr->dest, result);
}


static void
visit_load_const(struct lp_build_nir_context *bld_base,
                 const nir_load_const_instr *instr)
{
   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
   bld_base->load_const(bld_base, instr, result);
   assign_ssa_dest(bld_base, &instr->def, result);
}


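/**
 * Walk a variable deref chain and split the addressing into a constant
 * offset plus an optional per-lane indirect offset, both counted in
 * attribute slots. For per-vertex IO the leading array index is
 * returned separately as a vertex index (constant or LLVM value).
 */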
static void
get_deref_offset(struct lp_build_nir_context *bld_base, nir_deref_instr *instr,
                 bool vs_in, unsigned *vertex_index_out,
                 LLVMValueRef *vertex_index_ref,
                 unsigned *const_out, LLVMValueRef *indir_out)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   nir_variable *var = nir_deref_instr_get_variable(instr);
   nir_deref_path path;
   unsigned idx_lvl = 1;

   nir_deref_path_init(&path, instr, NULL);

   if (vertex_index_out != NULL || vertex_index_ref != NULL) {
      if (vertex_index_ref) {
         *vertex_index_ref = get_src(bld_base, path.path[idx_lvl]->arr.index);
         if (vertex_index_out)
            *vertex_index_out = 0;
      } else {
         *vertex_index_out = nir_src_as_uint(path.path[idx_lvl]->arr.index);
      }
      ++idx_lvl;
   }

   uint32_t const_offset = 0;
   LLVMValueRef offset = NULL;

   if (var->data.compact && nir_src_is_const(instr->arr.index)) {
      assert(instr->deref_type == nir_deref_type_array);
      const_offset = nir_src_as_uint(instr->arr.index);
      goto out;
   }

   for (; path.path[idx_lvl]; ++idx_lvl) {
      const struct glsl_type *parent_type = path.path[idx_lvl - 1]->type;
      if (path.path[idx_lvl]->deref_type == nir_deref_type_struct) {
         unsigned index = path.path[idx_lvl]->strct.index;

         for (unsigned i = 0; i < index; i++) {
            const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
            const_offset += glsl_count_attribute_slots(ft, vs_in);
         }
      } else if (path.path[idx_lvl]->deref_type == nir_deref_type_array) {
         unsigned size = glsl_count_attribute_slots(path.path[idx_lvl]->type, vs_in);
         if (nir_src_is_const(path.path[idx_lvl]->arr.index)) {
            const_offset += nir_src_comp_as_int(path.path[idx_lvl]->arr.index, 0) * size;
         } else {
            LLVMValueRef idx_src = get_src(bld_base, path.path[idx_lvl]->arr.index);
            idx_src = cast_type(bld_base, idx_src, nir_type_uint, 32);
            LLVMValueRef array_off = lp_build_mul(&bld_base->uint_bld, lp_build_const_int_vec(bld_base->base.gallivm, bld_base->base.type, size),
                                                  idx_src);
            if (offset)
               offset = lp_build_add(&bld_base->uint_bld, offset, array_off);
            else
               offset = array_off;
         }
      } else
         unreachable("Unhandled deref type in get_deref_offset");
   }

out:
   nir_deref_path_finish(&path);

   if (const_offset && offset)
      offset = LLVMBuildAdd(builder, offset,
                            lp_build_const_int_vec(bld_base->base.gallivm, bld_base->uint_bld.type, const_offset),
                            "");
   *const_out = const_offset;
   *indir_out = offset;
}


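/*
 * Handle load_input intrinsics (lowered IO): rebuild a stack-allocated
 * nir_variable from the intrinsic's IO semantics and hand it to the
 * backend's load_var hook.
 */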
1381 static void
visit_load_input(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1382 visit_load_input(struct lp_build_nir_context *bld_base,
1383 nir_intrinsic_instr *instr,
1384 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1385 {
1386 nir_variable var = {0};
1387 var.data.location = nir_intrinsic_io_semantics(instr).location;
1388 var.data.driver_location = nir_intrinsic_base(instr);
1389 var.data.location_frac = nir_intrinsic_component(instr);
1390
1391 unsigned nc = nir_dest_num_components(instr->dest);
1392 unsigned bit_size = nir_dest_bit_size(instr->dest);
1393
1394 nir_src offset = *nir_get_io_offset_src(instr);
1395 bool indirect = !nir_src_is_const(offset);
1396 if (!indirect)
1397 assert(nir_src_as_uint(offset) == 0);
1398 LLVMValueRef indir_index = indirect ? get_src(bld_base, offset) : NULL;
1399
1400 bld_base->load_var(bld_base, nir_var_shader_in, nc, bit_size, &var, 0, NULL, 0, indir_index, result);
1401 }
1402
1403
1404 static void
visit_store_output(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr)1405 visit_store_output(struct lp_build_nir_context *bld_base,
1406 nir_intrinsic_instr *instr)
1407 {
1408 nir_variable var = {0};
1409 var.data.location = nir_intrinsic_io_semantics(instr).location;
1410 var.data.driver_location = nir_intrinsic_base(instr);
1411 var.data.location_frac = nir_intrinsic_component(instr);
1412
1413 unsigned mask = nir_intrinsic_write_mask(instr);
1414
1415 unsigned bit_size = nir_src_bit_size(instr->src[0]);
1416 LLVMValueRef src = get_src(bld_base, instr->src[0]);
1417
1418 nir_src offset = *nir_get_io_offset_src(instr);
1419 bool indirect = !nir_src_is_const(offset);
1420 if (!indirect)
1421 assert(nir_src_as_uint(offset) == 0);
1422 LLVMValueRef indir_index = indirect ? get_src(bld_base, offset) : NULL;
1423
1424 if (mask == 0x1 && LLVMGetTypeKind(LLVMTypeOf(src)) == LLVMArrayTypeKind) {
1425 src = LLVMBuildExtractValue(bld_base->base.gallivm->builder,
1426 src, 0, "");
1427 }
1428
1429 bld_base->store_var(bld_base, nir_var_shader_out, util_last_bit(mask),
1430 bit_size, &var, mask, NULL, 0, indir_index, src);
1431 }
1432
1433
1434 static void
visit_load_var(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1435 visit_load_var(struct lp_build_nir_context *bld_base,
1436 nir_intrinsic_instr *instr,
1437 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1438 {
1439 nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1440 nir_variable *var = nir_deref_instr_get_variable(deref);
1441 assert(util_bitcount(deref->modes) == 1);
1442 nir_variable_mode mode = deref->modes;
1443 unsigned const_index;
1444 LLVMValueRef indir_index;
1445 LLVMValueRef indir_vertex_index = NULL;
1446 unsigned vertex_index = 0;
1447 unsigned nc = nir_dest_num_components(instr->dest);
1448 unsigned bit_size = nir_dest_bit_size(instr->dest);
1449 if (var) {
1450 bool vs_in = bld_base->shader->info.stage == MESA_SHADER_VERTEX &&
1451 var->data.mode == nir_var_shader_in;
1452 bool gs_in = bld_base->shader->info.stage == MESA_SHADER_GEOMETRY &&
1453 var->data.mode == nir_var_shader_in;
1454 bool tcs_in = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
1455 var->data.mode == nir_var_shader_in;
1456 bool tcs_out = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
1457 var->data.mode == nir_var_shader_out && !var->data.patch;
1458 bool tes_in = bld_base->shader->info.stage == MESA_SHADER_TESS_EVAL &&
1459 var->data.mode == nir_var_shader_in && !var->data.patch;
1460
1461 mode = var->data.mode;
1462
1463 get_deref_offset(bld_base, deref, vs_in,
1464 gs_in ? &vertex_index : NULL,
1465 (tcs_in || tcs_out || tes_in) ? &indir_vertex_index : NULL,
1466 &const_index, &indir_index);
1467 }
1468 bld_base->load_var(bld_base, mode, nc, bit_size, var, vertex_index,
1469 indir_vertex_index, const_index, indir_index, result);
1470 }
1471
1472
1473 static void
visit_store_var(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr)1474 visit_store_var(struct lp_build_nir_context *bld_base,
1475 nir_intrinsic_instr *instr)
1476 {
1477 nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1478 nir_variable *var = nir_deref_instr_get_variable(deref);
1479 assert(util_bitcount(deref->modes) == 1);
1480 nir_variable_mode mode = deref->modes;
1481 int writemask = instr->const_index[0];
1482 unsigned bit_size = nir_src_bit_size(instr->src[1]);
1483 LLVMValueRef src = get_src(bld_base, instr->src[1]);
1484 unsigned const_index = 0;
1485 LLVMValueRef indir_index, indir_vertex_index = NULL;
1486 if (var) {
1487 bool tcs_out = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
1488 var->data.mode == nir_var_shader_out && !var->data.patch;
1489 get_deref_offset(bld_base, deref, false, NULL,
1490 tcs_out ? &indir_vertex_index : NULL,
1491 &const_index, &indir_index);
1492 }
1493 bld_base->store_var(bld_base, mode, instr->num_components, bit_size,
1494 var, writemask, indir_vertex_index, const_index,
1495 indir_index, src);
1496 }
1497
1498
1499 static void
visit_load_ubo(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1500 visit_load_ubo(struct lp_build_nir_context *bld_base,
1501 nir_intrinsic_instr *instr,
1502 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1503 {
1504 struct gallivm_state *gallivm = bld_base->base.gallivm;
1505 LLVMBuilderRef builder = gallivm->builder;
1506 LLVMValueRef idx = get_src(bld_base, instr->src[0]);
1507 LLVMValueRef offset = get_src(bld_base, instr->src[1]);
1508
1509 bool offset_is_uniform = nir_src_is_always_uniform(instr->src[1]);
1510 idx = LLVMBuildExtractElement(builder, idx, lp_build_const_int32(gallivm, 0), "");
1511 bld_base->load_ubo(bld_base, nir_dest_num_components(instr->dest),
1512 nir_dest_bit_size(instr->dest),
1513 offset_is_uniform, idx, offset, result);
1514 }
1515
1516
1517 static void
1518 visit_load_push_constant(struct lp_build_nir_context *bld_base,
1519 nir_intrinsic_instr *instr,
1520 LLVMValueRef result[4])
1521 {
1522 struct gallivm_state *gallivm = bld_base->base.gallivm;
1523 LLVMValueRef offset = get_src(bld_base, instr->src[0]);
1524 LLVMValueRef idx = lp_build_const_int32(gallivm, 0);
1525 bool offset_is_uniform = nir_src_is_always_uniform(instr->src[0]);
1526
1527 bld_base->load_ubo(bld_base, nir_dest_num_components(instr->dest),
1528 nir_dest_bit_size(instr->dest),
1529 offset_is_uniform, idx, offset, result);
1530 }
1531
1532
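/*
 * SSBO accesses carry a per-lane buffer index (normalized to uint32)
 * and a per-lane byte offset.  When both are known to be uniform across
 * the vector, the flag below lets the backend use a scalar path rather
 * than a full gather/scatter.
 */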
1533 static void
1534 visit_load_ssbo(struct lp_build_nir_context *bld_base,
1535 nir_intrinsic_instr *instr,
1536 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1537 {
1538 LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_uint, 32);
1539 LLVMValueRef offset = get_src(bld_base, instr->src[1]);
1540 bool index_and_offset_are_uniform = nir_src_is_always_uniform(instr->src[0]) && nir_src_is_always_uniform(instr->src[1]);
1541 bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
1542 index_and_offset_are_uniform, idx, offset, result);
1543 }
1544
1545
1546 static void
1547 visit_store_ssbo(struct lp_build_nir_context *bld_base,
1548 nir_intrinsic_instr *instr)
1549 {
1550 LLVMValueRef val = get_src(bld_base, instr->src[0]);
1551 LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[1]), nir_type_uint, 32);
1552 LLVMValueRef offset = get_src(bld_base, instr->src[2]);
1553 bool index_and_offset_are_uniform = nir_src_is_always_uniform(instr->src[1]) && nir_src_is_always_uniform(instr->src[2]);
1554 int writemask = instr->const_index[0];
1555 int nc = nir_src_num_components(instr->src[0]);
1556 int bitsize = nir_src_bit_size(instr->src[0]);
1557 bld_base->store_mem(bld_base, writemask, nc, bitsize, index_and_offset_are_uniform, idx, offset, val);
1558 }
1559
1560
1561 static void
1562 visit_get_ssbo_size(struct lp_build_nir_context *bld_base,
1563 nir_intrinsic_instr *instr,
1564 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1565 {
1566 LLVMValueRef idx = cast_type(bld_base,
1567 get_src(bld_base, instr->src[0]),
1568 nir_type_uint, 32);
1569 result[0] = bld_base->get_ssbo_size(bld_base, idx);
1570 }
1571
1572
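/*
 * SSBO atomics: src[0] is the buffer index, src[1] the byte offset and
 * src[2] the operand; comp_swap passes its second operand in src[3]
 * (val2 below).
 */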
1573 static void
1574 visit_ssbo_atomic(struct lp_build_nir_context *bld_base,
1575 nir_intrinsic_instr *instr,
1576 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1577 {
1578 LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[0]),
1579 nir_type_uint, 32);
1580 LLVMValueRef offset = get_src(bld_base, instr->src[1]);
1581 LLVMValueRef val = get_src(bld_base, instr->src[2]);
1582 LLVMValueRef val2 = NULL;
1583 int bitsize = nir_src_bit_size(instr->src[2]);
1584 if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap)
1585 val2 = get_src(bld_base, instr->src[3]);
1586
1587 bld_base->atomic_mem(bld_base, instr->intrinsic, bitsize, idx,
1588 offset, val, val2, &result[0]);
1589 }
1590
1591
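/*
 * Image loads: the coordinate arrives as an LLVM aggregate of per-lane
 * component vectors and is unpacked into coords[].  The image code
 * expects the layer of a 1D array texture in the Z slot, hence the
 * coords[2] = coords[1] fixup.
 */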
1592 static void
1593 visit_load_image(struct lp_build_nir_context *bld_base,
1594 nir_intrinsic_instr *instr,
1595 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1596 {
1597 struct gallivm_state *gallivm = bld_base->base.gallivm;
1598 LLVMBuilderRef builder = gallivm->builder;
1599 nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1600 nir_variable *var = nir_deref_instr_get_variable(deref);
1601 LLVMValueRef coord_val = get_src(bld_base, instr->src[1]);
1602 LLVMValueRef coords[5];
1603 struct lp_img_params params;
1604 const struct glsl_type *type = glsl_without_array(var->type);
1605 unsigned const_index;
1606 LLVMValueRef indir_index;
1607 get_deref_offset(bld_base, deref, false, NULL, NULL,
1608 &const_index, &indir_index);
1609
1610 memset(&params, 0, sizeof(params));
1611 params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type),
1612 glsl_sampler_type_is_array(type));
1613 for (unsigned i = 0; i < 4; i++)
1614 coords[i] = LLVMBuildExtractValue(builder, coord_val, i, "");
1615 if (params.target == PIPE_TEXTURE_1D_ARRAY)
1616 coords[2] = coords[1];
1617
1618 params.coords = coords;
1619 params.outdata = result;
1620 params.img_op = LP_IMG_LOAD;
1621 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS ||
1622 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS_MS) {
1623 params.ms_index = cast_type(bld_base, get_src(bld_base, instr->src[2]),
1624 nir_type_uint, 32);
1625 }
1626 params.image_index = var->data.binding + (indir_index ? 0 : const_index);
1627 params.image_index_offset = indir_index;
1628 bld_base->image_op(bld_base, &params);
1629 }
1630
1631
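/*
 * Image stores mirror the load path with the texel data in src[3];
 * the data is bitcast to the native float vector type here and any
 * format conversion is left to the image op itself.
 */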
1632 static void
1633 visit_store_image(struct lp_build_nir_context *bld_base,
1634 nir_intrinsic_instr *instr)
1635 {
1636 struct gallivm_state *gallivm = bld_base->base.gallivm;
1637 LLVMBuilderRef builder = gallivm->builder;
1638 nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1639 nir_variable *var = nir_deref_instr_get_variable(deref);
1640 LLVMValueRef coord_val = get_src(bld_base, instr->src[1]);
1641 LLVMValueRef in_val = get_src(bld_base, instr->src[3]);
1642 LLVMValueRef coords[5];
1643 struct lp_img_params params;
1644 const struct glsl_type *type = glsl_without_array(var->type);
1645 unsigned const_index;
1646 LLVMValueRef indir_index;
1647 get_deref_offset(bld_base, deref, false, NULL, NULL,
1648 &const_index, &indir_index);
1649
1650 memset(&params, 0, sizeof(params));
1651 params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
1652 for (unsigned i = 0; i < 4; i++)
1653 coords[i] = LLVMBuildExtractValue(builder, coord_val, i, "");
1654 if (params.target == PIPE_TEXTURE_1D_ARRAY)
1655 coords[2] = coords[1];
1656 params.coords = coords;
1657
1658 for (unsigned i = 0; i < 4; i++) {
1659 params.indata[i] = LLVMBuildExtractValue(builder, in_val, i, "");
1660 params.indata[i] = LLVMBuildBitCast(builder, params.indata[i], bld_base->base.vec_type, "");
1661 }
1662 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS)
1663 params.ms_index = get_src(bld_base, instr->src[2]);
1664 params.img_op = LP_IMG_STORE;
1665 params.image_index = var->data.binding + (indir_index ? 0 : const_index);
1666 params.image_index_offset = indir_index;
1667
1670 bld_base->image_op(bld_base, &params);
1671 }
1672
1673
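/*
 * Image atomics map each NIR intrinsic onto an LLVMAtomicRMWBinOp
 * (e.g. image_deref_atomic_umin -> LLVMAtomicRMWBinOpUMin).  comp_swap
 * is the exception: it becomes LP_IMG_ATOMIC_CAS with src[3] passed in
 * indata and src[4] in indata2.
 */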
1674 static void
1675 visit_atomic_image(struct lp_build_nir_context *bld_base,
1676 nir_intrinsic_instr *instr,
1677 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1678 {
1679 struct gallivm_state *gallivm = bld_base->base.gallivm;
1680 LLVMBuilderRef builder = gallivm->builder;
1681 nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1682 nir_variable *var = nir_deref_instr_get_variable(deref);
1683 struct lp_img_params params;
1684 LLVMValueRef coord_val = get_src(bld_base, instr->src[1]);
1685 LLVMValueRef in_val = get_src(bld_base, instr->src[3]);
1686 LLVMValueRef coords[5];
1687 const struct glsl_type *type = glsl_without_array(var->type);
1688 unsigned const_index;
1689 LLVMValueRef indir_index;
1690 get_deref_offset(bld_base, deref, false, NULL, NULL,
1691 &const_index, &indir_index);
1692
1693 memset(&params, 0, sizeof(params));
1694
1695 switch (instr->intrinsic) {
1696 case nir_intrinsic_image_deref_atomic_add:
1697 params.op = LLVMAtomicRMWBinOpAdd;
1698 break;
1699 case nir_intrinsic_image_deref_atomic_exchange:
1700 params.op = LLVMAtomicRMWBinOpXchg;
1701 break;
1702 case nir_intrinsic_image_deref_atomic_and:
1703 params.op = LLVMAtomicRMWBinOpAnd;
1704 break;
1705 case nir_intrinsic_image_deref_atomic_or:
1706 params.op = LLVMAtomicRMWBinOpOr;
1707 break;
1708 case nir_intrinsic_image_deref_atomic_xor:
1709 params.op = LLVMAtomicRMWBinOpXor;
1710 break;
1711 case nir_intrinsic_image_deref_atomic_umin:
1712 params.op = LLVMAtomicRMWBinOpUMin;
1713 break;
1714 case nir_intrinsic_image_deref_atomic_umax:
1715 params.op = LLVMAtomicRMWBinOpUMax;
1716 break;
1717 case nir_intrinsic_image_deref_atomic_imin:
1718 params.op = LLVMAtomicRMWBinOpMin;
1719 break;
1720 case nir_intrinsic_image_deref_atomic_imax:
1721 params.op = LLVMAtomicRMWBinOpMax;
1722 break;
1723 default:
1724 break;
1725 }
1726
1727 params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type),
1728 glsl_sampler_type_is_array(type));
1729 for (unsigned i = 0; i < 4; i++) {
1730 coords[i] = LLVMBuildExtractValue(builder, coord_val, i, "");
1731 }
1732 if (params.target == PIPE_TEXTURE_1D_ARRAY) {
1733 coords[2] = coords[1];
1734 }
1735
1736 params.coords = coords;
1737
1738 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS) {
1739 params.ms_index = get_src(bld_base, instr->src[2]);
1740 }
1741 if (instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) {
1742 LLVMValueRef cas_val = get_src(bld_base, instr->src[4]);
1743 params.indata[0] = in_val;
1744 params.indata2[0] = cas_val;
1745 } else {
1746 params.indata[0] = in_val;
1747 }
1748
1749 params.outdata = result;
1750 params.img_op =
1751 (instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap)
1752 ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;
1753 params.image_index = var->data.binding + (indir_index ? 0 : const_index);
1754 params.image_index_offset = indir_index;
1755
1756 bld_base->image_op(bld_base, &params);
1757 }
1758
1759
1760 static void
1761 visit_image_size(struct lp_build_nir_context *bld_base,
1762 nir_intrinsic_instr *instr,
1763 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1764 {
1765 nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1766 nir_variable *var = nir_deref_instr_get_variable(deref);
1767 struct lp_sampler_size_query_params params = { 0 };
1768 unsigned const_index;
1769 LLVMValueRef indir_index;
1770 const struct glsl_type *type = glsl_without_array(var->type);
1771 get_deref_offset(bld_base, deref, false, NULL, NULL,
1772 &const_index, &indir_index);
1773 params.texture_unit = var->data.binding + (indir_index ? 0 : const_index);
1774 params.texture_unit_offset = indir_index;
1775 params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type),
1776 glsl_sampler_type_is_array(type));
1777 params.sizes_out = result;
1778
1779 bld_base->image_size(bld_base, &params);
1780 }
1781
1782
1783 static void
1784 visit_image_samples(struct lp_build_nir_context *bld_base,
1785 nir_intrinsic_instr *instr,
1786 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1787 {
1788 nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1789 nir_variable *var = nir_deref_instr_get_variable(deref);
1790 struct lp_sampler_size_query_params params = { 0 };
1791 unsigned const_index;
1792 LLVMValueRef indir_index;
1793 const struct glsl_type *type = glsl_without_array(var->type);
1794 get_deref_offset(bld_base, deref, false, NULL, NULL,
1795 &const_index, &indir_index);
1796
1797 params.texture_unit = var->data.binding + (indir_index ? 0 : const_index);
1798 params.texture_unit_offset = indir_index;
1799 params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type),
1800 glsl_sampler_type_is_array(type));
1801 params.sizes_out = result;
1802 params.samples_only = true;
1803
1804 bld_base->image_size(bld_base, &params);
1805 }
1806
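/*
 * Shared (workgroup-local) memory reuses the SSBO load/store/atomic
 * hooks with a NULL buffer index: there is a single shared area, so a
 * byte offset is all that is needed.
 */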
1807 static void
1808 visit_shared_load(struct lp_build_nir_context *bld_base,
1809 nir_intrinsic_instr *instr,
1810 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1811 {
1812 LLVMValueRef offset = get_src(bld_base, instr->src[0]);
1813 bool offset_is_uniform = nir_src_is_always_uniform(instr->src[0]);
1814 bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
1815 offset_is_uniform, NULL, offset, result);
1816 }
1817
1818
1819 static void
1820 visit_shared_store(struct lp_build_nir_context *bld_base,
1821 nir_intrinsic_instr *instr)
1822 {
1823 LLVMValueRef val = get_src(bld_base, instr->src[0]);
1824 LLVMValueRef offset = get_src(bld_base, instr->src[1]);
1825 bool offset_is_uniform = nir_src_is_always_uniform(instr->src[1]);
1826 int writemask = instr->const_index[1];
1827 int nc = nir_src_num_components(instr->src[0]);
1828 int bitsize = nir_src_bit_size(instr->src[0]);
1829 bld_base->store_mem(bld_base, writemask, nc, bitsize, offset_is_uniform, NULL, offset, val);
1830 }
1831
1832
1833 static void
1834 visit_shared_atomic(struct lp_build_nir_context *bld_base,
1835 nir_intrinsic_instr *instr,
1836 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1837 {
1838 LLVMValueRef offset = get_src(bld_base, instr->src[0]);
1839 LLVMValueRef val = get_src(bld_base, instr->src[1]);
1840 LLVMValueRef val2 = NULL;
1841 int bitsize = nir_src_bit_size(instr->src[1]);
1842 if (instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap)
1843 val2 = get_src(bld_base, instr->src[2]);
1844
1845 bld_base->atomic_mem(bld_base, instr->intrinsic, bitsize, NULL, offset, val, val2, &result[0]);
1846 }
1847
1848
1849 static void
1850 visit_barrier(struct lp_build_nir_context *bld_base)
1851 {
1852 bld_base->barrier(bld_base);
1853 }
1854
1855
1856 static void
1857 visit_discard(struct lp_build_nir_context *bld_base,
1858 nir_intrinsic_instr *instr)
1859 {
1860 LLVMValueRef cond = NULL;
1861 if (instr->intrinsic == nir_intrinsic_discard_if) {
1862 cond = get_src(bld_base, instr->src[0]);
1863 cond = cast_type(bld_base, cond, nir_type_int, 32);
1864 }
1865 bld_base->discard(bld_base, cond);
1866 }
1867
1868
1869 static void
1870 visit_load_kernel_input(struct lp_build_nir_context *bld_base,
1871 nir_intrinsic_instr *instr,
1872 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1873 {
1874 LLVMValueRef offset = get_src(bld_base, instr->src[0]);
1875
1876 bool offset_is_uniform = nir_src_is_always_uniform(instr->src[0]);
1877 bld_base->load_kernel_arg(bld_base, nir_dest_num_components(instr->dest),
1878 nir_dest_bit_size(instr->dest),
1879 nir_src_bit_size(instr->src[0]),
1880 offset_is_uniform, offset, result);
1881 }
1882
1883
1884 static void
1885 visit_load_global(struct lp_build_nir_context *bld_base,
1886 nir_intrinsic_instr *instr,
1887 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1888 {
1889 LLVMValueRef addr = get_src(bld_base, instr->src[0]);
1890 bool offset_is_uniform = nir_src_is_always_uniform(instr->src[0]);
1891 bld_base->load_global(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
1892 nir_src_bit_size(instr->src[0]),
1893 offset_is_uniform, addr, result);
1894 }
1895
1896
1897 static void
1898 visit_store_global(struct lp_build_nir_context *bld_base,
1899 nir_intrinsic_instr *instr)
1900 {
1901 LLVMValueRef val = get_src(bld_base, instr->src[0]);
1902 int nc = nir_src_num_components(instr->src[0]);
1903 int bitsize = nir_src_bit_size(instr->src[0]);
1904 LLVMValueRef addr = get_src(bld_base, instr->src[1]);
1905 int addr_bitsize = nir_src_bit_size(instr->src[1]);
1906 int writemask = instr->const_index[0];
1907 bld_base->store_global(bld_base, writemask, nc, bitsize,
1908 addr_bitsize, addr, val);
1909 }
1910
1911
1912 static void
1913 visit_global_atomic(struct lp_build_nir_context *bld_base,
1914 nir_intrinsic_instr *instr,
1915 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1916 {
1917 LLVMValueRef addr = get_src(bld_base, instr->src[0]);
1918 LLVMValueRef val = get_src(bld_base, instr->src[1]);
1919 LLVMValueRef val2 = NULL;
1920 int addr_bitsize = nir_src_bit_size(instr->src[0]);
1921 int val_bitsize = nir_src_bit_size(instr->src[1]);
1922 if (instr->intrinsic == nir_intrinsic_global_atomic_comp_swap)
1923 val2 = get_src(bld_base, instr->src[2]);
1924
1925 bld_base->atomic_global(bld_base, instr->intrinsic, addr_bitsize,
1926 val_bitsize, addr, val, val2, &result[0]);
1927 }
1928
1929 #if LLVM_VERSION_MAJOR >= 10
1930 static void visit_shuffle(struct lp_build_nir_context *bld_base,
1931 nir_intrinsic_instr *instr,
1932 LLVMValueRef dst[4])
1933 {
1934 LLVMValueRef src = get_src(bld_base, instr->src[0]);
1935 src = cast_type(bld_base, src, nir_type_int, nir_src_bit_size(instr->src[0]));
1936 LLVMValueRef index = get_src(bld_base, instr->src[1]);
1937 index = cast_type(bld_base, index, nir_type_uint, nir_src_bit_size(instr->src[1]));
1938
1939 bld_base->shuffle(bld_base, src, index, instr, dst);
1940 }
1941 #endif
1942
1943
1944 static void
1945 visit_interp(struct lp_build_nir_context *bld_base,
1946 nir_intrinsic_instr *instr,
1947 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1948 {
1949 struct gallivm_state *gallivm = bld_base->base.gallivm;
1950 LLVMBuilderRef builder = gallivm->builder;
1951 nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1952 unsigned num_components = nir_dest_num_components(instr->dest);
1953 nir_variable *var = nir_deref_instr_get_variable(deref);
1954 unsigned const_index;
1955 LLVMValueRef indir_index;
1956 LLVMValueRef offsets[2] = { NULL, NULL };
1957 get_deref_offset(bld_base, deref, false, NULL, NULL,
1958 &const_index, &indir_index);
1959 bool centroid = instr->intrinsic == nir_intrinsic_interp_deref_at_centroid;
1960 bool sample = false;
1961 if (instr->intrinsic == nir_intrinsic_interp_deref_at_offset) {
1962 for (unsigned i = 0; i < 2; i++) {
1963 offsets[i] = LLVMBuildExtractValue(builder, get_src(bld_base, instr->src[1]), i, "");
1964 offsets[i] = cast_type(bld_base, offsets[i], nir_type_float, 32);
1965 }
1966 } else if (instr->intrinsic == nir_intrinsic_interp_deref_at_sample) {
1967 offsets[0] = get_src(bld_base, instr->src[1]);
1968 offsets[0] = cast_type(bld_base, offsets[0], nir_type_int, 32);
1969 sample = true;
1970 }
1971 bld_base->interp_at(bld_base, num_components, var, centroid, sample,
1972 const_index, indir_index, offsets, result);
1973 }
1974
1975
1976 static void
1977 visit_load_scratch(struct lp_build_nir_context *bld_base,
1978 nir_intrinsic_instr *instr,
1979 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1980 {
1981 LLVMValueRef offset = get_src(bld_base, instr->src[0]);
1982
1983 bld_base->load_scratch(bld_base, nir_dest_num_components(instr->dest),
1984 nir_dest_bit_size(instr->dest), offset, result);
1985 }
1986
1987
1988 static void
1989 visit_store_scratch(struct lp_build_nir_context *bld_base,
1990 nir_intrinsic_instr *instr)
1991 {
1992 LLVMValueRef val = get_src(bld_base, instr->src[0]);
1993 LLVMValueRef offset = get_src(bld_base, instr->src[1]);
1994 int writemask = instr->const_index[2];
1995 int nc = nir_src_num_components(instr->src[0]);
1996 int bitsize = nir_src_bit_size(instr->src[0]);
1997 bld_base->store_scratch(bld_base, writemask, nc, bitsize, offset, val);
1998 }
1999
2000
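/*
 * Central intrinsic dispatch.  Handlers deposit any value they produce
 * in result[]; if result[0] is non-NULL after the switch, it is
 * committed to the instruction's destination via assign_dest().
 */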
2001 static void
2002 visit_intrinsic(struct lp_build_nir_context *bld_base,
2003 nir_intrinsic_instr *instr)
2004 {
2005 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS] = {0};
2006 switch (instr->intrinsic) {
2007 case nir_intrinsic_load_input:
2008 visit_load_input(bld_base, instr, result);
2009 break;
2010 case nir_intrinsic_store_output:
2011 visit_store_output(bld_base, instr);
2012 break;
2013 case nir_intrinsic_load_deref:
2014 visit_load_var(bld_base, instr, result);
2015 break;
2016 case nir_intrinsic_store_deref:
2017 visit_store_var(bld_base, instr);
2018 break;
2019 case nir_intrinsic_load_ubo:
2020 visit_load_ubo(bld_base, instr, result);
2021 break;
2022 case nir_intrinsic_load_push_constant:
2023 visit_load_push_constant(bld_base, instr, result);
2024 break;
2025 case nir_intrinsic_load_ssbo:
2026 visit_load_ssbo(bld_base, instr, result);
2027 break;
2028 case nir_intrinsic_store_ssbo:
2029 visit_store_ssbo(bld_base, instr);
2030 break;
2031 case nir_intrinsic_get_ssbo_size:
2032 visit_get_ssbo_size(bld_base, instr, result);
2033 break;
2034 case nir_intrinsic_load_vertex_id:
2035 case nir_intrinsic_load_primitive_id:
2036 case nir_intrinsic_load_instance_id:
2037 case nir_intrinsic_load_base_instance:
2038 case nir_intrinsic_load_base_vertex:
2039 case nir_intrinsic_load_first_vertex:
2040 case nir_intrinsic_load_workgroup_id:
2041 case nir_intrinsic_load_local_invocation_id:
2042 case nir_intrinsic_load_local_invocation_index:
2043 case nir_intrinsic_load_num_workgroups:
2044 case nir_intrinsic_load_invocation_id:
2045 case nir_intrinsic_load_front_face:
2046 case nir_intrinsic_load_draw_id:
2047 case nir_intrinsic_load_workgroup_size:
2048 case nir_intrinsic_load_work_dim:
2049 case nir_intrinsic_load_tess_coord:
2050 case nir_intrinsic_load_tess_level_outer:
2051 case nir_intrinsic_load_tess_level_inner:
2052 case nir_intrinsic_load_patch_vertices_in:
2053 case nir_intrinsic_load_sample_id:
2054 case nir_intrinsic_load_sample_pos:
2055 case nir_intrinsic_load_sample_mask_in:
2056 case nir_intrinsic_load_view_index:
2057 case nir_intrinsic_load_subgroup_invocation:
2058 case nir_intrinsic_load_subgroup_id:
2059 case nir_intrinsic_load_num_subgroups:
2060 bld_base->sysval_intrin(bld_base, instr, result);
2061 break;
2062 case nir_intrinsic_load_helper_invocation:
2063 bld_base->helper_invocation(bld_base, &result[0]);
2064 break;
2065 case nir_intrinsic_discard_if:
2066 case nir_intrinsic_discard:
2067 visit_discard(bld_base, instr);
2068 break;
2069 case nir_intrinsic_emit_vertex:
2070 bld_base->emit_vertex(bld_base, nir_intrinsic_stream_id(instr));
2071 break;
2072 case nir_intrinsic_end_primitive:
2073 bld_base->end_primitive(bld_base, nir_intrinsic_stream_id(instr));
2074 break;
2075 case nir_intrinsic_ssbo_atomic_add:
2076 case nir_intrinsic_ssbo_atomic_imin:
2077 case nir_intrinsic_ssbo_atomic_imax:
2078 case nir_intrinsic_ssbo_atomic_umin:
2079 case nir_intrinsic_ssbo_atomic_umax:
2080 case nir_intrinsic_ssbo_atomic_and:
2081 case nir_intrinsic_ssbo_atomic_or:
2082 case nir_intrinsic_ssbo_atomic_xor:
2083 case nir_intrinsic_ssbo_atomic_exchange:
2084 case nir_intrinsic_ssbo_atomic_comp_swap:
2085 visit_ssbo_atomic(bld_base, instr, result);
2086 break;
2087 case nir_intrinsic_image_deref_load:
2088 visit_load_image(bld_base, instr, result);
2089 break;
2090 case nir_intrinsic_image_deref_store:
2091 visit_store_image(bld_base, instr);
2092 break;
2093 case nir_intrinsic_image_deref_atomic_add:
2094 case nir_intrinsic_image_deref_atomic_imin:
2095 case nir_intrinsic_image_deref_atomic_imax:
2096 case nir_intrinsic_image_deref_atomic_umin:
2097 case nir_intrinsic_image_deref_atomic_umax:
2098 case nir_intrinsic_image_deref_atomic_and:
2099 case nir_intrinsic_image_deref_atomic_or:
2100 case nir_intrinsic_image_deref_atomic_xor:
2101 case nir_intrinsic_image_deref_atomic_exchange:
2102 case nir_intrinsic_image_deref_atomic_comp_swap:
2103 visit_atomic_image(bld_base, instr, result);
2104 break;
2105 case nir_intrinsic_image_deref_size:
2106 visit_image_size(bld_base, instr, result);
2107 break;
2108 case nir_intrinsic_image_deref_samples:
2109 visit_image_samples(bld_base, instr, result);
2110 break;
2111 case nir_intrinsic_load_shared:
2112 visit_shared_load(bld_base, instr, result);
2113 break;
2114 case nir_intrinsic_store_shared:
2115 visit_shared_store(bld_base, instr);
2116 break;
2117 case nir_intrinsic_shared_atomic_add:
2118 case nir_intrinsic_shared_atomic_imin:
2119 case nir_intrinsic_shared_atomic_umin:
2120 case nir_intrinsic_shared_atomic_imax:
2121 case nir_intrinsic_shared_atomic_umax:
2122 case nir_intrinsic_shared_atomic_and:
2123 case nir_intrinsic_shared_atomic_or:
2124 case nir_intrinsic_shared_atomic_xor:
2125 case nir_intrinsic_shared_atomic_exchange:
2126 case nir_intrinsic_shared_atomic_comp_swap:
2127 visit_shared_atomic(bld_base, instr, result);
2128 break;
2129 case nir_intrinsic_control_barrier:
2130 case nir_intrinsic_scoped_barrier:
2131 visit_barrier(bld_base);
2132 break;
2133 case nir_intrinsic_group_memory_barrier:
2134 case nir_intrinsic_memory_barrier:
2135 case nir_intrinsic_memory_barrier_shared:
2136 case nir_intrinsic_memory_barrier_buffer:
2137 case nir_intrinsic_memory_barrier_image:
2138 case nir_intrinsic_memory_barrier_tcs_patch:
2139 break;
2140 case nir_intrinsic_load_kernel_input:
2141 visit_load_kernel_input(bld_base, instr, result);
2142 break;
2143 case nir_intrinsic_load_global:
2144 case nir_intrinsic_load_global_constant:
2145 visit_load_global(bld_base, instr, result);
2146 break;
2147 case nir_intrinsic_store_global:
2148 visit_store_global(bld_base, instr);
2149 break;
2150 case nir_intrinsic_global_atomic_add:
2151 case nir_intrinsic_global_atomic_imin:
2152 case nir_intrinsic_global_atomic_umin:
2153 case nir_intrinsic_global_atomic_imax:
2154 case nir_intrinsic_global_atomic_umax:
2155 case nir_intrinsic_global_atomic_and:
2156 case nir_intrinsic_global_atomic_or:
2157 case nir_intrinsic_global_atomic_xor:
2158 case nir_intrinsic_global_atomic_exchange:
2159 case nir_intrinsic_global_atomic_comp_swap:
2160 visit_global_atomic(bld_base, instr, result);
2161 break;
2162 case nir_intrinsic_vote_all:
2163 case nir_intrinsic_vote_any:
2164 case nir_intrinsic_vote_ieq:
2165 case nir_intrinsic_vote_feq:
2166 bld_base->vote(bld_base, cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_int, nir_src_bit_size(instr->src[0])), instr, result);
2167 break;
2168 case nir_intrinsic_elect:
2169 bld_base->elect(bld_base, result);
2170 break;
2171 case nir_intrinsic_reduce:
2172 case nir_intrinsic_inclusive_scan:
2173 case nir_intrinsic_exclusive_scan:
2174 bld_base->reduce(bld_base, cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_int, nir_src_bit_size(instr->src[0])), instr, result);
2175 break;
2176 case nir_intrinsic_ballot:
2177 bld_base->ballot(bld_base, cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_int, 32), instr, result);
2178 break;
2179 #if LLVM_VERSION_MAJOR >= 10
2180 case nir_intrinsic_shuffle:
2181 visit_shuffle(bld_base, instr, result);
2182 break;
2183 #endif
2184 case nir_intrinsic_read_invocation:
2185 case nir_intrinsic_read_first_invocation: {
2186 LLVMValueRef src1 = NULL;
2187 LLVMValueRef src0 = get_src(bld_base, instr->src[0]);
2188 if (instr->intrinsic == nir_intrinsic_read_invocation) {
2189 src1 = cast_type(bld_base, get_src(bld_base, instr->src[1]), nir_type_int, 32);
2190 src0 = cast_type(bld_base, src0, nir_type_int, nir_src_bit_size(instr->src[0]));
2191 }
2192 bld_base->read_invocation(bld_base, src0, nir_src_bit_size(instr->src[0]), src1, result);
2193 break;
2194 }
2195 case nir_intrinsic_interp_deref_at_offset:
2196 case nir_intrinsic_interp_deref_at_centroid:
2197 case nir_intrinsic_interp_deref_at_sample:
2198 visit_interp(bld_base, instr, result);
2199 break;
2200 case nir_intrinsic_load_scratch:
2201 visit_load_scratch(bld_base, instr, result);
2202 break;
2203 case nir_intrinsic_store_scratch:
2204 visit_store_scratch(bld_base, instr);
2205 break;
2206 default:
2207 fprintf(stderr, "Unsupported intrinsic: ");
2208 nir_print_instr(&instr->instr, stderr);
2209 fprintf(stderr, "\n");
2210 assert(0);
2211 break;
2212 }
2213 if (result[0]) {
2214 assign_dest(bld_base, &instr->dest, result);
2215 }
2216 }
2217
2218
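/*
 * Size-style queries (txs, query_levels, texture_samples) all go
 * through the tex_size hook.  query_levels forces LOD 0 and reads its
 * answer from component 3 of sizes_out, hence the index adjustment in
 * the assign_dest() call below.
 */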
2219 static void
2220 visit_txs(struct lp_build_nir_context *bld_base, nir_tex_instr *instr)
2221 {
2222 struct lp_sampler_size_query_params params = { 0 };
2223 LLVMValueRef sizes_out[NIR_MAX_VEC_COMPONENTS];
2224 LLVMValueRef explicit_lod = NULL;
2225 LLVMValueRef texture_unit_offset = NULL;
2226 for (unsigned i = 0; i < instr->num_srcs; i++) {
2227 switch (instr->src[i].src_type) {
2228 case nir_tex_src_lod:
2229 explicit_lod = cast_type(bld_base,
2230 get_src(bld_base, instr->src[i].src),
2231 nir_type_int, 32);
2232 break;
2233 case nir_tex_src_texture_offset:
2234 texture_unit_offset = get_src(bld_base, instr->src[i].src);
2235 break;
2236 default:
2237 break;
2238 }
2239 }
2240
2241 params.target = glsl_sampler_to_pipe(instr->sampler_dim, instr->is_array);
2242 params.texture_unit = instr->texture_index;
2243 params.explicit_lod = explicit_lod;
2244 params.is_sviewinfo = TRUE;
2245 params.sizes_out = sizes_out;
2246 params.samples_only = (instr->op == nir_texop_texture_samples);
2247 params.texture_unit_offset = texture_unit_offset;
2248
2249 if (instr->op == nir_texop_query_levels)
2250 params.explicit_lod = bld_base->uint_bld.zero;
2251 bld_base->tex_size(bld_base, &params);
2252 assign_dest(bld_base, &instr->dest,
2253 &sizes_out[instr->op == nir_texop_query_levels ? 3 : 0]);
2254 }
2255
2256
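/*
 * Decide how finely the LOD must be computed: one scalar LOD when the
 * source is uniform across the vector, per-quad in fragment shaders
 * (unless GALLIVM_PERF_NO_QUAD_LOD requests exact per-element LOD),
 * and per-element in every other stage.
 */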
2257 static enum lp_sampler_lod_property
2258 lp_build_nir_lod_property(struct lp_build_nir_context *bld_base,
2259 nir_src lod_src)
2260 {
2261 enum lp_sampler_lod_property lod_property;
2262
2263 if (nir_src_is_always_uniform(lod_src)) {
2264 lod_property = LP_SAMPLER_LOD_SCALAR;
2265 } else if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
2266 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD)
2267 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2268 else
2269 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2270 } else {
2271 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2272 }
2273 return lod_property;
2274 }
2275
2276
2277 static void
2278 visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *instr)
2279 {
2280 struct gallivm_state *gallivm = bld_base->base.gallivm;
2281 LLVMBuilderRef builder = gallivm->builder;
2282 LLVMValueRef coords[5];
2283 LLVMValueRef offsets[3] = { NULL };
2284 LLVMValueRef explicit_lod = NULL, ms_index = NULL;
2285 struct lp_sampler_params params;
2286 struct lp_derivatives derivs;
2287 unsigned sample_key = 0;
2288 nir_deref_instr *texture_deref_instr = NULL;
2289 nir_deref_instr *sampler_deref_instr = NULL;
2290 LLVMValueRef texture_unit_offset = NULL;
2291 LLVMValueRef texel[NIR_MAX_VEC_COMPONENTS];
2292 unsigned lod_src = 0;
2293 LLVMValueRef coord_undef = LLVMGetUndef(bld_base->base.int_vec_type);
2294 unsigned coord_vals = is_aos(bld_base) ? 1 : instr->coord_components;
2295 memset(&params, 0, sizeof(params));
2296 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2297
2298 if (instr->op == nir_texop_txs || instr->op == nir_texop_query_levels || instr->op == nir_texop_texture_samples) {
2299 visit_txs(bld_base, instr);
2300 return;
2301 }
2302 if (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)
2303 sample_key |= LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
2304 else if (instr->op == nir_texop_tg4) {
2305 sample_key |= LP_SAMPLER_OP_GATHER << LP_SAMPLER_OP_TYPE_SHIFT;
2306 sample_key |= (instr->component << LP_SAMPLER_GATHER_COMP_SHIFT);
2307 } else if (instr->op == nir_texop_lod)
2308 sample_key |= LP_SAMPLER_OP_LODQ << LP_SAMPLER_OP_TYPE_SHIFT;
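   /*
    * sample_key packs the operation kind and its modifiers into bit
    * fields; e.g. a gather of component 2 becomes, roughly,
    * (LP_SAMPLER_OP_GATHER << LP_SAMPLER_OP_TYPE_SHIFT) |
    * (2 << LP_SAMPLER_GATHER_COMP_SHIFT).  LOD control and property
    * bits are OR'ed in below while walking the sources.
    */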
2309 for (unsigned i = 0; i < instr->num_srcs; i++) {
2310 switch (instr->src[i].src_type) {
2311 case nir_tex_src_coord: {
2312 LLVMValueRef coord = get_src(bld_base, instr->src[i].src);
2313 if (coord_vals == 1)
2314 coords[0] = coord;
2315 else {
2316 for (unsigned chan = 0; chan < instr->coord_components; ++chan)
2317 coords[chan] = LLVMBuildExtractValue(builder, coord,
2318 chan, "");
2319 }
2320 for (unsigned chan = coord_vals; chan < 5; chan++)
2321 coords[chan] = coord_undef;
2322
2323 break;
2324 }
2325 case nir_tex_src_texture_deref:
2326 texture_deref_instr = nir_src_as_deref(instr->src[i].src);
2327 break;
2328 case nir_tex_src_sampler_deref:
2329 sampler_deref_instr = nir_src_as_deref(instr->src[i].src);
2330 break;
2331 case nir_tex_src_comparator:
2332 sample_key |= LP_SAMPLER_SHADOW;
2333 coords[4] = get_src(bld_base, instr->src[i].src);
2334 coords[4] = cast_type(bld_base, coords[4], nir_type_float, 32);
2335 break;
2336 case nir_tex_src_bias:
2337 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2338 lod_src = i;
2339 explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_float, 32);
2340 break;
2341 case nir_tex_src_lod:
2342 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2343 lod_src = i;
2344 if (instr->op == nir_texop_txf)
2345 explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_int, 32);
2346 else
2347 explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_float, 32);
2348 break;
2349 case nir_tex_src_ddx: {
2350 int deriv_cnt = instr->coord_components;
2351 if (instr->is_array)
2352 deriv_cnt--;
2353 LLVMValueRef deriv_val = get_src(bld_base, instr->src[i].src);
2354 if (deriv_cnt == 1)
2355 derivs.ddx[0] = deriv_val;
2356 else
2357 for (unsigned chan = 0; chan < deriv_cnt; ++chan)
2358 derivs.ddx[chan] = LLVMBuildExtractValue(builder, deriv_val,
2359 chan, "");
2360 for (unsigned chan = 0; chan < deriv_cnt; ++chan)
2361 derivs.ddx[chan] = cast_type(bld_base, derivs.ddx[chan], nir_type_float, 32);
2362 break;
2363 }
2364 case nir_tex_src_ddy: {
2365 int deriv_cnt = instr->coord_components;
2366 if (instr->is_array)
2367 deriv_cnt--;
2368 LLVMValueRef deriv_val = get_src(bld_base, instr->src[i].src);
2369 if (deriv_cnt == 1)
2370 derivs.ddy[0] = deriv_val;
2371 else
2372 for (unsigned chan = 0; chan < deriv_cnt; ++chan)
2373 derivs.ddy[chan] = LLVMBuildExtractValue(builder, deriv_val,
2374 chan, "");
2375 for (unsigned chan = 0; chan < deriv_cnt; ++chan)
2376 derivs.ddy[chan] = cast_type(bld_base, derivs.ddy[chan], nir_type_float, 32);
2377 break;
2378 }
2379 case nir_tex_src_offset: {
2380 int offset_cnt = instr->coord_components;
2381 if (instr->is_array)
2382 offset_cnt--;
2383 LLVMValueRef offset_val = get_src(bld_base, instr->src[i].src);
2384 sample_key |= LP_SAMPLER_OFFSETS;
2385 if (offset_cnt == 1)
2386 offsets[0] = cast_type(bld_base, offset_val, nir_type_int, 32);
2387 else {
2388 for (unsigned chan = 0; chan < offset_cnt; ++chan) {
2389 offsets[chan] = LLVMBuildExtractValue(builder, offset_val,
2390 chan, "");
2391 offsets[chan] = cast_type(bld_base, offsets[chan], nir_type_int, 32);
2392 }
2393 }
2394 break;
2395 }
2396 case nir_tex_src_ms_index:
2397 sample_key |= LP_SAMPLER_FETCH_MS;
2398 ms_index = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_int, 32);
2399 break;
2400
2401 case nir_tex_src_texture_offset:
2402 texture_unit_offset = get_src(bld_base, instr->src[i].src);
2403 break;
2404 case nir_tex_src_sampler_offset:
2405 break;
2406 default:
2407 assert(0);
2408 break;
2409 }
2410 }
2411 if (!sampler_deref_instr)
2412 sampler_deref_instr = texture_deref_instr;
2413
2414 if (explicit_lod)
2415 lod_property = lp_build_nir_lod_property(bld_base, instr->src[lod_src].src);
2416
2417 if (instr->op == nir_texop_tex || instr->op == nir_texop_tg4 || instr->op == nir_texop_txb ||
2418 instr->op == nir_texop_txl || instr->op == nir_texop_txd || instr->op == nir_texop_lod)
2419 for (unsigned chan = 0; chan < coord_vals; ++chan)
2420 coords[chan] = cast_type(bld_base, coords[chan], nir_type_float, 32);
2421 else if (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)
2422 for (unsigned chan = 0; chan < instr->coord_components; ++chan)
2423 coords[chan] = cast_type(bld_base, coords[chan], nir_type_int, 32);
2424
2425 if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_1D) {
2426 /* move layer coord for 1d arrays. */
2427 coords[2] = coords[1];
2428 coords[1] = coord_undef;
2429 }
2430
2431 uint32_t samp_base_index = 0, tex_base_index = 0;
2432 if (!sampler_deref_instr) {
2433 int samp_src_index = nir_tex_instr_src_index(instr, nir_tex_src_sampler_handle);
2434 if (samp_src_index == -1) {
2435 samp_base_index = instr->sampler_index;
2436 }
2437 }
2438 if (!texture_deref_instr) {
2439 int tex_src_index = nir_tex_instr_src_index(instr, nir_tex_src_texture_handle);
2440 if (tex_src_index == -1) {
2441 tex_base_index = instr->texture_index;
2442 }
2443 }
2444
2445 if (instr->op == nir_texop_txd) {
2446 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2447 params.derivs = &derivs;
2448 if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
2449 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD)
2450 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2451 else
2452 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2453 } else
2454 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2455 }
2456
2457 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2458 params.sample_key = sample_key;
2459 params.offsets = offsets;
2460 params.texture_index = tex_base_index;
2461 params.texture_index_offset = texture_unit_offset;
2462 params.sampler_index = samp_base_index;
2463 params.coords = coords;
2464 params.texel = texel;
2465 params.lod = explicit_lod;
2466 params.ms_index = ms_index;
2467 params.aniso_filter_table = bld_base->aniso_filter_table;
2468 bld_base->tex(bld_base, &params);
2469
2470 if (nir_dest_bit_size(instr->dest) != 32) {
2471 assert(nir_dest_bit_size(instr->dest) == 16);
2472 LLVMTypeRef vec_type = NULL;
2473 bool is_float = false;
2474 switch (nir_alu_type_get_base_type(instr->dest_type)) {
2475 case nir_type_float:
2476 is_float = true;
2477 break;
2478 case nir_type_int:
2479 vec_type = bld_base->int16_bld.vec_type;
2480 break;
2481 case nir_type_uint:
2482 vec_type = bld_base->uint16_bld.vec_type;
2483 break;
2484 default:
2485 unreachable("unexpected alu type");
2486 }
2487 for (int i = 0; i < nir_dest_num_components(instr->dest); ++i) {
2488 if (is_float) {
2489 texel[i] = lp_build_float_to_half(gallivm, texel[i]);
2490 } else {
2491 texel[i] = LLVMBuildBitCast(builder, texel[i], bld_base->int_bld.vec_type, "");
2492 texel[i] = LLVMBuildTrunc(builder, texel[i], vec_type, "");
2493 }
2494 }
2495 }
2496
2497 assign_dest(bld_base, &instr->dest, texel);
2498 }
2499
2500
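/*
 * Undefined SSA values become LLVM undef vectors of the matching
 * integer width; the unused tail of the array is cleared (note the
 * memset size is in bytes) so later readers see NULL, not garbage.
 */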
2501 static void
2502 visit_ssa_undef(struct lp_build_nir_context *bld_base,
2503 const nir_ssa_undef_instr *instr)
2504 {
2505 unsigned num_components = instr->def.num_components;
2506 LLVMValueRef undef[NIR_MAX_VEC_COMPONENTS];
2507 struct lp_build_context *undef_bld = get_int_bld(bld_base, true,
2508 instr->def.bit_size);
2509 for (unsigned i = 0; i < num_components; i++)
2510 undef[i] = LLVMGetUndef(undef_bld->vec_type);
2511 memset(&undef[num_components], 0, (NIR_MAX_VEC_COMPONENTS - num_components) * sizeof(LLVMValueRef));
2512 assign_ssa_dest(bld_base, &instr->def, undef);
2513 }
2514
2515
2516 static void
2517 visit_jump(struct lp_build_nir_context *bld_base,
2518 const nir_jump_instr *instr)
2519 {
2520 switch (instr->type) {
2521 case nir_jump_break:
2522 bld_base->break_stmt(bld_base);
2523 break;
2524 case nir_jump_continue:
2525 bld_base->continue_stmt(bld_base);
2526 break;
2527 default:
2528 unreachable("Unknown jump instr\n");
2529 }
2530 }
2531
2532
2533 static void
2534 visit_deref(struct lp_build_nir_context *bld_base,
2535 nir_deref_instr *instr)
2536 {
2537 if (!nir_deref_mode_is_one_of(instr, nir_var_mem_shared |
2538 nir_var_mem_global)) {
2539 return;
2540 }
2541
2542 LLVMValueRef result = NULL;
2543 switch(instr->deref_type) {
2544 case nir_deref_type_var: {
2545 struct hash_entry *entry =
2546 _mesa_hash_table_search(bld_base->vars, instr->var);
2547 result = entry->data;
2548 break;
2549 }
2550 default:
2551 unreachable("Unhandled deref_instr deref type");
2552 }
2553
2554 assign_ssa(bld_base, instr->dest.ssa.index, result);
2555 }
2556
2557
2558 static void
2559 visit_block(struct lp_build_nir_context *bld_base, nir_block *block)
2560 {
2561 nir_foreach_instr(instr, block)
2562 {
2563 switch (instr->type) {
2564 case nir_instr_type_alu:
2565 visit_alu(bld_base, nir_instr_as_alu(instr));
2566 break;
2567 case nir_instr_type_load_const:
2568 visit_load_const(bld_base, nir_instr_as_load_const(instr));
2569 break;
2570 case nir_instr_type_intrinsic:
2571 visit_intrinsic(bld_base, nir_instr_as_intrinsic(instr));
2572 break;
2573 case nir_instr_type_tex:
2574 visit_tex(bld_base, nir_instr_as_tex(instr));
2575 break;
2576 case nir_instr_type_phi:
2577 assert(0);
2578 break;
2579 case nir_instr_type_ssa_undef:
2580 visit_ssa_undef(bld_base, nir_instr_as_ssa_undef(instr));
2581 break;
2582 case nir_instr_type_jump:
2583 visit_jump(bld_base, nir_instr_as_jump(instr));
2584 break;
2585 case nir_instr_type_deref:
2586 visit_deref(bld_base, nir_instr_as_deref(instr));
2587 break;
2588 default:
2589 fprintf(stderr, "Unknown NIR instr type: ");
2590 nir_print_instr(instr, stderr);
2591 fprintf(stderr, "\n");
2592 abort();
2593 }
2594 }
2595 }
2596
2597
2598 static void
2599 visit_if(struct lp_build_nir_context *bld_base, nir_if *if_stmt)
2600 {
2601 LLVMValueRef cond = get_src(bld_base, if_stmt->condition);
2602
2603 bld_base->if_cond(bld_base, cond);
2604 visit_cf_list(bld_base, &if_stmt->then_list);
2605
2606 if (!exec_list_is_empty(&if_stmt->else_list)) {
2607 bld_base->else_stmt(bld_base);
2608 visit_cf_list(bld_base, &if_stmt->else_list);
2609 }
2610 bld_base->endif_stmt(bld_base);
2611 }
2612
2613
2614 static void
2615 visit_loop(struct lp_build_nir_context *bld_base, nir_loop *loop)
2616 {
2617 bld_base->bgnloop(bld_base);
2618 visit_cf_list(bld_base, &loop->body);
2619 bld_base->endloop(bld_base);
2620 }
2621
2622
2623 static void
2624 visit_cf_list(struct lp_build_nir_context *bld_base,
2625 struct exec_list *list)
2626 {
2627 foreach_list_typed(nir_cf_node, node, node, list)
2628 {
2629 switch (node->type) {
2630 case nir_cf_node_block:
2631 visit_block(bld_base, nir_cf_node_as_block(node));
2632 break;
2633 case nir_cf_node_if:
2634 visit_if(bld_base, nir_cf_node_as_if(node));
2635 break;
2636 case nir_cf_node_loop:
2637 visit_loop(bld_base, nir_cf_node_as_loop(node));
2638 break;
2639 default:
2640 assert(0);
2641 }
2642 }
2643 }
2644
2645
2646 static void
2647 handle_shader_output_decl(struct lp_build_nir_context *bld_base,
2648 struct nir_shader *nir,
2649 struct nir_variable *variable)
2650 {
2651 bld_base->emit_var_decl(bld_base, variable);
2652 }
2653
2654
2655 /* Vector registers are stored as arrays on the LLVM side so that we
2656 can GEP into them: exec-mask stores have to operate on one
2657 component at a time.
2658 The arrays are laid out as:
2659 0.x, 1.x, 2.x, 3.x
2660 0.y, 1.y, 2.y, 3.y
2661 ....
2662 */
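/*
 * For example, assuming an 8-wide 32-bit build context, a vec4
 * register becomes [4 x <8 x i32>] and an array of 3 vec4s becomes
 * [4 x [3 x <8 x i32>]], matching the nesting in get_register_type().
 */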
2663 static LLVMTypeRef
2664 get_register_type(struct lp_build_nir_context *bld_base,
2665 nir_register *reg)
2666 {
2667 if (is_aos(bld_base))
2668 return bld_base->base.int_vec_type;
2669
2670 struct lp_build_context *int_bld =
2671 get_int_bld(bld_base, true, reg->bit_size == 1 ? 32 : reg->bit_size);
2672
2673 LLVMTypeRef type = int_bld->vec_type;
2674 if (reg->num_array_elems)
2675 type = LLVMArrayType(type, reg->num_array_elems);
2676 if (reg->num_components > 1)
2677 type = LLVMArrayType(type, reg->num_components);
2678
2679 return type;
2680 }
2681
2682
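/*
 * Top-level entry point: phi webs are lowered out of SSA into NIR
 * registers first (visit_block() asserts no phis survive), locals
 * become registers, each register gets an LLVM alloca, and the body
 * is then translated by walking the control-flow list.
 */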
2683 bool lp_build_nir_llvm(struct lp_build_nir_context *bld_base,
2684 struct nir_shader *nir)
2685 {
2686 struct nir_function *func;
2687
2688 nir_convert_from_ssa(nir, true);
2689 nir_lower_locals_to_regs(nir);
2690 nir_remove_dead_derefs(nir);
2691 nir_remove_dead_variables(nir, nir_var_function_temp, NULL);
2692
2693 if (is_aos(bld_base)) {
2694 nir_move_vec_src_uses_to_dest(nir);
2695 nir_lower_vec_to_movs(nir, NULL, NULL);
2696 }
2697
2698 nir_foreach_shader_out_variable(variable, nir)
2699 handle_shader_output_decl(bld_base, nir, variable);
2700
2701 if (nir->info.io_lowered) {
2702 uint64_t outputs_written = nir->info.outputs_written;
2703
2704 while (outputs_written) {
2705 unsigned location = u_bit_scan64(&outputs_written);
2706 nir_variable var = {0};
2707
2708 var.type = glsl_vec4_type();
2709 var.data.mode = nir_var_shader_out;
2710 var.data.location = location;
2711 var.data.driver_location = util_bitcount64(nir->info.outputs_written &
2712 BITFIELD64_MASK(location));
2713 bld_base->emit_var_decl(bld_base, &var);
2714 }
2715 }
2716
2717 bld_base->regs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
2718 _mesa_key_pointer_equal);
2719 bld_base->vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
2720 _mesa_key_pointer_equal);
2721 bld_base->range_ht = _mesa_pointer_hash_table_create(NULL);
2722
2723 func = (struct nir_function *)exec_list_get_head(&nir->functions);
2724
2725 nir_foreach_register(reg, &func->impl->registers) {
2726 LLVMTypeRef type = get_register_type(bld_base, reg);
2727 LLVMValueRef reg_alloc = lp_build_alloca(bld_base->base.gallivm,
2728 type, "reg");
2729 _mesa_hash_table_insert(bld_base->regs, reg, reg_alloc);
2730 }
2731 nir_index_ssa_defs(func->impl);
2732 bld_base->ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef));
2733 visit_cf_list(bld_base, &func->impl->body);
2734
2735 free(bld_base->ssa_defs);
2736 ralloc_free(bld_base->vars);
2737 ralloc_free(bld_base->regs);
2738 ralloc_free(bld_base->range_ht);
2739 return true;
2740 }
2741
2742
2743 /* Run some basic NIR optimizations to get rid of constructs we don't want to see in the backend. */
2744 void
2745 lp_build_opt_nir(struct nir_shader *nir)
2746 {
2747 bool progress;
2748
2749 static const struct nir_lower_tex_options lower_tex_options = {
2750 .lower_tg4_offsets = true,
2751 .lower_txp = ~0u,
2752 .lower_invalid_implicit_lod = true,
2753 };
2754 NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);
2755 NIR_PASS_V(nir, nir_lower_frexp);
2756
2757 NIR_PASS_V(nir, nir_lower_flrp, 16|32|64, true);
2758 NIR_PASS_V(nir, nir_lower_fp16_casts);
2759 do {
2760 progress = false;
2761 NIR_PASS(progress, nir, nir_opt_constant_folding);
2762 NIR_PASS(progress, nir, nir_opt_algebraic);
2763 NIR_PASS(progress, nir, nir_lower_pack);
2764
2765 nir_lower_tex_options options = { .lower_invalid_implicit_lod = true, };
2766 NIR_PASS_V(nir, nir_lower_tex, &options);
2767
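      /*
       * Subgroup operations are lowered to scalar code sized to the
       * native SIMD width, e.g. a 256-bit build gives 256 / 32 = 8
       * invocations per subgroup, with 32-bit single-component ballots.
       */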
2768 const nir_lower_subgroups_options subgroups_options = {
2769 .subgroup_size = lp_native_vector_width / 32,
2770 .ballot_bit_size = 32,
2771 .ballot_components = 1,
2772 .lower_to_scalar = true,
2773 .lower_subgroup_masks = true,
2774 .lower_relative_shuffle = true,
2775 };
2776 NIR_PASS(progress, nir, nir_lower_subgroups, &subgroups_options);
2777 } while (progress);
2778
2779 do {
2780 progress = false;
2781 NIR_PASS(progress, nir, nir_opt_algebraic_late);
2782 if (progress) {
2783 NIR_PASS_V(nir, nir_copy_prop);
2784 NIR_PASS_V(nir, nir_opt_dce);
2785 NIR_PASS_V(nir, nir_opt_cse);
2786 }
2787 } while (progress);
2788
2789 if (nir_lower_bool_to_int32(nir)) {
2790 NIR_PASS_V(nir, nir_copy_prop);
2791 NIR_PASS_V(nir, nir_opt_dce);
2792 }
2793 }
2794