1 /**************************************************************************
2 *
3 * Copyright 2019 Red Hat.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **************************************************************************/
25
26 #include "lp_bld_nir.h"
27 #include "lp_bld_arit.h"
28 #include "lp_bld_bitarit.h"
29 #include "lp_bld_const.h"
30 #include "lp_bld_gather.h"
31 #include "lp_bld_logic.h"
32 #include "lp_bld_quad.h"
33 #include "lp_bld_flow.h"
34 #include "lp_bld_struct.h"
35 #include "lp_bld_debug.h"
36 #include "lp_bld_printf.h"
37 #include "nir_deref.h"
38
39 static void visit_cf_list(struct lp_build_nir_context *bld_base,
40 struct exec_list *list);
41
/**
 * Reinterpret (bitcast) a SoA value to the LLVM vector type matching the
 * given NIR ALU type and bit size.  Returns the value unchanged for ALU
 * types not handled here, and NULL for an unsupported bit size (after
 * asserting in debug builds).
 */
static LLVMValueRef cast_type(struct lp_build_nir_context *bld_base, LLVMValueRef val,
                              nir_alu_type alu_type, unsigned bit_size)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMTypeRef dst_type = NULL;

   switch (alu_type) {
   case nir_type_float:
      switch (bit_size) {
      case 16:
         dst_type = LLVMVectorType(LLVMHalfTypeInContext(bld_base->base.gallivm->context),
                                   bld_base->base.type.length);
         break;
      case 32:
         dst_type = bld_base->base.vec_type;
         break;
      case 64:
         dst_type = bld_base->dbl_bld.vec_type;
         break;
      default:
         assert(0);
         break;
      }
      break;
   case nir_type_int:
      switch (bit_size) {
      case 8:
         dst_type = bld_base->int8_bld.vec_type;
         break;
      case 16:
         dst_type = bld_base->int16_bld.vec_type;
         break;
      case 32:
         dst_type = bld_base->int_bld.vec_type;
         break;
      case 64:
         dst_type = bld_base->int64_bld.vec_type;
         break;
      default:
         assert(0);
         break;
      }
      break;
   case nir_type_uint:
      switch (bit_size) {
      case 8:
         dst_type = bld_base->uint8_bld.vec_type;
         break;
      case 16:
         dst_type = bld_base->uint16_bld.vec_type;
         break;
      case 32:
         dst_type = bld_base->uint_bld.vec_type;
         break;
      case 64:
         dst_type = bld_base->uint64_bld.vec_type;
         break;
      default:
         assert(0);
         break;
      }
      break;
   case nir_type_uint32:
      dst_type = bld_base->uint_bld.vec_type;
      break;
   default:
      /* Unhandled ALU type: hand the value back untouched. */
      return val;
   }

   /* Unsupported bit size: mirror the original NULL return. */
   if (!dst_type)
      return NULL;
   return LLVMBuildBitCast(builder, val, dst_type, "");
}
97
98
get_flt_bld(struct lp_build_nir_context * bld_base,unsigned op_bit_size)99 static struct lp_build_context *get_flt_bld(struct lp_build_nir_context *bld_base,
100 unsigned op_bit_size)
101 {
102 if (op_bit_size == 64)
103 return &bld_base->dbl_bld;
104 else
105 return &bld_base->base;
106 }
107
glsl_sampler_to_pipe(int sampler_dim,bool is_array)108 static unsigned glsl_sampler_to_pipe(int sampler_dim, bool is_array)
109 {
110 unsigned pipe_target = PIPE_BUFFER;
111 switch (sampler_dim) {
112 case GLSL_SAMPLER_DIM_1D:
113 pipe_target = is_array ? PIPE_TEXTURE_1D_ARRAY : PIPE_TEXTURE_1D;
114 break;
115 case GLSL_SAMPLER_DIM_2D:
116 case GLSL_SAMPLER_DIM_SUBPASS:
117 pipe_target = is_array ? PIPE_TEXTURE_2D_ARRAY : PIPE_TEXTURE_2D;
118 break;
119 case GLSL_SAMPLER_DIM_3D:
120 pipe_target = PIPE_TEXTURE_3D;
121 break;
122 case GLSL_SAMPLER_DIM_MS:
123 case GLSL_SAMPLER_DIM_SUBPASS_MS:
124 pipe_target = is_array ? PIPE_TEXTURE_2D_ARRAY : PIPE_TEXTURE_2D;
125 break;
126 case GLSL_SAMPLER_DIM_CUBE:
127 pipe_target = is_array ? PIPE_TEXTURE_CUBE_ARRAY : PIPE_TEXTURE_CUBE;
128 break;
129 case GLSL_SAMPLER_DIM_RECT:
130 pipe_target = PIPE_TEXTURE_RECT;
131 break;
132 case GLSL_SAMPLER_DIM_BUF:
133 pipe_target = PIPE_BUFFER;
134 break;
135 default:
136 break;
137 }
138 return pipe_target;
139 }
140
get_ssa_src(struct lp_build_nir_context * bld_base,nir_ssa_def * ssa)141 static LLVMValueRef get_ssa_src(struct lp_build_nir_context *bld_base, nir_ssa_def *ssa)
142 {
143 return bld_base->ssa_defs[ssa->index];
144 }
145
146 static LLVMValueRef get_src(struct lp_build_nir_context *bld_base, nir_src src);
147
get_reg_src(struct lp_build_nir_context * bld_base,nir_reg_src src)148 static LLVMValueRef get_reg_src(struct lp_build_nir_context *bld_base, nir_reg_src src)
149 {
150 struct hash_entry *entry = _mesa_hash_table_search(bld_base->regs, src.reg);
151 LLVMValueRef reg_storage = (LLVMValueRef)entry->data;
152 struct lp_build_context *reg_bld = get_int_bld(bld_base, true, src.reg->bit_size);
153 LLVMValueRef indir_src = NULL;
154 if (src.indirect)
155 indir_src = get_src(bld_base, *src.indirect);
156 return bld_base->load_reg(bld_base, reg_bld, &src, indir_src, reg_storage);
157 }
158
get_src(struct lp_build_nir_context * bld_base,nir_src src)159 static LLVMValueRef get_src(struct lp_build_nir_context *bld_base, nir_src src)
160 {
161 if (src.is_ssa)
162 return get_ssa_src(bld_base, src.ssa);
163 else
164 return get_reg_src(bld_base, src.reg);
165 }
166
/* Record the LLVM value backing SSA def number idx. */
static void assign_ssa(struct lp_build_nir_context *bld_base, int idx, LLVMValueRef ptr)
{
   bld_base->ssa_defs[idx] = ptr;
}
171
/* Store an SSA destination: a single component is kept as a scalar value,
 * multiple components are gathered into one aggregate first. */
static void assign_ssa_dest(struct lp_build_nir_context *bld_base, const nir_ssa_def *ssa,
                            LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef value;

   if (ssa->num_components == 1)
      value = vals[0];
   else
      value = lp_nir_array_build_gather_values(bld_base->base.gallivm->builder,
                                               vals, ssa->num_components);
   assign_ssa(bld_base, ssa->index, value);
}
177
/* Store values into a NIR register destination, resolving any indirect
 * index.  A zero write_mask is treated as "write all four components". */
static void assign_reg(struct lp_build_nir_context *bld_base, const nir_reg_dest *reg,
                       unsigned write_mask,
                       LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
{
   struct hash_entry *entry = _mesa_hash_table_search(bld_base->regs, reg->reg);
   LLVMValueRef storage = (LLVMValueRef)entry->data;
   struct lp_build_context *reg_bld = get_int_bld(bld_base, true, reg->reg->bit_size);
   LLVMValueRef indirect = reg->indirect ? get_src(bld_base, *reg->indirect) : NULL;

   if (!write_mask)
      write_mask = 0xf;
   bld_base->store_reg(bld_base, reg_bld, reg, write_mask, indirect, storage, vals);
}
190
/* Write vals to a generic NIR destination (SSA or register).  Register
 * stores use a zero mask, which assign_reg expands to all components. */
static void assign_dest(struct lp_build_nir_context *bld_base, const nir_dest *dest, LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
{
   if (dest->is_ssa) {
      assign_ssa_dest(bld_base, &dest->ssa, vals);
      return;
   }
   assign_reg(bld_base, &dest->reg, 0, vals);
}
198
/* Write vals to an ALU destination.  Unlike assign_dest, register stores
 * honour the instruction's per-component write mask. */
static void assign_alu_dest(struct lp_build_nir_context *bld_base, const nir_alu_dest *dest, LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
{
   if (dest->dest.is_ssa) {
      assign_ssa_dest(bld_base, &dest->dest.ssa, vals);
      return;
   }
   assign_reg(bld_base, &dest->dest.reg, dest->write_mask, vals);
}
206
/* Convert an integer vector to a NIR 32-bit boolean vector:
 * any non-zero lane becomes all-ones, zero lanes stay zero. */
static LLVMValueRef int_to_bool32(struct lp_build_nir_context *bld_base,
                                  uint32_t src_bit_size,
                                  bool is_unsigned,
                                  LLVMValueRef val)
{
   struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
   LLVMValueRef res = lp_build_compare(bld_base->base.gallivm, int_bld->type,
                                       PIPE_FUNC_NOTEQUAL, val, int_bld->zero);

   /* 64-bit compares produce 64-bit lanes; narrow them to 32-bit bools. */
   if (src_bit_size == 64) {
      LLVMBuilderRef builder = bld_base->base.gallivm->builder;
      res = LLVMBuildTrunc(builder, res, bld_base->int_bld.vec_type, "");
   }
   return res;
}
219
/* Convert a float vector to a NIR 32-bit boolean vector by comparing each
 * lane against 0.0 with NOTEQUAL. */
static LLVMValueRef flt_to_bool32(struct lp_build_nir_context *bld_base,
                                  uint32_t src_bit_size,
                                  LLVMValueRef val)
{
   struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size);
   LLVMValueRef res = lp_build_cmp(flt_bld, PIPE_FUNC_NOTEQUAL, val, flt_bld->zero);

   /* Double compares produce 64-bit lanes; narrow them to 32-bit bools. */
   if (src_bit_size == 64) {
      LLVMBuilderRef builder = bld_base->base.gallivm->builder;
      res = LLVMBuildTrunc(builder, res, bld_base->int_bld.vec_type, "");
   }
   return res;
}
231
/* Float comparison producing a NIR 32-bit boolean vector.  NOTEQUAL takes
 * the plain (non-ordered) compare path; all other functions use the
 * ordered variant. */
static LLVMValueRef fcmp32(struct lp_build_nir_context *bld_base,
                           enum pipe_compare_func compare,
                           uint32_t src_bit_size,
                           LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size);
   LLVMValueRef res = (compare == PIPE_FUNC_NOTEQUAL)
      ? lp_build_cmp(flt_bld, compare, src[0], src[1])
      : lp_build_cmp_ordered(flt_bld, compare, src[0], src[1]);

   /* Double compares produce 64-bit lanes; narrow them to 32-bit bools. */
   if (src_bit_size == 64)
      res = LLVMBuildTrunc(builder, res, bld_base->int_bld.vec_type, "");
   return res;
}
249
/* Integer comparison producing a NIR 32-bit boolean vector: narrow sources
 * get their result sign-extended up to 32 bits, 64-bit sources get theirs
 * truncated down. */
static LLVMValueRef icmp32(struct lp_build_nir_context *bld_base,
                           enum pipe_compare_func compare,
                           bool is_unsigned,
                           uint32_t src_bit_size,
                           LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_build_context *cmp_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
   LLVMValueRef res = lp_build_cmp(cmp_bld, compare, src[0], src[1]);

   if (src_bit_size < 32)
      res = LLVMBuildSExt(builder, res, bld_base->int_bld.vec_type, "");
   else if (src_bit_size == 64)
      res = LLVMBuildTrunc(builder, res, bld_base->int_bld.vec_type, "");
   return res;
}
265
/* Fetch an ALU source value and apply its swizzle, widening or narrowing
 * it to exactly num_components channels.  Multi-component values are held
 * as LLVM aggregate (array) values, hence the ExtractValue/InsertValue
 * traffic below.  Source modifiers (negate/abs) must have been lowered
 * away before reaching this point.
 */
static LLVMValueRef get_alu_src(struct lp_build_nir_context *bld_base,
                                nir_alu_src src,
                                unsigned num_components)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef value = get_src(bld_base, src.src);
   bool need_swizzle = false;

   assert(value);
   unsigned src_components = nir_src_num_components(src.src);
   /* Only rebuild the value if the swizzle isn't the identity. */
   for (unsigned i = 0; i < num_components; ++i) {
      assert(src.swizzle[i] < src_components);
      if (src.swizzle[i] != i)
         need_swizzle = true;
   }

   if (need_swizzle || num_components != src_components) {
      if (src_components > 1 && num_components == 1) {
         /* Vector narrowed to a single channel: pull out the one
          * swizzled element. */
         value = LLVMBuildExtractValue(gallivm->builder, value,
                                       src.swizzle[0], "");
      } else if (src_components == 1 && num_components > 1) {
         /* Scalar broadcast: replicate the value into the first
          * num_components slots (16 entries covers NIR's maximum). */
         LLVMValueRef values[] = {value, value, value, value, value, value, value, value, value, value, value, value, value, value, value, value};
         value = lp_nir_array_build_gather_values(builder, values, num_components);
      } else {
         /* General case: rebuild the aggregate channel by channel in
          * swizzle order, starting from an undef array of the channel
          * type. */
         LLVMValueRef arr = LLVMGetUndef(LLVMArrayType(LLVMTypeOf(LLVMBuildExtractValue(builder, value, 0, "")), num_components));
         for (unsigned i = 0; i < num_components; i++)
            arr = LLVMBuildInsertValue(builder, arr, LLVMBuildExtractValue(builder, value, src.swizzle[i], ""), i, "");
         value = arr;
      }
   }
   /* Modifiers are expected to be lowered by NIR passes before codegen. */
   assert(!src.negate);
   assert(!src.abs);
   return value;
}
301
/* nir_op_b2f: turn a 32-bit boolean vector (all-ones / all-zeros lanes)
 * into 0.0f / 1.0f by masking the bit pattern of 1.0f, then widen to
 * double for 64-bit results. */
static LLVMValueRef emit_b2f(struct lp_build_nir_context *bld_base,
                             LLVMValueRef src0,
                             unsigned bitsize)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMValueRef mask = cast_type(bld_base, src0, nir_type_int, 32);
   LLVMValueRef one_bits =
      LLVMBuildBitCast(builder,
                       lp_build_const_vec(bld_base->base.gallivm,
                                          bld_base->base.type, 1.0),
                       bld_base->int_bld.vec_type, "");
   LLVMValueRef result = LLVMBuildAnd(builder, mask, one_bits, "");

   result = LLVMBuildBitCast(builder, result, bld_base->base.vec_type, "");
   switch (bitsize) {
   case 32:
      break;
   case 64:
      result = LLVMBuildFPExt(builder, result, bld_base->dbl_bld.vec_type, "");
      break;
   default:
      unreachable("unsupported bit size.");
   }
   return result;
}
323
/* nir_op_b2i: boolean lanes are all-ones or all-zeros, so ANDing with 1
 * yields 1/0; then resize to the requested integer width. */
static LLVMValueRef emit_b2i(struct lp_build_nir_context *bld_base,
                             LLVMValueRef src0,
                             unsigned bitsize)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMValueRef bool_vec = cast_type(bld_base, src0, nir_type_int, 32);
   LLVMValueRef one = lp_build_const_int_vec(bld_base->base.gallivm,
                                             bld_base->base.type, 1);
   LLVMValueRef result = LLVMBuildAnd(builder, bool_vec, one, "");

   switch (bitsize) {
   case 8:
      return LLVMBuildTrunc(builder, result, bld_base->int8_bld.vec_type, "");
   case 16:
      return LLVMBuildTrunc(builder, result, bld_base->int16_bld.vec_type, "");
   case 32:
      return result;
   case 64:
      return LLVMBuildZExt(builder, result, bld_base->int64_bld.vec_type, "");
   default:
      unreachable("unsupported bit size.");
   }
}
344
/* nir_op_b32csel: per-lane select of src[1]/src[2] using the 32-bit
 * boolean vector in src[0] as the condition. */
static LLVMValueRef emit_b32csel(struct lp_build_nir_context *bld_base,
                                 unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS],
                                 LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef cond = cast_type(bld_base, src[0], nir_type_int, 32);
   /* Normalize the condition to a proper all-ones/all-zeros mask. */
   LLVMValueRef mask = lp_build_compare(bld_base->base.gallivm,
                                        bld_base->int_bld.type,
                                        PIPE_FUNC_NOTEQUAL,
                                        cond, bld_base->int_bld.zero);
   struct lp_build_context *val_bld = get_int_bld(bld_base, false, src_bit_size[1]);
   return lp_build_select(val_bld, mask, src[1], src[2]);
}
354
/* Extract the low (hi == false) or high (hi == true) 32-bit halves of a
 * vector with 64-bit channels.  The source is reinterpreted as an i32
 * vector of twice the length, then every other element is gathered with a
 * shuffle; the endian-conditional index tables pick which half of each
 * 64-bit lane counts as "low".
 */
static LLVMValueRef split_64bit(struct lp_build_nir_context *bld_base,
                                LLVMValueRef src,
                                bool hi)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
   LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
   int len = bld_base->base.type.length * 2;
   for (unsigned i = 0; i < bld_base->base.type.length; i++) {
#if UTIL_ARCH_LITTLE_ENDIAN
      /* Little endian: the low 32 bits come first in memory. */
      shuffles[i] = lp_build_const_int32(gallivm, i * 2);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
#else
      /* Big endian: the high 32 bits come first. */
      shuffles[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2));
#endif
   }

   src = LLVMBuildBitCast(gallivm->builder, src, LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), len), "");
   return LLVMBuildShuffleVector(gallivm->builder, src,
                                 LLVMGetUndef(LLVMTypeOf(src)),
                                 LLVMConstVector(hi ? shuffles2 : shuffles,
                                                 bld_base->base.type.length),
                                 "");
}
380
/* Inverse of split_64bit: interleave two 32-bit vectors (low halves in
 * 'input', high halves in 'input2') into one vector of 64-bit channels,
 * represented here as a 2x-length i32 vector.  The endian-conditional
 * shuffle decides which input supplies the first word of each pair.
 */
static LLVMValueRef
merge_64bit(struct lp_build_nir_context *bld_base,
            LLVMValueRef input,
            LLVMValueRef input2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   int i;
   LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
   int len = bld_base->base.type.length * 2;
   assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));

   for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
#if UTIL_ARCH_LITTLE_ENDIAN
      /* Low word from input (indices 0..len/2-1), high word from input2
       * (indices offset by the vector length in shufflevector numbering). */
      shuffles[i] = lp_build_const_int32(gallivm, i / 2);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
#else
      shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
#endif
   }
   return LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
}
404
/* Extract the low (hi == false) or high (hi == true) 16-bit halves of a
 * vector with 32-bit channels.  The source is reinterpreted as an i16
 * vector of twice the length and every other element is gathered; the
 * endian-conditional index tables pick which half counts as "low".
 */
static LLVMValueRef split_16bit(struct lp_build_nir_context *bld_base,
                                LLVMValueRef src,
                                bool hi)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
   LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
   int len = bld_base->base.type.length * 2;
   for (unsigned i = 0; i < bld_base->base.type.length; i++) {
#if UTIL_ARCH_LITTLE_ENDIAN
      /* Little endian: the low 16 bits come first in memory. */
      shuffles[i] = lp_build_const_int32(gallivm, i * 2);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
#else
      shuffles[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2));
#endif
   }

   src = LLVMBuildBitCast(gallivm->builder, src, LLVMVectorType(LLVMInt16TypeInContext(gallivm->context), len), "");
   return LLVMBuildShuffleVector(gallivm->builder, src,
                                 LLVMGetUndef(LLVMTypeOf(src)),
                                 LLVMConstVector(hi ? shuffles2 : shuffles,
                                                 bld_base->base.type.length),
                                 "");
}
/* Inverse of split_16bit: interleave two vectors of 16-bit halves into one
 * vector of 32-bit channels, represented as a 2x-length i16 vector.
 * NOTE(review): 'len' is derived from int16_bld while the loop bound uses
 * int_bld — these appear to share the same SoA lane count, so the bounds
 * match; confirm against the context setup.
 */
static LLVMValueRef
merge_16bit(struct lp_build_nir_context *bld_base,
            LLVMValueRef input,
            LLVMValueRef input2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   int i;
   LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
   int len = bld_base->int16_bld.type.length * 2;
   assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));

   for (i = 0; i < bld_base->int_bld.type.length * 2; i+=2) {
#if UTIL_ARCH_LITTLE_ENDIAN
      /* Low half from input, high half from input2 (shufflevector numbers
       * the second operand's elements after the first's). */
      shuffles[i] = lp_build_const_int32(gallivm, i / 2);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
#else
      shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
#endif
   }
   return LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
}
453
/* Vectorized integer division that cannot fault on divide-by-zero.
 * div_mask is all-ones in lanes where the divisor is zero; ORing it into
 * the divisor replaces those zeros with a non-zero value before dividing.
 * The result in the zero-divisor lanes is then patched up: ~0 for unsigned
 * (matching d3d10's udiv-by-zero rule), 0 for signed.
 */
static LLVMValueRef
do_int_divide(struct lp_build_nir_context *bld_base,
              bool is_unsigned, unsigned src_bit_size,
              LLVMValueRef src, LLVMValueRef src2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
   struct lp_build_context *mask_bld = get_int_bld(bld_base, true, src_bit_size);
   LLVMValueRef div_mask = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src2,
                                        mask_bld->zero);

   if (!is_unsigned) {
      /* INT_MIN (0x80000000) / -1 (0xffffffff) causes sigfpe, seen with blender. */
      /* Clearing the sign bit turns the forced divisor from -1 into a
       * positive value, so INT_MIN lanes can't overflow either. */
      div_mask = LLVMBuildAnd(builder, div_mask, lp_build_const_int_vec(gallivm, int_bld->type, 0x7fffffff), "");
   }
   LLVMValueRef divisor = LLVMBuildOr(builder,
                                      div_mask,
                                      src2, "");
   LLVMValueRef result = lp_build_div(int_bld, src, divisor);

   if (!is_unsigned) {
      /* Signed: zero out lanes that divided by the substituted divisor. */
      LLVMValueRef not_div_mask = LLVMBuildNot(builder, div_mask, "");
      return LLVMBuildAnd(builder, not_div_mask, result, "");
   } else
      /* udiv by zero is guaranteed to return 0xffffffff at least with d3d10
       * may as well do same for idiv */
      return LLVMBuildOr(builder, div_mask, result, "");
}
483
484 static LLVMValueRef
do_int_mod(struct lp_build_nir_context * bld_base,bool is_unsigned,unsigned src_bit_size,LLVMValueRef src,LLVMValueRef src2)485 do_int_mod(struct lp_build_nir_context *bld_base,
486 bool is_unsigned, unsigned src_bit_size,
487 LLVMValueRef src, LLVMValueRef src2)
488 {
489 struct gallivm_state *gallivm = bld_base->base.gallivm;
490 LLVMBuilderRef builder = gallivm->builder;
491 struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
492 LLVMValueRef div_mask = lp_build_cmp(int_bld, PIPE_FUNC_EQUAL, src2,
493 int_bld->zero);
494 LLVMValueRef divisor = LLVMBuildOr(builder,
495 div_mask,
496 src2, "");
497 LLVMValueRef result = lp_build_mod(int_bld, src, divisor);
498 return LLVMBuildOr(builder, div_mask, result, "");
499 }
500
/* Implement nir_op_fquantize2f16: round a 32-bit float vector through half
 * precision (fptrunc to half, fpext back).  Results whose magnitude is
 * below the smallest normal half (2^-14, bit pattern 0x38800000) but not
 * exactly zero are flushed to +0.
 */
static LLVMValueRef
do_quantize_to_f16(struct lp_build_nir_context *bld_base,
                   LLVMValueRef src)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef result, cond, cond2, temp;

   /* Round-trip through half precision to apply fp16 rounding/clamping. */
   result = LLVMBuildFPTrunc(builder, src, LLVMVectorType(LLVMHalfTypeInContext(gallivm->context), bld_base->base.type.length), "");
   result = LLVMBuildFPExt(builder, result, bld_base->base.vec_type, "");

   /* cond: |result| < 2^-14 (0x38800000 reinterpreted as float). */
   temp = lp_build_abs(get_flt_bld(bld_base, 32), result);
   cond = LLVMBuildFCmp(builder, LLVMRealOGT,
                        LLVMBuildBitCast(builder, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, 0x38800000), bld_base->base.vec_type, ""),
                        temp, "");
   /* cond2: result != 0, so exact zeros are left alone. */
   cond2 = LLVMBuildFCmp(builder, LLVMRealONE, temp, bld_base->base.zero, "");
   cond = LLVMBuildAnd(builder, cond, cond2, "");
   result = LLVMBuildSelect(builder, cond, bld_base->base.zero, result, "");
   return result;
}
521
do_alu_action(struct lp_build_nir_context * bld_base,nir_op op,unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS],LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])522 static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
523 nir_op op, unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS], LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
524 {
525 struct gallivm_state *gallivm = bld_base->base.gallivm;
526 LLVMBuilderRef builder = gallivm->builder;
527 LLVMValueRef result;
528 enum gallivm_nan_behavior minmax_nan = bld_base->shader->info.stage == MESA_SHADER_KERNEL ? GALLIVM_NAN_RETURN_OTHER : GALLIVM_NAN_BEHAVIOR_UNDEFINED;
529 switch (op) {
530 case nir_op_b2f32:
531 result = emit_b2f(bld_base, src[0], 32);
532 break;
533 case nir_op_b2f64:
534 result = emit_b2f(bld_base, src[0], 64);
535 break;
536 case nir_op_b2i8:
537 result = emit_b2i(bld_base, src[0], 8);
538 break;
539 case nir_op_b2i16:
540 result = emit_b2i(bld_base, src[0], 16);
541 break;
542 case nir_op_b2i32:
543 result = emit_b2i(bld_base, src[0], 32);
544 break;
545 case nir_op_b2i64:
546 result = emit_b2i(bld_base, src[0], 64);
547 break;
548 case nir_op_b32csel:
549 result = emit_b32csel(bld_base, src_bit_size, src);
550 break;
551 case nir_op_bit_count:
552 result = lp_build_popcount(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
553 break;
554 case nir_op_bitfield_select:
555 result = lp_build_xor(&bld_base->uint_bld, src[2], lp_build_and(&bld_base->uint_bld, src[0], lp_build_xor(&bld_base->uint_bld, src[1], src[2])));
556 break;
557 case nir_op_bitfield_reverse:
558 result = lp_build_bitfield_reverse(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
559 break;
560 case nir_op_f2b32:
561 result = flt_to_bool32(bld_base, src_bit_size[0], src[0]);
562 break;
563 case nir_op_f2f16:
564 if (src_bit_size[0] == 64)
565 src[0] = LLVMBuildFPTrunc(builder, src[0],
566 bld_base->base.vec_type, "");
567 result = LLVMBuildFPTrunc(builder, src[0],
568 LLVMVectorType(LLVMHalfTypeInContext(gallivm->context), bld_base->base.type.length), "");
569 break;
570 case nir_op_f2f32:
571 if (src_bit_size[0] < 32)
572 result = LLVMBuildFPExt(builder, src[0],
573 bld_base->base.vec_type, "");
574 else
575 result = LLVMBuildFPTrunc(builder, src[0],
576 bld_base->base.vec_type, "");
577 break;
578 case nir_op_f2f64:
579 result = LLVMBuildFPExt(builder, src[0],
580 bld_base->dbl_bld.vec_type, "");
581 break;
582 case nir_op_f2i32:
583 result = LLVMBuildFPToSI(builder, src[0], bld_base->base.int_vec_type, "");
584 break;
585 case nir_op_f2u32:
586 result = LLVMBuildFPToUI(builder,
587 src[0],
588 bld_base->base.int_vec_type, "");
589 break;
590 case nir_op_f2i64:
591 result = LLVMBuildFPToSI(builder,
592 src[0],
593 bld_base->int64_bld.vec_type, "");
594 break;
595 case nir_op_f2u64:
596 result = LLVMBuildFPToUI(builder,
597 src[0],
598 bld_base->uint64_bld.vec_type, "");
599 break;
600 case nir_op_fabs:
601 result = lp_build_abs(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
602 break;
603 case nir_op_fadd:
604 result = lp_build_add(get_flt_bld(bld_base, src_bit_size[0]),
605 src[0], src[1]);
606 break;
607 case nir_op_fceil:
608 result = lp_build_ceil(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
609 break;
610 case nir_op_fcos:
611 result = lp_build_cos(&bld_base->base, src[0]);
612 break;
613 case nir_op_fddx:
614 case nir_op_fddx_coarse:
615 case nir_op_fddx_fine:
616 result = lp_build_ddx(&bld_base->base, src[0]);
617 break;
618 case nir_op_fddy:
619 case nir_op_fddy_coarse:
620 case nir_op_fddy_fine:
621 result = lp_build_ddy(&bld_base->base, src[0]);
622 break;
623 case nir_op_fdiv:
624 result = lp_build_div(get_flt_bld(bld_base, src_bit_size[0]),
625 src[0], src[1]);
626 break;
627 case nir_op_feq32:
628 result = fcmp32(bld_base, PIPE_FUNC_EQUAL, src_bit_size[0], src);
629 break;
630 case nir_op_fexp2:
631 result = lp_build_exp2(&bld_base->base, src[0]);
632 break;
633 case nir_op_ffloor:
634 result = lp_build_floor(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
635 break;
636 case nir_op_ffma:
637 result = lp_build_fmuladd(builder, src[0], src[1], src[2]);
638 break;
639 case nir_op_ffract: {
640 struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size[0]);
641 LLVMValueRef tmp = lp_build_floor(flt_bld, src[0]);
642 result = lp_build_sub(flt_bld, src[0], tmp);
643 break;
644 }
645 case nir_op_fge32:
646 result = fcmp32(bld_base, PIPE_FUNC_GEQUAL, src_bit_size[0], src);
647 break;
648 case nir_op_find_lsb: {
649 struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
650 result = lp_build_cttz(int_bld, src[0]);
651 if (src_bit_size[0] < 32)
652 result = LLVMBuildZExt(builder, result, bld_base->uint_bld.vec_type, "");
653 else if (src_bit_size[0] > 32)
654 result = LLVMBuildTrunc(builder, result, bld_base->uint_bld.vec_type, "");
655 break;
656 }
657 case nir_op_flog2:
658 result = lp_build_log2_safe(&bld_base->base, src[0]);
659 break;
660 case nir_op_flt:
661 case nir_op_flt32:
662 result = fcmp32(bld_base, PIPE_FUNC_LESS, src_bit_size[0], src);
663 break;
664 case nir_op_fmin:
665 result = lp_build_min_ext(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1], minmax_nan);
666 break;
667 case nir_op_fmod: {
668 struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size[0]);
669 result = lp_build_div(flt_bld, src[0], src[1]);
670 result = lp_build_floor(flt_bld, result);
671 result = lp_build_mul(flt_bld, src[1], result);
672 result = lp_build_sub(flt_bld, src[0], result);
673 break;
674 }
675 case nir_op_fmul:
676 result = lp_build_mul(get_flt_bld(bld_base, src_bit_size[0]),
677 src[0], src[1]);
678 break;
679 case nir_op_fmax:
680 result = lp_build_max_ext(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1], minmax_nan);
681 break;
682 case nir_op_fneu32:
683 result = fcmp32(bld_base, PIPE_FUNC_NOTEQUAL, src_bit_size[0], src);
684 break;
685 case nir_op_fneg:
686 result = lp_build_negate(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
687 break;
688 case nir_op_fpow:
689 result = lp_build_pow(&bld_base->base, src[0], src[1]);
690 break;
691 case nir_op_fquantize2f16:
692 result = do_quantize_to_f16(bld_base, src[0]);
693 break;
694 case nir_op_frcp:
695 result = lp_build_rcp(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
696 break;
697 case nir_op_fround_even:
698 result = lp_build_round(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
699 break;
700 case nir_op_frsq:
701 result = lp_build_rsqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
702 break;
703 case nir_op_fsat:
704 result = lp_build_clamp_zero_one_nanzero(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
705 break;
706 case nir_op_fsign:
707 result = lp_build_sgn(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
708 break;
709 case nir_op_fsin:
710 result = lp_build_sin(&bld_base->base, src[0]);
711 break;
712 case nir_op_fsqrt:
713 result = lp_build_sqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
714 break;
715 case nir_op_ftrunc:
716 result = lp_build_trunc(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
717 break;
718 case nir_op_i2b32:
719 result = int_to_bool32(bld_base, src_bit_size[0], false, src[0]);
720 break;
721 case nir_op_i2f32:
722 result = lp_build_int_to_float(&bld_base->base, src[0]);
723 break;
724 case nir_op_i2f64:
725 result = lp_build_int_to_float(&bld_base->dbl_bld, src[0]);
726 break;
727 case nir_op_i2i8:
728 result = LLVMBuildTrunc(builder, src[0], bld_base->int8_bld.vec_type, "");
729 break;
730 case nir_op_i2i16:
731 if (src_bit_size[0] < 16)
732 result = LLVMBuildSExt(builder, src[0], bld_base->int16_bld.vec_type, "");
733 else
734 result = LLVMBuildTrunc(builder, src[0], bld_base->int16_bld.vec_type, "");
735 break;
736 case nir_op_i2i32:
737 if (src_bit_size[0] < 32)
738 result = LLVMBuildSExt(builder, src[0], bld_base->int_bld.vec_type, "");
739 else
740 result = LLVMBuildTrunc(builder, src[0], bld_base->int_bld.vec_type, "");
741 break;
742 case nir_op_i2i64:
743 result = LLVMBuildSExt(builder, src[0], bld_base->int64_bld.vec_type, "");
744 break;
745 case nir_op_iabs:
746 result = lp_build_abs(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
747 break;
748 case nir_op_iadd:
749 result = lp_build_add(get_int_bld(bld_base, false, src_bit_size[0]),
750 src[0], src[1]);
751 break;
752 case nir_op_iand:
753 result = lp_build_and(get_int_bld(bld_base, false, src_bit_size[0]),
754 src[0], src[1]);
755 break;
756 case nir_op_idiv:
757 result = do_int_divide(bld_base, false, src_bit_size[0], src[0], src[1]);
758 break;
759 case nir_op_ieq32:
760 result = icmp32(bld_base, PIPE_FUNC_EQUAL, false, src_bit_size[0], src);
761 break;
762 case nir_op_ige32:
763 result = icmp32(bld_base, PIPE_FUNC_GEQUAL, false, src_bit_size[0], src);
764 break;
765 case nir_op_ilt32:
766 result = icmp32(bld_base, PIPE_FUNC_LESS, false, src_bit_size[0], src);
767 break;
768 case nir_op_imax:
769 result = lp_build_max(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1]);
770 break;
771 case nir_op_imin:
772 result = lp_build_min(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1]);
773 break;
774 case nir_op_imul:
775 case nir_op_imul24:
776 result = lp_build_mul(get_int_bld(bld_base, false, src_bit_size[0]),
777 src[0], src[1]);
778 break;
779 case nir_op_imul_high: {
780 LLVMValueRef hi_bits;
781 lp_build_mul_32_lohi(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1], &hi_bits);
782 result = hi_bits;
783 break;
784 }
785 case nir_op_ine32:
786 result = icmp32(bld_base, PIPE_FUNC_NOTEQUAL, false, src_bit_size[0], src);
787 break;
788 case nir_op_ineg:
789 result = lp_build_negate(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
790 break;
791 case nir_op_inot:
792 result = lp_build_not(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
793 break;
794 case nir_op_ior:
795 result = lp_build_or(get_int_bld(bld_base, false, src_bit_size[0]),
796 src[0], src[1]);
797 break;
798 case nir_op_imod:
799 case nir_op_irem:
800 result = do_int_mod(bld_base, false, src_bit_size[0], src[0], src[1]);
801 break;
802 case nir_op_ishl: {
803 struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
804 struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
805 if (src_bit_size[0] == 64)
806 src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
807 if (src_bit_size[0] < 32)
808 src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
809 src[1] = lp_build_and(uint_bld, src[1], lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
810 result = lp_build_shl(int_bld, src[0], src[1]);
811 break;
812 }
813 case nir_op_ishr: {
814 struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
815 struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
816 if (src_bit_size[0] == 64)
817 src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
818 if (src_bit_size[0] < 32)
819 src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
820 src[1] = lp_build_and(uint_bld, src[1], lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
821 result = lp_build_shr(int_bld, src[0], src[1]);
822 break;
823 }
824 case nir_op_isign:
825 result = lp_build_sgn(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
826 break;
827 case nir_op_isub:
828 result = lp_build_sub(get_int_bld(bld_base, false, src_bit_size[0]),
829 src[0], src[1]);
830 break;
831 case nir_op_ixor:
832 result = lp_build_xor(get_int_bld(bld_base, false, src_bit_size[0]),
833 src[0], src[1]);
834 break;
835 case nir_op_mov:
836 result = src[0];
837 break;
838 case nir_op_unpack_64_2x32_split_x:
839 result = split_64bit(bld_base, src[0], false);
840 break;
841 case nir_op_unpack_64_2x32_split_y:
842 result = split_64bit(bld_base, src[0], true);
843 break;
844
845 case nir_op_pack_32_2x16_split: {
846 LLVMValueRef tmp = merge_16bit(bld_base, src[0], src[1]);
847 result = LLVMBuildBitCast(builder, tmp, bld_base->base.vec_type, "");
848 break;
849 }
850 case nir_op_unpack_32_2x16_split_x:
851 result = split_16bit(bld_base, src[0], false);
852 break;
853 case nir_op_unpack_32_2x16_split_y:
854 result = split_16bit(bld_base, src[0], true);
855 break;
856 case nir_op_pack_64_2x32_split: {
857 LLVMValueRef tmp = merge_64bit(bld_base, src[0], src[1]);
858 result = LLVMBuildBitCast(builder, tmp, bld_base->dbl_bld.vec_type, "");
859 break;
860 }
861 case nir_op_u2f32:
862 result = LLVMBuildUIToFP(builder, src[0], bld_base->base.vec_type, "");
863 break;
864 case nir_op_u2f64:
865 result = LLVMBuildUIToFP(builder, src[0], bld_base->dbl_bld.vec_type, "");
866 break;
867 case nir_op_u2u8:
868 result = LLVMBuildTrunc(builder, src[0], bld_base->uint8_bld.vec_type, "");
869 break;
870 case nir_op_u2u16:
871 if (src_bit_size[0] < 16)
872 result = LLVMBuildZExt(builder, src[0], bld_base->uint16_bld.vec_type, "");
873 else
874 result = LLVMBuildTrunc(builder, src[0], bld_base->uint16_bld.vec_type, "");
875 break;
876 case nir_op_u2u32:
877 if (src_bit_size[0] < 32)
878 result = LLVMBuildZExt(builder, src[0], bld_base->uint_bld.vec_type, "");
879 else
880 result = LLVMBuildTrunc(builder, src[0], bld_base->uint_bld.vec_type, "");
881 break;
882 case nir_op_u2u64:
883 result = LLVMBuildZExt(builder, src[0], bld_base->uint64_bld.vec_type, "");
884 break;
885 case nir_op_udiv:
886 result = do_int_divide(bld_base, true, src_bit_size[0], src[0], src[1]);
887 break;
888 case nir_op_ufind_msb: {
889 struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
890 result = lp_build_ctlz(uint_bld, src[0]);
891 result = lp_build_sub(uint_bld, lp_build_const_int_vec(gallivm, uint_bld->type, src_bit_size[0] - 1), result);
892 if (src_bit_size[0] < 32)
893 result = LLVMBuildZExt(builder, result, bld_base->uint_bld.vec_type, "");
894 else
895 result = LLVMBuildTrunc(builder, result, bld_base->uint_bld.vec_type, "");
896 break;
897 }
898 case nir_op_uge32:
899 result = icmp32(bld_base, PIPE_FUNC_GEQUAL, true, src_bit_size[0], src);
900 break;
901 case nir_op_ult32:
902 result = icmp32(bld_base, PIPE_FUNC_LESS, true, src_bit_size[0], src);
903 break;
904 case nir_op_umax:
905 result = lp_build_max(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1]);
906 break;
907 case nir_op_umin:
908 result = lp_build_min(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1]);
909 break;
910 case nir_op_umod:
911 result = do_int_mod(bld_base, true, src_bit_size[0], src[0], src[1]);
912 break;
913 case nir_op_umul_high: {
914 LLVMValueRef hi_bits;
915 lp_build_mul_32_lohi(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1], &hi_bits);
916 result = hi_bits;
917 break;
918 }
919 case nir_op_ushr: {
920 struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
921 if (src_bit_size[0] == 64)
922 src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
923 if (src_bit_size[0] < 32)
924 src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
925 src[1] = lp_build_and(uint_bld, src[1], lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
926 result = lp_build_shr(uint_bld, src[0], src[1]);
927 break;
928 }
929 default:
930 assert(0);
931 break;
932 }
933 return result;
934 }
935
/* Translate one NIR ALU instruction into LLVM IR.
 *
 * Sources are fetched once, then each destination channel is computed by
 * extracting the per-channel source values, casting them to the op's
 * declared input types, and dispatching to do_alu_action().  The vecN
 * ops are special-cased: their scalar sources are just cast and gathered
 * directly into the result array.
 */
static void visit_alu(struct lp_build_nir_context *bld_base, const nir_alu_instr *instr)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef src[NIR_MAX_VEC_COMPONENTS];
   unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS];
   unsigned num_components = nir_dest_num_components(instr->dest.dest);
   unsigned src_components;
   /* How many components each source operand carries for this op. */
   switch (instr->op) {
   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
   case nir_op_vec8:
   case nir_op_vec16:
      /* vecN builds an N-vector out of N scalar sources. */
      src_components = 1;
      break;
   case nir_op_pack_half_2x16:
      src_components = 2;
      break;
   case nir_op_unpack_half_2x16:
      src_components = 1;
      break;
   case nir_op_cube_face_coord:
   case nir_op_cube_face_index:
      /* Cube-face ops consume a 3-component direction vector. */
      src_components = 3;
      break;
   default:
      src_components = num_components;
      break;
   }
   /* Fetch every source once, remembering its bit size for casts below. */
   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
      src[i] = get_alu_src(bld_base, instr->src[i], src_components);
      src_bit_size[i] = nir_src_bit_size(instr->src[i].src);
   }

   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
   if (instr->op == nir_op_vec4 || instr->op == nir_op_vec3 || instr->op == nir_op_vec2 || instr->op == nir_op_vec8 || instr->op == nir_op_vec16) {
      /* vecN: each scalar source becomes one channel of the destination. */
      for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
         result[i] = cast_type(bld_base, src[i], nir_op_infos[instr->op].input_types[i], src_bit_size[i]);
      }
   } else {
      /* Generic path: evaluate the op once per destination channel. */
      for (unsigned c = 0; c < num_components; c++) {
         LLVMValueRef src_chan[NIR_MAX_VEC_COMPONENTS];

         for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
            /* Multi-component sources are LLVM aggregates; pull out channel c. */
            if (num_components > 1) {
               src_chan[i] = LLVMBuildExtractValue(gallivm->builder,
                                                   src[i], c, "");
            } else
               src_chan[i] = src[i];
            src_chan[i] = cast_type(bld_base, src_chan[i], nir_op_infos[instr->op].input_types[i], src_bit_size[i]);
         }
         result[c] = do_alu_action(bld_base, instr->op, src_bit_size, src_chan);
         /* Cast back to the op's declared output type at the dest bit size. */
         result[c] = cast_type(bld_base, result[c], nir_op_infos[instr->op].output_type, nir_dest_bit_size(instr->dest.dest));
      }
   }
   assign_alu_dest(bld_base, &instr->dest, result);
}
993
visit_load_const(struct lp_build_nir_context * bld_base,const nir_load_const_instr * instr)994 static void visit_load_const(struct lp_build_nir_context *bld_base,
995 const nir_load_const_instr *instr)
996 {
997 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
998 struct lp_build_context *int_bld = get_int_bld(bld_base, true, instr->def.bit_size);
999 for (unsigned i = 0; i < instr->def.num_components; i++)
1000 result[i] = lp_build_const_int_vec(bld_base->base.gallivm, int_bld->type, instr->def.bit_size == 32 ? instr->value[i].u32 : instr->value[i].u64);
1001 assign_ssa_dest(bld_base, &instr->def, result);
1002 }
1003
/* Walk a deref chain and compute the I/O slot offset it addresses.
 *
 * Outputs:
 *   vertex_index_out / vertex_index_ref - when non-NULL, the first array
 *     level is consumed as a per-vertex index (constant or LLVM value).
 *   const_out - the constant part of the offset, in attribute slots.
 *   indir_out - the variable part as an LLVM value, or NULL if the whole
 *     offset is constant.
 *
 * NOTE(review): when both a constant and an indirect part exist, the
 * constant is folded into *indir_out AND still returned in *const_out —
 * callers appear to use const_out only when indir_out is NULL; confirm
 * before relying on both.
 */
static void
get_deref_offset(struct lp_build_nir_context *bld_base, nir_deref_instr *instr,
                 bool vs_in, unsigned *vertex_index_out,
                 LLVMValueRef *vertex_index_ref,
                 unsigned *const_out, LLVMValueRef *indir_out)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   nir_variable *var = nir_deref_instr_get_variable(instr);
   nir_deref_path path;
   /* path.path[0] is the deref_var; start walking at level 1. */
   unsigned idx_lvl = 1;

   nir_deref_path_init(&path, instr, NULL);

   /* Optionally consume the leading array level as the vertex index. */
   if (vertex_index_out != NULL || vertex_index_ref != NULL) {
      if (vertex_index_ref) {
         *vertex_index_ref = get_src(bld_base, path.path[idx_lvl]->arr.index);
         if (vertex_index_out)
            *vertex_index_out = 0;
      } else {
         *vertex_index_out = nir_src_as_uint(path.path[idx_lvl]->arr.index);
      }
      ++idx_lvl;
   }

   uint32_t const_offset = 0;
   LLVMValueRef offset = NULL;

   /* Compact variables with a constant index short-circuit the walk. */
   if (var->data.compact && nir_src_is_const(instr->arr.index)) {
      assert(instr->deref_type == nir_deref_type_array);
      const_offset = nir_src_as_uint(instr->arr.index);
      goto out;
   }

   for (; path.path[idx_lvl]; ++idx_lvl) {
      const struct glsl_type *parent_type = path.path[idx_lvl - 1]->type;
      if (path.path[idx_lvl]->deref_type == nir_deref_type_struct) {
         unsigned index = path.path[idx_lvl]->strct.index;

         /* Struct member: sum the slot counts of the preceding fields. */
         for (unsigned i = 0; i < index; i++) {
            const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
            const_offset += glsl_count_attribute_slots(ft, vs_in);
         }
      } else if(path.path[idx_lvl]->deref_type == nir_deref_type_array) {
         /* Array element: index * per-element slot count; constant indices
          * accumulate into const_offset, dynamic ones into the LLVM value. */
         unsigned size = glsl_count_attribute_slots(path.path[idx_lvl]->type, vs_in);
         if (nir_src_is_const(path.path[idx_lvl]->arr.index)) {
            const_offset += nir_src_comp_as_int(path.path[idx_lvl]->arr.index, 0) * size;
         } else {
            LLVMValueRef idx_src = get_src(bld_base, path.path[idx_lvl]->arr.index);
            idx_src = cast_type(bld_base, idx_src, nir_type_uint, 32);
            LLVMValueRef array_off = lp_build_mul(&bld_base->uint_bld, lp_build_const_int_vec(bld_base->base.gallivm, bld_base->base.type, size),
                                                  idx_src);
            if (offset)
               offset = lp_build_add(&bld_base->uint_bld, offset, array_off);
            else
               offset = array_off;
         }
      } else
         unreachable("Uhandled deref type in get_deref_instr_offset");
   }

 out:
   nir_deref_path_finish(&path);

   /* Fold the constant part into the indirect offset when both exist. */
   if (const_offset && offset)
      offset = LLVMBuildAdd(builder, offset,
                            lp_build_const_int_vec(bld_base->base.gallivm, bld_base->uint_bld.type, const_offset),
                            "");
   *const_out = const_offset;
   *indir_out = offset;
}
1074
1075 static void
visit_load_input(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1076 visit_load_input(struct lp_build_nir_context *bld_base,
1077 nir_intrinsic_instr *instr,
1078 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1079 {
1080 nir_variable var = {0};
1081 var.data.location = nir_intrinsic_io_semantics(instr).location;
1082 var.data.driver_location = nir_intrinsic_base(instr);
1083 var.data.location_frac = nir_intrinsic_component(instr);
1084
1085 unsigned nc = nir_dest_num_components(instr->dest);
1086 unsigned bit_size = nir_dest_bit_size(instr->dest);
1087
1088 nir_src offset = *nir_get_io_offset_src(instr);
1089 bool indirect = !nir_src_is_const(offset);
1090 if (!indirect)
1091 assert(nir_src_as_uint(offset) == 0);
1092 LLVMValueRef indir_index = indirect ? get_src(bld_base, offset) : NULL;
1093
1094 bld_base->load_var(bld_base, nir_var_shader_in, nc, bit_size, &var, 0, NULL, 0, indir_index, result);
1095 }
1096
1097 static void
visit_store_output(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr)1098 visit_store_output(struct lp_build_nir_context *bld_base,
1099 nir_intrinsic_instr *instr)
1100 {
1101 nir_variable var = {0};
1102 var.data.location = nir_intrinsic_io_semantics(instr).location;
1103 var.data.driver_location = nir_intrinsic_base(instr);
1104 var.data.location_frac = nir_intrinsic_component(instr);
1105
1106 unsigned mask = nir_intrinsic_write_mask(instr);
1107
1108 unsigned bit_size = nir_src_bit_size(instr->src[0]);
1109 LLVMValueRef src = get_src(bld_base, instr->src[0]);
1110
1111 nir_src offset = *nir_get_io_offset_src(instr);
1112 bool indirect = !nir_src_is_const(offset);
1113 if (!indirect)
1114 assert(nir_src_as_uint(offset) == 0);
1115 LLVMValueRef indir_index = indirect ? get_src(bld_base, offset) : NULL;
1116
1117 if (mask == 0x1 && LLVMGetTypeKind(LLVMTypeOf(src)) == LLVMArrayTypeKind) {
1118 src = LLVMBuildExtractValue(bld_base->base.gallivm->builder,
1119 src, 0, "");
1120 }
1121
1122 bld_base->store_var(bld_base, nir_var_shader_out, util_last_bit(mask),
1123 bit_size, &var, mask, NULL, 0, indir_index, src);
1124 }
1125
/* Handle nir_intrinsic_load_deref: read a variable through its deref chain
 * and produce the per-component results via the backend load_var callback.
 *
 * Fix: const_index and indir_index were previously uninitialized; when
 * var == NULL the get_deref_offset() call is skipped and the old code
 * passed indeterminate values to load_var() (undefined behavior).
 */
static void visit_load_var(struct lp_build_nir_context *bld_base,
                           nir_intrinsic_instr *instr,
                           LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   assert(util_bitcount(deref->modes) == 1);
   nir_variable_mode mode = deref->modes;
   unsigned const_index = 0;          /* was uninitialized */
   LLVMValueRef indir_index = NULL;   /* was uninitialized */
   LLVMValueRef indir_vertex_index = NULL;
   unsigned vertex_index = 0;
   unsigned nc = nir_dest_num_components(instr->dest);
   unsigned bit_size = nir_dest_bit_size(instr->dest);
   if (var) {
      /* Stage/mode combinations that need special offset handling:
       * GS inputs use a constant vertex index; TCS/TES per-vertex I/O
       * needs an indirect (LLVM value) vertex index. */
      bool vs_in = bld_base->shader->info.stage == MESA_SHADER_VERTEX &&
         var->data.mode == nir_var_shader_in;
      bool gs_in = bld_base->shader->info.stage == MESA_SHADER_GEOMETRY &&
         var->data.mode == nir_var_shader_in;
      bool tcs_in = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
         var->data.mode == nir_var_shader_in;
      bool tcs_out = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
         var->data.mode == nir_var_shader_out && !var->data.patch;
      bool tes_in = bld_base->shader->info.stage == MESA_SHADER_TESS_EVAL &&
         var->data.mode == nir_var_shader_in && !var->data.patch;

      mode = var->data.mode;

      get_deref_offset(bld_base, deref, vs_in,
                       gs_in ? &vertex_index : NULL,
                       (tcs_in || tcs_out || tes_in) ? &indir_vertex_index : NULL,
                       &const_index, &indir_index);
   }
   bld_base->load_var(bld_base, mode, nc, bit_size, var, vertex_index,
                      indir_vertex_index, const_index, indir_index, result);
}
1159
1160 static void
visit_store_var(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr)1161 visit_store_var(struct lp_build_nir_context *bld_base,
1162 nir_intrinsic_instr *instr)
1163 {
1164 nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1165 nir_variable *var = nir_deref_instr_get_variable(deref);
1166 assert(util_bitcount(deref->modes) == 1);
1167 nir_variable_mode mode = deref->modes;
1168 int writemask = instr->const_index[0];
1169 unsigned bit_size = nir_src_bit_size(instr->src[1]);
1170 LLVMValueRef src = get_src(bld_base, instr->src[1]);
1171 unsigned const_index = 0;
1172 LLVMValueRef indir_index, indir_vertex_index = NULL;
1173 if (var) {
1174 bool tcs_out = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
1175 var->data.mode == nir_var_shader_out && !var->data.patch;
1176 get_deref_offset(bld_base, deref, false, NULL, tcs_out ? &indir_vertex_index : NULL,
1177 &const_index, &indir_index);
1178 }
1179 bld_base->store_var(bld_base, mode, instr->num_components, bit_size, var, writemask, indir_vertex_index, const_index, indir_index, src);
1180 }
1181
/* Handle nir_intrinsic_load_ubo: read from a uniform buffer via the
 * backend load_ubo callback.
 */
static void visit_load_ubo(struct lp_build_nir_context *bld_base,
                           nir_intrinsic_instr *instr,
                           LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   bool uniform_offset = nir_src_is_dynamically_uniform(instr->src[1]);
   LLVMValueRef block_index = get_src(bld_base, instr->src[0]);
   LLVMValueRef byte_offset = get_src(bld_base, instr->src[1]);

   /* Only lane 0 of the buffer-index vector is used. */
   block_index = LLVMBuildExtractElement(gallivm->builder, block_index,
                                         lp_build_const_int32(gallivm, 0), "");
   bld_base->load_ubo(bld_base, nir_dest_num_components(instr->dest),
                      nir_dest_bit_size(instr->dest),
                      uniform_offset, block_index, byte_offset, result);
}
1196
/* Handle nir_intrinsic_load_push_constant: push constants are serviced by
 * the UBO path using buffer index 0.
 */
static void visit_load_push_constant(struct lp_build_nir_context *bld_base,
                                     nir_intrinsic_instr *instr,
                                     LLVMValueRef result[4])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   bool uniform_offset = nir_src_is_dynamically_uniform(instr->src[0]);
   LLVMValueRef byte_offset = get_src(bld_base, instr->src[0]);

   bld_base->load_ubo(bld_base, nir_dest_num_components(instr->dest),
                      nir_dest_bit_size(instr->dest), uniform_offset,
                      lp_build_const_int32(gallivm, 0), byte_offset, result);
}
1209
1210
/* Handle nir_intrinsic_load_ssbo via the backend memory-load callback. */
static void visit_load_ssbo(struct lp_build_nir_context *bld_base,
                            nir_intrinsic_instr *instr,
                            LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef buf_index = get_src(bld_base, instr->src[0]);
   LLVMValueRef byte_offset = get_src(bld_base, instr->src[1]);
   const unsigned num_comps = nir_dest_num_components(instr->dest);
   const unsigned bit_size = nir_dest_bit_size(instr->dest);

   bld_base->load_mem(bld_base, num_comps, bit_size, buf_index, byte_offset, result);
}
1220
/* Handle nir_intrinsic_store_ssbo via the backend memory-store callback. */
static void visit_store_ssbo(struct lp_build_nir_context *bld_base,
                             nir_intrinsic_instr *instr)
{
   const int writemask = instr->const_index[0];
   const int num_comps = nir_src_num_components(instr->src[0]);
   const int bit_size = nir_src_bit_size(instr->src[0]);
   LLVMValueRef value = get_src(bld_base, instr->src[0]);
   LLVMValueRef buf_index = get_src(bld_base, instr->src[1]);
   LLVMValueRef byte_offset = get_src(bld_base, instr->src[2]);

   bld_base->store_mem(bld_base, writemask, num_comps, bit_size,
                       buf_index, byte_offset, value);
}
1232
/* Handle nir_intrinsic_get_ssbo_size: query a shader buffer's size. */
static void visit_get_ssbo_size(struct lp_build_nir_context *bld_base,
                                nir_intrinsic_instr *instr,
                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   result[0] = bld_base->get_ssbo_size(bld_base,
                                       get_src(bld_base, instr->src[0]));
}
1240
/* Handle the nir_intrinsic_ssbo_atomic_* family via the backend
 * memory-atomic callback.
 */
static void visit_ssbo_atomic(struct lp_build_nir_context *bld_base,
                              nir_intrinsic_instr *instr,
                              LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef buf_index = get_src(bld_base, instr->src[0]);
   LLVMValueRef byte_offset = get_src(bld_base, instr->src[1]);
   LLVMValueRef value = get_src(bld_base, instr->src[2]);
   /* comp_swap carries the swap value as a fourth source. */
   LLVMValueRef swap_value =
      (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap)
      ? get_src(bld_base, instr->src[3]) : NULL;

   bld_base->atomic_mem(bld_base, instr->intrinsic, buf_index, byte_offset,
                        value, swap_value, &result[0]);
}
1255
/* Handle nir_intrinsic_image_deref_load: build lp_img_params and dispatch
 * to the backend image_op callback.
 *
 * Fix: "&params" was mis-encoded as "¶ms" (HTML-entity corruption),
 * which does not compile; restored in memset() and image_op().
 */
static void visit_load_image(struct lp_build_nir_context *bld_base,
                             nir_intrinsic_instr *instr,
                             LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   LLVMValueRef coord_val = get_src(bld_base, instr->src[1]);
   LLVMValueRef coords[5];
   struct lp_img_params params;
   const struct glsl_type *type = glsl_without_array(var->type);
   unsigned const_index;
   LLVMValueRef indir_index;
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);

   memset(&params, 0, sizeof(params));
   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
   for (unsigned i = 0; i < 4; i++)
      coords[i] = LLVMBuildExtractValue(builder, coord_val, i, "");
   /* 1D arrays carry the layer in the second coordinate; move it to Z. */
   if (params.target == PIPE_TEXTURE_1D_ARRAY)
      coords[2] = coords[1];

   params.coords = coords;
   params.outdata = result;
   params.img_op = LP_IMG_LOAD;
   /* Multisampled images take the sample index as a third source. */
   if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS || glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS_MS)
      params.ms_index = cast_type(bld_base, get_src(bld_base, instr->src[2]), nir_type_uint, 32);
   params.image_index = var->data.binding + (indir_index ? 0 : const_index);
   params.image_index_offset = indir_index;
   bld_base->image_op(bld_base, &params);
}
1289
/* Handle nir_intrinsic_image_deref_store: build lp_img_params and dispatch
 * to the backend image_op callback.
 *
 * Fixes: "&params" was mis-encoded as "¶ms" (does not compile);
 * restored.  Also dropped a duplicated PIPE_TEXTURE_1D_ARRAY coordinate
 * fixup that repeated the identical assignment just before image_op().
 */
static void visit_store_image(struct lp_build_nir_context *bld_base,
                              nir_intrinsic_instr *instr)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   LLVMValueRef coord_val = get_src(bld_base, instr->src[1]);
   LLVMValueRef in_val = get_src(bld_base, instr->src[3]);
   LLVMValueRef coords[5];
   struct lp_img_params params;
   const struct glsl_type *type = glsl_without_array(var->type);
   unsigned const_index;
   LLVMValueRef indir_index;
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);

   memset(&params, 0, sizeof(params));
   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
   for (unsigned i = 0; i < 4; i++)
      coords[i] = LLVMBuildExtractValue(builder, coord_val, i, "");
   /* 1D arrays carry the layer in the second coordinate; move it to Z. */
   if (params.target == PIPE_TEXTURE_1D_ARRAY)
      coords[2] = coords[1];
   params.coords = coords;

   /* Store data is passed as four float-typed channels. */
   for (unsigned i = 0; i < 4; i++) {
      params.indata[i] = LLVMBuildExtractValue(builder, in_val, i, "");
      params.indata[i] = LLVMBuildBitCast(builder, params.indata[i], bld_base->base.vec_type, "");
   }
   if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS)
      params.ms_index = get_src(bld_base, instr->src[2]);
   params.img_op = LP_IMG_STORE;
   params.image_index = var->data.binding + (indir_index ? 0 : const_index);
   params.image_index_offset = indir_index;

   bld_base->image_op(bld_base, &params);
}
1329
/* Handle the nir_intrinsic_image_deref_atomic_* family: map the NIR
 * intrinsic to an LLVM atomic RMW op, build lp_img_params, and dispatch
 * to the backend image_op callback.
 *
 * Fix: "&params" was mis-encoded as "¶ms" (HTML-entity corruption),
 * which does not compile; restored in memset() and image_op().
 */
static void visit_atomic_image(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr,
                               LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   struct lp_img_params params;
   LLVMValueRef coord_val = get_src(bld_base, instr->src[1]);
   LLVMValueRef in_val = get_src(bld_base, instr->src[3]);
   LLVMValueRef coords[5];
   const struct glsl_type *type = glsl_without_array(var->type);
   unsigned const_index;
   LLVMValueRef indir_index;
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);

   memset(&params, 0, sizeof(params));

   /* Map the NIR atomic intrinsic to the LLVM atomicrmw binop.
    * comp_swap is not a binop; it is handled via img_op below. */
   switch (instr->intrinsic) {
   case nir_intrinsic_image_deref_atomic_add:
      params.op = LLVMAtomicRMWBinOpAdd;
      break;
   case nir_intrinsic_image_deref_atomic_exchange:
      params.op = LLVMAtomicRMWBinOpXchg;
      break;
   case nir_intrinsic_image_deref_atomic_and:
      params.op = LLVMAtomicRMWBinOpAnd;
      break;
   case nir_intrinsic_image_deref_atomic_or:
      params.op = LLVMAtomicRMWBinOpOr;
      break;
   case nir_intrinsic_image_deref_atomic_xor:
      params.op = LLVMAtomicRMWBinOpXor;
      break;
   case nir_intrinsic_image_deref_atomic_umin:
      params.op = LLVMAtomicRMWBinOpUMin;
      break;
   case nir_intrinsic_image_deref_atomic_umax:
      params.op = LLVMAtomicRMWBinOpUMax;
      break;
   case nir_intrinsic_image_deref_atomic_imin:
      params.op = LLVMAtomicRMWBinOpMin;
      break;
   case nir_intrinsic_image_deref_atomic_imax:
      params.op = LLVMAtomicRMWBinOpMax;
      break;
   default:
      break;
   }

   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
   for (unsigned i = 0; i < 4; i++)
      coords[i] = LLVMBuildExtractValue(builder, coord_val, i, "");
   /* 1D arrays carry the layer in the second coordinate; move it to Z. */
   if (params.target == PIPE_TEXTURE_1D_ARRAY)
      coords[2] = coords[1];
   params.coords = coords;
   if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS)
      params.ms_index = get_src(bld_base, instr->src[2]);
   if (instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) {
      LLVMValueRef cas_val = get_src(bld_base, instr->src[4]);
      params.indata[0] = in_val;
      params.indata2[0] = cas_val;
   } else
      params.indata[0] = in_val;

   params.outdata = result;
   params.img_op = (instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;
   params.image_index = var->data.binding + (indir_index ? 0 : const_index);
   params.image_index_offset = indir_index;

   bld_base->image_op(bld_base, &params);
}
1404
1405
/* Handle nir_intrinsic_image_deref_size: query an image's dimensions via
 * the backend image_size callback.
 *
 * Fix: "&params" was mis-encoded as "¶ms" (does not compile); restored
 * in the image_size() call.
 */
static void visit_image_size(struct lp_build_nir_context *bld_base,
                             nir_intrinsic_instr *instr,
                             LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   struct lp_sampler_size_query_params params = { 0 };
   unsigned const_index;
   LLVMValueRef indir_index;
   const struct glsl_type *type = glsl_without_array(var->type);
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);
   params.texture_unit = var->data.binding + (indir_index ? 0 : const_index);
   params.texture_unit_offset = indir_index;
   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
   params.sizes_out = result;

   bld_base->image_size(bld_base, &params);
}
1425
/* Handle nir_intrinsic_image_deref_samples: query an image's sample count
 * via the backend image_size callback with samples_only set.
 *
 * Fix: "&params" was mis-encoded as "¶ms" (does not compile); restored
 * in the image_size() call.
 */
static void visit_image_samples(struct lp_build_nir_context *bld_base,
                                nir_intrinsic_instr *instr,
                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   struct lp_sampler_size_query_params params = { 0 };
   unsigned const_index;
   LLVMValueRef indir_index;
   const struct glsl_type *type = glsl_without_array(var->type);
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);

   params.texture_unit = var->data.binding + (indir_index ? 0 : const_index);
   params.texture_unit_offset = indir_index;
   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
   params.sizes_out = result;
   params.samples_only = true;

   bld_base->image_size(bld_base, &params);
}
1447
/* Handle nir_intrinsic_load_shared: shared memory uses a NULL buffer
 * index in the common memory-load path.
 */
static void visit_shared_load(struct lp_build_nir_context *bld_base,
                              nir_intrinsic_instr *instr,
                              LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef byte_offset = get_src(bld_base, instr->src[0]);
   const unsigned num_comps = nir_dest_num_components(instr->dest);
   const unsigned bit_size = nir_dest_bit_size(instr->dest);

   bld_base->load_mem(bld_base, num_comps, bit_size, NULL, byte_offset, result);
}
1456
/* Handle nir_intrinsic_store_shared: shared memory uses a NULL buffer
 * index in the common memory-store path.
 */
static void visit_shared_store(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr)
{
   const int writemask = instr->const_index[1];
   const int num_comps = nir_src_num_components(instr->src[0]);
   const int bit_size = nir_src_bit_size(instr->src[0]);
   LLVMValueRef value = get_src(bld_base, instr->src[0]);
   LLVMValueRef byte_offset = get_src(bld_base, instr->src[1]);

   bld_base->store_mem(bld_base, writemask, num_comps, bit_size,
                       NULL, byte_offset, value);
}
1467
/* Handle the nir_intrinsic_shared_atomic_* family: shared memory uses a
 * NULL buffer index in the common memory-atomic path.
 */
static void visit_shared_atomic(struct lp_build_nir_context *bld_base,
                                nir_intrinsic_instr *instr,
                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef byte_offset = get_src(bld_base, instr->src[0]);
   LLVMValueRef value = get_src(bld_base, instr->src[1]);
   /* comp_swap carries the swap value as a third source. */
   LLVMValueRef swap_value =
      (instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap)
      ? get_src(bld_base, instr->src[2]) : NULL;

   bld_base->atomic_mem(bld_base, instr->intrinsic, NULL, byte_offset,
                        value, swap_value, &result[0]);
}
1481
/* Emit a workgroup execution barrier through the backend callback. */
static void visit_barrier(struct lp_build_nir_context *bld_base)
{
   bld_base->barrier(bld_base);
}
1486
/* Handle discard / discard_if: an unconditional discard passes a NULL
 * condition to the backend callback.
 */
static void visit_discard(struct lp_build_nir_context *bld_base,
                          nir_intrinsic_instr *instr)
{
   LLVMValueRef cond = NULL;

   if (instr->intrinsic == nir_intrinsic_discard_if)
      cond = cast_type(bld_base, get_src(bld_base, instr->src[0]),
                       nir_type_int, 32);

   bld_base->discard(bld_base, cond);
}
1497
/* Handle nir_intrinsic_load_kernel_input: read a compute-kernel argument
 * via the backend load_kernel_arg callback.
 */
static void visit_load_kernel_input(struct lp_build_nir_context *bld_base,
                                    nir_intrinsic_instr *instr, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   bool uniform_offset = nir_src_is_dynamically_uniform(instr->src[0]);
   LLVMValueRef byte_offset = get_src(bld_base, instr->src[0]);

   bld_base->load_kernel_arg(bld_base, nir_dest_num_components(instr->dest),
                             nir_dest_bit_size(instr->dest),
                             nir_src_bit_size(instr->src[0]),
                             uniform_offset, byte_offset, result);
}
1508
/* Load from global memory at the address in src[0]. */
static void visit_load_global(struct lp_build_nir_context *bld_base,
                              nir_intrinsic_instr *instr, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef global_addr = get_src(bld_base, instr->src[0]);

   bld_base->load_global(bld_base,
                         nir_dest_num_components(instr->dest),
                         nir_dest_bit_size(instr->dest),
                         nir_src_bit_size(instr->src[0]),
                         global_addr, result);
}
1517
/* Store src[0] to global memory at the address in src[1], honoring the
 * write mask in const_index[0]. */
static void visit_store_global(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr)
{
   LLVMValueRef store_val = get_src(bld_base, instr->src[0]);
   LLVMValueRef store_addr = get_src(bld_base, instr->src[1]);
   int num_comp = nir_src_num_components(instr->src[0]);
   int bit_size = nir_src_bit_size(instr->src[0]);
   int addr_bits = nir_src_bit_size(instr->src[1]);
   int mask = instr->const_index[0];

   bld_base->store_global(bld_base, mask, num_comp, bit_size,
                          addr_bits, store_addr, store_val);
}
1529
/* Emit an atomic on global (raw address) memory.  src[0] is the address,
 * src[1] the operand; comp_swap carries its comparison value in src[2]. */
static void visit_global_atomic(struct lp_build_nir_context *bld_base,
                                nir_intrinsic_instr *instr,
                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef atomic_addr = get_src(bld_base, instr->src[0]);
   LLVMValueRef atomic_val = get_src(bld_base, instr->src[1]);
   int addr_bits = nir_src_bit_size(instr->src[0]);
   LLVMValueRef swap_val =
      instr->intrinsic == nir_intrinsic_global_atomic_comp_swap ?
         get_src(bld_base, instr->src[2]) : NULL;

   bld_base->atomic_global(bld_base, instr->intrinsic, addr_bits,
                           atomic_addr, atomic_val, swap_val, &result[0]);
}
1543
/* Handle interp_deref_at_{centroid,offset,sample}: interpolate a fragment
 * input variable at a non-default location.  at_offset supplies a two
 * component float offset in src[1]; at_sample supplies an integer sample
 * index (passed through offsets[0] with the sample flag set). */
static void visit_interp(struct lp_build_nir_context *bld_base,
                         nir_intrinsic_instr *instr,
                         LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   unsigned num_components = nir_dest_num_components(instr->dest);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   unsigned const_index;
   LLVMValueRef indir_index;
   LLVMValueRef offsets[2] = { NULL, NULL };
   /* Split the deref chain into a constant element offset plus an
    * optional indirect index. */
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);
   bool centroid = instr->intrinsic == nir_intrinsic_interp_deref_at_centroid;
   bool sample = false;
   if (instr->intrinsic == nir_intrinsic_interp_deref_at_offset) {
      /* Two float offset components, extracted from the src[1] struct. */
      for (unsigned i = 0; i < 2; i++) {
         offsets[i] = LLVMBuildExtractValue(builder, get_src(bld_base, instr->src[1]), i, "");
         offsets[i] = cast_type(bld_base, offsets[i], nir_type_float, 32);
      }
   } else if (instr->intrinsic == nir_intrinsic_interp_deref_at_sample) {
      offsets[0] = get_src(bld_base, instr->src[1]);
      offsets[0] = cast_type(bld_base, offsets[0], nir_type_int, 32);
      sample = true;
   }
   bld_base->interp_at(bld_base, num_components, var, centroid, sample, const_index, indir_index, offsets, result);
}
1572
/* Load from per-invocation scratch memory at the offset in src[0]. */
static void visit_load_scratch(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr,
                               LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef scratch_offset = get_src(bld_base, instr->src[0]);

   bld_base->load_scratch(bld_base,
                          nir_dest_num_components(instr->dest),
                          nir_dest_bit_size(instr->dest),
                          scratch_offset, result);
}
1582
/* Store src[0] to per-invocation scratch memory at offset src[1].
 * NOTE(review): const_index[2] is assumed to be the write-mask index for
 * store_scratch (align_mul/align_offset occupy the first two slots) —
 * confirm against the nir_intrinsics index layout. */
static void visit_store_scratch(struct lp_build_nir_context *bld_base,
                                nir_intrinsic_instr *instr)
{
   LLVMValueRef store_val = get_src(bld_base, instr->src[0]);
   LLVMValueRef store_offset = get_src(bld_base, instr->src[1]);
   int num_comp = nir_src_num_components(instr->src[0]);
   int bit_size = nir_src_bit_size(instr->src[0]);
   int mask = instr->const_index[2];

   bld_base->store_scratch(bld_base, mask, num_comp, bit_size,
                           store_offset, store_val);
}
1593
1594
/* Translate one NIR intrinsic, dispatching to the visit_* helpers or the
 * lp_build_nir_context callbacks.  Value-producing intrinsics fill
 * result[]; component 0 being non-NULL is used as the "has a dest"
 * marker and triggers the assign_dest() at the bottom. */
static void visit_intrinsic(struct lp_build_nir_context *bld_base,
                            nir_intrinsic_instr *instr)
{
   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS] = {0};
   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
      visit_load_input(bld_base, instr, result);
      break;
   case nir_intrinsic_store_output:
      visit_store_output(bld_base, instr);
      break;
   case nir_intrinsic_load_deref:
      visit_load_var(bld_base, instr, result);
      break;
   case nir_intrinsic_store_deref:
      visit_store_var(bld_base, instr);
      break;
   case nir_intrinsic_load_ubo:
      visit_load_ubo(bld_base, instr, result);
      break;
   case nir_intrinsic_load_push_constant:
      visit_load_push_constant(bld_base, instr, result);
      break;
   case nir_intrinsic_load_ssbo:
      visit_load_ssbo(bld_base, instr, result);
      break;
   case nir_intrinsic_store_ssbo:
      visit_store_ssbo(bld_base, instr);
      break;
   case nir_intrinsic_get_ssbo_size:
      visit_get_ssbo_size(bld_base, instr, result);
      break;
   /* All system values go through one generic backend hook. */
   case nir_intrinsic_load_vertex_id:
   case nir_intrinsic_load_primitive_id:
   case nir_intrinsic_load_instance_id:
   case nir_intrinsic_load_base_instance:
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_work_group_id:
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_num_work_groups:
   case nir_intrinsic_load_invocation_id:
   case nir_intrinsic_load_front_face:
   case nir_intrinsic_load_draw_id:
   case nir_intrinsic_load_local_group_size:
   case nir_intrinsic_load_work_dim:
   case nir_intrinsic_load_tess_coord:
   case nir_intrinsic_load_tess_level_outer:
   case nir_intrinsic_load_tess_level_inner:
   case nir_intrinsic_load_patch_vertices_in:
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_sample_mask_in:
      bld_base->sysval_intrin(bld_base, instr, result);
      break;
   case nir_intrinsic_load_helper_invocation:
      bld_base->helper_invocation(bld_base, &result[0]);
      break;
   case nir_intrinsic_discard_if:
   case nir_intrinsic_discard:
      visit_discard(bld_base, instr);
      break;
   case nir_intrinsic_emit_vertex:
      bld_base->emit_vertex(bld_base, nir_intrinsic_stream_id(instr));
      break;
   case nir_intrinsic_end_primitive:
      bld_base->end_primitive(bld_base, nir_intrinsic_stream_id(instr));
      break;
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap:
      visit_ssbo_atomic(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_load:
      visit_load_image(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_store:
      visit_store_image(bld_base, instr);
      break;
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
      visit_atomic_image(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_size:
      visit_image_size(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_samples:
      visit_image_samples(bld_base, instr, result);
      break;
   case nir_intrinsic_load_shared:
      visit_shared_load(bld_base, instr, result);
      break;
   case nir_intrinsic_store_shared:
      visit_shared_store(bld_base, instr);
      break;
   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_shared_atomic_comp_swap:
      visit_shared_atomic(bld_base, instr, result);
      break;
   case nir_intrinsic_control_barrier:
      visit_barrier(bld_base);
      break;
   /* NOTE(review): memory barriers intentionally emit nothing here —
    * presumably this backend doesn't reorder the affected memory
    * operations; confirm before relying on it. */
   case nir_intrinsic_group_memory_barrier:
   case nir_intrinsic_memory_barrier:
   case nir_intrinsic_memory_barrier_shared:
   case nir_intrinsic_memory_barrier_buffer:
   case nir_intrinsic_memory_barrier_image:
   case nir_intrinsic_memory_barrier_tcs_patch:
      break;
   case nir_intrinsic_load_kernel_input:
      visit_load_kernel_input(bld_base, instr, result);
      break;
   case nir_intrinsic_load_global:
   case nir_intrinsic_load_global_constant:
      visit_load_global(bld_base, instr, result);
      break;
   case nir_intrinsic_store_global:
      visit_store_global(bld_base, instr);
      break;
   case nir_intrinsic_global_atomic_add:
   case nir_intrinsic_global_atomic_imin:
   case nir_intrinsic_global_atomic_umin:
   case nir_intrinsic_global_atomic_imax:
   case nir_intrinsic_global_atomic_umax:
   case nir_intrinsic_global_atomic_and:
   case nir_intrinsic_global_atomic_or:
   case nir_intrinsic_global_atomic_xor:
   case nir_intrinsic_global_atomic_exchange:
   case nir_intrinsic_global_atomic_comp_swap:
      visit_global_atomic(bld_base, instr, result);
      break;
   case nir_intrinsic_vote_all:
   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_ieq:
      bld_base->vote(bld_base, cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_int, 32), instr, result);
      break;
   case nir_intrinsic_interp_deref_at_offset:
   case nir_intrinsic_interp_deref_at_centroid:
   case nir_intrinsic_interp_deref_at_sample:
      visit_interp(bld_base, instr, result);
      break;
   case nir_intrinsic_load_scratch:
      visit_load_scratch(bld_base, instr, result);
      break;
   case nir_intrinsic_store_scratch:
      visit_store_scratch(bld_base, instr);
      break;
   default:
      fprintf(stderr, "Unsupported intrinsic: ");
      nir_print_instr(&instr->instr, stderr);
      fprintf(stderr, "\n");
      assert(0);
      break;
   }
   if (result[0]) {
      assign_dest(bld_base, &instr->dest, result);
   }
}
1775
/* Handle texture query ops (txs, query_levels, texture_samples) through
 * the backend's tex_size hook. */
static void visit_txs(struct lp_build_nir_context *bld_base, nir_tex_instr *instr)
{
   struct lp_sampler_size_query_params params = { 0 };
   LLVMValueRef sizes_out[NIR_MAX_VEC_COMPONENTS];
   LLVMValueRef explicit_lod = NULL;
   LLVMValueRef texture_unit_offset = NULL;
   /* Only the LOD and texture-offset sources matter for size queries. */
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_lod:
         explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_int, 32);
         break;
      case nir_tex_src_texture_offset:
         texture_unit_offset = get_src(bld_base, instr->src[i].src);
         break;
      default:
         break;
      }
   }

   params.target = glsl_sampler_to_pipe(instr->sampler_dim, instr->is_array);
   params.texture_unit = instr->texture_index;
   params.explicit_lod = explicit_lod;
   params.is_sviewinfo = TRUE;
   params.sizes_out = sizes_out;
   params.samples_only = (instr->op == nir_texop_texture_samples);
   params.texture_unit_offset = texture_unit_offset;

   /* query_levels queries at LOD 0; the level count is then read from
    * component 3 of the size result below. */
   if (instr->op == nir_texop_query_levels)
      params.explicit_lod = bld_base->uint_bld.zero;
   bld_base->tex_size(bld_base, &params);
   assign_dest(bld_base, &instr->dest, &sizes_out[instr->op == nir_texop_query_levels ? 3 : 0]);
}
1808
/* Classify how an explicit LOD source varies across the SIMD vector.
 * A dynamically uniform LOD is computed once (scalar); otherwise
 * fragment shaders can use per-quad LOD (unless the
 * GALLIVM_PERF_NO_QUAD_LOD flag forces per-element), and every other
 * stage needs per-element LOD. */
static enum lp_sampler_lod_property lp_build_nir_lod_property(struct lp_build_nir_context *bld_base,
                                                              nir_src lod_src)
{
   if (nir_src_is_dynamically_uniform(lod_src))
      return LP_SAMPLER_LOD_SCALAR;

   if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT)
      return (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) ?
                LP_SAMPLER_LOD_PER_ELEMENT : LP_SAMPLER_LOD_PER_QUAD;

   return LP_SAMPLER_LOD_PER_ELEMENT;
}
1826
/* Translate a NIR texture instruction into a gallivm sampler call.
 * Size/levels/samples queries are diverted to visit_txs(); everything
 * else gathers coordinates, offsets, LOD/bias, derivatives etc. into an
 * lp_sampler_params and invokes the backend's tex() hook. */
static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *instr)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   LLVMValueRef explicit_lod = NULL, projector = NULL, ms_index = NULL;
   struct lp_sampler_params params;
   struct lp_derivatives derivs;
   unsigned sample_key = 0;
   nir_deref_instr *texture_deref_instr = NULL;
   nir_deref_instr *sampler_deref_instr = NULL;
   LLVMValueRef texture_unit_offset = NULL;
   LLVMValueRef texel[NIR_MAX_VEC_COMPONENTS];
   unsigned lod_src = 0;
   LLVMValueRef coord_undef = LLVMGetUndef(bld_base->base.int_vec_type);

   memset(&params, 0, sizeof(params));
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;

   /* Queries don't sample; handle them separately and return early. */
   if (instr->op == nir_texop_txs || instr->op == nir_texop_query_levels || instr->op == nir_texop_texture_samples) {
      visit_txs(bld_base, instr);
      return;
   }
   /* Encode the operation kind into the sample key. */
   if (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)
      sample_key |= LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
   else if (instr->op == nir_texop_tg4) {
      sample_key |= LP_SAMPLER_OP_GATHER << LP_SAMPLER_OP_TYPE_SHIFT;
      sample_key |= (instr->component << LP_SAMPLER_GATHER_COMP_SHIFT);
   } else if (instr->op == nir_texop_lod)
      sample_key |= LP_SAMPLER_OP_LODQ << LP_SAMPLER_OP_TYPE_SHIFT;
   /* Collect all texture sources. */
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_coord: {
         LLVMValueRef coord = get_src(bld_base, instr->src[i].src);
         if (instr->coord_components == 1)
            coords[0] = coord;
         else {
            for (unsigned chan = 0; chan < instr->coord_components; ++chan)
               coords[chan] = LLVMBuildExtractValue(builder, coord,
                                                    chan, "");
         }
         /* Pad unused coordinate slots with undef. */
         for (unsigned chan = instr->coord_components; chan < 5; chan++)
            coords[chan] = coord_undef;

         break;
      }
      case nir_tex_src_texture_deref:
         texture_deref_instr = nir_src_as_deref(instr->src[i].src);
         break;
      case nir_tex_src_sampler_deref:
         sampler_deref_instr = nir_src_as_deref(instr->src[i].src);
         break;
      case nir_tex_src_projector:
         /* Store the reciprocal; the multiply happens after casting. */
         projector = lp_build_rcp(&bld_base->base, cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_float, 32));
         break;
      case nir_tex_src_comparator:
         /* Shadow comparison value lives in coords[4]. */
         sample_key |= LP_SAMPLER_SHADOW;
         coords[4] = get_src(bld_base, instr->src[i].src);
         coords[4] = cast_type(bld_base, coords[4], nir_type_float, 32);
         break;
      case nir_tex_src_bias:
         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
         lod_src = i;
         explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_float, 32);
         break;
      case nir_tex_src_lod:
         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
         lod_src = i;
         /* txf takes an integer LOD, the sampling ops a float one. */
         if (instr->op == nir_texop_txf)
            explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_int, 32);
         else
            explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_float, 32);
         break;
      case nir_tex_src_ddx: {
         /* The array layer takes no derivative. */
         int deriv_cnt = instr->coord_components;
         if (instr->is_array)
            deriv_cnt--;
         LLVMValueRef deriv_val = get_src(bld_base, instr->src[i].src);
         if (deriv_cnt == 1)
            derivs.ddx[0] = deriv_val;
         else
            for (unsigned chan = 0; chan < deriv_cnt; ++chan)
               derivs.ddx[chan] = LLVMBuildExtractValue(builder, deriv_val,
                                                        chan, "");
         for (unsigned chan = 0; chan < deriv_cnt; ++chan)
            derivs.ddx[chan] = cast_type(bld_base, derivs.ddx[chan], nir_type_float, 32);
         break;
      }
      case nir_tex_src_ddy: {
         int deriv_cnt = instr->coord_components;
         if (instr->is_array)
            deriv_cnt--;
         LLVMValueRef deriv_val = get_src(bld_base, instr->src[i].src);
         if (deriv_cnt == 1)
            derivs.ddy[0] = deriv_val;
         else
            for (unsigned chan = 0; chan < deriv_cnt; ++chan)
               derivs.ddy[chan] = LLVMBuildExtractValue(builder, deriv_val,
                                                        chan, "");
         for (unsigned chan = 0; chan < deriv_cnt; ++chan)
            derivs.ddy[chan] = cast_type(bld_base, derivs.ddy[chan], nir_type_float, 32);
         break;
      }
      case nir_tex_src_offset: {
         /* Texel offsets likewise exclude the array layer. */
         int offset_cnt = instr->coord_components;
         if (instr->is_array)
            offset_cnt--;
         LLVMValueRef offset_val = get_src(bld_base, instr->src[i].src);
         sample_key |= LP_SAMPLER_OFFSETS;
         if (offset_cnt == 1)
            offsets[0] = cast_type(bld_base, offset_val, nir_type_int, 32);
         else {
            for (unsigned chan = 0; chan < offset_cnt; ++chan) {
               offsets[chan] = LLVMBuildExtractValue(builder, offset_val,
                                                     chan, "");
               offsets[chan] = cast_type(bld_base, offsets[chan], nir_type_int, 32);
            }
         }
         break;
      }
      case nir_tex_src_ms_index:
         sample_key |= LP_SAMPLER_FETCH_MS;
         ms_index = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_int, 32);
         break;

      case nir_tex_src_texture_offset:
         texture_unit_offset = get_src(bld_base, instr->src[i].src);
         break;
      case nir_tex_src_sampler_offset:
         break;
      default:
         assert(0);
         break;
      }
   }
   if (!sampler_deref_instr)
      sampler_deref_instr = texture_deref_instr;

   if (explicit_lod)
      lod_property = lp_build_nir_lod_property(bld_base, instr->src[lod_src].src);

   /* Sampling ops take float coords, fetch ops integer coords. */
   if (instr->op == nir_texop_tex || instr->op == nir_texop_tg4 || instr->op == nir_texop_txb ||
       instr->op == nir_texop_txl || instr->op == nir_texop_txd || instr->op == nir_texop_lod)
      for (unsigned chan = 0; chan < instr->coord_components; ++chan)
         coords[chan] = cast_type(bld_base, coords[chan], nir_type_float, 32);
   else if (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)
      for (unsigned chan = 0; chan < instr->coord_components; ++chan)
         coords[chan] = cast_type(bld_base, coords[chan], nir_type_int, 32);

   if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_1D) {
      /* move layer coord for 1d arrays. */
      coords[2] = coords[1];
      coords[1] = coord_undef;
   }

   /* Apply projection to the coords and the shadow comparator. */
   if (projector) {
      for (unsigned chan = 0; chan < instr->coord_components; ++chan)
         coords[chan] = lp_build_mul(&bld_base->base, coords[chan], projector);
      if (sample_key & LP_SAMPLER_SHADOW)
         coords[4] = lp_build_mul(&bld_base->base, coords[4], projector);
   }

   /* Without derefs, fall back to the static texture/sampler indices
    * (unless bindless handles are in use). */
   uint32_t samp_base_index = 0, tex_base_index = 0;
   if (!sampler_deref_instr) {
      int samp_src_index = nir_tex_instr_src_index(instr, nir_tex_src_sampler_handle);
      if (samp_src_index == -1) {
         samp_base_index = instr->sampler_index;
      }
   }
   if (!texture_deref_instr) {
      int tex_src_index = nir_tex_instr_src_index(instr, nir_tex_src_texture_handle);
      if (tex_src_index == -1) {
         tex_base_index = instr->texture_index;
      }
   }

   if (instr->op == nir_texop_txd) {
      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
      params.derivs = &derivs;
      if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
         if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD)
            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
         else
            lod_property = LP_SAMPLER_LOD_PER_QUAD;
      } else
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
   }

   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
   params.sample_key = sample_key;
   params.offsets = offsets;
   params.texture_index = tex_base_index;
   params.texture_index_offset = texture_unit_offset;
   params.sampler_index = samp_base_index;
   params.coords = coords;
   params.texel = texel;
   params.lod = explicit_lod;
   params.ms_index = ms_index;
   bld_base->tex(bld_base, &params);
   assign_dest(bld_base, &instr->dest, texel);
}
2029
visit_ssa_undef(struct lp_build_nir_context * bld_base,const nir_ssa_undef_instr * instr)2030 static void visit_ssa_undef(struct lp_build_nir_context *bld_base,
2031 const nir_ssa_undef_instr *instr)
2032 {
2033 unsigned num_components = instr->def.num_components;
2034 LLVMValueRef undef[NIR_MAX_VEC_COMPONENTS];
2035 struct lp_build_context *undef_bld = get_int_bld(bld_base, true, instr->def.bit_size);
2036 for (unsigned i = 0; i < num_components; i++)
2037 undef[i] = LLVMGetUndef(undef_bld->vec_type);
2038 assign_ssa_dest(bld_base, &instr->def, undef);
2039 }
2040
visit_jump(struct lp_build_nir_context * bld_base,const nir_jump_instr * instr)2041 static void visit_jump(struct lp_build_nir_context *bld_base,
2042 const nir_jump_instr *instr)
2043 {
2044 switch (instr->type) {
2045 case nir_jump_break:
2046 bld_base->break_stmt(bld_base);
2047 break;
2048 case nir_jump_continue:
2049 bld_base->continue_stmt(bld_base);
2050 break;
2051 default:
2052 unreachable("Unknown jump instr\n");
2053 }
2054 }
2055
/* Resolve a deref instruction to an LLVM value.  Only shared/global
 * memory derefs are handled; everything else is ignored (handled when
 * the load/store that uses it is visited).  For nir_deref_type_var the
 * base pointer is looked up in the vars table built earlier. */
static void visit_deref(struct lp_build_nir_context *bld_base,
                        nir_deref_instr *instr)
{
   if (!nir_deref_mode_is_one_of(instr, nir_var_mem_shared |
                                        nir_var_mem_global))
      return;
   LLVMValueRef result = NULL;
   switch(instr->deref_type) {
   case nir_deref_type_var: {
      struct hash_entry *entry = _mesa_hash_table_search(bld_base->vars, instr->var);
      /* Fix: the lookup result was dereferenced unconditionally; a
       * variable missing from the table crashed on a NULL hash_entry.
       * Make the precondition explicit and avoid the NULL deref. */
      assert(entry);
      result = entry ? entry->data : NULL;
      break;
   }
   default:
      unreachable("Unhandled deref_instr deref type");
   }

   assign_ssa(bld_base, instr->dest.ssa.index, result);
}
2075
/* Emit every instruction of a NIR basic block in order. */
static void visit_block(struct lp_build_nir_context *bld_base, nir_block *block)
{
   nir_foreach_instr(instr, block)
   {
      switch (instr->type) {
      case nir_instr_type_alu:
         visit_alu(bld_base, nir_instr_as_alu(instr));
         break;
      case nir_instr_type_load_const:
         visit_load_const(bld_base, nir_instr_as_load_const(instr));
         break;
      case nir_instr_type_intrinsic:
         visit_intrinsic(bld_base, nir_instr_as_intrinsic(instr));
         break;
      case nir_instr_type_tex:
         visit_tex(bld_base, nir_instr_as_tex(instr));
         break;
      case nir_instr_type_phi:
         /* Phis are eliminated by nir_convert_from_ssa() before we
          * translate, so none should survive to this point. */
         assert(0);
         break;
      case nir_instr_type_ssa_undef:
         visit_ssa_undef(bld_base, nir_instr_as_ssa_undef(instr));
         break;
      case nir_instr_type_jump:
         visit_jump(bld_base, nir_instr_as_jump(instr));
         break;
      case nir_instr_type_deref:
         visit_deref(bld_base, nir_instr_as_deref(instr));
         break;
      default:
         fprintf(stderr, "Unknown NIR instr type: ");
         nir_print_instr(instr, stderr);
         fprintf(stderr, "\n");
         abort();
      }
   }
}
2113
/* Emit a structured if/else around the then/else CF lists.  The else
 * branch is emitted only when its list is non-empty. */
static void visit_if(struct lp_build_nir_context *bld_base, nir_if *if_stmt)
{
   bld_base->if_cond(bld_base, get_src(bld_base, if_stmt->condition));
   visit_cf_list(bld_base, &if_stmt->then_list);

   if (!exec_list_is_empty(&if_stmt->else_list)) {
      bld_base->else_stmt(bld_base);
      visit_cf_list(bld_base, &if_stmt->else_list);
   }
   bld_base->endif_stmt(bld_base);
}
2127
/* Emit a structured loop around the loop body's control-flow list. */
static void visit_loop(struct lp_build_nir_context *bld_base, nir_loop *loop)
{
   bld_base->bgnloop(bld_base);
   visit_cf_list(bld_base, &loop->body);
   bld_base->endloop(bld_base);
}
2134
visit_cf_list(struct lp_build_nir_context * bld_base,struct exec_list * list)2135 static void visit_cf_list(struct lp_build_nir_context *bld_base,
2136 struct exec_list *list)
2137 {
2138 foreach_list_typed(nir_cf_node, node, node, list)
2139 {
2140 switch (node->type) {
2141 case nir_cf_node_block:
2142 visit_block(bld_base, nir_cf_node_as_block(node));
2143 break;
2144
2145 case nir_cf_node_if:
2146 visit_if(bld_base, nir_cf_node_as_if(node));
2147 break;
2148
2149 case nir_cf_node_loop:
2150 visit_loop(bld_base, nir_cf_node_as_loop(node));
2151 break;
2152
2153 default:
2154 assert(0);
2155 }
2156 }
2157 }
2158
/* Declare one shader output variable via the backend hook.  The nir
 * parameter is unused here but kept for interface symmetry. */
static void
handle_shader_output_decl(struct lp_build_nir_context *bld_base,
                          struct nir_shader *nir,
                          struct nir_variable *variable)
{
   bld_base->emit_var_decl(bld_base, variable);
}
2166
/* Vector registers are stored as arrays on the LLVM side, so we can use
   GEP on them: exec-mask stores need to operate on a single component
   at a time.  The arrays are laid out as:
   0.x, 1.x, 2.x, 3.x
   0.y, 1.y, 2.y, 3.y
   ....
*/
get_register_type(struct lp_build_nir_context * bld_base,nir_register * reg)2175 static LLVMTypeRef get_register_type(struct lp_build_nir_context *bld_base,
2176 nir_register *reg)
2177 {
2178 struct lp_build_context *int_bld = get_int_bld(bld_base, true, reg->bit_size);
2179
2180 LLVMTypeRef type = int_bld->vec_type;
2181 if (reg->num_array_elems)
2182 type = LLVMArrayType(type, reg->num_array_elems);
2183 if (reg->num_components > 1)
2184 type = LLVMArrayType(type, reg->num_components);
2185
2186 return type;
2187 }
2188
2189
/* Main entry point: translate a NIR shader to LLVM IR through the
 * callbacks in bld_base.  The shader is first taken out of SSA and
 * locals are lowered to registers so values can be written with
 * exec-mask stores; register allocas and per-def SSA slots are set up
 * before walking the control flow. */
bool lp_build_nir_llvm(
   struct lp_build_nir_context *bld_base,
   struct nir_shader *nir)
{
   struct nir_function *func;

   nir_convert_from_ssa(nir, true);
   nir_lower_locals_to_regs(nir);
   nir_remove_dead_derefs(nir);
   nir_remove_dead_variables(nir, nir_var_function_temp, NULL);

   nir_foreach_shader_out_variable(variable, nir)
      handle_shader_output_decl(bld_base, nir, variable);

   if (nir->info.io_lowered) {
      uint64_t outputs_written = nir->info.outputs_written;

      /* With lowered I/O there are no output variables left; synthesize
       * one vec4 declaration per written output slot. */
      while (outputs_written) {
         unsigned location = u_bit_scan64(&outputs_written);
         nir_variable var = {0};

         var.type = glsl_vec4_type();
         var.data.mode = nir_var_shader_out;
         var.data.location = location;
         /* driver_location = compacted index among the written slots. */
         var.data.driver_location = util_bitcount64(nir->info.outputs_written &
                                                    BITFIELD64_MASK(location));
         bld_base->emit_var_decl(bld_base, &var);
      }
   }

   bld_base->regs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                            _mesa_key_pointer_equal);
   bld_base->vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                            _mesa_key_pointer_equal);

   func = (struct nir_function *)exec_list_get_head(&nir->functions);

   /* Allocate stack storage for every NIR register up front. */
   nir_foreach_register(reg, &func->impl->registers) {
      LLVMTypeRef type = get_register_type(bld_base, reg);
      LLVMValueRef reg_alloc = lp_build_alloca(bld_base->base.gallivm,
                                               type, "reg");
      _mesa_hash_table_insert(bld_base->regs, reg, reg_alloc);
   }
   nir_index_ssa_defs(func->impl);
   bld_base->ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef));
   visit_cf_list(bld_base, &func->impl->body);

   free(bld_base->ssa_defs);
   ralloc_free(bld_base->vars);
   ralloc_free(bld_base->regs);
   return true;
}
2242
2243 /* do some basic opts to remove some things we don't want to see. */
lp_build_opt_nir(struct nir_shader * nir)2244 void lp_build_opt_nir(struct nir_shader *nir)
2245 {
2246 bool progress;
2247
2248 static const struct nir_lower_tex_options lower_tex_options = {
2249 .lower_tg4_offsets = true,
2250 };
2251 NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);
2252 NIR_PASS_V(nir, nir_lower_frexp);
2253
2254 NIR_PASS_V(nir, nir_lower_flrp, 16|32|64, true);
2255
2256 do {
2257 progress = false;
2258 NIR_PASS_V(nir, nir_opt_constant_folding);
2259 NIR_PASS_V(nir, nir_opt_algebraic);
2260 NIR_PASS_V(nir, nir_lower_pack);
2261
2262 nir_lower_tex_options options = { .lower_tex_without_implicit_lod = true };
2263 NIR_PASS_V(nir, nir_lower_tex, &options);
2264 } while (progress);
2265 nir_lower_bool_to_int32(nir);
2266 }
2267