/*
 * Copyright 2017 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Karol Herbst <kherbst@redhat.com>
 */

#include "compiler/nir/nir.h"

#include "util/u_debug.h"
#include "util/u_prim.h"

#include "nv50_ir.h"
#include "nv50_ir_from_common.h"
#include "nv50_ir_lowering_helper.h"
#include "nv50_ir_target.h"
#include "nv50_ir_util.h"
#include "tgsi/tgsi_from_mesa.h"

#include <unordered_map>
#include <cstring>
#include <list>
#include <vector>

namespace {

using namespace nv50_ir;

int
type_size(const struct glsl_type *type, bool bindless)
{
   return glsl_count_attribute_slots(type, false);
}

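/* Size/alignment helper for function-temporary types; the signature matches
 * NIR's glsl_type_size_align_func callbacks. Scalars report their natural
 * size and alignment, while vectors are padded out and aligned to 16 bytes. */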
static void
function_temp_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   if (glsl_type_is_scalar(type)) {
      glsl_get_natural_size_align_bytes(type, size, align);
   } else {
      unsigned comp_size = glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
      unsigned length = glsl_get_vector_elements(type);

      *size = comp_size * length;
      *align = 0x10;
   }
}

class Converter : public ConverterCommon
{
public:
   Converter(Program *, nir_shader *, nv50_ir_prog_info *, nv50_ir_prog_info_out *);

   bool run();
private:
   typedef std::vector<LValue*> LValues;
   typedef std::unordered_map<unsigned, LValues> NirDefMap;
   typedef std::unordered_map<unsigned, nir_load_const_instr*> ImmediateMap;
   typedef std::unordered_map<unsigned, BasicBlock*> NirBlockMap;

   CacheMode convert(enum gl_access_qualifier);
   TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);
   LValues& convert(nir_alu_dest *);
   BasicBlock* convert(nir_block *);
   LValues& convert(nir_dest *);
   SVSemantic convert(nir_intrinsic_op);
   Value* convert(nir_load_const_instr*, uint8_t);
   LValues& convert(nir_register *);
   LValues& convert(nir_ssa_def *);

   Value* getSrc(nir_alu_src *, uint8_t component = 0);
   Value* getSrc(nir_register *, uint8_t);
   Value* getSrc(nir_src *, uint8_t, bool indirect = false);
   Value* getSrc(nir_ssa_def *, uint8_t);

   // The returned value is the constant part of the given source (either the
   // nir_src or the selected source component of an intrinsic). Even though
   // this is mostly an optimization to be able to skip indirects in a few
   // cases, sometimes we require immediate values or set some fields on
   // instructions (e.g. tex) in order for codegen to consume those.
   // If the found value does not have a constant part, the Value gets
   // returned through the Value parameter.
   uint32_t getIndirect(nir_src *, uint8_t, Value *&);
   // isScalar indicates that the addressing is scalar; vec4 addressing is
   // assumed otherwise
   uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&,
                        bool isScalar = false);

   uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);

   void setInterpolate(nv50_ir_varying *,
                       uint8_t,
                       bool centroid,
                       unsigned semantics);

   Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base,
                         uint8_t c, Value *indirect0 = NULL,
                         Value *indirect1 = NULL, bool patch = false);
   void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
                Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,
                Value *indirect1 = NULL);

   bool isFloatType(nir_alu_type);
   bool isSignedType(nir_alu_type);
   bool isResultFloat(nir_op);
   bool isResultSigned(nir_op);

   DataType getDType(nir_alu_instr *);
   DataType getDType(nir_intrinsic_instr *);
   DataType getDType(nir_op, uint8_t);

   DataFile getFile(nir_intrinsic_op);

   std::vector<DataType> getSTypes(nir_alu_instr *);
   DataType getSType(nir_src &, bool isFloat, bool isSigned);

   operation getOperation(nir_intrinsic_op);
   operation getOperation(nir_op);
   operation getOperation(nir_texop);
   operation preOperationNeeded(nir_op);

   int getSubOp(nir_intrinsic_op);
   int getSubOp(nir_op);

   CondCode getCondCode(nir_op);

   bool assignSlots();
   bool parseNIR();

   bool visit(nir_alu_instr *);
   bool visit(nir_block *);
   bool visit(nir_cf_node *);
   bool visit(nir_function *);
   bool visit(nir_if *);
   bool visit(nir_instr *);
   bool visit(nir_intrinsic_instr *);
   bool visit(nir_jump_instr *);
   bool visit(nir_load_const_instr*);
   bool visit(nir_loop *);
   bool visit(nir_ssa_undef_instr *);
   bool visit(nir_tex_instr *);

   // tex stuff
   unsigned int getNIRArgCount(TexInstruction::Target&);

   nir_shader *nir;

   NirDefMap ssaDefs;
   NirDefMap regDefs;
   ImmediateMap immediates;
   NirBlockMap blocks;
   unsigned int curLoopDepth;
   unsigned int curIfDepth;

   BasicBlock *exit;
   Value *zero;
   Instruction *immInsertPos;

   int clipVertexOutput;

   union {
      struct {
         Value *position;
      } fp;
   };
};

Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info,
                     nv50_ir_prog_info_out *info_out)
   : ConverterCommon(prog, info, info_out),
     nir(nir),
     curLoopDepth(0),
     curIfDepth(0),
     exit(NULL),
     immInsertPos(NULL),
     clipVertexOutput(-1)
{
   zero = mkImm((uint32_t)0);
}

BasicBlock *
Converter::convert(nir_block *block)
{
   NirBlockMap::iterator it = blocks.find(block->index);
   if (it != blocks.end())
      return it->second;

   BasicBlock *bb = new BasicBlock(func);
   blocks[block->index] = bb;
   return bb;
}

bool
Converter::isFloatType(nir_alu_type type)
{
   return nir_alu_type_get_base_type(type) == nir_type_float;
}

bool
Converter::isSignedType(nir_alu_type type)
{
   return nir_alu_type_get_base_type(type) == nir_type_int;
}

bool
Converter::isResultFloat(nir_op op)
{
   const nir_op_info &info = nir_op_infos[op];
   if (info.output_type != nir_type_invalid)
      return isFloatType(info.output_type);

   ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
   assert(false);
   return true;
}

bool
Converter::isResultSigned(nir_op op)
{
   switch (op) {
   // there is no umul and we get wrong results if we treat all muls as signed
   case nir_op_imul:
   case nir_op_inot:
      return false;
   default:
      const nir_op_info &info = nir_op_infos[op];
      if (info.output_type != nir_type_invalid)
         return isSignedType(info.output_type);
      ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
      assert(false);
      return true;
   }
}

DataType
Converter::getDType(nir_alu_instr *insn)
{
   if (insn->dest.dest.is_ssa)
      return getDType(insn->op, insn->dest.dest.ssa.bit_size);
   else
      return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
}

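/* Atomic intrinsics don't encode a result type, so classify them by hand:
 * float atomics yield floats, signed min/max yield signed integers, and
 * everything else is treated as unsigned. */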
DataType
Converter::getDType(nir_intrinsic_instr *insn)
{
   bool isFloat, isSigned;
   switch (insn->intrinsic) {
   case nir_intrinsic_bindless_image_atomic_fadd:
   case nir_intrinsic_global_atomic_fadd:
   case nir_intrinsic_image_atomic_fadd:
   case nir_intrinsic_shared_atomic_fadd:
   case nir_intrinsic_ssbo_atomic_fadd:
      isFloat = true;
      isSigned = false;
      break;
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_imin:
      isFloat = false;
      isSigned = true;
      break;
   default:
      isFloat = false;
      isSigned = false;
      break;
   }

   if (insn->dest.is_ssa)
      return typeOfSize(insn->dest.ssa.bit_size / 8, isFloat, isSigned);
   else
      return typeOfSize(insn->dest.reg.reg->bit_size / 8, isFloat, isSigned);
}

DataType
Converter::getDType(nir_op op, uint8_t bitSize)
{
   DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
   if (ty == TYPE_NONE) {
      ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
      assert(false);
   }
   return ty;
}

std::vector<DataType>
Converter::getSTypes(nir_alu_instr *insn)
{
   const nir_op_info &info = nir_op_infos[insn->op];
   std::vector<DataType> res(info.num_inputs);

   for (uint8_t i = 0; i < info.num_inputs; ++i) {
      if (info.input_types[i] != nir_type_invalid) {
         res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
      } else {
         ERROR("getSType not implemented for %s idx %u\n", info.name, i);
         assert(false);
         res[i] = TYPE_NONE;
         break;
      }
   }

   return res;
}

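/* Resolve the nv50 IR type of a nir_src from its bit size plus the
 * float/signed classification supplied by the caller; TYPE_NONE (with an
 * error) signals an unsupported size. */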
DataType
Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
{
   uint8_t bitSize;
   if (src.is_ssa)
      bitSize = src.ssa->bit_size;
   else
      bitSize = src.reg.reg->bit_size;

   DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
   if (ty == TYPE_NONE) {
      const char *str;
      if (isFloat)
         str = "float";
      else if (isSigned)
         str = "int";
      else
         str = "uint";
      ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
      assert(false);
   }
   return ty;
}

DataFile
Converter::getFile(nir_intrinsic_op op)
{
   switch (op) {
   case nir_intrinsic_load_global:
   case nir_intrinsic_store_global:
   case nir_intrinsic_load_global_constant:
      return FILE_MEMORY_GLOBAL;
   case nir_intrinsic_load_scratch:
   case nir_intrinsic_store_scratch:
      return FILE_MEMORY_LOCAL;
   case nir_intrinsic_load_shared:
   case nir_intrinsic_store_shared:
      return FILE_MEMORY_SHARED;
   case nir_intrinsic_load_kernel_input:
      return FILE_SHADER_INPUT;
   default:
      ERROR("couldn't get DataFile for op %s\n", nir_intrinsic_infos[op].name);
      assert(false);
   }
   return FILE_NULL;
}

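/* Map a NIR ALU opcode to the corresponding nv50 IR operation. Type
 * distinctions (float vs. int, signedness) are carried by the instruction's
 * DataType, so most float/int variants collapse onto a single operation. */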
operation
Converter::getOperation(nir_op op)
{
   switch (op) {
   // basic ops with float and int variants
   case nir_op_fabs:
   case nir_op_iabs:
      return OP_ABS;
   case nir_op_fadd:
   case nir_op_iadd:
      return OP_ADD;
   case nir_op_iand:
      return OP_AND;
   case nir_op_ifind_msb:
   case nir_op_ufind_msb:
      return OP_BFIND;
   case nir_op_fceil:
      return OP_CEIL;
   case nir_op_fcos:
      return OP_COS;
   case nir_op_f2f32:
   case nir_op_f2f64:
   case nir_op_f2i32:
   case nir_op_f2i64:
   case nir_op_f2u32:
   case nir_op_f2u64:
   case nir_op_i2f32:
   case nir_op_i2f64:
   case nir_op_i2i32:
   case nir_op_i2i64:
   case nir_op_u2f32:
   case nir_op_u2f64:
   case nir_op_u2u32:
   case nir_op_u2u64:
      return OP_CVT;
   case nir_op_fddx:
   case nir_op_fddx_coarse:
   case nir_op_fddx_fine:
      return OP_DFDX;
   case nir_op_fddy:
   case nir_op_fddy_coarse:
   case nir_op_fddy_fine:
      return OP_DFDY;
   case nir_op_fdiv:
   case nir_op_idiv:
   case nir_op_udiv:
      return OP_DIV;
   case nir_op_fexp2:
      return OP_EX2;
   case nir_op_ffloor:
      return OP_FLOOR;
   case nir_op_ffma:
      /* No FMA op pre-nvc0 */
      if (info->target < 0xc0)
         return OP_MAD;
      return OP_FMA;
   case nir_op_flog2:
      return OP_LG2;
   case nir_op_fmax:
   case nir_op_imax:
   case nir_op_umax:
      return OP_MAX;
   case nir_op_pack_64_2x32_split:
      return OP_MERGE;
   case nir_op_fmin:
   case nir_op_imin:
   case nir_op_umin:
      return OP_MIN;
   case nir_op_fmod:
   case nir_op_imod:
   case nir_op_umod:
   case nir_op_frem:
   case nir_op_irem:
      return OP_MOD;
   case nir_op_fmul:
   case nir_op_imul:
   case nir_op_imul_high:
   case nir_op_umul_high:
      return OP_MUL;
   case nir_op_fneg:
   case nir_op_ineg:
      return OP_NEG;
   case nir_op_inot:
      return OP_NOT;
   case nir_op_ior:
      return OP_OR;
   case nir_op_fpow:
      return OP_POW;
   case nir_op_frcp:
      return OP_RCP;
   case nir_op_frsq:
      return OP_RSQ;
   case nir_op_fsat:
      return OP_SAT;
   case nir_op_feq32:
   case nir_op_ieq32:
   case nir_op_fge32:
   case nir_op_ige32:
   case nir_op_uge32:
   case nir_op_flt32:
   case nir_op_ilt32:
   case nir_op_ult32:
   case nir_op_fneu32:
   case nir_op_ine32:
      return OP_SET;
   case nir_op_ishl:
      return OP_SHL;
   case nir_op_ishr:
   case nir_op_ushr:
      return OP_SHR;
   case nir_op_fsin:
      return OP_SIN;
   case nir_op_fsqrt:
      return OP_SQRT;
   case nir_op_ftrunc:
      return OP_TRUNC;
   case nir_op_ixor:
      return OP_XOR;
   default:
      ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
      assert(false);
      return OP_NOP;
   }
}

operation
Converter::getOperation(nir_texop op)
{
   switch (op) {
   case nir_texop_tex:
      return OP_TEX;
   case nir_texop_lod:
      return OP_TXLQ;
   case nir_texop_txb:
      return OP_TXB;
   case nir_texop_txd:
      return OP_TXD;
   case nir_texop_txf:
   case nir_texop_txf_ms:
      return OP_TXF;
   case nir_texop_tg4:
      return OP_TXG;
   case nir_texop_txl:
      return OP_TXL;
   case nir_texop_query_levels:
   case nir_texop_texture_samples:
   case nir_texop_txs:
      return OP_TXQ;
   default:
      ERROR("couldn't get operation for nir_texop %u\n", op);
      assert(false);
      return OP_NOP;
   }
}

operation
Converter::getOperation(nir_intrinsic_op op)
{
   switch (op) {
   case nir_intrinsic_emit_vertex:
      return OP_EMIT;
   case nir_intrinsic_end_primitive:
      return OP_RESTART;
   case nir_intrinsic_bindless_image_atomic_add:
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_bindless_image_atomic_and:
   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_bindless_image_atomic_comp_swap:
   case nir_intrinsic_image_atomic_comp_swap:
   case nir_intrinsic_bindless_image_atomic_exchange:
   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_bindless_image_atomic_imax:
   case nir_intrinsic_image_atomic_imax:
   case nir_intrinsic_bindless_image_atomic_umax:
   case nir_intrinsic_image_atomic_umax:
   case nir_intrinsic_bindless_image_atomic_imin:
   case nir_intrinsic_image_atomic_imin:
   case nir_intrinsic_bindless_image_atomic_umin:
   case nir_intrinsic_image_atomic_umin:
   case nir_intrinsic_bindless_image_atomic_or:
   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_bindless_image_atomic_xor:
   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_bindless_image_atomic_inc_wrap:
   case nir_intrinsic_image_atomic_inc_wrap:
   case nir_intrinsic_bindless_image_atomic_dec_wrap:
   case nir_intrinsic_image_atomic_dec_wrap:
      return OP_SUREDP;
   case nir_intrinsic_bindless_image_load:
   case nir_intrinsic_image_load:
      return OP_SULDP;
   case nir_intrinsic_bindless_image_samples:
   case nir_intrinsic_image_samples:
   case nir_intrinsic_bindless_image_size:
   case nir_intrinsic_image_size:
      return OP_SUQ;
   case nir_intrinsic_bindless_image_store:
   case nir_intrinsic_image_store:
      return OP_SUSTP;
   default:
      ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
      assert(false);
      return OP_NOP;
   }
}

operation
Converter::preOperationNeeded(nir_op op)
{
   switch (op) {
   case nir_op_fcos:
   case nir_op_fsin:
      return OP_PRESIN;
   default:
      return OP_NOP;
   }
}

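/* Some NIR ops need a sub-op qualifier on top of the base operation:
 * *mul_high selects the high half of the result, and NIR defines shift
 * counts as wrapping (only the low bits are used), which codegen is told
 * to honor via NV50_IR_SUBOP_SHIFT_WRAP. */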
int
Converter::getSubOp(nir_op op)
{
   switch (op) {
   case nir_op_imul_high:
   case nir_op_umul_high:
      return NV50_IR_SUBOP_MUL_HIGH;
   case nir_op_ishl:
   case nir_op_ishr:
   case nir_op_ushr:
      return NV50_IR_SUBOP_SHIFT_WRAP;
   default:
      return 0;
   }
}

int
Converter::getSubOp(nir_intrinsic_op op)
{
   switch (op) {
   case nir_intrinsic_bindless_image_atomic_add:
   case nir_intrinsic_global_atomic_add:
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_ssbo_atomic_add:
      return NV50_IR_SUBOP_ATOM_ADD;
   case nir_intrinsic_bindless_image_atomic_fadd:
   case nir_intrinsic_global_atomic_fadd:
   case nir_intrinsic_image_atomic_fadd:
   case nir_intrinsic_shared_atomic_fadd:
   case nir_intrinsic_ssbo_atomic_fadd:
      return NV50_IR_SUBOP_ATOM_ADD;
   case nir_intrinsic_bindless_image_atomic_and:
   case nir_intrinsic_global_atomic_and:
   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_ssbo_atomic_and:
      return NV50_IR_SUBOP_ATOM_AND;
   case nir_intrinsic_bindless_image_atomic_comp_swap:
   case nir_intrinsic_global_atomic_comp_swap:
   case nir_intrinsic_image_atomic_comp_swap:
   case nir_intrinsic_shared_atomic_comp_swap:
   case nir_intrinsic_ssbo_atomic_comp_swap:
      return NV50_IR_SUBOP_ATOM_CAS;
   case nir_intrinsic_bindless_image_atomic_exchange:
   case nir_intrinsic_global_atomic_exchange:
   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_exchange:
      return NV50_IR_SUBOP_ATOM_EXCH;
   case nir_intrinsic_bindless_image_atomic_or:
   case nir_intrinsic_global_atomic_or:
   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_ssbo_atomic_or:
      return NV50_IR_SUBOP_ATOM_OR;
   case nir_intrinsic_bindless_image_atomic_imax:
   case nir_intrinsic_bindless_image_atomic_umax:
   case nir_intrinsic_global_atomic_imax:
   case nir_intrinsic_global_atomic_umax:
   case nir_intrinsic_image_atomic_imax:
   case nir_intrinsic_image_atomic_umax:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umax:
      return NV50_IR_SUBOP_ATOM_MAX;
   case nir_intrinsic_bindless_image_atomic_imin:
   case nir_intrinsic_bindless_image_atomic_umin:
   case nir_intrinsic_global_atomic_imin:
   case nir_intrinsic_global_atomic_umin:
   case nir_intrinsic_image_atomic_imin:
   case nir_intrinsic_image_atomic_umin:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_umin:
      return NV50_IR_SUBOP_ATOM_MIN;
   case nir_intrinsic_bindless_image_atomic_xor:
   case nir_intrinsic_global_atomic_xor:
   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_ssbo_atomic_xor:
      return NV50_IR_SUBOP_ATOM_XOR;
   case nir_intrinsic_bindless_image_atomic_inc_wrap:
   case nir_intrinsic_image_atomic_inc_wrap:
      return NV50_IR_SUBOP_ATOM_INC;
   case nir_intrinsic_bindless_image_atomic_dec_wrap:
   case nir_intrinsic_image_atomic_dec_wrap:
      return NV50_IR_SUBOP_ATOM_DEC;

   case nir_intrinsic_group_memory_barrier:
   case nir_intrinsic_memory_barrier:
   case nir_intrinsic_memory_barrier_buffer:
   case nir_intrinsic_memory_barrier_image:
      return NV50_IR_SUBOP_MEMBAR(M, GL);
   case nir_intrinsic_memory_barrier_shared:
      return NV50_IR_SUBOP_MEMBAR(M, CTA);

   case nir_intrinsic_vote_all:
      return NV50_IR_SUBOP_VOTE_ALL;
   case nir_intrinsic_vote_any:
      return NV50_IR_SUBOP_VOTE_ANY;
   case nir_intrinsic_vote_ieq:
      return NV50_IR_SUBOP_VOTE_UNI;
   default:
      return 0;
   }
}

CondCode
Converter::getCondCode(nir_op op)
{
   switch (op) {
   case nir_op_feq32:
   case nir_op_ieq32:
      return CC_EQ;
   case nir_op_fge32:
   case nir_op_ige32:
   case nir_op_uge32:
      return CC_GE;
   case nir_op_flt32:
   case nir_op_ilt32:
   case nir_op_ult32:
      return CC_LT;
   case nir_op_fneu32:
      return CC_NEU;
   case nir_op_ine32:
      return CC_NE;
   default:
      ERROR("couldn't get CondCode for op %s\n", nir_op_infos[op].name);
      assert(false);
      return CC_FL;
   }
}

Converter::LValues&
Converter::convert(nir_alu_dest *dest)
{
   return convert(&dest->dest);
}

Converter::LValues&
Converter::convert(nir_dest *dest)
{
   if (dest->is_ssa)
      return convert(&dest->ssa);
   if (dest->reg.indirect) {
      ERROR("no support for indirects.");
      assert(false);
   }
   return convert(dest->reg.reg);
}

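/* Registers and SSA defs are converted lazily: the first use allocates one
 * value per component (at least 32 bits wide) and caches it in the
 * corresponding map. */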
Converter::LValues&
Converter::convert(nir_register *reg)
{
   assert(!reg->num_array_elems);

   NirDefMap::iterator it = regDefs.find(reg->index);
   if (it != regDefs.end())
      return it->second;

   LValues newDef(reg->num_components);
   for (uint8_t i = 0; i < reg->num_components; i++)
      newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
   return regDefs[reg->index] = newDef;
}

Converter::LValues&
Converter::convert(nir_ssa_def *def)
{
   NirDefMap::iterator it = ssaDefs.find(def->index);
   if (it != ssaDefs.end())
      return it->second;

   LValues newDef(def->num_components);
   for (uint8_t i = 0; i < def->num_components; i++)
      newDef[i] = getSSA(std::max(4, def->bit_size / 8));
   return ssaDefs[def->index] = newDef;
}

Value*
Converter::getSrc(nir_alu_src *src, uint8_t component)
{
   if (src->abs || src->negate) {
      ERROR("modifiers currently not supported on nir_alu_src\n");
      assert(false);
   }
   return getSrc(&src->src, src->swizzle[component]);
}

Value*
Converter::getSrc(nir_register *reg, uint8_t idx)
{
   NirDefMap::iterator it = regDefs.find(reg->index);
   if (it == regDefs.end())
      return convert(reg)[idx];
   return it->second[idx];
}

Value*
Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
{
   if (src->is_ssa)
      return getSrc(src->ssa, idx);

   if (src->reg.indirect) {
      if (indirect)
         return getSrc(src->reg.indirect, idx);
      ERROR("no support for indirects.");
      assert(false);
      return NULL;
   }

   return getSrc(src->reg.reg, idx);
}

Value*
Converter::getSrc(nir_ssa_def *src, uint8_t idx)
{
   ImmediateMap::iterator iit = immediates.find(src->index);
   if (iit != immediates.end())
      return convert((*iit).second, idx);

   NirDefMap::iterator it = ssaDefs.find(src->index);
   if (it == ssaDefs.end()) {
      ERROR("SSA value %u not found\n", src->index);
      assert(false);
      return NULL;
   }
   return it->second[idx];
}

uint32_t
Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
{
   nir_const_value *offset = nir_src_as_const_value(*src);

   if (offset) {
      indirect = NULL;
      return offset[0].u32;
   }

   indirect = getSrc(src, idx, true);
   return 0;
}

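/* Add the intrinsic's base to the constant part of the source. For vec4
 * addressing the indirect part is scaled to 16-byte units (SHL by 4);
 * scalar addressing leaves it untouched. */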
uint32_t
Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect, bool isScalar)
{
   int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);

   if (indirect && !isScalar)
      indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
   return idx;
}

static void
vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
{
   assert(name && index);

   if (slot >= VERT_ATTRIB_MAX) {
      ERROR("invalid vertex attrib slot %u\n", slot);
      assert(false);
      return;
   }

   if (slot >= VERT_ATTRIB_GENERIC0 &&
       slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
      *name = TGSI_SEMANTIC_GENERIC;
      *index = slot - VERT_ATTRIB_GENERIC0;
      return;
   }

   if (slot >= VERT_ATTRIB_TEX0 &&
       slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
      *name = TGSI_SEMANTIC_TEXCOORD;
      *index = slot - VERT_ATTRIB_TEX0;
      return;
   }

   switch (slot) {
   case VERT_ATTRIB_COLOR0:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 0;
      break;
   case VERT_ATTRIB_COLOR1:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 1;
      break;
   case VERT_ATTRIB_EDGEFLAG:
      *name = TGSI_SEMANTIC_EDGEFLAG;
      *index = 0;
      break;
   case VERT_ATTRIB_FOG:
      *name = TGSI_SEMANTIC_FOG;
      *index = 0;
      break;
   case VERT_ATTRIB_NORMAL:
      *name = TGSI_SEMANTIC_NORMAL;
      *index = 0;
      break;
   case VERT_ATTRIB_POS:
      *name = TGSI_SEMANTIC_POSITION;
      *index = 0;
      break;
   case VERT_ATTRIB_POINT_SIZE:
      *name = TGSI_SEMANTIC_PSIZE;
      *index = 0;
      break;
   default:
      ERROR("unknown vert attrib slot %u\n", slot);
      assert(false);
      break;
   }
}

void
Converter::setInterpolate(nv50_ir_varying *var,
                          uint8_t mode,
                          bool centroid,
                          unsigned semantic)
{
   switch (mode) {
   case INTERP_MODE_FLAT:
      var->flat = 1;
      break;
   case INTERP_MODE_NONE:
      if (semantic == TGSI_SEMANTIC_COLOR)
         var->sc = 1;
      else if (semantic == TGSI_SEMANTIC_POSITION)
         var->linear = 1;
      break;
   case INTERP_MODE_NOPERSPECTIVE:
      var->linear = 1;
      break;
   case INTERP_MODE_SMOOTH:
      break;
   }
   var->centroid = centroid;
}

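/* Count how many I/O slots a variable occupies. Per-vertex arrays in
 * geometry and tessellation shaders are addressed per vertex, so the
 * outermost array dimension is dropped from the count. */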
static uint16_t
calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
          bool input, const nir_variable *var)
{
   if (!type->is_array())
      return type->count_attribute_slots(false);

   uint16_t slots;
   switch (stage) {
   case Program::TYPE_GEOMETRY:
      slots = type->count_attribute_slots(false);
      if (input)
         slots /= info.gs.vertices_in;
      break;
   case Program::TYPE_TESSELLATION_CONTROL:
   case Program::TYPE_TESSELLATION_EVAL:
      // remove first dimension
      if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
         slots = type->count_attribute_slots(false);
      else
         slots = type->fields.array->count_attribute_slots(false);
      break;
   default:
      slots = type->count_attribute_slots(false);
      break;
   }

   return slots;
}

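/* Component write mask for one slot of the given type. 64-bit types take
 * two 32-bit components each and may spill into a second slot, in which
 * case the odd slot gets whatever components are left over. */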
static uint8_t
getMaskForType(const glsl_type *type, uint8_t slot) {
   uint16_t comp = type->without_array()->components();
   comp = comp ? comp : 4;

   if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
      comp *= 2;
      if (comp > 4) {
         if (slot % 2)
            comp -= 4;
         else
            comp = 4;
      }
   }

   return (1 << comp) - 1;
}

bool Converter::assignSlots() {
   unsigned name;
   unsigned index;

   info->io.viewportId = -1;
   info->io.mul_zero_wins = nir->info.use_legacy_math_rules;
   info_out->numInputs = 0;
   info_out->numOutputs = 0;
   info_out->numSysVals = 0;

   uint8_t i;
   BITSET_FOREACH_SET(i, nir->info.system_values_read, SYSTEM_VALUE_MAX) {
      info_out->sv[info_out->numSysVals].sn = tgsi_get_sysval_semantic(i);
      info_out->sv[info_out->numSysVals].si = 0;
      info_out->sv[info_out->numSysVals].input = 0;

      switch (i) {
      case SYSTEM_VALUE_VERTEX_ID:
         info_out->sv[info_out->numSysVals].input = 1;
         info_out->io.vertexId = info_out->numSysVals;
         break;
      case SYSTEM_VALUE_INSTANCE_ID:
         info_out->sv[info_out->numSysVals].input = 1;
         info_out->io.instanceId = info_out->numSysVals;
         break;
      case SYSTEM_VALUE_TESS_LEVEL_INNER:
      case SYSTEM_VALUE_TESS_LEVEL_OUTER:
         info_out->sv[info_out->numSysVals].patch = 1;
         break;
      default:
         break;
      }

      info_out->numSysVals += 1;
   }

   if (prog->getType() == Program::TYPE_COMPUTE)
      return true;

   nir_foreach_shader_in_variable(var, nir) {
      const glsl_type *type = var->type;
      int slot = var->data.location;
      uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
      uint32_t vary = var->data.driver_location;
      assert(vary + slots <= NV50_CODEGEN_MAX_VARYINGS);

      switch(prog->getType()) {
      case Program::TYPE_FRAGMENT:
         tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,
                                      &name, &index);
         for (uint16_t i = 0; i < slots; ++i) {
            setInterpolate(&info_out->in[vary + i], var->data.interpolation,
                           var->data.centroid | var->data.sample, name);
         }
         break;
      case Program::TYPE_GEOMETRY:
         tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,
                                      &name, &index);
         break;
      case Program::TYPE_TESSELLATION_CONTROL:
      case Program::TYPE_TESSELLATION_EVAL:
         tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,
                                      &name, &index);
         if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
            info_out->numPatchConstants = MAX2(info_out->numPatchConstants, index + slots);
         break;
      case Program::TYPE_VERTEX:
         if (slot >= VERT_ATTRIB_GENERIC0 && slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX)
            slot = VERT_ATTRIB_GENERIC0 + vary;
         vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
         switch (name) {
         case TGSI_SEMANTIC_EDGEFLAG:
            info_out->io.edgeFlagIn = vary;
            break;
         default:
            break;
         }
         break;
      default:
         ERROR("unknown shader type %u in assignSlots\n", prog->getType());
         return false;
      }

      for (uint16_t i = 0u; i < slots; ++i, ++vary) {
         nv50_ir_varying *v = &info_out->in[vary];

         v->patch = var->data.patch;
         v->sn = name;
         v->si = index + i;
         v->mask |= getMaskForType(type, i) << var->data.location_frac;
      }
      info_out->numInputs = std::max<uint8_t>(info_out->numInputs, vary);
   }

   nir_foreach_shader_out_variable(var, nir) {
      const glsl_type *type = var->type;
      int slot = var->data.location;
      uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
      uint32_t vary = var->data.driver_location;

      assert(vary < NV50_CODEGEN_MAX_VARYINGS);

      switch(prog->getType()) {
      case Program::TYPE_FRAGMENT:
         tgsi_get_gl_frag_result_semantic((gl_frag_result)slot, &name, &index);
         switch (name) {
         case TGSI_SEMANTIC_COLOR:
            if (!var->data.fb_fetch_output)
               info_out->prop.fp.numColourResults++;
            if (var->data.location == FRAG_RESULT_COLOR &&
                nir->info.outputs_written & BITFIELD64_BIT(var->data.location))
               info_out->prop.fp.separateFragData = true;
            // sometimes we get FRAG_RESULT_DATAX with data.index 0
            // sometimes we get FRAG_RESULT_DATA0 with data.index X
            index = index == 0 ? var->data.index : index;
            break;
         case TGSI_SEMANTIC_POSITION:
            info_out->io.fragDepth = vary;
            info_out->prop.fp.writesDepth = true;
            break;
         case TGSI_SEMANTIC_SAMPLEMASK:
            info_out->io.sampleMask = vary;
            break;
         default:
            break;
         }
         break;
      case Program::TYPE_GEOMETRY:
      case Program::TYPE_TESSELLATION_CONTROL:
      case Program::TYPE_TESSELLATION_EVAL:
      case Program::TYPE_VERTEX:
         tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,
                                      &name, &index);

         if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
             name != TGSI_SEMANTIC_TESSOUTER)
            info_out->numPatchConstants = MAX2(info_out->numPatchConstants, index + slots);

         switch (name) {
         case TGSI_SEMANTIC_CLIPDIST:
            info_out->io.genUserClip = -1;
            break;
         case TGSI_SEMANTIC_CLIPVERTEX:
            clipVertexOutput = vary;
            break;
         case TGSI_SEMANTIC_EDGEFLAG:
            info_out->io.edgeFlagOut = vary;
            break;
         case TGSI_SEMANTIC_POSITION:
            if (clipVertexOutput < 0)
               clipVertexOutput = vary;
            break;
         default:
            break;
         }
         break;
      default:
         ERROR("unknown shader type %u in assignSlots\n", prog->getType());
         return false;
      }

      for (uint16_t i = 0u; i < slots; ++i, ++vary) {
         nv50_ir_varying *v = &info_out->out[vary];
         v->patch = var->data.patch;
         v->sn = name;
         v->si = index + i;
         v->mask |= getMaskForType(type, i) << var->data.location_frac;

         if (nir->info.outputs_read & 1ull << slot)
            v->oread = 1;
      }
      info_out->numOutputs = std::max<uint8_t>(info_out->numOutputs, vary);
   }

   if (info_out->io.genUserClip > 0) {
      info_out->io.clipDistances = info_out->io.genUserClip;

      const unsigned int nOut = (info_out->io.genUserClip + 3) / 4;

      for (unsigned int n = 0; n < nOut; ++n) {
         unsigned int i = info_out->numOutputs++;
         info_out->out[i].id = i;
         info_out->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
         info_out->out[i].si = n;
         info_out->out[i].mask = ((1 << info_out->io.clipDistances) - 1) >> (n * 4);
      }
   }

   return info->assignSlots(info_out) == 0;
}

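/* Compute the byte address of a varying component. 64-bit components span
 * two 32-bit slots, so the component offset is doubled and overflow past
 * slot 3 carries into the next varying index. */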
uint32_t
Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
{
   DataType ty;
   int offset = nir_intrinsic_component(insn);
   bool input;

   if (nir_intrinsic_infos[insn->intrinsic].has_dest)
      ty = getDType(insn);
   else
      ty = getSType(insn->src[0], false, false);

   switch (insn->intrinsic) {
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_per_vertex_input:
      input = true;
      break;
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output:
      input = false;
      break;
   default:
      ERROR("unknown intrinsic in getSlotAddress %s",
            nir_intrinsic_infos[insn->intrinsic].name);
      input = false;
      assert(false);
      break;
   }

   if (typeSizeof(ty) == 8) {
      slot *= 2;
      slot += offset;
      if (slot >= 4) {
         idx += 1;
         slot -= 4;
      }
   } else {
      slot += offset;
   }

   assert(slot < 4);
   assert(!input || idx < NV50_CODEGEN_MAX_VARYINGS);
   assert(input || idx < NV50_CODEGEN_MAX_VARYINGS);

   const nv50_ir_varying *vary = input ? info_out->in : info_out->out;
   return vary[idx].slot[slot] * 4;
}

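/* Emit a load from a codegen data file. 64-bit values are loaded as two
 * 32-bit halves and merged whenever the access is indirect or the target
 * can't do native 64-bit accesses on that file. */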
Instruction *
Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
                    uint32_t base, uint8_t c, Value *indirect0,
                    Value *indirect1, bool patch)
{
   unsigned int tySize = typeSizeof(ty);

   if (tySize == 8 &&
       (indirect0 || !prog->getTarget()->isAccessSupported(file, TYPE_U64))) {
      Value *lo = getSSA();
      Value *hi = getSSA();

      Instruction *loi =
         mkLoad(TYPE_U32, lo,
                mkSymbol(file, i, TYPE_U32, base + c * tySize),
                indirect0);
      loi->setIndirect(0, 1, indirect1);
      loi->perPatch = patch;

      Instruction *hii =
         mkLoad(TYPE_U32, hi,
                mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
                indirect0);
      hii->setIndirect(0, 1, indirect1);
      hii->perPatch = patch;

      return mkOp2(OP_MERGE, ty, def, lo, hi);
   } else {
      Instruction *ld =
         mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
      ld->setIndirect(0, 1, indirect1);
      ld->perPatch = patch;
      return ld;
   }
}

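/* Counterpart of loadFrom for outputs: 64-bit sources are split into two
 * 32-bit stores, and exports first copy the source into a fresh SSA value. */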
void
Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
                   DataType ty, Value *src, uint8_t idx, uint8_t c,
                   Value *indirect0, Value *indirect1)
{
   uint8_t size = typeSizeof(ty);
   uint32_t address = getSlotAddress(insn, idx, c);

   if (size == 8 && indirect0) {
      Value *split[2];
      mkSplit(split, 4, src);

      if (op == OP_EXPORT) {
         split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
         split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
      }

      mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
              split[0])->perPatch = info_out->out[idx].patch;
      mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0,
              split[1])->perPatch = info_out->out[idx].patch;
   } else {
      if (op == OP_EXPORT)
         src = mkMov(getSSA(size), src, ty)->getDef(0);
      mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
              src)->perPatch = info_out->out[idx].patch;
   }
}

bool
Converter::parseNIR()
{
   info_out->bin.tlsSpace = nir->scratch_size;
   info_out->io.clipDistances = nir->info.clip_distance_array_size;
   info_out->io.cullDistances = nir->info.cull_distance_array_size;
   info_out->io.layer_viewport_relative = nir->info.layer_viewport_relative;

   switch(prog->getType()) {
   case Program::TYPE_COMPUTE:
      info->prop.cp.numThreads[0] = nir->info.workgroup_size[0];
      info->prop.cp.numThreads[1] = nir->info.workgroup_size[1];
      info->prop.cp.numThreads[2] = nir->info.workgroup_size[2];
      info_out->bin.smemSize = std::max(info_out->bin.smemSize, nir->info.shared_size);

      if (info->target < NVISA_GF100_CHIPSET) {
         int gmemSlot = 0;

         for (unsigned i = 0; i < nir->info.num_ssbos; i++) {
            info_out->prop.cp.gmem[gmemSlot++] = {.valid = 1, .image = 0, .slot = i};
            assert(gmemSlot < 16);
         }
         nir_foreach_image_variable(var, nir) {
            int image_count = glsl_type_get_image_count(var->type);
            for (int i = 0; i < image_count; i++) {
               info_out->prop.cp.gmem[gmemSlot++] = {.valid = 1, .image = 1, .slot = var->data.binding + i};
               assert(gmemSlot < 16);
            }
         }
      }

      break;
   case Program::TYPE_FRAGMENT:
      info_out->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
      prog->persampleInvocation =
         BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
         BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS);
      info_out->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
      info_out->prop.fp.readsSampleLocations =
         BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS);
      info_out->prop.fp.usesDiscard = nir->info.fs.uses_discard || nir->info.fs.uses_demote;
      info_out->prop.fp.usesSampleMaskIn =
         BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
      break;
   case Program::TYPE_GEOMETRY:
      info_out->prop.gp.instanceCount = nir->info.gs.invocations;
      info_out->prop.gp.maxVertices = nir->info.gs.vertices_out;
      info_out->prop.gp.outputPrim = nir->info.gs.output_primitive;
      break;
   case Program::TYPE_TESSELLATION_CONTROL:
   case Program::TYPE_TESSELLATION_EVAL:
      info_out->prop.tp.domain = u_tess_prim_from_shader(nir->info.tess._primitive_mode);
      info_out->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
      info_out->prop.tp.outputPrim =
         nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES;
      info_out->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
      info_out->prop.tp.winding = !nir->info.tess.ccw;
      break;
   case Program::TYPE_VERTEX:
      info_out->prop.vp.usesDrawParameters =
         BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX) ||
         BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE) ||
         BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID);
      break;
   default:
      break;
   }

   return true;
}

bool
Converter::visit(nir_function *function)
{
   assert(function->impl);

   // usually the blocks will set everything up, but main is special
   BasicBlock *entry = new BasicBlock(prog->main);
   exit = new BasicBlock(prog->main);
   blocks[nir_start_block(function->impl)->index] = entry;
   prog->main->setEntry(entry);
   prog->main->setExit(exit);

   setPosition(entry, true);

   if (info_out->io.genUserClip > 0) {
      for (int c = 0; c < 4; ++c)
         clipVtx[c] = getScratch();
   }

   switch (prog->getType()) {
   case Program::TYPE_TESSELLATION_CONTROL:
      outBase = mkOp2v(
         OP_SUB, TYPE_U32, getSSA(),
         mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
         mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
      break;
   case Program::TYPE_FRAGMENT: {
      Symbol *sv = mkSysVal(SV_POSITION, 3);
      fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
      fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
      break;
   }
   default:
      break;
   }

   nir_index_ssa_defs(function->impl);
   foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
      if (!visit(node))
         return false;
   }

   bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
   setPosition(exit, true);

   if ((prog->getType() == Program::TYPE_VERTEX ||
        prog->getType() == Program::TYPE_TESSELLATION_EVAL)
       && info_out->io.genUserClip > 0)
      handleUserClipPlanes();

   // TODO: for non-main functions this needs to be an OP_RETURN
   mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
   return true;
}

bool
Converter::visit(nir_cf_node *node)
{
   switch (node->type) {
   case nir_cf_node_block:
      return visit(nir_cf_node_as_block(node));
   case nir_cf_node_if:
      return visit(nir_cf_node_as_if(node));
   case nir_cf_node_loop:
      return visit(nir_cf_node_as_loop(node));
   default:
      ERROR("unknown nir_cf_node type %u\n", node->type);
      return false;
   }
}

bool
Converter::visit(nir_block *block)
{
   if (!block->predecessors->entries && block->instr_list.is_empty())
      return true;

   BasicBlock *bb = convert(block);

   setPosition(bb, true);
   nir_foreach_instr(insn, block) {
      if (!visit(insn))
         return false;
   }
   return true;
}

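/* Convert a NIR if: branch to the else block on a false condition and let
 * both sides fall through to the common successor. When both sides meet at
 * the same block and the nesting is shallow enough, the construct is
 * bracketed with JOINAT/JOIN so the threads reconverge there. */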
bool
Converter::visit(nir_if *nif)
{
   curIfDepth++;

   DataType sType = getSType(nif->condition, false, false);
   Value *src = getSrc(&nif->condition, 0);

   nir_block *lastThen = nir_if_last_then_block(nif);
   nir_block *lastElse = nir_if_last_else_block(nif);

   BasicBlock *headBB = bb;
   BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
   BasicBlock *elseBB = convert(nir_if_first_else_block(nif));

   bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
   bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);

   bool insertJoins = lastThen->successors[0] == lastElse->successors[0];
   mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType);

   foreach_list_typed(nir_cf_node, node, node, &nif->then_list) {
      if (!visit(node))
         return false;
   }

   setPosition(convert(lastThen), true);
   if (!bb->isTerminated()) {
      BasicBlock *tailBB = convert(lastThen->successors[0]);
      mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
      bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
   } else {
      insertJoins = insertJoins && bb->getExit()->op == OP_BRA;
   }

   foreach_list_typed(nir_cf_node, node, node, &nif->else_list) {
      if (!visit(node))
         return false;
   }

   setPosition(convert(lastElse), true);
   if (!bb->isTerminated()) {
      BasicBlock *tailBB = convert(lastElse->successors[0]);
      mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
      bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
   } else {
      insertJoins = insertJoins && bb->getExit()->op == OP_BRA;
   }

   if (curIfDepth > 6) {
      insertJoins = false;
   }

   /* we made sure that all threads would converge at the same block */
   if (insertJoins) {
      BasicBlock *conv = convert(lastThen->successors[0]);
      setPosition(headBB->getExit(), false);
      headBB->joinAt = mkFlow(OP_JOINAT, conv, CC_ALWAYS, NULL);
      setPosition(conv, false);
      mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
   }

   curIfDepth--;

   return true;
}

// TODO: add convergence
bool
Converter::visit(nir_loop *loop)
{
   curLoopDepth += 1;
   func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth);

   BasicBlock *loopBB = convert(nir_loop_first_block(loop));
   BasicBlock *tailBB = convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));

   bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE);

   mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL);
   setPosition(loopBB, false);
   mkFlow(OP_PRECONT, loopBB, CC_ALWAYS, NULL);

   foreach_list_typed(nir_cf_node, node, node, &loop->body) {
      if (!visit(node))
         return false;
   }

   if (!bb->isTerminated()) {
      mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
      bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
   }

   if (tailBB->cfg.incidentCount() == 0)
      loopBB->cfg.attach(&tailBB->cfg, Graph::Edge::TREE);

   curLoopDepth -= 1;

   info_out->loops++;

   return true;
}

bool
Converter::visit(nir_instr *insn)
{
   // we need an insertion point for on-the-fly generated immediate loads
   immInsertPos = bb->getExit();
   switch (insn->type) {
   case nir_instr_type_alu:
      return visit(nir_instr_as_alu(insn));
   case nir_instr_type_intrinsic:
      return visit(nir_instr_as_intrinsic(insn));
   case nir_instr_type_jump:
      return visit(nir_instr_as_jump(insn));
   case nir_instr_type_load_const:
      return visit(nir_instr_as_load_const(insn));
   case nir_instr_type_ssa_undef:
      return visit(nir_instr_as_ssa_undef(insn));
   case nir_instr_type_tex:
      return visit(nir_instr_as_tex(insn));
   default:
      ERROR("unknown nir_instr type %u\n", insn->type);
      return false;
   }
   return true;
}

SVSemantic
Converter::convert(nir_intrinsic_op intr)
{
   switch (intr) {
   case nir_intrinsic_load_base_vertex:
      return SV_BASEVERTEX;
   case nir_intrinsic_load_base_instance:
      return SV_BASEINSTANCE;
   case nir_intrinsic_load_draw_id:
      return SV_DRAWID;
   case nir_intrinsic_load_front_face:
      return SV_FACE;
   case nir_intrinsic_is_helper_invocation:
   case nir_intrinsic_load_helper_invocation:
      return SV_THREAD_KILL;
   case nir_intrinsic_load_instance_id:
      return SV_INSTANCE_ID;
   case nir_intrinsic_load_invocation_id:
      return SV_INVOCATION_ID;
   case nir_intrinsic_load_workgroup_size:
      return SV_NTID;
   case nir_intrinsic_load_local_invocation_id:
      return SV_TID;
   case nir_intrinsic_load_num_workgroups:
      return SV_NCTAID;
   case nir_intrinsic_load_patch_vertices_in:
      return SV_VERTEX_COUNT;
   case nir_intrinsic_load_primitive_id:
      return SV_PRIMITIVE_ID;
   case nir_intrinsic_load_sample_id:
      return SV_SAMPLE_INDEX;
   case nir_intrinsic_load_sample_mask_in:
      return SV_SAMPLE_MASK;
   case nir_intrinsic_load_sample_pos:
      return SV_SAMPLE_POS;
   case nir_intrinsic_load_subgroup_eq_mask:
      return SV_LANEMASK_EQ;
   case nir_intrinsic_load_subgroup_ge_mask:
      return SV_LANEMASK_GE;
   case nir_intrinsic_load_subgroup_gt_mask:
      return SV_LANEMASK_GT;
   case nir_intrinsic_load_subgroup_le_mask:
      return SV_LANEMASK_LE;
   case nir_intrinsic_load_subgroup_lt_mask:
      return SV_LANEMASK_LT;
   case nir_intrinsic_load_subgroup_invocation:
      return SV_LANEID;
   case nir_intrinsic_load_tess_coord:
      return SV_TESS_COORD;
   case nir_intrinsic_load_tess_level_inner:
      return SV_TESS_INNER;
   case nir_intrinsic_load_tess_level_outer:
      return SV_TESS_OUTER;
   case nir_intrinsic_load_vertex_id:
      return SV_VERTEX_ID;
   case nir_intrinsic_load_workgroup_id:
      return SV_CTAID;
   case nir_intrinsic_load_work_dim:
      return SV_WORK_DIM;
   default:
      ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
            nir_intrinsic_infos[intr].name);
      assert(false);
      return SV_LAST;
   }
}

bool
Converter::visit(nir_intrinsic_instr *insn)
{
   nir_intrinsic_op op = insn->intrinsic;
   const nir_intrinsic_info &opInfo = nir_intrinsic_infos[op];
   unsigned dest_components = nir_intrinsic_dest_components(insn);

   switch (op) {
   case nir_intrinsic_load_uniform: {
      LValues &newDefs = convert(&insn->dest);
      const DataType dType = getDType(insn);
      Value *indirect;
      uint32_t coffset = getIndirect(insn, 0, 0, indirect);
      for (uint8_t i = 0; i < dest_components; ++i) {
         loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect);
      }
      break;
   }
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output: {
      Value *indirect;
      DataType dType = getSType(insn->src[0], false, false);
      uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect);

      for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
         if (!((1u << i) & nir_intrinsic_write_mask(insn)))
            continue;

         uint8_t offset = 0;
         Value *src = getSrc(&insn->src[0], i);
         switch (prog->getType()) {
         case Program::TYPE_FRAGMENT: {
            if (info_out->out[idx].sn == TGSI_SEMANTIC_POSITION) {
               // TGSI uses a different interface than NIR: TGSI stores the
               // value in the z component, NIR in x
               offset += 2;
               src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
            }
            break;
         }
         case Program::TYPE_GEOMETRY:
         case Program::TYPE_TESSELLATION_EVAL:
         case Program::TYPE_VERTEX: {
            if (info_out->io.genUserClip > 0 && idx == (uint32_t)clipVertexOutput) {
               mkMov(clipVtx[i], src);
               src = clipVtx[i];
            }
            break;
         }
         default:
            break;
         }

         storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + offset, indirect);
      }
      break;
   }
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_output: {
      LValues &newDefs = convert(&insn->dest);

      // FBFetch
      if (prog->getType() == Program::TYPE_FRAGMENT &&
          op == nir_intrinsic_load_output) {
         std::vector<Value*> defs, srcs;
         uint8_t mask = 0;

         srcs.push_back(getSSA());
         srcs.push_back(getSSA());
         Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0));
         Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1));
         mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
         mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;

         srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));
         srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));

         for (uint8_t i = 0u; i < dest_components; ++i) {
            defs.push_back(newDefs[i]);
            mask |= 1 << i;
         }

         TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs);
         texi->tex.levelZero = true;
         texi->tex.mask = mask;
         texi->tex.useOffsets = 0;
         texi->tex.r = 0xffff;
         texi->tex.s = 0xffff;

         info_out->prop.fp.readsFramebuffer = true;
         break;
      }

      const DataType dType = getDType(insn);
      Value *indirect;
      bool input = op != nir_intrinsic_load_output;
      operation nvirOp;
      uint32_t mode = 0;

      uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect);
      nv50_ir_varying& vary = input ? info_out->in[idx] : info_out->out[idx];

      // see load_barycentric_* handling
      if (prog->getType() == Program::TYPE_FRAGMENT) {
         if (op == nir_intrinsic_load_interpolated_input) {
            ImmediateValue immMode;
            if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode))
               mode = immMode.reg.data.u32;
         }
         if (mode == NV50_IR_INTERP_DEFAULT)
            mode |= translateInterpMode(&vary, nvirOp);
         else {
            if (vary.linear) {
               nvirOp = OP_LINTERP;
               mode |= NV50_IR_INTERP_LINEAR;
            } else {
               nvirOp = OP_PINTERP;
               mode |= NV50_IR_INTERP_PERSPECTIVE;
            }
         }
      }

      for (uint8_t i = 0u; i < dest_components; ++i) {
         uint32_t address = getSlotAddress(insn, idx, i);
         Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address);
         if (prog->getType() == Program::TYPE_FRAGMENT) {
            int s = 1;
            if (typeSizeof(dType) == 8) {
               Value *lo = getSSA();
               Value *hi = getSSA();
               Instruction *interp;

               interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
               if (nvirOp == OP_PINTERP)
                  interp->setSrc(s++, fp.position);
               if (mode & NV50_IR_INTERP_OFFSET)
                  interp->setSrc(s++, getSrc(&insn->src[0], 0));
               interp->setInterpolate(mode);
               interp->setIndirect(0, 0, indirect);

               Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4);
               interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
               if (nvirOp == OP_PINTERP)
                  interp->setSrc(s++, fp.position);
               if (mode & NV50_IR_INTERP_OFFSET)
                  interp->setSrc(s++, getSrc(&insn->src[0], 0));
               interp->setInterpolate(mode);
               interp->setIndirect(0, 0, indirect);

               mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
            } else {
               Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
               if (nvirOp == OP_PINTERP)
                  interp->setSrc(s++, fp.position);
               if (mode & NV50_IR_INTERP_OFFSET)
                  interp->setSrc(s++, getSrc(&insn->src[0], 0));
               interp->setInterpolate(mode);
               interp->setIndirect(0, 0, indirect);
            }
         } else {
            mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;
         }
      }
      break;
   }
   case nir_intrinsic_load_barycentric_at_offset:
   case nir_intrinsic_load_barycentric_at_sample:
   case nir_intrinsic_load_barycentric_centroid:
   case nir_intrinsic_load_barycentric_pixel:
   case nir_intrinsic_load_barycentric_sample: {
      LValues &newDefs = convert(&insn->dest);
      uint32_t mode;

      if (op == nir_intrinsic_load_barycentric_centroid ||
          op == nir_intrinsic_load_barycentric_sample) {
         mode = NV50_IR_INTERP_CENTROID;
      } else if (op == nir_intrinsic_load_barycentric_at_offset) {
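         // Clamp the offset to the supported range ([-0.5, 0.4375]), convert
         // it to fixed point and pack x/y into a single register for the
         // interpolation instruction.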
1821 Value *offs[2];
1822 for (uint8_t c = 0; c < 2; c++) {
1823 offs[c] = getScratch();
1824 mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(&insn->src[0], c), loadImm(NULL, 0.4375f));
1825 mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
1826 mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
1827 mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
1828 }
1829 mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), offs[0]);
1830
1831 mode = NV50_IR_INTERP_OFFSET;
1832 } else if (op == nir_intrinsic_load_barycentric_pixel) {
1833 mode = NV50_IR_INTERP_DEFAULT;
1834 } else if (op == nir_intrinsic_load_barycentric_at_sample) {
1835 info_out->prop.fp.readsSampleLocations = true;
1836 Value *sample = getSSA();
1837 mkOp3(OP_SELP, TYPE_U32, sample, mkImm(0), getSrc(&insn->src[0], 0), mkImm(0))
1838 ->subOp = 2;
1839 mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], sample)->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
1840 mode = NV50_IR_INTERP_OFFSET;
1841 } else {
1842 unreachable("all intrinsics already handled above");
1843 }
1844
1845 loadImm(newDefs[1], mode);
1846 break;
1847 }
1848 case nir_intrinsic_demote:
1849 case nir_intrinsic_discard:
1850 mkOp(OP_DISCARD, TYPE_NONE, NULL);
1851 break;
1852 case nir_intrinsic_demote_if:
1853 case nir_intrinsic_discard_if: {
1854 Value *pred = getSSA(1, FILE_PREDICATE);
1855 if (insn->num_components > 1) {
1856 ERROR("nir_intrinsic_discard_if only supported with 1 component!\n");
1857 assert(false);
1858 return false;
1859 }
1860 mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
1861 mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
1862 break;
1863 }
1864 case nir_intrinsic_load_base_vertex:
1865 case nir_intrinsic_load_base_instance:
1866 case nir_intrinsic_load_draw_id:
1867 case nir_intrinsic_load_front_face:
1868 case nir_intrinsic_is_helper_invocation:
1869 case nir_intrinsic_load_helper_invocation:
1870 case nir_intrinsic_load_instance_id:
1871 case nir_intrinsic_load_invocation_id:
1872 case nir_intrinsic_load_workgroup_size:
1873 case nir_intrinsic_load_local_invocation_id:
1874 case nir_intrinsic_load_num_workgroups:
1875 case nir_intrinsic_load_patch_vertices_in:
1876 case nir_intrinsic_load_primitive_id:
1877 case nir_intrinsic_load_sample_id:
1878 case nir_intrinsic_load_sample_mask_in:
1879 case nir_intrinsic_load_sample_pos:
1880 case nir_intrinsic_load_subgroup_eq_mask:
1881 case nir_intrinsic_load_subgroup_ge_mask:
1882 case nir_intrinsic_load_subgroup_gt_mask:
1883 case nir_intrinsic_load_subgroup_le_mask:
1884 case nir_intrinsic_load_subgroup_lt_mask:
1885 case nir_intrinsic_load_subgroup_invocation:
1886 case nir_intrinsic_load_tess_coord:
1887 case nir_intrinsic_load_tess_level_inner:
1888 case nir_intrinsic_load_tess_level_outer:
1889 case nir_intrinsic_load_vertex_id:
1890 case nir_intrinsic_load_workgroup_id:
1891 case nir_intrinsic_load_work_dim: {
1892 const DataType dType = getDType(insn);
1893 SVSemantic sv = convert(op);
1894 LValues &newDefs = convert(&insn->dest);
1895
1896 for (uint8_t i = 0u; i < nir_intrinsic_dest_components(insn); ++i) {
1897 Value *def;
1898 if (typeSizeof(dType) == 8)
1899 def = getSSA();
1900 else
1901 def = newDefs[i];
1902
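// a workgroup size of 1 in this dimension means the local id there is always 0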
1903 if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {
1904 loadImm(def, 0u);
1905 } else {
1906 Symbol *sym = mkSysVal(sv, i);
1907 Instruction *rdsv = mkOp1(OP_RDSV, TYPE_U32, def, sym);
1908 if (sv == SV_TESS_OUTER || sv == SV_TESS_INNER)
1909 rdsv->perPatch = 1;
1910 }
1911
1912 if (typeSizeof(dType) == 8)
1913 mkOp2(OP_MERGE, dType, newDefs[i], def, loadImm(getSSA(), 0u));
1914 }
1915 break;
1916 }
1917 // constants
1918 case nir_intrinsic_load_subgroup_size: {
1919 LValues &newDefs = convert(&insn->dest);
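// warps are 32 lanes wide on NVIDIA hardware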
1920 loadImm(newDefs[0], 32u);
1921 break;
1922 }
1923 case nir_intrinsic_vote_all:
1924 case nir_intrinsic_vote_any:
1925 case nir_intrinsic_vote_ieq: {
1926 LValues &newDefs = convert(&insn->dest);
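// reduce the source to a predicate, vote across the warp, then widen the predicate back to 32 bit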
1927 Value *pred = getScratch(1, FILE_PREDICATE);
1928 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
1929 mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op);
1930 mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred);
1931 break;
1932 }
1933 case nir_intrinsic_ballot: {
1934 LValues &newDefs = convert(&insn->dest);
1935 Value *pred = getSSA(1, FILE_PREDICATE);
1936 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
1937 mkOp1(OP_VOTE, TYPE_U32, newDefs[0], pred)->subOp = NV50_IR_SUBOP_VOTE_ANY;
1938 break;
1939 }
1940 case nir_intrinsic_read_first_invocation:
1941 case nir_intrinsic_read_invocation: {
1942 LValues &newDefs = convert(&insn->dest);
1943 const DataType dType = getDType(insn);
1944 Value *tmp = getScratch();
1945
1946 if (op == nir_intrinsic_read_first_invocation) {
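// find the lowest active lane: ballot via VOTE ANY, then bit-reverse and locate the top set bit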
1947 mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
1948 mkOp1(OP_BREV, TYPE_U32, tmp, tmp);
1949 mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
1950 } else
1951 tmp = getSrc(&insn->src[1], 0);
1952
1953 for (uint8_t i = 0; i < dest_components; ++i) {
1954 mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f))
1955 ->subOp = NV50_IR_SUBOP_SHFL_IDX;
1956 }
1957 break;
1958 }
1959 case nir_intrinsic_load_per_vertex_input: {
1960 const DataType dType = getDType(insn);
1961 LValues &newDefs = convert(&insn->dest);
1962 Value *indirectVertex;
1963 Value *indirectOffset;
1964 uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
1965 uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
1966
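// PFETCH yields the input address base for the given vertex within the primitive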
1967 Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
1968 mkImm(baseVertex), indirectVertex);
1969 for (uint8_t i = 0u; i < dest_components; ++i) {
1970 uint32_t address = getSlotAddress(insn, idx, i);
1971 loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0,
1972 indirectOffset, vtxBase, info_out->in[idx].patch);
1973 }
1974 break;
1975 }
1976 case nir_intrinsic_load_per_vertex_output: {
1977 const DataType dType = getDType(insn);
1978 LValues &newDefs = convert(&insn->dest);
1979 Value *indirectVertex;
1980 Value *indirectOffset;
1981 uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
1982 uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
1983 Value *vtxBase = NULL;
1984
1985 if (indirectVertex)
1986 vtxBase = indirectVertex;
1987 else
1988 vtxBase = loadImm(NULL, baseVertex);
1989
1990 vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, vtxBase);
1991
1992 for (uint8_t i = 0u; i < dest_components; ++i) {
1993 uint32_t address = getSlotAddress(insn, idx, i);
1994 loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0,
1995 indirectOffset, vtxBase, info_out->in[idx].patch);
1996 }
1997 break;
1998 }
1999 case nir_intrinsic_emit_vertex: {
2000 if (info_out->io.genUserClip > 0)
2001 handleUserClipPlanes();
2002 uint32_t idx = nir_intrinsic_stream_id(insn);
2003 mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
2004 break;
2005 }
2006 case nir_intrinsic_end_primitive: {
2007 uint32_t idx = nir_intrinsic_stream_id(insn);
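// only stream 0 gets a restart; end_primitive on other streams is a no-op here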
2008 if (idx)
2009 break;
2010 mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
2011 break;
2012 }
2013 case nir_intrinsic_load_ubo: {
2014 const DataType dType = getDType(insn);
2015 LValues &newDefs = convert(&insn->dest);
2016 Value *indirectIndex;
2017 Value *indirectOffset;
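// note: the UBO index is biased by one; c0[] appears to be reserved (presumably for the uniform buffer)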
2018 uint32_t index = getIndirect(&insn->src[0], 0, indirectIndex) + 1;
2019 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2020 if (indirectOffset)
2021 indirectOffset = mkOp1v(OP_MOV, TYPE_U32, getSSA(4, FILE_ADDRESS), indirectOffset);
2022
2023 for (uint8_t i = 0u; i < dest_components; ++i) {
2024 loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i,
2025 indirectOffset, indirectIndex);
2026 }
2027 break;
2028 }
2029 case nir_intrinsic_get_ssbo_size: {
2030 LValues &newDefs = convert(&insn->dest);
2031 const DataType dType = getDType(insn);
2032 Value *indirectBuffer;
2033 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2034
2035 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, 0);
2036 mkOp1(OP_BUFQ, dType, newDefs[0], sym)->setIndirect(0, 0, indirectBuffer);
2037 break;
2038 }
2039 case nir_intrinsic_store_ssbo: {
2040 DataType sType = getSType(insn->src[0], false, false);
2041 Value *indirectBuffer;
2042 Value *indirectOffset;
2043 uint32_t buffer = getIndirect(&insn->src[1], 0, indirectBuffer);
2044 uint32_t offset = getIndirect(&insn->src[2], 0, indirectOffset);
2045
2046 for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
2047 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2048 continue;
2049 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType,
2050 offset + i * typeSizeof(sType));
2051 mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i))
2052 ->setIndirect(0, 1, indirectBuffer);
2053 }
2054 info_out->io.globalAccess |= 0x2;
2055 break;
2056 }
2057 case nir_intrinsic_load_ssbo: {
2058 const DataType dType = getDType(insn);
2059 LValues &newDefs = convert(&insn->dest);
2060 Value *indirectBuffer;
2061 Value *indirectOffset;
2062 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2063 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2064
2065 for (uint8_t i = 0u; i < dest_components; ++i)
2066 loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i,
2067 indirectOffset, indirectBuffer);
2068
2069 info_out->io.globalAccess |= 0x1;
2070 break;
2071 }
2072 case nir_intrinsic_shared_atomic_add:
2073 case nir_intrinsic_shared_atomic_fadd:
2074 case nir_intrinsic_shared_atomic_and:
2075 case nir_intrinsic_shared_atomic_comp_swap:
2076 case nir_intrinsic_shared_atomic_exchange:
2077 case nir_intrinsic_shared_atomic_or:
2078 case nir_intrinsic_shared_atomic_imax:
2079 case nir_intrinsic_shared_atomic_imin:
2080 case nir_intrinsic_shared_atomic_umax:
2081 case nir_intrinsic_shared_atomic_umin:
2082 case nir_intrinsic_shared_atomic_xor: {
2083 const DataType dType = getDType(insn);
2084 LValues &newDefs = convert(&insn->dest);
2085 Value *indirectOffset;
2086 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2087 Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, dType, offset);
2088 Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
2089 if (op == nir_intrinsic_shared_atomic_comp_swap)
2090 atom->setSrc(2, getSrc(&insn->src[2], 0));
2091 atom->setIndirect(0, 0, indirectOffset);
2092 atom->subOp = getSubOp(op);
2093 break;
2094 }
2095 case nir_intrinsic_ssbo_atomic_add:
2096 case nir_intrinsic_ssbo_atomic_fadd:
2097 case nir_intrinsic_ssbo_atomic_and:
2098 case nir_intrinsic_ssbo_atomic_comp_swap:
2099 case nir_intrinsic_ssbo_atomic_exchange:
2100 case nir_intrinsic_ssbo_atomic_or:
2101 case nir_intrinsic_ssbo_atomic_imax:
2102 case nir_intrinsic_ssbo_atomic_imin:
2103 case nir_intrinsic_ssbo_atomic_umax:
2104 case nir_intrinsic_ssbo_atomic_umin:
2105 case nir_intrinsic_ssbo_atomic_xor: {
2106 const DataType dType = getDType(insn);
2107 LValues &newDefs = convert(&insn->dest);
2108 Value *indirectBuffer;
2109 Value *indirectOffset;
2110 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2111 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2112
2113 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, offset);
2114 Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym,
2115 getSrc(&insn->src[2], 0));
2116 if (op == nir_intrinsic_ssbo_atomic_comp_swap)
2117 atom->setSrc(2, getSrc(&insn->src[3], 0));
2118 atom->setIndirect(0, 0, indirectOffset);
2119 atom->setIndirect(0, 1, indirectBuffer);
2120 atom->subOp = getSubOp(op);
2121
2122 info_out->io.globalAccess |= 0x2;
2123 break;
2124 }
2125 case nir_intrinsic_global_atomic_add:
2126 case nir_intrinsic_global_atomic_fadd:
2127 case nir_intrinsic_global_atomic_and:
2128 case nir_intrinsic_global_atomic_comp_swap:
2129 case nir_intrinsic_global_atomic_exchange:
2130 case nir_intrinsic_global_atomic_or:
2131 case nir_intrinsic_global_atomic_imax:
2132 case nir_intrinsic_global_atomic_imin:
2133 case nir_intrinsic_global_atomic_umax:
2134 case nir_intrinsic_global_atomic_umin:
2135 case nir_intrinsic_global_atomic_xor: {
2136 const DataType dType = getDType(insn);
2137 LValues &newDefs = convert(&insn->dest);
2138 Value *address;
2139 uint32_t offset = getIndirect(&insn->src[0], 0, address);
2140
2141 Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, dType, offset);
2142 Instruction *atom =
2143 mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
2144 if (op == nir_intrinsic_global_atomic_comp_swap)
2145 atom->setSrc(2, getSrc(&insn->src[2], 0));
2146 atom->setIndirect(0, 0, address);
2147 atom->subOp = getSubOp(op);
2148
2149 info_out->io.globalAccess |= 0x2;
2150 break;
2151 }
2152 case nir_intrinsic_bindless_image_atomic_add:
2153 case nir_intrinsic_bindless_image_atomic_fadd:
2154 case nir_intrinsic_bindless_image_atomic_and:
2155 case nir_intrinsic_bindless_image_atomic_comp_swap:
2156 case nir_intrinsic_bindless_image_atomic_exchange:
2157 case nir_intrinsic_bindless_image_atomic_imax:
2158 case nir_intrinsic_bindless_image_atomic_umax:
2159 case nir_intrinsic_bindless_image_atomic_imin:
2160 case nir_intrinsic_bindless_image_atomic_umin:
2161 case nir_intrinsic_bindless_image_atomic_or:
2162 case nir_intrinsic_bindless_image_atomic_xor:
2163 case nir_intrinsic_bindless_image_atomic_inc_wrap:
2164 case nir_intrinsic_bindless_image_atomic_dec_wrap:
2165 case nir_intrinsic_bindless_image_load:
2166 case nir_intrinsic_bindless_image_samples:
2167 case nir_intrinsic_bindless_image_size:
2168 case nir_intrinsic_bindless_image_store:
2169 case nir_intrinsic_image_atomic_add:
2170 case nir_intrinsic_image_atomic_fadd:
2171 case nir_intrinsic_image_atomic_and:
2172 case nir_intrinsic_image_atomic_comp_swap:
2173 case nir_intrinsic_image_atomic_exchange:
2174 case nir_intrinsic_image_atomic_imax:
2175 case nir_intrinsic_image_atomic_umax:
2176 case nir_intrinsic_image_atomic_imin:
2177 case nir_intrinsic_image_atomic_umin:
2178 case nir_intrinsic_image_atomic_or:
2179 case nir_intrinsic_image_atomic_xor:
2180 case nir_intrinsic_image_atomic_inc_wrap:
2181 case nir_intrinsic_image_atomic_dec_wrap:
2182 case nir_intrinsic_image_load:
2183 case nir_intrinsic_image_samples:
2184 case nir_intrinsic_image_size:
2185 case nir_intrinsic_image_store: {
2186 std::vector<Value*> srcs, defs;
2187 Value *indirect;
2188 DataType ty;
2189
2190 uint32_t mask = 0;
2191 TexInstruction::Target target =
2192 convert(nir_intrinsic_image_dim(insn), !!nir_intrinsic_image_array(insn), false);
2193 unsigned int argCount = getNIRArgCount(target);
2194 uint16_t location = 0;
2195
2196 if (opInfo.has_dest) {
2197 LValues &newDefs = convert(&insn->dest);
2198 for (uint8_t i = 0u; i < newDefs.size(); ++i) {
2199 defs.push_back(newDefs[i]);
2200 mask |= 1 << i;
2201 }
2202 }
2203
2204 int lod_src = -1;
2205 bool bindless = false;
2206 switch (op) {
2207 case nir_intrinsic_bindless_image_atomic_add:
2208 case nir_intrinsic_bindless_image_atomic_fadd:
2209 case nir_intrinsic_bindless_image_atomic_and:
2210 case nir_intrinsic_bindless_image_atomic_comp_swap:
2211 case nir_intrinsic_bindless_image_atomic_exchange:
2212 case nir_intrinsic_bindless_image_atomic_imax:
2213 case nir_intrinsic_bindless_image_atomic_umax:
2214 case nir_intrinsic_bindless_image_atomic_imin:
2215 case nir_intrinsic_bindless_image_atomic_umin:
2216 case nir_intrinsic_bindless_image_atomic_or:
2217 case nir_intrinsic_bindless_image_atomic_xor:
2218 case nir_intrinsic_bindless_image_atomic_inc_wrap:
2219 case nir_intrinsic_bindless_image_atomic_dec_wrap:
2220 ty = getDType(insn);
2221 bindless = true;
2222 info_out->io.globalAccess |= 0x2;
2223 mask = 0x1;
2224 break;
2225 case nir_intrinsic_image_atomic_add:
2226 case nir_intrinsic_image_atomic_fadd:
2227 case nir_intrinsic_image_atomic_and:
2228 case nir_intrinsic_image_atomic_comp_swap:
2229 case nir_intrinsic_image_atomic_exchange:
2230 case nir_intrinsic_image_atomic_imax:
2231 case nir_intrinsic_image_atomic_umax:
2232 case nir_intrinsic_image_atomic_imin:
2233 case nir_intrinsic_image_atomic_umin:
2234 case nir_intrinsic_image_atomic_or:
2235 case nir_intrinsic_image_atomic_xor:
2236 case nir_intrinsic_image_atomic_inc_wrap:
2237 case nir_intrinsic_image_atomic_dec_wrap:
2238 ty = getDType(insn);
2239 bindless = false;
2240 info_out->io.globalAccess |= 0x2;
2241 mask = 0x1;
2242 break;
2243 case nir_intrinsic_bindless_image_load:
2244 case nir_intrinsic_image_load:
2245 ty = TYPE_U32;
2246 bindless = op == nir_intrinsic_bindless_image_load;
2247 info_out->io.globalAccess |= 0x1;
2248 lod_src = 4;
2249 break;
2250 case nir_intrinsic_bindless_image_store:
2251 case nir_intrinsic_image_store:
2252 ty = TYPE_U32;
2253 bindless = op == nir_intrinsic_bindless_image_store;
2254 info_out->io.globalAccess |= 0x2;
2255 lod_src = 5;
2256 mask = 0xf;
2257 break;
2258 case nir_intrinsic_bindless_image_samples:
2259 mask = 0x8;
2260 FALLTHROUGH;
2261 case nir_intrinsic_image_samples:
2262 ty = TYPE_U32;
2263 bindless = op == nir_intrinsic_bindless_image_samples;
2264 mask = 0x8;
2265 break;
2266 case nir_intrinsic_bindless_image_size:
2267 case nir_intrinsic_image_size:
2268 assert(nir_src_as_uint(insn->src[1]) == 0);
2269 ty = TYPE_U32;
2270 bindless = op == nir_intrinsic_bindless_image_size;
2271 break;
2272 default:
2273 unreachable("unhandled image opcode");
2274 break;
2275 }
2276
2277 if (bindless)
2278 indirect = getSrc(&insn->src[0], 0);
2279 else
2280 location = getIndirect(&insn->src[0], 0, indirect);
2281
2282 /* Pre-GF100, SSBOs and images are in the same HW file, managed by
2283 * prop.cp.gmem. Images are located after SSBOs.
2284 */
2285 if (info->target < NVISA_GF100_CHIPSET)
2286 location += nir->info.num_ssbos;
2287
2288 // coords
2289 if (opInfo.num_srcs >= 2)
2290 for (unsigned int i = 0u; i < argCount; ++i)
2291 srcs.push_back(getSrc(&insn->src[1], i));
2292
2293 // for MS images, the sample index is just another src added after the coords
2294 if (opInfo.num_srcs >= 3 && target.isMS())
2295 srcs.push_back(getSrc(&insn->src[2], 0));
2296
2297 if (opInfo.num_srcs >= 4 && lod_src != 4) {
2298 unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;
2299 for (uint8_t i = 0u; i < components; ++i)
2300 srcs.push_back(getSrc(&insn->src[3], i));
2301 }
2302
2303 if (opInfo.num_srcs >= 5 && lod_src != 5)
2304 // 1 for atomic swap
2305 for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)
2306 srcs.push_back(getSrc(&insn->src[4], i));
2307
2308 TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);
2309 texi->tex.bindless = bindless;
2310 texi->tex.format = nv50_ir::TexInstruction::translateImgFormat(nir_intrinsic_format(insn));
2311 texi->tex.mask = mask;
2312 texi->cache = convert(nir_intrinsic_access(insn));
2313 texi->setType(ty);
2314 texi->subOp = getSubOp(op);
2315
2316 if (indirect)
2317 texi->setIndirectR(indirect);
2318
2319 break;
2320 }
2321 case nir_intrinsic_store_scratch:
2322 case nir_intrinsic_store_shared: {
2323 DataType sType = getSType(insn->src[0], false, false);
2324 Value *indirectOffset;
2325 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2326 if (indirectOffset)
2327 indirectOffset = mkOp1v(OP_MOV, TYPE_U32, getSSA(4, FILE_ADDRESS), indirectOffset);
2328
2329 for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
2330 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2331 continue;
2332 Symbol *sym = mkSymbol(getFile(op), 0, sType, offset + i * typeSizeof(sType));
2333 mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i));
2334 }
2335 break;
2336 }
2337 case nir_intrinsic_load_kernel_input:
2338 case nir_intrinsic_load_scratch:
2339 case nir_intrinsic_load_shared: {
2340 const DataType dType = getDType(insn);
2341 LValues &newDefs = convert(&insn->dest);
2342 Value *indirectOffset;
2343 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2344 if (indirectOffset)
2345 indirectOffset = mkOp1v(OP_MOV, TYPE_U32, getSSA(4, FILE_ADDRESS), indirectOffset);
2346
2347 for (uint8_t i = 0u; i < dest_components; ++i)
2348 loadFrom(getFile(op), 0, dType, newDefs[i], offset, i, indirectOffset);
2349
2350 break;
2351 }
2352 case nir_intrinsic_control_barrier: {
2353 // TODO: add flag to shader_info
2354 info_out->numBarriers = 1;
2355 Instruction *bar = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0));
2356 bar->fixed = 1;
2357 bar->subOp = NV50_IR_SUBOP_BAR_SYNC;
2358 break;
2359 }
2360 case nir_intrinsic_group_memory_barrier:
2361 case nir_intrinsic_memory_barrier:
2362 case nir_intrinsic_memory_barrier_buffer:
2363 case nir_intrinsic_memory_barrier_image:
2364 case nir_intrinsic_memory_barrier_shared: {
2365 Instruction *bar = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
2366 bar->fixed = 1;
2367 bar->subOp = getSubOp(op);
2368 break;
2369 }
2370 case nir_intrinsic_memory_barrier_tcs_patch:
2371 break;
2372 case nir_intrinsic_shader_clock: {
2373 const DataType dType = getDType(insn);
2374 LValues &newDefs = convert(&insn->dest);
2375
2376 loadImm(newDefs[0], 0u);
2377 mkOp1(OP_RDSV, dType, newDefs[1], mkSysVal(SV_CLOCK, 0))->fixed = 1;
2378 break;
2379 }
2380 case nir_intrinsic_load_global:
2381 case nir_intrinsic_load_global_constant: {
2382 const DataType dType = getDType(insn);
2383 LValues &newDefs = convert(&insn->dest);
2384 Value *indirectOffset;
2385 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2386
2387 for (auto i = 0u; i < dest_components; ++i)
2388 loadFrom(FILE_MEMORY_GLOBAL, 0, dType, newDefs[i], offset, i, indirectOffset);
2389
2390 info_out->io.globalAccess |= 0x1;
2391 break;
2392 }
2393 case nir_intrinsic_store_global: {
2394 DataType sType = getSType(insn->src[0], false, false);
2395
2396 for (auto i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
2397 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2398 continue;
2399 if (typeSizeof(sType) == 8) {
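// split 64-bit values and emit two 32-bit stores for the low and high halves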
2400 Value *split[2];
2401 mkSplit(split, 4, getSrc(&insn->src[0], i));
2402
2403 Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, i * typeSizeof(sType));
2404 mkStore(OP_STORE, TYPE_U32, sym, getSrc(&insn->src[1], 0), split[0]);
2405
2406 sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, i * typeSizeof(sType) + 4);
2407 mkStore(OP_STORE, TYPE_U32, sym, getSrc(&insn->src[1], 0), split[1]);
2408 } else {
2409 Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, sType, i * typeSizeof(sType));
2410 mkStore(OP_STORE, sType, sym, getSrc(&insn->src[1], 0), getSrc(&insn->src[0], i));
2411 }
2412 }
2413
2414 info_out->io.globalAccess |= 0x2;
2415 break;
2416 }
2417 default:
2418 ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
2419 return false;
2420 }
2421
2422 return true;
2423 }
2424
2425 bool
2426 Converter::visit(nir_jump_instr *insn)
2427 {
2428 switch (insn->type) {
2429 case nir_jump_return:
2430 // TODO: this only works in the main function
2431 mkFlow(OP_BRA, exit, CC_ALWAYS, NULL);
2432 bb->cfg.attach(&exit->cfg, Graph::Edge::CROSS);
2433 break;
2434 case nir_jump_break:
2435 case nir_jump_continue: {
2436 bool isBreak = insn->type == nir_jump_break;
2437 nir_block *block = insn->instr.block;
2438 BasicBlock *target = convert(block->successors[0]);
2439 mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL);
2440 bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK);
2441 break;
2442 }
2443 default:
2444 ERROR("unknown nir_jump_type %u\n", insn->type);
2445 return false;
2446 }
2447
2448 return true;
2449 }
2450
2451 Value*
2452 Converter::convert(nir_load_const_instr *insn, uint8_t idx)
2453 {
2454 Value *val;
2455
2456 if (immInsertPos)
2457 setPosition(immInsertPos, true);
2458 else
2459 setPosition(bb, false);
2460
2461 switch (insn->def.bit_size) {
2462 case 64:
2463 val = loadImm(getSSA(8), insn->value[idx].u64);
2464 break;
2465 case 32:
2466 val = loadImm(getSSA(4), insn->value[idx].u32);
2467 break;
2468 case 16:
2469 val = loadImm(getSSA(2), insn->value[idx].u16);
2470 break;
2471 case 8:
2472 val = loadImm(getSSA(1), insn->value[idx].u8);
2473 break;
2474 default:
2475 unreachable("unhandled bit size!\n");
2476 }
2477 setPosition(bb, true);
2478 return val;
2479 }
2480
2481 bool
2482 Converter::visit(nir_load_const_instr *insn)
2483 {
2484 assert(insn->def.bit_size <= 64);
2485 immediates[insn->def.index] = insn;
2486 return true;
2487 }
2488
2489 #define DEFAULT_CHECKS \
2490 if (insn->dest.dest.ssa.num_components > 1) { \
2491 ERROR("nir_alu_instr only supported with 1 component!\n"); \
2492 return false; \
2493 } \
2494 if (insn->dest.write_mask != 1) { \
2495 ERROR("nir_alu_instr only supported with a write_mask of 1!\n"); \
2496 return false; \
2497 }
2498 bool
2499 Converter::visit(nir_alu_instr *insn)
2500 {
2501 const nir_op op = insn->op;
2502 const nir_op_info &info = nir_op_infos[op];
2503 DataType dType = getDType(insn);
2504 const std::vector<DataType> sTypes = getSTypes(insn);
2505
2506 Instruction *oldPos = this->bb->getExit();
2507
2508 switch (op) {
2509 case nir_op_fabs:
2510 case nir_op_iabs:
2511 case nir_op_fadd:
2512 case nir_op_iadd:
2513 case nir_op_iand:
2514 case nir_op_fceil:
2515 case nir_op_fcos:
2516 case nir_op_fddx:
2517 case nir_op_fddx_coarse:
2518 case nir_op_fddx_fine:
2519 case nir_op_fddy:
2520 case nir_op_fddy_coarse:
2521 case nir_op_fddy_fine:
2522 case nir_op_fdiv:
2523 case nir_op_idiv:
2524 case nir_op_udiv:
2525 case nir_op_fexp2:
2526 case nir_op_ffloor:
2527 case nir_op_ffma:
2528 case nir_op_flog2:
2529 case nir_op_fmax:
2530 case nir_op_imax:
2531 case nir_op_umax:
2532 case nir_op_fmin:
2533 case nir_op_imin:
2534 case nir_op_umin:
2535 case nir_op_fmod:
2536 case nir_op_imod:
2537 case nir_op_umod:
2538 case nir_op_fmul:
2539 case nir_op_imul:
2540 case nir_op_imul_high:
2541 case nir_op_umul_high:
2542 case nir_op_fneg:
2543 case nir_op_ineg:
2544 case nir_op_inot:
2545 case nir_op_ior:
2546 case nir_op_pack_64_2x32_split:
2547 case nir_op_fpow:
2548 case nir_op_frcp:
2549 case nir_op_frem:
2550 case nir_op_irem:
2551 case nir_op_frsq:
2552 case nir_op_fsat:
2553 case nir_op_ishr:
2554 case nir_op_ushr:
2555 case nir_op_fsin:
2556 case nir_op_fsqrt:
2557 case nir_op_ftrunc:
2558 case nir_op_ishl:
2559 case nir_op_ixor: {
2560 DEFAULT_CHECKS;
2561 LValues &newDefs = convert(&insn->dest);
2562 operation preOp = preOperationNeeded(op);
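// a few ops (e.g. fsin/fcos) need the source run through a pre-op such as PRESIN first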
2563 if (preOp != OP_NOP) {
2564 assert(info.num_inputs < 2);
2565 Value *tmp = getSSA(typeSizeof(dType));
2566 Instruction *i0 = mkOp(preOp, dType, tmp);
2567 Instruction *i1 = mkOp(getOperation(op), dType, newDefs[0]);
2568 if (info.num_inputs) {
2569 i0->setSrc(0, getSrc(&insn->src[0]));
2570 i1->setSrc(0, tmp);
2571 }
2572 i1->subOp = getSubOp(op);
2573 } else {
2574 Instruction *i = mkOp(getOperation(op), dType, newDefs[0]);
2575 for (unsigned s = 0u; s < info.num_inputs; ++s) {
2576 i->setSrc(s, getSrc(&insn->src[s]));
2577
2578 if (this->info->io.mul_zero_wins) {
2579 switch (op) {
2580 case nir_op_fmul:
2581 case nir_op_ffma:
2582 i->dnz = true;
2583 break;
2584 default:
2585 break;
2586 }
2587 }
2588 }
2589 i->subOp = getSubOp(op);
2590 }
2591 break;
2592 }
2593 case nir_op_ifind_msb:
2594 case nir_op_ufind_msb: {
2595 DEFAULT_CHECKS;
2596 LValues &newDefs = convert(&insn->dest);
2597 dType = sTypes[0];
2598 mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2599 break;
2600 }
2601 case nir_op_fround_even: {
2602 DEFAULT_CHECKS;
2603 LValues &newDefs = convert(&insn->dest);
2604 mkCvt(OP_CVT, dType, newDefs[0], dType, getSrc(&insn->src[0]))->rnd = ROUND_NI;
2605 break;
2606 }
2607 // convert instructions
2608 case nir_op_f2f32:
2609 case nir_op_f2i32:
2610 case nir_op_f2u32:
2611 case nir_op_i2f32:
2612 case nir_op_i2i32:
2613 case nir_op_u2f32:
2614 case nir_op_u2u32:
2615 case nir_op_f2f64:
2616 case nir_op_f2i64:
2617 case nir_op_f2u64:
2618 case nir_op_i2f64:
2619 case nir_op_i2i64:
2620 case nir_op_u2f64:
2621 case nir_op_u2u64: {
2622 DEFAULT_CHECKS;
2623 LValues &newDefs = convert(&insn->dest);
2624 Instruction *i = mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2625 if (op == nir_op_f2i32 || op == nir_op_f2i64 || op == nir_op_f2u32 || op == nir_op_f2u64)
2626 i->rnd = ROUND_Z;
2627 i->sType = sTypes[0];
2628 break;
2629 }
2630 // compare instructions
2631 case nir_op_feq32:
2632 case nir_op_ieq32:
2633 case nir_op_fge32:
2634 case nir_op_ige32:
2635 case nir_op_uge32:
2636 case nir_op_flt32:
2637 case nir_op_ilt32:
2638 case nir_op_ult32:
2639 case nir_op_fneu32:
2640 case nir_op_ine32: {
2641 DEFAULT_CHECKS;
2642 LValues &newDefs = convert(&insn->dest);
2643 Instruction *i = mkCmp(getOperation(op),
2644 getCondCode(op),
2645 dType,
2646 newDefs[0],
2647 dType,
2648 getSrc(&insn->src[0]),
2649 getSrc(&insn->src[1]));
2650 if (info.num_inputs == 3)
2651 i->setSrc(2, getSrc(&insn->src[2]));
2652 i->sType = sTypes[0];
2653 break;
2654 }
2655 case nir_op_mov:
2656 case nir_op_vec2:
2657 case nir_op_vec3:
2658 case nir_op_vec4:
2659 case nir_op_vec8:
2660 case nir_op_vec16: {
2661 LValues &newDefs = convert(&insn->dest);
2662 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2663 mkMov(newDefs[c], getSrc(&insn->src[c]), dType);
2664 }
2665 break;
2666 }
2667 // (un)pack
2668 case nir_op_pack_64_2x32: {
2669 LValues &newDefs = convert(&insn->dest);
2670 Instruction *merge = mkOp(OP_MERGE, dType, newDefs[0]);
2671 merge->setSrc(0, getSrc(&insn->src[0], 0));
2672 merge->setSrc(1, getSrc(&insn->src[0], 1));
2673 break;
2674 }
2675 case nir_op_pack_half_2x16_split: {
2676 LValues &newDefs = convert(&insn->dest);
2677 Value *tmpH = getSSA();
2678 Value *tmpL = getSSA();
2679
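// convert both f32 sources to f16 and insert the high half into bits [31:16] via INSBF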
2680 mkCvt(OP_CVT, TYPE_F16, tmpL, TYPE_F32, getSrc(&insn->src[0]));
2681 mkCvt(OP_CVT, TYPE_F16, tmpH, TYPE_F32, getSrc(&insn->src[1]));
2682 mkOp3(OP_INSBF, TYPE_U32, newDefs[0], tmpH, mkImm(0x1010), tmpL);
2683 break;
2684 }
2685 case nir_op_unpack_half_2x16_split_x:
2686 case nir_op_unpack_half_2x16_split_y: {
2687 LValues &newDefs = convert(&insn->dest);
2688 Instruction *cvt = mkCvt(OP_CVT, TYPE_F32, newDefs[0], TYPE_F16, getSrc(&insn->src[0]));
2689 if (op == nir_op_unpack_half_2x16_split_y)
2690 cvt->subOp = 1;
2691 break;
2692 }
2693 case nir_op_unpack_64_2x32: {
2694 LValues &newDefs = convert(&insn->dest);
2695 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, newDefs[1]);
2696 break;
2697 }
2698 case nir_op_unpack_64_2x32_split_x: {
2699 LValues &newDefs = convert(&insn->dest);
2700 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, getSSA());
2701 break;
2702 }
2703 case nir_op_unpack_64_2x32_split_y: {
2704 LValues &newDefs = convert(&insn->dest);
2705 mkOp1(OP_SPLIT, dType, getSSA(), getSrc(&insn->src[0]))->setDef(1, newDefs[0]);
2706 break;
2707 }
2708 // special instructions
2709 case nir_op_fsign:
2710 case nir_op_isign: {
2711 DEFAULT_CHECKS;
2712 DataType iType;
2713 if (::isFloatType(dType))
2714 iType = TYPE_F32;
2715 else
2716 iType = TYPE_S32;
2717
2718 LValues &newDefs = convert(&insn->dest);
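// sign(x) = (x > 0) - (x < 0), built from two SET compares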
2719 LValue *val0 = getScratch();
2720 LValue *val1 = getScratch();
2721 mkCmp(OP_SET, CC_GT, iType, val0, dType, getSrc(&insn->src[0]), zero);
2722 mkCmp(OP_SET, CC_LT, iType, val1, dType, getSrc(&insn->src[0]), zero);
2723
2724 if (dType == TYPE_F64) {
2725 mkOp2(OP_SUB, iType, val0, val0, val1);
2726 mkCvt(OP_CVT, TYPE_F64, newDefs[0], iType, val0);
2727 } else if (dType == TYPE_S64 || dType == TYPE_U64) {
2728 mkOp2(OP_SUB, iType, val0, val1, val0);
2729 mkOp2(OP_SHR, iType, val1, val0, loadImm(NULL, 31));
2730 mkOp2(OP_MERGE, dType, newDefs[0], val0, val1);
2731 } else if (::isFloatType(dType))
2732 mkOp2(OP_SUB, iType, newDefs[0], val0, val1);
2733 else
2734 mkOp2(OP_SUB, iType, newDefs[0], val1, val0);
2735 break;
2736 }
2737 case nir_op_fcsel:
2738 case nir_op_b32csel: {
2739 DEFAULT_CHECKS;
2740 LValues &newDefs = convert(&insn->dest);
2741 mkCmp(OP_SLCT, CC_NE, dType, newDefs[0], sTypes[0], getSrc(&insn->src[1]), getSrc(&insn->src[2]), getSrc(&insn->src[0]));
2742 break;
2743 }
2744 case nir_op_ibitfield_extract:
2745 case nir_op_ubitfield_extract: {
2746 DEFAULT_CHECKS;
2747 Value *tmp = getSSA();
2748 LValues &newDefs = convert(&insn->dest);
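// EXTBF expects {bits, offset} packed as two 8-bit fields; assemble that pair with INSBF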
2749 mkOp3(OP_INSBF, dType, tmp, getSrc(&insn->src[2]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2750 mkOp2(OP_EXTBF, dType, newDefs[0], getSrc(&insn->src[0]), tmp);
2751 break;
2752 }
2753 case nir_op_bfm: {
2754 DEFAULT_CHECKS;
2755 LValues &newDefs = convert(&insn->dest);
2756 mkOp2(OP_BMSK, dType, newDefs[0], getSrc(&insn->src[1]), getSrc(&insn->src[0]))->subOp = NV50_IR_SUBOP_BMSK_W;
2757 break;
2758 }
2759 case nir_op_bitfield_insert: {
2760 DEFAULT_CHECKS;
2761 LValues &newDefs = convert(&insn->dest);
2762 LValue *temp = getSSA();
2763 mkOp3(OP_INSBF, TYPE_U32, temp, getSrc(&insn->src[3]), mkImm(0x808), getSrc(&insn->src[2]));
2764 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[1]), temp, getSrc(&insn->src[0]));
2765 break;
2766 }
2767 case nir_op_bit_count: {
2768 DEFAULT_CHECKS;
2769 LValues &newDefs = convert(&insn->dest);
2770 mkOp2(OP_POPCNT, dType, newDefs[0], getSrc(&insn->src[0]), getSrc(&insn->src[0]));
2771 break;
2772 }
2773 case nir_op_bitfield_reverse: {
2774 DEFAULT_CHECKS;
2775 LValues &newDefs = convert(&insn->dest);
2776 mkOp1(OP_BREV, TYPE_U32, newDefs[0], getSrc(&insn->src[0]));
2777 break;
2778 }
2779 case nir_op_find_lsb: {
2780 DEFAULT_CHECKS;
2781 LValues &newDefs = convert(&insn->dest);
2782 Value *tmp = getSSA();
2783 mkOp1(OP_BREV, TYPE_U32, tmp, getSrc(&insn->src[0]));
2784 mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
2785 break;
2786 }
2787 case nir_op_extract_u8: {
2788 DEFAULT_CHECKS;
2789 LValues &newDefs = convert(&insn->dest);
2790 Value *prmt = getSSA();
2791 mkOp2(OP_OR, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x4440));
2792 mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
2793 break;
2794 }
2795 case nir_op_extract_i8: {
2796 DEFAULT_CHECKS;
2797 LValues &newDefs = convert(&insn->dest);
2798 Value *prmt = getSSA();
2799 mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x1111), loadImm(NULL, 0x8880));
2800 mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
2801 break;
2802 }
2803 case nir_op_extract_u16: {
2804 DEFAULT_CHECKS;
2805 LValues &newDefs = convert(&insn->dest);
2806 Value *prmt = getSSA();
2807 mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x22), loadImm(NULL, 0x4410));
2808 mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
2809 break;
2810 }
2811 case nir_op_extract_i16: {
2812 DEFAULT_CHECKS;
2813 LValues &newDefs = convert(&insn->dest);
2814 Value *prmt = getSSA();
2815 mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x2222), loadImm(NULL, 0x9910));
2816 mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
2817 break;
2818 }
2819 case nir_op_urol: {
2820 DEFAULT_CHECKS;
2821 LValues &newDefs = convert(&insn->dest);
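// a rotate is a funnel shift with the same value as both inputs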
2822 mkOp3(OP_SHF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]),
2823 getSrc(&insn->src[1]), getSrc(&insn->src[0]))
2824 ->subOp = NV50_IR_SUBOP_SHF_L |
2825 NV50_IR_SUBOP_SHF_W |
2826 NV50_IR_SUBOP_SHF_HI;
2827 break;
2828 }
2829 case nir_op_uror: {
2830 DEFAULT_CHECKS;
2831 LValues &newDefs = convert(&insn->dest);
2832 mkOp3(OP_SHF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]),
2833 getSrc(&insn->src[1]), getSrc(&insn->src[0]))
2834 ->subOp = NV50_IR_SUBOP_SHF_R |
2835 NV50_IR_SUBOP_SHF_W |
2836 NV50_IR_SUBOP_SHF_LO;
2837 break;
2838 }
2839 // boolean conversions
2840 case nir_op_b2f32: {
2841 DEFAULT_CHECKS;
2842 LValues &newDefs = convert(&insn->dest);
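// booleans are 0/~0 here, so ANDing with the bit pattern of 1.0f yields 0.0f or 1.0f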
2843 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1.0f));
2844 break;
2845 }
2846 case nir_op_b2f64: {
2847 DEFAULT_CHECKS;
2848 LValues &newDefs = convert(&insn->dest);
2849 Value *tmp = getSSA(4);
2850 mkOp2(OP_AND, TYPE_U32, tmp, getSrc(&insn->src[0]), loadImm(NULL, 0x3ff00000));
2851 mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp);
2852 break;
2853 }
2854 case nir_op_f2b32:
2855 case nir_op_i2b32: {
2856 DEFAULT_CHECKS;
2857 LValues &newDefs = convert(&insn->dest);
2858 Value *src1;
2859 if (typeSizeof(sTypes[0]) == 8) {
2860 src1 = loadImm(getSSA(8), 0.0);
2861 } else {
2862 src1 = zero;
2863 }
2864 CondCode cc = op == nir_op_f2b32 ? CC_NEU : CC_NE;
2865 mkCmp(OP_SET, cc, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), src1);
2866 break;
2867 }
2868 case nir_op_b2i32: {
2869 DEFAULT_CHECKS;
2870 LValues &newDefs = convert(&insn->dest);
2871 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1));
2872 break;
2873 }
2874 case nir_op_b2i64: {
2875 DEFAULT_CHECKS;
2876 LValues &newDefs = convert(&insn->dest);
2877 LValue *def = getScratch();
2878 mkOp2(OP_AND, TYPE_U32, def, getSrc(&insn->src[0]), loadImm(NULL, 1));
2879 mkOp2(OP_MERGE, TYPE_S64, newDefs[0], def, loadImm(NULL, 0));
2880 break;
2881 }
2882 default:
2883 ERROR("unknown nir_op %s\n", info.name);
2884 assert(false);
2885 return false;
2886 }
2887
2888 if (!oldPos) {
2889 oldPos = this->bb->getEntry();
2890 // bail on an empty BB: nothing to mark precise or saturate
2891 if (unlikely(!oldPos))
2892 return true;
2893 oldPos->precise = insn->exact;
2894 }
2895
2896 while (oldPos->next) {
2897 oldPos = oldPos->next;
2898 oldPos->precise = insn->exact;
2899 }
2900 oldPos->saturate = insn->dest.saturate;
2901
2902 return true;
2903 }
2904 #undef DEFAULT_CHECKS
2905
2906 bool
2907 Converter::visit(nir_ssa_undef_instr *insn)
2908 {
2909 LValues &newDefs = convert(&insn->def);
2910 for (uint8_t i = 0u; i < insn->def.num_components; ++i) {
2911 mkOp(OP_NOP, TYPE_NONE, newDefs[i]);
2912 }
2913 return true;
2914 }
2915
2916 #define CASE_SAMPLER(ty) \
2917 case GLSL_SAMPLER_DIM_ ## ty : \
2918 if (isArray && !isShadow) \
2919 return TEX_TARGET_ ## ty ## _ARRAY; \
2920 else if (!isArray && isShadow) \
2921 return TEX_TARGET_## ty ## _SHADOW; \
2922 else if (isArray && isShadow) \
2923 return TEX_TARGET_## ty ## _ARRAY_SHADOW; \
2924 else \
2925 return TEX_TARGET_ ## ty
2926
2927 TexTarget
2928 Converter::convert(glsl_sampler_dim dim, bool isArray, bool isShadow)
2929 {
2930 switch (dim) {
2931 CASE_SAMPLER(1D);
2932 CASE_SAMPLER(2D);
2933 CASE_SAMPLER(CUBE);
2934 case GLSL_SAMPLER_DIM_3D:
2935 return TEX_TARGET_3D;
2936 case GLSL_SAMPLER_DIM_MS:
2937 if (isArray)
2938 return TEX_TARGET_2D_MS_ARRAY;
2939 return TEX_TARGET_2D_MS;
2940 case GLSL_SAMPLER_DIM_RECT:
2941 if (isShadow)
2942 return TEX_TARGET_RECT_SHADOW;
2943 return TEX_TARGET_RECT;
2944 case GLSL_SAMPLER_DIM_BUF:
2945 return TEX_TARGET_BUFFER;
2946 case GLSL_SAMPLER_DIM_EXTERNAL:
2947 return TEX_TARGET_2D;
2948 default:
2949 ERROR("unknown glsl_sampler_dim %u\n", dim);
2950 assert(false);
2951 return TEX_TARGET_COUNT;
2952 }
2953 }
2954 #undef CASE_SAMPLER
2955
2956 unsigned int
2957 Converter::getNIRArgCount(TexInstruction::Target& target)
2958 {
2959 unsigned int result = target.getArgCount();
2960 if (target.isCube() && target.isArray())
2961 result--;
2962 if (target.isMS())
2963 result--;
2964 return result;
2965 }
2966
2967 CacheMode
2968 Converter::convert(enum gl_access_qualifier access)
2969 {
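// volatile bypasses the caches (CV), coherent caches at the global level only (CG), everything else is fully cached (CA)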
2970 if (access & ACCESS_VOLATILE)
2971 return CACHE_CV;
2972 if (access & ACCESS_COHERENT)
2973 return CACHE_CG;
2974 return CACHE_CA;
2975 }
2976
2977 bool
2978 Converter::visit(nir_tex_instr *insn)
2979 {
2980 switch (insn->op) {
2981 case nir_texop_lod:
2982 case nir_texop_query_levels:
2983 case nir_texop_tex:
2984 case nir_texop_texture_samples:
2985 case nir_texop_tg4:
2986 case nir_texop_txb:
2987 case nir_texop_txd:
2988 case nir_texop_txf:
2989 case nir_texop_txf_ms:
2990 case nir_texop_txl:
2991 case nir_texop_txs: {
2992 LValues &newDefs = convert(&insn->dest);
2993 std::vector<Value*> srcs;
2994 std::vector<Value*> defs;
2995 std::vector<nir_src*> offsets;
2996 uint8_t mask = 0;
2997 bool lz = false;
2998 TexInstruction::Target target = convert(insn->sampler_dim, insn->is_array, insn->is_shadow);
2999 operation op = getOperation(insn->op);
3000
3001 int r, s;
3002 int biasIdx = nir_tex_instr_src_index(insn, nir_tex_src_bias);
3003 int compIdx = nir_tex_instr_src_index(insn, nir_tex_src_comparator);
3004 int coordsIdx = nir_tex_instr_src_index(insn, nir_tex_src_coord);
3005 int ddxIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddx);
3006 int ddyIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddy);
3007 int msIdx = nir_tex_instr_src_index(insn, nir_tex_src_ms_index);
3008 int lodIdx = nir_tex_instr_src_index(insn, nir_tex_src_lod);
3009 int offsetIdx = nir_tex_instr_src_index(insn, nir_tex_src_offset);
3010 int sampOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_offset);
3011 int texOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_offset);
3012 int sampHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_handle);
3013 int texHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_handle);
3014
3015 bool bindless = sampHandleIdx != -1 || texHandleIdx != -1;
3016 assert((sampHandleIdx != -1) == (texHandleIdx != -1));
3017
3018 srcs.resize(insn->coord_components);
3019 for (uint8_t i = 0u; i < insn->coord_components; ++i)
3020 srcs[i] = getSrc(&insn->src[coordsIdx].src, i);
3021
3022 // sometimes we get fewer args than target.getArgCount, but codegen expects the latter
3023 if (insn->coord_components) {
3024 uint32_t argCount = target.getArgCount();
3025
3026 if (target.isMS())
3027 argCount -= 1;
3028
3029 for (uint32_t i = 0u; i < (argCount - insn->coord_components); ++i)
3030 srcs.push_back(getSSA());
3031 }
3032
3033 if (insn->op == nir_texop_texture_samples)
3034 srcs.push_back(zero);
3035 else if (!insn->num_srcs)
3036 srcs.push_back(loadImm(NULL, 0));
3037 if (biasIdx != -1)
3038 srcs.push_back(getSrc(&insn->src[biasIdx].src, 0));
3039 if (lodIdx != -1)
3040 srcs.push_back(getSrc(&insn->src[lodIdx].src, 0));
3041 else if (op == OP_TXF)
3042 lz = true;
3043 if (msIdx != -1)
3044 srcs.push_back(getSrc(&insn->src[msIdx].src, 0));
3045 if (offsetIdx != -1)
3046 offsets.push_back(&insn->src[offsetIdx].src);
3047 if (compIdx != -1)
3048 srcs.push_back(getSrc(&insn->src[compIdx].src, 0));
3049 if (texOffIdx != -1) {
3050 srcs.push_back(getSrc(&insn->src[texOffIdx].src, 0));
3051 texOffIdx = srcs.size() - 1;
3052 }
3053 if (sampOffIdx != -1) {
3054 srcs.push_back(getSrc(&insn->src[sampOffIdx].src, 0));
3055 sampOffIdx = srcs.size() - 1;
3056 }
3057 if (bindless) {
3058 // currently we use the lower bits
3059 Value *split[2];
3060 Value *handle = getSrc(&insn->src[sampHandleIdx].src, 0);
3061
3062 mkSplit(split, 4, handle);
3063
3064 srcs.push_back(split[0]);
3065 texOffIdx = srcs.size() - 1;
3066 }
3067
3068 r = bindless ? 0xff : insn->texture_index;
3069 s = bindless ? 0x1f : insn->sampler_index;
3070 if (op == OP_TXF || op == OP_TXQ)
3071 s = 0;
3072
3073 defs.resize(newDefs.size());
3074 for (uint8_t d = 0u; d < newDefs.size(); ++d) {
3075 defs[d] = newDefs[d];
3076 mask |= 1 << d;
3077 }
3078 if (target.isMS() || (op == OP_TEX && prog->getType() != Program::TYPE_FRAGMENT))
3079 lz = true;
3080
3081 TexInstruction *texi = mkTex(op, target.getEnum(), r, s, defs, srcs);
3082 texi->tex.levelZero = lz;
3083 texi->tex.mask = mask;
3084 texi->tex.bindless = bindless;
3085
3086 if (texOffIdx != -1)
3087 texi->tex.rIndirectSrc = texOffIdx;
3088 if (sampOffIdx != -1)
3089 texi->tex.sIndirectSrc = sampOffIdx;
3090
3091 switch (insn->op) {
3092 case nir_texop_tg4:
3093 if (!target.isShadow())
3094 texi->tex.gatherComp = insn->component;
3095 break;
3096 case nir_texop_txs:
3097 texi->tex.query = TXQ_DIMS;
3098 break;
3099 case nir_texop_texture_samples:
3100 texi->tex.mask = 0x4;
3101 texi->tex.query = TXQ_TYPE;
3102 break;
3103 case nir_texop_query_levels:
3104 texi->tex.mask = 0x8;
3105 texi->tex.query = TXQ_DIMS;
3106 break;
3107 default:
3108 break;
3109 }
3110
3111 texi->tex.useOffsets = offsets.size();
3112 if (texi->tex.useOffsets) {
3113 for (uint8_t s = 0; s < texi->tex.useOffsets; ++s) {
3114 for (uint32_t c = 0u; c < 3; ++c) {
3115 uint8_t s2 = std::min(c, target.getDim() - 1);
3116 texi->offset[s][c].set(getSrc(offsets[s], s2));
3117 texi->offset[s][c].setInsn(texi);
3118 }
3119 }
3120 }
3121
3122 if (op == OP_TXG && offsetIdx == -1) {
3123 if (nir_tex_instr_has_explicit_tg4_offsets(insn)) {
3124 texi->tex.useOffsets = 4;
3125 setPosition(texi, false);
3126 for (uint8_t i = 0; i < 4; ++i) {
3127 for (uint8_t j = 0; j < 2; ++j) {
3128 texi->offset[i][j].set(loadImm(NULL, insn->tg4_offsets[i][j]));
3129 texi->offset[i][j].setInsn(texi);
3130 }
3131 }
3132 setPosition(texi, true);
3133 }
3134 }
3135
3136 if (ddxIdx != -1 && ddyIdx != -1) {
3137 for (uint8_t c = 0u; c < target.getDim() + target.isCube(); ++c) {
3138 texi->dPdx[c].set(getSrc(&insn->src[ddxIdx].src, c));
3139 texi->dPdy[c].set(getSrc(&insn->src[ddyIdx].src, c));
3140 }
3141 }
3142
3143 break;
3144 }
3145 default:
3146 ERROR("unknown nir_texop %u\n", insn->op);
3147 return false;
3148 }
3149 return true;
3150 }
3151
3152 /* nouveau's RA doesn't track the liveness of exported registers in the fragment
3153 * shader, so we need all the store_outputs to appear at the end of the shader
3154 * with no other instructions that might generate a temp value in between them.
3155 */
3156 static void
3157 nv_nir_move_stores_to_end(nir_shader *s)
3158 {
3159 nir_function_impl *impl = nir_shader_get_entrypoint(s);
3160 nir_block *block = nir_impl_last_block(impl);
3161 nir_instr *first_store = NULL;
3162
3163 nir_foreach_instr_safe(instr, block) {
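// stop once we reach the first store we already moved to the end of the block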
3164 if (instr == first_store)
3165 break;
3166 if (instr->type != nir_instr_type_intrinsic)
3167 continue;
3168 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
3169 if (intrin->intrinsic == nir_intrinsic_store_output) {
3170 nir_instr_remove(instr);
3171 nir_instr_insert(nir_after_block(block), instr);
3172
3173 if (!first_store)
3174 first_store = instr;
3175 }
3176 }
3177 nir_metadata_preserve(impl,
3178 nir_metadata_block_index |
3179 nir_metadata_dominance);
3180 }
3181
3182 bool
3183 Converter::run()
3184 {
3185 bool progress;
3186
3187 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
3188 nir_print_shader(nir, stderr);
3189
3190 struct nir_lower_subgroups_options subgroup_options = {};
3191 subgroup_options.subgroup_size = 32;
3192 subgroup_options.ballot_bit_size = 32;
3193 subgroup_options.ballot_components = 1;
3194 subgroup_options.lower_elect = true;
3195
3196 /* prepare for IO lowering */
3197 NIR_PASS_V(nir, nir_opt_deref);
3198 NIR_PASS_V(nir, nir_lower_regs_to_ssa);
3199 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
3200
3201 /* codegen assumes vec4 alignment for memory */
3202 NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp, function_temp_type_info);
3203 NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_function_temp, nir_address_format_32bit_offset);
3204 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
3205
3206 NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
3207 type_size, (nir_lower_io_options)0);
3208
3209 NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
3210
3211 struct nir_lower_tex_options tex_options = {};
3212 tex_options.lower_txp = ~0;
3213
3214 NIR_PASS_V(nir, nir_lower_tex, &tex_options);
3215
3216 NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
3217 NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
3218 NIR_PASS_V(nir, nir_lower_phis_to_scalar, false);
3219
3220 /* TODO: improve this lowering/optimisation loop so that we can use
3221 * nir_opt_idiv_const effectively before this.
3222 */
3223 nir_lower_idiv_options idiv_options = {
3224 .imprecise_32bit_lowering = false,
3225 .allow_fp16 = true,
3226 };
3227 NIR_PASS(progress, nir, nir_lower_idiv, &idiv_options);
3228
3229 do {
3230 progress = false;
3231 NIR_PASS(progress, nir, nir_copy_prop);
3232 NIR_PASS(progress, nir, nir_opt_remove_phis);
3233 NIR_PASS(progress, nir, nir_opt_trivial_continues);
3234 NIR_PASS(progress, nir, nir_opt_cse);
3235 NIR_PASS(progress, nir, nir_opt_algebraic);
3236 NIR_PASS(progress, nir, nir_opt_constant_folding);
3237 NIR_PASS(progress, nir, nir_copy_prop);
3238 NIR_PASS(progress, nir, nir_opt_dce);
3239 NIR_PASS(progress, nir, nir_opt_dead_cf);
3240 NIR_PASS(progress, nir, nir_lower_64bit_phis);
3241 } while (progress);
3242
3243 nir_move_options move_options =
3244 (nir_move_options)(nir_move_const_undef |
3245 nir_move_load_ubo |
3246 nir_move_load_uniform |
3247 nir_move_load_input);
3248 NIR_PASS_V(nir, nir_opt_sink, move_options);
3249 NIR_PASS_V(nir, nir_opt_move, move_options);
3250
3251 if (nir->info.stage == MESA_SHADER_FRAGMENT)
3252 NIR_PASS_V(nir, nv_nir_move_stores_to_end);
3253
3254 NIR_PASS_V(nir, nir_lower_bool_to_int32);
3255 NIR_PASS_V(nir, nir_convert_from_ssa, true);
3256
3257 // Garbage collect dead instructions
3258 nir_sweep(nir);
3259
3260 if (!parseNIR()) {
3261 ERROR("Couldn't parse NIR!\n");
3262 return false;
3263 }
3264
3265 if (!assignSlots()) {
3266 ERROR("Couldn't assign slots!\n");
3267 return false;
3268 }
3269
3270 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
3271 nir_print_shader(nir, stderr);
3272
3273 nir_foreach_function(function, nir) {
3274 if (!visit(function))
3275 return false;
3276 }
3277
3278 return true;
3279 }
3280
3281 } // unnamed namespace
3282
3283 namespace nv50_ir {
3284
3285 bool
3286 Program::makeFromNIR(struct nv50_ir_prog_info *info,
3287 struct nv50_ir_prog_info_out *info_out)
3288 {
3289 nir_shader *nir = (nir_shader*)info->bin.source;
3290 Converter converter(this, nir, info, info_out);
3291 bool result = converter.run();
3292 if (!result)
3293 return result;
3294 LoweringHelper lowering;
3295 lowering.run(this);
3296 tlsSize = info_out->bin.tlsSpace;
3297 return result;
3298 }
3299
3300 } // namespace nv50_ir
3301
3302 static nir_shader_compiler_options
3303 nvir_nir_shader_compiler_options(int chipset, uint8_t shader_type)
3304 {
3305 nir_shader_compiler_options op = {};
3306 op.lower_fdiv = (chipset >= NVISA_GV100_CHIPSET);
3307 op.lower_ffma16 = false;
3308 op.lower_ffma32 = false;
3309 op.lower_ffma64 = false;
3310 op.fuse_ffma16 = false; /* nir doesn't track mad vs fma */
3311 op.fuse_ffma32 = false; /* nir doesn't track mad vs fma */
3312 op.fuse_ffma64 = false; /* nir doesn't track mad vs fma */
3313 op.lower_flrp16 = (chipset >= NVISA_GV100_CHIPSET);
3314 op.lower_flrp32 = true;
3315 op.lower_flrp64 = true;
3316 op.lower_fpow = false; // TODO: nir's lowering is broken, or we could use it
3317 op.lower_fsat = false;
3318 op.lower_fsqrt = false; // TODO: only before gm200
3319 op.lower_sincos = false;
3320 op.lower_fmod = true;
3321 op.lower_bitfield_extract = false;
3322 op.lower_bitfield_extract_to_shifts = (chipset >= NVISA_GV100_CHIPSET || chipset < NVISA_GF100_CHIPSET);
3323 op.lower_bitfield_insert = false;
3324 op.lower_bitfield_insert_to_shifts = (chipset >= NVISA_GV100_CHIPSET || chipset < NVISA_GF100_CHIPSET);
3325 op.lower_bitfield_insert_to_bitfield_select = false;
3326 op.lower_bitfield_reverse = (chipset < NVISA_GF100_CHIPSET);
3327 op.lower_bit_count = (chipset < NVISA_GF100_CHIPSET);
3328 op.lower_ifind_msb = (chipset < NVISA_GF100_CHIPSET);
3329 op.lower_find_lsb = (chipset < NVISA_GF100_CHIPSET);
3330 op.lower_uadd_carry = true; // TODO
3331 op.lower_usub_borrow = true; // TODO
3332 op.lower_mul_high = false;
3333 op.lower_fneg = false;
3334 op.lower_ineg = false;
3335 op.lower_scmp = true; // TODO: not implemented yet
3336 op.lower_vector_cmp = false;
3337 op.lower_bitops = false;
3338 op.lower_isign = (chipset >= NVISA_GV100_CHIPSET);
3339 op.lower_fsign = (chipset >= NVISA_GV100_CHIPSET);
3340 op.lower_fdph = false;
3341 op.lower_fdot = false;
3342 op.fdot_replicates = false; // TODO
3343 op.lower_ffloor = false; // TODO
3344 op.lower_ffract = true;
3345 op.lower_fceil = false; // TODO
3346 op.lower_ftrunc = false;
3347 op.lower_ldexp = true;
3348 op.lower_pack_half_2x16 = true;
3349 op.lower_pack_unorm_2x16 = true;
3350 op.lower_pack_snorm_2x16 = true;
3351 op.lower_pack_unorm_4x8 = true;
3352 op.lower_pack_snorm_4x8 = true;
3353 op.lower_unpack_half_2x16 = true;
3354 op.lower_unpack_unorm_2x16 = true;
3355 op.lower_unpack_snorm_2x16 = true;
3356 op.lower_unpack_unorm_4x8 = true;
3357 op.lower_unpack_snorm_4x8 = true;
3358 op.lower_pack_split = false;
3359 op.lower_extract_byte = (chipset < NVISA_GM107_CHIPSET);
3360 op.lower_extract_word = (chipset < NVISA_GM107_CHIPSET);
3361 op.lower_insert_byte = true;
3362 op.lower_insert_word = true;
3363 op.lower_all_io_to_temps = false;
3364 op.lower_all_io_to_elements = false;
3365 op.vertex_id_zero_based = false;
3366 op.lower_base_vertex = false;
3367 op.lower_helper_invocation = false;
3368 op.optimize_sample_mask_in = false;
3369 op.lower_cs_local_index_to_id = true;
3370 op.lower_cs_local_id_to_index = false;
3371 op.lower_device_index_to_zero = false; // TODO
3372 op.lower_wpos_pntc = false; // TODO
3373 op.lower_hadd = true; // TODO
3374 op.lower_uadd_sat = true; // TODO
3375 op.lower_usub_sat = true; // TODO
3376 op.lower_iadd_sat = true; // TODO
3377 op.vectorize_io = false;
3378 op.lower_to_scalar = false;
3379 op.unify_interfaces = false;
3380 op.use_interpolated_input_intrinsics = true;
3381 op.lower_mul_2x32_64 = true; // TODO
3382 op.lower_rotate = (chipset < NVISA_GV100_CHIPSET);
3383 op.has_imul24 = false;
3384 op.intel_vec4 = false;
3385 op.force_indirect_unrolling = (nir_variable_mode) (
3386 ((shader_type == PIPE_SHADER_FRAGMENT) ? nir_var_shader_out : 0) |
3387 /* HW doesn't support indirect addressing of fragment program inputs
3388 * on Volta. The binary driver generates a function to handle every
3389 * possible indirection, and indirectly calls the function to handle
3390 * this instead.
3391 */
3392 ((chipset >= NVISA_GV100_CHIPSET && shader_type == PIPE_SHADER_FRAGMENT) ? nir_var_shader_in : 0)
3393 );
3394 op.force_indirect_unrolling_sampler = (chipset < NVISA_GF100_CHIPSET);
3395 op.max_unroll_iterations = 32;
3396 op.lower_int64_options = (nir_lower_int64_options) (
3397 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul64 : 0) |
3398 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_isign64 : 0) |
3399 nir_lower_divmod64 |
3400 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_high64 : 0) |
3401 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_mov64 : 0) |
3402 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_icmp64 : 0) |
3403 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_iabs64 : 0) |
3404 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_ineg64 : 0) |
3405 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_logic64 : 0) |
3406 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_minmax64 : 0) |
3407 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_shift64 : 0) |
3408 nir_lower_imul_2x32_64 |
3409 ((chipset >= NVISA_GM107_CHIPSET) ? nir_lower_extract64 : 0) |
3410 nir_lower_ufind_msb64
3411 );
3412 op.lower_doubles_options = (nir_lower_doubles_options) (
3413 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drcp : 0) |
3414 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsqrt : 0) |
3415 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drsq : 0) |
3416 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dfract : 0) |
3417 nir_lower_dmod |
3418 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsub : 0) |
3419 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_ddiv : 0)
3420 );
3421 return op;
3422 }
3423
3424 static const nir_shader_compiler_options g80_nir_shader_compiler_options =
3425 nvir_nir_shader_compiler_options(NVISA_G80_CHIPSET, PIPE_SHADER_TYPES);
3426 static const nir_shader_compiler_options g80_fs_nir_shader_compiler_options =
3427 nvir_nir_shader_compiler_options(NVISA_G80_CHIPSET, PIPE_SHADER_FRAGMENT);
3428 static const nir_shader_compiler_options gf100_nir_shader_compiler_options =
3429 nvir_nir_shader_compiler_options(NVISA_GF100_CHIPSET, PIPE_SHADER_TYPES);
3430 static const nir_shader_compiler_options gf100_fs_nir_shader_compiler_options =
3431 nvir_nir_shader_compiler_options(NVISA_GF100_CHIPSET, PIPE_SHADER_FRAGMENT);
3432 static const nir_shader_compiler_options gm107_nir_shader_compiler_options =
3433 nvir_nir_shader_compiler_options(NVISA_GM107_CHIPSET, PIPE_SHADER_TYPES);
3434 static const nir_shader_compiler_options gm107_fs_nir_shader_compiler_options =
3435 nvir_nir_shader_compiler_options(NVISA_GM107_CHIPSET, PIPE_SHADER_FRAGMENT);
3436 static const nir_shader_compiler_options gv100_nir_shader_compiler_options =
3437 nvir_nir_shader_compiler_options(NVISA_GV100_CHIPSET, PIPE_SHADER_TYPES);
3438 static const nir_shader_compiler_options gv100_fs_nir_shader_compiler_options =
3439 nvir_nir_shader_compiler_options(NVISA_GV100_CHIPSET, PIPE_SHADER_FRAGMENT);
3440
3441 const nir_shader_compiler_options *
3442 nv50_ir_nir_shader_compiler_options(int chipset, uint8_t shader_type)
3443 {
3444 if (chipset >= NVISA_GV100_CHIPSET) {
3445 if (shader_type == PIPE_SHADER_FRAGMENT)
3446 return &gv100_fs_nir_shader_compiler_options;
3447 else
3448 return &gv100_nir_shader_compiler_options;
3449 }
3450
3451 if (chipset >= NVISA_GM107_CHIPSET) {
3452 if (shader_type == PIPE_SHADER_FRAGMENT)
3453 return &gm107_fs_nir_shader_compiler_options;
3454 else
3455 return &gm107_nir_shader_compiler_options;
3456 }
3457
3458 if (chipset >= NVISA_GF100_CHIPSET) {
3459 if (shader_type == PIPE_SHADER_FRAGMENT)
3460 return &gf100_fs_nir_shader_compiler_options;
3461 else
3462 return &gf100_nir_shader_compiler_options;
3463 }
3464
3465 if (shader_type == PIPE_SHADER_FRAGMENT)
3466 return &g80_fs_nir_shader_compiler_options;
3467 else
3468 return &g80_nir_shader_compiler_options;
3469 }
3470