1 /*
2 * Copyright 2017 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Karol Herbst <kherbst@redhat.com>
23 */
24
25 #include "compiler/nir/nir.h"
26 #include "compiler/nir/nir_builder.h"
27
28 #include "util/u_debug.h"
29 #include "util/u_prim.h"
30
31 #include "nv50_ir.h"
32 #include "nv50_ir_lowering_helper.h"
33 #include "nv50_ir_target.h"
34 #include "nv50_ir_util.h"
35 #include "tgsi/tgsi_from_mesa.h"
36
37 #include <unordered_map>
38 #include <cstring>
39 #include <list>
40 #include <vector>
41
42 namespace {
43
44 using namespace nv50_ir;
45
46 int
47 type_size(const struct glsl_type *type, bool bindless)
48 {
49 return glsl_count_attribute_slots(type, false);
50 }
51
52 static void
53 function_temp_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
54 {
55 assert(glsl_type_is_vector_or_scalar(type));
56
57 if (glsl_type_is_scalar(type)) {
58 glsl_get_natural_size_align_bytes(type, size, align);
59 } else {
60 unsigned comp_size = glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
61 unsigned length = glsl_get_vector_elements(type);
62
63 *size = comp_size * length;
64 *align = 0x10;
65 }
66 }
67
68 bool
69 nv50_nir_lower_load_user_clip_plane_cb(nir_builder *b, nir_intrinsic_instr *intrin, void *params)
70 {
71 struct nv50_ir_prog_info *info = (struct nv50_ir_prog_info *)params;
72
73 if (intrin->intrinsic != nir_intrinsic_load_user_clip_plane)
74 return false;
75
76 uint16_t offset = info->io.ucpBase + nir_intrinsic_ucp_id(intrin) * 16;
77
78 b->cursor = nir_before_instr(&intrin->instr);
79 nir_def *replacement =
80 nir_load_ubo(b, 4, 32, nir_imm_int(b, info->io.auxCBSlot),
81 nir_imm_int(b, offset), .range = ~0u);
82
83 nir_def_rewrite_uses(&intrin->def, replacement);
84 nir_instr_remove(&intrin->instr);
85
86 return true;
87 }
88
89 bool
90 nv50_nir_lower_load_user_clip_plane(nir_shader *nir, struct nv50_ir_prog_info *info) {
91 return nir_shader_intrinsics_pass(nir, nv50_nir_lower_load_user_clip_plane_cb,
92 nir_metadata_block_index | nir_metadata_dominance,
93 info);
94 }
95
96 class Converter : public BuildUtil
97 {
98 public:
99 Converter(Program *, nir_shader *, nv50_ir_prog_info *, nv50_ir_prog_info_out *);
100
101 bool run();
102 private:
103 typedef std::vector<LValue*> LValues;
104 typedef std::unordered_map<unsigned, LValues> NirDefMap;
105 typedef std::unordered_map<unsigned, nir_load_const_instr*> ImmediateMap;
106 typedef std::unordered_map<unsigned, BasicBlock*> NirBlockMap;
107
108 CacheMode convert(enum gl_access_qualifier);
109 TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);
110 BasicBlock* convert(nir_block *);
111 SVSemantic convert(nir_intrinsic_op);
112 Value* convert(nir_load_const_instr*, uint8_t);
113 LValues& convert(nir_def *);
114
115 Value* getSrc(nir_alu_src *, uint8_t component = 0);
116 Value* getSrc(nir_src *, uint8_t, bool indirect = false);
117 Value* getSrc(nir_def *, uint8_t);
118
119 // returned value is the constant part of the given source (either the
120 // nir_src or the selected source component of an intrinsic). Even though
121 // this is mostly an optimization to be able to skip indirects in a few
122 // cases, sometimes we require immediate values or set some fields on
123 // instructions (e.g. tex) in order for codegen to consume those.
124 // If the found value does not have a constant part, the Value gets returned
125 // through the Value parameter.
126 uint32_t getIndirect(nir_src *, uint8_t, Value *&);
127 // isScalar indicates that the addressing is scalar, vec4 addressing is
128 // assumed otherwise
129 uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&,
130 bool isScalar = false);
131
132 uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);
133
134 uint8_t translateInterpMode(const struct nv50_ir_varying *var, operation& op);
135 void setInterpolate(nv50_ir_varying *,
136 uint8_t,
137 bool centroid,
138 unsigned semantics);
139
140 Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base,
141 uint8_t c, Value *indirect0 = NULL,
142 Value *indirect1 = NULL, bool patch = false,
143 CacheMode cache=CACHE_CA);
144 void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
145 Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,
146 Value *indirect1 = NULL);
147
148 bool isFloatType(nir_alu_type);
149 bool isSignedType(nir_alu_type);
150 bool isResultFloat(nir_op);
151 bool isResultSigned(nir_op);
152
153 DataType getDType(nir_alu_instr *);
154 DataType getDType(nir_intrinsic_instr *);
155 DataType getDType(nir_op, uint8_t);
156
157 DataFile getFile(nir_intrinsic_op);
158
159 std::vector<DataType> getSTypes(nir_alu_instr *);
160 DataType getSType(nir_src &, bool isFloat, bool isSigned);
161
162 operation getOperation(nir_intrinsic_op);
163 operation getOperation(nir_op);
164 operation getOperation(nir_texop);
165 operation preOperationNeeded(nir_op);
166
167 int getSubOp(nir_intrinsic_op);
168 int getSubOp(nir_op);
169 int getAtomicSubOp(nir_atomic_op);
170
171 CondCode getCondCode(nir_op);
172
173 bool assignSlots();
174 bool parseNIR();
175
176 bool visit(nir_alu_instr *);
177 bool visit(nir_block *);
178 bool visit(nir_cf_node *);
179 bool visit(nir_function *);
180 bool visit(nir_if *);
181 bool visit(nir_instr *);
182 bool visit(nir_intrinsic_instr *);
183 bool visit(nir_jump_instr *);
184 bool visit(nir_load_const_instr*);
185 bool visit(nir_loop *);
186 bool visit(nir_undef_instr *);
187 bool visit(nir_tex_instr *);
188
189 static unsigned lowerBitSizeCB(const nir_instr *, void *);
190
191 // tex stuff
192 unsigned int getNIRArgCount(TexInstruction::Target&);
193
194 struct nv50_ir_prog_info *info;
195 struct nv50_ir_prog_info_out *info_out;
196
197 nir_shader *nir;
198
199 NirDefMap ssaDefs;
200 NirDefMap regDefs;
201 ImmediateMap immediates;
202 NirBlockMap blocks;
203 unsigned int curLoopDepth;
204 unsigned int curIfDepth;
205
206 BasicBlock *exit;
207 Value *zero;
208 Instruction *immInsertPos;
209
210 Value *outBase; // base address of vertex out patch (for TCP)
211
212 union {
213 struct {
214 Value *position;
215 } fp;
216 };
217 };
218
219 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info,
220 nv50_ir_prog_info_out *info_out)
221 : BuildUtil(prog),
222 info(info),
223 info_out(info_out),
224 nir(nir),
225 curLoopDepth(0),
226 curIfDepth(0),
227 exit(NULL),
228 immInsertPos(NULL),
229 outBase(nullptr)
230 {
231 zero = mkImm((uint32_t)0);
232 }
233
234 BasicBlock *
235 Converter::convert(nir_block *block)
236 {
237 NirBlockMap::iterator it = blocks.find(block->index);
238 if (it != blocks.end())
239 return it->second;
240
241 BasicBlock *bb = new BasicBlock(func);
242 blocks[block->index] = bb;
243 return bb;
244 }
245
246 bool
247 Converter::isFloatType(nir_alu_type type)
248 {
249 return nir_alu_type_get_base_type(type) == nir_type_float;
250 }
251
252 bool
253 Converter::isSignedType(nir_alu_type type)
254 {
255 return nir_alu_type_get_base_type(type) == nir_type_int;
256 }
257
258 bool
259 Converter::isResultFloat(nir_op op)
260 {
261 const nir_op_info &info = nir_op_infos[op];
262 if (info.output_type != nir_type_invalid)
263 return isFloatType(info.output_type);
264
265 ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
266 assert(false);
267 return true;
268 }
269
270 bool
271 Converter::isResultSigned(nir_op op)
272 {
273 switch (op) {
274 // there is no umul and we get wrong results if we treat all muls as signed
275 case nir_op_imul:
276 case nir_op_inot:
277 return false;
278 default:
279 const nir_op_info &info = nir_op_infos[op];
280 if (info.output_type != nir_type_invalid)
281 return isSignedType(info.output_type);
282 ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
283 assert(false);
284 return true;
285 }
286 }
287
288 DataType
289 Converter::getDType(nir_alu_instr *insn)
290 {
291 return getDType(insn->op, insn->def.bit_size);
292 }
293
294 DataType
295 Converter::getDType(nir_intrinsic_instr *insn)
296 {
297 bool isFloat, isSigned;
298 switch (insn->intrinsic) {
299 case nir_intrinsic_bindless_image_atomic:
300 case nir_intrinsic_global_atomic:
301 case nir_intrinsic_image_atomic:
302 case nir_intrinsic_shared_atomic:
303 case nir_intrinsic_ssbo_atomic: {
304 nir_alu_type type = nir_atomic_op_type(nir_intrinsic_atomic_op(insn));
305 isFloat = type == nir_type_float;
306 isSigned = type == nir_type_int;
307 break;
308 }
309 default:
310 isFloat = false;
311 isSigned = false;
312 break;
313 }
314
315 return typeOfSize(insn->def.bit_size / 8, isFloat, isSigned);
316 }
317
318 DataType
319 Converter::getDType(nir_op op, uint8_t bitSize)
320 {
321 DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
322 if (ty == TYPE_NONE) {
323 ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
324 assert(false);
325 }
326 return ty;
327 }
328
329 std::vector<DataType>
330 Converter::getSTypes(nir_alu_instr *insn)
331 {
332 const nir_op_info &info = nir_op_infos[insn->op];
333 std::vector<DataType> res(info.num_inputs);
334
335 for (uint8_t i = 0; i < info.num_inputs; ++i) {
336 if (info.input_types[i] != nir_type_invalid) {
337 res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
338 } else {
339 ERROR("getSType not implemented for %s idx %u\n", info.name, i);
340 assert(false);
341 res[i] = TYPE_NONE;
342 break;
343 }
344 }
345
346 return res;
347 }
348
349 DataType
350 Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
351 {
352 const uint8_t bitSize = src.ssa->bit_size;
353
354 DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
355 if (ty == TYPE_NONE) {
356 const char *str;
357 if (isFloat)
358 str = "float";
359 else if (isSigned)
360 str = "int";
361 else
362 str = "uint";
363 ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
364 assert(false);
365 }
366 return ty;
367 }
368
369 DataFile
370 Converter::getFile(nir_intrinsic_op op)
371 {
372 switch (op) {
373 case nir_intrinsic_load_global:
374 case nir_intrinsic_store_global:
375 case nir_intrinsic_load_global_constant:
376 return FILE_MEMORY_GLOBAL;
377 case nir_intrinsic_load_scratch:
378 case nir_intrinsic_store_scratch:
379 return FILE_MEMORY_LOCAL;
380 case nir_intrinsic_load_shared:
381 case nir_intrinsic_store_shared:
382 return FILE_MEMORY_SHARED;
383 case nir_intrinsic_load_kernel_input:
384 return FILE_SHADER_INPUT;
385 default:
386 ERROR("couldn't get DateFile for op %s\n", nir_intrinsic_infos[op].name);
387 assert(false);
388 }
389 return FILE_NULL;
390 }
391
392 operation
393 Converter::getOperation(nir_op op)
394 {
395 switch (op) {
396 // basic ops with float and int variants
397 case nir_op_fabs:
398 case nir_op_iabs:
399 return OP_ABS;
400 case nir_op_fadd:
401 case nir_op_iadd:
402 return OP_ADD;
403 case nir_op_iand:
404 return OP_AND;
405 case nir_op_ifind_msb:
406 case nir_op_ufind_msb:
407 return OP_BFIND;
408 case nir_op_fceil:
409 return OP_CEIL;
410 case nir_op_fcos:
411 return OP_COS;
412 case nir_op_f2f32:
413 case nir_op_f2f64:
414 case nir_op_f2i8:
415 case nir_op_f2i16:
416 case nir_op_f2i32:
417 case nir_op_f2i64:
418 case nir_op_f2u8:
419 case nir_op_f2u16:
420 case nir_op_f2u32:
421 case nir_op_f2u64:
422 case nir_op_i2f32:
423 case nir_op_i2f64:
424 case nir_op_i2i8:
425 case nir_op_i2i16:
426 case nir_op_i2i32:
427 case nir_op_i2i64:
428 case nir_op_u2f32:
429 case nir_op_u2f64:
430 case nir_op_u2u8:
431 case nir_op_u2u16:
432 case nir_op_u2u32:
433 case nir_op_u2u64:
434 return OP_CVT;
435 case nir_op_fddx:
436 case nir_op_fddx_coarse:
437 case nir_op_fddx_fine:
438 return OP_DFDX;
439 case nir_op_fddy:
440 case nir_op_fddy_coarse:
441 case nir_op_fddy_fine:
442 return OP_DFDY;
443 case nir_op_fdiv:
444 case nir_op_idiv:
445 case nir_op_udiv:
446 return OP_DIV;
447 case nir_op_fexp2:
448 return OP_EX2;
449 case nir_op_ffloor:
450 return OP_FLOOR;
451 case nir_op_ffma:
452 case nir_op_ffmaz:
453 /* No FMA op pre-nvc0 */
454 if (info->target < 0xc0)
455 return OP_MAD;
456 return OP_FMA;
457 case nir_op_flog2:
458 return OP_LG2;
459 case nir_op_fmax:
460 case nir_op_imax:
461 case nir_op_umax:
462 return OP_MAX;
463 case nir_op_pack_64_2x32_split:
464 return OP_MERGE;
465 case nir_op_fmin:
466 case nir_op_imin:
467 case nir_op_umin:
468 return OP_MIN;
469 case nir_op_fmod:
470 case nir_op_imod:
471 case nir_op_umod:
472 case nir_op_frem:
473 case nir_op_irem:
474 return OP_MOD;
475 case nir_op_fmul:
476 case nir_op_fmulz:
477 case nir_op_amul:
478 case nir_op_imul:
479 case nir_op_imul_high:
480 case nir_op_umul_high:
481 return OP_MUL;
482 case nir_op_fneg:
483 case nir_op_ineg:
484 return OP_NEG;
485 case nir_op_inot:
486 return OP_NOT;
487 case nir_op_ior:
488 return OP_OR;
489 case nir_op_frcp:
490 return OP_RCP;
491 case nir_op_frsq:
492 return OP_RSQ;
493 case nir_op_fsat:
494 return OP_SAT;
495 case nir_op_ieq8:
496 case nir_op_ige8:
497 case nir_op_uge8:
498 case nir_op_ilt8:
499 case nir_op_ult8:
500 case nir_op_ine8:
501 case nir_op_ieq16:
502 case nir_op_ige16:
503 case nir_op_uge16:
504 case nir_op_ilt16:
505 case nir_op_ult16:
506 case nir_op_ine16:
507 case nir_op_feq32:
508 case nir_op_ieq32:
509 case nir_op_fge32:
510 case nir_op_ige32:
511 case nir_op_uge32:
512 case nir_op_flt32:
513 case nir_op_ilt32:
514 case nir_op_ult32:
515 case nir_op_fneu32:
516 case nir_op_ine32:
517 return OP_SET;
518 case nir_op_ishl:
519 return OP_SHL;
520 case nir_op_ishr:
521 case nir_op_ushr:
522 return OP_SHR;
523 case nir_op_fsin:
524 return OP_SIN;
525 case nir_op_fsqrt:
526 return OP_SQRT;
527 case nir_op_ftrunc:
528 return OP_TRUNC;
529 case nir_op_ixor:
530 return OP_XOR;
531 default:
532 ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
533 assert(false);
534 return OP_NOP;
535 }
536 }
537
538 operation
539 Converter::getOperation(nir_texop op)
540 {
541 switch (op) {
542 case nir_texop_tex:
543 return OP_TEX;
544 case nir_texop_lod:
545 return OP_TXLQ;
546 case nir_texop_txb:
547 return OP_TXB;
548 case nir_texop_txd:
549 return OP_TXD;
550 case nir_texop_txf:
551 case nir_texop_txf_ms:
552 return OP_TXF;
553 case nir_texop_tg4:
554 return OP_TXG;
555 case nir_texop_txl:
556 return OP_TXL;
557 case nir_texop_query_levels:
558 case nir_texop_texture_samples:
559 case nir_texop_txs:
560 return OP_TXQ;
561 default:
562 ERROR("couldn't get operation for nir_texop %u\n", op);
563 assert(false);
564 return OP_NOP;
565 }
566 }
567
568 operation
569 Converter::getOperation(nir_intrinsic_op op)
570 {
571 switch (op) {
572 case nir_intrinsic_emit_vertex:
573 return OP_EMIT;
574 case nir_intrinsic_end_primitive:
575 return OP_RESTART;
576 case nir_intrinsic_bindless_image_atomic:
577 case nir_intrinsic_image_atomic:
578 case nir_intrinsic_bindless_image_atomic_swap:
579 case nir_intrinsic_image_atomic_swap:
580 return OP_SUREDP;
581 case nir_intrinsic_bindless_image_load:
582 case nir_intrinsic_image_load:
583 return OP_SULDP;
584 case nir_intrinsic_bindless_image_samples:
585 case nir_intrinsic_image_samples:
586 case nir_intrinsic_bindless_image_size:
587 case nir_intrinsic_image_size:
588 return OP_SUQ;
589 case nir_intrinsic_bindless_image_store:
590 case nir_intrinsic_image_store:
591 return OP_SUSTP;
592 default:
593 ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
594 assert(false);
595 return OP_NOP;
596 }
597 }
598
599 operation
600 Converter::preOperationNeeded(nir_op op)
601 {
602 switch (op) {
603 case nir_op_fcos:
604 case nir_op_fsin:
605 return OP_PRESIN;
606 default:
607 return OP_NOP;
608 }
609 }
610
611 int
612 Converter::getSubOp(nir_op op)
613 {
614 switch (op) {
615 case nir_op_imul_high:
616 case nir_op_umul_high:
617 return NV50_IR_SUBOP_MUL_HIGH;
618 case nir_op_ishl:
619 case nir_op_ishr:
620 case nir_op_ushr:
621 return NV50_IR_SUBOP_SHIFT_WRAP;
622 default:
623 return 0;
624 }
625 }
626
627 int
628 Converter::getAtomicSubOp(nir_atomic_op op)
629 {
630 switch (op) {
631 case nir_atomic_op_fadd:
632 case nir_atomic_op_iadd:
633 return NV50_IR_SUBOP_ATOM_ADD;
634 case nir_atomic_op_iand:
635 return NV50_IR_SUBOP_ATOM_AND;
636 case nir_atomic_op_cmpxchg:
637 return NV50_IR_SUBOP_ATOM_CAS;
638 case nir_atomic_op_imax:
639 case nir_atomic_op_umax:
640 return NV50_IR_SUBOP_ATOM_MAX;
641 case nir_atomic_op_imin:
642 case nir_atomic_op_umin:
643 return NV50_IR_SUBOP_ATOM_MIN;
644 case nir_atomic_op_xchg:
645 return NV50_IR_SUBOP_ATOM_EXCH;
646 case nir_atomic_op_ior:
647 return NV50_IR_SUBOP_ATOM_OR;
648 case nir_atomic_op_ixor:
649 return NV50_IR_SUBOP_ATOM_XOR;
650 case nir_atomic_op_dec_wrap:
651 return NV50_IR_SUBOP_ATOM_DEC;
652 case nir_atomic_op_inc_wrap:
653 return NV50_IR_SUBOP_ATOM_INC;
654 default:
655 ERROR("couldn't get SubOp for atomic\n");
656 assert(false);
657 return 0;
658 }
659 }
660
661 int
662 Converter::getSubOp(nir_intrinsic_op op)
663 {
664 switch (op) {
665 case nir_intrinsic_vote_all:
666 return NV50_IR_SUBOP_VOTE_ALL;
667 case nir_intrinsic_vote_any:
668 return NV50_IR_SUBOP_VOTE_ANY;
669 case nir_intrinsic_vote_ieq:
670 return NV50_IR_SUBOP_VOTE_UNI;
671 default:
672 return 0;
673 }
674 }
675
676 CondCode
677 Converter::getCondCode(nir_op op)
678 {
679 switch (op) {
680 case nir_op_ieq8:
681 case nir_op_ieq16:
682 case nir_op_feq32:
683 case nir_op_ieq32:
684 return CC_EQ;
685 case nir_op_ige8:
686 case nir_op_uge8:
687 case nir_op_ige16:
688 case nir_op_uge16:
689 case nir_op_fge32:
690 case nir_op_ige32:
691 case nir_op_uge32:
692 return CC_GE;
693 case nir_op_ilt8:
694 case nir_op_ult8:
695 case nir_op_ilt16:
696 case nir_op_ult16:
697 case nir_op_flt32:
698 case nir_op_ilt32:
699 case nir_op_ult32:
700 return CC_LT;
701 case nir_op_fneu32:
702 return CC_NEU;
703 case nir_op_ine8:
704 case nir_op_ine16:
705 case nir_op_ine32:
706 return CC_NE;
707 default:
708 ERROR("couldn't get CondCode for op %s\n", nir_op_infos[op].name);
709 assert(false);
710 return CC_FL;
711 }
712 }
713
714 Converter::LValues&
715 Converter::convert(nir_def *def)
716 {
717 NirDefMap::iterator it = ssaDefs.find(def->index);
718 if (it != ssaDefs.end())
719 return it->second;
720
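// allocate one SSA value per component; each is at least 32 bits wide even for narrower defs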
721 LValues newDef(def->num_components);
722 for (uint8_t i = 0; i < def->num_components; i++)
723 newDef[i] = getSSA(std::max(4, def->bit_size / 8));
724 return ssaDefs[def->index] = newDef;
725 }
726
727 Value*
728 Converter::getSrc(nir_alu_src *src, uint8_t component)
729 {
730 return getSrc(&src->src, src->swizzle[component]);
731 }
732
733 Value*
734 Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
735 {
736 return getSrc(src->ssa, idx);
737 }
738
739 Value*
740 Converter::getSrc(nir_def *src, uint8_t idx)
741 {
742 ImmediateMap::iterator iit = immediates.find(src->index);
743 if (iit != immediates.end())
744 return convert((*iit).second, idx);
745
746 NirDefMap::iterator it = ssaDefs.find(src->index);
747 if (it == ssaDefs.end()) {
748 ERROR("SSA value %u not found\n", src->index);
749 assert(false);
750 return NULL;
751 }
752 return it->second[idx];
753 }
754
755 uint32_t
756 Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
757 {
758 nir_const_value *offset = nir_src_as_const_value(*src);
759
760 if (offset) {
761 indirect = NULL;
762 return offset[0].u32;
763 }
764
765 indirect = getSrc(src, idx, true);
766 return 0;
767 }
768
769 uint32_t
770 Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect, bool isScalar)
771 {
772 int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
773
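// with vec4 addressing, the indirect index counts whole vec4 slots, so scale it to a 16-byte stride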
774 if (indirect && !isScalar)
775 indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
776 return idx;
777 }
778
779 static void
780 vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
781 {
782 assert(name && index);
783
784 if (slot >= VERT_ATTRIB_MAX) {
785 ERROR("invalid varying slot %u\n", slot);
786 assert(false);
787 return;
788 }
789
790 if (slot >= VERT_ATTRIB_GENERIC0 &&
791 slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
792 *name = TGSI_SEMANTIC_GENERIC;
793 *index = slot - VERT_ATTRIB_GENERIC0;
794 return;
795 }
796
797 if (slot >= VERT_ATTRIB_TEX0 &&
798 slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
799 *name = TGSI_SEMANTIC_TEXCOORD;
800 *index = slot - VERT_ATTRIB_TEX0;
801 return;
802 }
803
804 switch (slot) {
805 case VERT_ATTRIB_COLOR0:
806 *name = TGSI_SEMANTIC_COLOR;
807 *index = 0;
808 break;
809 case VERT_ATTRIB_COLOR1:
810 *name = TGSI_SEMANTIC_COLOR;
811 *index = 1;
812 break;
813 case VERT_ATTRIB_EDGEFLAG:
814 *name = TGSI_SEMANTIC_EDGEFLAG;
815 *index = 0;
816 break;
817 case VERT_ATTRIB_FOG:
818 *name = TGSI_SEMANTIC_FOG;
819 *index = 0;
820 break;
821 case VERT_ATTRIB_NORMAL:
822 *name = TGSI_SEMANTIC_NORMAL;
823 *index = 0;
824 break;
825 case VERT_ATTRIB_POS:
826 *name = TGSI_SEMANTIC_POSITION;
827 *index = 0;
828 break;
829 case VERT_ATTRIB_POINT_SIZE:
830 *name = TGSI_SEMANTIC_PSIZE;
831 *index = 0;
832 break;
833 default:
834 ERROR("unknown vert attrib slot %u\n", slot);
835 assert(false);
836 break;
837 }
838 }
839
840 uint8_t
841 Converter::translateInterpMode(const struct nv50_ir_varying *var, operation& op)
842 {
843 uint8_t mode = NV50_IR_INTERP_PERSPECTIVE;
844
845 if (var->flat)
846 mode = NV50_IR_INTERP_FLAT;
847 else
848 if (var->linear)
849 mode = NV50_IR_INTERP_LINEAR;
850 else
851 if (var->sc)
852 mode = NV50_IR_INTERP_SC;
853
854 op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC)
855 ? OP_PINTERP : OP_LINTERP;
856
857 if (var->centroid)
858 mode |= NV50_IR_INTERP_CENTROID;
859
860 return mode;
861 }
862
863 void
864 Converter::setInterpolate(nv50_ir_varying *var,
865 uint8_t mode,
866 bool centroid,
867 unsigned semantic)
868 {
869 switch (mode) {
870 case INTERP_MODE_FLAT:
871 var->flat = 1;
872 break;
873 case INTERP_MODE_NONE:
874 if (semantic == TGSI_SEMANTIC_COLOR)
875 var->sc = 1;
876 else if (semantic == TGSI_SEMANTIC_POSITION)
877 var->linear = 1;
878 break;
879 case INTERP_MODE_NOPERSPECTIVE:
880 var->linear = 1;
881 break;
882 case INTERP_MODE_SMOOTH:
883 break;
884 }
885 var->centroid = centroid;
886 }
887
888 static uint16_t
889 calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
890 bool input, const nir_variable *var)
891 {
892 if (!glsl_type_is_array(type))
893 return glsl_count_attribute_slots(type, false);
894
895 uint16_t slots;
896 switch (stage) {
897 case Program::TYPE_GEOMETRY:
898 slots = glsl_count_attribute_slots(type, false);
899 if (input)
900 slots /= info.gs.vertices_in;
901 break;
902 case Program::TYPE_TESSELLATION_CONTROL:
903 case Program::TYPE_TESSELLATION_EVAL:
904 // remove first dimension
905 if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
906 slots = glsl_count_attribute_slots(type, false);
907 else
908 slots = glsl_count_attribute_slots(type->fields.array, false);
909 break;
910 default:
911 slots = glsl_count_attribute_slots(type, false);
912 break;
913 }
914
915 return slots;
916 }
917
918 static uint8_t
919 getMaskForType(const glsl_type *type, uint8_t slot) {
920 uint16_t comp = glsl_get_components(glsl_without_array(type));
921 comp = comp ? comp : 4;
922
923 if (glsl_base_type_is_64bit(glsl_without_array(type)->base_type)) {
924 comp *= 2;
925 if (comp > 4) {
926 if (slot % 2)
927 comp -= 4;
928 else
929 comp = 4;
930 }
931 }
932
933 return (1 << comp) - 1;
934 }
935
936 bool Converter::assignSlots() {
937 unsigned name;
938 unsigned index;
939
940 info->io.viewportId = -1;
941 info->io.mul_zero_wins = nir->info.use_legacy_math_rules;
942 info_out->numInputs = 0;
943 info_out->numOutputs = 0;
944 info_out->numSysVals = 0;
945
946 uint8_t i;
947 BITSET_FOREACH_SET(i, nir->info.system_values_read, SYSTEM_VALUE_MAX) {
948 switch (i) {
949 case SYSTEM_VALUE_VERTEX_ID:
950 info_out->io.vertexId = info_out->numSysVals;
951 break;
952 case SYSTEM_VALUE_INSTANCE_ID:
953 info_out->io.instanceId = info_out->numSysVals;
954 break;
955 default:
956 break;
957 }
958
959 info_out->sv[info_out->numSysVals].sn = (gl_system_value)i;
960 info_out->numSysVals += 1;
961 }
962
963 if (prog->getType() == Program::TYPE_COMPUTE)
964 return true;
965
966 nir_foreach_shader_in_variable(var, nir) {
967 const glsl_type *type = var->type;
968 int slot = var->data.location;
969 uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
970 uint32_t vary = var->data.driver_location;
971 assert(vary + slots <= NV50_CODEGEN_MAX_VARYINGS);
972
973 switch(prog->getType()) {
974 case Program::TYPE_FRAGMENT:
975 tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,
976 &name, &index);
977 for (uint16_t i = 0; i < slots; ++i) {
978 setInterpolate(&info_out->in[vary + i], var->data.interpolation,
979 var->data.centroid | var->data.sample, name);
980 }
981 break;
982 case Program::TYPE_GEOMETRY:
983 tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,
984 &name, &index);
985 break;
986 case Program::TYPE_TESSELLATION_CONTROL:
987 case Program::TYPE_TESSELLATION_EVAL:
988 tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,
989 &name, &index);
990 if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
991 info_out->numPatchConstants = MAX2(info_out->numPatchConstants, index + slots);
992 break;
993 case Program::TYPE_VERTEX:
994 if (slot >= VERT_ATTRIB_GENERIC0 && slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX)
995 slot = VERT_ATTRIB_GENERIC0 + vary;
996 vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
997 switch (name) {
998 case TGSI_SEMANTIC_EDGEFLAG:
999 info_out->io.edgeFlagIn = vary;
1000 break;
1001 default:
1002 break;
1003 }
1004 break;
1005 default:
1006 ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1007 return false;
1008 }
1009
1010 if (var->data.compact) {
1011 assert(!(nir->info.outputs_read & 1ull << slot));
1012 if (nir_is_arrayed_io(var, nir->info.stage)) {
1013 assert(glsl_type_is_array(type->fields.array));
1014 assert(glsl_type_is_scalar(type->fields.array->fields.array));
1015 assert(slots == glsl_get_length(type->fields.array));
1016 } else {
1017 assert(glsl_type_is_array(type));
1018 assert(glsl_type_is_scalar(type->fields.array));
1019 assert(slots == glsl_get_length(type));
1020 }
1021 assert(!glsl_base_type_is_64bit(glsl_without_array(type)->base_type));
1022
1023 uint32_t comps = BITFIELD_RANGE(var->data.location_frac, slots);
1024 assert(!(comps & ~0xff));
1025
1026 if (comps & 0x0f) {
1027 nv50_ir_varying *v = &info_out->in[vary];
1028 v->patch = var->data.patch;
1029 v->sn = name;
1030 v->si = index;
1031 v->mask |= comps & 0x0f;
1032 info_out->numInputs =
1033 std::max<uint8_t>(info_out->numInputs, vary + 1);
1034 }
1035 if (comps & 0xf0) {
1036 nv50_ir_varying *v = &info_out->in[vary + 1];
1037 v->patch = var->data.patch;
1038 v->sn = name;
1039 v->si = index + 1;
1040 v->mask |= (comps & 0xf0) >> 4;
1041 info_out->numInputs =
1042 std::max<uint8_t>(info_out->numInputs, vary + 2);
1043 }
1044 } else {
1045 for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1046 nv50_ir_varying *v = &info_out->in[vary];
1047
1048 v->patch = var->data.patch;
1049 v->sn = name;
1050 v->si = index + i;
1051 v->mask |= getMaskForType(type, i) << var->data.location_frac;
1052 }
1053 info_out->numInputs = std::max<uint8_t>(info_out->numInputs, vary);
1054 }
1055 }
1056
1057 nir_foreach_shader_out_variable(var, nir) {
1058 const glsl_type *type = var->type;
1059 int slot = var->data.location;
1060 uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
1061 uint32_t vary = var->data.driver_location;
1062
1063 assert(vary < NV50_CODEGEN_MAX_VARYINGS);
1064
1065 switch(prog->getType()) {
1066 case Program::TYPE_FRAGMENT:
1067 tgsi_get_gl_frag_result_semantic((gl_frag_result)slot, &name, &index);
1068 switch (name) {
1069 case TGSI_SEMANTIC_COLOR:
1070 if (!var->data.fb_fetch_output)
1071 info_out->prop.fp.numColourResults++;
1072 if (var->data.location == FRAG_RESULT_COLOR &&
1073 nir->info.outputs_written & BITFIELD64_BIT(var->data.location))
1074 info_out->prop.fp.separateFragData = true;
1075 // sometimes we get FRAG_RESULT_DATAX with data.index 0
1076 // sometimes we get FRAG_RESULT_DATA0 with data.index X
1077 index = index == 0 ? var->data.index : index;
1078 break;
1079 case TGSI_SEMANTIC_POSITION:
1080 info_out->io.fragDepth = vary;
1081 info_out->prop.fp.writesDepth = true;
1082 break;
1083 case TGSI_SEMANTIC_SAMPLEMASK:
1084 info_out->io.sampleMask = vary;
1085 break;
1086 default:
1087 break;
1088 }
1089 break;
1090 case Program::TYPE_GEOMETRY:
1091 case Program::TYPE_TESSELLATION_CONTROL:
1092 case Program::TYPE_TESSELLATION_EVAL:
1093 case Program::TYPE_VERTEX:
1094 tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,
1095 &name, &index);
1096
1097 if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
1098 name != TGSI_SEMANTIC_TESSOUTER)
1099 info_out->numPatchConstants = MAX2(info_out->numPatchConstants, index + slots);
1100
1101 switch (name) {
1102 case TGSI_SEMANTIC_EDGEFLAG:
1103 info_out->io.edgeFlagOut = vary;
1104 break;
1105 default:
1106 break;
1107 }
1108 break;
1109 default:
1110 ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1111 return false;
1112 }
1113
1114 if (var->data.compact) {
1115 assert(!(nir->info.outputs_read & 1ull << slot));
1116 if (nir_is_arrayed_io(var, nir->info.stage)) {
1117 assert(glsl_type_is_array(type->fields.array));
1118 assert(glsl_type_is_scalar(type->fields.array->fields.array));
1119 assert(slots == glsl_get_length(type->fields.array));
1120 } else {
1121 assert(glsl_type_is_array(type));
1122 assert(glsl_type_is_scalar(type->fields.array));
1123 assert(slots == glsl_get_length(type));
1124 }
1125 assert(!glsl_base_type_is_64bit(glsl_without_array(type)->base_type));
1126
1127 uint32_t comps = BITFIELD_RANGE(var->data.location_frac, slots);
1128 assert(!(comps & ~0xff));
1129
1130 if (comps & 0x0f) {
1131 nv50_ir_varying *v = &info_out->out[vary];
1132 v->patch = var->data.patch;
1133 v->sn = name;
1134 v->si = index;
1135 v->mask |= comps & 0x0f;
1136 info_out->numOutputs =
1137 std::max<uint8_t>(info_out->numOutputs, vary + 1);
1138 }
1139 if (comps & 0xf0) {
1140 nv50_ir_varying *v = &info_out->out[vary + 1];
1141 v->patch = var->data.patch;
1142 v->sn = name;
1143 v->si = index + 1;
1144 v->mask |= (comps & 0xf0) >> 4;
1145 info_out->numOutputs =
1146 std::max<uint8_t>(info_out->numOutputs, vary + 2);
1147 }
1148 } else {
1149 for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1150 nv50_ir_varying *v = &info_out->out[vary];
1151 v->patch = var->data.patch;
1152 v->sn = name;
1153 v->si = index + i;
1154 v->mask |= getMaskForType(type, i) << var->data.location_frac;
1155
1156 if (nir->info.outputs_read & 1ull << slot)
1157 v->oread = 1;
1158 }
1159 info_out->numOutputs = std::max<uint8_t>(info_out->numOutputs, vary);
1160 }
1161 }
1162
1163 return info->assignSlots(info_out) == 0;
1164 }
1165
1166 uint32_t
1167 Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
1168 {
1169 DataType ty;
1170 int offset = nir_intrinsic_component(insn);
1171 bool input;
1172
1173 if (nir_intrinsic_infos[insn->intrinsic].has_dest)
1174 ty = getDType(insn);
1175 else
1176 ty = getSType(insn->src[0], false, false);
1177
1178 switch (insn->intrinsic) {
1179 case nir_intrinsic_load_input:
1180 case nir_intrinsic_load_interpolated_input:
1181 case nir_intrinsic_load_per_vertex_input:
1182 input = true;
1183 break;
1184 case nir_intrinsic_load_output:
1185 case nir_intrinsic_load_per_vertex_output:
1186 case nir_intrinsic_store_output:
1187 case nir_intrinsic_store_per_vertex_output:
1188 input = false;
1189 break;
1190 default:
1191 ERROR("unknown intrinsic in getSlotAddress %s",
1192 nir_intrinsic_infos[insn->intrinsic].name);
1193 input = false;
1194 assert(false);
1195 break;
1196 }
1197
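// 64-bit values occupy two 32-bit components; move on to the next varying location once past component 3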
1198 if (typeSizeof(ty) == 8) {
1199 slot *= 2;
1200 slot += offset;
1201 if (slot >= 4) {
1202 idx += 1;
1203 slot -= 4;
1204 }
1205 } else {
1206 slot += offset;
1207 }
1208
1209 assert(slot < 4);
1210 assert(!input || idx < NV50_CODEGEN_MAX_VARYINGS);
1211 assert(input || idx < NV50_CODEGEN_MAX_VARYINGS);
1212
1213 const nv50_ir_varying *vary = input ? info_out->in : info_out->out;
1214 return vary[idx].slot[slot] * 4;
1215 }
1216
1217 Instruction *
1218 Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
1219 uint32_t base, uint8_t c, Value *indirect0,
1220 Value *indirect1, bool patch, CacheMode cache)
1221 {
1222 unsigned int tySize = typeSizeof(ty);
1223
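// 64-bit loads are split into two 32-bit loads (merged afterwards) when a native 64-bit access isn't possible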
1224 if (tySize == 8 &&
1225 (indirect0 || !prog->getTarget()->isAccessSupported(file, TYPE_U64))) {
1226 Value *lo = getSSA();
1227 Value *hi = getSSA();
1228
1229 Instruction *loi =
1230 mkLoad(TYPE_U32, lo,
1231 mkSymbol(file, i, TYPE_U32, base + c * tySize),
1232 indirect0);
1233 loi->setIndirect(0, 1, indirect1);
1234 loi->perPatch = patch;
1235 loi->cache = cache;
1236
1237 Instruction *hii =
1238 mkLoad(TYPE_U32, hi,
1239 mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
1240 indirect0);
1241 hii->setIndirect(0, 1, indirect1);
1242 hii->perPatch = patch;
1243 hii->cache = cache;
1244
1245 return mkOp2(OP_MERGE, ty, def, lo, hi);
1246 } else {
1247 Instruction *ld =
1248 mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
1249 ld->setIndirect(0, 1, indirect1);
1250 ld->perPatch = patch;
1251 ld->cache = cache;
1252 return ld;
1253 }
1254 }
1255
1256 void
1257 Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
1258 DataType ty, Value *src, uint8_t idx, uint8_t c,
1259 Value *indirect0, Value *indirect1)
1260 {
1261 uint8_t size = typeSizeof(ty);
1262 uint32_t address = getSlotAddress(insn, idx, c);
1263
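// indirect 64-bit stores have to be split into two 32-bit stores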
1264 if (size == 8 && indirect0) {
1265 Value *split[2];
1266 mkSplit(split, 4, src);
1267
1268 if (op == OP_EXPORT) {
1269 split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
1270 split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
1271 }
1272
1273 mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
1274 split[0])->perPatch = info_out->out[idx].patch;
1275 mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0,
1276 split[1])->perPatch = info_out->out[idx].patch;
1277 } else {
1278 if (op == OP_EXPORT)
1279 src = mkMov(getSSA(size), src, ty)->getDef(0);
1280 mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
1281 src)->perPatch = info_out->out[idx].patch;
1282 }
1283 }
1284
1285 bool
1286 Converter::parseNIR()
1287 {
1288 info_out->bin.tlsSpace = nir->scratch_size;
1289 info_out->io.clipDistances = nir->info.clip_distance_array_size;
1290 info_out->io.cullDistances = nir->info.cull_distance_array_size;
1291 info_out->io.layer_viewport_relative = nir->info.layer_viewport_relative;
1292
1293 switch(prog->getType()) {
1294 case Program::TYPE_COMPUTE:
1295 info->prop.cp.numThreads[0] = nir->info.workgroup_size[0];
1296 info->prop.cp.numThreads[1] = nir->info.workgroup_size[1];
1297 info->prop.cp.numThreads[2] = nir->info.workgroup_size[2];
1298 info_out->bin.smemSize = std::max(info_out->bin.smemSize, nir->info.shared_size);
1299
1300 if (info->target < NVISA_GF100_CHIPSET) {
1301 int gmemSlot = 0;
1302
1303 for (unsigned i = 0; i < nir->info.num_ssbos; i++) {
1304 info_out->prop.cp.gmem[gmemSlot++] = {.valid = 1, .image = 0, .slot = i};
1305 assert(gmemSlot < 16);
1306 }
1307 nir_foreach_image_variable(var, nir) {
1308 int image_count = glsl_type_get_image_count(var->type);
1309 for (int i = 0; i < image_count; i++) {
1310 info_out->prop.cp.gmem[gmemSlot++] = {.valid = 1, .image = 1, .slot = var->data.binding + i};
1311 assert(gmemSlot < 16);
1312 }
1313 }
1314 }
1315
1316 break;
1317 case Program::TYPE_FRAGMENT:
1318 info_out->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
1319 prog->persampleInvocation =
1320 BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
1321 BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS);
1322 info_out->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
1323 info_out->prop.fp.readsSampleLocations =
1324 BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS);
1325 info_out->prop.fp.usesDiscard = nir->info.fs.uses_discard || nir->info.fs.uses_demote;
1326 info_out->prop.fp.usesSampleMaskIn =
1327 BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
1328 break;
1329 case Program::TYPE_GEOMETRY:
1330 info_out->prop.gp.instanceCount = nir->info.gs.invocations;
1331 info_out->prop.gp.maxVertices = nir->info.gs.vertices_out;
1332 info_out->prop.gp.outputPrim = nir->info.gs.output_primitive;
1333 break;
1334 case Program::TYPE_TESSELLATION_CONTROL:
1335 case Program::TYPE_TESSELLATION_EVAL:
1336 info_out->prop.tp.domain = u_tess_prim_from_shader(nir->info.tess._primitive_mode);
1337 info_out->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
1338 info_out->prop.tp.outputPrim =
1339 nir->info.tess.point_mode ? MESA_PRIM_POINTS : MESA_PRIM_TRIANGLES;
1340 info_out->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
1341 info_out->prop.tp.winding = !nir->info.tess.ccw;
1342 break;
1343 case Program::TYPE_VERTEX:
1344 info_out->prop.vp.usesDrawParameters =
1345 BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX) ||
1346 BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE) ||
1347 BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID);
1348 break;
1349 default:
1350 break;
1351 }
1352
1353 return true;
1354 }
1355
1356 bool
1357 Converter::visit(nir_function *function)
1358 {
1359 assert(function->impl);
1360
1361 // usually the blocks will set everything up, but main is special
1362 BasicBlock *entry = new BasicBlock(prog->main);
1363 exit = new BasicBlock(prog->main);
1364 blocks[nir_start_block(function->impl)->index] = entry;
1365 prog->main->setEntry(entry);
1366 prog->main->setExit(exit);
1367
1368 setPosition(entry, true);
1369
1370 switch (prog->getType()) {
1371 case Program::TYPE_TESSELLATION_CONTROL:
1372 outBase = mkOp2v(
1373 OP_SUB, TYPE_U32, getSSA(),
1374 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
1375 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
1376 break;
1377 case Program::TYPE_FRAGMENT: {
1378 Symbol *sv = mkSysVal(SV_POSITION, 3);
1379 Value *temp = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
1380 fp.position = mkOp1v(OP_RCP, TYPE_F32, temp, temp);
1381 break;
1382 }
1383 default:
1384 break;
1385 }
1386
1387 nir_index_ssa_defs(function->impl);
1388 foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
1389 if (!visit(node))
1390 return false;
1391 }
1392
1393 bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
1394 setPosition(exit, true);
1395
1396 // TODO: for non-main functions this needs to be an OP_RETURN
1397 mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
1398 return true;
1399 }
1400
1401 bool
1402 Converter::visit(nir_cf_node *node)
1403 {
1404 switch (node->type) {
1405 case nir_cf_node_block:
1406 return visit(nir_cf_node_as_block(node));
1407 case nir_cf_node_if:
1408 return visit(nir_cf_node_as_if(node));
1409 case nir_cf_node_loop:
1410 return visit(nir_cf_node_as_loop(node));
1411 default:
1412 ERROR("unknown nir_cf_node type %u\n", node->type);
1413 return false;
1414 }
1415 }
1416
1417 bool
1418 Converter::visit(nir_block *block)
1419 {
1420 if (!block->predecessors->entries && block->instr_list.is_empty())
1421 return true;
1422
1423 BasicBlock *bb = convert(block);
1424
1425 setPosition(bb, true);
1426 nir_foreach_instr(insn, block) {
1427 if (!visit(insn))
1428 return false;
1429 }
1430 return true;
1431 }
1432
1433 bool
1434 Converter::visit(nir_if *nif)
1435 {
1436 curIfDepth++;
1437
1438 DataType sType = getSType(nif->condition, false, false);
1439 Value *src = getSrc(&nif->condition, 0);
1440
1441 nir_block *lastThen = nir_if_last_then_block(nif);
1442 nir_block *lastElse = nir_if_last_else_block(nif);
1443
1444 BasicBlock *headBB = bb;
1445 BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
1446 BasicBlock *elseBB = convert(nir_if_first_else_block(nif));
1447
1448 bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
1449 bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);
1450
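// JOINAT/JOIN can only be inserted if both branches reconverge at the same block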
1451 bool insertJoins = lastThen->successors[0] == lastElse->successors[0];
1452 mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType);
1453
1454 foreach_list_typed(nir_cf_node, node, node, &nif->then_list) {
1455 if (!visit(node))
1456 return false;
1457 }
1458
1459 setPosition(convert(lastThen), true);
1460 if (!bb->isTerminated()) {
1461 BasicBlock *tailBB = convert(lastThen->successors[0]);
1462 mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1463 bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1464 } else {
1465 insertJoins = insertJoins && bb->getExit()->op == OP_BRA;
1466 }
1467
1468 foreach_list_typed(nir_cf_node, node, node, &nif->else_list) {
1469 if (!visit(node))
1470 return false;
1471 }
1472
1473 setPosition(convert(lastElse), true);
1474 if (!bb->isTerminated()) {
1475 BasicBlock *tailBB = convert(lastElse->successors[0]);
1476 mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1477 bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1478 } else {
1479 insertJoins = insertJoins && bb->getExit()->op == OP_BRA;
1480 }
1481
1482 if (curIfDepth > 6) {
1483 insertJoins = false;
1484 }
1485
1486 /* we made sure that all threads would converge at the same block */
1487 if (insertJoins) {
1488 BasicBlock *conv = convert(lastThen->successors[0]);
1489 setPosition(headBB->getExit(), false);
1490 headBB->joinAt = mkFlow(OP_JOINAT, conv, CC_ALWAYS, NULL);
1491 setPosition(conv, false);
1492 mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
1493 }
1494
1495 curIfDepth--;
1496
1497 return true;
1498 }
1499
1500 // TODO: add convergence
1501 bool
1502 Converter::visit(nir_loop *loop)
1503 {
1504 assert(!nir_loop_has_continue_construct(loop));
1505 curLoopDepth += 1;
1506 func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth);
1507
1508 BasicBlock *loopBB = convert(nir_loop_first_block(loop));
1509 BasicBlock *tailBB = convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));
1510
1511 bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE);
1512
1513 mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL);
1514 setPosition(loopBB, false);
1515 mkFlow(OP_PRECONT, loopBB, CC_ALWAYS, NULL);
1516
1517 foreach_list_typed(nir_cf_node, node, node, &loop->body) {
1518 if (!visit(node))
1519 return false;
1520 }
1521
1522 if (!bb->isTerminated()) {
1523 mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
1524 bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
1525 }
1526
1527 if (tailBB->cfg.incidentCount() == 0)
1528 loopBB->cfg.attach(&tailBB->cfg, Graph::Edge::TREE);
1529
1530 curLoopDepth -= 1;
1531
1532 info_out->loops++;
1533
1534 return true;
1535 }
1536
1537 bool
1538 Converter::visit(nir_instr *insn)
1539 {
1540 // we need an insertion point for immediate loads generated on the fly
1541 immInsertPos = bb->getExit();
1542 switch (insn->type) {
1543 case nir_instr_type_alu:
1544 return visit(nir_instr_as_alu(insn));
1545 case nir_instr_type_intrinsic:
1546 return visit(nir_instr_as_intrinsic(insn));
1547 case nir_instr_type_jump:
1548 return visit(nir_instr_as_jump(insn));
1549 case nir_instr_type_load_const:
1550 return visit(nir_instr_as_load_const(insn));
1551 case nir_instr_type_undef:
1552 return visit(nir_instr_as_undef(insn));
1553 case nir_instr_type_tex:
1554 return visit(nir_instr_as_tex(insn));
1555 default:
1556 ERROR("unknown nir_instr type %u\n", insn->type);
1557 return false;
1558 }
1559 return true;
1560 }
1561
1562 SVSemantic
1563 Converter::convert(nir_intrinsic_op intr)
1564 {
1565 switch (intr) {
1566 case nir_intrinsic_load_base_vertex:
1567 return SV_BASEVERTEX;
1568 case nir_intrinsic_load_base_instance:
1569 return SV_BASEINSTANCE;
1570 case nir_intrinsic_load_draw_id:
1571 return SV_DRAWID;
1572 case nir_intrinsic_load_front_face:
1573 return SV_FACE;
1574 case nir_intrinsic_is_helper_invocation:
1575 case nir_intrinsic_load_helper_invocation:
1576 return SV_THREAD_KILL;
1577 case nir_intrinsic_load_instance_id:
1578 return SV_INSTANCE_ID;
1579 case nir_intrinsic_load_invocation_id:
1580 return SV_INVOCATION_ID;
1581 case nir_intrinsic_load_workgroup_size:
1582 return SV_NTID;
1583 case nir_intrinsic_load_local_invocation_id:
1584 return SV_TID;
1585 case nir_intrinsic_load_num_workgroups:
1586 return SV_NCTAID;
1587 case nir_intrinsic_load_patch_vertices_in:
1588 return SV_VERTEX_COUNT;
1589 case nir_intrinsic_load_primitive_id:
1590 return SV_PRIMITIVE_ID;
1591 case nir_intrinsic_load_sample_id:
1592 return SV_SAMPLE_INDEX;
1593 case nir_intrinsic_load_sample_mask_in:
1594 return SV_SAMPLE_MASK;
1595 case nir_intrinsic_load_sample_pos:
1596 return SV_SAMPLE_POS;
1597 case nir_intrinsic_load_subgroup_eq_mask:
1598 return SV_LANEMASK_EQ;
1599 case nir_intrinsic_load_subgroup_ge_mask:
1600 return SV_LANEMASK_GE;
1601 case nir_intrinsic_load_subgroup_gt_mask:
1602 return SV_LANEMASK_GT;
1603 case nir_intrinsic_load_subgroup_le_mask:
1604 return SV_LANEMASK_LE;
1605 case nir_intrinsic_load_subgroup_lt_mask:
1606 return SV_LANEMASK_LT;
1607 case nir_intrinsic_load_subgroup_invocation:
1608 return SV_LANEID;
1609 case nir_intrinsic_load_tess_coord:
1610 return SV_TESS_COORD;
1611 case nir_intrinsic_load_tess_level_inner:
1612 return SV_TESS_INNER;
1613 case nir_intrinsic_load_tess_level_outer:
1614 return SV_TESS_OUTER;
1615 case nir_intrinsic_load_vertex_id:
1616 return SV_VERTEX_ID;
1617 case nir_intrinsic_load_workgroup_id:
1618 case nir_intrinsic_load_workgroup_id_zero_base:
1619 return SV_CTAID;
1620 case nir_intrinsic_load_work_dim:
1621 return SV_WORK_DIM;
1622 default:
1623 ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
1624 nir_intrinsic_infos[intr].name);
1625 assert(false);
1626 return SV_LAST;
1627 }
1628 }
1629
1630 bool
1631 Converter::visit(nir_intrinsic_instr *insn)
1632 {
1633 nir_intrinsic_op op = insn->intrinsic;
1634 const nir_intrinsic_info &opInfo = nir_intrinsic_infos[op];
1635 unsigned dest_components = nir_intrinsic_dest_components(insn);
1636
1637 switch (op) {
1638 case nir_intrinsic_decl_reg: {
1639 const unsigned reg_index = insn->def.index;
1640 const unsigned bit_size = nir_intrinsic_bit_size(insn);
1641 const unsigned num_components = nir_intrinsic_num_components(insn);
1642 assert(nir_intrinsic_num_array_elems(insn) == 0);
1643
1644 LValues newDef(num_components);
1645 for (uint8_t c = 0; c < num_components; c++)
1646 newDef[c] = getScratch(std::max(4u, bit_size / 8));
1647
1648 assert(regDefs.find(reg_index) == regDefs.end());
1649 regDefs[reg_index] = newDef;
1650 break;
1651 }
1652
1653 case nir_intrinsic_load_reg: {
1654 const unsigned reg_index = insn->src[0].ssa->index;
1655 NirDefMap::iterator it = regDefs.find(reg_index);
1656 assert(it != regDefs.end());
1657 LValues &src = it->second;
1658
1659 DataType dType = getDType(insn);
1660 LValues &newDefs = convert(&insn->def);
1661 for (uint8_t c = 0; c < insn->num_components; c++)
1662 mkMov(newDefs[c], src[c], dType);
1663 break;
1664 }
1665
1666 case nir_intrinsic_store_reg: {
1667 const unsigned reg_index = insn->src[1].ssa->index;
1668 NirDefMap::iterator it = regDefs.find(reg_index);
1669 assert(it != regDefs.end());
1670 LValues &dst = it->second;
1671
1672 DataType dType = Converter::getSType(insn->src[0], false, false);
1673
1674 const nir_component_mask_t write_mask = nir_intrinsic_write_mask(insn);
1675 for (uint8_t c = 0u; c < insn->num_components; c++) {
1676 if (!((1u << c) & write_mask))
1677 continue;
1678
1679 Value *src = getSrc(&insn->src[0], c);
1680 mkMov(dst[c], src, dType);
1681 }
1682 break;
1683 }
1684
1685 case nir_intrinsic_store_output:
1686 case nir_intrinsic_store_per_vertex_output: {
1687 Value *indirect;
1688 DataType dType = getSType(insn->src[0], false, false);
1689 uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect);
1690
1691 for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
1692 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
1693 continue;
1694
1695 uint8_t offset = 0;
1696 Value *src = getSrc(&insn->src[0], i);
1697 switch (prog->getType()) {
1698 case Program::TYPE_FRAGMENT: {
1699 if (info_out->out[idx].sn == TGSI_SEMANTIC_POSITION) {
1700 // TGSI uses a different interface than NIR: TGSI stores that
1701 // value in the z component, NIR in X
1702 offset += 2;
1703 src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
1704 }
1705 break;
1706 }
1707 default:
1708 break;
1709 }
1710
1711 storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + offset, indirect);
1712 }
1713 break;
1714 }
1715 case nir_intrinsic_load_input:
1716 case nir_intrinsic_load_interpolated_input:
1717 case nir_intrinsic_load_output: {
1718 LValues &newDefs = convert(&insn->def);
1719
1720 // FBFetch
1721 if (prog->getType() == Program::TYPE_FRAGMENT &&
1722 op == nir_intrinsic_load_output) {
1723 std::vector<Value*> defs, srcs;
1724 uint8_t mask = 0;
1725
1726 srcs.push_back(getSSA());
1727 srcs.push_back(getSSA());
1728 Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0));
1729 Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1));
1730 mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
1731 mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;
1732
1733 srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));
1734 srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));
1735
1736 for (uint8_t i = 0u; i < dest_components; ++i) {
1737 defs.push_back(newDefs[i]);
1738 mask |= 1 << i;
1739 }
1740
1741 TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs);
1742 texi->tex.levelZero = true;
1743 texi->tex.mask = mask;
1744 texi->tex.useOffsets = 0;
1745 texi->tex.r = 0xffff;
1746 texi->tex.s = 0xffff;
1747
1748 info_out->prop.fp.readsFramebuffer = true;
1749 break;
1750 }
1751
1752 const DataType dType = getDType(insn);
1753 Value *indirect;
1754 bool input = op != nir_intrinsic_load_output;
1755 operation nvirOp = OP_LAST;
1756 uint32_t mode = 0;
1757
1758 uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect);
1759 nv50_ir_varying& vary = input ? info_out->in[idx] : info_out->out[idx];
1760
1761 // see load_barycentric_* handling
1762 if (prog->getType() == Program::TYPE_FRAGMENT) {
1763 if (op == nir_intrinsic_load_interpolated_input) {
1764 ImmediateValue immMode;
1765 if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode))
1766 mode = immMode.reg.data.u32;
1767 }
1768 if (mode == NV50_IR_INTERP_DEFAULT)
1769 mode |= translateInterpMode(&vary, nvirOp);
1770 else {
1771 if (vary.linear) {
1772 nvirOp = OP_LINTERP;
1773 mode |= NV50_IR_INTERP_LINEAR;
1774 } else {
1775 nvirOp = OP_PINTERP;
1776 mode |= NV50_IR_INTERP_PERSPECTIVE;
1777 }
1778 }
1779 }
1780
1781 for (uint8_t i = 0u; i < dest_components; ++i) {
1782 uint32_t address = getSlotAddress(insn, idx, i);
1783 Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address);
1784 if (prog->getType() == Program::TYPE_FRAGMENT) {
1785 int s = 1;
1786 if (typeSizeof(dType) == 8) {
1787 Value *lo = getSSA();
1788 Value *hi = getSSA();
1789 Instruction *interp;
1790
1791 interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
1792 if (nvirOp == OP_PINTERP)
1793 interp->setSrc(s++, fp.position);
1794 if (mode & NV50_IR_INTERP_OFFSET)
1795 interp->setSrc(s++, getSrc(&insn->src[0], 0));
1796 interp->setInterpolate(mode);
1797 interp->setIndirect(0, 0, indirect);
1798
1799 Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4);
1800 interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
1801 if (nvirOp == OP_PINTERP)
1802 interp->setSrc(s++, fp.position);
1803 if (mode & NV50_IR_INTERP_OFFSET)
1804 interp->setSrc(s++, getSrc(&insn->src[0], 0));
1805 interp->setInterpolate(mode);
1806 interp->setIndirect(0, 0, indirect);
1807
1808 mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
1809 } else {
1810 Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
1811 if (nvirOp == OP_PINTERP)
1812 interp->setSrc(s++, fp.position);
1813 if (mode & NV50_IR_INTERP_OFFSET)
1814 interp->setSrc(s++, getSrc(&insn->src[0], 0));
1815 interp->setInterpolate(mode);
1816 interp->setIndirect(0, 0, indirect);
1817 }
1818 } else {
1819 mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;
1820 }
1821 }
1822 break;
1823 }
1824 case nir_intrinsic_load_barycentric_at_offset:
1825 case nir_intrinsic_load_barycentric_at_sample:
1826 case nir_intrinsic_load_barycentric_centroid:
1827 case nir_intrinsic_load_barycentric_pixel:
1828 case nir_intrinsic_load_barycentric_sample: {
1829 LValues &newDefs = convert(&insn->def);
1830 uint32_t mode;
1831
1832 if (op == nir_intrinsic_load_barycentric_centroid ||
1833 op == nir_intrinsic_load_barycentric_sample) {
1834 mode = NV50_IR_INTERP_CENTROID;
1835 } else if (op == nir_intrinsic_load_barycentric_at_offset) {
1836 Value *offs[2];
1837 for (uint8_t c = 0; c < 2; c++) {
1838 offs[c] = getScratch();
1839 mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(&insn->src[0], c), loadImm(NULL, 0.4375f));
1840 mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
1841 mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
1842 mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
1843 }
1844 mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), offs[0]);
1845
1846 mode = NV50_IR_INTERP_OFFSET;
1847 } else if (op == nir_intrinsic_load_barycentric_pixel) {
1848 mode = NV50_IR_INTERP_DEFAULT;
1849 } else if (op == nir_intrinsic_load_barycentric_at_sample) {
1850 info_out->prop.fp.readsSampleLocations = true;
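         // PIXLD with the OFFSET subop looks up the position of the requested
         // sample and returns it as a packed x/y offset, which then feeds the same
         // offset-interpolation path as load_barycentric_at_offset.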
1851 Value *sample = getSSA();
1852 mkOp3(OP_SELP, TYPE_U32, sample, mkImm(0), getSrc(&insn->src[0], 0), mkImm(0))
1853 ->subOp = 2;
1854 mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], sample)->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
1855 mode = NV50_IR_INTERP_OFFSET;
1856 } else {
1857 unreachable("all intrinsics already handled above");
1858 }
1859
1860 loadImm(newDefs[1], mode);
1861 break;
1862 }
1863 case nir_intrinsic_demote:
1864 case nir_intrinsic_discard:
1865 mkOp(OP_DISCARD, TYPE_NONE, NULL);
1866 break;
1867 case nir_intrinsic_demote_if:
1868 case nir_intrinsic_discard_if: {
1869 Value *pred = getSSA(1, FILE_PREDICATE);
1870 if (insn->num_components > 1) {
1871          ERROR("nir_intrinsic_discard_if supported only with 1 component!\n");
1872 assert(false);
1873 return false;
1874 }
1875 mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
1876 mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
1877 break;
1878 }
1879 case nir_intrinsic_load_base_vertex:
1880 case nir_intrinsic_load_base_instance:
1881 case nir_intrinsic_load_draw_id:
1882 case nir_intrinsic_load_front_face:
1883 case nir_intrinsic_is_helper_invocation:
1884 case nir_intrinsic_load_helper_invocation:
1885 case nir_intrinsic_load_instance_id:
1886 case nir_intrinsic_load_invocation_id:
1887 case nir_intrinsic_load_workgroup_size:
1888 case nir_intrinsic_load_local_invocation_id:
1889 case nir_intrinsic_load_num_workgroups:
1890 case nir_intrinsic_load_patch_vertices_in:
1891 case nir_intrinsic_load_primitive_id:
1892 case nir_intrinsic_load_sample_id:
1893 case nir_intrinsic_load_sample_mask_in:
1894 case nir_intrinsic_load_sample_pos:
1895 case nir_intrinsic_load_subgroup_eq_mask:
1896 case nir_intrinsic_load_subgroup_ge_mask:
1897 case nir_intrinsic_load_subgroup_gt_mask:
1898 case nir_intrinsic_load_subgroup_le_mask:
1899 case nir_intrinsic_load_subgroup_lt_mask:
1900 case nir_intrinsic_load_subgroup_invocation:
1901 case nir_intrinsic_load_tess_coord:
1902 case nir_intrinsic_load_tess_level_inner:
1903 case nir_intrinsic_load_tess_level_outer:
1904 case nir_intrinsic_load_vertex_id:
1905 case nir_intrinsic_load_workgroup_id:
1906 case nir_intrinsic_load_workgroup_id_zero_base:
1907 case nir_intrinsic_load_work_dim: {
1908 const DataType dType = getDType(insn);
1909 SVSemantic sv = convert(op);
1910 LValues &newDefs = convert(&insn->def);
1911
1912 for (uint8_t i = 0u; i < nir_intrinsic_dest_components(insn); ++i) {
1913 Value *def;
1914 if (typeSizeof(dType) == 8)
1915 def = getSSA();
1916 else
1917 def = newDefs[i];
1918
1919 if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {
1920 loadImm(def, 0u);
1921 } else {
1922 Symbol *sym = mkSysVal(sv, i);
1923 Instruction *rdsv = mkOp1(OP_RDSV, TYPE_U32, def, sym);
1924 if (sv == SV_TESS_OUTER || sv == SV_TESS_INNER)
1925 rdsv->perPatch = 1;
1926 }
1927
1928 if (typeSizeof(dType) == 8)
1929 mkOp2(OP_MERGE, dType, newDefs[i], def, loadImm(getSSA(), 0u));
1930 }
1931 break;
1932 }
1933 // constants
1934 case nir_intrinsic_load_subgroup_size: {
1935 LValues &newDefs = convert(&insn->def);
1936 loadImm(newDefs[0], 32u);
1937 break;
1938 }
1939 case nir_intrinsic_vote_all:
1940 case nir_intrinsic_vote_any:
1941 case nir_intrinsic_vote_ieq: {
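      // Compare the 32-bit boolean source against zero to get a predicate, run
      // the warp-wide vote on it, then widen the predicate result back to a
      // 32-bit boolean.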
1942 LValues &newDefs = convert(&insn->def);
1943 Value *pred = getScratch(1, FILE_PREDICATE);
1944 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
1945 mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op);
1946 mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred);
1947 break;
1948 }
1949 case nir_intrinsic_ballot: {
1950 LValues &newDefs = convert(&insn->def);
1951 Value *pred = getSSA(1, FILE_PREDICATE);
1952 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
1953 mkOp1(OP_VOTE, TYPE_U32, newDefs[0], pred)->subOp = NV50_IR_SUBOP_VOTE_ANY;
1954 break;
1955 }
1956 case nir_intrinsic_read_first_invocation:
1957 case nir_intrinsic_read_invocation: {
1958 LValues &newDefs = convert(&insn->def);
1959 const DataType dType = getDType(insn);
1960 Value *tmp = getScratch();
1961
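      // For read_first_invocation: voting "any" on an always-true predicate
      // yields the mask of active lanes; bit-reversing it and running BFIND in
      // shift-amount mode gives the index of the lowest set bit, i.e. the first
      // active lane, which is then used as the SHFL index.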
1962 if (op == nir_intrinsic_read_first_invocation) {
1963 mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
1964 mkOp1(OP_BREV, TYPE_U32, tmp, tmp);
1965 mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
1966 } else
1967 tmp = getSrc(&insn->src[1], 0);
1968
1969 for (uint8_t i = 0; i < dest_components; ++i) {
1970 mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f))
1971 ->subOp = NV50_IR_SUBOP_SHFL_IDX;
1972 }
1973 break;
1974 }
1975 case nir_intrinsic_load_per_vertex_input: {
1976 const DataType dType = getDType(insn);
1977 LValues &newDefs = convert(&insn->def);
1978 Value *indirectVertex;
1979 Value *indirectOffset;
1980 uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
1981 uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
1982
1983 Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
1984 mkImm(baseVertex), indirectVertex);
1985 for (uint8_t i = 0u; i < dest_components; ++i) {
1986 uint32_t address = getSlotAddress(insn, idx, i);
1987 loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0,
1988 indirectOffset, vtxBase, info_out->in[idx].patch);
1989 }
1990 break;
1991 }
1992 case nir_intrinsic_load_per_vertex_output: {
1993 const DataType dType = getDType(insn);
1994 LValues &newDefs = convert(&insn->def);
1995 Value *indirectVertex;
1996 Value *indirectOffset;
1997 uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
1998 uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
1999 Value *vtxBase = NULL;
2000
2001 if (indirectVertex)
2002 vtxBase = indirectVertex;
2003 else
2004 vtxBase = loadImm(NULL, baseVertex);
2005
2006 vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, vtxBase);
2007
2008 for (uint8_t i = 0u; i < dest_components; ++i) {
2009 uint32_t address = getSlotAddress(insn, idx, i);
2010 loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0,
2011 indirectOffset, vtxBase, info_out->in[idx].patch);
2012 }
2013 break;
2014 }
2015 case nir_intrinsic_emit_vertex: {
2016 uint32_t idx = nir_intrinsic_stream_id(insn);
2017 mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
2018 break;
2019 }
2020 case nir_intrinsic_end_primitive: {
2021 uint32_t idx = nir_intrinsic_stream_id(insn);
2022 if (idx)
2023 break;
2024 mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
2025 break;
2026 }
2027 case nir_intrinsic_load_ubo: {
2028 const DataType dType = getDType(insn);
2029 LValues &newDefs = convert(&insn->def);
2030 Value *indirectIndex;
2031 Value *indirectOffset;
2032 uint32_t index = getIndirect(&insn->src[0], 0, indirectIndex);
2033 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2034 if (indirectOffset)
2035 indirectOffset = mkOp1v(OP_MOV, TYPE_U32, getSSA(4, FILE_ADDRESS), indirectOffset);
2036
2037 for (uint8_t i = 0u; i < dest_components; ++i) {
2038 loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i,
2039 indirectOffset, indirectIndex);
2040 }
2041 break;
2042 }
2043 case nir_intrinsic_get_ssbo_size: {
2044 LValues &newDefs = convert(&insn->def);
2045 const DataType dType = getDType(insn);
2046 Value *indirectBuffer;
2047 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2048
2049 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, 0);
2050 mkOp1(OP_BUFQ, dType, newDefs[0], sym)->setIndirect(0, 0, indirectBuffer);
2051 break;
2052 }
2053 case nir_intrinsic_store_ssbo: {
2054 DataType sType = getSType(insn->src[0], false, false);
2055 Value *indirectBuffer;
2056 Value *indirectOffset;
2057 uint32_t buffer = getIndirect(&insn->src[1], 0, indirectBuffer);
2058 uint32_t offset = getIndirect(&insn->src[2], 0, indirectOffset);
2059
2060 CacheMode cache = convert(nir_intrinsic_access(insn));
2061
2062 for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
2063 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2064 continue;
2065 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType,
2066 offset + i * typeSizeof(sType));
2067 Instruction *st = mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i));
2068 st->setIndirect(0, 1, indirectBuffer);
2069 st->cache = cache;
2070 }
2071 info_out->io.globalAccess |= 0x2;
2072 break;
2073 }
2074 case nir_intrinsic_load_ssbo: {
2075 const DataType dType = getDType(insn);
2076 LValues &newDefs = convert(&insn->def);
2077 Value *indirectBuffer;
2078 Value *indirectOffset;
2079 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2080 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2081
2082 CacheMode cache = convert(nir_intrinsic_access(insn));
2083
2084 for (uint8_t i = 0u; i < dest_components; ++i)
2085 loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i,
2086 indirectOffset, indirectBuffer, false, cache);
2087
2088 info_out->io.globalAccess |= 0x1;
2089 break;
2090 }
2091 case nir_intrinsic_shared_atomic:
2092 case nir_intrinsic_shared_atomic_swap: {
2093 const DataType dType = getDType(insn);
2094 LValues &newDefs = convert(&insn->def);
2095 Value *indirectOffset;
2096 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2097 Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, dType, offset);
2098 Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
2099 if (op == nir_intrinsic_shared_atomic_swap)
2100 atom->setSrc(2, getSrc(&insn->src[2], 0));
2101 atom->setIndirect(0, 0, indirectOffset);
2102 atom->subOp = getAtomicSubOp(nir_intrinsic_atomic_op(insn));
2103 break;
2104 }
2105 case nir_intrinsic_ssbo_atomic:
2106 case nir_intrinsic_ssbo_atomic_swap: {
2107 const DataType dType = getDType(insn);
2108 LValues &newDefs = convert(&insn->def);
2109 Value *indirectBuffer;
2110 Value *indirectOffset;
2111 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2112 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2113
2114 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, offset);
2115 Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym,
2116 getSrc(&insn->src[2], 0));
2117 if (op == nir_intrinsic_ssbo_atomic_swap)
2118 atom->setSrc(2, getSrc(&insn->src[3], 0));
2119 atom->setIndirect(0, 0, indirectOffset);
2120 atom->setIndirect(0, 1, indirectBuffer);
2121 atom->subOp = getAtomicSubOp(nir_intrinsic_atomic_op(insn));
2122
2123 info_out->io.globalAccess |= 0x2;
2124 break;
2125 }
2126 case nir_intrinsic_global_atomic:
2127 case nir_intrinsic_global_atomic_swap: {
2128 const DataType dType = getDType(insn);
2129 LValues &newDefs = convert(&insn->def);
2130 Value *address;
2131 uint32_t offset = getIndirect(&insn->src[0], 0, address);
2132
2133 Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, dType, offset);
2134 Instruction *atom =
2135 mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
2136 if (op == nir_intrinsic_global_atomic_swap)
2137 atom->setSrc(2, getSrc(&insn->src[2], 0));
2138 atom->setIndirect(0, 0, address);
2139 atom->subOp = getAtomicSubOp(nir_intrinsic_atomic_op(insn));
2140
2141 info_out->io.globalAccess |= 0x2;
2142 break;
2143 }
2144 case nir_intrinsic_bindless_image_atomic:
2145 case nir_intrinsic_bindless_image_atomic_swap:
2146 case nir_intrinsic_bindless_image_load:
2147 case nir_intrinsic_bindless_image_samples:
2148 case nir_intrinsic_bindless_image_size:
2149 case nir_intrinsic_bindless_image_store:
2150 case nir_intrinsic_image_atomic:
2151 case nir_intrinsic_image_atomic_swap:
2152 case nir_intrinsic_image_load:
2153 case nir_intrinsic_image_samples:
2154 case nir_intrinsic_image_size:
2155 case nir_intrinsic_image_store: {
2156 std::vector<Value*> srcs, defs;
2157 Value *indirect;
2158 DataType ty;
2159 int subOp = 0;
2160
2161 uint32_t mask = 0;
2162 TexInstruction::Target target =
2163 convert(nir_intrinsic_image_dim(insn), !!nir_intrinsic_image_array(insn), false);
2164 unsigned int argCount = getNIRArgCount(target);
2165 uint16_t location = 0;
2166
2167 if (opInfo.has_dest) {
2168 LValues &newDefs = convert(&insn->def);
2169 for (uint8_t i = 0u; i < newDefs.size(); ++i) {
2170 defs.push_back(newDefs[i]);
2171 mask |= 1 << i;
2172 }
2173 }
2174
2175 int lod_src = -1;
2176 bool bindless = false;
2177 switch (op) {
2178 case nir_intrinsic_bindless_image_atomic:
2179 case nir_intrinsic_bindless_image_atomic_swap:
2180 ty = getDType(insn);
2181 bindless = true;
2182 info_out->io.globalAccess |= 0x2;
2183 mask = 0x1;
2184 subOp = getAtomicSubOp(nir_intrinsic_atomic_op(insn));
2185 break;
2186 case nir_intrinsic_image_atomic:
2187 case nir_intrinsic_image_atomic_swap:
2188 ty = getDType(insn);
2189 bindless = false;
2190 info_out->io.globalAccess |= 0x2;
2191 mask = 0x1;
2192 subOp = getAtomicSubOp(nir_intrinsic_atomic_op(insn));
2193 break;
2194 case nir_intrinsic_bindless_image_load:
2195 case nir_intrinsic_image_load:
2196 ty = TYPE_U32;
2197 bindless = op == nir_intrinsic_bindless_image_load;
2198 info_out->io.globalAccess |= 0x1;
2199 lod_src = 4;
2200 break;
2201 case nir_intrinsic_bindless_image_store:
2202 case nir_intrinsic_image_store:
2203 ty = TYPE_U32;
2204 bindless = op == nir_intrinsic_bindless_image_store;
2205 info_out->io.globalAccess |= 0x2;
2206 lod_src = 5;
2207 mask = 0xf;
2208 break;
2209 case nir_intrinsic_bindless_image_samples:
2210 mask = 0x8;
2211 FALLTHROUGH;
2212 case nir_intrinsic_image_samples:
2213 argCount = 0; /* No coordinates */
2214 ty = TYPE_U32;
2215 bindless = op == nir_intrinsic_bindless_image_samples;
2216 mask = 0x8;
2217 break;
2218 case nir_intrinsic_bindless_image_size:
2219 case nir_intrinsic_image_size:
2220 assert(nir_src_as_uint(insn->src[1]) == 0);
2221 argCount = 0; /* No coordinates */
2222 ty = TYPE_U32;
2223 bindless = op == nir_intrinsic_bindless_image_size;
2224 break;
2225 default:
2226 unreachable("unhandled image opcode");
2227 break;
2228 }
2229
2230 if (bindless)
2231 indirect = getSrc(&insn->src[0], 0);
2232 else
2233 location = getIndirect(&insn->src[0], 0, indirect);
2234
2235 /* Pre-GF100, SSBOs and images are in the same HW file, managed by
2236        * prop.cp.gmem. Images are located after SSBOs.
2237 */
2238 if (info->target < NVISA_GF100_CHIPSET)
2239 location += nir->info.num_ssbos;
2240
2241 // coords
2242 if (opInfo.num_srcs >= 2)
2243 for (unsigned int i = 0u; i < argCount; ++i)
2244 srcs.push_back(getSrc(&insn->src[1], i));
2245
2246 // the sampler is just another src added after coords
2247 if (opInfo.num_srcs >= 3 && target.isMS())
2248 srcs.push_back(getSrc(&insn->src[2], 0));
2249
2250 if (opInfo.num_srcs >= 4 && lod_src != 4) {
2251 unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;
2252 for (uint8_t i = 0u; i < components; ++i)
2253 srcs.push_back(getSrc(&insn->src[3], i));
2254 }
2255
2256 if (opInfo.num_srcs >= 5 && lod_src != 5)
2257          // 1 for atomic swap
2258 for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)
2259 srcs.push_back(getSrc(&insn->src[4], i));
2260
2261 TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);
2262 texi->tex.bindless = bindless;
2263 texi->tex.format = nv50_ir::TexInstruction::translateImgFormat(nir_intrinsic_format(insn));
2264 texi->tex.mask = mask;
2265 texi->cache = convert(nir_intrinsic_access(insn));
2266 texi->setType(ty);
2267 texi->subOp = subOp;
2268
2269 if (indirect)
2270 texi->setIndirectR(indirect);
2271
2272 break;
2273 }
2274 case nir_intrinsic_store_scratch:
2275 case nir_intrinsic_store_shared: {
2276 DataType sType = getSType(insn->src[0], false, false);
2277 Value *indirectOffset;
2278 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2279 if (indirectOffset)
2280 indirectOffset = mkOp1v(OP_MOV, TYPE_U32, getSSA(4, FILE_ADDRESS), indirectOffset);
2281
2282 for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
2283 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2284 continue;
2285 Symbol *sym = mkSymbol(getFile(op), 0, sType, offset + i * typeSizeof(sType));
2286 mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i));
2287 }
2288 break;
2289 }
2290 case nir_intrinsic_load_kernel_input:
2291 case nir_intrinsic_load_scratch:
2292 case nir_intrinsic_load_shared: {
2293 const DataType dType = getDType(insn);
2294 LValues &newDefs = convert(&insn->def);
2295 Value *indirectOffset;
2296 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2297 if (indirectOffset)
2298 indirectOffset = mkOp1v(OP_MOV, TYPE_U32, getSSA(4, FILE_ADDRESS), indirectOffset);
2299
2300 for (uint8_t i = 0u; i < dest_components; ++i)
2301 loadFrom(getFile(op), 0, dType, newDefs[i], offset, i, indirectOffset);
2302
2303 break;
2304 }
2305 case nir_intrinsic_barrier: {
2306 mesa_scope exec_scope = nir_intrinsic_execution_scope(insn);
2307 mesa_scope mem_scope = nir_intrinsic_memory_scope(insn);
2308 nir_variable_mode modes = nir_intrinsic_memory_modes(insn);
2309 nir_variable_mode valid_modes =
2310 nir_var_mem_global | nir_var_image | nir_var_mem_ssbo | nir_var_mem_shared;
2311
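      // Memory scopes of queue-family or wider need a GPU-global membar;
      // narrower scopes only need a CTA-local one.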
2312 if (mem_scope != SCOPE_NONE && (modes & valid_modes)) {
2313
2314 Instruction *bar = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
2315 bar->fixed = 1;
2316
2317 if (mem_scope >= SCOPE_QUEUE_FAMILY)
2318 bar->subOp = NV50_IR_SUBOP_MEMBAR(M, GL);
2319 else
2320 bar->subOp = NV50_IR_SUBOP_MEMBAR(M, CTA);
2321 }
2322
2323 if (exec_scope != SCOPE_NONE &&
2324 !(exec_scope == SCOPE_WORKGROUP && nir->info.stage == MESA_SHADER_TESS_CTRL)) {
2325 Instruction *bar = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0));
2326 bar->fixed = 1;
2327 bar->subOp = NV50_IR_SUBOP_BAR_SYNC;
2328 info_out->numBarriers = 1;
2329 }
2330
2331 break;
2332 }
2333 case nir_intrinsic_shader_clock: {
2334 const DataType dType = getDType(insn);
2335 LValues &newDefs = convert(&insn->def);
2336
2337 loadImm(newDefs[0], 0u);
2338 mkOp1(OP_RDSV, dType, newDefs[1], mkSysVal(SV_CLOCK, 0))->fixed = 1;
2339 break;
2340 }
2341 case nir_intrinsic_load_global:
2342 case nir_intrinsic_load_global_constant: {
2343 const DataType dType = getDType(insn);
2344 LValues &newDefs = convert(&insn->def);
2345 Value *indirectOffset;
2346 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2347
2348 for (auto i = 0u; i < dest_components; ++i)
2349 loadFrom(FILE_MEMORY_GLOBAL, 0, dType, newDefs[i], offset, i, indirectOffset);
2350
2351 info_out->io.globalAccess |= 0x1;
2352 break;
2353 }
2354 case nir_intrinsic_store_global: {
2355 DataType sType = getSType(insn->src[0], false, false);
2356
2357 for (auto i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
2358 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2359 continue;
2360 if (typeSizeof(sType) == 8) {
2361 Value *split[2];
2362 mkSplit(split, 4, getSrc(&insn->src[0], i));
2363
2364 Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, i * typeSizeof(sType));
2365 mkStore(OP_STORE, TYPE_U32, sym, getSrc(&insn->src[1], 0), split[0]);
2366
2367 sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, i * typeSizeof(sType) + 4);
2368 mkStore(OP_STORE, TYPE_U32, sym, getSrc(&insn->src[1], 0), split[1]);
2369 } else {
2370 Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, sType, i * typeSizeof(sType));
2371 mkStore(OP_STORE, sType, sym, getSrc(&insn->src[1], 0), getSrc(&insn->src[0], i));
2372 }
2373 }
2374
2375 info_out->io.globalAccess |= 0x2;
2376 break;
2377 }
2378 default:
2379 ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
2380 return false;
2381 }
2382
2383 return true;
2384 }
2385
2386 bool
2387 Converter::visit(nir_jump_instr *insn)
2388 {
2389 switch (insn->type) {
2390 case nir_jump_break:
2391 case nir_jump_continue: {
2392 bool isBreak = insn->type == nir_jump_break;
2393 nir_block *block = insn->instr.block;
2394 BasicBlock *target = convert(block->successors[0]);
2395 mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL);
2396 bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK);
2397 break;
2398 }
2399 default:
2400 ERROR("unknown nir_jump_type %u\n", insn->type);
2401 return false;
2402 }
2403
2404 return true;
2405 }
2406
2407 Value*
2408 Converter::convert(nir_load_const_instr *insn, uint8_t idx)
2409 {
2410 Value *val;
2411
2412 if (immInsertPos)
2413 setPosition(immInsertPos, true);
2414 else
2415 setPosition(bb, false);
2416
2417 switch (insn->def.bit_size) {
2418 case 64:
2419 val = loadImm(getSSA(8), insn->value[idx].u64);
2420 break;
2421 case 32:
2422 val = loadImm(getSSA(4), insn->value[idx].u32);
2423 break;
2424 case 16:
2425 val = loadImm(getSSA(4), insn->value[idx].u16);
2426 break;
2427 case 8:
2428 val = loadImm(getSSA(4), insn->value[idx].u8);
2429 break;
2430 default:
2431 unreachable("unhandled bit size!\n");
2432 }
2433 setPosition(bb, true);
2434 return val;
2435 }
2436
2437 bool
2438 Converter::visit(nir_load_const_instr *insn)
2439 {
2440 assert(insn->def.bit_size <= 64);
2441 immediates[insn->def.index] = insn;
2442 return true;
2443 }
2444
2445 #define DEFAULT_CHECKS \
2446 if (insn->def.num_components > 1) { \
2447 ERROR("nir_alu_instr only supported with 1 component!\n"); \
2448 return false; \
2449 }
2450 bool
2451 Converter::visit(nir_alu_instr *insn)
2452 {
2453 const nir_op op = insn->op;
2454 const nir_op_info &info = nir_op_infos[op];
2455 DataType dType = getDType(insn);
2456 const std::vector<DataType> sTypes = getSTypes(insn);
2457
2458 Instruction *oldPos = this->bb->getExit();
2459
2460 switch (op) {
2461 case nir_op_fabs:
2462 case nir_op_iabs:
2463 case nir_op_fadd:
2464 case nir_op_iadd:
2465 case nir_op_iand:
2466 case nir_op_fceil:
2467 case nir_op_fcos:
2468 case nir_op_fddx:
2469 case nir_op_fddx_coarse:
2470 case nir_op_fddx_fine:
2471 case nir_op_fddy:
2472 case nir_op_fddy_coarse:
2473 case nir_op_fddy_fine:
2474 case nir_op_fdiv:
2475 case nir_op_idiv:
2476 case nir_op_udiv:
2477 case nir_op_fexp2:
2478 case nir_op_ffloor:
2479 case nir_op_ffma:
2480 case nir_op_ffmaz:
2481 case nir_op_flog2:
2482 case nir_op_fmax:
2483 case nir_op_imax:
2484 case nir_op_umax:
2485 case nir_op_fmin:
2486 case nir_op_imin:
2487 case nir_op_umin:
2488 case nir_op_fmod:
2489 case nir_op_imod:
2490 case nir_op_umod:
2491 case nir_op_fmul:
2492 case nir_op_fmulz:
2493 case nir_op_amul:
2494 case nir_op_imul:
2495 case nir_op_imul_high:
2496 case nir_op_umul_high:
2497 case nir_op_fneg:
2498 case nir_op_ineg:
2499 case nir_op_inot:
2500 case nir_op_ior:
2501 case nir_op_pack_64_2x32_split:
2502 case nir_op_frcp:
2503 case nir_op_frem:
2504 case nir_op_irem:
2505 case nir_op_frsq:
2506 case nir_op_fsat:
2507 case nir_op_ishr:
2508 case nir_op_ushr:
2509 case nir_op_fsin:
2510 case nir_op_fsqrt:
2511 case nir_op_ftrunc:
2512 case nir_op_ishl:
2513 case nir_op_ixor: {
2514 DEFAULT_CHECKS;
2515 LValues &newDefs = convert(&insn->def);
2516 operation preOp = preOperationNeeded(op);
2517 if (preOp != OP_NOP) {
2518 assert(info.num_inputs < 2);
2519 Value *tmp = getSSA(typeSizeof(dType));
2520 Instruction *i0 = mkOp(preOp, dType, tmp);
2521 Instruction *i1 = mkOp(getOperation(op), dType, newDefs[0]);
2522 if (info.num_inputs) {
2523 i0->setSrc(0, getSrc(&insn->src[0]));
2524 i1->setSrc(0, tmp);
2525 }
2526 i1->subOp = getSubOp(op);
2527 } else {
2528 Instruction *i = mkOp(getOperation(op), dType, newDefs[0]);
2529 for (unsigned s = 0u; s < info.num_inputs; ++s) {
2530 i->setSrc(s, getSrc(&insn->src[s]));
2531
2532 switch (op) {
2533 case nir_op_fmul:
2534 case nir_op_ffma:
2535 i->dnz = this->info->io.mul_zero_wins;
2536 break;
2537 case nir_op_fmulz:
2538 case nir_op_ffmaz:
2539 i->dnz = true;
2540 break;
2541 default:
2542 break;
2543 }
2544 }
2545 i->subOp = getSubOp(op);
2546 }
2547 break;
2548 }
2549 case nir_op_ifind_msb:
2550 case nir_op_ufind_msb: {
2551 DEFAULT_CHECKS;
2552 LValues &newDefs = convert(&insn->def);
2553 dType = sTypes[0];
2554 mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2555 break;
2556 }
2557 case nir_op_fround_even: {
2558 DEFAULT_CHECKS;
2559 LValues &newDefs = convert(&insn->def);
2560 mkCvt(OP_CVT, dType, newDefs[0], dType, getSrc(&insn->src[0]))->rnd = ROUND_NI;
2561 break;
2562 }
2563 // convert instructions
2564 case nir_op_f2i8:
2565 case nir_op_f2u8:
2566 case nir_op_i2i8:
2567 case nir_op_u2u8:
2568 case nir_op_f2i16:
2569 case nir_op_f2u16:
2570 case nir_op_i2i16:
2571 case nir_op_u2u16:
2572 case nir_op_f2f32:
2573 case nir_op_f2i32:
2574 case nir_op_f2u32:
2575 case nir_op_i2f32:
2576 case nir_op_i2i32:
2577 case nir_op_u2f32:
2578 case nir_op_u2u32:
2579 case nir_op_f2f64:
2580 case nir_op_f2i64:
2581 case nir_op_f2u64:
2582 case nir_op_i2f64:
2583 case nir_op_i2i64:
2584 case nir_op_u2f64:
2585 case nir_op_u2u64: {
2586 DEFAULT_CHECKS;
2587 LValues &newDefs = convert(&insn->def);
2588 DataType stype = sTypes[0];
2589 Instruction *i = mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2590 if (::isFloatType(stype) && isIntType(dType))
2591 i->rnd = ROUND_Z;
2592 i->sType = stype;
2593 break;
2594 }
2595 // compare instructions
2596 case nir_op_ieq8:
2597 case nir_op_ige8:
2598 case nir_op_uge8:
2599 case nir_op_ilt8:
2600 case nir_op_ult8:
2601 case nir_op_ine8:
2602 case nir_op_ieq16:
2603 case nir_op_ige16:
2604 case nir_op_uge16:
2605 case nir_op_ilt16:
2606 case nir_op_ult16:
2607 case nir_op_ine16:
2608 case nir_op_feq32:
2609 case nir_op_ieq32:
2610 case nir_op_fge32:
2611 case nir_op_ige32:
2612 case nir_op_uge32:
2613 case nir_op_flt32:
2614 case nir_op_ilt32:
2615 case nir_op_ult32:
2616 case nir_op_fneu32:
2617 case nir_op_ine32: {
2618 DEFAULT_CHECKS;
2619 LValues &newDefs = convert(&insn->def);
2620 Instruction *i = mkCmp(getOperation(op),
2621 getCondCode(op),
2622 dType,
2623 newDefs[0],
2624 dType,
2625 getSrc(&insn->src[0]),
2626 getSrc(&insn->src[1]));
2627 if (info.num_inputs == 3)
2628 i->setSrc(2, getSrc(&insn->src[2]));
2629 i->sType = sTypes[0];
2630 break;
2631 }
2632 case nir_op_mov: {
2633 LValues &newDefs = convert(&insn->def);
2634 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2635 mkMov(newDefs[c], getSrc(&insn->src[0], c), dType);
2636 }
2637 break;
2638 }
2639 case nir_op_vec2:
2640 case nir_op_vec3:
2641 case nir_op_vec4:
2642 case nir_op_vec8:
2643 case nir_op_vec16: {
2644 LValues &newDefs = convert(&insn->def);
2645 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2646 mkMov(newDefs[c], getSrc(&insn->src[c]), dType);
2647 }
2648 break;
2649 }
2650 // (un)pack
2651 case nir_op_pack_64_2x32: {
2652 LValues &newDefs = convert(&insn->def);
2653 Instruction *merge = mkOp(OP_MERGE, dType, newDefs[0]);
2654 merge->setSrc(0, getSrc(&insn->src[0], 0));
2655 merge->setSrc(1, getSrc(&insn->src[0], 1));
2656 break;
2657 }
2658 case nir_op_pack_half_2x16_split: {
2659 LValues &newDefs = convert(&insn->def);
2660 Value *tmpH = getSSA();
2661 Value *tmpL = getSSA();
2662
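      // Convert both halves to f16 and insert the second one into bits [31:16]
      // of the result (INSBF with width 0x10 at offset 0x10).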
2663 mkCvt(OP_CVT, TYPE_F16, tmpL, TYPE_F32, getSrc(&insn->src[0]));
2664 mkCvt(OP_CVT, TYPE_F16, tmpH, TYPE_F32, getSrc(&insn->src[1]));
2665 mkOp3(OP_INSBF, TYPE_U32, newDefs[0], tmpH, mkImm(0x1010), tmpL);
2666 break;
2667 }
2668 case nir_op_unpack_half_2x16_split_x:
2669 case nir_op_unpack_half_2x16_split_y: {
2670 LValues &newDefs = convert(&insn->def);
2671 Instruction *cvt = mkCvt(OP_CVT, TYPE_F32, newDefs[0], TYPE_F16, getSrc(&insn->src[0]));
2672 if (op == nir_op_unpack_half_2x16_split_y)
2673 cvt->subOp = 1;
2674 break;
2675 }
2676 case nir_op_unpack_64_2x32: {
2677 LValues &newDefs = convert(&insn->def);
2678 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, newDefs[1]);
2679 break;
2680 }
2681 case nir_op_unpack_64_2x32_split_x: {
2682 LValues &newDefs = convert(&insn->def);
2683 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, getSSA());
2684 break;
2685 }
2686 case nir_op_unpack_64_2x32_split_y: {
2687 LValues &newDefs = convert(&insn->def);
2688 mkOp1(OP_SPLIT, dType, getSSA(), getSrc(&insn->src[0]))->setDef(1, newDefs[0]);
2689 break;
2690 }
2691 // special instructions
2692 case nir_op_fsign:
2693 case nir_op_isign: {
2694 DEFAULT_CHECKS;
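      // sign(x) is built as (x > 0) - (x < 0). For integer types OP_SET yields
      // 0/-1 rather than 0/1, so the subtraction operands are swapped below;
      // 64-bit results are then widened (CVT for doubles, sign-extend + merge
      // for integers).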
2695 DataType iType;
2696 if (::isFloatType(dType))
2697 iType = TYPE_F32;
2698 else
2699 iType = TYPE_S32;
2700
2701 LValues &newDefs = convert(&insn->def);
2702 LValue *val0 = getScratch();
2703 LValue *val1 = getScratch();
2704 mkCmp(OP_SET, CC_GT, iType, val0, dType, getSrc(&insn->src[0]), zero);
2705 mkCmp(OP_SET, CC_LT, iType, val1, dType, getSrc(&insn->src[0]), zero);
2706
2707 if (dType == TYPE_F64) {
2708 mkOp2(OP_SUB, iType, val0, val0, val1);
2709 mkCvt(OP_CVT, TYPE_F64, newDefs[0], iType, val0);
2710 } else if (dType == TYPE_S64 || dType == TYPE_U64) {
2711 mkOp2(OP_SUB, iType, val0, val1, val0);
2712 mkOp2(OP_SHR, iType, val1, val0, loadImm(NULL, 31));
2713 mkOp2(OP_MERGE, dType, newDefs[0], val0, val1);
2714 } else if (::isFloatType(dType))
2715 mkOp2(OP_SUB, iType, newDefs[0], val0, val1);
2716 else
2717 mkOp2(OP_SUB, iType, newDefs[0], val1, val0);
2718 break;
2719 }
2720 case nir_op_fcsel:
2721 case nir_op_b32csel: {
2722 DEFAULT_CHECKS;
2723 LValues &newDefs = convert(&insn->def);
2724 mkCmp(OP_SLCT, CC_NE, dType, newDefs[0], sTypes[0], getSrc(&insn->src[1]), getSrc(&insn->src[2]), getSrc(&insn->src[0]));
2725 break;
2726 }
2727 case nir_op_ibitfield_extract:
2728 case nir_op_ubitfield_extract: {
2729 DEFAULT_CHECKS;
2730 Value *tmp = getSSA();
2731 LValues &newDefs = convert(&insn->def);
2732 mkOp3(OP_INSBF, dType, tmp, getSrc(&insn->src[2]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2733 mkOp2(OP_EXTBF, dType, newDefs[0], getSrc(&insn->src[0]), tmp);
2734 break;
2735 }
2736 case nir_op_bfm: {
2737 DEFAULT_CHECKS;
2738 LValues &newDefs = convert(&insn->def);
2739 mkOp2(OP_BMSK, dType, newDefs[0], getSrc(&insn->src[1]), getSrc(&insn->src[0]))->subOp = NV50_IR_SUBOP_BMSK_W;
2740 break;
2741 }
2742 case nir_op_bitfield_insert: {
2743 DEFAULT_CHECKS;
2744 LValues &newDefs = convert(&insn->def);
2745 LValue *temp = getSSA();
2746 mkOp3(OP_INSBF, TYPE_U32, temp, getSrc(&insn->src[3]), mkImm(0x808), getSrc(&insn->src[2]));
2747 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[1]), temp, getSrc(&insn->src[0]));
2748 break;
2749 }
2750 case nir_op_bit_count: {
2751 DEFAULT_CHECKS;
2752 LValues &newDefs = convert(&insn->def);
2753 mkOp2(OP_POPCNT, dType, newDefs[0], getSrc(&insn->src[0]), getSrc(&insn->src[0]));
2754 break;
2755 }
2756 case nir_op_bitfield_reverse: {
2757 DEFAULT_CHECKS;
2758 LValues &newDefs = convert(&insn->def);
2759 mkOp1(OP_BREV, TYPE_U32, newDefs[0], getSrc(&insn->src[0]));
2760 break;
2761 }
2762 case nir_op_find_lsb: {
2763 DEFAULT_CHECKS;
2764 LValues &newDefs = convert(&insn->def);
2765 Value *tmp = getSSA();
2766 mkOp1(OP_BREV, TYPE_U32, tmp, getSrc(&insn->src[0]));
2767 mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
2768 break;
2769 }
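   // The extract_[iu]{8,16} cases below build a PRMT (byte permute) selector:
   // each result byte is chosen by one selector nibble, and setting bit 3 of a
   // nibble replicates the sign bit of the selected byte instead. E.g. for
   // extract_i8 with byte index 2: 2 * 0x1111 + 0x8880 = 0xaaa2, which places
   // byte 2 in the low result byte and its sign in the remaining three.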
2770 case nir_op_extract_u8: {
2771 DEFAULT_CHECKS;
2772 LValues &newDefs = convert(&insn->def);
2773 Value *prmt = getSSA();
2774 mkOp2(OP_OR, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x4440));
2775 mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
2776 break;
2777 }
2778 case nir_op_extract_i8: {
2779 DEFAULT_CHECKS;
2780 LValues &newDefs = convert(&insn->def);
2781 Value *prmt = getSSA();
2782 mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x1111), loadImm(NULL, 0x8880));
2783 mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
2784 break;
2785 }
2786 case nir_op_extract_u16: {
2787 DEFAULT_CHECKS;
2788 LValues &newDefs = convert(&insn->def);
2789 Value *prmt = getSSA();
2790 mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x22), loadImm(NULL, 0x4410));
2791 mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
2792 break;
2793 }
2794 case nir_op_extract_i16: {
2795 DEFAULT_CHECKS;
2796 LValues &newDefs = convert(&insn->def);
2797 Value *prmt = getSSA();
2798 mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x2222), loadImm(NULL, 0x9910));
2799 mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
2800 break;
2801 }
2802 case nir_op_fquantize2f16: {
2803 DEFAULT_CHECKS;
2804 LValues &newDefs = convert(&insn->def);
2805 Value *tmp = getSSA();
2806 mkCvt(OP_CVT, TYPE_F16, tmp, TYPE_F32, getSrc(&insn->src[0]))->ftz = 1;
2807 mkCvt(OP_CVT, TYPE_F32, newDefs[0], TYPE_F16, tmp);
2808 break;
2809 }
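   // Rotates are implemented with the funnel-shift instruction (SHF) by feeding
   // the same value into both halves; the W subop makes the shift amount wrap
   // at the register width.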
2810 case nir_op_urol: {
2811 DEFAULT_CHECKS;
2812 LValues &newDefs = convert(&insn->def);
2813 mkOp3(OP_SHF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]),
2814 getSrc(&insn->src[1]), getSrc(&insn->src[0]))
2815 ->subOp = NV50_IR_SUBOP_SHF_L |
2816 NV50_IR_SUBOP_SHF_W |
2817 NV50_IR_SUBOP_SHF_HI;
2818 break;
2819 }
2820 case nir_op_uror: {
2821 DEFAULT_CHECKS;
2822 LValues &newDefs = convert(&insn->def);
2823 mkOp3(OP_SHF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]),
2824 getSrc(&insn->src[1]), getSrc(&insn->src[0]))
2825 ->subOp = NV50_IR_SUBOP_SHF_R |
2826 NV50_IR_SUBOP_SHF_W |
2827 NV50_IR_SUBOP_SHF_LO;
2828 break;
2829 }
2830 // boolean conversions
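   // After nir_lower_bool_to_int32 booleans are 0 or ~0, so masking with the
   // bit pattern of 1.0 (0x3f800000 for f32, 0x3ff00000 for the f64 high word)
   // or with the integer 1 yields the converted value directly.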
2831 case nir_op_b2f32: {
2832 DEFAULT_CHECKS;
2833 LValues &newDefs = convert(&insn->def);
2834 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1.0f));
2835 break;
2836 }
2837 case nir_op_b2f64: {
2838 DEFAULT_CHECKS;
2839 LValues &newDefs = convert(&insn->def);
2840 Value *tmp = getSSA(4);
2841 mkOp2(OP_AND, TYPE_U32, tmp, getSrc(&insn->src[0]), loadImm(NULL, 0x3ff00000));
2842 mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp);
2843 break;
2844 }
2845 case nir_op_b2i8:
2846 case nir_op_b2i16:
2847 case nir_op_b2i32: {
2848 DEFAULT_CHECKS;
2849 LValues &newDefs = convert(&insn->def);
2850 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1));
2851 break;
2852 }
2853 case nir_op_b2i64: {
2854 DEFAULT_CHECKS;
2855 LValues &newDefs = convert(&insn->def);
2856 LValue *def = getScratch();
2857 mkOp2(OP_AND, TYPE_U32, def, getSrc(&insn->src[0]), loadImm(NULL, 1));
2858 mkOp2(OP_MERGE, TYPE_S64, newDefs[0], def, loadImm(NULL, 0));
2859 break;
2860 }
2861 default:
2862 ERROR("unknown nir_op %s\n", info.name);
2863 assert(false);
2864 return false;
2865 }
2866
2867 if (!oldPos) {
2868 oldPos = this->bb->getEntry();
2869 oldPos->precise = insn->exact;
2870 }
2871
2872 if (unlikely(!oldPos))
2873 return true;
2874
2875 while (oldPos->next) {
2876 oldPos = oldPos->next;
2877 oldPos->precise = insn->exact;
2878 }
2879
2880 return true;
2881 }
2882 #undef DEFAULT_CHECKS
2883
2884 bool
2885 Converter::visit(nir_undef_instr *insn)
2886 {
2887 LValues &newDefs = convert(&insn->def);
2888 for (uint8_t i = 0u; i < insn->def.num_components; ++i) {
2889 mkOp(OP_NOP, TYPE_NONE, newDefs[i]);
2890 }
2891 return true;
2892 }
2893
2894 #define CASE_SAMPLER(ty) \
2895 case GLSL_SAMPLER_DIM_ ## ty : \
2896 if (isArray && !isShadow) \
2897 return TEX_TARGET_ ## ty ## _ARRAY; \
2898 else if (!isArray && isShadow) \
2899 return TEX_TARGET_## ty ## _SHADOW; \
2900 else if (isArray && isShadow) \
2901 return TEX_TARGET_## ty ## _ARRAY_SHADOW; \
2902 else \
2903 return TEX_TARGET_ ## ty
2904
2905 TexTarget
2906 Converter::convert(glsl_sampler_dim dim, bool isArray, bool isShadow)
2907 {
2908 switch (dim) {
2909 CASE_SAMPLER(1D);
2910 case GLSL_SAMPLER_DIM_SUBPASS:
2911 CASE_SAMPLER(2D);
2912 CASE_SAMPLER(CUBE);
2913 case GLSL_SAMPLER_DIM_3D:
2914 return TEX_TARGET_3D;
2915 case GLSL_SAMPLER_DIM_MS:
2916 case GLSL_SAMPLER_DIM_SUBPASS_MS:
2917 if (isArray)
2918 return TEX_TARGET_2D_MS_ARRAY;
2919 return TEX_TARGET_2D_MS;
2920 case GLSL_SAMPLER_DIM_RECT:
2921 if (isShadow)
2922 return TEX_TARGET_RECT_SHADOW;
2923 return TEX_TARGET_RECT;
2924 case GLSL_SAMPLER_DIM_BUF:
2925 return TEX_TARGET_BUFFER;
2926 case GLSL_SAMPLER_DIM_EXTERNAL:
2927 return TEX_TARGET_2D;
2928 default:
2929 ERROR("unknown glsl_sampler_dim %u\n", dim);
2930 assert(false);
2931 return TEX_TARGET_COUNT;
2932 }
2933 }
2934 #undef CASE_SAMPLER
2935
2936 unsigned int
2937 Converter::getNIRArgCount(TexInstruction::Target& target)
2938 {
2939 unsigned int result = target.getArgCount();
2940 if (target.isCube() && target.isArray())
2941 result--;
2942 if (target.isMS())
2943 result--;
2944 return result;
2945 }
2946
2947 CacheMode
2948 Converter::convert(enum gl_access_qualifier access)
2949 {
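   // Map access qualifiers onto the closest cache mode: volatile bypasses the
   // caches (CV), coherent caches at the global level only (CG), everything
   // else gets the default cache-all mode (CA).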
2950 if (access & ACCESS_VOLATILE)
2951 return CACHE_CV;
2952 if (access & ACCESS_COHERENT)
2953 return CACHE_CG;
2954 return CACHE_CA;
2955 }
2956
2957 bool
2958 Converter::visit(nir_tex_instr *insn)
2959 {
2960 switch (insn->op) {
2961 case nir_texop_lod:
2962 case nir_texop_query_levels:
2963 case nir_texop_tex:
2964 case nir_texop_texture_samples:
2965 case nir_texop_tg4:
2966 case nir_texop_txb:
2967 case nir_texop_txd:
2968 case nir_texop_txf:
2969 case nir_texop_txf_ms:
2970 case nir_texop_txl:
2971 case nir_texop_txs: {
2972 LValues &newDefs = convert(&insn->def);
2973 std::vector<Value*> srcs;
2974 std::vector<Value*> defs;
2975 std::vector<nir_src*> offsets;
2976 uint8_t mask = 0;
2977 bool lz = false;
2978 TexInstruction::Target target = convert(insn->sampler_dim, insn->is_array, insn->is_shadow);
2979 operation op = getOperation(insn->op);
2980
2981 int r, s;
2982 int biasIdx = nir_tex_instr_src_index(insn, nir_tex_src_bias);
2983 int compIdx = nir_tex_instr_src_index(insn, nir_tex_src_comparator);
2984 int coordsIdx = nir_tex_instr_src_index(insn, nir_tex_src_coord);
2985 int ddxIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddx);
2986 int ddyIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddy);
2987 int msIdx = nir_tex_instr_src_index(insn, nir_tex_src_ms_index);
2988 int lodIdx = nir_tex_instr_src_index(insn, nir_tex_src_lod);
2989 int offsetIdx = nir_tex_instr_src_index(insn, nir_tex_src_offset);
2990 int sampOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_offset);
2991 int texOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_offset);
2992 int sampHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_handle);
2993 int texHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_handle);
2994
2995 bool bindless = sampHandleIdx != -1 || texHandleIdx != -1;
2996 assert((sampHandleIdx != -1) == (texHandleIdx != -1));
2997
2998 srcs.resize(insn->coord_components);
2999 for (uint8_t i = 0u; i < insn->coord_components; ++i)
3000 srcs[i] = getSrc(&insn->src[coordsIdx].src, i);
3001
3002       // sometimes we get fewer args than target.getArgCount, but codegen expects the latter
3003 if (insn->coord_components) {
3004 uint32_t argCount = target.getArgCount();
3005
3006 if (target.isMS())
3007 argCount -= 1;
3008
3009 for (uint32_t i = 0u; i < (argCount - insn->coord_components); ++i)
3010 srcs.push_back(getSSA());
3011 }
3012
3013 if (biasIdx != -1)
3014 srcs.push_back(getSrc(&insn->src[biasIdx].src, 0));
3015 // TXQ requires a lod argument for all queries we care about here.
3016 // For other ops on MS textures we skip it.
3017 if (lodIdx != -1 && !target.isMS())
3018 srcs.push_back(getSrc(&insn->src[lodIdx].src, 0));
3019 else if (op == OP_TXQ)
3020 srcs.push_back(zero); // TXQ always needs an LOD
3021 else if (op == OP_TXF)
3022 lz = true;
3023 if (msIdx != -1)
3024 srcs.push_back(getSrc(&insn->src[msIdx].src, 0));
3025 if (offsetIdx != -1)
3026 offsets.push_back(&insn->src[offsetIdx].src);
3027 if (compIdx != -1)
3028 srcs.push_back(getSrc(&insn->src[compIdx].src, 0));
3029 if (texOffIdx != -1) {
3030 srcs.push_back(getSrc(&insn->src[texOffIdx].src, 0));
3031 texOffIdx = srcs.size() - 1;
3032 }
3033 if (sampOffIdx != -1) {
3034 srcs.push_back(getSrc(&insn->src[sampOffIdx].src, 0));
3035 sampOffIdx = srcs.size() - 1;
3036 }
3037 if (bindless) {
3038          // currently we only use the lower 32 bits of the 64-bit handle
3039 Value *split[2];
3040 Value *handle = getSrc(&insn->src[sampHandleIdx].src, 0);
3041
3042 mkSplit(split, 4, handle);
3043
3044 srcs.push_back(split[0]);
3045 texOffIdx = srcs.size() - 1;
3046 }
3047
3048 r = bindless ? 0xff : insn->texture_index;
3049 s = bindless ? 0x1f : insn->sampler_index;
3050 if (op == OP_TXF || op == OP_TXQ)
3051 s = 0;
3052
3053 defs.resize(newDefs.size());
3054 for (uint8_t d = 0u; d < newDefs.size(); ++d) {
3055 defs[d] = newDefs[d];
3056 mask |= 1 << d;
3057 }
3058 if (target.isMS() || (op == OP_TEX && prog->getType() != Program::TYPE_FRAGMENT))
3059 lz = true;
3060
3061 // TODO figure out which instructions still need this.
3062 if (srcs.empty())
3063 srcs.push_back(loadImm(NULL, 0));
3064
3065 TexInstruction *texi = mkTex(op, target.getEnum(), r, s, defs, srcs);
3066 texi->tex.levelZero = lz;
3067 texi->tex.mask = mask;
3068 texi->tex.bindless = bindless;
3069
3070 if (texOffIdx != -1)
3071 texi->tex.rIndirectSrc = texOffIdx;
3072 if (sampOffIdx != -1)
3073 texi->tex.sIndirectSrc = sampOffIdx;
3074
3075 switch (insn->op) {
3076 case nir_texop_tg4:
3077 if (!target.isShadow())
3078 texi->tex.gatherComp = insn->component;
3079 break;
3080 case nir_texop_txs:
3081 texi->tex.query = TXQ_DIMS;
3082 break;
3083 case nir_texop_texture_samples:
3084 texi->tex.mask = 0x4;
3085 texi->tex.query = TXQ_TYPE;
3086 break;
3087 case nir_texop_query_levels:
3088 texi->tex.mask = 0x8;
3089 texi->tex.query = TXQ_DIMS;
3090 break;
3091       // TODO: TXQ_SAMPLE_POSITION needs the sample id instead of the LOD emitted further up.
3092 default:
3093 break;
3094 }
3095
3096 texi->tex.useOffsets = offsets.size();
3097 if (texi->tex.useOffsets) {
3098 for (uint8_t s = 0; s < texi->tex.useOffsets; ++s) {
3099 for (uint32_t c = 0u; c < 3; ++c) {
3100 uint8_t s2 = std::min(c, target.getDim() - 1);
3101 texi->offset[s][c].set(getSrc(offsets[s], s2));
3102 texi->offset[s][c].setInsn(texi);
3103 }
3104 }
3105 }
3106
3107 if (op == OP_TXG && offsetIdx == -1) {
3108 if (nir_tex_instr_has_explicit_tg4_offsets(insn)) {
3109 texi->tex.useOffsets = 4;
3110 setPosition(texi, false);
3111 for (uint8_t i = 0; i < 4; ++i) {
3112 for (uint8_t j = 0; j < 2; ++j) {
3113 texi->offset[i][j].set(loadImm(NULL, insn->tg4_offsets[i][j]));
3114 texi->offset[i][j].setInsn(texi);
3115 }
3116 }
3117 setPosition(texi, true);
3118 }
3119 }
3120
3121 if (ddxIdx != -1 && ddyIdx != -1) {
3122 for (uint8_t c = 0u; c < target.getDim() + target.isCube(); ++c) {
3123 texi->dPdx[c].set(getSrc(&insn->src[ddxIdx].src, c));
3124 texi->dPdy[c].set(getSrc(&insn->src[ddyIdx].src, c));
3125 }
3126 }
3127
3128 break;
3129 }
3130 default:
3131 ERROR("unknown nir_texop %u\n", insn->op);
3132 return false;
3133 }
3134 return true;
3135 }
3136
3137 /* nouveau's RA doesn't track the liveness of exported registers in the fragment
3138 * shader, so we need all the store_outputs to appear at the end of the shader
3139 * with no other instructions that might generate a temp value in between them.
3140 */
3141 static void
3142 nv_nir_move_stores_to_end(nir_shader *s)
3143 {
3144 nir_function_impl *impl = nir_shader_get_entrypoint(s);
3145 nir_block *block = nir_impl_last_block(impl);
3146 nir_instr *first_store = NULL;
3147
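   // first_store remembers the first store we moved to the end of the block;
   // once the forward walk reaches it again, everything left has already been
   // moved, so stop to avoid looping forever.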
3148 nir_foreach_instr_safe(instr, block) {
3149 if (instr == first_store)
3150 break;
3151 if (instr->type != nir_instr_type_intrinsic)
3152 continue;
3153 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
3154 if (intrin->intrinsic == nir_intrinsic_store_output) {
3155 nir_instr_remove(instr);
3156 nir_instr_insert(nir_after_block(block), instr);
3157
3158 if (!first_store)
3159 first_store = instr;
3160 }
3161 }
3162 nir_metadata_preserve(impl,
3163 nir_metadata_block_index |
3164 nir_metadata_dominance);
3165 }
3166
3167 unsigned
3168 Converter::lowerBitSizeCB(const nir_instr *instr, void *data)
3169 {
3170 Converter *instance = static_cast<Converter *>(data);
3171 nir_alu_instr *alu;
3172
3173 if (instr->type != nir_instr_type_alu)
3174 return 0;
3175
3176 alu = nir_instr_as_alu(instr);
3177
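   // Returning a non-zero bit size asks nir_lower_bit_size to perform the ALU
   // op at that width; returning 0 leaves the instruction untouched.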
3178 switch (alu->op) {
3179 /* TODO: Check for operation OP_SET instead of all listed nir opcodes
3180 * individually.
3181 *
3182 * Currently, we can't call getOperation(nir_op), since not all nir opcodes
3183 * are handled within getOperation() and we'd run into an assert().
3184 *
3185 * Adding all nir opcodes to getOperation() isn't trivial, since the
3186 * enum operation of some of the nir opcodes isn't distinct (e.g. depends
3187 * on the data type).
3188 */
3189 case nir_op_ieq8:
3190 case nir_op_ige8:
3191 case nir_op_uge8:
3192 case nir_op_ilt8:
3193 case nir_op_ult8:
3194 case nir_op_ine8:
3195 case nir_op_ieq16:
3196 case nir_op_ige16:
3197 case nir_op_uge16:
3198 case nir_op_ilt16:
3199 case nir_op_ult16:
3200 case nir_op_ine16:
3201 case nir_op_feq32:
3202 case nir_op_ieq32:
3203 case nir_op_fge32:
3204 case nir_op_ige32:
3205 case nir_op_uge32:
3206 case nir_op_flt32:
3207 case nir_op_ilt32:
3208 case nir_op_ult32:
3209 case nir_op_fneu32:
3210 case nir_op_ine32: {
3211 DataType stype = instance->getSTypes(alu)[0];
3212
3213 if (isSignedIntType(stype) && typeSizeof(stype) < 4)
3214 return 32;
3215
3216 return 0;
3217 }
3218 case nir_op_i2f64:
3219 case nir_op_u2f64: {
3220 DataType stype = instance->getSTypes(alu)[0];
3221
3222 if (isIntType(stype) && (typeSizeof(stype) <= 2))
3223 return 32;
3224
3225 return 0;
3226 }
3227 default:
3228 return 0;
3229 }
3230 }
3231
3232 bool
3233 Converter::run()
3234 {
3235 bool progress;
3236
3237 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
3238 nir_print_shader(nir, stderr);
3239
3240 struct nir_lower_subgroups_options subgroup_options = {};
3241 subgroup_options.subgroup_size = 32;
3242 subgroup_options.ballot_bit_size = 32;
3243 subgroup_options.ballot_components = 1;
3244 subgroup_options.lower_elect = true;
3245 subgroup_options.lower_inverse_ballot = true;
3246
3247 unsigned lower_flrp = (nir->options->lower_flrp16 ? 16 : 0) |
3248 (nir->options->lower_flrp32 ? 32 : 0) |
3249 (nir->options->lower_flrp64 ? 64 : 0);
3250 assert(lower_flrp);
3251
3252 info_out->io.genUserClip = info->io.genUserClip;
3253 if (info->io.genUserClip > 0) {
3254 bool lowered = false;
3255
3256 if (nir->info.stage == MESA_SHADER_VERTEX ||
3257 nir->info.stage == MESA_SHADER_TESS_EVAL)
3258 NIR_PASS(lowered, nir, nir_lower_clip_vs,
3259 (1 << info->io.genUserClip) - 1, true, false, NULL);
3260 else if (nir->info.stage == MESA_SHADER_GEOMETRY)
3261 NIR_PASS(lowered, nir, nir_lower_clip_gs,
3262 (1 << info->io.genUserClip) - 1, false, NULL);
3263
3264 if (lowered) {
3265 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
3266 NIR_PASS_V(nir, nir_lower_io_to_temporaries, impl, true, false);
3267 NIR_PASS_V(nir, nir_lower_global_vars_to_local);
3268 NIR_PASS_V(nir, nv50_nir_lower_load_user_clip_plane, info);
3269 } else {
3270 info_out->io.genUserClip = -1;
3271 }
3272 }
3273
3274 /* prepare for IO lowering */
3275 NIR_PASS_V(nir, nir_lower_flrp, lower_flrp, false);
3276 NIR_PASS_V(nir, nir_opt_deref);
3277 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
3278
3279 NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
3280 type_size, (nir_lower_io_options)0);
3281
3282 NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
3283
3284 struct nir_lower_tex_options tex_options = {};
3285 tex_options.lower_txp = ~0;
3286
3287 NIR_PASS_V(nir, nir_lower_tex, &tex_options);
3288
3289 NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
3290 NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
3291 NIR_PASS_V(nir, nir_lower_phis_to_scalar, false);
3292
3293 NIR_PASS_V(nir, nir_lower_frexp);
3294
3295    /* TODO: improve this lowering/optimisation loop so that we can use
3296 * nir_opt_idiv_const effectively before this.
3297 */
3298 nir_lower_idiv_options idiv_options = {
3299 .allow_fp16 = true,
3300 };
3301 NIR_PASS(progress, nir, nir_lower_idiv, &idiv_options);
3302
3303 do {
3304 progress = false;
3305 NIR_PASS(progress, nir, nir_copy_prop);
3306 NIR_PASS(progress, nir, nir_opt_remove_phis);
3307 NIR_PASS(progress, nir, nir_opt_loop);
3308 NIR_PASS(progress, nir, nir_opt_cse);
3309 NIR_PASS(progress, nir, nir_opt_algebraic);
3310 NIR_PASS(progress, nir, nir_opt_constant_folding);
3311 NIR_PASS(progress, nir, nir_copy_prop);
3312 NIR_PASS(progress, nir, nir_opt_dce);
3313 NIR_PASS(progress, nir, nir_opt_dead_cf);
3314 NIR_PASS(progress, nir, nir_lower_64bit_phis);
3315 } while (progress);
3316
3317 /* codegen assumes vec4 alignment for memory */
3318 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
3319 NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp, function_temp_type_info);
3320 NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_function_temp, nir_address_format_32bit_offset);
3321 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
3322
3323 NIR_PASS_V(nir, nir_opt_combine_barriers, NULL, NULL);
3324
3325 nir_move_options move_options =
3326 (nir_move_options)(nir_move_const_undef |
3327 nir_move_load_ubo |
3328 nir_move_load_uniform |
3329 nir_move_load_input);
3330 NIR_PASS_V(nir, nir_opt_sink, move_options);
3331 NIR_PASS_V(nir, nir_opt_move, move_options);
3332
3333 if (nir->info.stage == MESA_SHADER_FRAGMENT)
3334 NIR_PASS_V(nir, nv_nir_move_stores_to_end);
3335
3336 NIR_PASS(progress, nir, nir_opt_algebraic_late);
3337
3338 NIR_PASS_V(nir, nir_lower_bool_to_int32);
3339 NIR_PASS_V(nir, nir_lower_bit_size, Converter::lowerBitSizeCB, this);
3340
3341 NIR_PASS_V(nir, nir_divergence_analysis);
3342 NIR_PASS_V(nir, nir_convert_from_ssa, true);
3343
3344 // Garbage collect dead instructions
3345 nir_sweep(nir);
3346
3347 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
3348
3349 if (!parseNIR()) {
3350       ERROR("Couldn't parse NIR!\n");
3351 return false;
3352 }
3353
3354 if (!assignSlots()) {
3355 ERROR("Couldn't assign slots!\n");
3356 return false;
3357 }
3358
3359 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
3360 nir_print_shader(nir, stderr);
3361
3362 nir_foreach_function(function, nir) {
3363 if (!visit(function))
3364 return false;
3365 }
3366
3367 return true;
3368 }
3369
3370 } // unnamed namespace
3371
3372 namespace nv50_ir {
3373
3374 bool
3375 Program::makeFromNIR(struct nv50_ir_prog_info *info,
3376 struct nv50_ir_prog_info_out *info_out)
3377 {
3378 nir_shader *nir = info->bin.nir;
3379 Converter converter(this, nir, info, info_out);
3380 bool result = converter.run();
3381 if (!result)
3382 return result;
3383 LoweringHelper lowering;
3384 lowering.run(this);
3385 tlsSize = info_out->bin.tlsSpace;
3386 return result;
3387 }
3388
3389 } // namespace nv50_ir
3390
3391 static nir_shader_compiler_options
3392 nvir_nir_shader_compiler_options(int chipset, uint8_t shader_type)
3393 {
3394 nir_shader_compiler_options op = {};
3395 op.lower_fdiv = (chipset >= NVISA_GV100_CHIPSET);
3396 op.lower_ffma16 = false;
3397 op.lower_ffma32 = false;
3398 op.lower_ffma64 = false;
3399 op.fuse_ffma16 = false; /* nir doesn't track mad vs fma */
3400 op.fuse_ffma32 = false; /* nir doesn't track mad vs fma */
3401 op.fuse_ffma64 = false; /* nir doesn't track mad vs fma */
3402 op.lower_flrp16 = (chipset >= NVISA_GV100_CHIPSET);
3403 op.lower_flrp32 = true;
3404 op.lower_flrp64 = true;
3405 op.lower_fpow = true;
3406 op.lower_fsat = false;
3407 op.lower_fsqrt = false; // TODO: only before gm200
3408 op.lower_sincos = false;
3409 op.lower_fmod = true;
3410 op.lower_bitfield_extract = (chipset >= NVISA_GV100_CHIPSET || chipset < NVISA_GF100_CHIPSET);
3411 op.lower_bitfield_insert = (chipset >= NVISA_GV100_CHIPSET || chipset < NVISA_GF100_CHIPSET);
3412 op.lower_bitfield_reverse = (chipset < NVISA_GF100_CHIPSET);
3413 op.lower_bit_count = (chipset < NVISA_GF100_CHIPSET);
3414 op.lower_ifind_msb = (chipset < NVISA_GF100_CHIPSET);
3415 op.lower_find_lsb = (chipset < NVISA_GF100_CHIPSET);
3416 op.lower_uadd_carry = true; // TODO
3417 op.lower_usub_borrow = true; // TODO
3418 op.lower_mul_high = false;
3419 op.lower_fneg = false;
3420 op.lower_ineg = false;
3421 op.lower_scmp = true; // TODO: not implemented yet
3422 op.lower_vector_cmp = false;
3423 op.lower_bitops = false;
3424 op.lower_isign = (chipset >= NVISA_GV100_CHIPSET);
3425 op.lower_fsign = (chipset >= NVISA_GV100_CHIPSET);
3426 op.lower_fdph = false;
3427 op.lower_fdot = false;
3428 op.fdot_replicates = false; // TODO
3429 op.lower_ffloor = false; // TODO
3430 op.lower_ffract = true;
3431 op.lower_fceil = false; // TODO
3432 op.lower_ftrunc = false;
3433 op.lower_ldexp = true;
3434 op.lower_pack_half_2x16 = true;
3435 op.lower_pack_unorm_2x16 = true;
3436 op.lower_pack_snorm_2x16 = true;
3437 op.lower_pack_unorm_4x8 = true;
3438 op.lower_pack_snorm_4x8 = true;
3439 op.lower_unpack_half_2x16 = true;
3440 op.lower_unpack_unorm_2x16 = true;
3441 op.lower_unpack_snorm_2x16 = true;
3442 op.lower_unpack_unorm_4x8 = true;
3443 op.lower_unpack_snorm_4x8 = true;
3444 op.lower_pack_split = false;
3445 op.lower_extract_byte = (chipset < NVISA_GM107_CHIPSET);
3446 op.lower_extract_word = (chipset < NVISA_GM107_CHIPSET);
3447 op.lower_insert_byte = true;
3448 op.lower_insert_word = true;
3449 op.lower_all_io_to_temps = false;
3450 op.lower_all_io_to_elements = false;
3451 op.vertex_id_zero_based = false;
3452 op.lower_base_vertex = false;
3453 op.lower_helper_invocation = false;
3454 op.optimize_sample_mask_in = false;
3455 op.lower_cs_local_index_to_id = true;
3456 op.lower_cs_local_id_to_index = false;
3457 op.lower_device_index_to_zero = true;
3458 op.lower_wpos_pntc = false; // TODO
3459 op.lower_hadd = true; // TODO
3460 op.lower_uadd_sat = true; // TODO
3461 op.lower_usub_sat = true; // TODO
3462 op.lower_iadd_sat = true; // TODO
3463 op.vectorize_io = false;
3464 op.lower_to_scalar = false;
3465 op.unify_interfaces = false;
3466 op.use_interpolated_input_intrinsics = true;
3467 op.lower_mul_2x32_64 = true; // TODO
3468 op.has_rotate32 = (chipset >= NVISA_GV100_CHIPSET);
3469 op.has_imul24 = false;
3470 op.has_fmulz = (chipset > NVISA_G80_CHIPSET);
3471 op.intel_vec4 = false;
3472 op.lower_uniforms_to_ubo = true;
3473 op.force_indirect_unrolling = (nir_variable_mode) (
3474 ((shader_type == PIPE_SHADER_FRAGMENT) ? nir_var_shader_out : 0) |
3475 /* HW doesn't support indirect addressing of fragment program inputs
3476 * on Volta. The binary driver generates a function to handle every
3477 * possible indirection, and indirectly calls the function to handle
3478 * this instead.
3479 */
3480 ((chipset >= NVISA_GV100_CHIPSET && shader_type == PIPE_SHADER_FRAGMENT) ? nir_var_shader_in : 0)
3481 );
3482 op.force_indirect_unrolling_sampler = (chipset < NVISA_GF100_CHIPSET);
3483 op.max_unroll_iterations = 32;
3484 op.lower_int64_options = (nir_lower_int64_options) (
3485 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul64 : 0) |
3486 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_isign64 : 0) |
3487 nir_lower_divmod64 |
3488 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_high64 : 0) |
3489 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_bcsel64 : 0) |
3490 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_icmp64 : 0) |
3491 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_iabs64 : 0) |
3492 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_ineg64 : 0) |
3493 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_logic64 : 0) |
3494 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_minmax64 : 0) |
3495 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_shift64 : 0) |
3496 nir_lower_imul_2x32_64 |
3497 ((chipset >= NVISA_GM107_CHIPSET) ? nir_lower_extract64 : 0) |
3498 nir_lower_ufind_msb64 |
3499 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_conv64 : 0)
3500 );
3501 op.lower_doubles_options = (nir_lower_doubles_options) (
3502 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drcp : 0) |
3503 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsqrt : 0) |
3504 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drsq : 0) |
3505 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dfract : 0) |
3506 nir_lower_dmod |
3507 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsub : 0) |
3508 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_ddiv : 0)
3509 );
3510 return op;
3511 }
3512
3513 static const nir_shader_compiler_options g80_nir_shader_compiler_options =
3514 nvir_nir_shader_compiler_options(NVISA_G80_CHIPSET, PIPE_SHADER_TYPES);
3515 static const nir_shader_compiler_options g80_fs_nir_shader_compiler_options =
3516 nvir_nir_shader_compiler_options(NVISA_G80_CHIPSET, PIPE_SHADER_FRAGMENT);
3517 static const nir_shader_compiler_options gf100_nir_shader_compiler_options =
3518 nvir_nir_shader_compiler_options(NVISA_GF100_CHIPSET, PIPE_SHADER_TYPES);
3519 static const nir_shader_compiler_options gf100_fs_nir_shader_compiler_options =
3520 nvir_nir_shader_compiler_options(NVISA_GF100_CHIPSET, PIPE_SHADER_FRAGMENT);
3521 static const nir_shader_compiler_options gm107_nir_shader_compiler_options =
3522 nvir_nir_shader_compiler_options(NVISA_GM107_CHIPSET, PIPE_SHADER_TYPES);
3523 static const nir_shader_compiler_options gm107_fs_nir_shader_compiler_options =
3524 nvir_nir_shader_compiler_options(NVISA_GM107_CHIPSET, PIPE_SHADER_FRAGMENT);
3525 static const nir_shader_compiler_options gv100_nir_shader_compiler_options =
3526 nvir_nir_shader_compiler_options(NVISA_GV100_CHIPSET, PIPE_SHADER_TYPES);
3527 static const nir_shader_compiler_options gv100_fs_nir_shader_compiler_options =
3528 nvir_nir_shader_compiler_options(NVISA_GV100_CHIPSET, PIPE_SHADER_FRAGMENT);
3529
3530 const nir_shader_compiler_options *
3531 nv50_ir_nir_shader_compiler_options(int chipset, uint8_t shader_type)
3532 {
3533 if (chipset >= NVISA_GV100_CHIPSET) {
3534 if (shader_type == PIPE_SHADER_FRAGMENT) {
3535 return &gv100_fs_nir_shader_compiler_options;
3536 } else {
3537 return &gv100_nir_shader_compiler_options;
3538 }
3539 }
3540
3541 if (chipset >= NVISA_GM107_CHIPSET) {
3542 if (shader_type == PIPE_SHADER_FRAGMENT) {
3543 return &gm107_fs_nir_shader_compiler_options;
3544 } else {
3545 return &gm107_nir_shader_compiler_options;
3546 }
3547 }
3548
3549 if (chipset >= NVISA_GF100_CHIPSET) {
3550 if (shader_type == PIPE_SHADER_FRAGMENT) {
3551 return &gf100_fs_nir_shader_compiler_options;
3552 } else {
3553 return &gf100_nir_shader_compiler_options;
3554 }
3555 }
3556
3557 if (shader_type == PIPE_SHADER_FRAGMENT) {
3558 return &g80_fs_nir_shader_compiler_options;
3559 } else {
3560 return &g80_nir_shader_compiler_options;
3561 }
3562 }
3563