1 /*
2 * Copyright 2017 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Karol Herbst <kherbst@redhat.com>
23 */
24
25 #include "compiler/nir/nir.h"
26 #include "compiler/nir/nir_builder.h"
27
28 #include "util/u_debug.h"
29 #include "util/u_prim.h"
30
31 #include "nv50_ir.h"
32 #include "nv50_ir_lowering_helper.h"
33 #include "nv50_ir_target.h"
34 #include "nv50_ir_util.h"
35 #include "tgsi/tgsi_from_mesa.h"
36
37 #include <unordered_map>
38 #include <cstring>
39 #include <list>
40 #include <vector>
41
42 namespace {
43
44 using namespace nv50_ir;
45
46 int
47 type_size(const struct glsl_type *type, bool bindless)
48 {
49 return glsl_count_attribute_slots(type, false);
50 }
51
52 bool
53 nv50_nir_lower_load_user_clip_plane_cb(nir_builder *b, nir_intrinsic_instr *intrin, void *params)
54 {
55 struct nv50_ir_prog_info *info = (struct nv50_ir_prog_info *)params;
56
57 if (intrin->intrinsic != nir_intrinsic_load_user_clip_plane)
58 return false;
59
60 uint16_t offset = info->io.ucpBase + nir_intrinsic_ucp_id(intrin) * 16;
61
62 b->cursor = nir_before_instr(&intrin->instr);
63 nir_def *replacement =
64 nir_load_ubo(b, 4, 32, nir_imm_int(b, info->io.auxCBSlot),
65 nir_imm_int(b, offset), .range = ~0u);
66
67 nir_def_replace(&intrin->def, replacement);
68
69 return true;
70 }
71
72 bool
73 nv50_nir_lower_load_user_clip_plane(nir_shader *nir, struct nv50_ir_prog_info *info) {
74 return nir_shader_intrinsics_pass(nir, nv50_nir_lower_load_user_clip_plane_cb,
75 nir_metadata_control_flow,
76 info);
77 }
78
79 class Converter : public BuildUtil
80 {
81 public:
82 Converter(Program *, nir_shader *, nv50_ir_prog_info *, nv50_ir_prog_info_out *);
83
84 bool run();
85 private:
86 typedef std::vector<LValue*> LValues;
87 typedef std::unordered_map<unsigned, LValues> NirDefMap;
88 typedef std::unordered_map<unsigned, nir_load_const_instr*> ImmediateMap;
89 typedef std::unordered_map<unsigned, BasicBlock*> NirBlockMap;
90
91 CacheMode convert(enum gl_access_qualifier);
92 TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);
93 BasicBlock* convert(nir_block *);
94 SVSemantic convert(nir_intrinsic_op);
95 Value* convert(nir_load_const_instr*, uint8_t);
96 LValues& convert(nir_def *);
97
98 Value* getSrc(nir_alu_src *, uint8_t component = 0);
99 Value* getSrc(nir_src *, uint8_t, bool indirect = false);
100 Value* getSrc(nir_def *, uint8_t);
101
102 // returned value is the constant part of the given source (either the
103 // nir_src or the selected source component of an intrinsic). Even though
104 // this is mostly an optimization to be able to skip indirects in a few
105 // cases, sometimes we require immediate values or set some fields on
106 // instructions (e.g. tex) in order for codegen to consume those.
107 // If the found value doesn't have a constant part, the Value gets returned
108 // through the Value parameter.
109 uint32_t getIndirect(nir_src *, uint8_t, Value *&);
110 // isScalar indicates that the addressing is scalar, vec4 addressing is
111 // assumed otherwise
112 uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&,
113 bool isScalar = false);
114
115 uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);
116
117 uint8_t translateInterpMode(const struct nv50_ir_varying *var, operation& op);
118 void setInterpolate(nv50_ir_varying *,
119 uint8_t,
120 bool centroid,
121 unsigned semantics);
122
123 Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base,
124 uint8_t c, Value *indirect0 = NULL,
125 Value *indirect1 = NULL, bool patch = false,
126 CacheMode cache=CACHE_CA);
127 Instruction *loadVector(nir_intrinsic_instr *insn,
128 uint8_t buffer, Value *indirectBuffer,
129 uint32_t offset, Value *indirectOffset);
130 void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
131 Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,
132 Value *indirect1 = NULL);
133 Instruction *storeVector(nir_intrinsic_instr *insn,
134 uint8_t buffer, Value *indirectBuffer,
135 uint32_t offset, Value *indirectOffset);
136
137 static bool memVectorizeCb(unsigned align_mul,
138 unsigned align_offset,
139 unsigned bit_size,
140 unsigned num_components,
141 int64_t hole_size,
142 nir_intrinsic_instr *low,
143 nir_intrinsic_instr *high,
144 void *cb_data);
145 static nir_mem_access_size_align
146 getMemAccessSizeAlign(nir_intrinsic_op intrin,
147 uint8_t bytes,
148 uint8_t bit_size,
149 uint32_t align_mul,
150 uint32_t align_offset,
151 bool offset_is_const,
152 enum gl_access_qualifier access,
153 const void *cb_data);
154
155 bool isFloatType(nir_alu_type);
156 bool isSignedType(nir_alu_type);
157 bool isResultFloat(nir_op);
158 bool isResultSigned(nir_op);
159
160 DataType getDType(nir_alu_instr *);
161 DataType getDType(nir_intrinsic_instr *);
162 DataType getDType(nir_op, uint8_t);
163
164 DataFile getFile(nir_intrinsic_op) const;
165
166 std::vector<DataType> getSTypes(nir_alu_instr *);
167 DataType getSType(nir_src &, bool isFloat, bool isSigned);
168
169 operation getOperation(nir_intrinsic_op);
170 operation getOperation(nir_op);
171 operation getOperation(nir_texop);
172 operation preOperationNeeded(nir_op);
173
174 int getSubOp(nir_intrinsic_op);
175 int getSubOp(nir_op);
176 int getAtomicSubOp(nir_atomic_op);
177
178 CondCode getCondCode(nir_op);
179
180 bool assignSlots();
181 bool parseNIR();
182
183 bool visit(nir_alu_instr *);
184 bool visit(nir_block *);
185 bool visit(nir_cf_node *);
186 bool visit(nir_function *);
187 bool visit(nir_if *);
188 bool visit(nir_instr *);
189 bool visit(nir_intrinsic_instr *);
190 bool visit(nir_jump_instr *);
191 bool visit(nir_load_const_instr*);
192 bool visit(nir_loop *);
193 bool visit(nir_undef_instr *);
194 bool visit(nir_tex_instr *);
195
196 static unsigned lowerBitSizeCB(const nir_instr *, void *);
197
198 // tex stuff
199 unsigned int getNIRArgCount(TexInstruction::Target&);
200
201 void runOptLoop();
202
203 struct nv50_ir_prog_info *info;
204 struct nv50_ir_prog_info_out *info_out;
205
206 nir_shader *nir;
207
208 NirDefMap ssaDefs;
209 NirDefMap regDefs;
210 ImmediateMap immediates;
211 NirBlockMap blocks;
212 unsigned int curLoopDepth;
213 unsigned int curIfDepth;
214
215 BasicBlock *exit;
216 Value *zero;
217 Instruction *immInsertPos;
218
219 Value *outBase; // base address of vertex out patch (for TCP)
220
221 union {
222 struct {
223 Value *position;
224 } fp;
225 };
226 };
227
228 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info,
229 nv50_ir_prog_info_out *info_out)
230 : BuildUtil(prog),
231 info(info),
232 info_out(info_out),
233 nir(nir),
234 curLoopDepth(0),
235 curIfDepth(0),
236 exit(NULL),
237 immInsertPos(NULL),
238 outBase(nullptr)
239 {
240 zero = mkImm((uint32_t)0);
241 }
242
243 BasicBlock *
244 Converter::convert(nir_block *block)
245 {
246 NirBlockMap::iterator it = blocks.find(block->index);
247 if (it != blocks.end())
248 return it->second;
249
250 BasicBlock *bb = new BasicBlock(func);
251 blocks[block->index] = bb;
252 return bb;
253 }
254
255 bool
256 Converter::isFloatType(nir_alu_type type)
257 {
258 return nir_alu_type_get_base_type(type) == nir_type_float;
259 }
260
261 bool
262 Converter::isSignedType(nir_alu_type type)
263 {
264 return nir_alu_type_get_base_type(type) == nir_type_int;
265 }
266
267 bool
268 Converter::isResultFloat(nir_op op)
269 {
270 const nir_op_info &info = nir_op_infos[op];
271 if (info.output_type != nir_type_invalid)
272 return isFloatType(info.output_type);
273
274 ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
275 assert(false);
276 return true;
277 }
278
279 bool
280 Converter::isResultSigned(nir_op op)
281 {
282 switch (op) {
283 // there is no umul and we get wrong results if we treat all muls as signed
284 case nir_op_imul:
285 case nir_op_inot:
286 return false;
287 default:
288 const nir_op_info &info = nir_op_infos[op];
289 if (info.output_type != nir_type_invalid)
290 return isSignedType(info.output_type);
291 ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
292 assert(false);
293 return true;
294 }
295 }
296
297 DataType
298 Converter::getDType(nir_alu_instr *insn)
299 {
300 return getDType(insn->op, insn->def.bit_size);
301 }
302
303 DataType
304 Converter::getDType(nir_intrinsic_instr *insn)
305 {
306 bool isFloat, isSigned;
307 switch (insn->intrinsic) {
308 case nir_intrinsic_bindless_image_atomic:
309 case nir_intrinsic_global_atomic:
310 case nir_intrinsic_image_atomic:
311 case nir_intrinsic_shared_atomic:
312 case nir_intrinsic_ssbo_atomic: {
313 nir_alu_type type = nir_atomic_op_type(nir_intrinsic_atomic_op(insn));
314 isFloat = type == nir_type_float;
315 isSigned = type == nir_type_int;
316 break;
317 }
318 default:
319 isFloat = false;
320 isSigned = false;
321 break;
322 }
323
324 return typeOfSize(insn->def.bit_size / 8, isFloat, isSigned);
325 }
326
327 DataType
328 Converter::getDType(nir_op op, uint8_t bitSize)
329 {
330 DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
331 if (ty == TYPE_NONE) {
332 ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
333 assert(false);
334 }
335 return ty;
336 }
337
338 std::vector<DataType>
339 Converter::getSTypes(nir_alu_instr *insn)
340 {
341 const nir_op_info &info = nir_op_infos[insn->op];
342 std::vector<DataType> res(info.num_inputs);
343
344 for (uint8_t i = 0; i < info.num_inputs; ++i) {
345 if (info.input_types[i] != nir_type_invalid) {
346 res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
347 } else {
348 ERROR("getSType not implemented for %s idx %u\n", info.name, i);
349 assert(false);
350 res[i] = TYPE_NONE;
351 break;
352 }
353 }
354
355 return res;
356 }
357
358 DataType
359 Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
360 {
361 const uint8_t bitSize = src.ssa->bit_size;
362
363 DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
364 if (ty == TYPE_NONE) {
365 const char *str;
366 if (isFloat)
367 str = "float";
368 else if (isSigned)
369 str = "int";
370 else
371 str = "uint";
372 ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
373 assert(false);
374 }
375 return ty;
376 }
377
378 DataFile
379 Converter::getFile(nir_intrinsic_op op) const
380 {
381 switch (op) {
382 case nir_intrinsic_load_uniform:
383 case nir_intrinsic_load_ubo:
384 case nir_intrinsic_ldc_nv:
385 return FILE_MEMORY_CONST;
386 case nir_intrinsic_load_ssbo:
387 case nir_intrinsic_store_ssbo:
388 return FILE_MEMORY_BUFFER;
389 case nir_intrinsic_load_global:
390 case nir_intrinsic_store_global:
391 case nir_intrinsic_load_global_constant:
392 return FILE_MEMORY_GLOBAL;
393 case nir_intrinsic_load_scratch:
394 case nir_intrinsic_store_scratch:
395 return FILE_MEMORY_LOCAL;
396 case nir_intrinsic_load_shared:
397 case nir_intrinsic_store_shared:
398 return FILE_MEMORY_SHARED;
399 case nir_intrinsic_load_input:
400 case nir_intrinsic_load_interpolated_input:
401 case nir_intrinsic_load_kernel_input:
402 case nir_intrinsic_load_per_vertex_input:
403 return FILE_SHADER_INPUT;
404 case nir_intrinsic_load_output:
405 case nir_intrinsic_load_per_vertex_output:
406 case nir_intrinsic_store_output:
407 case nir_intrinsic_store_per_vertex_output:
408 return FILE_SHADER_OUTPUT;
409 default:
410 ERROR("couldn't get DataFile for op %s\n", nir_intrinsic_infos[op].name);
411 assert(false);
412 }
413 return FILE_NULL;
414 }
415
416 operation
417 Converter::getOperation(nir_op op)
418 {
419 switch (op) {
420 // basic ops with float and int variants
421 case nir_op_fabs:
422 case nir_op_iabs:
423 return OP_ABS;
424 case nir_op_fadd:
425 case nir_op_iadd:
426 return OP_ADD;
427 case nir_op_iand:
428 return OP_AND;
429 case nir_op_ifind_msb:
430 case nir_op_ufind_msb:
431 return OP_BFIND;
432 case nir_op_fceil:
433 return OP_CEIL;
434 case nir_op_fcos:
435 return OP_COS;
436 case nir_op_f2f32:
437 case nir_op_f2f64:
438 case nir_op_f2i8:
439 case nir_op_f2i16:
440 case nir_op_f2i32:
441 case nir_op_f2i64:
442 case nir_op_f2u8:
443 case nir_op_f2u16:
444 case nir_op_f2u32:
445 case nir_op_f2u64:
446 case nir_op_i2f32:
447 case nir_op_i2f64:
448 case nir_op_i2i8:
449 case nir_op_i2i16:
450 case nir_op_i2i32:
451 case nir_op_i2i64:
452 case nir_op_u2f32:
453 case nir_op_u2f64:
454 case nir_op_u2u8:
455 case nir_op_u2u16:
456 case nir_op_u2u32:
457 case nir_op_u2u64:
458 return OP_CVT;
459 case nir_op_fdiv:
460 case nir_op_idiv:
461 case nir_op_udiv:
462 return OP_DIV;
463 case nir_op_fexp2:
464 return OP_EX2;
465 case nir_op_ffloor:
466 return OP_FLOOR;
467 case nir_op_ffma:
468 case nir_op_ffmaz:
469 /* No FMA op pre-nvc0 */
470 if (info->target < 0xc0)
471 return OP_MAD;
472 return OP_FMA;
473 case nir_op_flog2:
474 return OP_LG2;
475 case nir_op_fmax:
476 case nir_op_imax:
477 case nir_op_umax:
478 return OP_MAX;
479 case nir_op_pack_64_2x32_split:
480 return OP_MERGE;
481 case nir_op_fmin:
482 case nir_op_imin:
483 case nir_op_umin:
484 return OP_MIN;
485 case nir_op_fmod:
486 case nir_op_imod:
487 case nir_op_umod:
488 case nir_op_frem:
489 case nir_op_irem:
490 return OP_MOD;
491 case nir_op_fmul:
492 case nir_op_fmulz:
493 case nir_op_amul:
494 case nir_op_imul:
495 case nir_op_imul_high:
496 case nir_op_umul_high:
497 return OP_MUL;
498 case nir_op_fneg:
499 case nir_op_ineg:
500 return OP_NEG;
501 case nir_op_inot:
502 return OP_NOT;
503 case nir_op_ior:
504 return OP_OR;
505 case nir_op_frcp:
506 return OP_RCP;
507 case nir_op_frsq:
508 return OP_RSQ;
509 case nir_op_fsat:
510 return OP_SAT;
511 case nir_op_ieq8:
512 case nir_op_ige8:
513 case nir_op_uge8:
514 case nir_op_ilt8:
515 case nir_op_ult8:
516 case nir_op_ine8:
517 case nir_op_ieq16:
518 case nir_op_ige16:
519 case nir_op_uge16:
520 case nir_op_ilt16:
521 case nir_op_ult16:
522 case nir_op_ine16:
523 case nir_op_feq32:
524 case nir_op_ieq32:
525 case nir_op_fge32:
526 case nir_op_ige32:
527 case nir_op_uge32:
528 case nir_op_flt32:
529 case nir_op_ilt32:
530 case nir_op_ult32:
531 case nir_op_fneu32:
532 case nir_op_ine32:
533 return OP_SET;
534 case nir_op_ishl:
535 return OP_SHL;
536 case nir_op_ishr:
537 case nir_op_ushr:
538 return OP_SHR;
539 case nir_op_fsin:
540 return OP_SIN;
541 case nir_op_fsqrt:
542 return OP_SQRT;
543 case nir_op_ftrunc:
544 return OP_TRUNC;
545 case nir_op_ixor:
546 return OP_XOR;
547 default:
548 ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
549 assert(false);
550 return OP_NOP;
551 }
552 }
553
554 operation
555 Converter::getOperation(nir_texop op)
556 {
557 switch (op) {
558 case nir_texop_tex:
559 return OP_TEX;
560 case nir_texop_lod:
561 return OP_TXLQ;
562 case nir_texop_txb:
563 return OP_TXB;
564 case nir_texop_txd:
565 return OP_TXD;
566 case nir_texop_txf:
567 case nir_texop_txf_ms:
568 return OP_TXF;
569 case nir_texop_tg4:
570 return OP_TXG;
571 case nir_texop_txl:
572 return OP_TXL;
573 case nir_texop_query_levels:
574 case nir_texop_texture_samples:
575 case nir_texop_txs:
576 return OP_TXQ;
577 default:
578 ERROR("couldn't get operation for nir_texop %u\n", op);
579 assert(false);
580 return OP_NOP;
581 }
582 }
583
584 operation
585 Converter::getOperation(nir_intrinsic_op op)
586 {
587 switch (op) {
588 case nir_intrinsic_emit_vertex:
589 return OP_EMIT;
590 case nir_intrinsic_end_primitive:
591 return OP_RESTART;
592 case nir_intrinsic_bindless_image_atomic:
593 case nir_intrinsic_image_atomic:
594 case nir_intrinsic_bindless_image_atomic_swap:
595 case nir_intrinsic_image_atomic_swap:
596 return OP_SUREDP;
597 case nir_intrinsic_bindless_image_load:
598 case nir_intrinsic_image_load:
599 return OP_SULDP;
600 case nir_intrinsic_bindless_image_samples:
601 case nir_intrinsic_image_samples:
602 case nir_intrinsic_bindless_image_size:
603 case nir_intrinsic_image_size:
604 return OP_SUQ;
605 case nir_intrinsic_bindless_image_store:
606 case nir_intrinsic_image_store:
607 return OP_SUSTP;
608 case nir_intrinsic_ddx:
609 case nir_intrinsic_ddx_coarse:
610 case nir_intrinsic_ddx_fine:
611 return OP_DFDX;
612 case nir_intrinsic_ddy:
613 case nir_intrinsic_ddy_coarse:
614 case nir_intrinsic_ddy_fine:
615 return OP_DFDY;
616 default:
617 ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
618 assert(false);
619 return OP_NOP;
620 }
621 }
622
623 operation
624 Converter::preOperationNeeded(nir_op op)
625 {
626 switch (op) {
627 case nir_op_fcos:
628 case nir_op_fsin:
629 return OP_PRESIN;
630 default:
631 return OP_NOP;
632 }
633 }
634
635 int
636 Converter::getSubOp(nir_op op)
637 {
638 switch (op) {
639 case nir_op_imul_high:
640 case nir_op_umul_high:
641 return NV50_IR_SUBOP_MUL_HIGH;
642 case nir_op_ishl:
643 case nir_op_ishr:
644 case nir_op_ushr:
645 return NV50_IR_SUBOP_SHIFT_WRAP;
646 default:
647 return 0;
648 }
649 }
650
651 int
652 Converter::getAtomicSubOp(nir_atomic_op op)
653 {
654 switch (op) {
655 case nir_atomic_op_fadd:
656 case nir_atomic_op_iadd:
657 return NV50_IR_SUBOP_ATOM_ADD;
658 case nir_atomic_op_iand:
659 return NV50_IR_SUBOP_ATOM_AND;
660 case nir_atomic_op_cmpxchg:
661 return NV50_IR_SUBOP_ATOM_CAS;
662 case nir_atomic_op_imax:
663 case nir_atomic_op_umax:
664 return NV50_IR_SUBOP_ATOM_MAX;
665 case nir_atomic_op_imin:
666 case nir_atomic_op_umin:
667 return NV50_IR_SUBOP_ATOM_MIN;
668 case nir_atomic_op_xchg:
669 return NV50_IR_SUBOP_ATOM_EXCH;
670 case nir_atomic_op_ior:
671 return NV50_IR_SUBOP_ATOM_OR;
672 case nir_atomic_op_ixor:
673 return NV50_IR_SUBOP_ATOM_XOR;
674 case nir_atomic_op_dec_wrap:
675 return NV50_IR_SUBOP_ATOM_DEC;
676 case nir_atomic_op_inc_wrap:
677 return NV50_IR_SUBOP_ATOM_INC;
678 default:
679 ERROR("couldn't get SubOp for atomic\n");
680 assert(false);
681 return 0;
682 }
683 }
684
685 int
686 Converter::getSubOp(nir_intrinsic_op op)
687 {
688 switch (op) {
689 case nir_intrinsic_vote_all:
690 return NV50_IR_SUBOP_VOTE_ALL;
691 case nir_intrinsic_vote_any:
692 return NV50_IR_SUBOP_VOTE_ANY;
693 case nir_intrinsic_vote_ieq:
694 return NV50_IR_SUBOP_VOTE_UNI;
695 default:
696 return 0;
697 }
698 }
699
700 CondCode
701 Converter::getCondCode(nir_op op)
702 {
703 switch (op) {
704 case nir_op_ieq8:
705 case nir_op_ieq16:
706 case nir_op_feq32:
707 case nir_op_ieq32:
708 return CC_EQ;
709 case nir_op_ige8:
710 case nir_op_uge8:
711 case nir_op_ige16:
712 case nir_op_uge16:
713 case nir_op_fge32:
714 case nir_op_ige32:
715 case nir_op_uge32:
716 return CC_GE;
717 case nir_op_ilt8:
718 case nir_op_ult8:
719 case nir_op_ilt16:
720 case nir_op_ult16:
721 case nir_op_flt32:
722 case nir_op_ilt32:
723 case nir_op_ult32:
724 return CC_LT;
725 case nir_op_fneu32:
726 return CC_NEU;
727 case nir_op_ine8:
728 case nir_op_ine16:
729 case nir_op_ine32:
730 return CC_NE;
731 default:
732 ERROR("couldn't get CondCode for op %s\n", nir_op_infos[op].name);
733 assert(false);
734 return CC_FL;
735 }
736 }
737
738 Converter::LValues&
739 Converter::convert(nir_def *def)
740 {
741 NirDefMap::iterator it = ssaDefs.find(def->index);
742 if (it != ssaDefs.end())
743 return it->second;
744
745 LValues newDef(def->num_components);
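// allocate at least 32 bits per value: sub-dword NIR types still occupy a full register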
746 for (uint8_t i = 0; i < def->num_components; i++)
747 newDef[i] = getSSA(std::max(4, def->bit_size / 8));
748 return ssaDefs[def->index] = newDef;
749 }
750
751 Value*
752 Converter::getSrc(nir_alu_src *src, uint8_t component)
753 {
754 return getSrc(&src->src, src->swizzle[component]);
755 }
756
757 Value*
758 Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
759 {
760 return getSrc(src->ssa, idx);
761 }
762
763 Value*
764 Converter::getSrc(nir_def *src, uint8_t idx)
765 {
766 ImmediateMap::iterator iit = immediates.find(src->index);
767 if (iit != immediates.end())
768 return convert((*iit).second, idx);
769
770 NirDefMap::iterator it = ssaDefs.find(src->index);
771 if (it == ssaDefs.end()) {
772 ERROR("SSA value %u not found\n", src->index);
773 assert(false);
774 return NULL;
775 }
776 return it->second[idx];
777 }
778
779 uint32_t
780 Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
781 {
782 nir_const_value *offset = nir_src_as_const_value(*src);
783
784 if (offset) {
785 indirect = NULL;
786 return offset[0].u32;
787 }
788
789 indirect = getSrc(src, idx, true);
790 return 0;
791 }
792
793 uint32_t
794 Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect, bool isScalar)
795 {
796 int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
797
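// vec4 addressing: the indirect index selects a 16 byte slot, hence the shift by 4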
798 if (indirect && !isScalar)
799 indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
800 return idx;
801 }
802
803 static void
804 vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
805 {
806 assert(name && index);
807
808 if (slot >= VERT_ATTRIB_MAX) {
809 ERROR("invalid varying slot %u\n", slot);
810 assert(false);
811 return;
812 }
813
814 if (slot >= VERT_ATTRIB_GENERIC0 &&
815 slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
816 *name = TGSI_SEMANTIC_GENERIC;
817 *index = slot - VERT_ATTRIB_GENERIC0;
818 return;
819 }
820
821 if (slot >= VERT_ATTRIB_TEX0 &&
822 slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
823 *name = TGSI_SEMANTIC_TEXCOORD;
824 *index = slot - VERT_ATTRIB_TEX0;
825 return;
826 }
827
828 switch (slot) {
829 case VERT_ATTRIB_COLOR0:
830 *name = TGSI_SEMANTIC_COLOR;
831 *index = 0;
832 break;
833 case VERT_ATTRIB_COLOR1:
834 *name = TGSI_SEMANTIC_COLOR;
835 *index = 1;
836 break;
837 case VERT_ATTRIB_EDGEFLAG:
838 *name = TGSI_SEMANTIC_EDGEFLAG;
839 *index = 0;
840 break;
841 case VERT_ATTRIB_FOG:
842 *name = TGSI_SEMANTIC_FOG;
843 *index = 0;
844 break;
845 case VERT_ATTRIB_NORMAL:
846 *name = TGSI_SEMANTIC_NORMAL;
847 *index = 0;
848 break;
849 case VERT_ATTRIB_POS:
850 *name = TGSI_SEMANTIC_POSITION;
851 *index = 0;
852 break;
853 case VERT_ATTRIB_POINT_SIZE:
854 *name = TGSI_SEMANTIC_PSIZE;
855 *index = 0;
856 break;
857 default:
858 ERROR("unknown vert attrib slot %u\n", slot);
859 assert(false);
860 break;
861 }
862 }
863
864 uint8_t
865 Converter::translateInterpMode(const struct nv50_ir_varying *var, operation& op)
866 {
867 uint8_t mode = NV50_IR_INTERP_PERSPECTIVE;
868
869 if (var->flat)
870 mode = NV50_IR_INTERP_FLAT;
871 else
872 if (var->linear)
873 mode = NV50_IR_INTERP_LINEAR;
874 else
875 if (var->sc)
876 mode = NV50_IR_INTERP_SC;
877
878 op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC)
879 ? OP_PINTERP : OP_LINTERP;
880
881 if (var->centroid)
882 mode |= NV50_IR_INTERP_CENTROID;
883
884 return mode;
885 }
886
887 void
888 Converter::setInterpolate(nv50_ir_varying *var,
889 uint8_t mode,
890 bool centroid,
891 unsigned semantic)
892 {
893 switch (mode) {
894 case INTERP_MODE_FLAT:
895 var->flat = 1;
896 break;
897 case INTERP_MODE_NONE:
898 if (semantic == TGSI_SEMANTIC_COLOR)
899 var->sc = 1;
900 else if (semantic == TGSI_SEMANTIC_POSITION)
901 var->linear = 1;
902 break;
903 case INTERP_MODE_NOPERSPECTIVE:
904 var->linear = 1;
905 break;
906 case INTERP_MODE_SMOOTH:
907 break;
908 }
909 var->centroid = centroid;
910 }
911
912 static uint16_t
913 calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
914 bool input, const nir_variable *var)
915 {
916 if (!glsl_type_is_array(type))
917 return glsl_count_attribute_slots(type, false);
918
919 uint16_t slots;
920 switch (stage) {
921 case Program::TYPE_GEOMETRY:
922 slots = glsl_count_attribute_slots(type, false);
923 if (input)
924 slots /= info.gs.vertices_in;
925 break;
926 case Program::TYPE_TESSELLATION_CONTROL:
927 case Program::TYPE_TESSELLATION_EVAL:
928 // remove first dimension
929 if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
930 slots = glsl_count_attribute_slots(type, false);
931 else
932 slots = glsl_count_attribute_slots(type->fields.array, false);
933 break;
934 default:
935 slots = glsl_count_attribute_slots(type, false);
936 break;
937 }
938
939 return slots;
940 }
941
942 static uint8_t
943 getMaskForType(const glsl_type *type, uint8_t slot) {
944 uint16_t comp = glsl_get_components(glsl_without_array(type));
945 comp = comp ? comp : 4;
946
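// 64-bit types occupy two 32-bit components each and may spill over into a second slot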
947 if (glsl_base_type_is_64bit(glsl_without_array(type)->base_type)) {
948 comp *= 2;
949 if (comp > 4) {
950 if (slot % 2)
951 comp -= 4;
952 else
953 comp = 4;
954 }
955 }
956
957 return (1 << comp) - 1;
958 }
959
960 bool Converter::assignSlots() {
961 unsigned name;
962 unsigned index;
963
964 info->io.viewportId = -1;
965 info->io.mul_zero_wins = nir->info.use_legacy_math_rules;
966 info_out->numInputs = 0;
967 info_out->numOutputs = 0;
968 info_out->numSysVals = 0;
969
970 uint8_t i;
971 BITSET_FOREACH_SET(i, nir->info.system_values_read, SYSTEM_VALUE_MAX) {
972 switch (i) {
973 case SYSTEM_VALUE_VERTEX_ID:
974 info_out->io.vertexId = info_out->numSysVals;
975 break;
976 case SYSTEM_VALUE_INSTANCE_ID:
977 info_out->io.instanceId = info_out->numSysVals;
978 break;
979 default:
980 break;
981 }
982
983 info_out->sv[info_out->numSysVals].sn = (gl_system_value)i;
984 info_out->numSysVals += 1;
985 }
986
987 if (prog->getType() == Program::TYPE_COMPUTE)
988 return true;
989
990 nir_foreach_shader_in_variable(var, nir) {
991 const glsl_type *type = var->type;
992 int slot = var->data.location;
993 uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
994 uint32_t vary = var->data.driver_location;
995 assert(vary + slots <= NV50_CODEGEN_MAX_VARYINGS);
996
997 switch(prog->getType()) {
998 case Program::TYPE_FRAGMENT:
999 tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,
1000 &name, &index);
1001 for (uint16_t i = 0; i < slots; ++i) {
1002 setInterpolate(&info_out->in[vary + i], var->data.interpolation,
1003 var->data.centroid | var->data.sample, name);
1004 }
1005 break;
1006 case Program::TYPE_GEOMETRY:
1007 tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,
1008 &name, &index);
1009 break;
1010 case Program::TYPE_TESSELLATION_CONTROL:
1011 case Program::TYPE_TESSELLATION_EVAL:
1012 tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,
1013 &name, &index);
1014 if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
1015 info_out->numPatchConstants = MAX2(info_out->numPatchConstants, index + slots);
1016 break;
1017 case Program::TYPE_VERTEX:
1018 if (slot >= VERT_ATTRIB_GENERIC0 && slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX)
1019 slot = VERT_ATTRIB_GENERIC0 + vary;
1020 vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
1021 switch (name) {
1022 case TGSI_SEMANTIC_EDGEFLAG:
1023 info_out->io.edgeFlagIn = vary;
1024 break;
1025 default:
1026 break;
1027 }
1028 break;
1029 default:
1030 ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1031 return false;
1032 }
1033
1034 if (var->data.compact) {
1035 assert(!(nir->info.outputs_read & 1ull << slot));
1036 if (nir_is_arrayed_io(var, nir->info.stage)) {
1037 assert(glsl_type_is_array(type->fields.array));
1038 assert(glsl_type_is_scalar(type->fields.array->fields.array));
1039 assert(slots == glsl_get_length(type->fields.array));
1040 } else {
1041 assert(glsl_type_is_array(type));
1042 assert(glsl_type_is_scalar(type->fields.array));
1043 assert(slots == glsl_get_length(type));
1044 }
1045 assert(!glsl_base_type_is_64bit(glsl_without_array(type)->base_type));
1046
1047 uint32_t comps = BITFIELD_RANGE(var->data.location_frac, slots);
1048 assert(!(comps & ~0xff));
1049
1050 if (comps & 0x0f) {
1051 nv50_ir_varying *v = &info_out->in[vary];
1052 v->patch = var->data.patch;
1053 v->sn = name;
1054 v->si = index;
1055 v->mask |= comps & 0x0f;
1056 info_out->numInputs =
1057 std::max<uint8_t>(info_out->numInputs, vary + 1);
1058 }
1059 if (comps & 0xf0) {
1060 nv50_ir_varying *v = &info_out->in[vary + 1];
1061 v->patch = var->data.patch;
1062 v->sn = name;
1063 v->si = index + 1;
1064 v->mask |= (comps & 0xf0) >> 4;
1065 info_out->numInputs =
1066 std::max<uint8_t>(info_out->numInputs, vary + 2);
1067 }
1068 } else {
1069 for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1070 nv50_ir_varying *v = &info_out->in[vary];
1071
1072 v->patch = var->data.patch;
1073 v->sn = name;
1074 v->si = index + i;
1075 v->mask |= getMaskForType(type, i) << var->data.location_frac;
1076 }
1077 info_out->numInputs = std::max<uint8_t>(info_out->numInputs, vary);
1078 }
1079 }
1080
1081 nir_foreach_shader_out_variable(var, nir) {
1082 const glsl_type *type = var->type;
1083 int slot = var->data.location;
1084 uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
1085 uint32_t vary = var->data.driver_location;
1086
1087 assert(vary < NV50_CODEGEN_MAX_VARYINGS);
1088
1089 switch(prog->getType()) {
1090 case Program::TYPE_FRAGMENT:
1091 tgsi_get_gl_frag_result_semantic((gl_frag_result)slot, &name, &index);
1092 switch (name) {
1093 case TGSI_SEMANTIC_COLOR:
1094 if (!var->data.fb_fetch_output)
1095 info_out->prop.fp.numColourResults++;
1096 if (var->data.location == FRAG_RESULT_COLOR &&
1097 nir->info.outputs_written & BITFIELD64_BIT(var->data.location))
1098 info_out->prop.fp.separateFragData = true;
1099 // sometimes we get FRAG_RESULT_DATAX with data.index 0
1100 // sometimes we get FRAG_RESULT_DATA0 with data.index X
1101 index = index == 0 ? var->data.index : index;
1102 break;
1103 case TGSI_SEMANTIC_POSITION:
1104 info_out->io.fragDepth = vary;
1105 info_out->prop.fp.writesDepth = true;
1106 break;
1107 case TGSI_SEMANTIC_SAMPLEMASK:
1108 info_out->io.sampleMask = vary;
1109 break;
1110 default:
1111 break;
1112 }
1113 break;
1114 case Program::TYPE_GEOMETRY:
1115 case Program::TYPE_TESSELLATION_CONTROL:
1116 case Program::TYPE_TESSELLATION_EVAL:
1117 case Program::TYPE_VERTEX:
1118 tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,
1119 &name, &index);
1120
1121 if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
1122 name != TGSI_SEMANTIC_TESSOUTER)
1123 info_out->numPatchConstants = MAX2(info_out->numPatchConstants, index + slots);
1124
1125 switch (name) {
1126 case TGSI_SEMANTIC_EDGEFLAG:
1127 info_out->io.edgeFlagOut = vary;
1128 break;
1129 default:
1130 break;
1131 }
1132 break;
1133 default:
1134 ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1135 return false;
1136 }
1137
1138 if (var->data.compact) {
1139 assert(!(nir->info.outputs_read & 1ull << slot));
1140 if (nir_is_arrayed_io(var, nir->info.stage)) {
1141 assert(glsl_type_is_array(type->fields.array));
1142 assert(glsl_type_is_scalar(type->fields.array->fields.array));
1143 assert(slots == glsl_get_length(type->fields.array));
1144 } else {
1145 assert(glsl_type_is_array(type));
1146 assert(glsl_type_is_scalar(type->fields.array));
1147 assert(slots == glsl_get_length(type));
1148 }
1149 assert(!glsl_base_type_is_64bit(glsl_without_array(type)->base_type));
1150
1151 uint32_t comps = BITFIELD_RANGE(var->data.location_frac, slots);
1152 assert(!(comps & ~0xff));
1153
1154 if (comps & 0x0f) {
1155 nv50_ir_varying *v = &info_out->out[vary];
1156 v->patch = var->data.patch;
1157 v->sn = name;
1158 v->si = index;
1159 v->mask |= comps & 0x0f;
1160 info_out->numOutputs =
1161 std::max<uint8_t>(info_out->numOutputs, vary + 1);
1162 }
1163 if (comps & 0xf0) {
1164 nv50_ir_varying *v = &info_out->out[vary + 1];
1165 v->patch = var->data.patch;
1166 v->sn = name;
1167 v->si = index + 1;
1168 v->mask |= (comps & 0xf0) >> 4;
1169 info_out->numOutputs =
1170 std::max<uint8_t>(info_out->numOutputs, vary + 2);
1171 }
1172 } else {
1173 for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1174 nv50_ir_varying *v = &info_out->out[vary];
1175 v->patch = var->data.patch;
1176 v->sn = name;
1177 v->si = index + i;
1178 v->mask |= getMaskForType(type, i) << var->data.location_frac;
1179
1180 if (nir->info.outputs_read & 1ull << slot)
1181 v->oread = 1;
1182 }
1183 info_out->numOutputs = std::max<uint8_t>(info_out->numOutputs, vary);
1184 }
1185 }
1186
1187 return info->assignSlots(info_out) == 0;
1188 }
1189
1190 uint32_t
1191 Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
1192 {
1193 DataType ty;
1194 int offset = nir_intrinsic_component(insn);
1195 bool input;
1196
1197 if (nir_intrinsic_infos[insn->intrinsic].has_dest)
1198 ty = getDType(insn);
1199 else
1200 ty = getSType(insn->src[0], false, false);
1201
1202 switch (insn->intrinsic) {
1203 case nir_intrinsic_load_input:
1204 case nir_intrinsic_load_interpolated_input:
1205 case nir_intrinsic_load_per_vertex_input:
1206 input = true;
1207 break;
1208 case nir_intrinsic_load_output:
1209 case nir_intrinsic_load_per_vertex_output:
1210 case nir_intrinsic_store_output:
1211 case nir_intrinsic_store_per_vertex_output:
1212 input = false;
1213 break;
1214 default:
1215 ERROR("unknown intrinsic in getSlotAddress %s",
1216 nir_intrinsic_infos[insn->intrinsic].name);
1217 input = false;
1218 assert(false);
1219 break;
1220 }
1221
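// 64-bit values take two 32-bit components, so the component index doubles and may carry over into the next slot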
1222 if (typeSizeof(ty) == 8) {
1223 slot *= 2;
1224 slot += offset;
1225 if (slot >= 4) {
1226 idx += 1;
1227 slot -= 4;
1228 }
1229 } else {
1230 slot += offset;
1231 }
1232
1233 assert(slot < 4);
1234 assert(!input || idx < NV50_CODEGEN_MAX_VARYINGS);
1235 assert(input || idx < NV50_CODEGEN_MAX_VARYINGS);
1236
1237 const nv50_ir_varying *vary = input ? info_out->in : info_out->out;
1238 return vary[idx].slot[slot] * 4;
1239 }
1240
1241 Instruction *
1242 Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
1243 uint32_t base, uint8_t c, Value *indirect0,
1244 Value *indirect1, bool patch, CacheMode cache)
1245 {
1246 unsigned int tySize = typeSizeof(ty);
1247
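// split 64-bit loads into two 32-bit halves if the access is indirect or the target lacks 64-bit loads for this file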
1248 if (tySize == 8 &&
1249 (indirect0 || !prog->getTarget()->isAccessSupported(file, TYPE_U64))) {
1250 Value *lo = getSSA();
1251 Value *hi = getSSA();
1252
1253 Instruction *loi =
1254 mkLoad(TYPE_U32, lo,
1255 mkSymbol(file, i, TYPE_U32, base + c * tySize),
1256 indirect0);
1257 loi->setIndirect(0, 1, indirect1);
1258 loi->perPatch = patch;
1259 loi->cache = cache;
1260
1261 Instruction *hii =
1262 mkLoad(TYPE_U32, hi,
1263 mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
1264 indirect0);
1265 hii->setIndirect(0, 1, indirect1);
1266 hii->perPatch = patch;
1267 hii->cache = cache;
1268
1269 return mkOp2(OP_MERGE, ty, def, lo, hi);
1270 } else {
1271 Instruction *ld =
1272 mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
1273 ld->setIndirect(0, 1, indirect1);
1274 ld->perPatch = patch;
1275 ld->cache = cache;
1276 return ld;
1277 }
1278 }
1279
1280 Instruction *
1281 Converter::loadVector(nir_intrinsic_instr *insn,
1282 uint8_t buffer, Value *indirectBuffer,
1283 uint32_t offset, Value *indirectOffset)
1284 {
1285 uint32_t load_bytes = insn->def.bit_size / 8 * insn->def.num_components;
1286 DataType ty = typeOfSize(load_bytes, false, false);
1287 DataFile file = getFile(insn->intrinsic);
1288
1289 LValues &newDefs = convert(&insn->def);
1290 Value* def;
1291 if (insn->def.num_components == 1) {
1292 def = newDefs[0];
1293 } else {
1294 def = getSSA(load_bytes);
1295 }
1296
1297 Instruction *ld = mkLoad(ty, def, mkSymbol(file, buffer, ty, offset), indirectOffset);
1298 ld->setIndirect(0, 1, indirectBuffer);
1299
1300 if (insn->def.num_components != 1) {
1301 Instruction *split = mkOp1(OP_SPLIT, ty, newDefs[0], def);
1302 for (int i = 1; i < insn->def.num_components; i++) {
1303 split->setDef(i, newDefs[i]);
1304 }
1305 }
1306
1307 return ld;
1308 }
1309
1310 void
1311 Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
1312 DataType ty, Value *src, uint8_t idx, uint8_t c,
1313 Value *indirect0, Value *indirect1)
1314 {
1315 uint8_t size = typeSizeof(ty);
1316 uint32_t address = getSlotAddress(insn, idx, c);
1317
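// indirect 64-bit stores have to be split into two 32-bit stores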
1318 if (size == 8 && indirect0) {
1319 Value *split[2];
1320 mkSplit(split, 4, src);
1321
1322 if (op == OP_EXPORT) {
1323 split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
1324 split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
1325 }
1326
1327 mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
1328 split[0])->perPatch = info_out->out[idx].patch;
1329 mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0,
1330 split[1])->perPatch = info_out->out[idx].patch;
1331 } else {
1332 if (op == OP_EXPORT)
1333 src = mkMov(getSSA(size), src, ty)->getDef(0);
1334 mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
1335 src)->perPatch = info_out->out[idx].patch;
1336 }
1337 }
1338
1339 Instruction *
1340 Converter::storeVector(nir_intrinsic_instr *insn,
1341 uint8_t buffer, Value *indirectBuffer,
1342 uint32_t offset, Value *indirectOffset)
1343 {
1344 const uint8_t num_components = insn->src[0].ssa->num_components;
1345 uint32_t bytes = insn->src[0].ssa->bit_size / 8 * num_components;
1346 DataType ty = typeOfSize(bytes, false, false);
1347 DataFile file = getFile(insn->intrinsic);
1348 assert(nir_intrinsic_write_mask(insn) == nir_component_mask(num_components));
1349
1350 Value* src;
1351 if (num_components == 1) {
1352 src = getSrc(&insn->src[0], 0);
1353 } else {
1354 src = getSSA(bytes);
1355
1356 Instruction *merge = mkOp(OP_MERGE, ty, src);
1357 for (int i = 0; i < num_components; i++) {
1358 merge->setSrc(i, getSrc(&insn->src[0], i));
1359 }
1360 }
1361
1362 Instruction *st = mkStore(OP_STORE, ty, mkSymbol(file, buffer, ty, offset),
1363 indirectOffset, src);
1364 st->setIndirect(0, 1, indirectBuffer);
1365
1366 return st;
1367 }
1368
1369 bool
1370 Converter::memVectorizeCb(unsigned align_mul,
1371 unsigned align_offset,
1372 unsigned bit_size,
1373 unsigned num_components,
1374 int64_t hole_size,
1375 nir_intrinsic_instr *low,
1376 nir_intrinsic_instr *high,
1377 void *cb_data)
1378 {
1379 if (hole_size > 0)
1380 return false;
1381
1382 /*
1383 * Since we legalize these later with nir_lower_mem_access_bit_sizes,
1384 * we can optimistically combine anything that might be profitable
1385 */
1386 const Converter* converter = (Converter*) cb_data;
1387 const Target* target = converter->prog->getTarget();
1388
1389 assert(util_is_power_of_two_nonzero(align_mul));
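// cap the alignment at the widest possible access (128 bit), then shrink it further if the target lacks 128/64 bit accesses for this file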
1390 align_mul = std::min(align_mul, 128u / 8u);
1391
1392 const DataFile file = converter->getFile(low->intrinsic);
1393 if (align_mul == 128u / 8u && !target->isAccessSupported(file, TYPE_B128))
1394 align_mul = 64u / 8u;
1395
1396 if (align_mul == 64u / 8u && !target->isAccessSupported(file, TYPE_U64))
1397 align_mul = 32u / 8u;
1398
1399 align_offset = align_offset % align_mul;
1400
1401 return align_offset + num_components * (bit_size / 8) <= align_mul;
1402 }
1403
1404 nir_mem_access_size_align
1405 Converter::getMemAccessSizeAlign(nir_intrinsic_op intrin,
1406 uint8_t original_bytes,
1407 uint8_t original_bit_size,
1408 uint32_t align_mul,
1409 uint32_t align_offset,
1410 bool offset_is_const,
1411 enum gl_access_qualifier access,
1412 const void *cb_data)
1413 {
1414 const Converter* converter = (Converter*) cb_data;
1415 const Target* target = converter->prog->getTarget();
1416
1417 const uint32_t align = nir_combined_align(align_mul, align_offset);
1418
1419 assert(original_bytes != 0);
1420 uint32_t bytes = 1 << (util_last_bit(original_bytes) - 1);
1421
1422 bytes = std::min(bytes, align);
1423 bytes = std::min(bytes, 128u / 8u);
1424
1425 assert(util_is_power_of_two_nonzero(bytes));
1426
1427 const DataFile file = converter->getFile(intrin);
1428 if (bytes == 128u / 8u && !target->isAccessSupported(file, TYPE_B128))
1429 bytes = 64u / 8u;
1430
1431 if (bytes == 64u / 8u && !target->isAccessSupported(file, TYPE_U64))
1432 bytes = 32u / 8u;
1433
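// accesses are widened to at least 32 bit and capped at the chosen access size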
1434 uint32_t bit_size = original_bit_size;
1435 bit_size = std::max(bit_size, 32u);
1436 bit_size = std::min(bit_size, bytes * 8u);
1437
1438 return {
1439 .num_components = (uint8_t) (bytes / (bit_size / 8)),
1440 .bit_size = (uint8_t) bit_size,
1441 .align = (uint16_t) bytes,
1442 .shift = nir_mem_access_shift_method_scalar,
1443 };
1444 }
1445
1446 bool
1447 Converter::parseNIR()
1448 {
1449 info_out->bin.tlsSpace = nir->scratch_size;
1450 info_out->io.clipDistances = nir->info.clip_distance_array_size;
1451 info_out->io.cullDistances = nir->info.cull_distance_array_size;
1452 info_out->io.layer_viewport_relative = nir->info.layer_viewport_relative;
1453
1454 switch(prog->getType()) {
1455 case Program::TYPE_COMPUTE:
1456 info->prop.cp.numThreads[0] = nir->info.workgroup_size[0];
1457 info->prop.cp.numThreads[1] = nir->info.workgroup_size[1];
1458 info->prop.cp.numThreads[2] = nir->info.workgroup_size[2];
1459 info_out->bin.smemSize = std::max(info_out->bin.smemSize, nir->info.shared_size);
1460
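// pre-GF100 (Tesla) chips access SSBOs and images through fixed global memory slots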
1461 if (info->target < NVISA_GF100_CHIPSET) {
1462 int gmemSlot = 0;
1463
1464 for (unsigned i = 0; i < nir->info.num_ssbos; i++) {
1465 info_out->prop.cp.gmem[gmemSlot++] = {.valid = 1, .image = 0, .slot = i};
1466 assert(gmemSlot < 16);
1467 }
1468 nir_foreach_image_variable(var, nir) {
1469 int image_count = glsl_type_get_image_count(var->type);
1470 for (int i = 0; i < image_count; i++) {
1471 info_out->prop.cp.gmem[gmemSlot++] = {.valid = 1, .image = 1, .slot = var->data.binding + i};
1472 assert(gmemSlot < 16);
1473 }
1474 }
1475 }
1476
1477 break;
1478 case Program::TYPE_FRAGMENT:
1479 info_out->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
1480 prog->persampleInvocation =
1481 BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
1482 BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS);
1483 info_out->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
1484 info_out->prop.fp.readsSampleLocations =
1485 BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS);
1486 info_out->prop.fp.usesDiscard = nir->info.fs.uses_discard;
1487 info_out->prop.fp.usesSampleMaskIn =
1488 BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
1489 break;
1490 case Program::TYPE_GEOMETRY:
1491 info_out->prop.gp.instanceCount = nir->info.gs.invocations;
1492 info_out->prop.gp.maxVertices = nir->info.gs.vertices_out;
1493 info_out->prop.gp.outputPrim = nir->info.gs.output_primitive;
1494 break;
1495 case Program::TYPE_TESSELLATION_CONTROL:
1496 case Program::TYPE_TESSELLATION_EVAL:
1497 info_out->prop.tp.domain = u_tess_prim_from_shader(nir->info.tess._primitive_mode);
1498 info_out->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
1499 info_out->prop.tp.outputPrim =
1500 nir->info.tess.point_mode ? MESA_PRIM_POINTS : MESA_PRIM_TRIANGLES;
1501 info_out->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
1502 info_out->prop.tp.winding = !nir->info.tess.ccw;
1503 break;
1504 case Program::TYPE_VERTEX:
1505 info_out->prop.vp.usesDrawParameters =
1506 BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX) ||
1507 BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE) ||
1508 BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID);
1509 break;
1510 default:
1511 break;
1512 }
1513
1514 return true;
1515 }
1516
1517 bool
1518 Converter::visit(nir_function *function)
1519 {
1520 assert(function->impl);
1521
1522 // usually the blocks will set everything up, but main is special
1523 BasicBlock *entry = new BasicBlock(prog->main);
1524 exit = new BasicBlock(prog->main);
1525 blocks[nir_start_block(function->impl)->index] = entry;
1526 prog->main->setEntry(entry);
1527 prog->main->setExit(exit);
1528
1529 setPosition(entry, true);
1530
1531 switch (prog->getType()) {
1532 case Program::TYPE_TESSELLATION_CONTROL:
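// base of the per-patch output vertices: laneid - invocation id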
1533 outBase = mkOp2v(
1534 OP_SUB, TYPE_U32, getSSA(),
1535 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
1536 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
1537 break;
1538 case Program::TYPE_FRAGMENT: {
1539 Symbol *sv = mkSysVal(SV_POSITION, 3);
1540 Value *temp = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
1541 fp.position = mkOp1v(OP_RCP, TYPE_F32, temp, temp);
1542 break;
1543 }
1544 default:
1545 break;
1546 }
1547
1548 nir_index_ssa_defs(function->impl);
1549 foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
1550 if (!visit(node))
1551 return false;
1552 }
1553
1554 bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
1555 setPosition(exit, true);
1556
1557 // TODO: for non-main functions this needs to be an OP_RETURN
1558 mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
1559 return true;
1560 }
1561
1562 bool
1563 Converter::visit(nir_cf_node *node)
1564 {
1565 switch (node->type) {
1566 case nir_cf_node_block:
1567 return visit(nir_cf_node_as_block(node));
1568 case nir_cf_node_if:
1569 return visit(nir_cf_node_as_if(node));
1570 case nir_cf_node_loop:
1571 return visit(nir_cf_node_as_loop(node));
1572 default:
1573 ERROR("unknown nir_cf_node type %u\n", node->type);
1574 return false;
1575 }
1576 }
1577
1578 bool
1579 Converter::visit(nir_block *block)
1580 {
1581 if (!block->predecessors->entries && block->instr_list.is_empty())
1582 return true;
1583
1584 BasicBlock *bb = convert(block);
1585
1586 setPosition(bb, true);
1587 nir_foreach_instr(insn, block) {
1588 if (!visit(insn))
1589 return false;
1590 }
1591 return true;
1592 }
1593
1594 bool
1595 Converter::visit(nir_if *nif)
1596 {
1597 curIfDepth++;
1598
1599 DataType sType = getSType(nif->condition, false, false);
1600 Value *src = getSrc(&nif->condition, 0);
1601
1602 nir_block *lastThen = nir_if_last_then_block(nif);
1603 nir_block *lastElse = nir_if_last_else_block(nif);
1604
1605 BasicBlock *headBB = bb;
1606 BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
1607 BasicBlock *elseBB = convert(nir_if_first_else_block(nif));
1608
1609 bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
1610 bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);
1611
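// a join can only be inserted if both branches fall through to the same successor block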
1612 bool insertJoins = lastThen->successors[0] == lastElse->successors[0];
1613 mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType);
1614
1615 foreach_list_typed(nir_cf_node, node, node, &nif->then_list) {
1616 if (!visit(node))
1617 return false;
1618 }
1619
1620 setPosition(convert(lastThen), true);
1621 if (!bb->isTerminated()) {
1622 BasicBlock *tailBB = convert(lastThen->successors[0]);
1623 mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1624 bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1625 } else {
1626 insertJoins = insertJoins && bb->getExit()->op == OP_BRA;
1627 }
1628
1629 foreach_list_typed(nir_cf_node, node, node, &nif->else_list) {
1630 if (!visit(node))
1631 return false;
1632 }
1633
1634 setPosition(convert(lastElse), true);
1635 if (!bb->isTerminated()) {
1636 BasicBlock *tailBB = convert(lastElse->successors[0]);
1637 mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1638 bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1639 } else {
1640 insertJoins = insertJoins && bb->getExit()->op == OP_BRA;
1641 }
1642
1643 if (curIfDepth > 6) {
1644 insertJoins = false;
1645 }
1646
1647 /* we made sure that all threads would converge at the same block */
1648 if (insertJoins) {
1649 BasicBlock *conv = convert(lastThen->successors[0]);
1650 setPosition(headBB->getExit(), false);
1651 headBB->joinAt = mkFlow(OP_JOINAT, conv, CC_ALWAYS, NULL);
1652 setPosition(conv, false);
1653 mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
1654 }
1655
1656 curIfDepth--;
1657
1658 return true;
1659 }
1660
1661 // TODO: add convergence
1662 bool
1663 Converter::visit(nir_loop *loop)
1664 {
1665 assert(!nir_loop_has_continue_construct(loop));
1666 curLoopDepth += 1;
1667 func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth);
1668
1669 BasicBlock *loopBB = convert(nir_loop_first_block(loop));
1670 BasicBlock *tailBB = convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));
1671
1672 bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE);
1673
1674 mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL);
1675 setPosition(loopBB, false);
1676 mkFlow(OP_PRECONT, loopBB, CC_ALWAYS, NULL);
1677
1678 foreach_list_typed(nir_cf_node, node, node, &loop->body) {
1679 if (!visit(node))
1680 return false;
1681 }
1682
1683 if (!bb->isTerminated()) {
1684 mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
1685 bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
1686 }
1687
1688 if (tailBB->cfg.incidentCount() == 0)
1689 loopBB->cfg.attach(&tailBB->cfg, Graph::Edge::TREE);
1690
1691 curLoopDepth -= 1;
1692
1693 info_out->loops++;
1694
1695 return true;
1696 }
1697
1698 bool
1699 Converter::visit(nir_instr *insn)
1700 {
1701 // we need an insertion point for on-the-fly generated immediate loads
1702 immInsertPos = bb->getExit();
1703 switch (insn->type) {
1704 case nir_instr_type_alu:
1705 return visit(nir_instr_as_alu(insn));
1706 case nir_instr_type_intrinsic:
1707 return visit(nir_instr_as_intrinsic(insn));
1708 case nir_instr_type_jump:
1709 return visit(nir_instr_as_jump(insn));
1710 case nir_instr_type_load_const:
1711 return visit(nir_instr_as_load_const(insn));
1712 case nir_instr_type_undef:
1713 return visit(nir_instr_as_undef(insn));
1714 case nir_instr_type_tex:
1715 return visit(nir_instr_as_tex(insn));
1716 default:
1717 ERROR("unknown nir_instr type %u\n", insn->type);
1718 return false;
1719 }
1720 return true;
1721 }
1722
1723 SVSemantic
1724 Converter::convert(nir_intrinsic_op intr)
1725 {
1726 switch (intr) {
1727 case nir_intrinsic_load_base_vertex:
1728 return SV_BASEVERTEX;
1729 case nir_intrinsic_load_base_instance:
1730 return SV_BASEINSTANCE;
1731 case nir_intrinsic_load_draw_id:
1732 return SV_DRAWID;
1733 case nir_intrinsic_load_front_face:
1734 return SV_FACE;
1735 case nir_intrinsic_is_helper_invocation:
1736 case nir_intrinsic_load_helper_invocation:
1737 return SV_THREAD_KILL;
1738 case nir_intrinsic_load_instance_id:
1739 return SV_INSTANCE_ID;
1740 case nir_intrinsic_load_invocation_id:
1741 return SV_INVOCATION_ID;
1742 case nir_intrinsic_load_workgroup_size:
1743 return SV_NTID;
1744 case nir_intrinsic_load_local_invocation_id:
1745 return SV_TID;
1746 case nir_intrinsic_load_num_workgroups:
1747 return SV_NCTAID;
1748 case nir_intrinsic_load_patch_vertices_in:
1749 return SV_VERTEX_COUNT;
1750 case nir_intrinsic_load_primitive_id:
1751 return SV_PRIMITIVE_ID;
1752 case nir_intrinsic_load_sample_id:
1753 return SV_SAMPLE_INDEX;
1754 case nir_intrinsic_load_sample_mask_in:
1755 return SV_SAMPLE_MASK;
1756 case nir_intrinsic_load_sample_pos:
1757 return SV_SAMPLE_POS;
1758 case nir_intrinsic_load_subgroup_eq_mask:
1759 return SV_LANEMASK_EQ;
1760 case nir_intrinsic_load_subgroup_ge_mask:
1761 return SV_LANEMASK_GE;
1762 case nir_intrinsic_load_subgroup_gt_mask:
1763 return SV_LANEMASK_GT;
1764 case nir_intrinsic_load_subgroup_le_mask:
1765 return SV_LANEMASK_LE;
1766 case nir_intrinsic_load_subgroup_lt_mask:
1767 return SV_LANEMASK_LT;
1768 case nir_intrinsic_load_subgroup_invocation:
1769 return SV_LANEID;
1770 case nir_intrinsic_load_tess_coord:
1771 return SV_TESS_COORD;
1772 case nir_intrinsic_load_tess_level_inner:
1773 return SV_TESS_INNER;
1774 case nir_intrinsic_load_tess_level_outer:
1775 return SV_TESS_OUTER;
1776 case nir_intrinsic_load_vertex_id:
1777 return SV_VERTEX_ID;
1778 case nir_intrinsic_load_workgroup_id:
1779 return SV_CTAID;
1780 case nir_intrinsic_load_work_dim:
1781 return SV_WORK_DIM;
1782 default:
1783 ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
1784 nir_intrinsic_infos[intr].name);
1785 assert(false);
1786 return SV_LAST;
1787 }
1788 }
1789
1790 bool
1791 Converter::visit(nir_intrinsic_instr *insn)
1792 {
1793 nir_intrinsic_op op = insn->intrinsic;
1794 const nir_intrinsic_info &opInfo = nir_intrinsic_infos[op];
1795 unsigned dest_components = nir_intrinsic_dest_components(insn);
1796
1797 switch (op) {
1798 case nir_intrinsic_decl_reg: {
1799 const unsigned reg_index = insn->def.index;
1800 const unsigned bit_size = nir_intrinsic_bit_size(insn);
1801 const unsigned num_components = nir_intrinsic_num_components(insn);
1802 assert(nir_intrinsic_num_array_elems(insn) == 0);
1803
1804 LValues newDef(num_components);
1805 for (uint8_t c = 0; c < num_components; c++)
1806 newDef[c] = getScratch(std::max(4u, bit_size / 8));
1807
1808 assert(regDefs.find(reg_index) == regDefs.end());
1809 regDefs[reg_index] = newDef;
1810 break;
1811 }
1812
1813 case nir_intrinsic_load_reg: {
1814 const unsigned reg_index = insn->src[0].ssa->index;
1815 NirDefMap::iterator it = regDefs.find(reg_index);
1816 assert(it != regDefs.end());
1817 LValues &src = it->second;
1818
1819 DataType dType = getDType(insn);
1820 LValues &newDefs = convert(&insn->def);
1821 for (uint8_t c = 0; c < insn->num_components; c++)
1822 mkMov(newDefs[c], src[c], dType);
1823 break;
1824 }
1825
1826 case nir_intrinsic_store_reg: {
1827 const unsigned reg_index = insn->src[1].ssa->index;
1828 NirDefMap::iterator it = regDefs.find(reg_index);
1829 assert(it != regDefs.end());
1830 LValues &dst = it->second;
1831
1832 DataType dType = Converter::getSType(insn->src[0], false, false);
1833
1834 const nir_component_mask_t write_mask = nir_intrinsic_write_mask(insn);
1835 for (uint8_t c = 0u; c < insn->num_components; c++) {
1836 if (!((1u << c) & write_mask))
1837 continue;
1838
1839 Value *src = getSrc(&insn->src[0], c);
1840 mkMov(dst[c], src, dType);
1841 }
1842 break;
1843 }
1844
1845 case nir_intrinsic_store_output:
1846 case nir_intrinsic_store_per_vertex_output: {
1847 Value *indirect;
1848 DataType dType = getSType(insn->src[0], false, false);
1849 uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect);
1850
1851 for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
1852 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
1853 continue;
1854
1855 uint8_t offset = 0;
1856 Value *src = getSrc(&insn->src[0], i);
1857 switch (prog->getType()) {
1858 case Program::TYPE_FRAGMENT: {
1859 if (info_out->out[idx].sn == TGSI_SEMANTIC_POSITION) {
1860 // TGSI uses a different interface than NIR: TGSI stores that
1861 // value in the z component, NIR in x
1862 offset += 2;
1863 src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
1864 }
1865 break;
1866 }
1867 default:
1868 break;
1869 }
1870
1871 storeTo(insn, getFile(op), OP_EXPORT, dType, src, idx, i + offset, indirect);
1872 }
1873 break;
1874 }
1875 case nir_intrinsic_load_input:
1876 case nir_intrinsic_load_interpolated_input:
1877 case nir_intrinsic_load_output: {
1878 LValues &newDefs = convert(&insn->def);
1879
1880 // FBFetch
1881 if (prog->getType() == Program::TYPE_FRAGMENT &&
1882 op == nir_intrinsic_load_output) {
1883 std::vector<Value*> defs, srcs;
1884 uint8_t mask = 0;
1885
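// emulate FBFetch with a TXF from the bound framebuffer: integer pixel
// coordinates derived from SV_POSITION plus the current layer and sample index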
1886 srcs.push_back(getSSA());
1887 srcs.push_back(getSSA());
1888 Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0));
1889 Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1));
1890 mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
1891 mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;
1892
1893 srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));
1894 srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));
1895
1896 for (uint8_t i = 0u; i < dest_components; ++i) {
1897 defs.push_back(newDefs[i]);
1898 mask |= 1 << i;
1899 }
1900
1901 TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs);
1902 texi->tex.levelZero = true;
1903 texi->tex.mask = mask;
1904 texi->tex.useOffsets = 0;
1905 texi->tex.r = 0xffff;
1906 texi->tex.s = 0xffff;
1907
1908 info_out->prop.fp.readsFramebuffer = true;
1909 break;
1910 }
1911
1912 const DataType dType = getDType(insn);
1913 Value *indirect;
1914 bool input = op != nir_intrinsic_load_output;
1915 operation nvirOp = OP_LAST;
1916 uint32_t mode = 0;
1917
1918 uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect);
1919 nv50_ir_varying& vary = input ? info_out->in[idx] : info_out->out[idx];
1920
1921 // see load_barycentric_* handling
1922 if (prog->getType() == Program::TYPE_FRAGMENT) {
1923 if (op == nir_intrinsic_load_interpolated_input) {
1924 ImmediateValue immMode;
1925 if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode))
1926 mode = immMode.reg.data.u32;
1927 }
1928 if (mode == NV50_IR_INTERP_DEFAULT)
1929 mode |= translateInterpMode(&vary, nvirOp);
1930 else {
1931 if (vary.linear) {
1932 nvirOp = OP_LINTERP;
1933 mode |= NV50_IR_INTERP_LINEAR;
1934 } else {
1935 nvirOp = OP_PINTERP;
1936 mode |= NV50_IR_INTERP_PERSPECTIVE;
1937 }
1938 }
1939 }
1940
1941 for (uint8_t i = 0u; i < dest_components; ++i) {
1942 uint32_t address = getSlotAddress(insn, idx, i);
1943 Symbol *sym = mkSymbol(getFile(op), 0, dType, address);
1944 if (prog->getType() == Program::TYPE_FRAGMENT) {
1945 int s = 1;
1946 if (typeSizeof(dType) == 8) {
1947 Value *lo = getSSA();
1948 Value *hi = getSSA();
1949 Instruction *interp;
1950
1951 interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
1952 if (nvirOp == OP_PINTERP)
1953 interp->setSrc(s++, fp.position);
1954 if (mode & NV50_IR_INTERP_OFFSET)
1955 interp->setSrc(s++, getSrc(&insn->src[0], 0));
1956 interp->setInterpolate(mode);
1957 interp->setIndirect(0, 0, indirect);
1958
1959 Symbol *sym1 = mkSymbol(getFile(op), 0, dType, address + 4);
1960 interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
1961 if (nvirOp == OP_PINTERP)
1962 interp->setSrc(s++, fp.position);
1963 if (mode & NV50_IR_INTERP_OFFSET)
1964 interp->setSrc(s++, getSrc(&insn->src[0], 0));
1965 interp->setInterpolate(mode);
1966 interp->setIndirect(0, 0, indirect);
1967
1968 mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
1969 } else {
1970 Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
1971 if (nvirOp == OP_PINTERP)
1972 interp->setSrc(s++, fp.position);
1973 if (mode & NV50_IR_INTERP_OFFSET)
1974 interp->setSrc(s++, getSrc(&insn->src[0], 0));
1975 interp->setInterpolate(mode);
1976 interp->setIndirect(0, 0, indirect);
1977 }
1978 } else {
1979 mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;
1980 }
1981 }
1982 break;
1983 }
1984 case nir_intrinsic_load_barycentric_at_offset:
1985 case nir_intrinsic_load_barycentric_at_sample:
1986 case nir_intrinsic_load_barycentric_centroid:
1987 case nir_intrinsic_load_barycentric_pixel:
1988 case nir_intrinsic_load_barycentric_sample: {
1989 LValues &newDefs = convert(&insn->def);
1990 uint32_t mode;
1991
1992 if (op == nir_intrinsic_load_barycentric_centroid ||
1993 op == nir_intrinsic_load_barycentric_sample) {
1994 mode = NV50_IR_INTERP_CENTROID;
1995 } else if (op == nir_intrinsic_load_barycentric_at_offset) {
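// clamp the offset to the range the hardware can encode, convert it to
// fixed point and pack both components into a single register (x in the
// low 16 bits, y in the high 16 bits) for the interpolation instruction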
1996 Value *offs[2];
1997 for (uint8_t c = 0; c < 2; c++) {
1998 offs[c] = getScratch();
1999 mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(&insn->src[0], c), loadImm(NULL, 0.4375f));
2000 mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
2001 mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
2002 mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
2003 }
2004 mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), offs[0]);
2005
2006 mode = NV50_IR_INTERP_OFFSET;
2007 } else if (op == nir_intrinsic_load_barycentric_pixel) {
2008 mode = NV50_IR_INTERP_DEFAULT;
2009 } else if (op == nir_intrinsic_load_barycentric_at_sample) {
2010 info_out->prop.fp.readsSampleLocations = true;
2011 Value *sample = getSSA();
2012 mkOp3(OP_SELP, TYPE_U32, sample, mkImm(0), getSrc(&insn->src[0], 0), mkImm(0))
2013 ->subOp = 2;
2014 mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], sample)->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
2015 mode = NV50_IR_INTERP_OFFSET;
2016 } else {
2017 unreachable("all intrinsics already handled above");
2018 }
2019
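// the barycentric intrinsics return the interpolation mode in their second
// component; load_interpolated_input reads it back as an immediate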
2020 loadImm(newDefs[1], mode);
2021 break;
2022 }
2023 case nir_intrinsic_demote:
2024 mkOp(OP_DISCARD, TYPE_NONE, NULL);
2025 break;
2026 case nir_intrinsic_demote_if: {
2027 Value *pred = getSSA(1, FILE_PREDICATE);
2028 if (insn->num_components > 1) {
2029 ERROR("nir_intrinsic_demote_if only with 1 component supported!\n");
2030 assert(false);
2031 return false;
2032 }
2033 mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2034 mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
2035 break;
2036 }
2037 case nir_intrinsic_load_base_vertex:
2038 case nir_intrinsic_load_base_instance:
2039 case nir_intrinsic_load_draw_id:
2040 case nir_intrinsic_load_front_face:
2041 case nir_intrinsic_is_helper_invocation:
2042 case nir_intrinsic_load_helper_invocation:
2043 case nir_intrinsic_load_instance_id:
2044 case nir_intrinsic_load_invocation_id:
2045 case nir_intrinsic_load_workgroup_size:
2046 case nir_intrinsic_load_local_invocation_id:
2047 case nir_intrinsic_load_num_workgroups:
2048 case nir_intrinsic_load_patch_vertices_in:
2049 case nir_intrinsic_load_primitive_id:
2050 case nir_intrinsic_load_sample_id:
2051 case nir_intrinsic_load_sample_mask_in:
2052 case nir_intrinsic_load_sample_pos:
2053 case nir_intrinsic_load_subgroup_eq_mask:
2054 case nir_intrinsic_load_subgroup_ge_mask:
2055 case nir_intrinsic_load_subgroup_gt_mask:
2056 case nir_intrinsic_load_subgroup_le_mask:
2057 case nir_intrinsic_load_subgroup_lt_mask:
2058 case nir_intrinsic_load_subgroup_invocation:
2059 case nir_intrinsic_load_tess_coord:
2060 case nir_intrinsic_load_tess_level_inner:
2061 case nir_intrinsic_load_tess_level_outer:
2062 case nir_intrinsic_load_vertex_id:
2063 case nir_intrinsic_load_workgroup_id:
2064 case nir_intrinsic_load_work_dim: {
2065 const DataType dType = getDType(insn);
2066 SVSemantic sv = convert(op);
2067 LValues &newDefs = convert(&insn->def);
2068
2069 for (uint8_t i = 0u; i < nir_intrinsic_dest_components(insn); ++i) {
2070 Value *def;
2071 if (typeSizeof(dType) == 8)
2072 def = getSSA();
2073 else
2074 def = newDefs[i];
2075
2076 if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {
2077 loadImm(def, 0u);
2078 } else {
2079 Symbol *sym = mkSysVal(sv, i);
2080 Instruction *rdsv = mkOp1(OP_RDSV, TYPE_U32, def, sym);
2081 if (sv == SV_TESS_OUTER || sv == SV_TESS_INNER)
2082 rdsv->perPatch = 1;
2083 }
2084
2085 if (typeSizeof(dType) == 8)
2086 mkOp2(OP_MERGE, dType, newDefs[i], def, loadImm(getSSA(), 0u));
2087 }
2088 break;
2089 }
2090 // constants
2091 case nir_intrinsic_load_subgroup_size: {
2092 LValues &newDefs = convert(&insn->def);
2093 loadImm(newDefs[0], 32u);
2094 break;
2095 }
2096 case nir_intrinsic_vote_all:
2097 case nir_intrinsic_vote_any:
2098 case nir_intrinsic_vote_ieq: {
2099 LValues &newDefs = convert(&insn->def);
2100 Value *pred = getScratch(1, FILE_PREDICATE);
2101 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2102 mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op);
2103 mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred);
2104 break;
2105 }
2106 case nir_intrinsic_ballot: {
2107 LValues &newDefs = convert(&insn->def);
2108 Value *pred = getSSA(1, FILE_PREDICATE);
2109 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2110 mkOp1(OP_VOTE, TYPE_U32, newDefs[0], pred)->subOp = NV50_IR_SUBOP_VOTE_ANY;
2111 break;
2112 }
2113 case nir_intrinsic_read_first_invocation:
2114 case nir_intrinsic_read_invocation: {
2115 LValues &newDefs = convert(&insn->def);
2116 const DataType dType = getDType(insn);
2117 Value *tmp = getScratch();
2118
2119 if (op == nir_intrinsic_read_first_invocation) {
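// find the first active lane: VOTE_ANY yields the mask of active lanes,
// bit-reversal plus find-MSB then gives the lowest set bit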
2120 mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
2121 mkOp1(OP_BREV, TYPE_U32, tmp, tmp);
2122 mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
2123 } else
2124 tmp = getSrc(&insn->src[1], 0);
2125
2126 for (uint8_t i = 0; i < dest_components; ++i) {
2127 mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f))
2128 ->subOp = NV50_IR_SUBOP_SHFL_IDX;
2129 }
2130 break;
2131 }
2132 case nir_intrinsic_load_per_vertex_input: {
2133 const DataType dType = getDType(insn);
2134 LValues &newDefs = convert(&insn->def);
2135 Value *indirectVertex;
2136 Value *indirectOffset;
2137 uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
2138 uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
2139
2140 Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
2141 mkImm(baseVertex), indirectVertex);
2142 for (uint8_t i = 0u; i < dest_components; ++i) {
2143 uint32_t address = getSlotAddress(insn, idx, i);
2144 loadFrom(getFile(op), 0, dType, newDefs[i], address, 0,
2145 indirectOffset, vtxBase, info_out->in[idx].patch);
2146 }
2147 break;
2148 }
2149 case nir_intrinsic_load_per_vertex_output: {
2150 const DataType dType = getDType(insn);
2151 LValues &newDefs = convert(&insn->def);
2152 Value *indirectVertex;
2153 Value *indirectOffset;
2154 uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
2155 uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
2156 Value *vtxBase = NULL;
2157
2158 if (indirectVertex)
2159 vtxBase = indirectVertex;
2160 else
2161 vtxBase = loadImm(NULL, baseVertex);
2162
2163 vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, vtxBase);
2164
2165 for (uint8_t i = 0u; i < dest_components; ++i) {
2166 uint32_t address = getSlotAddress(insn, idx, i);
2167 loadFrom(getFile(op), 0, dType, newDefs[i], address, 0,
2168 indirectOffset, vtxBase, info_out->in[idx].patch);
2169 }
2170 break;
2171 }
2172 case nir_intrinsic_emit_vertex: {
2173 uint32_t idx = nir_intrinsic_stream_id(insn);
2174 mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
2175 break;
2176 }
2177 case nir_intrinsic_end_primitive: {
2178 uint32_t idx = nir_intrinsic_stream_id(insn);
2179 if (idx)
2180 break;
2181 mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
2182 break;
2183 }
2184 case nir_intrinsic_load_ubo:
2185 case nir_intrinsic_ldc_nv: {
2186 const DataType dType = getDType(insn);
2187 LValues &newDefs = convert(&insn->def);
2188 Value *indirectIndex;
2189 Value *indirectOffset;
2190 uint32_t index = getIndirect(&insn->src[0], 0, indirectIndex);
2191 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2192 if (indirectOffset)
2193 indirectOffset = mkOp1v(OP_MOV, TYPE_U32, getSSA(4, FILE_ADDRESS), indirectOffset);
2194
2195 for (uint8_t i = 0u; i < dest_components; ++i) {
2196 loadFrom(getFile(op), index, dType, newDefs[i], offset, i,
2197 indirectOffset, indirectIndex);
2198 }
2199 break;
2200 }
2201 case nir_intrinsic_get_ssbo_size: {
2202 LValues &newDefs = convert(&insn->def);
2203 const DataType dType = getDType(insn);
2204 Value *indirectBuffer;
2205 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2206
2207 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, 0);
2208 mkOp1(OP_BUFQ, dType, newDefs[0], sym)->setIndirect(0, 0, indirectBuffer);
2209 break;
2210 }
2211 case nir_intrinsic_store_ssbo: {
2212 Value *indirectBuffer;
2213 Value *indirectOffset;
2214 uint32_t buffer = getIndirect(&insn->src[1], 0, indirectBuffer);
2215 uint32_t offset = getIndirect(&insn->src[2], 0, indirectOffset);
2216
2217 CacheMode cache = convert(nir_intrinsic_access(insn));
2218
2219 storeVector(insn, buffer, indirectBuffer, offset, indirectOffset)->cache = cache;
2220
2221 info_out->io.globalAccess |= 0x2;
2222 break;
2223 }
2224 case nir_intrinsic_load_ssbo: {
2225 Value *indirectBuffer;
2226 Value *indirectOffset;
2227 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2228 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2229
2230 CacheMode cache = convert(nir_intrinsic_access(insn));
2231
2232 loadVector(insn, buffer, indirectBuffer, offset, indirectOffset)->cache = cache;
2233
2234 info_out->io.globalAccess |= 0x1;
2235 break;
2236 }
2237 case nir_intrinsic_shared_atomic:
2238 case nir_intrinsic_shared_atomic_swap: {
2239 const DataType dType = getDType(insn);
2240 LValues &newDefs = convert(&insn->def);
2241 Value *indirectOffset;
2242 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2243 Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, dType, offset);
2244 Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
2245 if (op == nir_intrinsic_shared_atomic_swap)
2246 atom->setSrc(2, getSrc(&insn->src[2], 0));
2247 atom->setIndirect(0, 0, indirectOffset);
2248 atom->subOp = getAtomicSubOp(nir_intrinsic_atomic_op(insn));
2249 break;
2250 }
2251 case nir_intrinsic_ssbo_atomic:
2252 case nir_intrinsic_ssbo_atomic_swap: {
2253 const DataType dType = getDType(insn);
2254 LValues &newDefs = convert(&insn->def);
2255 Value *indirectBuffer;
2256 Value *indirectOffset;
2257 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2258 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2259
2260 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, offset);
2261 Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym,
2262 getSrc(&insn->src[2], 0));
2263 if (op == nir_intrinsic_ssbo_atomic_swap)
2264 atom->setSrc(2, getSrc(&insn->src[3], 0));
2265 atom->setIndirect(0, 0, indirectOffset);
2266 atom->setIndirect(0, 1, indirectBuffer);
2267 atom->subOp = getAtomicSubOp(nir_intrinsic_atomic_op(insn));
2268
2269 info_out->io.globalAccess |= 0x2;
2270 break;
2271 }
2272 case nir_intrinsic_global_atomic:
2273 case nir_intrinsic_global_atomic_swap: {
2274 const DataType dType = getDType(insn);
2275 LValues &newDefs = convert(&insn->def);
2276 Value *address;
2277 uint32_t offset = getIndirect(&insn->src[0], 0, address);
2278
2279 Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, dType, offset);
2280 Instruction *atom =
2281 mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
2282 if (op == nir_intrinsic_global_atomic_swap)
2283 atom->setSrc(2, getSrc(&insn->src[2], 0));
2284 atom->setIndirect(0, 0, address);
2285 atom->subOp = getAtomicSubOp(nir_intrinsic_atomic_op(insn));
2286
2287 info_out->io.globalAccess |= 0x2;
2288 break;
2289 }
2290 case nir_intrinsic_bindless_image_atomic:
2291 case nir_intrinsic_bindless_image_atomic_swap:
2292 case nir_intrinsic_bindless_image_load:
2293 case nir_intrinsic_bindless_image_samples:
2294 case nir_intrinsic_bindless_image_size:
2295 case nir_intrinsic_bindless_image_store:
2296 case nir_intrinsic_image_atomic:
2297 case nir_intrinsic_image_atomic_swap:
2298 case nir_intrinsic_image_load:
2299 case nir_intrinsic_image_samples:
2300 case nir_intrinsic_image_size:
2301 case nir_intrinsic_image_store: {
2302 std::vector<Value*> srcs, defs;
2303 Value *indirect;
2304 DataType ty;
2305 int subOp = 0;
2306
2307 uint32_t mask = 0;
2308 TexInstruction::Target target =
2309 convert(nir_intrinsic_image_dim(insn), !!nir_intrinsic_image_array(insn), false);
2310 unsigned int argCount = getNIRArgCount(target);
2311 uint16_t location = 0;
2312
2313 if (opInfo.has_dest) {
2314 LValues &newDefs = convert(&insn->def);
2315 for (uint8_t i = 0u; i < newDefs.size(); ++i) {
2316 defs.push_back(newDefs[i]);
2317 mask |= 1 << i;
2318 }
2319 }
2320
2321 int lod_src = -1;
2322 bool bindless = false;
2323 switch (op) {
2324 case nir_intrinsic_bindless_image_atomic:
2325 case nir_intrinsic_bindless_image_atomic_swap:
2326 ty = getDType(insn);
2327 bindless = true;
2328 info_out->io.globalAccess |= 0x2;
2329 mask = 0x1;
2330 subOp = getAtomicSubOp(nir_intrinsic_atomic_op(insn));
2331 break;
2332 case nir_intrinsic_image_atomic:
2333 case nir_intrinsic_image_atomic_swap:
2334 ty = getDType(insn);
2335 bindless = false;
2336 info_out->io.globalAccess |= 0x2;
2337 mask = 0x1;
2338 subOp = getAtomicSubOp(nir_intrinsic_atomic_op(insn));
2339 break;
2340 case nir_intrinsic_bindless_image_load:
2341 case nir_intrinsic_image_load:
2342 ty = TYPE_U32;
2343 bindless = op == nir_intrinsic_bindless_image_load;
2344 info_out->io.globalAccess |= 0x1;
2345 lod_src = 4;
2346 break;
2347 case nir_intrinsic_bindless_image_store:
2348 case nir_intrinsic_image_store:
2349 ty = TYPE_U32;
2350 bindless = op == nir_intrinsic_bindless_image_store;
2351 info_out->io.globalAccess |= 0x2;
2352 lod_src = 5;
2353 mask = 0xf;
2354 break;
2355 case nir_intrinsic_bindless_image_samples:
2356 mask = 0x8;
2357 FALLTHROUGH;
2358 case nir_intrinsic_image_samples:
2359 argCount = 0; /* No coordinates */
2360 ty = TYPE_U32;
2361 bindless = op == nir_intrinsic_bindless_image_samples;
2362 mask = 0x8;
2363 break;
2364 case nir_intrinsic_bindless_image_size:
2365 case nir_intrinsic_image_size:
2366 assert(nir_src_as_uint(insn->src[1]) == 0);
2367 argCount = 0; /* No coordinates */
2368 ty = TYPE_U32;
2369 bindless = op == nir_intrinsic_bindless_image_size;
2370 break;
2371 default:
2372 unreachable("unhandled image opcode");
2373 break;
2374 }
2375
2376 if (bindless)
2377 indirect = getSrc(&insn->src[0], 0);
2378 else
2379 location = getIndirect(&insn->src[0], 0, indirect);
2380
2381 /* Pre-GF100, SSBOs and images are in the same HW file, managed by
2382 * prop.cp.gmem; images are located after the SSBOs.
2383 */
2384 if (info->target < NVISA_GF100_CHIPSET)
2385 location += nir->info.num_ssbos;
2386
2387 // coords
2388 if (opInfo.num_srcs >= 2)
2389 for (unsigned int i = 0u; i < argCount; ++i)
2390 srcs.push_back(getSrc(&insn->src[1], i));
2391
2392 // for MS images the sample index is just another src added after the coords
2393 if (opInfo.num_srcs >= 3 && target.isMS())
2394 srcs.push_back(getSrc(&insn->src[2], 0));
2395
2396 if (opInfo.num_srcs >= 4 && lod_src != 4) {
2397 unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;
2398 for (uint8_t i = 0u; i < components; ++i)
2399 srcs.push_back(getSrc(&insn->src[3], i));
2400 }
2401
2402 if (opInfo.num_srcs >= 5 && lod_src != 5)
2403 // 1 for atomic swap
2404 for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)
2405 srcs.push_back(getSrc(&insn->src[4], i));
2406
2407 TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);
2408 texi->tex.bindless = bindless;
2409 texi->tex.format = nv50_ir::TexInstruction::translateImgFormat(nir_intrinsic_format(insn));
2410 texi->tex.mask = mask;
2411 texi->cache = convert(nir_intrinsic_access(insn));
2412 texi->setType(ty);
2413 texi->subOp = subOp;
2414
2415 if (indirect)
2416 texi->setIndirectR(indirect);
2417
2418 break;
2419 }
2420 case nir_intrinsic_store_scratch:
2421 case nir_intrinsic_store_shared: {
2422 Value *indirectOffset;
2423 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2424 if (indirectOffset)
2425 indirectOffset = mkOp1v(OP_MOV, TYPE_U32, getSSA(4, FILE_ADDRESS), indirectOffset);
2426
2427 storeVector(insn, 0, nullptr, offset, indirectOffset);
2428 break;
2429 }
2430 case nir_intrinsic_load_kernel_input: {
2431 const DataType dType = getDType(insn);
2432 LValues &newDefs = convert(&insn->def);
2433 Value *indirectOffset;
2434 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2435 if (indirectOffset)
2436 indirectOffset = mkOp1v(OP_MOV, TYPE_U32, getSSA(4, FILE_ADDRESS), indirectOffset);
2437
2438 for (uint8_t i = 0u; i < dest_components; ++i)
2439 loadFrom(getFile(op), 0, dType, newDefs[i], offset, i, indirectOffset);
2440
2441 break;
2442 }
2443 case nir_intrinsic_load_scratch:
2444 case nir_intrinsic_load_shared: {
2445 Value *indirectOffset;
2446 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2447 if (indirectOffset)
2448 indirectOffset = mkOp1v(OP_MOV, TYPE_U32, getSSA(4, FILE_ADDRESS), indirectOffset);
2449
2450 loadVector(insn, 0, nullptr, offset, indirectOffset);
2451
2452 break;
2453 }
2454 case nir_intrinsic_barrier: {
2455 mesa_scope exec_scope = nir_intrinsic_execution_scope(insn);
2456 mesa_scope mem_scope = nir_intrinsic_memory_scope(insn);
2457 nir_variable_mode modes = nir_intrinsic_memory_modes(insn);
2458 nir_variable_mode valid_modes =
2459 nir_var_mem_global | nir_var_image | nir_var_mem_ssbo | nir_var_mem_shared;
2460
2461 if (mem_scope != SCOPE_NONE && (modes & valid_modes)) {
2462
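// scopes wider than the workgroup need a device (GL) scoped membar,
// a CTA scoped one is enough otherwise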
2463 Instruction *bar = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
2464 bar->fixed = 1;
2465
2466 if (mem_scope >= SCOPE_QUEUE_FAMILY)
2467 bar->subOp = NV50_IR_SUBOP_MEMBAR(M, GL);
2468 else
2469 bar->subOp = NV50_IR_SUBOP_MEMBAR(M, CTA);
2470 }
2471
2472 if (exec_scope != SCOPE_NONE &&
2473 !(exec_scope == SCOPE_WORKGROUP && nir->info.stage == MESA_SHADER_TESS_CTRL)) {
2474 Instruction *bar = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0));
2475 bar->fixed = 1;
2476 bar->subOp = NV50_IR_SUBOP_BAR_SYNC;
2477 info_out->numBarriers = 1;
2478 }
2479
2480 break;
2481 }
2482 case nir_intrinsic_shader_clock: {
2483 const DataType dType = getDType(insn);
2484 LValues &newDefs = convert(&insn->def);
2485
2486 loadImm(newDefs[0], 0u);
2487 mkOp1(OP_RDSV, dType, newDefs[1], mkSysVal(SV_CLOCK, 0))->fixed = 1;
2488 break;
2489 }
2490 case nir_intrinsic_load_global:
2491 case nir_intrinsic_load_global_constant: {
2492 Value *indirectOffset;
2493 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2494
2495 loadVector(insn, 0, nullptr, offset, indirectOffset);
2496
2497 info_out->io.globalAccess |= 0x1;
2498 break;
2499 }
2500 case nir_intrinsic_store_global: {
2501 Value *indirectOffset;
2502 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2503
2504 storeVector(insn, 0, nullptr, offset, indirectOffset);
2505
2506 info_out->io.globalAccess |= 0x2;
2507 break;
2508 }
2509 case nir_intrinsic_ddx:
2510 case nir_intrinsic_ddx_coarse:
2511 case nir_intrinsic_ddx_fine:
2512 case nir_intrinsic_ddy:
2513 case nir_intrinsic_ddy_coarse:
2514 case nir_intrinsic_ddy_fine: {
2515 assert(insn->def.num_components == 1);
2516 const DataType dType = getDType(insn);
2517 LValues &newDefs = convert(&insn->def);
2518 mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0], 0));
2519 break;
2520 }
2521 default:
2522 ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
2523 return false;
2524 }
2525
2526 return true;
2527 }
2528
2529 bool
2530 Converter::visit(nir_jump_instr *insn)
2531 {
2532 switch (insn->type) {
2533 case nir_jump_break:
2534 case nir_jump_continue: {
2535 bool isBreak = insn->type == nir_jump_break;
2536 nir_block *block = insn->instr.block;
2537 BasicBlock *target = convert(block->successors[0]);
2538 mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL);
2539 bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK);
2540 break;
2541 }
2542 default:
2543 ERROR("unknown nir_jump_type %u\n", insn->type);
2544 return false;
2545 }
2546
2547 return true;
2548 }
2549
2550 Value*
2551 Converter::convert(nir_load_const_instr *insn, uint8_t idx)
2552 {
2553 Value *val;
2554
2555 if (immInsertPos)
2556 setPosition(immInsertPos, true);
2557 else
2558 setPosition(bb, false);
2559
2560 switch (insn->def.bit_size) {
2561 case 64:
2562 val = loadImm(getSSA(8), insn->value[idx].u64);
2563 break;
2564 case 32:
2565 val = loadImm(getSSA(4), insn->value[idx].u32);
2566 break;
2567 case 16:
2568 val = loadImm(getSSA(4), insn->value[idx].u16);
2569 break;
2570 case 8:
2571 val = loadImm(getSSA(4), insn->value[idx].u8);
2572 break;
2573 default:
2574 unreachable("unhandled bit size!\n");
2575 }
2576 setPosition(bb, true);
2577 return val;
2578 }
2579
2580 bool
2581 Converter::visit(nir_load_const_instr *insn)
2582 {
2583 assert(insn->def.bit_size <= 64);
2584 immediates[insn->def.index] = insn;
2585 return true;
2586 }
2587
2588 #define DEFAULT_CHECKS \
2589 if (insn->def.num_components > 1) { \
2590 ERROR("nir_alu_instr only supported with 1 component!\n"); \
2591 return false; \
2592 }
2593 bool
2594 Converter::visit(nir_alu_instr *insn)
2595 {
2596 const nir_op op = insn->op;
2597 const nir_op_info &info = nir_op_infos[op];
2598 DataType dType = getDType(insn);
2599 const std::vector<DataType> sTypes = getSTypes(insn);
2600
2601 Instruction *oldPos = this->bb->getExit();
2602
2603 switch (op) {
2604 case nir_op_fabs:
2605 case nir_op_iabs:
2606 case nir_op_fadd:
2607 case nir_op_iadd:
2608 case nir_op_iand:
2609 case nir_op_fceil:
2610 case nir_op_fcos:
2611 case nir_op_fdiv:
2612 case nir_op_idiv:
2613 case nir_op_udiv:
2614 case nir_op_fexp2:
2615 case nir_op_ffloor:
2616 case nir_op_ffma:
2617 case nir_op_ffmaz:
2618 case nir_op_flog2:
2619 case nir_op_fmax:
2620 case nir_op_imax:
2621 case nir_op_umax:
2622 case nir_op_fmin:
2623 case nir_op_imin:
2624 case nir_op_umin:
2625 case nir_op_fmod:
2626 case nir_op_imod:
2627 case nir_op_umod:
2628 case nir_op_fmul:
2629 case nir_op_fmulz:
2630 case nir_op_amul:
2631 case nir_op_imul:
2632 case nir_op_imul_high:
2633 case nir_op_umul_high:
2634 case nir_op_fneg:
2635 case nir_op_ineg:
2636 case nir_op_inot:
2637 case nir_op_ior:
2638 case nir_op_pack_64_2x32_split:
2639 case nir_op_frcp:
2640 case nir_op_frem:
2641 case nir_op_irem:
2642 case nir_op_frsq:
2643 case nir_op_fsat:
2644 case nir_op_ishr:
2645 case nir_op_ushr:
2646 case nir_op_fsin:
2647 case nir_op_fsqrt:
2648 case nir_op_ftrunc:
2649 case nir_op_ishl:
2650 case nir_op_ixor: {
2651 DEFAULT_CHECKS;
2652 LValues &newDefs = convert(&insn->def);
2653 operation preOp = preOperationNeeded(op);
2654 if (preOp != OP_NOP) {
2655 assert(info.num_inputs < 2);
2656 Value *tmp = getSSA(typeSizeof(dType));
2657 Instruction *i0 = mkOp(preOp, dType, tmp);
2658 Instruction *i1 = mkOp(getOperation(op), dType, newDefs[0]);
2659 if (info.num_inputs) {
2660 i0->setSrc(0, getSrc(&insn->src[0]));
2661 i1->setSrc(0, tmp);
2662 }
2663 i1->subOp = getSubOp(op);
2664 } else {
2665 Instruction *i = mkOp(getOperation(op), dType, newDefs[0]);
2666 for (unsigned s = 0u; s < info.num_inputs; ++s) {
2667 i->setSrc(s, getSrc(&insn->src[s]));
2668
2669 switch (op) {
2670 case nir_op_fmul:
2671 case nir_op_ffma:
2672 i->dnz = this->info->io.mul_zero_wins;
2673 break;
2674 case nir_op_fmulz:
2675 case nir_op_ffmaz:
2676 i->dnz = true;
2677 break;
2678 default:
2679 break;
2680 }
2681 }
2682 i->subOp = getSubOp(op);
2683 }
2684 break;
2685 }
2686 case nir_op_ifind_msb:
2687 case nir_op_ufind_msb: {
2688 DEFAULT_CHECKS;
2689 LValues &newDefs = convert(&insn->def);
2690 dType = sTypes[0];
2691 mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2692 break;
2693 }
2694 case nir_op_fround_even: {
2695 DEFAULT_CHECKS;
2696 LValues &newDefs = convert(&insn->def);
2697 mkCvt(OP_CVT, dType, newDefs[0], dType, getSrc(&insn->src[0]))->rnd = ROUND_NI;
2698 break;
2699 }
2700 // convert instructions
2701 case nir_op_f2i8:
2702 case nir_op_f2u8:
2703 case nir_op_i2i8:
2704 case nir_op_u2u8:
2705 case nir_op_f2i16:
2706 case nir_op_f2u16:
2707 case nir_op_i2i16:
2708 case nir_op_u2u16:
2709 case nir_op_f2f32:
2710 case nir_op_f2i32:
2711 case nir_op_f2u32:
2712 case nir_op_i2f32:
2713 case nir_op_i2i32:
2714 case nir_op_u2f32:
2715 case nir_op_u2u32:
2716 case nir_op_f2f64:
2717 case nir_op_f2i64:
2718 case nir_op_f2u64:
2719 case nir_op_i2f64:
2720 case nir_op_i2i64:
2721 case nir_op_u2f64:
2722 case nir_op_u2u64: {
2723 DEFAULT_CHECKS;
2724 LValues &newDefs = convert(&insn->def);
2725 DataType stype = sTypes[0];
2726 Instruction *i = mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2727 if (::isFloatType(stype) && isIntType(dType))
2728 i->rnd = ROUND_Z;
2729 i->sType = stype;
2730 break;
2731 }
2732 // compare instructions
2733 case nir_op_ieq8:
2734 case nir_op_ige8:
2735 case nir_op_uge8:
2736 case nir_op_ilt8:
2737 case nir_op_ult8:
2738 case nir_op_ine8:
2739 case nir_op_ieq16:
2740 case nir_op_ige16:
2741 case nir_op_uge16:
2742 case nir_op_ilt16:
2743 case nir_op_ult16:
2744 case nir_op_ine16:
2745 case nir_op_feq32:
2746 case nir_op_ieq32:
2747 case nir_op_fge32:
2748 case nir_op_ige32:
2749 case nir_op_uge32:
2750 case nir_op_flt32:
2751 case nir_op_ilt32:
2752 case nir_op_ult32:
2753 case nir_op_fneu32:
2754 case nir_op_ine32: {
2755 DEFAULT_CHECKS;
2756 LValues &newDefs = convert(&insn->def);
2757 Instruction *i = mkCmp(getOperation(op),
2758 getCondCode(op),
2759 dType,
2760 newDefs[0],
2761 dType,
2762 getSrc(&insn->src[0]),
2763 getSrc(&insn->src[1]));
2764 if (info.num_inputs == 3)
2765 i->setSrc(2, getSrc(&insn->src[2]));
2766 i->sType = sTypes[0];
2767 break;
2768 }
2769 case nir_op_mov: {
2770 LValues &newDefs = convert(&insn->def);
2771 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2772 mkMov(newDefs[c], getSrc(&insn->src[0], c), dType);
2773 }
2774 break;
2775 }
2776 case nir_op_vec2:
2777 case nir_op_vec3:
2778 case nir_op_vec4:
2779 case nir_op_vec8:
2780 case nir_op_vec16: {
2781 LValues &newDefs = convert(&insn->def);
2782 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2783 mkMov(newDefs[c], getSrc(&insn->src[c]), dType);
2784 }
2785 break;
2786 }
2787 // (un)pack
2788 case nir_op_pack_64_2x32: {
2789 LValues &newDefs = convert(&insn->def);
2790 Instruction *merge = mkOp(OP_MERGE, dType, newDefs[0]);
2791 merge->setSrc(0, getSrc(&insn->src[0], 0));
2792 merge->setSrc(1, getSrc(&insn->src[0], 1));
2793 break;
2794 }
2795 case nir_op_pack_half_2x16_split: {
2796 LValues &newDefs = convert(&insn->def);
2797 Value *tmpH = getSSA();
2798 Value *tmpL = getSSA();
2799
2800 mkCvt(OP_CVT, TYPE_F16, tmpL, TYPE_F32, getSrc(&insn->src[0]));
2801 mkCvt(OP_CVT, TYPE_F16, tmpH, TYPE_F32, getSrc(&insn->src[1]));
2802 mkOp3(OP_INSBF, TYPE_U32, newDefs[0], tmpH, mkImm(0x1010), tmpL);
2803 break;
2804 }
2805 case nir_op_unpack_half_2x16_split_x:
2806 case nir_op_unpack_half_2x16_split_y: {
2807 LValues &newDefs = convert(&insn->def);
2808 Instruction *cvt = mkCvt(OP_CVT, TYPE_F32, newDefs[0], TYPE_F16, getSrc(&insn->src[0]));
2809 if (op == nir_op_unpack_half_2x16_split_y)
2810 cvt->subOp = 1;
2811 break;
2812 }
2813 case nir_op_unpack_64_2x32: {
2814 LValues &newDefs = convert(&insn->def);
2815 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, newDefs[1]);
2816 break;
2817 }
2818 case nir_op_unpack_64_2x32_split_x: {
2819 LValues &newDefs = convert(&insn->def);
2820 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, getSSA());
2821 break;
2822 }
2823 case nir_op_unpack_64_2x32_split_y: {
2824 LValues &newDefs = convert(&insn->def);
2825 mkOp1(OP_SPLIT, dType, getSSA(), getSrc(&insn->src[0]))->setDef(1, newDefs[0]);
2826 break;
2827 }
2828 // special instructions
2829 case nir_op_fsign:
2830 case nir_op_isign: {
2831 DEFAULT_CHECKS;
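// compute sign(x) as (x > 0) - (x < 0) from the two comparison results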
2832 DataType iType;
2833 if (::isFloatType(dType))
2834 iType = TYPE_F32;
2835 else
2836 iType = TYPE_S32;
2837
2838 LValues &newDefs = convert(&insn->def);
2839 LValue *val0 = getScratch();
2840 LValue *val1 = getScratch();
2841 mkCmp(OP_SET, CC_GT, iType, val0, dType, getSrc(&insn->src[0]), zero);
2842 mkCmp(OP_SET, CC_LT, iType, val1, dType, getSrc(&insn->src[0]), zero);
2843
2844 if (dType == TYPE_F64) {
2845 mkOp2(OP_SUB, iType, val0, val0, val1);
2846 mkCvt(OP_CVT, TYPE_F64, newDefs[0], iType, val0);
2847 } else if (dType == TYPE_S64 || dType == TYPE_U64) {
2848 mkOp2(OP_SUB, iType, val0, val1, val0);
2849 mkOp2(OP_SHR, iType, val1, val0, loadImm(NULL, 31));
2850 mkOp2(OP_MERGE, dType, newDefs[0], val0, val1);
2851 } else if (::isFloatType(dType))
2852 mkOp2(OP_SUB, iType, newDefs[0], val0, val1);
2853 else
2854 mkOp2(OP_SUB, iType, newDefs[0], val1, val0);
2855 break;
2856 }
2857 case nir_op_fcsel:
2858 case nir_op_b32csel: {
2859 DEFAULT_CHECKS;
2860 LValues &newDefs = convert(&insn->def);
2861 mkCmp(OP_SLCT, CC_NE, dType, newDefs[0], sTypes[0], getSrc(&insn->src[1]), getSrc(&insn->src[2]), getSrc(&insn->src[0]));
2862 break;
2863 }
2864 case nir_op_ibitfield_extract:
2865 case nir_op_ubitfield_extract: {
2866 DEFAULT_CHECKS;
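// EXTBF expects a packed {width, offset} operand: insert the width (src[2])
// into bits 8..15 above the offset (src[1]) via INSBF with 0x808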
2867 Value *tmp = getSSA();
2868 LValues &newDefs = convert(&insn->def);
2869 mkOp3(OP_INSBF, dType, tmp, getSrc(&insn->src[2]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2870 mkOp2(OP_EXTBF, dType, newDefs[0], getSrc(&insn->src[0]), tmp);
2871 break;
2872 }
2873 case nir_op_bfm: {
2874 DEFAULT_CHECKS;
2875 LValues &newDefs = convert(&insn->def);
2876 mkOp2(OP_BMSK, dType, newDefs[0], getSrc(&insn->src[1]), getSrc(&insn->src[0]))->subOp = NV50_IR_SUBOP_BMSK_W;
2877 break;
2878 }
2879 case nir_op_bitfield_insert: {
2880 DEFAULT_CHECKS;
2881 LValues &newDefs = convert(&insn->def);
2882 LValue *temp = getSSA();
2883 mkOp3(OP_INSBF, TYPE_U32, temp, getSrc(&insn->src[3]), mkImm(0x808), getSrc(&insn->src[2]));
2884 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[1]), temp, getSrc(&insn->src[0]));
2885 break;
2886 }
2887 case nir_op_bit_count: {
2888 DEFAULT_CHECKS;
2889 LValues &newDefs = convert(&insn->def);
2890 mkOp2(OP_POPCNT, dType, newDefs[0], getSrc(&insn->src[0]), getSrc(&insn->src[0]));
2891 break;
2892 }
2893 case nir_op_bitfield_reverse: {
2894 DEFAULT_CHECKS;
2895 LValues &newDefs = convert(&insn->def);
2896 mkOp1(OP_BREV, TYPE_U32, newDefs[0], getSrc(&insn->src[0]));
2897 break;
2898 }
2899 case nir_op_find_lsb: {
2900 DEFAULT_CHECKS;
2901 LValues &newDefs = convert(&insn->def);
2902 Value *tmp = getSSA();
2903 mkOp1(OP_BREV, TYPE_U32, tmp, getSrc(&insn->src[0]));
2904 mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
2905 break;
2906 }
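// the extract_* ops are implemented with PERMT byte selects: selector
// nibbles pointing into the zero immediate produce zero bytes, and for the
// signed variants the high selector bit replicates the sign of the selected
// byte, giving zero- vs sign-extension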
2907 case nir_op_extract_u8: {
2908 DEFAULT_CHECKS;
2909 LValues &newDefs = convert(&insn->def);
2910 Value *prmt = getSSA();
2911 mkOp2(OP_OR, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x4440));
2912 mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
2913 break;
2914 }
2915 case nir_op_extract_i8: {
2916 DEFAULT_CHECKS;
2917 LValues &newDefs = convert(&insn->def);
2918 Value *prmt = getSSA();
2919 mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x1111), loadImm(NULL, 0x8880));
2920 mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
2921 break;
2922 }
2923 case nir_op_extract_u16: {
2924 DEFAULT_CHECKS;
2925 LValues &newDefs = convert(&insn->def);
2926 Value *prmt = getSSA();
2927 mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x22), loadImm(NULL, 0x4410));
2928 mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
2929 break;
2930 }
2931 case nir_op_extract_i16: {
2932 DEFAULT_CHECKS;
2933 LValues &newDefs = convert(&insn->def);
2934 Value *prmt = getSSA();
2935 mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x2222), loadImm(NULL, 0x9910));
2936 mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
2937 break;
2938 }
2939 case nir_op_fquantize2f16: {
2940 DEFAULT_CHECKS;
2941 LValues &newDefs = convert(&insn->def);
2942 Value *tmp = getSSA();
2943 mkCvt(OP_CVT, TYPE_F16, tmp, TYPE_F32, getSrc(&insn->src[0]))->ftz = 1;
2944 mkCvt(OP_CVT, TYPE_F32, newDefs[0], TYPE_F16, tmp);
2945 break;
2946 }
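// rotates map onto funnel shifts of the value with itself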
2947 case nir_op_urol: {
2948 DEFAULT_CHECKS;
2949 LValues &newDefs = convert(&insn->def);
2950 mkOp3(OP_SHF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]),
2951 getSrc(&insn->src[1]), getSrc(&insn->src[0]))
2952 ->subOp = NV50_IR_SUBOP_SHF_L |
2953 NV50_IR_SUBOP_SHF_W |
2954 NV50_IR_SUBOP_SHF_HI;
2955 break;
2956 }
2957 case nir_op_uror: {
2958 DEFAULT_CHECKS;
2959 LValues &newDefs = convert(&insn->def);
2960 mkOp3(OP_SHF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]),
2961 getSrc(&insn->src[1]), getSrc(&insn->src[0]))
2962 ->subOp = NV50_IR_SUBOP_SHF_R |
2963 NV50_IR_SUBOP_SHF_W |
2964 NV50_IR_SUBOP_SHF_LO;
2965 break;
2966 }
2967 // boolean conversions
2968 case nir_op_b2f32: {
2969 DEFAULT_CHECKS;
2970 LValues &newDefs = convert(&insn->def);
2971 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1.0f));
2972 break;
2973 }
2974 case nir_op_b2f64: {
2975 DEFAULT_CHECKS;
2976 LValues &newDefs = convert(&insn->def);
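// 1.0 as a double is 0x3ff00000'00000000: AND the 0/~0 boolean with the
// high word and merge it with a zero low word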
2977 Value *tmp = getSSA(4);
2978 mkOp2(OP_AND, TYPE_U32, tmp, getSrc(&insn->src[0]), loadImm(NULL, 0x3ff00000));
2979 mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp);
2980 break;
2981 }
2982 case nir_op_b2i8:
2983 case nir_op_b2i16:
2984 case nir_op_b2i32: {
2985 DEFAULT_CHECKS;
2986 LValues &newDefs = convert(&insn->def);
2987 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1));
2988 break;
2989 }
2990 case nir_op_b2i64: {
2991 DEFAULT_CHECKS;
2992 LValues &newDefs = convert(&insn->def);
2993 LValue *def = getScratch();
2994 mkOp2(OP_AND, TYPE_U32, def, getSrc(&insn->src[0]), loadImm(NULL, 1));
2995 mkOp2(OP_MERGE, TYPE_S64, newDefs[0], def, loadImm(NULL, 0));
2996 break;
2997 }
2998 default:
2999 ERROR("unknown nir_op %s\n", info.name);
3000 assert(false);
3001 return false;
3002 }
3003
3004 if (!oldPos) {
3005 oldPos = this->bb->getEntry();
3006 oldPos->precise = insn->exact;
3007 }
3008
3009 if (unlikely(!oldPos))
3010 return true;
3011
3012 while (oldPos->next) {
3013 oldPos = oldPos->next;
3014 oldPos->precise = insn->exact;
3015 }
3016
3017 return true;
3018 }
3019 #undef DEFAULT_CHECKS
3020
3021 bool
3022 Converter::visit(nir_undef_instr *insn)
3023 {
3024 LValues &newDefs = convert(&insn->def);
3025 for (uint8_t i = 0u; i < insn->def.num_components; ++i) {
3026 mkOp(OP_NOP, TYPE_NONE, newDefs[i]);
3027 }
3028 return true;
3029 }
3030
3031 #define CASE_SAMPLER(ty) \
3032 case GLSL_SAMPLER_DIM_ ## ty : \
3033 if (isArray && !isShadow) \
3034 return TEX_TARGET_ ## ty ## _ARRAY; \
3035 else if (!isArray && isShadow) \
3036 return TEX_TARGET_## ty ## _SHADOW; \
3037 else if (isArray && isShadow) \
3038 return TEX_TARGET_## ty ## _ARRAY_SHADOW; \
3039 else \
3040 return TEX_TARGET_ ## ty
3041
3042 TexTarget
3043 Converter::convert(glsl_sampler_dim dim, bool isArray, bool isShadow)
3044 {
3045 switch (dim) {
3046 CASE_SAMPLER(1D);
3047 case GLSL_SAMPLER_DIM_SUBPASS:
3048 CASE_SAMPLER(2D);
3049 CASE_SAMPLER(CUBE);
3050 case GLSL_SAMPLER_DIM_3D:
3051 return TEX_TARGET_3D;
3052 case GLSL_SAMPLER_DIM_MS:
3053 case GLSL_SAMPLER_DIM_SUBPASS_MS:
3054 if (isArray)
3055 return TEX_TARGET_2D_MS_ARRAY;
3056 return TEX_TARGET_2D_MS;
3057 case GLSL_SAMPLER_DIM_RECT:
3058 if (isShadow)
3059 return TEX_TARGET_RECT_SHADOW;
3060 return TEX_TARGET_RECT;
3061 case GLSL_SAMPLER_DIM_BUF:
3062 return TEX_TARGET_BUFFER;
3063 case GLSL_SAMPLER_DIM_EXTERNAL:
3064 return TEX_TARGET_2D;
3065 default:
3066 ERROR("unknown glsl_sampler_dim %u\n", dim);
3067 assert(false);
3068 return TEX_TARGET_COUNT;
3069 }
3070 }
3071 #undef CASE_SAMPLER
3072
3073 unsigned int
3074 Converter::getNIRArgCount(TexInstruction::Target& target)
3075 {
3076 unsigned int result = target.getArgCount();
3077 if (target.isCube() && target.isArray())
3078 result--;
3079 if (target.isMS())
3080 result--;
3081 return result;
3082 }
3083
3084 CacheMode
3085 Converter::convert(enum gl_access_qualifier access)
3086 {
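// roughly: CV maps to uncached/volatile accesses, CG to caching only at the
// globally coherent levels, CA to the full cache hierarchy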
3087 if (access & ACCESS_VOLATILE)
3088 return CACHE_CV;
3089 if (access & ACCESS_COHERENT)
3090 return CACHE_CG;
3091 return CACHE_CA;
3092 }
3093
3094 bool
3095 Converter::visit(nir_tex_instr *insn)
3096 {
3097 switch (insn->op) {
3098 case nir_texop_lod:
3099 case nir_texop_query_levels:
3100 case nir_texop_tex:
3101 case nir_texop_texture_samples:
3102 case nir_texop_tg4:
3103 case nir_texop_txb:
3104 case nir_texop_txd:
3105 case nir_texop_txf:
3106 case nir_texop_txf_ms:
3107 case nir_texop_txl:
3108 case nir_texop_txs: {
3109 LValues &newDefs = convert(&insn->def);
3110 std::vector<Value*> srcs;
3111 std::vector<Value*> defs;
3112 std::vector<nir_src*> offsets;
3113 uint8_t mask = 0;
3114 bool lz = false;
3115 TexInstruction::Target target = convert(insn->sampler_dim, insn->is_array, insn->is_shadow);
3116 operation op = getOperation(insn->op);
3117
3118 int r, s;
3119 int biasIdx = nir_tex_instr_src_index(insn, nir_tex_src_bias);
3120 int compIdx = nir_tex_instr_src_index(insn, nir_tex_src_comparator);
3121 int coordsIdx = nir_tex_instr_src_index(insn, nir_tex_src_coord);
3122 int ddxIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddx);
3123 int ddyIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddy);
3124 int msIdx = nir_tex_instr_src_index(insn, nir_tex_src_ms_index);
3125 int lodIdx = nir_tex_instr_src_index(insn, nir_tex_src_lod);
3126 int offsetIdx = nir_tex_instr_src_index(insn, nir_tex_src_offset);
3127 int sampOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_offset);
3128 int texOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_offset);
3129 int sampHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_handle);
3130 int texHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_handle);
3131
3132 bool bindless = sampHandleIdx != -1 || texHandleIdx != -1;
3133 assert((sampHandleIdx != -1) == (texHandleIdx != -1));
3134
3135 srcs.resize(insn->coord_components);
3136 for (uint8_t i = 0u; i < insn->coord_components; ++i)
3137 srcs[i] = getSrc(&insn->src[coordsIdx].src, i);
3138
3139 // sometimes we get fewer args than target.getArgCount(), but codegen expects the latter
3140 if (insn->coord_components) {
3141 uint32_t argCount = target.getArgCount();
3142
3143 if (target.isMS())
3144 argCount -= 1;
3145
3146 for (uint32_t i = 0u; i < (argCount - insn->coord_components); ++i)
3147 srcs.push_back(getSSA());
3148 }
3149
3150 if (biasIdx != -1)
3151 srcs.push_back(getSrc(&insn->src[biasIdx].src, 0));
3152 // TXQ requires a lod argument for all queries we care about here.
3153 // For other ops on MS textures we skip it.
3154 if (lodIdx != -1 && !target.isMS())
3155 srcs.push_back(getSrc(&insn->src[lodIdx].src, 0));
3156 else if (op == OP_TXQ)
3157 srcs.push_back(loadImm(NULL, 0)); // TXQ always needs an LOD
3158 else if (op == OP_TXF)
3159 lz = true;
3160 if (msIdx != -1)
3161 srcs.push_back(getSrc(&insn->src[msIdx].src, 0));
3162 if (offsetIdx != -1)
3163 offsets.push_back(&insn->src[offsetIdx].src);
3164 if (compIdx != -1)
3165 srcs.push_back(getSrc(&insn->src[compIdx].src, 0));
3166 if (texOffIdx != -1) {
3167 srcs.push_back(getSrc(&insn->src[texOffIdx].src, 0));
3168 texOffIdx = srcs.size() - 1;
3169 }
3170 if (sampOffIdx != -1) {
3171 srcs.push_back(getSrc(&insn->src[sampOffIdx].src, 0));
3172 sampOffIdx = srcs.size() - 1;
3173 }
3174 if (bindless) {
3175 // currently we only use the lower 32 bits of the bindless handle
3176 Value *split[2];
3177 Value *handle = getSrc(&insn->src[sampHandleIdx].src, 0);
3178
3179 mkSplit(split, 4, handle);
3180
3181 srcs.push_back(split[0]);
3182 texOffIdx = srcs.size() - 1;
3183 }
3184
3185 r = bindless ? 0xff : insn->texture_index;
3186 s = bindless ? 0x1f : insn->sampler_index;
3187 if (op == OP_TXF || op == OP_TXQ)
3188 s = 0;
3189
3190 defs.resize(newDefs.size());
3191 for (uint8_t d = 0u; d < newDefs.size(); ++d) {
3192 defs[d] = newDefs[d];
3193 mask |= 1 << d;
3194 }
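// MS textures have no mip levels, and implicit-LOD sampling outside the
// fragment shader has no derivatives to work with, so force level zero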
3195 if (target.isMS() || (op == OP_TEX && prog->getType() != Program::TYPE_FRAGMENT))
3196 lz = true;
3197
3198 // TODO figure out which instructions still need this.
3199 if (srcs.empty())
3200 srcs.push_back(loadImm(NULL, 0));
3201
3202 TexInstruction *texi = mkTex(op, target.getEnum(), r, s, defs, srcs);
3203 texi->tex.levelZero = lz;
3204 texi->tex.mask = mask;
3205 texi->tex.bindless = bindless;
3206
3207 if (texOffIdx != -1)
3208 texi->tex.rIndirectSrc = texOffIdx;
3209 if (sampOffIdx != -1)
3210 texi->tex.sIndirectSrc = sampOffIdx;
3211
3212 switch (insn->op) {
3213 case nir_texop_tg4:
3214 if (!target.isShadow())
3215 texi->tex.gatherComp = insn->component;
3216 break;
3217 case nir_texop_txs:
3218 texi->tex.query = TXQ_DIMS;
3219 break;
3220 case nir_texop_texture_samples:
3221 texi->tex.mask = 0x4;
3222 texi->tex.query = TXQ_TYPE;
3223 break;
3224 case nir_texop_query_levels:
3225 texi->tex.mask = 0x8;
3226 texi->tex.query = TXQ_DIMS;
3227 break;
3228 // TODO: TXQ_SAMPLE_POSITION needs the sample id instead of the LOD emitted further up.
3229 default:
3230 break;
3231 }
3232
3233 texi->tex.useOffsets = offsets.size();
3234 if (texi->tex.useOffsets) {
3235 for (uint8_t s = 0; s < texi->tex.useOffsets; ++s) {
3236 for (uint32_t c = 0u; c < 3; ++c) {
3237 uint8_t s2 = std::min(c, target.getDim() - 1);
3238 texi->offset[s][c].set(getSrc(offsets[s], s2));
3239 texi->offset[s][c].setInsn(texi);
3240 }
3241 }
3242 }
3243
3244 if (op == OP_TXG && offsetIdx == -1) {
3245 if (nir_tex_instr_has_explicit_tg4_offsets(insn)) {
3246 texi->tex.useOffsets = 4;
3247 setPosition(texi, false);
3248 for (uint8_t i = 0; i < 4; ++i) {
3249 for (uint8_t j = 0; j < 2; ++j) {
3250 texi->offset[i][j].set(loadImm(NULL, insn->tg4_offsets[i][j]));
3251 texi->offset[i][j].setInsn(texi);
3252 }
3253 }
3254 setPosition(texi, true);
3255 }
3256 }
3257
3258 if (ddxIdx != -1 && ddyIdx != -1) {
3259 for (uint8_t c = 0u; c < target.getDim() + target.isCube(); ++c) {
3260 texi->dPdx[c].set(getSrc(&insn->src[ddxIdx].src, c));
3261 texi->dPdy[c].set(getSrc(&insn->src[ddyIdx].src, c));
3262 }
3263 }
3264
3265 break;
3266 }
3267 default:
3268 ERROR("unknown nir_texop %u\n", insn->op);
3269 return false;
3270 }
3271 return true;
3272 }
3273
3274 /* nouveau's RA doesn't track the liveness of exported registers in the fragment
3275 * shader, so we need all the store_outputs to appear at the end of the shader
3276 * with no other instructions that might generate a temp value in between them.
3277 */
3278 static void
3279 nv_nir_move_stores_to_end(nir_shader *s)
3280 {
3281 nir_function_impl *impl = nir_shader_get_entrypoint(s);
3282 nir_block *block = nir_impl_last_block(impl);
3283 nir_instr *first_store = NULL;
3284
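/* stop once the walk reaches the first store we already moved to the end,
 * so the re-inserted stores are not processed again
 */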
3285 nir_foreach_instr_safe(instr, block) {
3286 if (instr == first_store)
3287 break;
3288 if (instr->type != nir_instr_type_intrinsic)
3289 continue;
3290 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
3291 if (intrin->intrinsic == nir_intrinsic_store_output) {
3292 nir_instr_remove(instr);
3293 nir_instr_insert(nir_after_block(block), instr);
3294
3295 if (!first_store)
3296 first_store = instr;
3297 }
3298 }
3299 nir_metadata_preserve(impl,
3300 nir_metadata_control_flow);
3301 }
3302
3303 unsigned
3304 Converter::lowerBitSizeCB(const nir_instr *instr, void *data)
3305 {
3306 Converter *instance = static_cast<Converter *>(data);
3307 nir_alu_instr *alu;
3308
3309 if (instr->type != nir_instr_type_alu)
3310 return 0;
3311
3312 alu = nir_instr_as_alu(instr);
3313
3314 switch (alu->op) {
3315 /* TODO: Check for operation OP_SET instead of all listed nir opcodes
3316 * individually.
3317 *
3318 * Currently, we can't call getOperation(nir_op), since not all nir opcodes
3319 * are handled within getOperation() and we'd run into an assert().
3320 *
3321 * Adding all nir opcodes to getOperation() isn't trivial, since the
3322 * enum operation of some of the nir opcodes isn't distinct (e.g. depends
3323 * on the data type).
3324 */
3325 case nir_op_ieq8:
3326 case nir_op_ige8:
3327 case nir_op_uge8:
3328 case nir_op_ilt8:
3329 case nir_op_ult8:
3330 case nir_op_ine8:
3331 case nir_op_ieq16:
3332 case nir_op_ige16:
3333 case nir_op_uge16:
3334 case nir_op_ilt16:
3335 case nir_op_ult16:
3336 case nir_op_ine16:
3337 case nir_op_feq32:
3338 case nir_op_ieq32:
3339 case nir_op_fge32:
3340 case nir_op_ige32:
3341 case nir_op_uge32:
3342 case nir_op_flt32:
3343 case nir_op_ilt32:
3344 case nir_op_ult32:
3345 case nir_op_fneu32:
3346 case nir_op_ine32: {
3347 DataType stype = instance->getSTypes(alu)[0];
3348
3349 if (isSignedIntType(stype) && typeSizeof(stype) < 4)
3350 return 32;
3351
3352 return 0;
3353 }
3354 case nir_op_i2f64:
3355 case nir_op_u2f64: {
3356 DataType stype = instance->getSTypes(alu)[0];
3357
3358 if (isIntType(stype) && (typeSizeof(stype) <= 2))
3359 return 32;
3360
3361 return 0;
3362 }
3363 default:
3364 return 0;
3365 }
3366 }
3367
3368 void
3369 Converter::runOptLoop()
3370 {
3371 bool progress;
3372 do {
3373 progress = false;
3374 NIR_PASS(progress, nir, nir_copy_prop);
3375 NIR_PASS(progress, nir, nir_opt_remove_phis);
3376 NIR_PASS(progress, nir, nir_opt_loop);
3377 NIR_PASS(progress, nir, nir_opt_cse);
3378 NIR_PASS(progress, nir, nir_opt_algebraic);
3379 NIR_PASS(progress, nir, nir_opt_constant_folding);
3380 NIR_PASS(progress, nir, nir_copy_prop);
3381 NIR_PASS(progress, nir, nir_opt_dce);
3382 NIR_PASS(progress, nir, nir_opt_dead_cf);
3383 NIR_PASS(progress, nir, nir_lower_64bit_phis);
3384 } while (progress);
3385 }
3386
3387 bool
3388 Converter::run()
3389 {
3390 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
3391 nir_print_shader(nir, stderr);
3392
3393 struct nir_lower_subgroups_options subgroup_options = {};
3394 subgroup_options.subgroup_size = 32;
3395 subgroup_options.ballot_bit_size = 32;
3396 subgroup_options.ballot_components = 1;
3397 subgroup_options.lower_elect = true;
3398 subgroup_options.lower_inverse_ballot = true;
3399
3400 unsigned lower_flrp = (nir->options->lower_flrp16 ? 16 : 0) |
3401 (nir->options->lower_flrp32 ? 32 : 0) |
3402 (nir->options->lower_flrp64 ? 64 : 0);
3403 assert(lower_flrp);
3404
3405 info_out->io.genUserClip = info->io.genUserClip;
3406 if (info->io.genUserClip > 0) {
3407 bool lowered = false;
3408
3409 if (nir->info.stage == MESA_SHADER_VERTEX ||
3410 nir->info.stage == MESA_SHADER_TESS_EVAL)
3411 NIR_PASS(lowered, nir, nir_lower_clip_vs,
3412 (1 << info->io.genUserClip) - 1, true, false, NULL);
3413 else if (nir->info.stage == MESA_SHADER_GEOMETRY)
3414 NIR_PASS(lowered, nir, nir_lower_clip_gs,
3415 (1 << info->io.genUserClip) - 1, false, NULL);
3416
3417 if (lowered) {
3418 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
3419 NIR_PASS_V(nir, nir_lower_io_to_temporaries, impl, true, false);
3420 NIR_PASS_V(nir, nir_lower_global_vars_to_local);
3421 NIR_PASS_V(nir, nv50_nir_lower_load_user_clip_plane, info);
3422 } else {
3423 info_out->io.genUserClip = -1;
3424 }
3425 }
3426
3427 /* prepare for IO lowering */
3428 NIR_PASS_V(nir, nir_lower_flrp, lower_flrp, false);
3429 NIR_PASS_V(nir, nir_opt_deref);
3430 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
3431
3432 NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
3433 type_size, nir_lower_io_use_interpolated_input_intrinsics);
3434
3435 NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
3436
3437 struct nir_lower_tex_options tex_options = {};
3438 tex_options.lower_txp = ~0;
3439
3440 NIR_PASS_V(nir, nir_lower_tex, &tex_options);
3441
3442 NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
3443 NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
3444 NIR_PASS_V(nir, nir_lower_phis_to_scalar, false);
3445
3446 NIR_PASS_V(nir, nir_lower_frexp);
3447
3448 /* TODO: improve this lowering/optimisation loop so that we can use
3449 * nir_opt_idiv_const effectively before this.
3450 */
3451 nir_lower_idiv_options idiv_options = {
3452 .allow_fp16 = true,
3453 };
3454 NIR_PASS_V(nir, nir_lower_idiv, &idiv_options);
3455
3456 runOptLoop();
3457
3458 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
3459 NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp,
3460 glsl_get_natural_size_align_bytes);
3461 NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_function_temp, nir_address_format_32bit_offset);
3462 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
3463
3464 NIR_PASS_V(nir, nir_opt_constant_folding); // Improves aliasing information
3465
3466 nir_load_store_vectorize_options vectorize_opts = {};
3467 vectorize_opts.modes = nir_var_mem_global |
3468 nir_var_mem_ssbo |
3469 nir_var_mem_shared |
3470 nir_var_shader_temp;
3471 vectorize_opts.callback = Converter::memVectorizeCb;
3472 vectorize_opts.cb_data = this;
3473 NIR_PASS_V(nir, nir_opt_load_store_vectorize, &vectorize_opts);
3474
3475 nir_lower_mem_access_bit_sizes_options mem_bit_sizes = {};
3476 mem_bit_sizes.modes = nir_var_mem_global |
3477 nir_var_mem_constant |
3478 nir_var_mem_ssbo |
3479 nir_var_mem_shared |
3480 nir_var_shader_temp | nir_var_function_temp;
3481 mem_bit_sizes.callback = Converter::getMemAccessSizeAlign;
3482 mem_bit_sizes.cb_data = this;
3483 NIR_PASS_V(nir, nir_lower_mem_access_bit_sizes, &mem_bit_sizes);
3484
3485 NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
3486 NIR_PASS_V(nir, nir_lower_pack);
3487
3488 runOptLoop();
3489
3490 NIR_PASS_V(nir, nir_opt_combine_barriers, NULL, NULL);
3491
3492 nir_move_options move_options =
3493 (nir_move_options)(nir_move_const_undef |
3494 nir_move_load_ubo |
3495 nir_move_load_uniform |
3496 nir_move_load_input);
3497 NIR_PASS_V(nir, nir_opt_sink, move_options);
3498 NIR_PASS_V(nir, nir_opt_move, move_options);
3499
3500 if (nir->info.stage == MESA_SHADER_FRAGMENT)
3501 NIR_PASS_V(nir, nv_nir_move_stores_to_end);
3502
3503 NIR_PASS_V(nir, nir_opt_algebraic_late);
3504
3505 NIR_PASS_V(nir, nir_lower_bool_to_int32);
3506 NIR_PASS_V(nir, nir_lower_bit_size, Converter::lowerBitSizeCB, this);
3507
3508 NIR_PASS_V(nir, nir_divergence_analysis);
3509 NIR_PASS_V(nir, nir_convert_from_ssa, true);
3510
3511 // Garbage collect dead instructions
3512 nir_sweep(nir);
3513
3514 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
3515
3516 if (!parseNIR()) {
3517 ERROR("Couldn't prase NIR!\n");
3518 return false;
3519 }
3520
3521 if (!assignSlots()) {
3522 ERROR("Couldn't assign slots!\n");
3523 return false;
3524 }
3525
3526 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
3527 nir_print_shader(nir, stderr);
3528
3529 nir_foreach_function(function, nir) {
3530 if (!visit(function))
3531 return false;
3532 }
3533
3534 return true;
3535 }
3536
3537 } // unnamed namespace
3538
3539 namespace nv50_ir {
3540
3541 bool
3542 Program::makeFromNIR(struct nv50_ir_prog_info *info,
3543 struct nv50_ir_prog_info_out *info_out)
3544 {
3545 nir_shader *nir = info->bin.nir;
3546 Converter converter(this, nir, info, info_out);
3547 bool result = converter.run();
3548 if (!result)
3549 return result;
3550 LoweringHelper lowering;
3551 lowering.run(this);
3552 tlsSize = info_out->bin.tlsSpace;
3553 return result;
3554 }
3555
3556 } // namespace nv50_ir
3557
3558 static nir_shader_compiler_options
3559 nvir_nir_shader_compiler_options(int chipset, uint8_t shader_type)
3560 {
3561 nir_shader_compiler_options op = {};
3562 op.lower_fdiv = (chipset >= NVISA_GV100_CHIPSET);
3563 op.lower_ffma16 = false;
3564 op.lower_ffma32 = false;
3565 op.lower_ffma64 = false;
3566 op.fuse_ffma16 = false; /* nir doesn't track mad vs fma */
3567 op.fuse_ffma32 = false; /* nir doesn't track mad vs fma */
3568 op.fuse_ffma64 = false; /* nir doesn't track mad vs fma */
3569 op.lower_flrp16 = (chipset >= NVISA_GV100_CHIPSET);
3570 op.lower_flrp32 = true;
3571 op.lower_flrp64 = true;
3572 op.lower_fpow = true;
3573 op.lower_fsat = false;
3574 op.lower_fsqrt = false; // TODO: only before gm200
3575 op.lower_sincos = false;
3576 op.lower_fmod = true;
3577 op.lower_bitfield_extract = (chipset >= NVISA_GV100_CHIPSET || chipset < NVISA_GF100_CHIPSET);
3578 op.lower_bitfield_insert = (chipset >= NVISA_GV100_CHIPSET || chipset < NVISA_GF100_CHIPSET);
3579 op.lower_bitfield_reverse = (chipset < NVISA_GF100_CHIPSET);
3580 op.lower_bit_count = (chipset < NVISA_GF100_CHIPSET);
3581 op.lower_ifind_msb = (chipset < NVISA_GF100_CHIPSET);
3582 op.lower_find_lsb = (chipset < NVISA_GF100_CHIPSET);
3583 op.lower_uadd_carry = true; // TODO
3584 op.lower_usub_borrow = true; // TODO
3585 op.lower_mul_high = false;
3586 op.lower_fneg = false;
3587 op.lower_ineg = false;
3588 op.lower_scmp = true; // TODO: not implemented yet
3589 op.lower_vector_cmp = false;
3590 op.lower_bitops = false;
3591 op.lower_isign = (chipset >= NVISA_GV100_CHIPSET);
3592 op.lower_fsign = (chipset >= NVISA_GV100_CHIPSET);
3593 op.lower_fdph = false;
3594 op.lower_fdot = false;
3595 op.fdot_replicates = false; // TODO
3596 op.lower_ffloor = false; // TODO
3597 op.lower_ffract = true;
3598 op.lower_fceil = false; // TODO
3599 op.lower_ftrunc = false;
3600 op.lower_ldexp = true;
3601 op.lower_pack_half_2x16 = true;
3602 op.lower_pack_unorm_2x16 = true;
3603 op.lower_pack_snorm_2x16 = true;
3604 op.lower_pack_unorm_4x8 = true;
3605 op.lower_pack_snorm_4x8 = true;
3606 op.lower_unpack_half_2x16 = true;
3607 op.lower_unpack_unorm_2x16 = true;
3608 op.lower_unpack_snorm_2x16 = true;
3609 op.lower_unpack_unorm_4x8 = true;
3610 op.lower_unpack_snorm_4x8 = true;
3611 op.lower_pack_split = false;
3612 op.lower_extract_byte = (chipset < NVISA_GM107_CHIPSET);
3613 op.lower_extract_word = (chipset < NVISA_GM107_CHIPSET);
3614 op.lower_insert_byte = true;
3615 op.lower_insert_word = true;
3616 op.lower_all_io_to_temps = false;
3617 op.lower_all_io_to_elements = false;
3618 op.vertex_id_zero_based = false;
3619 op.lower_base_vertex = false;
3620 op.lower_helper_invocation = false;
3621 op.optimize_sample_mask_in = false;
3622 op.lower_cs_local_index_to_id = true;
3623 op.lower_cs_local_id_to_index = false;
3624 op.lower_device_index_to_zero = true;
3625 op.lower_wpos_pntc = false; // TODO
3626 op.lower_hadd = true; // TODO
3627 op.lower_uadd_sat = true; // TODO
3628 op.lower_usub_sat = true; // TODO
3629 op.lower_iadd_sat = true; // TODO
3630 op.vectorize_io = false;
3631 op.lower_to_scalar = false;
3632 op.unify_interfaces = false;
3633 op.lower_mul_2x32_64 = true; // TODO
3634 op.has_rotate32 = (chipset >= NVISA_GV100_CHIPSET);
3635 op.has_imul24 = false;
3636 op.has_fmulz = (chipset > NVISA_G80_CHIPSET);
3637 op.intel_vec4 = false;
3638 op.lower_uniforms_to_ubo = true;
3639 op.force_indirect_unrolling = (nir_variable_mode) (
3640 ((shader_type == PIPE_SHADER_FRAGMENT) ? nir_var_shader_out : 0) |
3641 /* HW doesn't support indirect addressing of fragment program inputs
3642 * on Volta. The binary driver generates a function to handle every
3643 * possible indirection, and indirectly calls the function to handle
3644 * this instead.
3645 */
3646 ((chipset >= NVISA_GV100_CHIPSET && shader_type == PIPE_SHADER_FRAGMENT) ? nir_var_shader_in : 0)
3647 );
3648 op.force_indirect_unrolling_sampler = (chipset < NVISA_GF100_CHIPSET);
3649 op.max_unroll_iterations = 32;
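/* 64-bit lowering: on Volta and later most 64-bit integer operations are
 * broken into 32-bit sequences by NIR; on older chips only a few (divmod,
 * 2x32 multiplies, ufind_msb) are lowered here. */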
3650 op.lower_int64_options = (nir_lower_int64_options) (
3651 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul64 : 0) |
3652 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_isign64 : 0) |
3653 nir_lower_divmod64 |
3654 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_high64 : 0) |
3655 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_bcsel64 : 0) |
3656 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_icmp64 : 0) |
3657 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_iabs64 : 0) |
3658 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_ineg64 : 0) |
3659 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_logic64 : 0) |
3660 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_minmax64 : 0) |
3661 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_shift64 : 0) |
3662 nir_lower_imul_2x32_64 |
3663 ((chipset >= NVISA_GM107_CHIPSET) ? nir_lower_extract64 : 0) |
3664 nir_lower_ufind_msb64 |
3665 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_conv64 : 0)
3666 );
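/* Similarly for doubles: dmod is always lowered, the rest only on Volta+. */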
3667 op.lower_doubles_options = (nir_lower_doubles_options) (
3668 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drcp : 0) |
3669 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsqrt : 0) |
3670 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drsq : 0) |
3671 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dfract : 0) |
3672 nir_lower_dmod |
3673 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsub : 0) |
3674 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_ddiv : 0)
3675 );
3676 op.discard_is_demote = true;
3677 op.has_ddx_intrinsics = true;
3678 op.scalarize_ddx = true;
3679 op.support_indirect_inputs = (uint8_t)BITFIELD_MASK(MESA_SHADER_GEOMETRY + 1);
3680 op.support_indirect_outputs = (uint8_t)BITFIELD_MASK(MESA_SHADER_GEOMETRY + 1);
3681
3682 /* HW doesn't support indirect addressing of fragment program inputs
3683 * on Volta. The binary driver generates a function to handle every
3684 * possible indirection, and indirectly calls the function to handle
3685 * this instead.
3686 */
3687 if (chipset < NVISA_GV100_CHIPSET)
3688 op.support_indirect_inputs |= BITFIELD_BIT(MESA_SHADER_FRAGMENT);
3689
3690 return op;
3691 }
3692
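/* The option structs are computed once per chipset family, with a separate
 * variant for fragment shaders since force_indirect_unrolling depends on the
 * shader type. */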
3693 static const nir_shader_compiler_options g80_nir_shader_compiler_options =
3694 nvir_nir_shader_compiler_options(NVISA_G80_CHIPSET, PIPE_SHADER_TYPES);
3695 static const nir_shader_compiler_options g80_fs_nir_shader_compiler_options =
3696 nvir_nir_shader_compiler_options(NVISA_G80_CHIPSET, PIPE_SHADER_FRAGMENT);
3697 static const nir_shader_compiler_options gf100_nir_shader_compiler_options =
3698 nvir_nir_shader_compiler_options(NVISA_GF100_CHIPSET, PIPE_SHADER_TYPES);
3699 static const nir_shader_compiler_options gf100_fs_nir_shader_compiler_options =
3700 nvir_nir_shader_compiler_options(NVISA_GF100_CHIPSET, PIPE_SHADER_FRAGMENT);
3701 static const nir_shader_compiler_options gm107_nir_shader_compiler_options =
3702 nvir_nir_shader_compiler_options(NVISA_GM107_CHIPSET, PIPE_SHADER_TYPES);
3703 static const nir_shader_compiler_options gm107_fs_nir_shader_compiler_options =
3704 nvir_nir_shader_compiler_options(NVISA_GM107_CHIPSET, PIPE_SHADER_FRAGMENT);
3705 static const nir_shader_compiler_options gv100_nir_shader_compiler_options =
3706 nvir_nir_shader_compiler_options(NVISA_GV100_CHIPSET, PIPE_SHADER_TYPES);
3707 static const nir_shader_compiler_options gv100_fs_nir_shader_compiler_options =
3708 nvir_nir_shader_compiler_options(NVISA_GV100_CHIPSET, PIPE_SHADER_FRAGMENT);
3709
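/* Pick the precomputed options for a chipset/shader-type combination. A
 * rough usage sketch (the actual call site in the driver may differ):
 *
 *   const nir_shader_compiler_options *opts =
 *      nv50_ir_nir_shader_compiler_options(chipset, PIPE_SHADER_FRAGMENT);
 *   // hand opts to NIR when creating the shader
 */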
3710 const nir_shader_compiler_options *
3711 nv50_ir_nir_shader_compiler_options(int chipset, uint8_t shader_type)
3712 {
3713 if (chipset >= NVISA_GV100_CHIPSET) {
3714 if (shader_type == PIPE_SHADER_FRAGMENT) {
3715 return &gv100_fs_nir_shader_compiler_options;
3716 } else {
3717 return &gv100_nir_shader_compiler_options;
3718 }
3719 }
3720
3721 if (chipset >= NVISA_GM107_CHIPSET) {
3722 if (shader_type == PIPE_SHADER_FRAGMENT) {
3723 return &gm107_fs_nir_shader_compiler_options;
3724 } else {
3725 return &gm107_nir_shader_compiler_options;
3726 }
3727 }
3728
3729 if (chipset >= NVISA_GF100_CHIPSET) {
3730 if (shader_type == PIPE_SHADER_FRAGMENT) {
3731 return &gf100_fs_nir_shader_compiler_options;
3732 } else {
3733 return &gf100_nir_shader_compiler_options;
3734 }
3735 }
3736
3737 if (shader_type == PIPE_SHADER_FRAGMENT) {
3738 return &g80_fs_nir_shader_compiler_options;
3739 } else {
3740 return &g80_nir_shader_compiler_options;
3741 }
3742 }
3743