1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "codegen/nv50_ir.h"
24 #include "codegen/nv50_ir_target.h"
25 #include "codegen/nv50_ir_driver.h"
26
27 extern "C" {
28 #include "nouveau_debug.h"
29 #include "nv50/nv50_program.h"
30 }
31
32 namespace nv50_ir {
33
Modifier(operation op)34 Modifier::Modifier(operation op)
35 {
36 switch (op) {
37 case OP_NEG: bits = NV50_IR_MOD_NEG; break;
38 case OP_ABS: bits = NV50_IR_MOD_ABS; break;
39 case OP_SAT: bits = NV50_IR_MOD_SAT; break;
40 case OP_NOT: bits = NV50_IR_MOD_NOT; break;
41 default:
42 bits = 0;
43 break;
44 }
45 }
46
// Combine this modifier with @m and return the result.
// NOT and NEG toggle (XOR) while ABS and SAT accumulate (OR). If this
// modifier already carries ABS, a NEG coming from @m is dropped first.
Modifier Modifier::operator*(const Modifier m) const
{
   const unsigned int toggleMask = NV50_IR_MOD_NOT | NV50_IR_MOD_NEG;
   const unsigned int stickyMask = NV50_IR_MOD_ABS | NV50_IR_MOD_SAT;

   unsigned int rhs = m.bits;
   if (bits & NV50_IR_MOD_ABS)
      rhs &= ~NV50_IR_MOD_NEG;

   const unsigned int toggled = (bits ^ rhs) & toggleMask;
   const unsigned int sticky = (bits | m.bits) & stickyMask;

   return Modifier(toggled | sticky);
}
60
ValueRef(Value * v)61 ValueRef::ValueRef(Value *v) : value(NULL), insn(NULL)
62 {
63 indirect[0] = -1;
64 indirect[1] = -1;
65 usedAsPtr = false;
66 set(v);
67 }
68
// Copy-construct from @ref: shares its instruction pointer, takes over the
// referenced value, modifier and indirect indices via set(), and copies the
// usedAsPtr flag.
ValueRef::ValueRef(const ValueRef& ref) : value(NULL), insn(ref.insn)
{
   usedAsPtr = ref.usedAsPtr;
   set(ref);
}
74
~ValueRef()75 ValueRef::~ValueRef()
76 {
77 this->set(NULL);
78 }
79
getImmediate(ImmediateValue & imm) const80 bool ValueRef::getImmediate(ImmediateValue &imm) const
81 {
82 const ValueRef *src = this;
83 Modifier m;
84 DataType type = src->insn->sType;
85
86 while (src) {
87 if (src->mod) {
88 if (src->insn->sType != type)
89 break;
90 m *= src->mod;
91 }
92 if (src->getFile() == FILE_IMMEDIATE) {
93 imm = *(src->value->asImm());
94 // The immediate's type isn't required to match its use, it's
95 // more of a hint; applying a modifier makes use of that hint.
96 imm.reg.type = type;
97 m.applyTo(imm);
98 return true;
99 }
100
101 Instruction *insn = src->value->getUniqueInsn();
102
103 if (insn && insn->op == OP_MOV) {
104 src = &insn->src(0);
105 if (src->mod)
106 WARN("OP_MOV with modifier encountered !\n");
107 } else {
108 src = NULL;
109 }
110 }
111 return false;
112 }
113
ValueDef(Value * v)114 ValueDef::ValueDef(Value *v) : value(NULL), insn(NULL)
115 {
116 set(v);
117 }
118
// Copy-construct from @def: defines the same value, but the instruction
// pointer is NOT copied and starts out as NULL.
ValueDef::ValueDef(const ValueDef& def)
   : value(NULL),
     insn(NULL)
{
   Value *const defined = def.get();
   set(defined);
}
123
~ValueDef()124 ValueDef::~ValueDef()
125 {
126 this->set(NULL);
127 }
128
129 void
set(const ValueRef & ref)130 ValueRef::set(const ValueRef &ref)
131 {
132 this->set(ref.get());
133 mod = ref.mod;
134 indirect[0] = ref.indirect[0];
135 indirect[1] = ref.indirect[1];
136 }
137
138 void
set(Value * refVal)139 ValueRef::set(Value *refVal)
140 {
141 if (value == refVal)
142 return;
143 if (value)
144 value->uses.erase(this);
145 if (refVal)
146 refVal->uses.insert(this);
147
148 value = refVal;
149 }
150
151 void
set(Value * defVal)152 ValueDef::set(Value *defVal)
153 {
154 if (value == defVal)
155 return;
156 if (value)
157 value->defs.remove(this);
158 if (defVal)
159 defVal->defs.push_back(this);
160
161 value = defVal;
162 }
163
164 // Check if we can replace this definition's value by the value in @rep,
165 // including the source modifiers, i.e. make sure that all uses support
166 // @rep.mod.
167 bool
mayReplace(const ValueRef & rep)168 ValueDef::mayReplace(const ValueRef &rep)
169 {
170 if (!rep.mod)
171 return true;
172
173 if (!insn || !insn->bb) // Unbound instruction ?
174 return false;
175
176 const Target *target = insn->bb->getProgram()->getTarget();
177
178 for (Value::UseIterator it = value->uses.begin(); it != value->uses.end();
179 ++it) {
180 Instruction *insn = (*it)->getInsn();
181 int s = -1;
182
183 for (int i = 0; insn->srcExists(i); ++i) {
184 if (insn->src(i).get() == value) {
185 // If there are multiple references to us we'd have to check if the
186 // combination of mods is still supported, but just bail for now.
187 if (&insn->src(i) != (*it))
188 return false;
189 s = i;
190 }
191 }
192 assert(s >= 0); // integrity of uses list
193
194 if (!target->isModSupported(insn, s, rep.mod))
195 return false;
196 }
197 return true;
198 }
199
200 void
replace(const ValueRef & repVal,bool doSet)201 ValueDef::replace(const ValueRef &repVal, bool doSet)
202 {
203 assert(mayReplace(repVal));
204
205 if (value == repVal.get())
206 return;
207
208 while (!value->uses.empty()) {
209 ValueRef *ref = *value->uses.begin();
210 ref->set(repVal.get());
211 ref->mod *= repVal.mod;
212 }
213
214 if (doSet)
215 set(repVal.get());
216 }
217
Value()218 Value::Value()
219 {
220 join = this;
221 memset(®, 0, sizeof(reg));
222 reg.size = 4;
223 }
224
LValue(Function * fn,DataFile file)225 LValue::LValue(Function *fn, DataFile file)
226 {
227 reg.file = file;
228 reg.size = (file != FILE_PREDICATE) ? 4 : 1;
229 reg.data.id = -1;
230
231 compMask = 0;
232 compound = 0;
233 ssa = 0;
234 fixedReg = 0;
235 noSpill = 0;
236
237 fn->add(this, this->id);
238 }
239
LValue(Function * fn,LValue * lval)240 LValue::LValue(Function *fn, LValue *lval)
241 {
242 assert(lval);
243
244 reg.file = lval->reg.file;
245 reg.size = lval->reg.size;
246 reg.data.id = -1;
247
248 compMask = 0;
249 compound = 0;
250 ssa = 0;
251 fixedReg = 0;
252 noSpill = 0;
253
254 fn->add(this, this->id);
255 }
256
257 LValue *
clone(ClonePolicy<Function> & pol) const258 LValue::clone(ClonePolicy<Function>& pol) const
259 {
260 LValue *that = new_LValue(pol.context(), reg.file);
261
262 pol.set<Value>(this, that);
263
264 that->reg.size = this->reg.size;
265 that->reg.type = this->reg.type;
266 that->reg.data = this->reg.data;
267
268 return that;
269 }
270
271 bool
isUniform() const272 LValue::isUniform() const
273 {
274 if (defs.size() > 1)
275 return false;
276 Instruction *insn = getInsn();
277 if (!insn)
278 return false;
279 // let's not try too hard here for now ...
280 return !insn->srcExists(1) && insn->getSrc(0)->isUniform();
281 }
282
Symbol(Program * prog,DataFile f,ubyte fidx)283 Symbol::Symbol(Program *prog, DataFile f, ubyte fidx)
284 {
285 baseSym = NULL;
286
287 reg.file = f;
288 reg.fileIndex = fidx;
289 reg.data.offset = 0;
290
291 prog->add(this, this->id);
292 }
293
294 Symbol *
clone(ClonePolicy<Function> & pol) const295 Symbol::clone(ClonePolicy<Function>& pol) const
296 {
297 Program *prog = pol.context()->getProgram();
298
299 Symbol *that = new_Symbol(prog, reg.file, reg.fileIndex);
300
301 pol.set<Value>(this, that);
302
303 that->reg.size = this->reg.size;
304 that->reg.type = this->reg.type;
305 that->reg.data = this->reg.data;
306
307 that->baseSym = this->baseSym;
308
309 return that;
310 }
311
312 bool
isUniform() const313 Symbol::isUniform() const
314 {
315 return
316 reg.file != FILE_SYSTEM_VALUE &&
317 reg.file != FILE_MEMORY_LOCAL &&
318 reg.file != FILE_SHADER_INPUT;
319 }
320
ImmediateValue(Program * prog,uint32_t uval)321 ImmediateValue::ImmediateValue(Program *prog, uint32_t uval)
322 {
323 memset(®, 0, sizeof(reg));
324
325 reg.file = FILE_IMMEDIATE;
326 reg.size = 4;
327 reg.type = TYPE_U32;
328
329 reg.data.u32 = uval;
330
331 prog->add(this, this->id);
332 }
333
ImmediateValue(Program * prog,float fval)334 ImmediateValue::ImmediateValue(Program *prog, float fval)
335 {
336 memset(®, 0, sizeof(reg));
337
338 reg.file = FILE_IMMEDIATE;
339 reg.size = 4;
340 reg.type = TYPE_F32;
341
342 reg.data.f32 = fval;
343
344 prog->add(this, this->id);
345 }
346
ImmediateValue(Program * prog,double dval)347 ImmediateValue::ImmediateValue(Program *prog, double dval)
348 {
349 memset(®, 0, sizeof(reg));
350
351 reg.file = FILE_IMMEDIATE;
352 reg.size = 8;
353 reg.type = TYPE_F64;
354
355 reg.data.f64 = dval;
356
357 prog->add(this, this->id);
358 }
359
// Create an immediate with the same storage contents as @proto but
// reinterpreted as type @ty (size is derived from @ty).
// Unlike the other constructors this one does not register with a Program.
ImmediateValue::ImmediateValue(const ImmediateValue *proto, DataType ty)
{
   this->reg = proto->reg;

   this->reg.size = typeSizeof(ty);
   this->reg.type = ty;
}
367
368 ImmediateValue *
clone(ClonePolicy<Function> & pol) const369 ImmediateValue::clone(ClonePolicy<Function>& pol) const
370 {
371 Program *prog = pol.context()->getProgram();
372 ImmediateValue *that = new_ImmediateValue(prog, 0u);
373
374 pol.set<Value>(this, that);
375
376 that->reg.size = this->reg.size;
377 that->reg.type = this->reg.type;
378 that->reg.data = this->reg.data;
379
380 return that;
381 }
382
383 bool
isInteger(const int i) const384 ImmediateValue::isInteger(const int i) const
385 {
386 switch (reg.type) {
387 case TYPE_S8:
388 return reg.data.s8 == i;
389 case TYPE_U8:
390 return reg.data.u8 == i;
391 case TYPE_S16:
392 return reg.data.s16 == i;
393 case TYPE_U16:
394 return reg.data.u16 == i;
395 case TYPE_S32:
396 case TYPE_U32:
397 return reg.data.s32 == i; // as if ...
398 case TYPE_S64:
399 case TYPE_U64:
400 return reg.data.s64 == i; // as if ...
401 case TYPE_F32:
402 return reg.data.f32 == static_cast<float>(i);
403 case TYPE_F64:
404 return reg.data.f64 == static_cast<double>(i);
405 default:
406 return false;
407 }
408 }
409
410 bool
isNegative() const411 ImmediateValue::isNegative() const
412 {
413 switch (reg.type) {
414 case TYPE_S8: return reg.data.s8 < 0;
415 case TYPE_S16: return reg.data.s16 < 0;
416 case TYPE_S32:
417 case TYPE_U32: return reg.data.s32 < 0;
418 case TYPE_F32: return reg.data.u32 & (1 << 31);
419 case TYPE_F64: return reg.data.u64 & (1ULL << 63);
420 default:
421 return false;
422 }
423 }
424
425 bool
isPow2() const426 ImmediateValue::isPow2() const
427 {
428 if (reg.type == TYPE_U64 || reg.type == TYPE_S64)
429 return util_is_power_of_two_or_zero64(reg.data.u64);
430 else
431 return util_is_power_of_two_or_zero(reg.data.u32);
432 }
433
434 void
applyLog2()435 ImmediateValue::applyLog2()
436 {
437 switch (reg.type) {
438 case TYPE_S8:
439 case TYPE_S16:
440 case TYPE_S32:
441 assert(!this->isNegative());
442 // fall through
443 case TYPE_U8:
444 case TYPE_U16:
445 case TYPE_U32:
446 reg.data.u32 = util_logbase2(reg.data.u32);
447 break;
448 case TYPE_S64:
449 assert(!this->isNegative());
450 // fall through
451 case TYPE_U64:
452 reg.data.u64 = util_logbase2_64(reg.data.u64);
453 break;
454 case TYPE_F32:
455 reg.data.f32 = log2f(reg.data.f32);
456 break;
457 case TYPE_F64:
458 reg.data.f64 = log2(reg.data.f64);
459 break;
460 default:
461 assert(0);
462 break;
463 }
464 }
465
466 bool
compare(CondCode cc,float fval) const467 ImmediateValue::compare(CondCode cc, float fval) const
468 {
469 if (reg.type != TYPE_F32)
470 ERROR("immediate value is not of type f32");
471
472 switch (static_cast<CondCode>(cc & 7)) {
473 case CC_TR: return true;
474 case CC_FL: return false;
475 case CC_LT: return reg.data.f32 < fval;
476 case CC_LE: return reg.data.f32 <= fval;
477 case CC_GT: return reg.data.f32 > fval;
478 case CC_GE: return reg.data.f32 >= fval;
479 case CC_EQ: return reg.data.f32 == fval;
480 case CC_NE: return reg.data.f32 != fval;
481 default:
482 assert(0);
483 return false;
484 }
485 }
486
487 ImmediateValue&
operator =(const ImmediateValue & that)488 ImmediateValue::operator=(const ImmediateValue &that)
489 {
490 this->reg = that.reg;
491 return (*this);
492 }
493
494 bool
interfers(const Value * that) const495 Value::interfers(const Value *that) const
496 {
497 uint32_t idA, idB;
498
499 if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
500 return false;
501 if (this->asImm())
502 return false;
503
504 if (this->asSym()) {
505 idA = this->join->reg.data.offset;
506 idB = that->join->reg.data.offset;
507 } else {
508 idA = this->join->reg.data.id * MIN2(this->reg.size, 4);
509 idB = that->join->reg.data.id * MIN2(that->reg.size, 4);
510 }
511
512 if (idA < idB)
513 return (idA + this->reg.size > idB);
514 else
515 if (idA > idB)
516 return (idB + that->reg.size > idA);
517 else
518 return (idA == idB);
519 }
520
521 bool
equals(const Value * that,bool strict) const522 Value::equals(const Value *that, bool strict) const
523 {
524 if (strict)
525 return this == that;
526
527 if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
528 return false;
529 if (that->reg.size != this->reg.size)
530 return false;
531
532 if (that->reg.data.id != this->reg.data.id)
533 return false;
534
535 return true;
536 }
537
538 bool
equals(const Value * that,bool strict) const539 ImmediateValue::equals(const Value *that, bool strict) const
540 {
541 const ImmediateValue *imm = that->asImm();
542 if (!imm)
543 return false;
544 return reg.data.u64 == imm->reg.data.u64;
545 }
546
547 bool
equals(const Value * that,bool strict) const548 Symbol::equals(const Value *that, bool strict) const
549 {
550 if (reg.file != that->reg.file || reg.fileIndex != that->reg.fileIndex)
551 return false;
552 assert(that->asSym());
553
554 if (this->baseSym != that->asSym()->baseSym)
555 return false;
556
557 if (reg.file == FILE_SYSTEM_VALUE)
558 return (this->reg.data.sv.sv == that->reg.data.sv.sv &&
559 this->reg.data.sv.index == that->reg.data.sv.index);
560 return this->reg.data.offset == that->reg.data.offset;
561 }
562
init()563 void Instruction::init()
564 {
565 next = prev = 0;
566
567 cc = CC_ALWAYS;
568 rnd = ROUND_N;
569 cache = CACHE_CA;
570 subOp = 0;
571
572 saturate = 0;
573 join = 0;
574 exit = 0;
575 terminator = 0;
576 ftz = 0;
577 dnz = 0;
578 perPatch = 0;
579 fixed = 0;
580 encSize = 0;
581 ipa = 0;
582 mask = 0;
583 precise = 0;
584
585 lanes = 0xf;
586
587 postFactor = 0;
588
589 predSrc = -1;
590 flagsDef = -1;
591 flagsSrc = -1;
592 }
593
Instruction()594 Instruction::Instruction()
595 {
596 init();
597
598 op = OP_NOP;
599 dType = sType = TYPE_F32;
600
601 id = -1;
602 bb = 0;
603 }
604
Instruction(Function * fn,operation opr,DataType ty)605 Instruction::Instruction(Function *fn, operation opr, DataType ty)
606 {
607 init();
608
609 op = opr;
610 dType = sType = ty;
611
612 fn->add(this, id);
613 }
614
~Instruction()615 Instruction::~Instruction()
616 {
617 if (bb) {
618 Function *fn = bb->getFunction();
619 bb->remove(this);
620 fn->allInsns.remove(id);
621 }
622
623 for (int s = 0; srcExists(s); ++s)
624 setSrc(s, NULL);
625 // must unlink defs too since the list pointers will get deallocated
626 for (int d = 0; defExists(d); ++d)
627 setDef(d, NULL);
628 }
629
630 void
setDef(int i,Value * val)631 Instruction::setDef(int i, Value *val)
632 {
633 int size = defs.size();
634 if (i >= size) {
635 defs.resize(i + 1);
636 while (size <= i)
637 defs[size++].setInsn(this);
638 }
639 defs[i].set(val);
640 }
641
642 void
setSrc(int s,Value * val)643 Instruction::setSrc(int s, Value *val)
644 {
645 int size = srcs.size();
646 if (s >= size) {
647 srcs.resize(s + 1);
648 while (size <= s)
649 srcs[size++].setInsn(this);
650 }
651 srcs[s].set(val);
652 }
653
654 void
setSrc(int s,const ValueRef & ref)655 Instruction::setSrc(int s, const ValueRef& ref)
656 {
657 setSrc(s, ref.get());
658 srcs[s].mod = ref.mod;
659 }
660
661 void
swapSources(int a,int b)662 Instruction::swapSources(int a, int b)
663 {
664 Value *value = srcs[a].get();
665 Modifier m = srcs[a].mod;
666
667 setSrc(a, srcs[b]);
668
669 srcs[b].set(value);
670 srcs[b].mod = m;
671 }
672
// Adjust a stored source index for a move of sources [@s, ...) by @delta.
// Indices at or beyond @s follow the move. When sources are moved down
// (@delta < 0), indices inside the erased window [@s + @delta, @s) are
// invalidated to -1. All other indices are left alone.
static inline void moveSourcesAdjustIndex(int8_t &index, int s, int delta)
{
   if (index < s) {
      if (delta < 0 && index >= s + delta)
         index = -1;
      return;
   }
   index += delta;
}
681
// Moves sources [@s,last_source] by @delta.
// If @delta < 0, sources [@s - abs(@delta), @s) are erased.
//
// Besides the sources themselves, every index that refers to a source slot
// is adjusted through moveSourcesAdjustIndex(): the indirect[] indices of
// all existing sources, predSrc, flagsSrc and, for texture instructions,
// tex.rIndirectSrc / tex.sIndirectSrc.
//
// For @delta > 0 only the destination slots are written; slots
// [@s, @s + @delta) keep whatever they referenced before.
void
Instruction::moveSources(const int s, const int delta)
{
   if (delta == 0)
      return;
   assert(s + delta >= 0);

   int k;

   // Fix up indirect indices; on exit k is the number of leading existing
   // sources, which the shifting loops below rely on.
   for (k = 0; srcExists(k); ++k) {
      for (int i = 0; i < 2; ++i)
         moveSourcesAdjustIndex(src(k).indirect[i], s, delta);
   }
   moveSourcesAdjustIndex(predSrc, s, delta);
   moveSourcesAdjustIndex(flagsSrc, s, delta);
   if (asTex()) {
      TexInstruction *tex = asTex();
      moveSourcesAdjustIndex(tex->tex.rIndirectSrc, s, delta);
      moveSourcesAdjustIndex(tex->tex.sIndirectSrc, s, delta);
   }

   if (delta > 0) {
      // Moving up: copy from the last source backwards so that no source is
      // overwritten before it has been moved.
      --k;
      for (int p = k + delta; k >= s; --k, --p)
         setSrc(p, src(k));
   } else {
      // Moving down: copy forwards, then clear the abs(delta) slots at the
      // end that are no longer occupied.
      int p;
      for (p = s; p < k; ++p)
         setSrc(p + delta, src(p));
      for (; (p + delta) < k; ++p)
         setSrc(p + delta, NULL);
   }
}
717
718 void
takeExtraSources(int s,Value * values[3])719 Instruction::takeExtraSources(int s, Value *values[3])
720 {
721 values[0] = getIndirect(s, 0);
722 if (values[0])
723 setIndirect(s, 0, NULL);
724
725 values[1] = getIndirect(s, 1);
726 if (values[1])
727 setIndirect(s, 1, NULL);
728
729 values[2] = getPredicate();
730 if (values[2])
731 setPredicate(cc, NULL);
732 }
733
734 void
putExtraSources(int s,Value * values[3])735 Instruction::putExtraSources(int s, Value *values[3])
736 {
737 if (values[0])
738 setIndirect(s, 0, values[0]);
739 if (values[1])
740 setIndirect(s, 1, values[1]);
741 if (values[2])
742 setPredicate(cc, values[2]);
743 }
744
745 Instruction *
clone(ClonePolicy<Function> & pol,Instruction * i) const746 Instruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
747 {
748 if (!i)
749 i = new_Instruction(pol.context(), op, dType);
750 #ifndef NDEBUG // non-conformant assert, so this is required
751 assert(typeid(*i) == typeid(*this));
752 #endif
753
754 pol.set<Instruction>(this, i);
755
756 i->sType = sType;
757
758 i->rnd = rnd;
759 i->cache = cache;
760 i->subOp = subOp;
761
762 i->saturate = saturate;
763 i->join = join;
764 i->exit = exit;
765 i->mask = mask;
766 i->ftz = ftz;
767 i->dnz = dnz;
768 i->ipa = ipa;
769 i->lanes = lanes;
770 i->perPatch = perPatch;
771
772 i->postFactor = postFactor;
773
774 for (int d = 0; defExists(d); ++d)
775 i->setDef(d, pol.get(getDef(d)));
776
777 for (int s = 0; srcExists(s); ++s) {
778 i->setSrc(s, pol.get(getSrc(s)));
779 i->src(s).mod = src(s).mod;
780 }
781
782 i->cc = cc;
783 i->predSrc = predSrc;
784 i->flagsDef = flagsDef;
785 i->flagsSrc = flagsSrc;
786
787 return i;
788 }
789
790 unsigned int
defCount(unsigned int mask,bool singleFile) const791 Instruction::defCount(unsigned int mask, bool singleFile) const
792 {
793 unsigned int i, n;
794
795 if (singleFile) {
796 unsigned int d = ffs(mask);
797 if (!d)
798 return 0;
799 for (i = d--; defExists(i); ++i)
800 if (getDef(i)->reg.file != getDef(d)->reg.file)
801 mask &= ~(1 << i);
802 }
803
804 for (n = 0, i = 0; this->defExists(i); ++i, mask >>= 1)
805 n += mask & 1;
806 return n;
807 }
808
809 unsigned int
srcCount(unsigned int mask,bool singleFile) const810 Instruction::srcCount(unsigned int mask, bool singleFile) const
811 {
812 unsigned int i, n;
813
814 if (singleFile) {
815 unsigned int s = ffs(mask);
816 if (!s)
817 return 0;
818 for (i = s--; srcExists(i); ++i)
819 if (getSrc(i)->reg.file != getSrc(s)->reg.file)
820 mask &= ~(1 << i);
821 }
822
823 for (n = 0, i = 0; this->srcExists(i); ++i, mask >>= 1)
824 n += mask & 1;
825 return n;
826 }
827
828 bool
setIndirect(int s,int dim,Value * value)829 Instruction::setIndirect(int s, int dim, Value *value)
830 {
831 assert(this->srcExists(s));
832
833 int p = srcs[s].indirect[dim];
834 if (p < 0) {
835 if (!value)
836 return true;
837 p = srcs.size();
838 while (p > 0 && !srcExists(p - 1))
839 --p;
840 }
841 setSrc(p, value);
842 srcs[p].usedAsPtr = (value != 0);
843 srcs[s].indirect[dim] = value ? p : -1;
844 return true;
845 }
846
847 bool
setPredicate(CondCode ccode,Value * value)848 Instruction::setPredicate(CondCode ccode, Value *value)
849 {
850 cc = ccode;
851
852 if (!value) {
853 if (predSrc >= 0) {
854 srcs[predSrc].set(NULL);
855 predSrc = -1;
856 }
857 return true;
858 }
859
860 if (predSrc < 0) {
861 predSrc = srcs.size();
862 while (predSrc > 0 && !srcExists(predSrc - 1))
863 --predSrc;
864 }
865
866 setSrc(predSrc, value);
867 return true;
868 }
869
870 bool
writesPredicate() const871 Instruction::writesPredicate() const
872 {
873 for (int d = 0; defExists(d); ++d)
874 if (getDef(d)->inFile(FILE_PREDICATE) || getDef(d)->inFile(FILE_FLAGS))
875 return true;
876 return false;
877 }
878
879 bool
canCommuteDefSrc(const Instruction * i) const880 Instruction::canCommuteDefSrc(const Instruction *i) const
881 {
882 for (int d = 0; defExists(d); ++d)
883 for (int s = 0; i->srcExists(s); ++s)
884 if (getDef(d)->interfers(i->getSrc(s)))
885 return false;
886 return true;
887 }
888
889 bool
canCommuteDefDef(const Instruction * i) const890 Instruction::canCommuteDefDef(const Instruction *i) const
891 {
892 for (int d = 0; defExists(d); ++d)
893 for (int c = 0; i->defExists(c); ++c)
894 if (getDef(d)->interfers(i->getDef(c)))
895 return false;
896 return true;
897 }
898
899 bool
isCommutationLegal(const Instruction * i) const900 Instruction::isCommutationLegal(const Instruction *i) const
901 {
902 return canCommuteDefDef(i) &&
903 canCommuteDefSrc(i) &&
904 i->canCommuteDefSrc(this);
905 }
906
TexInstruction(Function * fn,operation op)907 TexInstruction::TexInstruction(Function *fn, operation op)
908 : Instruction(fn, op, TYPE_F32), tex()
909 {
910 tex.rIndirectSrc = -1;
911 tex.sIndirectSrc = -1;
912
913 if (op == OP_TXF)
914 sType = TYPE_U32;
915 }
916
~TexInstruction()917 TexInstruction::~TexInstruction()
918 {
919 for (int c = 0; c < 3; ++c) {
920 dPdx[c].set(NULL);
921 dPdy[c].set(NULL);
922 }
923 for (int n = 0; n < 4; ++n)
924 for (int c = 0; c < 3; ++c)
925 offset[n][c].set(NULL);
926 }
927
928 TexInstruction *
clone(ClonePolicy<Function> & pol,Instruction * i) const929 TexInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
930 {
931 TexInstruction *tex = (i ? static_cast<TexInstruction *>(i) :
932 new_TexInstruction(pol.context(), op));
933
934 Instruction::clone(pol, tex);
935
936 tex->tex = this->tex;
937
938 if (op == OP_TXD) {
939 for (unsigned int c = 0; c < tex->tex.target.getDim(); ++c) {
940 tex->dPdx[c].set(dPdx[c]);
941 tex->dPdy[c].set(dPdy[c]);
942 }
943 }
944
945 for (int n = 0; n < tex->tex.useOffsets; ++n)
946 for (int c = 0; c < 3; ++c)
947 tex->offset[n][c].set(offset[n][c]);
948
949 return tex;
950 }
951
// Static description of every texture target, one row per target.
// NOTE(review): the Desc struct is declared elsewhere; the columns look like
// { name, dim, argc, isArray, isCube, isShadow } - confirm against the
// declaration. The row order presumably has to match the texture target
// enumeration used to index this table - TODO confirm.
const struct TexInstruction::Target::Desc TexInstruction::Target::descTable[] =
{
   { "1D",                1, 1, false, false, false },
   { "2D",                2, 2, false, false, false },
   { "2D_MS",             2, 3, false, false, false },
   { "3D",                3, 3, false, false, false },
   { "CUBE",              2, 3, false, true,  false },
   { "1D_SHADOW",         1, 1, false, false, true  },
   { "2D_SHADOW",         2, 2, false, false, true  },
   { "CUBE_SHADOW",       2, 3, false, true,  true  },
   { "1D_ARRAY",          1, 2, true,  false, false },
   { "2D_ARRAY",          2, 3, true,  false, false },
   { "2D_MS_ARRAY",       2, 4, true,  false, false },
   { "CUBE_ARRAY",        2, 4, true,  true,  false },
   { "1D_ARRAY_SHADOW",   1, 2, true,  false, true  },
   { "2D_ARRAY_SHADOW",   2, 3, true,  false, true  },
   { "RECT",              2, 2, false, false, false },
   { "RECT_SHADOW",       2, 2, false, false, true  },
   { "CUBE_ARRAY_SHADOW", 2, 4, true,  true,  true  },
   { "BUFFER",            1, 1, false, false, false },
};
973
// Static description of every supported image format, one row per format.
// translateImgFormat() indexes this table with the nv50_ir::FMT_* constants,
// so the row order has to match them.
// NOTE(review): ImgFormatDesc is declared elsewhere; the columns look like
// { name, component count, bits per component, numeric type, bgra flag } -
// confirm against the declaration. Only the last row sets the fifth field.
const struct TexInstruction::ImgFormatDesc TexInstruction::formatTable[] =
{
   { "NONE",         0, {  0,  0,  0,  0 },  UINT },

   { "RGBA32F",      4, { 32, 32, 32, 32 }, FLOAT },
   { "RGBA16F",      4, { 16, 16, 16, 16 }, FLOAT },
   { "RG32F",        2, { 32, 32,  0,  0 }, FLOAT },
   { "RG16F",        2, { 16, 16,  0,  0 }, FLOAT },
   { "R11G11B10F",   3, { 11, 11, 10,  0 }, FLOAT },
   { "R32F",         1, { 32,  0,  0,  0 }, FLOAT },
   { "R16F",         1, { 16,  0,  0,  0 }, FLOAT },

   { "RGBA32UI",     4, { 32, 32, 32, 32 },  UINT },
   { "RGBA16UI",     4, { 16, 16, 16, 16 },  UINT },
   { "RGB10A2UI",    4, { 10, 10, 10,  2 },  UINT },
   { "RGBA8UI",      4, {  8,  8,  8,  8 },  UINT },
   { "RG32UI",       2, { 32, 32,  0,  0 },  UINT },
   { "RG16UI",       2, { 16, 16,  0,  0 },  UINT },
   { "RG8UI",        2, {  8,  8,  0,  0 },  UINT },
   { "R32UI",        1, { 32,  0,  0,  0 },  UINT },
   { "R16UI",        1, { 16,  0,  0,  0 },  UINT },
   { "R8UI",         1, {  8,  0,  0,  0 },  UINT },

   { "RGBA32I",      4, { 32, 32, 32, 32 },  SINT },
   { "RGBA16I",      4, { 16, 16, 16, 16 },  SINT },
   { "RGBA8I",       4, {  8,  8,  8,  8 },  SINT },
   { "RG32I",        2, { 32, 32,  0,  0 },  SINT },
   { "RG16I",        2, { 16, 16,  0,  0 },  SINT },
   { "RG8I",         2, {  8,  8,  0,  0 },  SINT },
   { "R32I",         1, { 32,  0,  0,  0 },  SINT },
   { "R16I",         1, { 16,  0,  0,  0 },  SINT },
   { "R8I",          1, {  8,  0,  0,  0 },  SINT },

   { "RGBA16",       4, { 16, 16, 16, 16 }, UNORM },
   { "RGB10A2",      4, { 10, 10, 10,  2 }, UNORM },
   { "RGBA8",        4, {  8,  8,  8,  8 }, UNORM },
   { "RG16",         2, { 16, 16,  0,  0 }, UNORM },
   { "RG8",          2, {  8,  8,  0,  0 }, UNORM },
   { "R16",          1, { 16,  0,  0,  0 }, UNORM },
   { "R8",           1, {  8,  0,  0,  0 }, UNORM },

   { "RGBA16_SNORM", 4, { 16, 16, 16, 16 }, SNORM },
   { "RGBA8_SNORM",  4, {  8,  8,  8,  8 }, SNORM },
   { "RG16_SNORM",   2, { 16, 16,  0,  0 }, SNORM },
   { "RG8_SNORM",    2, {  8,  8,  0,  0 }, SNORM },
   { "R16_SNORM",    1, { 16,  0,  0,  0 }, SNORM },
   { "R8_SNORM",     1, {  8,  0,  0,  0 }, SNORM },

   { "BGRA8",        4, {  8,  8,  8,  8 }, UNORM, true },
};
1024
// Map a gallium pipe_format to the matching entry of formatTable.
// Formats without a case below trigger an assertion in debug builds and
// otherwise yield the FMT_NONE entry; the function never returns NULL.
const struct TexInstruction::ImgFormatDesc *
TexInstruction::translateImgFormat(enum pipe_format format)
{

// FMT_CASE(a, b): case label for PIPE_FORMAT_a returning the formatTable
// entry at index nv50_ir::FMT_b. The macro is left defined after this
// function.
#define FMT_CASE(a, b) \
  case PIPE_FORMAT_ ## a: return &formatTable[nv50_ir::FMT_ ## b]

   switch (format) {
   FMT_CASE(NONE, NONE);

   // float formats
   FMT_CASE(R32G32B32A32_FLOAT, RGBA32F);
   FMT_CASE(R16G16B16A16_FLOAT, RGBA16F);
   FMT_CASE(R32G32_FLOAT, RG32F);
   FMT_CASE(R16G16_FLOAT, RG16F);
   FMT_CASE(R11G11B10_FLOAT, R11G11B10F);
   FMT_CASE(R32_FLOAT, R32F);
   FMT_CASE(R16_FLOAT, R16F);

   // unsigned integer formats
   FMT_CASE(R32G32B32A32_UINT, RGBA32UI);
   FMT_CASE(R16G16B16A16_UINT, RGBA16UI);
   FMT_CASE(R10G10B10A2_UINT, RGB10A2UI);
   FMT_CASE(R8G8B8A8_UINT, RGBA8UI);
   FMT_CASE(R32G32_UINT, RG32UI);
   FMT_CASE(R16G16_UINT, RG16UI);
   FMT_CASE(R8G8_UINT, RG8UI);
   FMT_CASE(R32_UINT, R32UI);
   FMT_CASE(R16_UINT, R16UI);
   FMT_CASE(R8_UINT, R8UI);

   // signed integer formats
   FMT_CASE(R32G32B32A32_SINT, RGBA32I);
   FMT_CASE(R16G16B16A16_SINT, RGBA16I);
   FMT_CASE(R8G8B8A8_SINT, RGBA8I);
   FMT_CASE(R32G32_SINT, RG32I);
   FMT_CASE(R16G16_SINT, RG16I);
   FMT_CASE(R8G8_SINT, RG8I);
   FMT_CASE(R32_SINT, R32I);
   FMT_CASE(R16_SINT, R16I);
   FMT_CASE(R8_SINT, R8I);

   // unsigned normalized formats
   FMT_CASE(R16G16B16A16_UNORM, RGBA16);
   FMT_CASE(R10G10B10A2_UNORM, RGB10A2);
   FMT_CASE(R8G8B8A8_UNORM, RGBA8);
   FMT_CASE(R16G16_UNORM, RG16);
   FMT_CASE(R8G8_UNORM, RG8);
   FMT_CASE(R16_UNORM, R16);
   FMT_CASE(R8_UNORM, R8);

   // signed normalized formats
   FMT_CASE(R16G16B16A16_SNORM, RGBA16_SNORM);
   FMT_CASE(R8G8B8A8_SNORM, RGBA8_SNORM);
   FMT_CASE(R16G16_SNORM, RG16_SNORM);
   FMT_CASE(R8G8_SNORM, RG8_SNORM);
   FMT_CASE(R16_SNORM, R16_SNORM);
   FMT_CASE(R8_SNORM, R8_SNORM);

   FMT_CASE(B8G8R8A8_UNORM, BGRA8);

   default:
      assert(!"Unexpected format");
      return &formatTable[nv50_ir::FMT_NONE];
   }
}
1086
1087 void
setIndirectR(Value * v)1088 TexInstruction::setIndirectR(Value *v)
1089 {
1090 int p = ((tex.rIndirectSrc < 0) && v) ? srcs.size() : tex.rIndirectSrc;
1091 if (p >= 0) {
1092 tex.rIndirectSrc = p;
1093 setSrc(p, v);
1094 srcs[p].usedAsPtr = !!v;
1095 }
1096 }
1097
1098 void
setIndirectS(Value * v)1099 TexInstruction::setIndirectS(Value *v)
1100 {
1101 int p = ((tex.sIndirectSrc < 0) && v) ? srcs.size() : tex.sIndirectSrc;
1102 if (p >= 0) {
1103 tex.sIndirectSrc = p;
1104 setSrc(p, v);
1105 srcs[p].usedAsPtr = !!v;
1106 }
1107 }
1108
CmpInstruction(Function * fn,operation op)1109 CmpInstruction::CmpInstruction(Function *fn, operation op)
1110 : Instruction(fn, op, TYPE_F32)
1111 {
1112 setCond = CC_ALWAYS;
1113 }
1114
1115 CmpInstruction *
clone(ClonePolicy<Function> & pol,Instruction * i) const1116 CmpInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
1117 {
1118 CmpInstruction *cmp = (i ? static_cast<CmpInstruction *>(i) :
1119 new_CmpInstruction(pol.context(), op));
1120 cmp->dType = dType;
1121 Instruction::clone(pol, cmp);
1122 cmp->setCond = setCond;
1123 return cmp;
1124 }
1125
FlowInstruction(Function * fn,operation op,void * targ)1126 FlowInstruction::FlowInstruction(Function *fn, operation op, void *targ)
1127 : Instruction(fn, op, TYPE_NONE)
1128 {
1129 if (op == OP_CALL)
1130 target.fn = reinterpret_cast<Function *>(targ);
1131 else
1132 target.bb = reinterpret_cast<BasicBlock *>(targ);
1133
1134 if (op == OP_BRA ||
1135 op == OP_CONT || op == OP_BREAK ||
1136 op == OP_RET || op == OP_EXIT)
1137 terminator = 1;
1138 else
1139 if (op == OP_JOIN)
1140 terminator = targ ? 1 : 0;
1141
1142 allWarp = absolute = limit = builtin = indirect = 0;
1143 }
1144
1145 FlowInstruction *
clone(ClonePolicy<Function> & pol,Instruction * i) const1146 FlowInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
1147 {
1148 FlowInstruction *flow = (i ? static_cast<FlowInstruction *>(i) :
1149 new_FlowInstruction(pol.context(), op, NULL));
1150
1151 Instruction::clone(pol, flow);
1152 flow->allWarp = allWarp;
1153 flow->absolute = absolute;
1154 flow->limit = limit;
1155 flow->builtin = builtin;
1156
1157 if (builtin)
1158 flow->target.builtin = target.builtin;
1159 else
1160 if (op == OP_CALL)
1161 flow->target.fn = target.fn;
1162 else
1163 if (target.bb)
1164 flow->target.bb = pol.get<BasicBlock>(target.bb);
1165
1166 return flow;
1167 }
1168
Program(Type type,Target * arch)1169 Program::Program(Type type, Target *arch)
1170 : progType(type),
1171 target(arch),
1172 mem_Instruction(sizeof(Instruction), 6),
1173 mem_CmpInstruction(sizeof(CmpInstruction), 4),
1174 mem_TexInstruction(sizeof(TexInstruction), 4),
1175 mem_FlowInstruction(sizeof(FlowInstruction), 4),
1176 mem_LValue(sizeof(LValue), 8),
1177 mem_Symbol(sizeof(Symbol), 7),
1178 mem_ImmediateValue(sizeof(ImmediateValue), 7)
1179 {
1180 code = NULL;
1181 binSize = 0;
1182
1183 maxGPR = -1;
1184 fp64 = false;
1185 persampleInvocation = false;
1186
1187 main = new Function(this, "MAIN", ~0);
1188 calls.insert(&main->call);
1189
1190 dbgFlags = 0;
1191 optLevel = 0;
1192
1193 targetPriv = NULL;
1194 }
1195
~Program()1196 Program::~Program()
1197 {
1198 for (ArrayList::Iterator it = allFuncs.iterator(); !it.end(); it.next())
1199 delete reinterpret_cast<Function *>(it.get());
1200
1201 for (ArrayList::Iterator it = allRValues.iterator(); !it.end(); it.next())
1202 releaseValue(reinterpret_cast<Value *>(it.get()));
1203 }
1204
releaseInstruction(Instruction * insn)1205 void Program::releaseInstruction(Instruction *insn)
1206 {
1207 // TODO: make this not suck so much
1208
1209 insn->~Instruction();
1210
1211 if (insn->asCmp())
1212 mem_CmpInstruction.release(insn);
1213 else
1214 if (insn->asTex())
1215 mem_TexInstruction.release(insn);
1216 else
1217 if (insn->asFlow())
1218 mem_FlowInstruction.release(insn);
1219 else
1220 mem_Instruction.release(insn);
1221 }
1222
releaseValue(Value * value)1223 void Program::releaseValue(Value *value)
1224 {
1225 value->~Value();
1226
1227 if (value->asLValue())
1228 mem_LValue.release(value);
1229 else
1230 if (value->asImm())
1231 mem_ImmediateValue.release(value);
1232 else
1233 if (value->asSym())
1234 mem_Symbol.release(value);
1235 }
1236
1237
1238 } // namespace nv50_ir
1239
1240 extern "C" {
1241
1242 static void
nv50_ir_init_prog_info(struct nv50_ir_prog_info * info,struct nv50_ir_prog_info_out * info_out)1243 nv50_ir_init_prog_info(struct nv50_ir_prog_info *info,
1244 struct nv50_ir_prog_info_out *info_out)
1245 {
1246 info_out->target = info->target;
1247 info_out->type = info->type;
1248 if (info->type == PIPE_SHADER_TESS_CTRL || info->type == PIPE_SHADER_TESS_EVAL) {
1249 info_out->prop.tp.domain = PIPE_PRIM_MAX;
1250 info_out->prop.tp.outputPrim = PIPE_PRIM_MAX;
1251 }
1252 if (info->type == PIPE_SHADER_GEOMETRY) {
1253 info_out->prop.gp.instanceCount = 1;
1254 info_out->prop.gp.maxVertices = 1;
1255 }
1256 if (info->type == PIPE_SHADER_COMPUTE) {
1257 info->prop.cp.numThreads[0] =
1258 info->prop.cp.numThreads[1] =
1259 info->prop.cp.numThreads[2] = 1;
1260 }
1261 info_out->bin.smemSize = info->bin.smemSize;
1262 info_out->io.genUserClip = info->io.genUserClip;
1263 info_out->io.instanceId = 0xff;
1264 info_out->io.vertexId = 0xff;
1265 info_out->io.edgeFlagIn = 0xff;
1266 info_out->io.edgeFlagOut = 0xff;
1267 info_out->io.fragDepth = 0xff;
1268 info_out->io.sampleMask = 0xff;
1269 }
1270
1271 int
nv50_ir_generate_code(struct nv50_ir_prog_info * info,struct nv50_ir_prog_info_out * info_out)1272 nv50_ir_generate_code(struct nv50_ir_prog_info *info,
1273 struct nv50_ir_prog_info_out *info_out)
1274 {
1275 int ret = 0;
1276
1277 nv50_ir::Program::Type type;
1278
1279 nv50_ir_init_prog_info(info, info_out);
1280
1281 #define PROG_TYPE_CASE(a, b) \
1282 case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break
1283
1284 switch (info->type) {
1285 PROG_TYPE_CASE(VERTEX, VERTEX);
1286 PROG_TYPE_CASE(TESS_CTRL, TESSELLATION_CONTROL);
1287 PROG_TYPE_CASE(TESS_EVAL, TESSELLATION_EVAL);
1288 PROG_TYPE_CASE(GEOMETRY, GEOMETRY);
1289 PROG_TYPE_CASE(FRAGMENT, FRAGMENT);
1290 PROG_TYPE_CASE(COMPUTE, COMPUTE);
1291 default:
1292 INFO_DBG(info->dbgFlags, VERBOSE, "unsupported program type %u\n", info->type);
1293 return -1;
1294 }
1295 INFO_DBG(info->dbgFlags, VERBOSE, "translating program of type %u\n", type);
1296
1297 nv50_ir::Target *targ = nv50_ir::Target::create(info->target);
1298 if (!targ)
1299 return -1;
1300
1301 nv50_ir::Program *prog = new nv50_ir::Program(type, targ);
1302 if (!prog) {
1303 nv50_ir::Target::destroy(targ);
1304 return -1;
1305 }
1306 prog->driver = info;
1307 prog->driver_out = info_out;
1308 prog->dbgFlags = info->dbgFlags;
1309 prog->optLevel = info->optLevel;
1310
1311 switch (info->bin.sourceRep) {
1312 case PIPE_SHADER_IR_NIR:
1313 ret = prog->makeFromNIR(info, info_out) ? 0 : -2;
1314 break;
1315 case PIPE_SHADER_IR_TGSI:
1316 ret = prog->makeFromTGSI(info, info_out) ? 0 : -2;
1317 break;
1318 default:
1319 ret = -1;
1320 break;
1321 }
1322 if (ret < 0)
1323 goto out;
1324 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
1325 prog->print();
1326
1327 targ->parseDriverInfo(info, info_out);
1328 prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA);
1329
1330 prog->convertToSSA();
1331
1332 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
1333 prog->print();
1334
1335 prog->optimizeSSA(info->optLevel);
1336 prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_SSA);
1337
1338 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
1339 prog->print();
1340
1341 if (!prog->registerAllocation()) {
1342 ret = -4;
1343 goto out;
1344 }
1345 prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_POST_RA);
1346
1347 prog->optimizePostRA(info->optLevel);
1348
1349 if (!prog->emitBinary(info_out)) {
1350 ret = -5;
1351 goto out;
1352 }
1353
1354 out:
1355 INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret);
1356
1357 info_out->bin.maxGPR = prog->maxGPR;
1358 info_out->bin.code = prog->code;
1359 info_out->bin.codeSize = prog->binSize;
1360 info_out->bin.tlsSpace = prog->tlsSize;
1361
1362 delete prog;
1363 nv50_ir::Target::destroy(targ);
1364
1365 return ret;
1366 }
1367
1368 } // extern "C"
1369