1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "nv50_ir.h"
24 #include "nv50_ir_target.h"
25 #include "nv50_ir_driver.h"
26
27 namespace nv50_ir {
28
Modifier(operation op)29 Modifier::Modifier(operation op)
30 {
31 switch (op) {
32 case OP_NEG: bits = NV50_IR_MOD_NEG; break;
33 case OP_ABS: bits = NV50_IR_MOD_ABS; break;
34 case OP_SAT: bits = NV50_IR_MOD_SAT; break;
35 case OP_NOT: bits = NV50_IR_MOD_NOT; break;
36 default:
37 bits = 0;
38 break;
39 }
40 }
41
// Combine this modifier with @m into a single modifier.
// NOT and NEG toggle (xor), ABS and SAT accumulate (or).
Modifier Modifier::operator*(const Modifier m) const
{
   const unsigned int toggleMask = NV50_IR_MOD_NOT | NV50_IR_MOD_NEG;
   const unsigned int stickyMask = NV50_IR_MOD_ABS | NV50_IR_MOD_SAT;

   // When this modifier already takes the absolute value, a negation
   // coming from @m is dropped before toggling.
   unsigned int rhs = m.bits;
   if (bits & NV50_IR_MOD_ABS)
      rhs &= ~NV50_IR_MOD_NEG;

   const unsigned int toggled = (bits ^ rhs) & toggleMask;
   const unsigned int sticky = (bits | m.bits) & stickyMask;

   return Modifier(toggled | sticky);
}
55
ValueRef(Value * v)56 ValueRef::ValueRef(Value *v) : value(NULL), insn(NULL)
57 {
58 indirect[0] = -1;
59 indirect[1] = -1;
60 usedAsPtr = false;
61 set(v);
62 }
63
// Copy a reference: same owning instruction, value, modifier, indirect
// indices and pointer-usage flag as @ref.
ValueRef::ValueRef(const ValueRef& ref) : value(NULL), insn(ref.insn)
{
   usedAsPtr = ref.usedAsPtr;
   set(ref);
}
69
~ValueRef()70 ValueRef::~ValueRef()
71 {
72 this->set(NULL);
73 }
74
getImmediate(ImmediateValue & imm) const75 bool ValueRef::getImmediate(ImmediateValue &imm) const
76 {
77 const ValueRef *src = this;
78 Modifier m;
79 DataType type = src->insn->sType;
80
81 while (src) {
82 if (src->mod) {
83 if (src->insn->sType != type)
84 break;
85 m *= src->mod;
86 }
87 if (src->getFile() == FILE_IMMEDIATE) {
88 imm = *(src->value->asImm());
89 // The immediate's type isn't required to match its use, it's
90 // more of a hint; applying a modifier makes use of that hint.
91 imm.reg.type = type;
92 m.applyTo(imm);
93 return true;
94 }
95
96 Instruction *insn = src->value->getUniqueInsn();
97
98 if (insn && insn->op == OP_MOV) {
99 src = &insn->src(0);
100 if (src->mod)
101 WARN("OP_MOV with modifier encountered !\n");
102 } else {
103 src = NULL;
104 }
105 }
106 return false;
107 }
108
ValueDef(Value * v)109 ValueDef::ValueDef(Value *v) : value(NULL), origin(NULL), insn(NULL)
110 {
111 set(v);
112 }
113
// Copy a definition: only the defined value is taken over, the origin
// and owning instruction start out as NULL.
ValueDef::ValueDef(const ValueDef& def) : value(NULL), origin(NULL), insn(NULL)
{
   Value *const defined = def.get();
   set(defined);
}
118
~ValueDef()119 ValueDef::~ValueDef()
120 {
121 this->set(NULL);
122 }
123
124 void
set(const ValueRef & ref)125 ValueRef::set(const ValueRef &ref)
126 {
127 this->set(ref.get());
128 mod = ref.mod;
129 indirect[0] = ref.indirect[0];
130 indirect[1] = ref.indirect[1];
131 }
132
133 void
set(Value * refVal)134 ValueRef::set(Value *refVal)
135 {
136 if (value == refVal)
137 return;
138 if (value)
139 value->uses.erase(this);
140 if (refVal)
141 refVal->uses.insert(this);
142
143 value = refVal;
144 }
145
146 void
set(Value * defVal)147 ValueDef::set(Value *defVal)
148 {
149 if (value == defVal)
150 return;
151 if (value)
152 value->defs.remove(this);
153 if (defVal)
154 defVal->defs.push_back(this);
155
156 value = defVal;
157 }
158
159 // Check if we can replace this definition's value by the value in @rep,
160 // including the source modifiers, i.e. make sure that all uses support
161 // @rep.mod.
162 bool
mayReplace(const ValueRef & rep)163 ValueDef::mayReplace(const ValueRef &rep)
164 {
165 if (!rep.mod)
166 return true;
167
168 if (!insn || !insn->bb) // Unbound instruction ?
169 return false;
170
171 const Target *target = insn->bb->getProgram()->getTarget();
172
173 for (Value::UseIterator it = value->uses.begin(); it != value->uses.end();
174 ++it) {
175 Instruction *insn = (*it)->getInsn();
176 int s = -1;
177
178 for (int i = 0; insn->srcExists(i); ++i) {
179 if (insn->src(i).get() == value) {
180 // If there are multiple references to us we'd have to check if the
181 // combination of mods is still supported, but just bail for now.
182 if (&insn->src(i) != (*it))
183 return false;
184 s = i;
185 }
186 }
187 assert(s >= 0); // integrity of uses list
188
189 if (!target->isModSupported(insn, s, rep.mod))
190 return false;
191 }
192 return true;
193 }
194
195 void
replace(const ValueRef & repVal,bool doSet)196 ValueDef::replace(const ValueRef &repVal, bool doSet)
197 {
198 assert(mayReplace(repVal));
199
200 if (value == repVal.get())
201 return;
202
203 while (!value->uses.empty()) {
204 ValueRef *ref = *value->uses.begin();
205 ref->set(repVal.get());
206 ref->mod *= repVal.mod;
207 }
208
209 if (doSet)
210 set(repVal.get());
211 }
212
Value()213 Value::Value() : id(-1)
214 {
215 join = this;
216 memset(®, 0, sizeof(reg));
217 reg.size = 4;
218 }
219
LValue(Function * fn,DataFile file)220 LValue::LValue(Function *fn, DataFile file)
221 {
222 reg.file = file;
223 reg.size = (file != FILE_PREDICATE) ? 4 : 1;
224 reg.data.id = -1;
225
226 compMask = 0;
227 compound = 0;
228 ssa = 0;
229 fixedReg = 0;
230 noSpill = 0;
231
232 fn->add(this, this->id);
233 }
234
LValue(Function * fn,LValue * lval)235 LValue::LValue(Function *fn, LValue *lval)
236 {
237 assert(lval);
238
239 reg.file = lval->reg.file;
240 reg.size = lval->reg.size;
241 reg.data.id = -1;
242
243 compMask = 0;
244 compound = 0;
245 ssa = 0;
246 fixedReg = 0;
247 noSpill = 0;
248
249 fn->add(this, this->id);
250 }
251
252 LValue *
clone(ClonePolicy<Function> & pol) const253 LValue::clone(ClonePolicy<Function>& pol) const
254 {
255 LValue *that = new_LValue(pol.context(), reg.file);
256
257 pol.set<Value>(this, that);
258
259 that->reg.size = this->reg.size;
260 that->reg.type = this->reg.type;
261 that->reg.data = this->reg.data;
262
263 return that;
264 }
265
266 bool
isUniform() const267 LValue::isUniform() const
268 {
269 if (defs.size() > 1)
270 return false;
271 Instruction *insn = getInsn();
272 if (!insn)
273 return false;
274 // let's not try too hard here for now ...
275 return !insn->srcExists(1) && insn->getSrc(0)->isUniform();
276 }
277
Symbol(Program * prog,DataFile f,uint8_t fidx)278 Symbol::Symbol(Program *prog, DataFile f, uint8_t fidx)
279 {
280 baseSym = NULL;
281
282 reg.file = f;
283 reg.fileIndex = fidx;
284 reg.data.offset = 0;
285
286 prog->add(this, this->id);
287 }
288
289 Symbol *
clone(ClonePolicy<Function> & pol) const290 Symbol::clone(ClonePolicy<Function>& pol) const
291 {
292 Program *prog = pol.context()->getProgram();
293
294 Symbol *that = new_Symbol(prog, reg.file, reg.fileIndex);
295
296 pol.set<Value>(this, that);
297
298 that->reg.size = this->reg.size;
299 that->reg.type = this->reg.type;
300 that->reg.data = this->reg.data;
301
302 that->baseSym = this->baseSym;
303
304 return that;
305 }
306
307 bool
isUniform() const308 Symbol::isUniform() const
309 {
310 return
311 reg.file != FILE_SYSTEM_VALUE &&
312 reg.file != FILE_MEMORY_LOCAL &&
313 reg.file != FILE_SHADER_INPUT;
314 }
315
ImmediateValue(Program * prog,uint32_t uval)316 ImmediateValue::ImmediateValue(Program *prog, uint32_t uval)
317 {
318 memset(®, 0, sizeof(reg));
319
320 reg.file = FILE_IMMEDIATE;
321 reg.size = 4;
322 reg.type = TYPE_U32;
323
324 reg.data.u32 = uval;
325
326 prog->add(this, this->id);
327 }
328
ImmediateValue(Program * prog,float fval)329 ImmediateValue::ImmediateValue(Program *prog, float fval)
330 {
331 memset(®, 0, sizeof(reg));
332
333 reg.file = FILE_IMMEDIATE;
334 reg.size = 4;
335 reg.type = TYPE_F32;
336
337 reg.data.f32 = fval;
338
339 prog->add(this, this->id);
340 }
341
ImmediateValue(Program * prog,double dval)342 ImmediateValue::ImmediateValue(Program *prog, double dval)
343 {
344 memset(®, 0, sizeof(reg));
345
346 reg.file = FILE_IMMEDIATE;
347 reg.size = 8;
348 reg.type = TYPE_F64;
349
350 reg.data.f64 = dval;
351
352 prog->add(this, this->id);
353 }
354
// Create an immediate with the same storage as @proto but re-typed as
// @ty (size adjusted to match). It is not registered with a program.
ImmediateValue::ImmediateValue(const ImmediateValue *proto, DataType ty)
{
   reg = proto->reg;

   reg.size = typeSizeof(ty);
   reg.type = ty;
}
362
363 ImmediateValue *
clone(ClonePolicy<Function> & pol) const364 ImmediateValue::clone(ClonePolicy<Function>& pol) const
365 {
366 Program *prog = pol.context()->getProgram();
367 ImmediateValue *that = new_ImmediateValue(prog, 0u);
368
369 pol.set<Value>(this, that);
370
371 that->reg.size = this->reg.size;
372 that->reg.type = this->reg.type;
373 that->reg.data = this->reg.data;
374
375 return that;
376 }
377
378 bool
isInteger(const int i) const379 ImmediateValue::isInteger(const int i) const
380 {
381 switch (reg.type) {
382 case TYPE_S8:
383 return reg.data.s8 == i;
384 case TYPE_U8:
385 return reg.data.u8 == i;
386 case TYPE_S16:
387 return reg.data.s16 == i;
388 case TYPE_U16:
389 return reg.data.u16 == i;
390 case TYPE_S32:
391 case TYPE_U32:
392 return reg.data.s32 == i; // as if ...
393 case TYPE_S64:
394 case TYPE_U64:
395 return reg.data.s64 == i; // as if ...
396 case TYPE_F32:
397 return reg.data.f32 == static_cast<float>(i);
398 case TYPE_F64:
399 return reg.data.f64 == static_cast<double>(i);
400 default:
401 return false;
402 }
403 }
404
405 bool
isNegative() const406 ImmediateValue::isNegative() const
407 {
408 switch (reg.type) {
409 case TYPE_S8: return reg.data.s8 < 0;
410 case TYPE_S16: return reg.data.s16 < 0;
411 case TYPE_S32:
412 case TYPE_U32: return reg.data.s32 < 0;
413 case TYPE_F32: return reg.data.u32 & (1 << 31);
414 case TYPE_F64: return reg.data.u64 & (1ULL << 63);
415 default:
416 return false;
417 }
418 }
419
420 bool
isPow2() const421 ImmediateValue::isPow2() const
422 {
423 if (reg.type == TYPE_U64 || reg.type == TYPE_S64)
424 return util_is_power_of_two_or_zero64(reg.data.u64);
425 else
426 return util_is_power_of_two_or_zero(reg.data.u32);
427 }
428
// Replace the immediate in place by its base-2 logarithm.
// Integer types use the util_logbase2 helpers, floating point types use
// log2f()/log2(). Signed integers are asserted to be non-negative, and
// any type not listed below trips an assertion.
void
ImmediateValue::applyLog2()
{
   switch (reg.type) {
   case TYPE_S8:
   case TYPE_S16:
   case TYPE_S32:
      assert(!this->isNegative());
      FALLTHROUGH;
   case TYPE_U8:
   case TYPE_U16:
   case TYPE_U32:
      // all integer types up to 32 bits go through the u32 view
      reg.data.u32 = util_logbase2(reg.data.u32);
      break;
   case TYPE_S64:
      assert(!this->isNegative());
      FALLTHROUGH;
   case TYPE_U64:
      reg.data.u64 = util_logbase2_64(reg.data.u64);
      break;
   case TYPE_F32:
      reg.data.f32 = log2f(reg.data.f32);
      break;
   case TYPE_F64:
      reg.data.f64 = log2(reg.data.f64);
      break;
   default:
      assert(0);
      break;
   }
}
460
461 bool
compare(CondCode cc,float fval) const462 ImmediateValue::compare(CondCode cc, float fval) const
463 {
464 if (reg.type != TYPE_F32)
465 ERROR("immediate value is not of type f32");
466
467 switch (static_cast<CondCode>(cc & 7)) {
468 case CC_TR: return true;
469 case CC_FL: return false;
470 case CC_LT: return reg.data.f32 < fval;
471 case CC_LE: return reg.data.f32 <= fval;
472 case CC_GT: return reg.data.f32 > fval;
473 case CC_GE: return reg.data.f32 >= fval;
474 case CC_EQ: return reg.data.f32 == fval;
475 case CC_NE: return reg.data.f32 != fval;
476 default:
477 assert(0);
478 return false;
479 }
480 }
481
482 ImmediateValue&
operator =(const ImmediateValue & that)483 ImmediateValue::operator=(const ImmediateValue &that)
484 {
485 this->reg = that.reg;
486 return (*this);
487 }
488
489 bool
interfers(const Value * that) const490 Value::interfers(const Value *that) const
491 {
492 uint32_t idA, idB;
493
494 if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
495 return false;
496 if (this->asImm())
497 return false;
498
499 if (this->asSym()) {
500 idA = this->join->reg.data.offset;
501 idB = that->join->reg.data.offset;
502 } else {
503 idA = this->join->reg.data.id * MIN2(this->reg.size, 4);
504 idB = that->join->reg.data.id * MIN2(that->reg.size, 4);
505 }
506
507 if (idA < idB)
508 return (idA + this->reg.size > idB);
509 else
510 if (idA > idB)
511 return (idB + that->reg.size > idA);
512 else
513 return (idA == idB);
514 }
515
516 bool
equals(const Value * that,bool strict) const517 Value::equals(const Value *that, bool strict) const
518 {
519 if (strict)
520 return this == that;
521
522 if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
523 return false;
524 if (that->reg.size != this->reg.size)
525 return false;
526
527 if (that->reg.data.id != this->reg.data.id)
528 return false;
529
530 return true;
531 }
532
533 bool
equals(const Value * that,bool strict) const534 ImmediateValue::equals(const Value *that, bool strict) const
535 {
536 const ImmediateValue *imm = that->asImm();
537 if (!imm)
538 return false;
539 return reg.data.u64 == imm->reg.data.u64;
540 }
541
542 bool
equals(const Value * that,bool strict) const543 Symbol::equals(const Value *that, bool strict) const
544 {
545 if (reg.file != that->reg.file || reg.fileIndex != that->reg.fileIndex)
546 return false;
547 assert(that->asSym());
548
549 if (this->baseSym != that->asSym()->baseSym)
550 return false;
551
552 if (reg.file == FILE_SYSTEM_VALUE)
553 return (this->reg.data.sv.sv == that->reg.data.sv.sv &&
554 this->reg.data.sv.index == that->reg.data.sv.index);
555 return this->reg.data.offset == that->reg.data.offset;
556 }
557
init()558 void Instruction::init()
559 {
560 next = prev = 0;
561 serial = 0;
562
563 cc = CC_ALWAYS;
564 rnd = ROUND_N;
565 cache = CACHE_CA;
566 subOp = 0;
567
568 saturate = 0;
569 join = 0;
570 exit = 0;
571 terminator = 0;
572 ftz = 0;
573 dnz = 0;
574 perPatch = 0;
575 fixed = 0;
576 encSize = 0;
577 ipa = 0;
578 mask = 0;
579 precise = 0;
580
581 lanes = 0xf;
582
583 postFactor = 0;
584
585 predSrc = -1;
586 flagsDef = -1;
587 flagsSrc = -1;
588
589 sched = 0;
590 bb = NULL;
591 }
592
Instruction()593 Instruction::Instruction()
594 {
595 init();
596
597 op = OP_NOP;
598 dType = sType = TYPE_F32;
599
600 id = -1;
601 }
602
Instruction(Function * fn,operation opr,DataType ty)603 Instruction::Instruction(Function *fn, operation opr, DataType ty)
604 {
605 init();
606
607 op = opr;
608 dType = sType = ty;
609
610 fn->add(this, id);
611 }
612
~Instruction()613 Instruction::~Instruction()
614 {
615 if (bb) {
616 Function *fn = bb->getFunction();
617 bb->remove(this);
618 fn->allInsns.remove(id);
619 }
620
621 for (int s = 0; srcExists(s); ++s)
622 setSrc(s, NULL);
623 // must unlink defs too since the list pointers will get deallocated
624 for (int d = 0; defExists(d); ++d)
625 setDef(d, NULL);
626 }
627
628 void
setDef(int i,Value * val)629 Instruction::setDef(int i, Value *val)
630 {
631 int size = defs.size();
632 if (i >= size) {
633 defs.resize(i + 1);
634 while (size <= i)
635 defs[size++].setInsn(this);
636 }
637 defs[i].set(val);
638 }
639
640 void
setSrc(int s,Value * val)641 Instruction::setSrc(int s, Value *val)
642 {
643 int size = srcs.size();
644 if (s >= size) {
645 srcs.resize(s + 1);
646 while (size <= s)
647 srcs[size++].setInsn(this);
648 }
649 srcs[s].set(val);
650 }
651
652 void
setSrc(int s,const ValueRef & ref)653 Instruction::setSrc(int s, const ValueRef& ref)
654 {
655 setSrc(s, ref.get());
656 srcs[s].mod = ref.mod;
657 }
658
659 void
swapSources(int a,int b)660 Instruction::swapSources(int a, int b)
661 {
662 Value *value = srcs[a].get();
663 Modifier m = srcs[a].mod;
664
665 setSrc(a, srcs[b]);
666
667 srcs[b].set(value);
668 srcs[b].mod = m;
669 }
670
// Helper for Instruction::moveSources(): fix up a stored source index
// after the sources starting at @s have been shifted by @delta.
// Indices at or above @s move along; indices that fall into the erased
// range [@s + @delta, @s) (only possible for @delta < 0) become -1.
static inline void moveSourcesAdjustIndex(int8_t &index, int s, int delta)
{
   if (index >= s) {
      index += delta;
      return;
   }

   const bool erased = (delta < 0) && (index >= (s + delta));
   if (erased)
      index = -1;
}
679
// Moves sources [@s,last_source] by @delta.
// If @delta < 0, sources [@s - abs(@delta), @s) are erased.
//
// All stored source indices (the indirect slots of every source,
// predSrc, flagsSrc and, for texture instructions, the r/s indirect
// slots) are adjusted first; the source references are shifted after.
void
Instruction::moveSources(const int s, const int delta)
{
   if (delta == 0)
      return;
   assert(s + delta >= 0);

   int k;

   // Fix up the indirect indices of all sources. When the loop ends, k is
   // the first index for which srcExists() is false.
   for (k = 0; srcExists(k); ++k) {
      for (int i = 0; i < 2; ++i)
         moveSourcesAdjustIndex(src(k).indirect[i], s, delta);
   }
   moveSourcesAdjustIndex(predSrc, s, delta);
   moveSourcesAdjustIndex(flagsSrc, s, delta);
   if (asTex()) {
      TexInstruction *tex = asTex();
      moveSourcesAdjustIndex(tex->tex.rIndirectSrc, s, delta);
      moveSourcesAdjustIndex(tex->tex.sIndirectSrc, s, delta);
   }

   if (delta > 0) {
      // Shift up, walking from the last source down to @s so that no
      // source is overwritten before it has been moved.
      --k;
      for (int p = k + delta; k >= s; --k, --p)
         setSrc(p, src(k));
   } else {
      // Shift down, walking upwards from @s, then clear the slots that
      // were vacated at the tail.
      int p;
      for (p = s; p < k; ++p)
         setSrc(p + delta, src(p));
      for (; (p + delta) < k; ++p)
         setSrc(p + delta, NULL);
   }
}
715
716 void
takeExtraSources(int s,Value * values[3])717 Instruction::takeExtraSources(int s, Value *values[3])
718 {
719 values[0] = getIndirect(s, 0);
720 if (values[0])
721 setIndirect(s, 0, NULL);
722
723 values[1] = getIndirect(s, 1);
724 if (values[1])
725 setIndirect(s, 1, NULL);
726
727 values[2] = getPredicate();
728 if (values[2])
729 setPredicate(cc, NULL);
730 }
731
732 void
putExtraSources(int s,Value * values[3])733 Instruction::putExtraSources(int s, Value *values[3])
734 {
735 if (values[0])
736 setIndirect(s, 0, values[0]);
737 if (values[1])
738 setIndirect(s, 1, values[1]);
739 if (values[2])
740 setPredicate(cc, values[2]);
741 }
742
// Copy this instruction into @i, or into a freshly allocated instruction
// of the policy's function when @i is NULL, and record the
// original -> copy mapping in the policy. Definitions and sources are
// translated through the policy. Returns the copy.
// Note that terminator, fixed, encSize, precise, sched and serial are
// not copied here; they keep whatever the target instruction had.
Instruction *
Instruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
{
   if (!i)
      i = new_Instruction(pol.context(), op, dType);
#if !defined(NDEBUG) && defined(__cpp_rtti)
   // a caller-supplied target must have the same dynamic type as us
   assert(typeid(*i) == typeid(*this));
#endif

   pol.set<Instruction>(this, i);

   i->sType = sType;

   i->rnd = rnd;
   i->cache = cache;
   i->subOp = subOp;

   i->saturate = saturate;
   i->join = join;
   i->exit = exit;
   i->mask = mask;
   i->ftz = ftz;
   i->dnz = dnz;
   i->ipa = ipa;
   i->lanes = lanes;
   i->perPatch = perPatch;

   i->postFactor = postFactor;

   for (int d = 0; defExists(d); ++d)
      i->setDef(d, pol.get(getDef(d)));

   for (int s = 0; srcExists(s); ++s) {
      i->setSrc(s, pol.get(getSrc(s)));
      i->src(s).mod = src(s).mod;
   }

   // sources are copied at identical positions, so the special operand
   // indices can be taken over unchanged
   i->cc = cc;
   i->predSrc = predSrc;
   i->flagsDef = flagsDef;
   i->flagsSrc = flagsSrc;

   return i;
}
787
788 unsigned int
defCount(unsigned int mask,bool singleFile) const789 Instruction::defCount(unsigned int mask, bool singleFile) const
790 {
791 unsigned int i, n;
792
793 if (singleFile) {
794 unsigned int d = ffs(mask);
795 if (!d)
796 return 0;
797 for (i = d--; defExists(i); ++i)
798 if (getDef(i)->reg.file != getDef(d)->reg.file)
799 mask &= ~(1 << i);
800 }
801
802 for (n = 0, i = 0; this->defExists(i); ++i, mask >>= 1)
803 n += mask & 1;
804 return n;
805 }
806
807 unsigned int
srcCount(unsigned int mask,bool singleFile) const808 Instruction::srcCount(unsigned int mask, bool singleFile) const
809 {
810 unsigned int i, n;
811
812 if (singleFile) {
813 unsigned int s = ffs(mask);
814 if (!s)
815 return 0;
816 for (i = s--; srcExists(i); ++i)
817 if (getSrc(i)->reg.file != getSrc(s)->reg.file)
818 mask &= ~(1 << i);
819 }
820
821 for (n = 0, i = 0; this->srcExists(i); ++i, mask >>= 1)
822 n += mask & 1;
823 return n;
824 }
825
826 bool
setIndirect(int s,int dim,Value * value)827 Instruction::setIndirect(int s, int dim, Value *value)
828 {
829 assert(this->srcExists(s));
830
831 int p = srcs[s].indirect[dim];
832 if (p < 0) {
833 if (!value)
834 return true;
835 p = srcs.size();
836 while (p > 0 && !srcExists(p - 1))
837 --p;
838 }
839 setSrc(p, value);
840 srcs[p].usedAsPtr = (value != 0);
841 srcs[s].indirect[dim] = value ? p : -1;
842 return true;
843 }
844
845 bool
setPredicate(CondCode ccode,Value * value)846 Instruction::setPredicate(CondCode ccode, Value *value)
847 {
848 cc = ccode;
849
850 if (!value) {
851 if (predSrc >= 0) {
852 srcs[predSrc].set(NULL);
853 predSrc = -1;
854 }
855 return true;
856 }
857
858 if (predSrc < 0) {
859 predSrc = srcs.size();
860 while (predSrc > 0 && !srcExists(predSrc - 1))
861 --predSrc;
862 }
863
864 setSrc(predSrc, value);
865 return true;
866 }
867
868 bool
writesPredicate() const869 Instruction::writesPredicate() const
870 {
871 for (int d = 0; defExists(d); ++d)
872 if (getDef(d)->inFile(FILE_PREDICATE) || getDef(d)->inFile(FILE_FLAGS))
873 return true;
874 return false;
875 }
876
877 bool
canCommuteDefSrc(const Instruction * i) const878 Instruction::canCommuteDefSrc(const Instruction *i) const
879 {
880 for (int d = 0; defExists(d); ++d)
881 for (int s = 0; i->srcExists(s); ++s)
882 if (getDef(d)->interfers(i->getSrc(s)))
883 return false;
884 return true;
885 }
886
887 bool
canCommuteDefDef(const Instruction * i) const888 Instruction::canCommuteDefDef(const Instruction *i) const
889 {
890 for (int d = 0; defExists(d); ++d)
891 for (int c = 0; i->defExists(c); ++c)
892 if (getDef(d)->interfers(i->getDef(c)))
893 return false;
894 return true;
895 }
896
897 bool
isCommutationLegal(const Instruction * i) const898 Instruction::isCommutationLegal(const Instruction *i) const
899 {
900 return canCommuteDefDef(i) &&
901 canCommuteDefSrc(i) &&
902 i->canCommuteDefSrc(this);
903 }
904
TexInstruction(Function * fn,operation op)905 TexInstruction::TexInstruction(Function *fn, operation op)
906 : Instruction(fn, op, TYPE_F32), tex()
907 {
908 tex.rIndirectSrc = -1;
909 tex.sIndirectSrc = -1;
910
911 if (op == OP_TXF)
912 sType = TYPE_U32;
913 }
914
~TexInstruction()915 TexInstruction::~TexInstruction()
916 {
917 for (int c = 0; c < 3; ++c) {
918 dPdx[c].set(NULL);
919 dPdy[c].set(NULL);
920 }
921 for (int n = 0; n < 4; ++n)
922 for (int c = 0; c < 3; ++c)
923 offset[n][c].set(NULL);
924 }
925
926 TexInstruction *
clone(ClonePolicy<Function> & pol,Instruction * i) const927 TexInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
928 {
929 TexInstruction *tex = (i ? static_cast<TexInstruction *>(i) :
930 new_TexInstruction(pol.context(), op));
931
932 Instruction::clone(pol, tex);
933
934 tex->tex = this->tex;
935
936 if (op == OP_TXD) {
937 for (unsigned int c = 0; c < tex->tex.target.getDim(); ++c) {
938 tex->dPdx[c].set(dPdx[c]);
939 tex->dPdy[c].set(dPdy[c]);
940 }
941 }
942
943 for (int n = 0; n < tex->tex.useOffsets; ++n)
944 for (int c = 0; c < 3; ++c)
945 tex->offset[n][c].set(offset[n][c]);
946
947 return tex;
948 }
949
// Static description of every texture target.
// In this table the three boolean columns are true exactly for the
// entries whose name contains ARRAY, CUBE and SHADOW respectively.
// NOTE(review): the two integer columns are presumably the dimension
// count and the coordinate argument count, and the row order presumably
// has to match the texture target enum - confirm against the Desc and
// target declarations.
const struct TexInstruction::Target::Desc TexInstruction::Target::descTable[] =
{
   { "1D",                1, 1, false, false, false },
   { "2D",                2, 2, false, false, false },
   { "2D_MS",             2, 3, false, false, false },
   { "3D",                3, 3, false, false, false },
   { "CUBE",              2, 3, false, true,  false },
   { "1D_SHADOW",         1, 1, false, false, true  },
   { "2D_SHADOW",         2, 2, false, false, true  },
   { "CUBE_SHADOW",       2, 3, false, true,  true  },
   { "1D_ARRAY",          1, 2, true,  false, false },
   { "2D_ARRAY",          2, 3, true,  false, false },
   { "2D_MS_ARRAY",       2, 4, true,  false, false },
   { "CUBE_ARRAY",        2, 4, true,  true,  false },
   { "1D_ARRAY_SHADOW",   1, 2, true,  false, true  },
   { "2D_ARRAY_SHADOW",   2, 3, true,  false, true  },
   { "RECT",              2, 2, false, false, false },
   { "RECT_SHADOW",       2, 2, false, false, true  },
   { "CUBE_ARRAY_SHADOW", 2, 4, true,  true,  true  },
   { "BUFFER",            1, 1, false, false, false },
};
971
// Static description of every supported image format.
// Each row holds: name, number of components, bit width per component
// (0 for absent components) and the numeric class (UINT, SINT, FLOAT,
// UNORM, SNORM). Only BGRA8 carries a trailing 'true'.
// NOTE(review): that last field is presumably a BGRA channel-order flag
// - confirm against the ImgFormatDesc declaration.
// translateImgFormat() indexes this table with the nv50_ir::FMT_*
// constants, so the row order has to match them.
const struct TexInstruction::ImgFormatDesc TexInstruction::formatTable[] =
{
   { "NONE",         0, {  0,  0,  0,  0 },  UINT },

   { "RGBA32F",      4, { 32, 32, 32, 32 }, FLOAT },
   { "RGBA16F",      4, { 16, 16, 16, 16 }, FLOAT },
   { "RG32F",        2, { 32, 32,  0,  0 }, FLOAT },
   { "RG16F",        2, { 16, 16,  0,  0 }, FLOAT },
   { "R11G11B10F",   3, { 11, 11, 10,  0 }, FLOAT },
   { "R32F",         1, { 32,  0,  0,  0 }, FLOAT },
   { "R16F",         1, { 16,  0,  0,  0 }, FLOAT },

   { "RGBA32UI",     4, { 32, 32, 32, 32 },  UINT },
   { "RGBA16UI",     4, { 16, 16, 16, 16 },  UINT },
   { "RGB10A2UI",    4, { 10, 10, 10,  2 },  UINT },
   { "RGBA8UI",      4, {  8,  8,  8,  8 },  UINT },
   { "RG32UI",       2, { 32, 32,  0,  0 },  UINT },
   { "RG16UI",       2, { 16, 16,  0,  0 },  UINT },
   { "RG8UI",        2, {  8,  8,  0,  0 },  UINT },
   { "R32UI",        1, { 32,  0,  0,  0 },  UINT },
   { "R16UI",        1, { 16,  0,  0,  0 },  UINT },
   { "R8UI",         1, {  8,  0,  0,  0 },  UINT },

   { "RGBA32I",      4, { 32, 32, 32, 32 },  SINT },
   { "RGBA16I",      4, { 16, 16, 16, 16 },  SINT },
   { "RGBA8I",       4, {  8,  8,  8,  8 },  SINT },
   { "RG32I",        2, { 32, 32,  0,  0 },  SINT },
   { "RG16I",        2, { 16, 16,  0,  0 },  SINT },
   { "RG8I",         2, {  8,  8,  0,  0 },  SINT },
   { "R32I",         1, { 32,  0,  0,  0 },  SINT },
   { "R16I",         1, { 16,  0,  0,  0 },  SINT },
   { "R8I",          1, {  8,  0,  0,  0 },  SINT },

   { "RGBA16",       4, { 16, 16, 16, 16 }, UNORM },
   { "RGB10A2",      4, { 10, 10, 10,  2 }, UNORM },
   { "RGBA8",        4, {  8,  8,  8,  8 }, UNORM },
   { "RG16",         2, { 16, 16,  0,  0 }, UNORM },
   { "RG8",          2, {  8,  8,  0,  0 }, UNORM },
   { "R16",          1, { 16,  0,  0,  0 }, UNORM },
   { "R8",           1, {  8,  0,  0,  0 }, UNORM },

   { "RGBA16_SNORM", 4, { 16, 16, 16, 16 }, SNORM },
   { "RGBA8_SNORM",  4, {  8,  8,  8,  8 }, SNORM },
   { "RG16_SNORM",   2, { 16, 16,  0,  0 }, SNORM },
   { "RG8_SNORM",    2, {  8,  8,  0,  0 }, SNORM },
   { "R16_SNORM",    1, { 16,  0,  0,  0 }, SNORM },
   { "R8_SNORM",     1, {  8,  0,  0,  0 }, SNORM },

   { "BGRA8",        4, {  8,  8,  8,  8 }, UNORM, true },
};
1022
// Map a gallium pipe_format to its entry in formatTable.
// Formats without a case below trip an assertion and, when assertions
// are compiled out, yield the FMT_NONE entry.
const struct TexInstruction::ImgFormatDesc *
TexInstruction::translateImgFormat(enum pipe_format format)
{

// FMT_CASE(a, b): PIPE_FORMAT_a selects the formatTable row FMT_b.
// The macro is not #undef'ed after the function.
#define FMT_CASE(a, b) \
  case PIPE_FORMAT_ ## a: return &formatTable[nv50_ir::FMT_ ## b]

   switch (format) {
   FMT_CASE(NONE, NONE);

   FMT_CASE(R32G32B32A32_FLOAT, RGBA32F);
   FMT_CASE(R16G16B16A16_FLOAT, RGBA16F);
   FMT_CASE(R32G32_FLOAT, RG32F);
   FMT_CASE(R16G16_FLOAT, RG16F);
   FMT_CASE(R11G11B10_FLOAT, R11G11B10F);
   FMT_CASE(R32_FLOAT, R32F);
   FMT_CASE(R16_FLOAT, R16F);

   FMT_CASE(R32G32B32A32_UINT, RGBA32UI);
   FMT_CASE(R16G16B16A16_UINT, RGBA16UI);
   FMT_CASE(R10G10B10A2_UINT, RGB10A2UI);
   FMT_CASE(R8G8B8A8_UINT, RGBA8UI);
   FMT_CASE(R32G32_UINT, RG32UI);
   FMT_CASE(R16G16_UINT, RG16UI);
   FMT_CASE(R8G8_UINT, RG8UI);
   FMT_CASE(R32_UINT, R32UI);
   FMT_CASE(R16_UINT, R16UI);
   FMT_CASE(R8_UINT, R8UI);

   FMT_CASE(R32G32B32A32_SINT, RGBA32I);
   FMT_CASE(R16G16B16A16_SINT, RGBA16I);
   FMT_CASE(R8G8B8A8_SINT, RGBA8I);
   FMT_CASE(R32G32_SINT, RG32I);
   FMT_CASE(R16G16_SINT, RG16I);
   FMT_CASE(R8G8_SINT, RG8I);
   FMT_CASE(R32_SINT, R32I);
   FMT_CASE(R16_SINT, R16I);
   FMT_CASE(R8_SINT, R8I);

   FMT_CASE(R16G16B16A16_UNORM, RGBA16);
   FMT_CASE(R10G10B10A2_UNORM, RGB10A2);
   FMT_CASE(R8G8B8A8_UNORM, RGBA8);
   FMT_CASE(R16G16_UNORM, RG16);
   FMT_CASE(R8G8_UNORM, RG8);
   FMT_CASE(R16_UNORM, R16);
   FMT_CASE(R8_UNORM, R8);

   FMT_CASE(R16G16B16A16_SNORM, RGBA16_SNORM);
   FMT_CASE(R8G8B8A8_SNORM, RGBA8_SNORM);
   FMT_CASE(R16G16_SNORM, RG16_SNORM);
   FMT_CASE(R8G8_SNORM, RG8_SNORM);
   FMT_CASE(R16_SNORM, R16_SNORM);
   FMT_CASE(R8_SNORM, R8_SNORM);

   FMT_CASE(B8G8R8A8_UNORM, BGRA8);

   default:
      assert(!"Unexpected format");
      return &formatTable[nv50_ir::FMT_NONE];
   }
}
1084
1085 void
setIndirectR(Value * v)1086 TexInstruction::setIndirectR(Value *v)
1087 {
1088 int p = ((tex.rIndirectSrc < 0) && v) ? srcs.size() : tex.rIndirectSrc;
1089 if (p >= 0) {
1090 tex.rIndirectSrc = p;
1091 setSrc(p, v);
1092 srcs[p].usedAsPtr = !!v;
1093 }
1094 }
1095
1096 void
setIndirectS(Value * v)1097 TexInstruction::setIndirectS(Value *v)
1098 {
1099 int p = ((tex.sIndirectSrc < 0) && v) ? srcs.size() : tex.sIndirectSrc;
1100 if (p >= 0) {
1101 tex.sIndirectSrc = p;
1102 setSrc(p, v);
1103 srcs[p].usedAsPtr = !!v;
1104 }
1105 }
1106
CmpInstruction(Function * fn,operation op)1107 CmpInstruction::CmpInstruction(Function *fn, operation op)
1108 : Instruction(fn, op, TYPE_F32)
1109 {
1110 setCond = CC_ALWAYS;
1111 }
1112
1113 CmpInstruction *
clone(ClonePolicy<Function> & pol,Instruction * i) const1114 CmpInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
1115 {
1116 CmpInstruction *cmp = (i ? static_cast<CmpInstruction *>(i) :
1117 new_CmpInstruction(pol.context(), op));
1118 cmp->dType = dType;
1119 Instruction::clone(pol, cmp);
1120 cmp->setCond = setCond;
1121 return cmp;
1122 }
1123
FlowInstruction(Function * fn,operation op,void * targ)1124 FlowInstruction::FlowInstruction(Function *fn, operation op, void *targ)
1125 : Instruction(fn, op, TYPE_NONE)
1126 {
1127 if (op == OP_CALL)
1128 target.fn = reinterpret_cast<Function *>(targ);
1129 else
1130 target.bb = reinterpret_cast<BasicBlock *>(targ);
1131
1132 if (op == OP_BRA ||
1133 op == OP_CONT || op == OP_BREAK ||
1134 op == OP_RET || op == OP_EXIT)
1135 terminator = 1;
1136 else
1137 if (op == OP_JOIN)
1138 terminator = targ ? 1 : 0;
1139
1140 allWarp = absolute = limit = builtin = indirect = 0;
1141 }
1142
1143 FlowInstruction *
clone(ClonePolicy<Function> & pol,Instruction * i) const1144 FlowInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
1145 {
1146 FlowInstruction *flow = (i ? static_cast<FlowInstruction *>(i) :
1147 new_FlowInstruction(pol.context(), op, NULL));
1148
1149 Instruction::clone(pol, flow);
1150 flow->allWarp = allWarp;
1151 flow->absolute = absolute;
1152 flow->limit = limit;
1153 flow->builtin = builtin;
1154
1155 if (builtin)
1156 flow->target.builtin = target.builtin;
1157 else
1158 if (op == OP_CALL)
1159 flow->target.fn = target.fn;
1160 else
1161 if (target.bb)
1162 flow->target.bb = pol.get<BasicBlock>(target.bb);
1163
1164 return flow;
1165 }
1166
// Create an empty program of the given type for target @arch.
// One memory pool is set up per instruction and value class; the
// program starts without generated code and with a single function
// named "MAIN" that is entered into the call set.
Program::Program(Type type, Target *arch)
   : progType(type),
     target(arch),
     tlsSize(0),
     mem_Instruction(sizeof(Instruction), 6),
     mem_CmpInstruction(sizeof(CmpInstruction), 4),
     mem_TexInstruction(sizeof(TexInstruction), 4),
     mem_FlowInstruction(sizeof(FlowInstruction), 4),
     mem_LValue(sizeof(LValue), 8),
     mem_Symbol(sizeof(Symbol), 7),
     mem_ImmediateValue(sizeof(ImmediateValue), 7),
     driver(NULL),
     driver_out(NULL)
{
   // no binary has been generated yet
   code = NULL;
   binSize = 0;

   maxGPR = -1;
   fp64 = false;
   persampleInvocation = false;

   // the entry point; this object is passed to the Function constructor
   main = new Function(this, "MAIN", ~0);
   calls.insert(&main->call);

   dbgFlags = 0;
   optLevel = 0;

   targetPriv = NULL;
}
1196
~Program()1197 Program::~Program()
1198 {
1199 for (ArrayList::Iterator it = allFuncs.iterator(); !it.end(); it.next())
1200 delete reinterpret_cast<Function *>(it.get());
1201
1202 for (ArrayList::Iterator it = allRValues.iterator(); !it.end(); it.next())
1203 releaseValue(reinterpret_cast<Value *>(it.get()));
1204 }
1205
releaseInstruction(Instruction * insn)1206 void Program::releaseInstruction(Instruction *insn)
1207 {
1208 // TODO: make this not suck so much
1209
1210 insn->~Instruction();
1211
1212 if (insn->asCmp())
1213 mem_CmpInstruction.release(insn);
1214 else
1215 if (insn->asTex())
1216 mem_TexInstruction.release(insn);
1217 else
1218 if (insn->asFlow())
1219 mem_FlowInstruction.release(insn);
1220 else
1221 mem_Instruction.release(insn);
1222 }
1223
releaseValue(Value * value)1224 void Program::releaseValue(Value *value)
1225 {
1226 value->~Value();
1227
1228 if (value->asLValue())
1229 mem_LValue.release(value);
1230 else
1231 if (value->asImm())
1232 mem_ImmediateValue.release(value);
1233 else
1234 if (value->asSym())
1235 mem_Symbol.release(value);
1236 }
1237
1238
1239 } // namespace nv50_ir
1240
1241 extern "C" {
1242
1243 static void
nv50_ir_init_prog_info(struct nv50_ir_prog_info * info,struct nv50_ir_prog_info_out * info_out)1244 nv50_ir_init_prog_info(struct nv50_ir_prog_info *info,
1245 struct nv50_ir_prog_info_out *info_out)
1246 {
1247 info_out->target = info->target;
1248 info_out->type = info->type;
1249 if (info->type == PIPE_SHADER_TESS_CTRL || info->type == PIPE_SHADER_TESS_EVAL) {
1250 info_out->prop.tp.domain = PIPE_PRIM_MAX;
1251 info_out->prop.tp.outputPrim = PIPE_PRIM_MAX;
1252 }
1253 if (info->type == PIPE_SHADER_GEOMETRY) {
1254 info_out->prop.gp.instanceCount = 1;
1255 info_out->prop.gp.maxVertices = 1;
1256 }
1257 if (info->type == PIPE_SHADER_COMPUTE) {
1258 info->prop.cp.numThreads[0] =
1259 info->prop.cp.numThreads[1] =
1260 info->prop.cp.numThreads[2] = 1;
1261 }
1262 info_out->bin.smemSize = info->bin.smemSize;
1263 info_out->io.genUserClip = info->io.genUserClip;
1264 info_out->io.instanceId = 0xff;
1265 info_out->io.vertexId = 0xff;
1266 info_out->io.edgeFlagIn = 0xff;
1267 info_out->io.edgeFlagOut = 0xff;
1268 info_out->io.fragDepth = 0xff;
1269 info_out->io.sampleMask = 0xff;
1270 }
1271
1272 int
nv50_ir_generate_code(struct nv50_ir_prog_info * info,struct nv50_ir_prog_info_out * info_out)1273 nv50_ir_generate_code(struct nv50_ir_prog_info *info,
1274 struct nv50_ir_prog_info_out *info_out)
1275 {
1276 int ret = 0;
1277
1278 nv50_ir::Program::Type type;
1279
1280 nv50_ir_init_prog_info(info, info_out);
1281
1282 #define PROG_TYPE_CASE(a, b) \
1283 case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break
1284
1285 switch (info->type) {
1286 PROG_TYPE_CASE(VERTEX, VERTEX);
1287 PROG_TYPE_CASE(TESS_CTRL, TESSELLATION_CONTROL);
1288 PROG_TYPE_CASE(TESS_EVAL, TESSELLATION_EVAL);
1289 PROG_TYPE_CASE(GEOMETRY, GEOMETRY);
1290 PROG_TYPE_CASE(FRAGMENT, FRAGMENT);
1291 PROG_TYPE_CASE(COMPUTE, COMPUTE);
1292 default:
1293 INFO_DBG(info->dbgFlags, VERBOSE, "unsupported program type %u\n", info->type);
1294 return -1;
1295 }
1296 INFO_DBG(info->dbgFlags, VERBOSE, "translating program of type %u\n", type);
1297
1298 nv50_ir::Target *targ = nv50_ir::Target::create(info->target);
1299 if (!targ)
1300 return -1;
1301
1302 nv50_ir::Program *prog = new nv50_ir::Program(type, targ);
1303 if (!prog) {
1304 nv50_ir::Target::destroy(targ);
1305 return -1;
1306 }
1307 prog->driver = info;
1308 prog->driver_out = info_out;
1309 prog->dbgFlags = info->dbgFlags;
1310 prog->optLevel = info->optLevel;
1311
1312 switch (info->bin.sourceRep) {
1313 case PIPE_SHADER_IR_NIR:
1314 ret = prog->makeFromNIR(info, info_out) ? 0 : -2;
1315 break;
1316 case PIPE_SHADER_IR_TGSI:
1317 ret = prog->makeFromTGSI(info, info_out) ? 0 : -2;
1318 break;
1319 default:
1320 ret = -1;
1321 break;
1322 }
1323 if (ret < 0)
1324 goto out;
1325 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
1326 prog->print();
1327
1328 targ->parseDriverInfo(info, info_out);
1329 prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA);
1330
1331 prog->convertToSSA();
1332
1333 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
1334 prog->print();
1335
1336 prog->optimizeSSA(info->optLevel);
1337 prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_SSA);
1338
1339 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
1340 prog->print();
1341
1342 if (!prog->registerAllocation()) {
1343 ret = -4;
1344 goto out;
1345 }
1346 prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_POST_RA);
1347
1348 prog->optimizePostRA(info->optLevel);
1349
1350 if (!prog->emitBinary(info_out)) {
1351 ret = -5;
1352 goto out;
1353 }
1354
1355 out:
1356 INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret);
1357
1358 info_out->bin.maxGPR = prog->maxGPR;
1359 info_out->bin.code = prog->code;
1360 info_out->bin.codeSize = prog->binSize;
1361 info_out->bin.tlsSpace = ALIGN(prog->tlsSize, 0x10);
1362
1363 delete prog;
1364 nv50_ir::Target::destroy(targ);
1365
1366 return ret;
1367 }
1368
1369 } // extern "C"
1370