• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2011 Christoph Bumiller
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "codegen/nv50_ir.h"
24 #include "codegen/nv50_ir_target.h"
25 #include "codegen/nv50_ir_driver.h"
26 
27 extern "C" {
28 #include "nouveau_debug.h"
29 #include "nv50/nv50_program.h"
30 }
31 
32 namespace nv50_ir {
33 
Modifier(operation op)34 Modifier::Modifier(operation op)
35 {
36    switch (op) {
37    case OP_NEG: bits = NV50_IR_MOD_NEG; break;
38    case OP_ABS: bits = NV50_IR_MOD_ABS; break;
39    case OP_SAT: bits = NV50_IR_MOD_SAT; break;
40    case OP_NOT: bits = NV50_IR_MOD_NOT; break;
41    default:
42       bits = 0;
43       break;
44    }
45 }
46 
// Combine this modifier with @m.
// NOT and NEG bits are XOR-ed together, ABS and SAT bits are OR-ed together.
// If this modifier has ABS set, the NEG bit of @m is dropped before the XOR.
Modifier Modifier::operator*(const Modifier m) const
{
   unsigned int rhs = m.bits;
   if (this->bits & NV50_IR_MOD_ABS)
      rhs &= ~NV50_IR_MOD_NEG;

   const unsigned int toggled =
      (this->bits ^ rhs) & (NV50_IR_MOD_NOT | NV50_IR_MOD_NEG);
   const unsigned int sticky =
      (this->bits | m.bits) & (NV50_IR_MOD_ABS | NV50_IR_MOD_SAT);

   return Modifier(toggled | sticky);
}
60 
ValueRef(Value * v)61 ValueRef::ValueRef(Value *v) : value(NULL), insn(NULL)
62 {
63    indirect[0] = -1;
64    indirect[1] = -1;
65    usedAsPtr = false;
66    set(v);
67 }
68 
// Copy constructor: shares @ref's owning instruction pointer and copies its
// value, modifier, indirect slots and used-as-pointer flag.
ValueRef::ValueRef(const ValueRef& ref) : value(NULL), insn(ref.insn)
{
   usedAsPtr = ref.usedAsPtr;
   // Copies value, mod and indirect[]; does not touch usedAsPtr.
   set(ref);
}
74 
~ValueRef()75 ValueRef::~ValueRef()
76 {
77    this->set(NULL);
78 }
79 
getImmediate(ImmediateValue & imm) const80 bool ValueRef::getImmediate(ImmediateValue &imm) const
81 {
82    const ValueRef *src = this;
83    Modifier m;
84    DataType type = src->insn->sType;
85 
86    while (src) {
87       if (src->mod) {
88          if (src->insn->sType != type)
89             break;
90          m *= src->mod;
91       }
92       if (src->getFile() == FILE_IMMEDIATE) {
93          imm = *(src->value->asImm());
94          // The immediate's type isn't required to match its use, it's
95          // more of a hint; applying a modifier makes use of that hint.
96          imm.reg.type = type;
97          m.applyTo(imm);
98          return true;
99       }
100 
101       Instruction *insn = src->value->getUniqueInsn();
102 
103       if (insn && insn->op == OP_MOV) {
104          src = &insn->src(0);
105          if (src->mod)
106             WARN("OP_MOV with modifier encountered !\n");
107       } else {
108          src = NULL;
109       }
110    }
111    return false;
112 }
113 
ValueDef(Value * v)114 ValueDef::ValueDef(Value *v) : value(NULL), insn(NULL)
115 {
116    set(v);
117 }
118 
// Copy constructor: defines the same value as @def. The owning instruction
// is NOT copied; the new definition starts out unattached.
ValueDef::ValueDef(const ValueDef& def) : value(NULL), insn(NULL)
{
   this->set(def.get());
}
123 
~ValueDef()124 ValueDef::~ValueDef()
125 {
126    this->set(NULL);
127 }
128 
129 void
set(const ValueRef & ref)130 ValueRef::set(const ValueRef &ref)
131 {
132    this->set(ref.get());
133    mod = ref.mod;
134    indirect[0] = ref.indirect[0];
135    indirect[1] = ref.indirect[1];
136 }
137 
138 void
set(Value * refVal)139 ValueRef::set(Value *refVal)
140 {
141    if (value == refVal)
142       return;
143    if (value)
144       value->uses.erase(this);
145    if (refVal)
146       refVal->uses.insert(this);
147 
148    value = refVal;
149 }
150 
151 void
set(Value * defVal)152 ValueDef::set(Value *defVal)
153 {
154    if (value == defVal)
155       return;
156    if (value)
157       value->defs.remove(this);
158    if (defVal)
159       defVal->defs.push_back(this);
160 
161    value = defVal;
162 }
163 
164 // Check if we can replace this definition's value by the value in @rep,
165 // including the source modifiers, i.e. make sure that all uses support
166 // @rep.mod.
167 bool
mayReplace(const ValueRef & rep)168 ValueDef::mayReplace(const ValueRef &rep)
169 {
170    if (!rep.mod)
171       return true;
172 
173    if (!insn || !insn->bb) // Unbound instruction ?
174       return false;
175 
176    const Target *target = insn->bb->getProgram()->getTarget();
177 
178    for (Value::UseIterator it = value->uses.begin(); it != value->uses.end();
179         ++it) {
180       Instruction *insn = (*it)->getInsn();
181       int s = -1;
182 
183       for (int i = 0; insn->srcExists(i); ++i) {
184          if (insn->src(i).get() == value) {
185             // If there are multiple references to us we'd have to check if the
186             // combination of mods is still supported, but just bail for now.
187             if (&insn->src(i) != (*it))
188                return false;
189             s = i;
190          }
191       }
192       assert(s >= 0); // integrity of uses list
193 
194       if (!target->isModSupported(insn, s, rep.mod))
195          return false;
196    }
197    return true;
198 }
199 
200 void
replace(const ValueRef & repVal,bool doSet)201 ValueDef::replace(const ValueRef &repVal, bool doSet)
202 {
203    assert(mayReplace(repVal));
204 
205    if (value == repVal.get())
206       return;
207 
208    while (!value->uses.empty()) {
209       ValueRef *ref = *value->uses.begin();
210       ref->set(repVal.get());
211       ref->mod *= repVal.mod;
212    }
213 
214    if (doSet)
215       set(repVal.get());
216 }
217 
Value()218 Value::Value()
219 {
220   join = this;
221   memset(&reg, 0, sizeof(reg));
222   reg.size = 4;
223 }
224 
LValue(Function * fn,DataFile file)225 LValue::LValue(Function *fn, DataFile file)
226 {
227    reg.file = file;
228    reg.size = (file != FILE_PREDICATE) ? 4 : 1;
229    reg.data.id = -1;
230 
231    compMask = 0;
232    compound = 0;
233    ssa = 0;
234    fixedReg = 0;
235    noSpill = 0;
236 
237    fn->add(this, this->id);
238 }
239 
LValue(Function * fn,LValue * lval)240 LValue::LValue(Function *fn, LValue *lval)
241 {
242    assert(lval);
243 
244    reg.file = lval->reg.file;
245    reg.size = lval->reg.size;
246    reg.data.id = -1;
247 
248    compMask = 0;
249    compound = 0;
250    ssa = 0;
251    fixedReg = 0;
252    noSpill = 0;
253 
254    fn->add(this, this->id);
255 }
256 
257 LValue *
clone(ClonePolicy<Function> & pol) const258 LValue::clone(ClonePolicy<Function>& pol) const
259 {
260    LValue *that = new_LValue(pol.context(), reg.file);
261 
262    pol.set<Value>(this, that);
263 
264    that->reg.size = this->reg.size;
265    that->reg.type = this->reg.type;
266    that->reg.data = this->reg.data;
267 
268    return that;
269 }
270 
271 bool
isUniform() const272 LValue::isUniform() const
273 {
274    if (defs.size() > 1)
275       return false;
276    Instruction *insn = getInsn();
277    // let's not try too hard here for now ...
278    return !insn->srcExists(1) && insn->getSrc(0)->isUniform();
279 }
280 
Symbol(Program * prog,DataFile f,ubyte fidx)281 Symbol::Symbol(Program *prog, DataFile f, ubyte fidx)
282 {
283    baseSym = NULL;
284 
285    reg.file = f;
286    reg.fileIndex = fidx;
287    reg.data.offset = 0;
288 
289    prog->add(this, this->id);
290 }
291 
292 Symbol *
clone(ClonePolicy<Function> & pol) const293 Symbol::clone(ClonePolicy<Function>& pol) const
294 {
295    Program *prog = pol.context()->getProgram();
296 
297    Symbol *that = new_Symbol(prog, reg.file, reg.fileIndex);
298 
299    pol.set<Value>(this, that);
300 
301    that->reg.size = this->reg.size;
302    that->reg.type = this->reg.type;
303    that->reg.data = this->reg.data;
304 
305    that->baseSym = this->baseSym;
306 
307    return that;
308 }
309 
310 bool
isUniform() const311 Symbol::isUniform() const
312 {
313    return
314       reg.file != FILE_SYSTEM_VALUE &&
315       reg.file != FILE_MEMORY_LOCAL &&
316       reg.file != FILE_SHADER_INPUT;
317 }
318 
ImmediateValue(Program * prog,uint32_t uval)319 ImmediateValue::ImmediateValue(Program *prog, uint32_t uval)
320 {
321    memset(&reg, 0, sizeof(reg));
322 
323    reg.file = FILE_IMMEDIATE;
324    reg.size = 4;
325    reg.type = TYPE_U32;
326 
327    reg.data.u32 = uval;
328 
329    prog->add(this, this->id);
330 }
331 
ImmediateValue(Program * prog,float fval)332 ImmediateValue::ImmediateValue(Program *prog, float fval)
333 {
334    memset(&reg, 0, sizeof(reg));
335 
336    reg.file = FILE_IMMEDIATE;
337    reg.size = 4;
338    reg.type = TYPE_F32;
339 
340    reg.data.f32 = fval;
341 
342    prog->add(this, this->id);
343 }
344 
ImmediateValue(Program * prog,double dval)345 ImmediateValue::ImmediateValue(Program *prog, double dval)
346 {
347    memset(&reg, 0, sizeof(reg));
348 
349    reg.file = FILE_IMMEDIATE;
350    reg.size = 8;
351    reg.type = TYPE_F64;
352 
353    reg.data.f64 = dval;
354 
355    prog->add(this, this->id);
356 }
357 
// Create an immediate with the same register contents as @proto, but
// reinterpreted as type @ty (type and size are overridden accordingly).
// Unlike the other constructors, this one does not register the value with
// a Program.
ImmediateValue::ImmediateValue(const ImmediateValue *proto, DataType ty)
{
   reg = proto->reg;

   reg.size = typeSizeof(ty);
   reg.type = ty;
}
365 
366 ImmediateValue *
clone(ClonePolicy<Function> & pol) const367 ImmediateValue::clone(ClonePolicy<Function>& pol) const
368 {
369    Program *prog = pol.context()->getProgram();
370    ImmediateValue *that = new_ImmediateValue(prog, 0u);
371 
372    pol.set<Value>(this, that);
373 
374    that->reg.size = this->reg.size;
375    that->reg.type = this->reg.type;
376    that->reg.data = this->reg.data;
377 
378    return that;
379 }
380 
381 bool
isInteger(const int i) const382 ImmediateValue::isInteger(const int i) const
383 {
384    switch (reg.type) {
385    case TYPE_S8:
386       return reg.data.s8 == i;
387    case TYPE_U8:
388       return reg.data.u8 == i;
389    case TYPE_S16:
390       return reg.data.s16 == i;
391    case TYPE_U16:
392       return reg.data.u16 == i;
393    case TYPE_S32:
394    case TYPE_U32:
395       return reg.data.s32 == i; // as if ...
396    case TYPE_S64:
397    case TYPE_U64:
398       return reg.data.s64 == i; // as if ...
399    case TYPE_F32:
400       return reg.data.f32 == static_cast<float>(i);
401    case TYPE_F64:
402       return reg.data.f64 == static_cast<double>(i);
403    default:
404       return false;
405    }
406 }
407 
408 bool
isNegative() const409 ImmediateValue::isNegative() const
410 {
411    switch (reg.type) {
412    case TYPE_S8:  return reg.data.s8 < 0;
413    case TYPE_S16: return reg.data.s16 < 0;
414    case TYPE_S32:
415    case TYPE_U32: return reg.data.s32 < 0;
416    case TYPE_F32: return reg.data.u32 & (1 << 31);
417    case TYPE_F64: return reg.data.u64 & (1ULL << 63);
418    default:
419       return false;
420    }
421 }
422 
423 bool
isPow2() const424 ImmediateValue::isPow2() const
425 {
426    switch (reg.type) {
427    case TYPE_U8:
428    case TYPE_U16:
429    case TYPE_U32: return util_is_power_of_two(reg.data.u32);
430    default:
431       return false;
432    }
433 }
434 
435 void
applyLog2()436 ImmediateValue::applyLog2()
437 {
438    switch (reg.type) {
439    case TYPE_S8:
440    case TYPE_S16:
441    case TYPE_S32:
442       assert(!this->isNegative());
443       // fall through
444    case TYPE_U8:
445    case TYPE_U16:
446    case TYPE_U32:
447       reg.data.u32 = util_logbase2(reg.data.u32);
448       break;
449    case TYPE_F32:
450       reg.data.f32 = log2f(reg.data.f32);
451       break;
452    case TYPE_F64:
453       reg.data.f64 = log2(reg.data.f64);
454       break;
455    default:
456       assert(0);
457       break;
458    }
459 }
460 
461 bool
compare(CondCode cc,float fval) const462 ImmediateValue::compare(CondCode cc, float fval) const
463 {
464    if (reg.type != TYPE_F32)
465       ERROR("immediate value is not of type f32");
466 
467    switch (static_cast<CondCode>(cc & 7)) {
468    case CC_TR: return true;
469    case CC_FL: return false;
470    case CC_LT: return reg.data.f32 <  fval;
471    case CC_LE: return reg.data.f32 <= fval;
472    case CC_GT: return reg.data.f32 >  fval;
473    case CC_GE: return reg.data.f32 >= fval;
474    case CC_EQ: return reg.data.f32 == fval;
475    case CC_NE: return reg.data.f32 != fval;
476    default:
477       assert(0);
478       return false;
479    }
480 }
481 
482 ImmediateValue&
operator =(const ImmediateValue & that)483 ImmediateValue::operator=(const ImmediateValue &that)
484 {
485    this->reg = that.reg;
486    return (*this);
487 }
488 
489 bool
interfers(const Value * that) const490 Value::interfers(const Value *that) const
491 {
492    uint32_t idA, idB;
493 
494    if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
495       return false;
496    if (this->asImm())
497       return false;
498 
499    if (this->asSym()) {
500       idA = this->join->reg.data.offset;
501       idB = that->join->reg.data.offset;
502    } else {
503       idA = this->join->reg.data.id * MIN2(this->reg.size, 4);
504       idB = that->join->reg.data.id * MIN2(that->reg.size, 4);
505    }
506 
507    if (idA < idB)
508       return (idA + this->reg.size > idB);
509    else
510    if (idA > idB)
511       return (idB + that->reg.size > idA);
512    else
513       return (idA == idB);
514 }
515 
516 bool
equals(const Value * that,bool strict) const517 Value::equals(const Value *that, bool strict) const
518 {
519    if (strict)
520       return this == that;
521 
522    if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
523       return false;
524    if (that->reg.size != this->reg.size)
525       return false;
526 
527    if (that->reg.data.id != this->reg.data.id)
528       return false;
529 
530    return true;
531 }
532 
533 bool
equals(const Value * that,bool strict) const534 ImmediateValue::equals(const Value *that, bool strict) const
535 {
536    const ImmediateValue *imm = that->asImm();
537    if (!imm)
538       return false;
539    return reg.data.u64 == imm->reg.data.u64;
540 }
541 
542 bool
equals(const Value * that,bool strict) const543 Symbol::equals(const Value *that, bool strict) const
544 {
545    if (reg.file != that->reg.file || reg.fileIndex != that->reg.fileIndex)
546       return false;
547    assert(that->asSym());
548 
549    if (this->baseSym != that->asSym()->baseSym)
550       return false;
551 
552    if (reg.file == FILE_SYSTEM_VALUE)
553       return (this->reg.data.sv.sv    == that->reg.data.sv.sv &&
554               this->reg.data.sv.index == that->reg.data.sv.index);
555    return this->reg.data.offset == that->reg.data.offset;
556 }
557 
init()558 void Instruction::init()
559 {
560    next = prev = 0;
561 
562    cc = CC_ALWAYS;
563    rnd = ROUND_N;
564    cache = CACHE_CA;
565    subOp = 0;
566 
567    saturate = 0;
568    join = 0;
569    exit = 0;
570    terminator = 0;
571    ftz = 0;
572    dnz = 0;
573    perPatch = 0;
574    fixed = 0;
575    encSize = 0;
576    ipa = 0;
577    mask = 0;
578 
579    lanes = 0xf;
580 
581    postFactor = 0;
582 
583    predSrc = -1;
584    flagsDef = -1;
585    flagsSrc = -1;
586 }
587 
Instruction()588 Instruction::Instruction()
589 {
590    init();
591 
592    op = OP_NOP;
593    dType = sType = TYPE_F32;
594 
595    id = -1;
596    bb = 0;
597 }
598 
Instruction(Function * fn,operation opr,DataType ty)599 Instruction::Instruction(Function *fn, operation opr, DataType ty)
600 {
601    init();
602 
603    op = opr;
604    dType = sType = ty;
605 
606    fn->add(this, id);
607 }
608 
~Instruction()609 Instruction::~Instruction()
610 {
611    if (bb) {
612       Function *fn = bb->getFunction();
613       bb->remove(this);
614       fn->allInsns.remove(id);
615    }
616 
617    for (int s = 0; srcExists(s); ++s)
618       setSrc(s, NULL);
619    // must unlink defs too since the list pointers will get deallocated
620    for (int d = 0; defExists(d); ++d)
621       setDef(d, NULL);
622 }
623 
624 void
setDef(int i,Value * val)625 Instruction::setDef(int i, Value *val)
626 {
627    int size = defs.size();
628    if (i >= size) {
629       defs.resize(i + 1);
630       while (size <= i)
631          defs[size++].setInsn(this);
632    }
633    defs[i].set(val);
634 }
635 
636 void
setSrc(int s,Value * val)637 Instruction::setSrc(int s, Value *val)
638 {
639    int size = srcs.size();
640    if (s >= size) {
641       srcs.resize(s + 1);
642       while (size <= s)
643          srcs[size++].setInsn(this);
644    }
645    srcs[s].set(val);
646 }
647 
648 void
setSrc(int s,const ValueRef & ref)649 Instruction::setSrc(int s, const ValueRef& ref)
650 {
651    setSrc(s, ref.get());
652    srcs[s].mod = ref.mod;
653 }
654 
655 void
swapSources(int a,int b)656 Instruction::swapSources(int a, int b)
657 {
658    Value *value = srcs[a].get();
659    Modifier m = srcs[a].mod;
660 
661    setSrc(a, srcs[b]);
662 
663    srcs[b].set(value);
664    srcs[b].mod = m;
665 }
666 
// Fix up a stored source index after sources [@s, last] moved by @delta.
// Indices at or beyond @s are shifted by @delta. When moving down
// (@delta < 0), indices in [@s + @delta, @s) are reset to -1. Every other
// index is left alone.
static inline void moveSourcesAdjustIndex(int8_t &index, int s, int delta)
{
   if (index >= s) {
      index += delta;
      return;
   }
   if (delta < 0 && index >= s + delta)
      index = -1;
}
675 
676 // Moves sources [@s,last_source] by @delta.
677 // If @delta < 0, sources [@s - abs(@delta), @s) are erased.
678 void
moveSources(const int s,const int delta)679 Instruction::moveSources(const int s, const int delta)
680 {
681    if (delta == 0)
682       return;
683    assert(s + delta >= 0);
684 
685    int k;
686 
687    for (k = 0; srcExists(k); ++k) {
688       for (int i = 0; i < 2; ++i)
689          moveSourcesAdjustIndex(src(k).indirect[i], s, delta);
690    }
691    moveSourcesAdjustIndex(predSrc, s, delta);
692    moveSourcesAdjustIndex(flagsSrc, s, delta);
693    if (asTex()) {
694       TexInstruction *tex = asTex();
695       moveSourcesAdjustIndex(tex->tex.rIndirectSrc, s, delta);
696       moveSourcesAdjustIndex(tex->tex.sIndirectSrc, s, delta);
697    }
698 
699    if (delta > 0) {
700       --k;
701       for (int p = k + delta; k >= s; --k, --p)
702          setSrc(p, src(k));
703    } else {
704       int p;
705       for (p = s; p < k; ++p)
706          setSrc(p + delta, src(p));
707       for (; (p + delta) < k; ++p)
708          setSrc(p + delta, NULL);
709    }
710 }
711 
712 void
takeExtraSources(int s,Value * values[3])713 Instruction::takeExtraSources(int s, Value *values[3])
714 {
715    values[0] = getIndirect(s, 0);
716    if (values[0])
717       setIndirect(s, 0, NULL);
718 
719    values[1] = getIndirect(s, 1);
720    if (values[1])
721       setIndirect(s, 1, NULL);
722 
723    values[2] = getPredicate();
724    if (values[2])
725       setPredicate(cc, NULL);
726 }
727 
728 void
putExtraSources(int s,Value * values[3])729 Instruction::putExtraSources(int s, Value *values[3])
730 {
731    if (values[0])
732       setIndirect(s, 0, values[0]);
733    if (values[1])
734       setIndirect(s, 1, values[1]);
735    if (values[2])
736       setPredicate(cc, values[2]);
737 }
738 
739 Instruction *
clone(ClonePolicy<Function> & pol,Instruction * i) const740 Instruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
741 {
742    if (!i)
743       i = new_Instruction(pol.context(), op, dType);
744 #ifndef NDEBUG // non-conformant assert, so this is required
745    assert(typeid(*i) == typeid(*this));
746 #endif
747 
748    pol.set<Instruction>(this, i);
749 
750    i->sType = sType;
751 
752    i->rnd = rnd;
753    i->cache = cache;
754    i->subOp = subOp;
755 
756    i->saturate = saturate;
757    i->join = join;
758    i->exit = exit;
759    i->mask = mask;
760    i->ftz = ftz;
761    i->dnz = dnz;
762    i->ipa = ipa;
763    i->lanes = lanes;
764    i->perPatch = perPatch;
765 
766    i->postFactor = postFactor;
767 
768    for (int d = 0; defExists(d); ++d)
769       i->setDef(d, pol.get(getDef(d)));
770 
771    for (int s = 0; srcExists(s); ++s) {
772       i->setSrc(s, pol.get(getSrc(s)));
773       i->src(s).mod = src(s).mod;
774    }
775 
776    i->cc = cc;
777    i->predSrc = predSrc;
778    i->flagsDef = flagsDef;
779    i->flagsSrc = flagsSrc;
780 
781    return i;
782 }
783 
784 unsigned int
defCount(unsigned int mask,bool singleFile) const785 Instruction::defCount(unsigned int mask, bool singleFile) const
786 {
787    unsigned int i, n;
788 
789    if (singleFile) {
790       unsigned int d = ffs(mask);
791       if (!d)
792          return 0;
793       for (i = d--; defExists(i); ++i)
794          if (getDef(i)->reg.file != getDef(d)->reg.file)
795             mask &= ~(1 << i);
796    }
797 
798    for (n = 0, i = 0; this->defExists(i); ++i, mask >>= 1)
799       n += mask & 1;
800    return n;
801 }
802 
803 unsigned int
srcCount(unsigned int mask,bool singleFile) const804 Instruction::srcCount(unsigned int mask, bool singleFile) const
805 {
806    unsigned int i, n;
807 
808    if (singleFile) {
809       unsigned int s = ffs(mask);
810       if (!s)
811          return 0;
812       for (i = s--; srcExists(i); ++i)
813          if (getSrc(i)->reg.file != getSrc(s)->reg.file)
814             mask &= ~(1 << i);
815    }
816 
817    for (n = 0, i = 0; this->srcExists(i); ++i, mask >>= 1)
818       n += mask & 1;
819    return n;
820 }
821 
822 bool
setIndirect(int s,int dim,Value * value)823 Instruction::setIndirect(int s, int dim, Value *value)
824 {
825    assert(this->srcExists(s));
826 
827    int p = srcs[s].indirect[dim];
828    if (p < 0) {
829       if (!value)
830          return true;
831       p = srcs.size();
832       while (p > 0 && !srcExists(p - 1))
833          --p;
834    }
835    setSrc(p, value);
836    srcs[p].usedAsPtr = (value != 0);
837    srcs[s].indirect[dim] = value ? p : -1;
838    return true;
839 }
840 
841 bool
setPredicate(CondCode ccode,Value * value)842 Instruction::setPredicate(CondCode ccode, Value *value)
843 {
844    cc = ccode;
845 
846    if (!value) {
847       if (predSrc >= 0) {
848          srcs[predSrc].set(NULL);
849          predSrc = -1;
850       }
851       return true;
852    }
853 
854    if (predSrc < 0) {
855       predSrc = srcs.size();
856       while (predSrc > 0 && !srcExists(predSrc - 1))
857          --predSrc;
858    }
859 
860    setSrc(predSrc, value);
861    return true;
862 }
863 
864 bool
writesPredicate() const865 Instruction::writesPredicate() const
866 {
867    for (int d = 0; defExists(d); ++d)
868       if (getDef(d)->inFile(FILE_PREDICATE) || getDef(d)->inFile(FILE_FLAGS))
869          return true;
870    return false;
871 }
872 
873 bool
canCommuteDefSrc(const Instruction * i) const874 Instruction::canCommuteDefSrc(const Instruction *i) const
875 {
876    for (int d = 0; defExists(d); ++d)
877       for (int s = 0; i->srcExists(s); ++s)
878          if (getDef(d)->interfers(i->getSrc(s)))
879             return false;
880    return true;
881 }
882 
883 bool
canCommuteDefDef(const Instruction * i) const884 Instruction::canCommuteDefDef(const Instruction *i) const
885 {
886    for (int d = 0; defExists(d); ++d)
887       for (int c = 0; i->defExists(c); ++c)
888          if (getDef(d)->interfers(i->getDef(c)))
889             return false;
890    return true;
891 }
892 
893 bool
isCommutationLegal(const Instruction * i) const894 Instruction::isCommutationLegal(const Instruction *i) const
895 {
896    return canCommuteDefDef(i) &&
897       canCommuteDefSrc(i) &&
898       i->canCommuteDefSrc(this);
899 }
900 
TexInstruction(Function * fn,operation op)901 TexInstruction::TexInstruction(Function *fn, operation op)
902    : Instruction(fn, op, TYPE_F32)
903 {
904    memset(&tex, 0, sizeof(tex));
905 
906    tex.rIndirectSrc = -1;
907    tex.sIndirectSrc = -1;
908 }
909 
~TexInstruction()910 TexInstruction::~TexInstruction()
911 {
912    for (int c = 0; c < 3; ++c) {
913       dPdx[c].set(NULL);
914       dPdy[c].set(NULL);
915    }
916    for (int n = 0; n < 4; ++n)
917       for (int c = 0; c < 3; ++c)
918          offset[n][c].set(NULL);
919 }
920 
921 TexInstruction *
clone(ClonePolicy<Function> & pol,Instruction * i) const922 TexInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
923 {
924    TexInstruction *tex = (i ? static_cast<TexInstruction *>(i) :
925                           new_TexInstruction(pol.context(), op));
926 
927    Instruction::clone(pol, tex);
928 
929    tex->tex = this->tex;
930 
931    if (op == OP_TXD) {
932       for (unsigned int c = 0; c < tex->tex.target.getDim(); ++c) {
933          tex->dPdx[c].set(dPdx[c]);
934          tex->dPdy[c].set(dPdy[c]);
935       }
936    }
937 
938    for (int n = 0; n < tex->tex.useOffsets; ++n)
939       for (int c = 0; c < 3; ++c)
940          tex->offset[n][c].set(offset[n][c]);
941 
942    return tex;
943 }
944 
945 const struct TexInstruction::Target::Desc TexInstruction::Target::descTable[] =
946 {
947    { "1D",                1, 1, false, false, false },
948    { "2D",                2, 2, false, false, false },
949    { "2D_MS",             2, 3, false, false, false },
950    { "3D",                3, 3, false, false, false },
951    { "CUBE",              2, 3, false, true,  false },
952    { "1D_SHADOW",         1, 1, false, false, true  },
953    { "2D_SHADOW",         2, 2, false, false, true  },
954    { "CUBE_SHADOW",       2, 3, false, true,  true  },
955    { "1D_ARRAY",          1, 2, true,  false, false },
956    { "2D_ARRAY",          2, 3, true,  false, false },
957    { "2D_MS_ARRAY",       2, 4, true,  false, false },
958    { "CUBE_ARRAY",        2, 4, true,  true,  false },
959    { "1D_ARRAY_SHADOW",   1, 2, true,  false, true  },
960    { "2D_ARRAY_SHADOW",   2, 3, true,  false, true  },
961    { "RECT",              2, 2, false, false, false },
962    { "RECT_SHADOW",       2, 2, false, false, true  },
963    { "CUBE_ARRAY_SHADOW", 2, 4, true,  true,  true  },
964    { "BUFFER",            1, 1, false, false, false },
965 };
966 
967 const struct TexInstruction::ImgFormatDesc TexInstruction::formatTable[] =
968 {
969    { "NONE",         0, {  0,  0,  0,  0 },  UINT },
970 
971    { "RGBA32F",      4, { 32, 32, 32, 32 }, FLOAT },
972    { "RGBA16F",      4, { 16, 16, 16, 16 }, FLOAT },
973    { "RG32F",        2, { 32, 32,  0,  0 }, FLOAT },
974    { "RG16F",        2, { 16, 16,  0,  0 }, FLOAT },
975    { "R11G11B10F",   3, { 11, 11, 10,  0 }, FLOAT },
976    { "R32F",         1, { 32,  0,  0,  0 }, FLOAT },
977    { "R16F",         1, { 16,  0,  0,  0 }, FLOAT },
978 
979    { "RGBA32UI",     4, { 32, 32, 32, 32 },  UINT },
980    { "RGBA16UI",     4, { 16, 16, 16, 16 },  UINT },
981    { "RGB10A2UI",    4, { 10, 10, 10,  2 },  UINT },
982    { "RGBA8UI",      4, {  8,  8,  8,  8 },  UINT },
983    { "RG32UI",       2, { 32, 32,  0,  0 },  UINT },
984    { "RG16UI",       2, { 16, 16,  0,  0 },  UINT },
985    { "RG8UI",        2, {  8,  8,  0,  0 },  UINT },
986    { "R32UI",        1, { 32,  0,  0,  0 },  UINT },
987    { "R16UI",        1, { 16,  0,  0,  0 },  UINT },
988    { "R8UI",         1, {  8,  0,  0,  0 },  UINT },
989 
990    { "RGBA32I",      4, { 32, 32, 32, 32 },  SINT },
991    { "RGBA16I",      4, { 16, 16, 16, 16 },  SINT },
992    { "RGBA8I",       4, {  8,  8,  8,  8 },  SINT },
993    { "RG32I",        2, { 32, 32,  0,  0 },  SINT },
994    { "RG16I",        2, { 16, 16,  0,  0 },  SINT },
995    { "RG8I",         2, {  8,  8,  0,  0 },  SINT },
996    { "R32I",         1, { 32,  0,  0,  0 },  SINT },
997    { "R16I",         1, { 16,  0,  0,  0 },  SINT },
998    { "R8I",          1, {  8,  0,  0,  0 },  SINT },
999 
1000    { "RGBA16",       4, { 16, 16, 16, 16 }, UNORM },
1001    { "RGB10A2",      4, { 10, 10, 10,  2 }, UNORM },
1002    { "RGBA8",        4, {  8,  8,  8,  8 }, UNORM },
1003    { "RG16",         2, { 16, 16,  0,  0 }, UNORM },
1004    { "RG8",          2, {  8,  8,  0,  0 }, UNORM },
1005    { "R16",          1, { 16,  0,  0,  0 }, UNORM },
1006    { "R8",           1, {  8,  0,  0,  0 }, UNORM },
1007 
1008    { "RGBA16_SNORM", 4, { 16, 16, 16, 16 }, SNORM },
1009    { "RGBA8_SNORM",  4, {  8,  8,  8,  8 }, SNORM },
1010    { "RG16_SNORM",   2, { 16, 16,  0,  0 }, SNORM },
1011    { "RG8_SNORM",    2, {  8,  8,  0,  0 }, SNORM },
1012    { "R16_SNORM",    1, { 16,  0,  0,  0 }, SNORM },
1013    { "R8_SNORM",     1, {  8,  0,  0,  0 }, SNORM },
1014 
1015    { "BGRA8",        4, {  8,  8,  8,  8 }, UNORM, true },
1016 };
1017 
1018 void
setIndirectR(Value * v)1019 TexInstruction::setIndirectR(Value *v)
1020 {
1021    int p = ((tex.rIndirectSrc < 0) && v) ? srcs.size() : tex.rIndirectSrc;
1022    if (p >= 0) {
1023       tex.rIndirectSrc = p;
1024       setSrc(p, v);
1025       srcs[p].usedAsPtr = !!v;
1026    }
1027 }
1028 
1029 void
setIndirectS(Value * v)1030 TexInstruction::setIndirectS(Value *v)
1031 {
1032    int p = ((tex.sIndirectSrc < 0) && v) ? srcs.size() : tex.sIndirectSrc;
1033    if (p >= 0) {
1034       tex.sIndirectSrc = p;
1035       setSrc(p, v);
1036       srcs[p].usedAsPtr = !!v;
1037    }
1038 }
1039 
CmpInstruction(Function * fn,operation op)1040 CmpInstruction::CmpInstruction(Function *fn, operation op)
1041    : Instruction(fn, op, TYPE_F32)
1042 {
1043    setCond = CC_ALWAYS;
1044 }
1045 
1046 CmpInstruction *
clone(ClonePolicy<Function> & pol,Instruction * i) const1047 CmpInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
1048 {
1049    CmpInstruction *cmp = (i ? static_cast<CmpInstruction *>(i) :
1050                           new_CmpInstruction(pol.context(), op));
1051    cmp->dType = dType;
1052    Instruction::clone(pol, cmp);
1053    cmp->setCond = setCond;
1054    return cmp;
1055 }
1056 
FlowInstruction(Function * fn,operation op,void * targ)1057 FlowInstruction::FlowInstruction(Function *fn, operation op, void *targ)
1058    : Instruction(fn, op, TYPE_NONE)
1059 {
1060    if (op == OP_CALL)
1061       target.fn = reinterpret_cast<Function *>(targ);
1062    else
1063       target.bb = reinterpret_cast<BasicBlock *>(targ);
1064 
1065    if (op == OP_BRA ||
1066        op == OP_CONT || op == OP_BREAK ||
1067        op == OP_RET || op == OP_EXIT)
1068       terminator = 1;
1069    else
1070    if (op == OP_JOIN)
1071       terminator = targ ? 1 : 0;
1072 
1073    allWarp = absolute = limit = builtin = indirect = 0;
1074 }
1075 
1076 FlowInstruction *
clone(ClonePolicy<Function> & pol,Instruction * i) const1077 FlowInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
1078 {
1079    FlowInstruction *flow = (i ? static_cast<FlowInstruction *>(i) :
1080                             new_FlowInstruction(pol.context(), op, NULL));
1081 
1082    Instruction::clone(pol, flow);
1083    flow->allWarp = allWarp;
1084    flow->absolute = absolute;
1085    flow->limit = limit;
1086    flow->builtin = builtin;
1087 
1088    if (builtin)
1089       flow->target.builtin = target.builtin;
1090    else
1091    if (op == OP_CALL)
1092       flow->target.fn = target.fn;
1093    else
1094    if (target.bb)
1095       flow->target.bb = pol.get<BasicBlock>(target.bb);
1096 
1097    return flow;
1098 }
1099 
Program(Type type,Target * arch)1100 Program::Program(Type type, Target *arch)
1101    : progType(type),
1102      target(arch),
1103      mem_Instruction(sizeof(Instruction), 6),
1104      mem_CmpInstruction(sizeof(CmpInstruction), 4),
1105      mem_TexInstruction(sizeof(TexInstruction), 4),
1106      mem_FlowInstruction(sizeof(FlowInstruction), 4),
1107      mem_LValue(sizeof(LValue), 8),
1108      mem_Symbol(sizeof(Symbol), 7),
1109      mem_ImmediateValue(sizeof(ImmediateValue), 7)
1110 {
1111    code = NULL;
1112    binSize = 0;
1113 
1114    maxGPR = -1;
1115 
1116    main = new Function(this, "MAIN", ~0);
1117    calls.insert(&main->call);
1118 
1119    dbgFlags = 0;
1120    optLevel = 0;
1121 
1122    targetPriv = NULL;
1123 }
1124 
~Program()1125 Program::~Program()
1126 {
1127    for (ArrayList::Iterator it = allFuncs.iterator(); !it.end(); it.next())
1128       delete reinterpret_cast<Function *>(it.get());
1129 
1130    for (ArrayList::Iterator it = allRValues.iterator(); !it.end(); it.next())
1131       releaseValue(reinterpret_cast<Value *>(it.get()));
1132 }
1133 
releaseInstruction(Instruction * insn)1134 void Program::releaseInstruction(Instruction *insn)
1135 {
1136    // TODO: make this not suck so much
1137 
1138    insn->~Instruction();
1139 
1140    if (insn->asCmp())
1141       mem_CmpInstruction.release(insn);
1142    else
1143    if (insn->asTex())
1144       mem_TexInstruction.release(insn);
1145    else
1146    if (insn->asFlow())
1147       mem_FlowInstruction.release(insn);
1148    else
1149       mem_Instruction.release(insn);
1150 }
1151 
releaseValue(Value * value)1152 void Program::releaseValue(Value *value)
1153 {
1154    value->~Value();
1155 
1156    if (value->asLValue())
1157       mem_LValue.release(value);
1158    else
1159    if (value->asImm())
1160       mem_ImmediateValue.release(value);
1161    else
1162    if (value->asSym())
1163       mem_Symbol.release(value);
1164 }
1165 
1166 
1167 } // namespace nv50_ir
1168 
1169 extern "C" {
1170 
1171 static void
nv50_ir_init_prog_info(struct nv50_ir_prog_info * info)1172 nv50_ir_init_prog_info(struct nv50_ir_prog_info *info)
1173 {
1174    if (info->type == PIPE_SHADER_TESS_CTRL || info->type == PIPE_SHADER_TESS_EVAL) {
1175       info->prop.tp.domain = PIPE_PRIM_MAX;
1176       info->prop.tp.outputPrim = PIPE_PRIM_MAX;
1177    }
1178    if (info->type == PIPE_SHADER_GEOMETRY) {
1179       info->prop.gp.instanceCount = 1;
1180       info->prop.gp.maxVertices = 1;
1181    }
1182    info->prop.cp.numThreads = 1;
1183    info->io.pointSize = 0xff;
1184    info->io.instanceId = 0xff;
1185    info->io.vertexId = 0xff;
1186    info->io.edgeFlagIn = 0xff;
1187    info->io.edgeFlagOut = 0xff;
1188    info->io.fragDepth = 0xff;
1189    info->io.sampleMask = 0xff;
1190    info->io.backFaceColor[0] = info->io.backFaceColor[1] = 0xff;
1191 }
1192 
1193 int
nv50_ir_generate_code(struct nv50_ir_prog_info * info)1194 nv50_ir_generate_code(struct nv50_ir_prog_info *info)
1195 {
1196    int ret = 0;
1197 
1198    nv50_ir::Program::Type type;
1199 
1200    nv50_ir_init_prog_info(info);
1201 
1202 #define PROG_TYPE_CASE(a, b)                                      \
1203    case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break
1204 
1205    switch (info->type) {
1206    PROG_TYPE_CASE(VERTEX, VERTEX);
1207    PROG_TYPE_CASE(TESS_CTRL, TESSELLATION_CONTROL);
1208    PROG_TYPE_CASE(TESS_EVAL, TESSELLATION_EVAL);
1209    PROG_TYPE_CASE(GEOMETRY, GEOMETRY);
1210    PROG_TYPE_CASE(FRAGMENT, FRAGMENT);
1211    PROG_TYPE_CASE(COMPUTE, COMPUTE);
1212    default:
1213       type = nv50_ir::Program::TYPE_COMPUTE;
1214       break;
1215    }
1216    INFO_DBG(info->dbgFlags, VERBOSE, "translating program of type %u\n", type);
1217 
1218    nv50_ir::Target *targ = nv50_ir::Target::create(info->target);
1219    if (!targ)
1220       return -1;
1221 
1222    nv50_ir::Program *prog = new nv50_ir::Program(type, targ);
1223    if (!prog)
1224       return -1;
1225    prog->driver = info;
1226    prog->dbgFlags = info->dbgFlags;
1227    prog->optLevel = info->optLevel;
1228 
1229    switch (info->bin.sourceRep) {
1230 #if 0
1231    case PIPE_IR_LLVM:
1232    case PIPE_IR_GLSL:
1233       return -1;
1234    case PIPE_IR_SM4:
1235       ret = prog->makeFromSM4(info) ? 0 : -2;
1236       break;
1237    case PIPE_IR_TGSI:
1238 #endif
1239    default:
1240       ret = prog->makeFromTGSI(info) ? 0 : -2;
1241       break;
1242    }
1243    if (ret < 0)
1244       goto out;
1245    if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
1246       prog->print();
1247 
1248    targ->parseDriverInfo(info);
1249    prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA);
1250 
1251    prog->convertToSSA();
1252 
1253    if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
1254       prog->print();
1255 
1256    prog->optimizeSSA(info->optLevel);
1257    prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_SSA);
1258 
1259    if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
1260       prog->print();
1261 
1262    if (!prog->registerAllocation()) {
1263       ret = -4;
1264       goto out;
1265    }
1266    prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_POST_RA);
1267 
1268    prog->optimizePostRA(info->optLevel);
1269 
1270    if (!prog->emitBinary(info)) {
1271       ret = -5;
1272       goto out;
1273    }
1274 
1275 out:
1276    INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret);
1277 
1278    info->bin.maxGPR = prog->maxGPR;
1279    info->bin.code = prog->code;
1280    info->bin.codeSize = prog->binSize;
1281    info->bin.tlsSpace = prog->tlsSize;
1282 
1283    delete prog;
1284    nv50_ir::Target::destroy(targ);
1285 
1286    return ret;
1287 }
1288 
1289 } // extern "C"
1290