• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2011 Christoph Bumiller
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "nv50_ir.h"
24 #include "nv50_ir_target.h"
25 #include "nv50_ir_driver.h"
26 
27 namespace nv50_ir {
28 
Modifier(operation op)29 Modifier::Modifier(operation op)
30 {
31    switch (op) {
32    case OP_NEG: bits = NV50_IR_MOD_NEG; break;
33    case OP_ABS: bits = NV50_IR_MOD_ABS; break;
34    case OP_SAT: bits = NV50_IR_MOD_SAT; break;
35    case OP_NOT: bits = NV50_IR_MOD_NOT; break;
36    default:
37       bits = 0;
38       break;
39    }
40 }
41 
// Compose this modifier with @m.
//  - If this modifier has ABS set, a NEG in @m is dropped first.
//  - NOT and NEG combine by XOR (two of the same cancel out).
//  - ABS and SAT combine by OR (once set they stay set).
Modifier Modifier::operator*(const Modifier m) const
{
   const unsigned int toggleMask = NV50_IR_MOD_NOT | NV50_IR_MOD_NEG;
   const unsigned int stickyMask = NV50_IR_MOD_ABS | NV50_IR_MOD_SAT;

   unsigned int rhs = m.bits;
   if (this->bits & NV50_IR_MOD_ABS)
      rhs &= ~NV50_IR_MOD_NEG;

   const unsigned int toggled = (this->bits ^ rhs) & toggleMask;
   const unsigned int sticky = (this->bits | m.bits) & stickyMask;

   return Modifier(toggled | sticky);
}
55 
ValueRef(Value * v)56 ValueRef::ValueRef(Value *v) : value(NULL), insn(NULL)
57 {
58    indirect[0] = -1;
59    indirect[1] = -1;
60    usedAsPtr = false;
61    set(v);
62 }
63 
// Copy constructor: takes over @ref's instruction pointer and pointer-use
// flag, and uses set(const ValueRef &) to copy the value, modifier and
// indirect indices.
ValueRef::ValueRef(const ValueRef& ref) : value(NULL), insn(ref.insn)
{
   usedAsPtr = ref.usedAsPtr;
   this->set(ref);
}
69 
~ValueRef()70 ValueRef::~ValueRef()
71 {
72    this->set(NULL);
73 }
74 
getImmediate(ImmediateValue & imm) const75 bool ValueRef::getImmediate(ImmediateValue &imm) const
76 {
77    const ValueRef *src = this;
78    Modifier m;
79    DataType type = src->insn->sType;
80 
81    while (src) {
82       if (src->mod) {
83          if (src->insn->sType != type)
84             break;
85          m *= src->mod;
86       }
87       if (src->getFile() == FILE_IMMEDIATE) {
88          imm = *(src->value->asImm());
89          // The immediate's type isn't required to match its use, it's
90          // more of a hint; applying a modifier makes use of that hint.
91          imm.reg.type = type;
92          m.applyTo(imm);
93          return true;
94       }
95 
96       Instruction *insn = src->value->getUniqueInsn();
97 
98       if (insn && insn->op == OP_MOV) {
99          src = &insn->src(0);
100          if (src->mod)
101             WARN("OP_MOV with modifier encountered !\n");
102       } else {
103          src = NULL;
104       }
105    }
106    return false;
107 }
108 
ValueDef(Value * v)109 ValueDef::ValueDef(Value *v) : value(NULL), origin(NULL), insn(NULL)
110 {
111    set(v);
112 }
113 
// Copy constructor: only the defined value is taken from @def; origin and
// the owning instruction start out as NULL.
ValueDef::ValueDef(const ValueDef& def) : value(NULL), origin(NULL), insn(NULL)
{
   Value *defined = def.get();
   set(defined);
}
118 
~ValueDef()119 ValueDef::~ValueDef()
120 {
121    this->set(NULL);
122 }
123 
124 void
set(const ValueRef & ref)125 ValueRef::set(const ValueRef &ref)
126 {
127    this->set(ref.get());
128    mod = ref.mod;
129    indirect[0] = ref.indirect[0];
130    indirect[1] = ref.indirect[1];
131 }
132 
133 void
set(Value * refVal)134 ValueRef::set(Value *refVal)
135 {
136    if (value == refVal)
137       return;
138    if (value)
139       value->uses.erase(this);
140    if (refVal)
141       refVal->uses.insert(this);
142 
143    value = refVal;
144 }
145 
146 void
set(Value * defVal)147 ValueDef::set(Value *defVal)
148 {
149    if (value == defVal)
150       return;
151    if (value)
152       value->defs.remove(this);
153    if (defVal)
154       defVal->defs.push_back(this);
155 
156    value = defVal;
157 }
158 
159 // Check if we can replace this definition's value by the value in @rep,
160 // including the source modifiers, i.e. make sure that all uses support
161 // @rep.mod.
162 bool
mayReplace(const ValueRef & rep)163 ValueDef::mayReplace(const ValueRef &rep)
164 {
165    if (!rep.mod)
166       return true;
167 
168    if (!insn || !insn->bb) // Unbound instruction ?
169       return false;
170 
171    const Target *target = insn->bb->getProgram()->getTarget();
172 
173    for (Value::UseIterator it = value->uses.begin(); it != value->uses.end();
174         ++it) {
175       Instruction *insn = (*it)->getInsn();
176       int s = -1;
177 
178       for (int i = 0; insn->srcExists(i); ++i) {
179          if (insn->src(i).get() == value) {
180             // If there are multiple references to us we'd have to check if the
181             // combination of mods is still supported, but just bail for now.
182             if (&insn->src(i) != (*it))
183                return false;
184             s = i;
185          }
186       }
187       assert(s >= 0); // integrity of uses list
188 
189       if (!target->isModSupported(insn, s, rep.mod))
190          return false;
191    }
192    return true;
193 }
194 
195 void
replace(const ValueRef & repVal,bool doSet)196 ValueDef::replace(const ValueRef &repVal, bool doSet)
197 {
198    assert(mayReplace(repVal));
199 
200    if (value == repVal.get())
201       return;
202 
203    while (!value->uses.empty()) {
204       ValueRef *ref = *value->uses.begin();
205       ref->set(repVal.get());
206       ref->mod *= repVal.mod;
207    }
208 
209    if (doSet)
210       set(repVal.get());
211 }
212 
Value()213 Value::Value() : id(-1)
214 {
215   join = this;
216   memset(&reg, 0, sizeof(reg));
217   reg.size = 4;
218 }
219 
LValue(Function * fn,DataFile file)220 LValue::LValue(Function *fn, DataFile file)
221 {
222    reg.file = file;
223    reg.size = (file != FILE_PREDICATE) ? 4 : 1;
224    reg.data.id = -1;
225 
226    compMask = 0;
227    compound = 0;
228    ssa = 0;
229    fixedReg = 0;
230    noSpill = 0;
231 
232    fn->add(this, this->id);
233 }
234 
LValue(Function * fn,LValue * lval)235 LValue::LValue(Function *fn, LValue *lval)
236 {
237    assert(lval);
238 
239    reg.file = lval->reg.file;
240    reg.size = lval->reg.size;
241    reg.data.id = -1;
242 
243    compMask = 0;
244    compound = 0;
245    ssa = 0;
246    fixedReg = 0;
247    noSpill = 0;
248 
249    fn->add(this, this->id);
250 }
251 
252 LValue *
clone(ClonePolicy<Function> & pol) const253 LValue::clone(ClonePolicy<Function>& pol) const
254 {
255    LValue *that = new_LValue(pol.context(), reg.file);
256 
257    pol.set<Value>(this, that);
258 
259    that->reg.size = this->reg.size;
260    that->reg.type = this->reg.type;
261    that->reg.data = this->reg.data;
262 
263    return that;
264 }
265 
266 bool
isUniform() const267 LValue::isUniform() const
268 {
269    if (defs.size() > 1)
270       return false;
271    Instruction *insn = getInsn();
272    if (!insn)
273       return false;
274    // let's not try too hard here for now ...
275    return !insn->srcExists(1) && insn->getSrc(0)->isUniform();
276 }
277 
Symbol(Program * prog,DataFile f,uint8_t fidx)278 Symbol::Symbol(Program *prog, DataFile f, uint8_t fidx)
279 {
280    baseSym = NULL;
281 
282    reg.file = f;
283    reg.fileIndex = fidx;
284    reg.data.offset = 0;
285 
286    prog->add(this, this->id);
287 }
288 
289 Symbol *
clone(ClonePolicy<Function> & pol) const290 Symbol::clone(ClonePolicy<Function>& pol) const
291 {
292    Program *prog = pol.context()->getProgram();
293 
294    Symbol *that = new_Symbol(prog, reg.file, reg.fileIndex);
295 
296    pol.set<Value>(this, that);
297 
298    that->reg.size = this->reg.size;
299    that->reg.type = this->reg.type;
300    that->reg.data = this->reg.data;
301 
302    that->baseSym = this->baseSym;
303 
304    return that;
305 }
306 
307 bool
isUniform() const308 Symbol::isUniform() const
309 {
310    return
311       reg.file != FILE_SYSTEM_VALUE &&
312       reg.file != FILE_MEMORY_LOCAL &&
313       reg.file != FILE_SHADER_INPUT;
314 }
315 
ImmediateValue(Program * prog,uint32_t uval)316 ImmediateValue::ImmediateValue(Program *prog, uint32_t uval)
317 {
318    memset(&reg, 0, sizeof(reg));
319 
320    reg.file = FILE_IMMEDIATE;
321    reg.size = 4;
322    reg.type = TYPE_U32;
323 
324    reg.data.u32 = uval;
325 
326    prog->add(this, this->id);
327 }
328 
ImmediateValue(Program * prog,float fval)329 ImmediateValue::ImmediateValue(Program *prog, float fval)
330 {
331    memset(&reg, 0, sizeof(reg));
332 
333    reg.file = FILE_IMMEDIATE;
334    reg.size = 4;
335    reg.type = TYPE_F32;
336 
337    reg.data.f32 = fval;
338 
339    prog->add(this, this->id);
340 }
341 
ImmediateValue(Program * prog,double dval)342 ImmediateValue::ImmediateValue(Program *prog, double dval)
343 {
344    memset(&reg, 0, sizeof(reg));
345 
346    reg.file = FILE_IMMEDIATE;
347    reg.size = 8;
348    reg.type = TYPE_F64;
349 
350    reg.data.f64 = dval;
351 
352    prog->add(this, this->id);
353 }
354 
// Create a retyped copy of @proto: same storage description, but with type
// @ty and the matching size. Unlike the other constructors, this one does
// not add the immediate to a Program.
ImmediateValue::ImmediateValue(const ImmediateValue *proto, DataType ty)
{
   reg = proto->reg;

   reg.size = typeSizeof(ty);
   reg.type = ty;
}
362 
363 ImmediateValue *
clone(ClonePolicy<Function> & pol) const364 ImmediateValue::clone(ClonePolicy<Function>& pol) const
365 {
366    Program *prog = pol.context()->getProgram();
367    ImmediateValue *that = new_ImmediateValue(prog, 0u);
368 
369    pol.set<Value>(this, that);
370 
371    that->reg.size = this->reg.size;
372    that->reg.type = this->reg.type;
373    that->reg.data = this->reg.data;
374 
375    return that;
376 }
377 
378 bool
isInteger(const int i) const379 ImmediateValue::isInteger(const int i) const
380 {
381    switch (reg.type) {
382    case TYPE_S8:
383       return reg.data.s8 == i;
384    case TYPE_U8:
385       return reg.data.u8 == i;
386    case TYPE_S16:
387       return reg.data.s16 == i;
388    case TYPE_U16:
389       return reg.data.u16 == i;
390    case TYPE_S32:
391    case TYPE_U32:
392       return reg.data.s32 == i; // as if ...
393    case TYPE_S64:
394    case TYPE_U64:
395       return reg.data.s64 == i; // as if ...
396    case TYPE_F32:
397       return reg.data.f32 == static_cast<float>(i);
398    case TYPE_F64:
399       return reg.data.f64 == static_cast<double>(i);
400    default:
401       return false;
402    }
403 }
404 
405 bool
isNegative() const406 ImmediateValue::isNegative() const
407 {
408    switch (reg.type) {
409    case TYPE_S8:  return reg.data.s8 < 0;
410    case TYPE_S16: return reg.data.s16 < 0;
411    case TYPE_S32:
412    case TYPE_U32: return reg.data.s32 < 0;
413    case TYPE_F32: return reg.data.u32 & (1 << 31);
414    case TYPE_F64: return reg.data.u64 & (1ULL << 63);
415    default:
416       return false;
417    }
418 }
419 
420 bool
isPow2() const421 ImmediateValue::isPow2() const
422 {
423    if (reg.type == TYPE_U64 || reg.type == TYPE_S64)
424       return util_is_power_of_two_or_zero64(reg.data.u64);
425    else
426       return util_is_power_of_two_or_zero(reg.data.u32);
427 }
428 
429 void
applyLog2()430 ImmediateValue::applyLog2()
431 {
432    switch (reg.type) {
433    case TYPE_S8:
434    case TYPE_S16:
435    case TYPE_S32:
436       assert(!this->isNegative());
437       FALLTHROUGH;
438    case TYPE_U8:
439    case TYPE_U16:
440    case TYPE_U32:
441       reg.data.u32 = util_logbase2(reg.data.u32);
442       break;
443    case TYPE_S64:
444       assert(!this->isNegative());
445       FALLTHROUGH;
446    case TYPE_U64:
447       reg.data.u64 = util_logbase2_64(reg.data.u64);
448       break;
449    case TYPE_F32:
450       reg.data.f32 = log2f(reg.data.f32);
451       break;
452    case TYPE_F64:
453       reg.data.f64 = log2(reg.data.f64);
454       break;
455    default:
456       assert(0);
457       break;
458    }
459 }
460 
461 bool
compare(CondCode cc,float fval) const462 ImmediateValue::compare(CondCode cc, float fval) const
463 {
464    if (reg.type != TYPE_F32)
465       ERROR("immediate value is not of type f32");
466 
467    switch (static_cast<CondCode>(cc & 7)) {
468    case CC_TR: return true;
469    case CC_FL: return false;
470    case CC_LT: return reg.data.f32 <  fval;
471    case CC_LE: return reg.data.f32 <= fval;
472    case CC_GT: return reg.data.f32 >  fval;
473    case CC_GE: return reg.data.f32 >= fval;
474    case CC_EQ: return reg.data.f32 == fval;
475    case CC_NE: return reg.data.f32 != fval;
476    default:
477       assert(0);
478       return false;
479    }
480 }
481 
482 ImmediateValue&
operator =(const ImmediateValue & that)483 ImmediateValue::operator=(const ImmediateValue &that)
484 {
485    this->reg = that.reg;
486    return (*this);
487 }
488 
489 bool
interfers(const Value * that) const490 Value::interfers(const Value *that) const
491 {
492    uint32_t idA, idB;
493 
494    if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
495       return false;
496    if (this->asImm())
497       return false;
498 
499    if (this->asSym()) {
500       idA = this->join->reg.data.offset;
501       idB = that->join->reg.data.offset;
502    } else {
503       idA = this->join->reg.data.id * MIN2(this->reg.size, 4);
504       idB = that->join->reg.data.id * MIN2(that->reg.size, 4);
505    }
506 
507    if (idA < idB)
508       return (idA + this->reg.size > idB);
509    else
510    if (idA > idB)
511       return (idB + that->reg.size > idA);
512    else
513       return (idA == idB);
514 }
515 
516 bool
equals(const Value * that,bool strict) const517 Value::equals(const Value *that, bool strict) const
518 {
519    if (strict)
520       return this == that;
521 
522    if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
523       return false;
524    if (that->reg.size != this->reg.size)
525       return false;
526 
527    if (that->reg.data.id != this->reg.data.id)
528       return false;
529 
530    return true;
531 }
532 
533 bool
equals(const Value * that,bool strict) const534 ImmediateValue::equals(const Value *that, bool strict) const
535 {
536    const ImmediateValue *imm = that->asImm();
537    if (!imm)
538       return false;
539    return reg.data.u64 == imm->reg.data.u64;
540 }
541 
542 bool
equals(const Value * that,bool strict) const543 Symbol::equals(const Value *that, bool strict) const
544 {
545    if (reg.file != that->reg.file || reg.fileIndex != that->reg.fileIndex)
546       return false;
547    assert(that->asSym());
548 
549    if (this->baseSym != that->asSym()->baseSym)
550       return false;
551 
552    if (reg.file == FILE_SYSTEM_VALUE)
553       return (this->reg.data.sv.sv    == that->reg.data.sv.sv &&
554               this->reg.data.sv.index == that->reg.data.sv.index);
555    return this->reg.data.offset == that->reg.data.offset;
556 }
557 
init()558 void Instruction::init()
559 {
560    next = prev = 0;
561    serial = 0;
562 
563    cc = CC_ALWAYS;
564    rnd = ROUND_N;
565    cache = CACHE_CA;
566    subOp = 0;
567 
568    saturate = 0;
569    join = 0;
570    exit = 0;
571    terminator = 0;
572    ftz = 0;
573    dnz = 0;
574    perPatch = 0;
575    fixed = 0;
576    encSize = 0;
577    ipa = 0;
578    mask = 0;
579    precise = 0;
580 
581    lanes = 0xf;
582 
583    postFactor = 0;
584 
585    predSrc = -1;
586    flagsDef = -1;
587    flagsSrc = -1;
588 
589    sched = 0;
590    bb = NULL;
591 }
592 
Instruction()593 Instruction::Instruction()
594 {
595    init();
596 
597    op = OP_NOP;
598    dType = sType = TYPE_F32;
599 
600    id = -1;
601 }
602 
Instruction(Function * fn,operation opr,DataType ty)603 Instruction::Instruction(Function *fn, operation opr, DataType ty)
604 {
605    init();
606 
607    op = opr;
608    dType = sType = ty;
609 
610    fn->add(this, id);
611 }
612 
~Instruction()613 Instruction::~Instruction()
614 {
615    if (bb) {
616       Function *fn = bb->getFunction();
617       bb->remove(this);
618       fn->allInsns.remove(id);
619    }
620 
621    for (int s = 0; srcExists(s); ++s)
622       setSrc(s, NULL);
623    // must unlink defs too since the list pointers will get deallocated
624    for (int d = 0; defExists(d); ++d)
625       setDef(d, NULL);
626 }
627 
628 void
setDef(int i,Value * val)629 Instruction::setDef(int i, Value *val)
630 {
631    int size = defs.size();
632    if (i >= size) {
633       defs.resize(i + 1);
634       while (size <= i)
635          defs[size++].setInsn(this);
636    }
637    defs[i].set(val);
638 }
639 
640 void
setSrc(int s,Value * val)641 Instruction::setSrc(int s, Value *val)
642 {
643    int size = srcs.size();
644    if (s >= size) {
645       srcs.resize(s + 1);
646       while (size <= s)
647          srcs[size++].setInsn(this);
648    }
649    srcs[s].set(val);
650 }
651 
652 void
setSrc(int s,const ValueRef & ref)653 Instruction::setSrc(int s, const ValueRef& ref)
654 {
655    setSrc(s, ref.get());
656    srcs[s].mod = ref.mod;
657 }
658 
659 void
swapSources(int a,int b)660 Instruction::swapSources(int a, int b)
661 {
662    Value *value = srcs[a].get();
663    Modifier m = srcs[a].mod;
664 
665    setSrc(a, srcs[b]);
666 
667    srcs[b].set(value);
668    srcs[b].mod = m;
669 }
670 
// Fix up a source index after the sources starting at @s were moved by
// @delta: indices >= @s are shifted along; when moving down (@delta < 0),
// indices in the erased range [@s + @delta, @s) become -1. All other
// indices (including -1) are left unchanged.
static inline void moveSourcesAdjustIndex(int8_t &index, int s, int delta)
{
   if (index >= s) {
      index += delta;
      return;
   }
   if (delta < 0 && index >= s + delta)
      index = -1;
}
679 
680 // Moves sources [@s,last_source] by @delta.
681 // If @delta < 0, sources [@s - abs(@delta), @s) are erased.
682 void
moveSources(const int s,const int delta)683 Instruction::moveSources(const int s, const int delta)
684 {
685    if (delta == 0)
686       return;
687    assert(s + delta >= 0);
688 
689    int k;
690 
691    for (k = 0; srcExists(k); ++k) {
692       for (int i = 0; i < 2; ++i)
693          moveSourcesAdjustIndex(src(k).indirect[i], s, delta);
694    }
695    moveSourcesAdjustIndex(predSrc, s, delta);
696    moveSourcesAdjustIndex(flagsSrc, s, delta);
697    if (asTex()) {
698       TexInstruction *tex = asTex();
699       moveSourcesAdjustIndex(tex->tex.rIndirectSrc, s, delta);
700       moveSourcesAdjustIndex(tex->tex.sIndirectSrc, s, delta);
701    }
702 
703    if (delta > 0) {
704       --k;
705       for (int p = k + delta; k >= s; --k, --p)
706          setSrc(p, src(k));
707    } else {
708       int p;
709       for (p = s; p < k; ++p)
710          setSrc(p + delta, src(p));
711       for (; (p + delta) < k; ++p)
712          setSrc(p + delta, NULL);
713    }
714 }
715 
716 void
takeExtraSources(int s,Value * values[3])717 Instruction::takeExtraSources(int s, Value *values[3])
718 {
719    values[0] = getIndirect(s, 0);
720    if (values[0])
721       setIndirect(s, 0, NULL);
722 
723    values[1] = getIndirect(s, 1);
724    if (values[1])
725       setIndirect(s, 1, NULL);
726 
727    values[2] = getPredicate();
728    if (values[2])
729       setPredicate(cc, NULL);
730 }
731 
732 void
putExtraSources(int s,Value * values[3])733 Instruction::putExtraSources(int s, Value *values[3])
734 {
735    if (values[0])
736       setIndirect(s, 0, values[0]);
737    if (values[1])
738       setIndirect(s, 1, values[1]);
739    if (values[2])
740       setPredicate(cc, values[2]);
741 }
742 
743 Instruction *
clone(ClonePolicy<Function> & pol,Instruction * i) const744 Instruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
745 {
746    if (!i)
747       i = new_Instruction(pol.context(), op, dType);
748 #if !defined(NDEBUG) && defined(__cpp_rtti)
749    assert(typeid(*i) == typeid(*this));
750 #endif
751 
752    pol.set<Instruction>(this, i);
753 
754    i->sType = sType;
755 
756    i->rnd = rnd;
757    i->cache = cache;
758    i->subOp = subOp;
759 
760    i->saturate = saturate;
761    i->join = join;
762    i->exit = exit;
763    i->mask = mask;
764    i->ftz = ftz;
765    i->dnz = dnz;
766    i->ipa = ipa;
767    i->lanes = lanes;
768    i->perPatch = perPatch;
769 
770    i->postFactor = postFactor;
771 
772    for (int d = 0; defExists(d); ++d)
773       i->setDef(d, pol.get(getDef(d)));
774 
775    for (int s = 0; srcExists(s); ++s) {
776       i->setSrc(s, pol.get(getSrc(s)));
777       i->src(s).mod = src(s).mod;
778    }
779 
780    i->cc = cc;
781    i->predSrc = predSrc;
782    i->flagsDef = flagsDef;
783    i->flagsSrc = flagsSrc;
784 
785    return i;
786 }
787 
788 unsigned int
defCount(unsigned int mask,bool singleFile) const789 Instruction::defCount(unsigned int mask, bool singleFile) const
790 {
791    unsigned int i, n;
792 
793    if (singleFile) {
794       unsigned int d = ffs(mask);
795       if (!d)
796          return 0;
797       for (i = d--; defExists(i); ++i)
798          if (getDef(i)->reg.file != getDef(d)->reg.file)
799             mask &= ~(1 << i);
800    }
801 
802    for (n = 0, i = 0; this->defExists(i); ++i, mask >>= 1)
803       n += mask & 1;
804    return n;
805 }
806 
807 unsigned int
srcCount(unsigned int mask,bool singleFile) const808 Instruction::srcCount(unsigned int mask, bool singleFile) const
809 {
810    unsigned int i, n;
811 
812    if (singleFile) {
813       unsigned int s = ffs(mask);
814       if (!s)
815          return 0;
816       for (i = s--; srcExists(i); ++i)
817          if (getSrc(i)->reg.file != getSrc(s)->reg.file)
818             mask &= ~(1 << i);
819    }
820 
821    for (n = 0, i = 0; this->srcExists(i); ++i, mask >>= 1)
822       n += mask & 1;
823    return n;
824 }
825 
826 bool
setIndirect(int s,int dim,Value * value)827 Instruction::setIndirect(int s, int dim, Value *value)
828 {
829    assert(this->srcExists(s));
830 
831    int p = srcs[s].indirect[dim];
832    if (p < 0) {
833       if (!value)
834          return true;
835       p = srcs.size();
836       while (p > 0 && !srcExists(p - 1))
837          --p;
838    }
839    setSrc(p, value);
840    srcs[p].usedAsPtr = (value != 0);
841    srcs[s].indirect[dim] = value ? p : -1;
842    return true;
843 }
844 
845 bool
setPredicate(CondCode ccode,Value * value)846 Instruction::setPredicate(CondCode ccode, Value *value)
847 {
848    cc = ccode;
849 
850    if (!value) {
851       if (predSrc >= 0) {
852          srcs[predSrc].set(NULL);
853          predSrc = -1;
854       }
855       return true;
856    }
857 
858    if (predSrc < 0) {
859       predSrc = srcs.size();
860       while (predSrc > 0 && !srcExists(predSrc - 1))
861          --predSrc;
862    }
863 
864    setSrc(predSrc, value);
865    return true;
866 }
867 
868 bool
writesPredicate() const869 Instruction::writesPredicate() const
870 {
871    for (int d = 0; defExists(d); ++d)
872       if (getDef(d)->inFile(FILE_PREDICATE) || getDef(d)->inFile(FILE_FLAGS))
873          return true;
874    return false;
875 }
876 
877 bool
canCommuteDefSrc(const Instruction * i) const878 Instruction::canCommuteDefSrc(const Instruction *i) const
879 {
880    for (int d = 0; defExists(d); ++d)
881       for (int s = 0; i->srcExists(s); ++s)
882          if (getDef(d)->interfers(i->getSrc(s)))
883             return false;
884    return true;
885 }
886 
887 bool
canCommuteDefDef(const Instruction * i) const888 Instruction::canCommuteDefDef(const Instruction *i) const
889 {
890    for (int d = 0; defExists(d); ++d)
891       for (int c = 0; i->defExists(c); ++c)
892          if (getDef(d)->interfers(i->getDef(c)))
893             return false;
894    return true;
895 }
896 
897 bool
isCommutationLegal(const Instruction * i) const898 Instruction::isCommutationLegal(const Instruction *i) const
899 {
900    return canCommuteDefDef(i) &&
901       canCommuteDefSrc(i) &&
902       i->canCommuteDefSrc(this);
903 }
904 
TexInstruction(Function * fn,operation op)905 TexInstruction::TexInstruction(Function *fn, operation op)
906    : Instruction(fn, op, TYPE_F32), tex()
907 {
908    tex.rIndirectSrc = -1;
909    tex.sIndirectSrc = -1;
910 
911    if (op == OP_TXF)
912       sType = TYPE_U32;
913 }
914 
~TexInstruction()915 TexInstruction::~TexInstruction()
916 {
917    for (int c = 0; c < 3; ++c) {
918       dPdx[c].set(NULL);
919       dPdy[c].set(NULL);
920    }
921    for (int n = 0; n < 4; ++n)
922       for (int c = 0; c < 3; ++c)
923          offset[n][c].set(NULL);
924 }
925 
926 TexInstruction *
clone(ClonePolicy<Function> & pol,Instruction * i) const927 TexInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
928 {
929    TexInstruction *tex = (i ? static_cast<TexInstruction *>(i) :
930                           new_TexInstruction(pol.context(), op));
931 
932    Instruction::clone(pol, tex);
933 
934    tex->tex = this->tex;
935 
936    if (op == OP_TXD) {
937       for (unsigned int c = 0; c < tex->tex.target.getDim(); ++c) {
938          tex->dPdx[c].set(dPdx[c]);
939          tex->dPdy[c].set(dPdy[c]);
940       }
941    }
942 
943    for (int n = 0; n < tex->tex.useOffsets; ++n)
944       for (int c = 0; c < 3; ++c)
945          tex->offset[n][c].set(offset[n][c]);
946 
947    return tex;
948 }
949 
950 const struct TexInstruction::Target::Desc TexInstruction::Target::descTable[] =
951 {
952    { "1D",                1, 1, false, false, false },
953    { "2D",                2, 2, false, false, false },
954    { "2D_MS",             2, 3, false, false, false },
955    { "3D",                3, 3, false, false, false },
956    { "CUBE",              2, 3, false, true,  false },
957    { "1D_SHADOW",         1, 1, false, false, true  },
958    { "2D_SHADOW",         2, 2, false, false, true  },
959    { "CUBE_SHADOW",       2, 3, false, true,  true  },
960    { "1D_ARRAY",          1, 2, true,  false, false },
961    { "2D_ARRAY",          2, 3, true,  false, false },
962    { "2D_MS_ARRAY",       2, 4, true,  false, false },
963    { "CUBE_ARRAY",        2, 4, true,  true,  false },
964    { "1D_ARRAY_SHADOW",   1, 2, true,  false, true  },
965    { "2D_ARRAY_SHADOW",   2, 3, true,  false, true  },
966    { "RECT",              2, 2, false, false, false },
967    { "RECT_SHADOW",       2, 2, false, false, true  },
968    { "CUBE_ARRAY_SHADOW", 2, 4, true,  true,  true  },
969    { "BUFFER",            1, 1, false, false, false },
970 };
971 
972 const struct TexInstruction::ImgFormatDesc TexInstruction::formatTable[] =
973 {
974    { "NONE",         0, {  0,  0,  0,  0 },  UINT },
975 
976    { "RGBA32F",      4, { 32, 32, 32, 32 }, FLOAT },
977    { "RGBA16F",      4, { 16, 16, 16, 16 }, FLOAT },
978    { "RG32F",        2, { 32, 32,  0,  0 }, FLOAT },
979    { "RG16F",        2, { 16, 16,  0,  0 }, FLOAT },
980    { "R11G11B10F",   3, { 11, 11, 10,  0 }, FLOAT },
981    { "R32F",         1, { 32,  0,  0,  0 }, FLOAT },
982    { "R16F",         1, { 16,  0,  0,  0 }, FLOAT },
983 
984    { "RGBA32UI",     4, { 32, 32, 32, 32 },  UINT },
985    { "RGBA16UI",     4, { 16, 16, 16, 16 },  UINT },
986    { "RGB10A2UI",    4, { 10, 10, 10,  2 },  UINT },
987    { "RGBA8UI",      4, {  8,  8,  8,  8 },  UINT },
988    { "RG32UI",       2, { 32, 32,  0,  0 },  UINT },
989    { "RG16UI",       2, { 16, 16,  0,  0 },  UINT },
990    { "RG8UI",        2, {  8,  8,  0,  0 },  UINT },
991    { "R32UI",        1, { 32,  0,  0,  0 },  UINT },
992    { "R16UI",        1, { 16,  0,  0,  0 },  UINT },
993    { "R8UI",         1, {  8,  0,  0,  0 },  UINT },
994 
995    { "RGBA32I",      4, { 32, 32, 32, 32 },  SINT },
996    { "RGBA16I",      4, { 16, 16, 16, 16 },  SINT },
997    { "RGBA8I",       4, {  8,  8,  8,  8 },  SINT },
998    { "RG32I",        2, { 32, 32,  0,  0 },  SINT },
999    { "RG16I",        2, { 16, 16,  0,  0 },  SINT },
1000    { "RG8I",         2, {  8,  8,  0,  0 },  SINT },
1001    { "R32I",         1, { 32,  0,  0,  0 },  SINT },
1002    { "R16I",         1, { 16,  0,  0,  0 },  SINT },
1003    { "R8I",          1, {  8,  0,  0,  0 },  SINT },
1004 
1005    { "RGBA16",       4, { 16, 16, 16, 16 }, UNORM },
1006    { "RGB10A2",      4, { 10, 10, 10,  2 }, UNORM },
1007    { "RGBA8",        4, {  8,  8,  8,  8 }, UNORM },
1008    { "RG16",         2, { 16, 16,  0,  0 }, UNORM },
1009    { "RG8",          2, {  8,  8,  0,  0 }, UNORM },
1010    { "R16",          1, { 16,  0,  0,  0 }, UNORM },
1011    { "R8",           1, {  8,  0,  0,  0 }, UNORM },
1012 
1013    { "RGBA16_SNORM", 4, { 16, 16, 16, 16 }, SNORM },
1014    { "RGBA8_SNORM",  4, {  8,  8,  8,  8 }, SNORM },
1015    { "RG16_SNORM",   2, { 16, 16,  0,  0 }, SNORM },
1016    { "RG8_SNORM",    2, {  8,  8,  0,  0 }, SNORM },
1017    { "R16_SNORM",    1, { 16,  0,  0,  0 }, SNORM },
1018    { "R8_SNORM",     1, {  8,  0,  0,  0 }, SNORM },
1019 
1020    { "BGRA8",        4, {  8,  8,  8,  8 }, UNORM, true },
1021 };
1022 
1023 const struct TexInstruction::ImgFormatDesc *
translateImgFormat(enum pipe_format format)1024 TexInstruction::translateImgFormat(enum pipe_format format)
1025 {
1026 
1027 #define FMT_CASE(a, b) \
1028   case PIPE_FORMAT_ ## a: return &formatTable[nv50_ir::FMT_ ## b]
1029 
1030    switch (format) {
1031    FMT_CASE(NONE, NONE);
1032 
1033    FMT_CASE(R32G32B32A32_FLOAT, RGBA32F);
1034    FMT_CASE(R16G16B16A16_FLOAT, RGBA16F);
1035    FMT_CASE(R32G32_FLOAT, RG32F);
1036    FMT_CASE(R16G16_FLOAT, RG16F);
1037    FMT_CASE(R11G11B10_FLOAT, R11G11B10F);
1038    FMT_CASE(R32_FLOAT, R32F);
1039    FMT_CASE(R16_FLOAT, R16F);
1040 
1041    FMT_CASE(R32G32B32A32_UINT, RGBA32UI);
1042    FMT_CASE(R16G16B16A16_UINT, RGBA16UI);
1043    FMT_CASE(R10G10B10A2_UINT, RGB10A2UI);
1044    FMT_CASE(R8G8B8A8_UINT, RGBA8UI);
1045    FMT_CASE(R32G32_UINT, RG32UI);
1046    FMT_CASE(R16G16_UINT, RG16UI);
1047    FMT_CASE(R8G8_UINT, RG8UI);
1048    FMT_CASE(R32_UINT, R32UI);
1049    FMT_CASE(R16_UINT, R16UI);
1050    FMT_CASE(R8_UINT, R8UI);
1051 
1052    FMT_CASE(R32G32B32A32_SINT, RGBA32I);
1053    FMT_CASE(R16G16B16A16_SINT, RGBA16I);
1054    FMT_CASE(R8G8B8A8_SINT, RGBA8I);
1055    FMT_CASE(R32G32_SINT, RG32I);
1056    FMT_CASE(R16G16_SINT, RG16I);
1057    FMT_CASE(R8G8_SINT, RG8I);
1058    FMT_CASE(R32_SINT, R32I);
1059    FMT_CASE(R16_SINT, R16I);
1060    FMT_CASE(R8_SINT, R8I);
1061 
1062    FMT_CASE(R16G16B16A16_UNORM, RGBA16);
1063    FMT_CASE(R10G10B10A2_UNORM, RGB10A2);
1064    FMT_CASE(R8G8B8A8_UNORM, RGBA8);
1065    FMT_CASE(R16G16_UNORM, RG16);
1066    FMT_CASE(R8G8_UNORM, RG8);
1067    FMT_CASE(R16_UNORM, R16);
1068    FMT_CASE(R8_UNORM, R8);
1069 
1070    FMT_CASE(R16G16B16A16_SNORM, RGBA16_SNORM);
1071    FMT_CASE(R8G8B8A8_SNORM, RGBA8_SNORM);
1072    FMT_CASE(R16G16_SNORM, RG16_SNORM);
1073    FMT_CASE(R8G8_SNORM, RG8_SNORM);
1074    FMT_CASE(R16_SNORM, R16_SNORM);
1075    FMT_CASE(R8_SNORM, R8_SNORM);
1076 
1077    FMT_CASE(B8G8R8A8_UNORM, BGRA8);
1078 
1079    default:
1080       assert(!"Unexpected format");
1081       return &formatTable[nv50_ir::FMT_NONE];
1082    }
1083 }
1084 
1085 void
setIndirectR(Value * v)1086 TexInstruction::setIndirectR(Value *v)
1087 {
1088    int p = ((tex.rIndirectSrc < 0) && v) ? srcs.size() : tex.rIndirectSrc;
1089    if (p >= 0) {
1090       tex.rIndirectSrc = p;
1091       setSrc(p, v);
1092       srcs[p].usedAsPtr = !!v;
1093    }
1094 }
1095 
1096 void
setIndirectS(Value * v)1097 TexInstruction::setIndirectS(Value *v)
1098 {
1099    int p = ((tex.sIndirectSrc < 0) && v) ? srcs.size() : tex.sIndirectSrc;
1100    if (p >= 0) {
1101       tex.sIndirectSrc = p;
1102       setSrc(p, v);
1103       srcs[p].usedAsPtr = !!v;
1104    }
1105 }
1106 
CmpInstruction(Function * fn,operation op)1107 CmpInstruction::CmpInstruction(Function *fn, operation op)
1108    : Instruction(fn, op, TYPE_F32)
1109 {
1110    setCond = CC_ALWAYS;
1111 }
1112 
1113 CmpInstruction *
clone(ClonePolicy<Function> & pol,Instruction * i) const1114 CmpInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
1115 {
1116    CmpInstruction *cmp = (i ? static_cast<CmpInstruction *>(i) :
1117                           new_CmpInstruction(pol.context(), op));
1118    cmp->dType = dType;
1119    Instruction::clone(pol, cmp);
1120    cmp->setCond = setCond;
1121    return cmp;
1122 }
1123 
FlowInstruction(Function * fn,operation op,void * targ)1124 FlowInstruction::FlowInstruction(Function *fn, operation op, void *targ)
1125    : Instruction(fn, op, TYPE_NONE)
1126 {
1127    if (op == OP_CALL)
1128       target.fn = reinterpret_cast<Function *>(targ);
1129    else
1130       target.bb = reinterpret_cast<BasicBlock *>(targ);
1131 
1132    if (op == OP_BRA ||
1133        op == OP_CONT || op == OP_BREAK ||
1134        op == OP_RET || op == OP_EXIT)
1135       terminator = 1;
1136    else
1137    if (op == OP_JOIN)
1138       terminator = targ ? 1 : 0;
1139 
1140    allWarp = absolute = limit = builtin = indirect = 0;
1141 }
1142 
1143 FlowInstruction *
clone(ClonePolicy<Function> & pol,Instruction * i) const1144 FlowInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
1145 {
1146    FlowInstruction *flow = (i ? static_cast<FlowInstruction *>(i) :
1147                             new_FlowInstruction(pol.context(), op, NULL));
1148 
1149    Instruction::clone(pol, flow);
1150    flow->allWarp = allWarp;
1151    flow->absolute = absolute;
1152    flow->limit = limit;
1153    flow->builtin = builtin;
1154 
1155    if (builtin)
1156       flow->target.builtin = target.builtin;
1157    else
1158    if (op == OP_CALL)
1159       flow->target.fn = target.fn;
1160    else
1161    if (target.bb)
1162       flow->target.bb = pol.get<BasicBlock>(target.bb);
1163 
1164    return flow;
1165 }
1166 
Program(Type type,Target * arch)1167 Program::Program(Type type, Target *arch)
1168    : progType(type),
1169      target(arch),
1170      tlsSize(0),
1171      mem_Instruction(sizeof(Instruction), 6),
1172      mem_CmpInstruction(sizeof(CmpInstruction), 4),
1173      mem_TexInstruction(sizeof(TexInstruction), 4),
1174      mem_FlowInstruction(sizeof(FlowInstruction), 4),
1175      mem_LValue(sizeof(LValue), 8),
1176      mem_Symbol(sizeof(Symbol), 7),
1177      mem_ImmediateValue(sizeof(ImmediateValue), 7),
1178      driver(NULL),
1179      driver_out(NULL)
1180 {
1181    code = NULL;
1182    binSize = 0;
1183 
1184    maxGPR = -1;
1185    fp64 = false;
1186    persampleInvocation = false;
1187 
1188    main = new Function(this, "MAIN", ~0);
1189    calls.insert(&main->call);
1190 
1191    dbgFlags = 0;
1192    optLevel = 0;
1193 
1194    targetPriv = NULL;
1195 }
1196 
~Program()1197 Program::~Program()
1198 {
1199    for (ArrayList::Iterator it = allFuncs.iterator(); !it.end(); it.next())
1200       delete reinterpret_cast<Function *>(it.get());
1201 
1202    for (ArrayList::Iterator it = allRValues.iterator(); !it.end(); it.next())
1203       releaseValue(reinterpret_cast<Value *>(it.get()));
1204 }
1205 
releaseInstruction(Instruction * insn)1206 void Program::releaseInstruction(Instruction *insn)
1207 {
1208    // TODO: make this not suck so much
1209 
1210    insn->~Instruction();
1211 
1212    if (insn->asCmp())
1213       mem_CmpInstruction.release(insn);
1214    else
1215    if (insn->asTex())
1216       mem_TexInstruction.release(insn);
1217    else
1218    if (insn->asFlow())
1219       mem_FlowInstruction.release(insn);
1220    else
1221       mem_Instruction.release(insn);
1222 }
1223 
releaseValue(Value * value)1224 void Program::releaseValue(Value *value)
1225 {
1226    value->~Value();
1227 
1228    if (value->asLValue())
1229       mem_LValue.release(value);
1230    else
1231    if (value->asImm())
1232       mem_ImmediateValue.release(value);
1233    else
1234    if (value->asSym())
1235       mem_Symbol.release(value);
1236 }
1237 
1238 
1239 } // namespace nv50_ir
1240 
1241 extern "C" {
1242 
1243 static void
nv50_ir_init_prog_info(struct nv50_ir_prog_info * info,struct nv50_ir_prog_info_out * info_out)1244 nv50_ir_init_prog_info(struct nv50_ir_prog_info *info,
1245                        struct nv50_ir_prog_info_out *info_out)
1246 {
1247    info_out->target = info->target;
1248    info_out->type = info->type;
1249    if (info->type == PIPE_SHADER_TESS_CTRL || info->type == PIPE_SHADER_TESS_EVAL) {
1250       info_out->prop.tp.domain = PIPE_PRIM_MAX;
1251       info_out->prop.tp.outputPrim = PIPE_PRIM_MAX;
1252    }
1253    if (info->type == PIPE_SHADER_GEOMETRY) {
1254       info_out->prop.gp.instanceCount = 1;
1255       info_out->prop.gp.maxVertices = 1;
1256    }
1257    if (info->type == PIPE_SHADER_COMPUTE) {
1258       info->prop.cp.numThreads[0] =
1259       info->prop.cp.numThreads[1] =
1260       info->prop.cp.numThreads[2] = 1;
1261    }
1262    info_out->bin.smemSize = info->bin.smemSize;
1263    info_out->io.genUserClip = info->io.genUserClip;
1264    info_out->io.instanceId = 0xff;
1265    info_out->io.vertexId = 0xff;
1266    info_out->io.edgeFlagIn = 0xff;
1267    info_out->io.edgeFlagOut = 0xff;
1268    info_out->io.fragDepth = 0xff;
1269    info_out->io.sampleMask = 0xff;
1270 }
1271 
1272 int
nv50_ir_generate_code(struct nv50_ir_prog_info * info,struct nv50_ir_prog_info_out * info_out)1273 nv50_ir_generate_code(struct nv50_ir_prog_info *info,
1274                       struct nv50_ir_prog_info_out *info_out)
1275 {
1276    int ret = 0;
1277 
1278    nv50_ir::Program::Type type;
1279 
1280    nv50_ir_init_prog_info(info, info_out);
1281 
1282 #define PROG_TYPE_CASE(a, b)                                      \
1283    case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break
1284 
1285    switch (info->type) {
1286    PROG_TYPE_CASE(VERTEX, VERTEX);
1287    PROG_TYPE_CASE(TESS_CTRL, TESSELLATION_CONTROL);
1288    PROG_TYPE_CASE(TESS_EVAL, TESSELLATION_EVAL);
1289    PROG_TYPE_CASE(GEOMETRY, GEOMETRY);
1290    PROG_TYPE_CASE(FRAGMENT, FRAGMENT);
1291    PROG_TYPE_CASE(COMPUTE, COMPUTE);
1292    default:
1293       INFO_DBG(info->dbgFlags, VERBOSE, "unsupported program type %u\n", info->type);
1294       return -1;
1295    }
1296    INFO_DBG(info->dbgFlags, VERBOSE, "translating program of type %u\n", type);
1297 
1298    nv50_ir::Target *targ = nv50_ir::Target::create(info->target);
1299    if (!targ)
1300       return -1;
1301 
1302    nv50_ir::Program *prog = new nv50_ir::Program(type, targ);
1303    if (!prog) {
1304       nv50_ir::Target::destroy(targ);
1305       return -1;
1306    }
1307    prog->driver = info;
1308    prog->driver_out = info_out;
1309    prog->dbgFlags = info->dbgFlags;
1310    prog->optLevel = info->optLevel;
1311 
1312    switch (info->bin.sourceRep) {
1313    case PIPE_SHADER_IR_NIR:
1314       ret = prog->makeFromNIR(info, info_out) ? 0 : -2;
1315       break;
1316    case PIPE_SHADER_IR_TGSI:
1317       ret = prog->makeFromTGSI(info, info_out) ? 0 : -2;
1318       break;
1319    default:
1320       ret = -1;
1321       break;
1322    }
1323    if (ret < 0)
1324       goto out;
1325    if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
1326       prog->print();
1327 
1328    targ->parseDriverInfo(info, info_out);
1329    prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA);
1330 
1331    prog->convertToSSA();
1332 
1333    if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
1334       prog->print();
1335 
1336    prog->optimizeSSA(info->optLevel);
1337    prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_SSA);
1338 
1339    if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
1340       prog->print();
1341 
1342    if (!prog->registerAllocation()) {
1343       ret = -4;
1344       goto out;
1345    }
1346    prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_POST_RA);
1347 
1348    prog->optimizePostRA(info->optLevel);
1349 
1350    if (!prog->emitBinary(info_out)) {
1351       ret = -5;
1352       goto out;
1353    }
1354 
1355 out:
1356    INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret);
1357 
1358    info_out->bin.maxGPR = prog->maxGPR;
1359    info_out->bin.code = prog->code;
1360    info_out->bin.codeSize = prog->binSize;
1361    info_out->bin.tlsSpace = ALIGN(prog->tlsSize, 0x10);
1362 
1363    delete prog;
1364    nv50_ir::Target::destroy(targ);
1365 
1366    return ret;
1367 }
1368 
1369 } // extern "C"
1370