• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2011 Christoph Bumiller
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19  * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20  * SOFTWARE.
21  */
22 
23 #include "nv50/codegen/nv50_ir.h"
24 #include "nv50/codegen/nv50_ir_build_util.h"
25 
26 #include "nv50_ir_target_nvc0.h"
27 
28 #include <limits>
29 
30 namespace nv50_ir {
31 
// Two-bit per-lane operation selectors used to build a quad-op mask.
#define QOP_ADD  0
#define QOP_SUBR 1
#define QOP_SUB  2
#define QOP_MOV2 3

// Packs four lane selectors into one byte, two bits per lane.
// Argument order:  UL UR LL LR  (first argument ends up in bits 7:6,
// last argument in bits 1:0).
#define QUADOP(q, r, s, t)                        \
   (((QOP_##q) << 6) | ((QOP_##r) << 4) |         \
    ((QOP_##s) << 2) | ((QOP_##t) << 0))
41 
42 class NVC0LegalizeSSA : public Pass
43 {
44 private:
45    virtual bool visit(BasicBlock *);
46    virtual bool visit(Function *);
47 
48    // we want to insert calls to the builtin library only after optimization
49    void handleDIV(Instruction *); // integer division, modulus
50    void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt
51 
52 private:
53    BuildUtil bld;
54 };
55 
56 void
handleDIV(Instruction * i)57 NVC0LegalizeSSA::handleDIV(Instruction *i)
58 {
59    FlowInstruction *call;
60    int builtin;
61    Value *def[2];
62 
63    bld.setPosition(i, false);
64    def[0] = bld.mkMovToReg(0, i->getSrc(0))->getDef(0);
65    def[1] = bld.mkMovToReg(1, i->getSrc(1))->getDef(0);
66    switch (i->dType) {
67    case TYPE_U32: builtin = NVC0_BUILTIN_DIV_U32; break;
68    case TYPE_S32: builtin = NVC0_BUILTIN_DIV_S32; break;
69    default:
70       return;
71    }
72    call = bld.mkFlow(OP_CALL, NULL, CC_ALWAYS, NULL);
73    bld.mkMov(i->getDef(0), def[(i->op == OP_DIV) ? 0 : 1]);
74    bld.mkClobber(FILE_GPR, (i->op == OP_DIV) ? 0xe : 0xd, 2);
75    bld.mkClobber(FILE_PREDICATE, (i->dType == TYPE_S32) ? 0xf : 0x3, 0);
76 
77    call->fixed = 1;
78    call->absolute = call->builtin = 1;
79    call->target.builtin = builtin;
80    delete_Instruction(prog, i);
81 }
82 
83 void
handleRCPRSQ(Instruction * i)84 NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
85 {
86    // TODO
87 }
88 
89 bool
visit(Function * fn)90 NVC0LegalizeSSA::visit(Function *fn)
91 {
92    bld.setProgram(fn->getProgram());
93    return true;
94 }
95 
96 bool
visit(BasicBlock * bb)97 NVC0LegalizeSSA::visit(BasicBlock *bb)
98 {
99    Instruction *next;
100    for (Instruction *i = bb->getEntry(); i; i = next) {
101       next = i->next;
102       if (i->dType == TYPE_F32)
103          continue;
104       switch (i->op) {
105       case OP_DIV:
106       case OP_MOD:
107          handleDIV(i);
108          break;
109       case OP_RCP:
110       case OP_RSQ:
111          if (i->dType == TYPE_F64)
112             handleRCPRSQ(i);
113          break;
114       default:
115          break;
116       }
117    }
118    return true;
119 }
120 
121 class NVC0LegalizePostRA : public Pass
122 {
123 public:
124    NVC0LegalizePostRA(const Program *);
125 
126 private:
127    virtual bool visit(Function *);
128    virtual bool visit(BasicBlock *);
129 
130    void replaceZero(Instruction *);
131    void split64BitOp(Instruction *);
132    bool tryReplaceContWithBra(BasicBlock *);
133    void propagateJoin(BasicBlock *);
134 
135    struct TexUse
136    {
TexUsenv50_ir::NVC0LegalizePostRA::TexUse137       TexUse(Instruction *use, const Instruction *tex)
138          : insn(use), tex(tex), level(-1) { }
139       Instruction *insn;
140       const Instruction *tex; // or split / mov
141       int level;
142    };
143    struct Limits
144    {
Limitsnv50_ir::NVC0LegalizePostRA::Limits145       Limits() { }
Limitsnv50_ir::NVC0LegalizePostRA::Limits146       Limits(int min, int max) : min(min), max(max) { }
147       int min, max;
148    };
149    bool insertTextureBarriers(Function *);
150    inline bool insnDominatedBy(const Instruction *, const Instruction *) const;
151    void findFirstUses(const Instruction *tex, const Instruction *def,
152                       std::list<TexUse>&);
153    void findOverwritingDefs(const Instruction *tex, Instruction *insn,
154                             const BasicBlock *term,
155                             std::list<TexUse>&);
156    void addTexUse(std::list<TexUse>&, Instruction *, const Instruction *);
157    const Instruction *recurseDef(const Instruction *);
158 
159 private:
160    LValue *r63;
161    const bool needTexBar;
162 };
163 
NVC0LegalizePostRA(const Program * prog)164 NVC0LegalizePostRA::NVC0LegalizePostRA(const Program *prog)
165    : needTexBar(prog->getTarget()->getChipset() >= 0xe0)
166 {
167 }
168 
169 bool
insnDominatedBy(const Instruction * later,const Instruction * early) const170 NVC0LegalizePostRA::insnDominatedBy(const Instruction *later,
171                                     const Instruction *early) const
172 {
173    if (early->bb == later->bb)
174       return early->serial < later->serial;
175    return later->bb->dominatedBy(early->bb);
176 }
177 
178 void
addTexUse(std::list<TexUse> & uses,Instruction * usei,const Instruction * insn)179 NVC0LegalizePostRA::addTexUse(std::list<TexUse> &uses,
180                               Instruction *usei, const Instruction *insn)
181 {
182    bool add = true;
183    for (std::list<TexUse>::iterator it = uses.begin();
184         it != uses.end();) {
185       if (insnDominatedBy(usei, it->insn)) {
186          add = false;
187          break;
188       }
189       if (insnDominatedBy(it->insn, usei))
190          it = uses.erase(it);
191       else
192          ++it;
193    }
194    if (add)
195       uses.push_back(TexUse(usei, insn));
196 }
197 
198 void
findOverwritingDefs(const Instruction * texi,Instruction * insn,const BasicBlock * term,std::list<TexUse> & uses)199 NVC0LegalizePostRA::findOverwritingDefs(const Instruction *texi,
200                                         Instruction *insn,
201                                         const BasicBlock *term,
202                                         std::list<TexUse> &uses)
203 {
204    while (insn->op == OP_MOV && insn->getDef(0)->equals(insn->getSrc(0)))
205       insn = insn->getSrc(0)->getUniqueInsn();
206 
207    if (!insn || !insn->bb->reachableBy(texi->bb, term))
208       return;
209 
210    switch (insn->op) {
211    /* Values not connected to the tex's definition through any of these should
212     * not be conflicting.
213     */
214    case OP_SPLIT:
215    case OP_MERGE:
216    case OP_PHI:
217    case OP_UNION:
218       /* recurse again */
219       for (int s = 0; insn->srcExists(s); ++s)
220          findOverwritingDefs(texi, insn->getSrc(s)->getUniqueInsn(), term,
221                              uses);
222       break;
223    default:
224       // if (!isTextureOp(insn->op)) // TODO: are TEXes always ordered ?
225       addTexUse(uses, insn, texi);
226       break;
227    }
228 }
229 
230 void
findFirstUses(const Instruction * texi,const Instruction * insn,std::list<TexUse> & uses)231 NVC0LegalizePostRA::findFirstUses(const Instruction *texi,
232                                   const Instruction *insn,
233                                   std::list<TexUse> &uses)
234 {
235    for (int d = 0; insn->defExists(d); ++d) {
236       Value *v = insn->getDef(d);
237       for (Value::UseIterator u = v->uses.begin(); u != v->uses.end(); ++u) {
238          Instruction *usei = (*u)->getInsn();
239 
240          if (usei->op == OP_PHI || usei->op == OP_UNION) {
241             // need a barrier before WAW cases
242             for (int s = 0; usei->srcExists(s); ++s) {
243                Instruction *defi = usei->getSrc(s)->getUniqueInsn();
244                if (defi && &usei->src(s) != *u)
245                   findOverwritingDefs(texi, defi, usei->bb, uses);
246             }
247          }
248 
249          if (usei->op == OP_SPLIT ||
250              usei->op == OP_MERGE ||
251              usei->op == OP_PHI ||
252              usei->op == OP_UNION) {
253             // these uses don't manifest in the machine code
254             findFirstUses(texi, usei, uses);
255          } else
256          if (usei->op == OP_MOV && usei->getDef(0)->equals(usei->getSrc(0)) &&
257              usei->subOp != NV50_IR_SUBOP_MOV_FINAL) {
258             findFirstUses(texi, usei, uses);
259          } else {
260             addTexUse(uses, usei, insn);
261          }
262       }
263    }
264 }
265 
266 // Texture barriers:
267 // This pass is a bit long and ugly and can probably be optimized.
268 //
269 // 1. obtain a list of TEXes and their outputs' first use(s)
270 // 2. calculate the barrier level of each first use (minimal number of TEXes,
271 //    over all paths, between the TEX and the use in question)
272 // 3. for each barrier, if all paths from the source TEX to that barrier
273 //    contain a barrier of lesser level, it can be culled
274 bool
insertTextureBarriers(Function * fn)275 NVC0LegalizePostRA::insertTextureBarriers(Function *fn)
276 {
277    std::list<TexUse> *uses;
278    std::vector<Instruction *> texes;
279    std::vector<int> bbFirstTex;
280    std::vector<int> bbFirstUse;
281    std::vector<int> texCounts;
282    std::vector<TexUse> useVec;
283    ArrayList insns;
284 
285    fn->orderInstructions(insns);
286 
287    texCounts.resize(fn->allBBlocks.getSize(), 0);
288    bbFirstTex.resize(fn->allBBlocks.getSize(), insns.getSize());
289    bbFirstUse.resize(fn->allBBlocks.getSize(), insns.getSize());
290 
291    // tag BB CFG nodes by their id for later
292    for (ArrayList::Iterator i = fn->allBBlocks.iterator(); !i.end(); i.next()) {
293       BasicBlock *bb = reinterpret_cast<BasicBlock *>(i.get());
294       if (bb)
295          bb->cfg.tag = bb->getId();
296    }
297 
298    // gather the first uses for each TEX
299    for (int i = 0; i < insns.getSize(); ++i) {
300       Instruction *tex = reinterpret_cast<Instruction *>(insns.get(i));
301       if (isTextureOp(tex->op)) {
302          texes.push_back(tex);
303          if (!texCounts.at(tex->bb->getId()))
304             bbFirstTex[tex->bb->getId()] = texes.size() - 1;
305          texCounts[tex->bb->getId()]++;
306       }
307    }
308    insns.clear();
309    if (texes.empty())
310       return false;
311    uses = new std::list<TexUse>[texes.size()];
312    if (!uses)
313       return false;
314    for (size_t i = 0; i < texes.size(); ++i)
315       findFirstUses(texes[i], texes[i], uses[i]);
316 
317    // determine the barrier level at each use
318    for (size_t i = 0; i < texes.size(); ++i) {
319       for (std::list<TexUse>::iterator u = uses[i].begin(); u != uses[i].end();
320            ++u) {
321          BasicBlock *tb = texes[i]->bb;
322          BasicBlock *ub = u->insn->bb;
323          if (tb == ub) {
324             u->level = 0;
325             for (size_t j = i + 1; j < texes.size() &&
326                     texes[j]->bb == tb && texes[j]->serial < u->insn->serial;
327                  ++j)
328                u->level++;
329          } else {
330             u->level = fn->cfg.findLightestPathWeight(&tb->cfg,
331                                                       &ub->cfg, texCounts);
332             if (u->level < 0) {
333                WARN("Failed to find path TEX -> TEXBAR\n");
334                u->level = 0;
335                continue;
336             }
337             // this counted all TEXes in the origin block, correct that
338             u->level -= i - bbFirstTex.at(tb->getId()) + 1 /* this TEX */;
339             // and did not count the TEXes in the destination block, add those
340             for (size_t j = bbFirstTex.at(ub->getId()); j < texes.size() &&
341                     texes[j]->bb == ub && texes[j]->serial < u->insn->serial;
342                  ++j)
343                u->level++;
344          }
345          assert(u->level >= 0);
346          useVec.push_back(*u);
347       }
348    }
349    delete[] uses;
350    uses = NULL;
351 
352    // insert the barriers
353    for (size_t i = 0; i < useVec.size(); ++i) {
354       Instruction *prev = useVec[i].insn->prev;
355       if (useVec[i].level < 0)
356          continue;
357       if (prev && prev->op == OP_TEXBAR) {
358          if (prev->subOp > useVec[i].level)
359             prev->subOp = useVec[i].level;
360          prev->setSrc(prev->srcCount(), useVec[i].tex->getDef(0));
361       } else {
362          Instruction *bar = new_Instruction(func, OP_TEXBAR, TYPE_NONE);
363          bar->fixed = 1;
364          bar->subOp = useVec[i].level;
365          // make use explicit to ease latency calculation
366          bar->setSrc(bar->srcCount(), useVec[i].tex->getDef(0));
367          useVec[i].insn->bb->insertBefore(useVec[i].insn, bar);
368       }
369    }
370 
371    if (fn->getProgram()->optLevel < 3) {
372       if (uses)
373          delete[] uses;
374       return true;
375    }
376 
377    std::vector<Limits> limitT, limitB, limitS; // entry, exit, single
378 
379    limitT.resize(fn->allBBlocks.getSize(), Limits(0, 0));
380    limitB.resize(fn->allBBlocks.getSize(), Limits(0, 0));
381    limitS.resize(fn->allBBlocks.getSize());
382 
383    // cull unneeded barriers (should do that earlier, but for simplicity)
384    IteratorRef bi = fn->cfg.iteratorCFG();
385    // first calculate min/max outstanding TEXes for each BB
386    for (bi->reset(); !bi->end(); bi->next()) {
387       Graph::Node *n = reinterpret_cast<Graph::Node *>(bi->get());
388       BasicBlock *bb = BasicBlock::get(n);
389       int min = 0;
390       int max = std::numeric_limits<int>::max();
391       for (Instruction *i = bb->getFirst(); i; i = i->next) {
392          if (isTextureOp(i->op)) {
393             min++;
394             if (max < std::numeric_limits<int>::max())
395                max++;
396          } else
397          if (i->op == OP_TEXBAR) {
398             min = MIN2(min, i->subOp);
399             max = MIN2(max, i->subOp);
400          }
401       }
402       // limits when looking at an isolated block
403       limitS[bb->getId()].min = min;
404       limitS[bb->getId()].max = max;
405    }
406    // propagate the min/max values
407    for (unsigned int l = 0; l <= fn->loopNestingBound; ++l) {
408       for (bi->reset(); !bi->end(); bi->next()) {
409          Graph::Node *n = reinterpret_cast<Graph::Node *>(bi->get());
410          BasicBlock *bb = BasicBlock::get(n);
411          const int bbId = bb->getId();
412          for (Graph::EdgeIterator ei = n->incident(); !ei.end(); ei.next()) {
413             BasicBlock *in = BasicBlock::get(ei.getNode());
414             const int inId = in->getId();
415             limitT[bbId].min = MAX2(limitT[bbId].min, limitB[inId].min);
416             limitT[bbId].max = MAX2(limitT[bbId].max, limitB[inId].max);
417          }
418          // I just hope this is correct ...
419          if (limitS[bbId].max == std::numeric_limits<int>::max()) {
420             // no barrier
421             limitB[bbId].min = limitT[bbId].min + limitS[bbId].min;
422             limitB[bbId].max = limitT[bbId].max + limitS[bbId].min;
423          } else {
424             // block contained a barrier
425             limitB[bbId].min = MIN2(limitS[bbId].max,
426                                     limitT[bbId].min + limitS[bbId].min);
427             limitB[bbId].max = MIN2(limitS[bbId].max,
428                                     limitT[bbId].max + limitS[bbId].min);
429          }
430       }
431    }
432    // finally delete unnecessary barriers
433    for (bi->reset(); !bi->end(); bi->next()) {
434       Graph::Node *n = reinterpret_cast<Graph::Node *>(bi->get());
435       BasicBlock *bb = BasicBlock::get(n);
436       Instruction *prev = NULL;
437       Instruction *next;
438       int max = limitT[bb->getId()].max;
439       for (Instruction *i = bb->getFirst(); i; i = next) {
440          next = i->next;
441          if (i->op == OP_TEXBAR) {
442             if (i->subOp >= max) {
443                delete_Instruction(prog, i);
444             } else {
445                max = i->subOp;
446                if (prev && prev->op == OP_TEXBAR && prev->subOp >= max) {
447                   delete_Instruction(prog, prev);
448                   prev = NULL;
449                }
450             }
451          } else
452          if (isTextureOp(i->op)) {
453             max++;
454          }
455          if (!i->isNop())
456             prev = i;
457       }
458    }
459    if (uses)
460       delete[] uses;
461    return true;
462 }
463 
464 bool
visit(Function * fn)465 NVC0LegalizePostRA::visit(Function *fn)
466 {
467    if (needTexBar)
468       insertTextureBarriers(fn);
469 
470    r63 = new_LValue(fn, FILE_GPR);
471    r63->reg.data.id = 63;
472    return true;
473 }
474 
475 void
replaceZero(Instruction * i)476 NVC0LegalizePostRA::replaceZero(Instruction *i)
477 {
478    for (int s = 0; i->srcExists(s); ++s) {
479       ImmediateValue *imm = i->getSrc(s)->asImm();
480       if (imm && imm->reg.data.u64 == 0)
481          i->setSrc(s, r63);
482    }
483 }
484 
485 void
split64BitOp(Instruction * i)486 NVC0LegalizePostRA::split64BitOp(Instruction *i)
487 {
488    if (i->dType == TYPE_F64) {
489       if (i->op == OP_MAD)
490          i->op = OP_FMA;
491       if (i->op == OP_ADD || i->op == OP_MUL || i->op == OP_FMA ||
492           i->op == OP_CVT || i->op == OP_MIN || i->op == OP_MAX ||
493           i->op == OP_SET)
494          return;
495       i->dType = i->sType = TYPE_U32;
496 
497       i->bb->insertAfter(i, cloneForward(func, i));
498    }
499 }
500 
501 // replace CONT with BRA for single unconditional continue
502 bool
tryReplaceContWithBra(BasicBlock * bb)503 NVC0LegalizePostRA::tryReplaceContWithBra(BasicBlock *bb)
504 {
505    if (bb->cfg.incidentCount() != 2 || bb->getEntry()->op != OP_PRECONT)
506       return false;
507    Graph::EdgeIterator ei = bb->cfg.incident();
508    if (ei.getType() != Graph::Edge::BACK)
509       ei.next();
510    if (ei.getType() != Graph::Edge::BACK)
511       return false;
512    BasicBlock *contBB = BasicBlock::get(ei.getNode());
513 
514    if (!contBB->getExit() || contBB->getExit()->op != OP_CONT ||
515        contBB->getExit()->getPredicate())
516       return false;
517    contBB->getExit()->op = OP_BRA;
518    bb->remove(bb->getEntry()); // delete PRECONT
519 
520    ei.next();
521    assert(ei.end() || ei.getType() != Graph::Edge::BACK);
522    return true;
523 }
524 
525 // replace branches to join blocks with join ops
526 void
propagateJoin(BasicBlock * bb)527 NVC0LegalizePostRA::propagateJoin(BasicBlock *bb)
528 {
529    if (bb->getEntry()->op != OP_JOIN || bb->getEntry()->asFlow()->limit)
530       return;
531    for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
532       BasicBlock *in = BasicBlock::get(ei.getNode());
533       Instruction *exit = in->getExit();
534       if (!exit) {
535          in->insertTail(new FlowInstruction(func, OP_JOIN, bb));
536          // there should always be a terminator instruction
537          WARN("inserted missing terminator in BB:%i\n", in->getId());
538       } else
539       if (exit->op == OP_BRA) {
540          exit->op = OP_JOIN;
541          exit->asFlow()->limit = 1; // must-not-propagate marker
542       }
543    }
544    bb->remove(bb->getEntry());
545 }
546 
547 bool
visit(BasicBlock * bb)548 NVC0LegalizePostRA::visit(BasicBlock *bb)
549 {
550    Instruction *i, *next;
551 
552    // remove pseudo operations and non-fixed no-ops, split 64 bit operations
553    for (i = bb->getFirst(); i; i = next) {
554       next = i->next;
555       if (i->op == OP_EMIT || i->op == OP_RESTART) {
556          if (!i->getDef(0)->refCount())
557             i->setDef(0, NULL);
558          if (i->src(0).getFile() == FILE_IMMEDIATE)
559             i->setSrc(0, r63); // initial value must be 0
560       } else
561       if (i->isNop()) {
562          bb->remove(i);
563       } else {
564          if (i->op != OP_MOV && i->op != OP_PFETCH)
565             replaceZero(i);
566          if (typeSizeof(i->dType) == 8)
567             split64BitOp(i);
568       }
569    }
570    if (!bb->getEntry())
571       return true;
572 
573    if (!tryReplaceContWithBra(bb))
574       propagateJoin(bb);
575 
576    return true;
577 }
578 
579 class NVC0LoweringPass : public Pass
580 {
581 public:
582    NVC0LoweringPass(Program *);
583 
584 private:
585    virtual bool visit(Function *);
586    virtual bool visit(BasicBlock *);
587    virtual bool visit(Instruction *);
588 
589    bool handleRDSV(Instruction *);
590    bool handleWRSV(Instruction *);
591    bool handleEXPORT(Instruction *);
592    bool handleOUT(Instruction *);
593    bool handleDIV(Instruction *);
594    bool handleMOD(Instruction *);
595    bool handleSQRT(Instruction *);
596    bool handlePOW(Instruction *);
597    bool handleTEX(TexInstruction *);
598    bool handleTXD(TexInstruction *);
599    bool handleTXQ(TexInstruction *);
600    bool handleManualTXD(TexInstruction *);
601 
602    void checkPredicate(Instruction *);
603 
604    void readTessCoord(LValue *dst, int c);
605 
606 private:
607    const Target *const targ;
608 
609    BuildUtil bld;
610 
611    LValue *gpEmitAddress;
612 };
613 
NVC0LoweringPass(Program * prog)614 NVC0LoweringPass::NVC0LoweringPass(Program *prog) : targ(prog->getTarget())
615 {
616    bld.setProgram(prog);
617 }
618 
619 bool
visit(Function * fn)620 NVC0LoweringPass::visit(Function *fn)
621 {
622    if (prog->getType() == Program::TYPE_GEOMETRY) {
623       assert(!strncmp(fn->getName(), "MAIN", 4));
624       // TODO: when we generate actual functions pass this value along somehow
625       bld.setPosition(BasicBlock::get(fn->cfg.getRoot()), false);
626       gpEmitAddress = bld.loadImm(NULL, 0)->asLValue();
627       if (fn->cfgExit) {
628          bld.setPosition(BasicBlock::get(fn->cfgExit)->getExit(), false);
629          bld.mkMovToReg(0, gpEmitAddress);
630       }
631    }
632    return true;
633 }
634 
635 bool
visit(BasicBlock * bb)636 NVC0LoweringPass::visit(BasicBlock *bb)
637 {
638    return true;
639 }
640 
641 // move array source to first slot, convert to u16, add indirections
642 bool
handleTEX(TexInstruction * i)643 NVC0LoweringPass::handleTEX(TexInstruction *i)
644 {
645    const int dim = i->tex.target.getDim() + i->tex.target.isCube();
646    const int arg = i->tex.target.getArgCount();
647 
648    if (prog->getTarget()->getChipset() >= 0xe0) {
649       if (i->tex.r == i->tex.s) {
650          i->tex.r += 8; // NOTE: offset should probably be a driver option
651          i->tex.s  = 0; // only a single cX[] value possible here
652       } else {
653          // TODO: extract handles and use register to select TIC/TSC entries
654       }
655       if (i->tex.target.isArray()) {
656          LValue *layer = new_LValue(func, FILE_GPR);
657          Value *src = i->getSrc(arg - 1);
658          const int sat = (i->op == OP_TXF) ? 1 : 0;
659          DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32;
660          bld.mkCvt(OP_CVT, TYPE_U16, layer, sTy, src)->saturate = sat;
661          for (int s = dim; s >= 1; --s)
662             i->setSrc(s, i->getSrc(s - 1));
663          i->setSrc(0, layer);
664       }
665       if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) {
666          Value *tmp[2];
667          Symbol *bind;
668          Value *rRel = i->getIndirectR();
669          Value *sRel = i->getIndirectS();
670          Value *shCnt = bld.loadImm(NULL, 2);
671 
672          if (rRel) {
673             tmp[0] = bld.getScratch();
674             bind = bld.mkSymbol(FILE_MEMORY_CONST, 15, TYPE_U32, i->tex.r * 4);
675             bld.mkOp2(OP_SHL, TYPE_U32, tmp[0], rRel, shCnt);
676             tmp[1] = bld.mkLoad(TYPE_U32, bind, tmp[0]);
677             bld.mkOp2(OP_AND, TYPE_U32, tmp[0], tmp[1],
678                       bld.loadImm(tmp[0], 0x00ffffffu));
679             rRel = tmp[0];
680             i->setSrc(i->tex.rIndirectSrc, NULL);
681          }
682          if (sRel) {
683             tmp[0] = bld.getScratch();
684             bind = bld.mkSymbol(FILE_MEMORY_CONST, 15, TYPE_U32, i->tex.s * 4);
685             bld.mkOp2(OP_SHL, TYPE_U32, tmp[0], sRel, shCnt);
686             tmp[1] = bld.mkLoad(TYPE_U32, bind, tmp[0]);
687             bld.mkOp2(OP_AND, TYPE_U32, tmp[0], tmp[1],
688                       bld.loadImm(tmp[0], 0xff000000u));
689             sRel = tmp[0];
690             i->setSrc(i->tex.sIndirectSrc, NULL);
691          }
692          bld.mkOp2(OP_OR, TYPE_U32, rRel, rRel, sRel);
693 
694          int min = i->tex.rIndirectSrc;
695          if (min < 0 || min > i->tex.sIndirectSrc)
696             min = i->tex.sIndirectSrc;
697          for (int s = min; s >= 1; --s)
698             i->setSrc(s, i->getSrc(s - 1));
699          i->setSrc(0, rRel);
700       }
701    } else
702    // (nvc0) generate and move the tsc/tic/array source to the front
703    if (dim != arg || i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) {
704       LValue *src = new_LValue(func, FILE_GPR); // 0xttxsaaaa
705 
706       Value *arrayIndex = i->tex.target.isArray() ? i->getSrc(arg - 1) : NULL;
707       for (int s = dim; s >= 1; --s)
708          i->setSrc(s, i->getSrc(s - 1));
709       i->setSrc(0, arrayIndex);
710 
711       Value *ticRel = i->getIndirectR();
712       Value *tscRel = i->getIndirectS();
713 
714       if (arrayIndex) {
715          int sat = (i->op == OP_TXF) ? 1 : 0;
716          DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32;
717          bld.mkCvt(OP_CVT, TYPE_U16, src, sTy, arrayIndex)->saturate = sat;
718       } else {
719          bld.loadImm(src, 0);
720       }
721 
722       if (ticRel) {
723          i->setSrc(i->tex.rIndirectSrc, NULL);
724          bld.mkOp3(OP_INSBF, TYPE_U32, src, ticRel, bld.mkImm(0x0917), src);
725       }
726       if (tscRel) {
727          i->setSrc(i->tex.sIndirectSrc, NULL);
728          bld.mkOp3(OP_INSBF, TYPE_U32, src, tscRel, bld.mkImm(0x0710), src);
729       }
730 
731       i->setSrc(0, src);
732    }
733 
734    // offset is last source (lod 1st, dc 2nd)
735    if (i->tex.useOffsets) {
736       uint32_t value = 0;
737       int n, c;
738       int s = i->srcCount(0xff);
739       for (n = 0; n < i->tex.useOffsets; ++n)
740          for (c = 0; c < 3; ++c)
741             value |= (i->tex.offset[n][c] & 0xf) << (n * 12 + c * 4);
742       i->setSrc(s, bld.loadImm(NULL, value));
743    }
744 
745    return true;
746 }
747 
748 bool
handleManualTXD(TexInstruction * i)749 NVC0LoweringPass::handleManualTXD(TexInstruction *i)
750 {
751    static const uint8_t qOps[4][2] =
752    {
753       { QUADOP(MOV2, ADD,  MOV2, ADD),  QUADOP(MOV2, MOV2, ADD,  ADD) }, // l0
754       { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD,  ADD) }, // l1
755       { QUADOP(MOV2, ADD,  MOV2, ADD),  QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2
756       { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3
757    };
758    Value *def[4][4];
759    Value *crd[3];
760    Instruction *tex;
761    Value *zero = bld.loadImm(bld.getSSA(), 0);
762    int l, c;
763    const int dim = i->tex.target.getDim();
764 
765    i->op = OP_TEX; // no need to clone dPdx/dPdy later
766 
767    for (c = 0; c < dim; ++c)
768       crd[c] = bld.getScratch();
769 
770    bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
771    for (l = 0; l < 4; ++l) {
772       // mov coordinates from lane l to all lanes
773       for (c = 0; c < dim; ++c)
774          bld.mkQuadop(0x00, crd[c], l, i->getSrc(c), zero);
775       // add dPdx from lane l to lanes dx
776       for (c = 0; c < dim; ++c)
777          bld.mkQuadop(qOps[l][0], crd[c], l, i->dPdx[c].get(), crd[c]);
778       // add dPdy from lane l to lanes dy
779       for (c = 0; c < dim; ++c)
780          bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]);
781       // texture
782       bld.insert(tex = cloneForward(func, i));
783       for (c = 0; c < dim; ++c)
784          tex->setSrc(c, crd[c]);
785       // save results
786       for (c = 0; i->defExists(c); ++c) {
787          Instruction *mov;
788          def[c][l] = bld.getSSA();
789          mov = bld.mkMov(def[c][l], tex->getDef(c));
790          mov->fixed = 1;
791          mov->lanes = 1 << l;
792       }
793    }
794    bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL);
795 
796    for (c = 0; i->defExists(c); ++c) {
797       Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c));
798       for (l = 0; l < 4; ++l)
799          u->setSrc(l, def[c][l]);
800    }
801 
802    i->bb->remove(i);
803    return true;
804 }
805 
806 bool
handleTXD(TexInstruction * txd)807 NVC0LoweringPass::handleTXD(TexInstruction *txd)
808 {
809    int dim = txd->tex.target.getDim();
810    int arg = txd->tex.target.getArgCount();
811 
812    handleTEX(txd);
813    while (txd->srcExists(arg))
814       ++arg;
815 
816    txd->tex.derivAll = true;
817    if (dim > 2 ||
818        txd->tex.target.isCube() ||
819        arg > 4 ||
820        txd->tex.target.isShadow())
821       return handleManualTXD(txd);
822 
823    for (int c = 0; c < dim; ++c) {
824       txd->setSrc(arg + c * 2 + 0, txd->dPdx[c]);
825       txd->setSrc(arg + c * 2 + 1, txd->dPdy[c]);
826       txd->dPdx[c].set(NULL);
827       txd->dPdy[c].set(NULL);
828    }
829    return true;
830 }
831 
832 bool
handleTXQ(TexInstruction * txq)833 NVC0LoweringPass::handleTXQ(TexInstruction *txq)
834 {
835    // TODO: indirect resource/sampler index
836    return true;
837 }
838 
839 bool
handleWRSV(Instruction * i)840 NVC0LoweringPass::handleWRSV(Instruction *i)
841 {
842    Instruction *st;
843    Symbol *sym;
844    uint32_t addr;
845 
846    // must replace, $sreg are not writeable
847    addr = targ->getSVAddress(FILE_SHADER_OUTPUT, i->getSrc(0)->asSym());
848    if (addr >= 0x400)
849       return false;
850    sym = bld.mkSymbol(FILE_SHADER_OUTPUT, 0, i->sType, addr);
851 
852    st = bld.mkStore(OP_EXPORT, i->dType, sym, i->getIndirect(0, 0),
853                     i->getSrc(1));
854    st->perPatch = i->perPatch;
855 
856    bld.getBB()->remove(i);
857    return true;
858 }
859 
860 void
readTessCoord(LValue * dst,int c)861 NVC0LoweringPass::readTessCoord(LValue *dst, int c)
862 {
863    Value *laneid = bld.getSSA();
864    Value *x, *y;
865 
866    bld.mkOp1(OP_RDSV, TYPE_U32, laneid, bld.mkSysVal(SV_LANEID, 0));
867 
868    if (c == 0) {
869       x = dst;
870       y = NULL;
871    } else
872    if (c == 1) {
873       x = NULL;
874       y = dst;
875    } else {
876       assert(c == 2);
877       x = bld.getSSA();
878       y = bld.getSSA();
879    }
880    if (x)
881       bld.mkFetch(x, TYPE_F32, FILE_SHADER_OUTPUT, 0x2f0, NULL, laneid);
882    if (y)
883       bld.mkFetch(y, TYPE_F32, FILE_SHADER_OUTPUT, 0x2f4, NULL, laneid);
884 
885    if (c == 2) {
886       bld.mkOp2(OP_ADD, TYPE_F32, dst, x, y);
887       bld.mkOp2(OP_SUB, TYPE_F32, dst, bld.loadImm(NULL, 1.0f), dst);
888    }
889 }
890 
891 bool
handleRDSV(Instruction * i)892 NVC0LoweringPass::handleRDSV(Instruction *i)
893 {
894    Symbol *sym = i->getSrc(0)->asSym();
895    Value *vtx = NULL;
896    Instruction *ld;
897    uint32_t addr = targ->getSVAddress(FILE_SHADER_INPUT, sym);
898 
899    if (addr >= 0x400) // mov $sreg
900       return true;
901 
902    switch (i->getSrc(0)->reg.data.sv.sv) {
903    case SV_POSITION:
904       assert(prog->getType() == Program::TYPE_FRAGMENT);
905       bld.mkInterp(NV50_IR_INTERP_LINEAR, i->getDef(0), addr, NULL);
906       break;
907    case SV_FACE:
908    {
909       Value *face = i->getDef(0);
910       bld.mkInterp(NV50_IR_INTERP_FLAT, face, addr, NULL);
911       if (i->dType == TYPE_F32) {
912          bld.mkOp2(OP_AND, TYPE_U32, face, face, bld.mkImm(0x80000000));
913          bld.mkOp2(OP_XOR, TYPE_U32, face, face, bld.mkImm(0xbf800000));
914       }
915    }
916       break;
917    case SV_TESS_COORD:
918       assert(prog->getType() == Program::TYPE_TESSELLATION_EVAL);
919       readTessCoord(i->getDef(0)->asLValue(), i->getSrc(0)->reg.data.sv.index);
920       break;
921    default:
922       if (prog->getType() == Program::TYPE_TESSELLATION_EVAL)
923          vtx = bld.mkOp1v(OP_PFETCH, TYPE_U32, bld.getSSA(), bld.mkImm(0));
924       ld = bld.mkFetch(i->getDef(0), i->dType,
925                        FILE_SHADER_INPUT, addr, i->getIndirect(0, 0), vtx);
926       ld->perPatch = i->perPatch;
927       break;
928    }
929    bld.getBB()->remove(i);
930    return true;
931 }
932 
933 bool
handleDIV(Instruction * i)934 NVC0LoweringPass::handleDIV(Instruction *i)
935 {
936    if (!isFloatType(i->dType))
937       return true;
938    bld.setPosition(i, false);
939    Instruction *rcp = bld.mkOp1(OP_RCP, i->dType, bld.getSSA(), i->getSrc(1));
940    i->op = OP_MUL;
941    i->setSrc(1, rcp->getDef(0));
942    return true;
943 }
944 
945 bool
handleMOD(Instruction * i)946 NVC0LoweringPass::handleMOD(Instruction *i)
947 {
948    if (i->dType != TYPE_F32)
949       return true;
950    LValue *value = bld.getScratch();
951    bld.mkOp1(OP_RCP, TYPE_F32, value, i->getSrc(1));
952    bld.mkOp2(OP_MUL, TYPE_F32, value, i->getSrc(0), value);
953    bld.mkOp1(OP_TRUNC, TYPE_F32, value, value);
954    bld.mkOp2(OP_MUL, TYPE_F32, value, i->getSrc(1), value);
955    i->op = OP_SUB;
956    i->setSrc(1, value);
957    return true;
958 }
959 
960 bool
handleSQRT(Instruction * i)961 NVC0LoweringPass::handleSQRT(Instruction *i)
962 {
963    Instruction *rsq = bld.mkOp1(OP_RSQ, TYPE_F32,
964                                 bld.getSSA(), i->getSrc(0));
965    i->op = OP_MUL;
966    i->setSrc(1, rsq->getDef(0));
967 
968    return true;
969 }
970 
971 bool
handlePOW(Instruction * i)972 NVC0LoweringPass::handlePOW(Instruction *i)
973 {
974    LValue *val = bld.getScratch();
975 
976    bld.mkOp1(OP_LG2, TYPE_F32, val, i->getSrc(0));
977    bld.mkOp2(OP_MUL, TYPE_F32, val, i->getSrc(1), val)->dnz = 1;
978    bld.mkOp1(OP_PREEX2, TYPE_F32, val, val);
979 
980    i->op = OP_EX2;
981    i->setSrc(0, val);
982    i->setSrc(1, NULL);
983 
984    return true;
985 }
986 
987 bool
handleEXPORT(Instruction * i)988 NVC0LoweringPass::handleEXPORT(Instruction *i)
989 {
990    if (prog->getType() == Program::TYPE_FRAGMENT) {
991       int id = i->getSrc(0)->reg.data.offset / 4;
992 
993       if (i->src(0).isIndirect(0)) // TODO, ugly
994          return false;
995       i->op = OP_MOV;
996       i->subOp = NV50_IR_SUBOP_MOV_FINAL;
997       i->src(0).set(i->src(1));
998       i->setSrc(1, NULL);
999       i->setDef(0, new_LValue(func, FILE_GPR));
1000       i->getDef(0)->reg.data.id = id;
1001 
1002       prog->maxGPR = MAX2(prog->maxGPR, id);
1003    } else
1004    if (prog->getType() == Program::TYPE_GEOMETRY) {
1005       i->setIndirect(0, 1, gpEmitAddress);
1006    }
1007    return true;
1008 }
1009 
1010 bool
handleOUT(Instruction * i)1011 NVC0LoweringPass::handleOUT(Instruction *i)
1012 {
1013    if (i->op == OP_RESTART && i->prev && i->prev->op == OP_EMIT) {
1014       i->prev->subOp = NV50_IR_SUBOP_EMIT_RESTART;
1015       delete_Instruction(prog, i);
1016    } else {
1017       assert(gpEmitAddress);
1018       i->setDef(0, gpEmitAddress);
1019       if (i->srcExists(0))
1020          i->setSrc(1, i->getSrc(0));
1021       i->setSrc(0, gpEmitAddress);
1022    }
1023    return true;
1024 }
1025 
1026 // Generate a binary predicate if an instruction is predicated by
1027 // e.g. an f32 value.
1028 void
checkPredicate(Instruction * insn)1029 NVC0LoweringPass::checkPredicate(Instruction *insn)
1030 {
1031    Value *pred = insn->getPredicate();
1032    Value *pdst;
1033 
1034    if (!pred || pred->reg.file == FILE_PREDICATE)
1035       return;
1036    pdst = new_LValue(func, FILE_PREDICATE);
1037 
1038    // CAUTION: don't use pdst->getInsn, the definition might not be unique,
1039    //  delay turning PSET(FSET(x,y),0) into PSET(x,y) to a later pass
1040 
1041    bld.mkCmp(OP_SET, CC_NEU, TYPE_U32, pdst, bld.mkImm(0), pred);
1042 
1043    insn->setPredicate(insn->cc, pdst);
1044 }
1045 
1046 //
1047 // - add quadop dance for texturing
1048 // - put FP outputs in GPRs
1049 // - convert instruction sequences
1050 //
1051 bool
visit(Instruction * i)1052 NVC0LoweringPass::visit(Instruction *i)
1053 {
1054    bld.setPosition(i, false);
1055 
1056    if (i->cc != CC_ALWAYS)
1057       checkPredicate(i);
1058 
1059    switch (i->op) {
1060    case OP_TEX:
1061    case OP_TXB:
1062    case OP_TXL:
1063    case OP_TXF:
1064    case OP_TXG:
1065       return handleTEX(i->asTex());
1066    case OP_TXD:
1067       return handleTXD(i->asTex());
1068    case OP_TXQ:
1069      return handleTXQ(i->asTex());
1070    case OP_EX2:
1071       bld.mkOp1(OP_PREEX2, TYPE_F32, i->getDef(0), i->getSrc(0));
1072       i->setSrc(0, i->getDef(0));
1073       break;
1074    case OP_POW:
1075       return handlePOW(i);
1076    case OP_DIV:
1077       return handleDIV(i);
1078    case OP_MOD:
1079       return handleMOD(i);
1080    case OP_SQRT:
1081       return handleSQRT(i);
1082    case OP_EXPORT:
1083       return handleEXPORT(i);
1084    case OP_EMIT:
1085    case OP_RESTART:
1086       return handleOUT(i);
1087    case OP_RDSV:
1088       return handleRDSV(i);
1089    case OP_WRSV:
1090       return handleWRSV(i);
1091    case OP_LOAD:
1092       if (i->src(0).getFile() == FILE_SHADER_INPUT) {
1093          i->op = OP_VFETCH;
1094          assert(prog->getType() != Program::TYPE_FRAGMENT);
1095       }
1096       break;
1097    default:
1098       break;
1099    }
1100    return true;
1101 }
1102 
1103 bool
runLegalizePass(Program * prog,CGStage stage) const1104 TargetNVC0::runLegalizePass(Program *prog, CGStage stage) const
1105 {
1106    if (stage == CG_STAGE_PRE_SSA) {
1107       NVC0LoweringPass pass(prog);
1108       return pass.run(prog, false, true);
1109    } else
1110    if (stage == CG_STAGE_POST_RA) {
1111       NVC0LegalizePostRA pass(prog);
1112       return pass.run(prog, false, true);
1113    } else
1114    if (stage == CG_STAGE_SSA) {
1115       NVC0LegalizeSSA pass;
1116       return pass.run(prog, false, true);
1117    }
1118    return false;
1119 }
1120 
1121 } // namespace nv50_ir
1122