• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2011 Christoph Bumiller
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "nv50_ir.h"
24 #include "nv50_ir_target.h"
25 
26 namespace nv50_ir {
27 
28 const uint8_t Target::operationSrcNr[] =
29 {
30    0, 0,                   // NOP, PHI
31    0, 0, 0, 0,             // UNION, SPLIT, MERGE, CONSTRAINT
32    1, 1, 2,                // MOV, LOAD, STORE
33    2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
34    3, 3,                   // SHLADD, XMAD
35    1, 1, 1,                // ABS, NEG, NOT
36    2, 2, 2, 3, 2, 2, 3,    // AND, OR, XOR, LOP3_LUT, SHL, SHR, SHF
37    2, 2, 1,                // MAX, MIN, SAT
38    1, 1, 1, 1,             // CEIL, FLOOR, TRUNC, CVT
39    3, 3, 3, 2, 3, 3,       // SET_AND,OR,XOR, SET, SELP, SLCT
40    1, 1, 1, 1, 1, 1,       // RCP, RSQ, LG2, SIN, COS, EX2
41    1, 1, 1, 1, 1, 2,       // EXP, LOG, PRESIN, PREEX2, SQRT, POW
42    0, 0, 0, 0, 0,          // BRA, CALL, RET, CONT, BREAK,
43    0, 0, 0,                // PRERET,CONT,BREAK
44    0, 0, 0, 0, 0, 0,       // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR
45    1, 1, 1, 2, 1, 2,       // VFETCH, PFETCH, AFETCH, EXPORT, LINTERP, PINTERP
46    1, 1, 1,                // EMIT, RESTART, FINAL
47    1, 1, 1,                // TEX, TXB, TXL,
48    1, 1, 1, 1, 1, 1, 2,    // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP
49    1, 1, 2, 2, 2, 2, 2,    // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA
50    3, 3, 3, 1, 3,          // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP
51    0,                      // TEXBAR
52    1, 1,                   // DFDX, DFDY
53    1, 2, 1, 2, 0, 0,       // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
54    2, 3, 2, 1, 1, 2, 3,    // POPCNT, INSBF, EXTBF, BFIND, BREV, BMSK, PERMT
55    2,                      // SGXT
56    3, 2,                   // ATOM, BAR
57    2, 2, 2, 2, 3, 2,       // VADD, VAVG, VMIN, VMAX, VSAD, VSET,
58    2, 2, 2, 1,             // VSHR, VSHL, VSEL, CCTL
59    3,                      // SHFL
60    1,                      // VOTE
61    1,                      // BUFQ
62    1,                      // WARPSYNC
63    0
64 };
65 
66 const OpClass Target::operationClass[] =
67 {
68    // NOP; PHI; UNION, SPLIT, MERGE, CONSTRAINT
69    OPCLASS_OTHER,
70    OPCLASS_PSEUDO,
71    OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO,
72    // MOV; LOAD; STORE
73    OPCLASS_MOVE,
74    OPCLASS_LOAD,
75    OPCLASS_STORE,
76    // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD, XMAD
77    OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
78    OPCLASS_ARITH, OPCLASS_ARITH,
79    OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
80    // ABS, NEG; NOT, AND, OR, XOR, LOP3_LUT; SHL, SHR, SHF
81    OPCLASS_CONVERT, OPCLASS_CONVERT,
82    OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
83    OPCLASS_SHIFT, OPCLASS_SHIFT, OPCLASS_SHIFT,
84    // MAX, MIN
85    OPCLASS_COMPARE, OPCLASS_COMPARE,
86    // SAT, CEIL, FLOOR, TRUNC; CVT
87    OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT,
88    OPCLASS_CONVERT,
89    // SET(AND,OR,XOR); SELP, SLCT
90    OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE,
91    OPCLASS_COMPARE, OPCLASS_COMPARE,
92    // RCP, RSQ, LG2, SIN, COS; EX2, EXP, LOG, PRESIN, PREEX2; SQRT, POW
93    OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
94    OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
95    OPCLASS_SFU, OPCLASS_SFU,
96    // BRA, CALL, RET; CONT, BREAK, PRE(RET,CONT,BREAK); BRKPT, JOINAT, JOIN
97    OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
98    OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
99    OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
100    // DISCARD, EXIT
101    OPCLASS_FLOW, OPCLASS_FLOW,
102    // MEMBAR
103    OPCLASS_CONTROL,
104    // VFETCH, PFETCH, AFETCH, EXPORT
105    OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_STORE,
106    // LINTERP, PINTERP
107    OPCLASS_SFU, OPCLASS_SFU,
108    // EMIT, RESTART, FINAL
109    OPCLASS_CONTROL, OPCLASS_CONTROL, OPCLASS_CONTROL,
110    // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TXLQ; TEXCSAA, TEXPREP
111    OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
112    OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
113    OPCLASS_TEXTURE, OPCLASS_TEXTURE,
114    // SULDB, SULDP, SUSTB, SUSTP; SUREDB, SUREDP, SULEA
115    OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_ATOMIC, OPCLASS_SURFACE,
116    OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_SURFACE,
117    // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP
118    OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH,
119    // TEXBAR
120    OPCLASS_OTHER,
121    // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
122    OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
123    OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL,
124    // POPCNT, INSBF, EXTBF, BFIND, BREV, BMSK; PERMT, SGXT
125    OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
126    OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
127    // ATOM, BAR
128    OPCLASS_ATOMIC, OPCLASS_CONTROL,
129    // VADD, VAVG, VMIN, VMAX
130    OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
131    // VSAD, VSET, VSHR, VSHL
132    OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
133    // VSEL, CCTL
134    OPCLASS_VECTOR, OPCLASS_CONTROL,
135    // SHFL
136    OPCLASS_OTHER,
137    // VOTE
138    OPCLASS_OTHER,
139    // BUFQ
140    OPCLASS_OTHER,
141    // WARPSYNC
142    OPCLASS_OTHER,
143    OPCLASS_PSEUDO // LAST
144 };
145 
146 
147 extern Target *getTargetGV100(unsigned int chipset);
148 extern Target *getTargetGM107(unsigned int chipset);
149 extern Target *getTargetNVC0(unsigned int chipset);
150 extern Target *getTargetNV50(unsigned int chipset);
151 
create(unsigned int chipset)152 Target *Target::create(unsigned int chipset)
153 {
154    STATIC_ASSERT(ARRAY_SIZE(operationSrcNr) == OP_LAST + 1);
155    STATIC_ASSERT(ARRAY_SIZE(operationClass) == OP_LAST + 1);
156    switch (chipset & ~0xf) {
157    case 0x170:
158    case 0x160:
159    case 0x140:
160       return getTargetGV100(chipset);
161    case 0x110:
162    case 0x120:
163    case 0x130:
164       return getTargetGM107(chipset);
165    case 0xc0:
166    case 0xd0:
167    case 0xe0:
168    case 0xf0:
169    case 0x100:
170       return getTargetNVC0(chipset);
171    case 0x50:
172    case 0x80:
173    case 0x90:
174    case 0xa0:
175       return getTargetNV50(chipset);
176    default:
177       ERROR("unsupported target: NV%x\n", chipset);
178       return 0;
179    }
180 }
181 
destroy(Target * targ)182 void Target::destroy(Target *targ)
183 {
184    delete targ;
185 }
186 
CodeEmitter(const Target * target)187 CodeEmitter::CodeEmitter(const Target *target) : targ(target), code(NULL),
188    codeSize(0), codeSizeLimit(0), relocInfo(NULL), fixupInfo(NULL)
189 {
190 }
191 
192 void
setCodeLocation(void * ptr,uint32_t size)193 CodeEmitter::setCodeLocation(void *ptr, uint32_t size)
194 {
195    code = reinterpret_cast<uint32_t *>(ptr);
196    codeSize = 0;
197    codeSizeLimit = size;
198 }
199 
200 void
printBinary() const201 CodeEmitter::printBinary() const
202 {
203    uint32_t *bin = code - codeSize / 4;
204    INFO("program binary (%u bytes)", codeSize);
205    for (unsigned int pos = 0; pos < codeSize / 4; ++pos) {
206       if ((pos % 8) == 0)
207          INFO("\n");
208       INFO("%08x ", bin[pos]);
209    }
210    INFO("\n");
211 }
212 
/**
 * Number of 56-byte units needed to cover \p size bytes (size / 56, rounded
 * up).
 *
 * Written as quotient plus a remainder test instead of (size + 55) / 56 so
 * the result stays correct for sizes within 55 of UINT32_MAX, where the
 * addition would wrap around.
 *
 * \param size  byte count
 * \return ceil(size / 56); 0 for a size of 0
 */
static inline uint32_t sizeToBundlesNVE4(uint32_t size)
{
   return size / 56 + ((size % 56) ? 1 : 0);
}
217 
218 void
prepareEmission(Program * prog)219 CodeEmitter::prepareEmission(Program *prog)
220 {
221    for (ArrayList::Iterator fi = prog->allFuncs.iterator();
222         !fi.end(); fi.next()) {
223       Function *func = reinterpret_cast<Function *>(fi.get());
224       func->binPos = prog->binSize;
225       prepareEmission(func);
226 
227       // adjust sizes & positions for scheduling info:
228       if (prog->getTarget()->hasSWSched) {
229          uint32_t adjPos = func->binPos;
230          BasicBlock *bb = NULL;
231          for (int i = 0; i < func->bbCount; ++i) {
232             bb = func->bbArray[i];
233             int32_t adjSize = bb->binSize;
234             if (adjPos % 64) {
235                adjSize -= 64 - adjPos % 64;
236                if (adjSize < 0)
237                   adjSize = 0;
238             }
239             adjSize = bb->binSize + sizeToBundlesNVE4(adjSize) * 8;
240             bb->binPos = adjPos;
241             bb->binSize = adjSize;
242             adjPos += adjSize;
243          }
244          if (bb)
245             func->binSize = adjPos - func->binPos;
246       }
247 
248       prog->binSize += func->binSize;
249    }
250 }
251 
252 void
prepareEmission(Function * func)253 CodeEmitter::prepareEmission(Function *func)
254 {
255    func->bbCount = 0;
256    func->bbArray = new BasicBlock * [func->cfg.getSize()];
257 
258    BasicBlock::get(func->cfg.getRoot())->binPos = func->binPos;
259 
260    for (IteratorRef it = func->cfg.iteratorCFG(); !it->end(); it->next())
261       prepareEmission(BasicBlock::get(*it));
262 }
263 
264 void
prepareEmission(BasicBlock * bb)265 CodeEmitter::prepareEmission(BasicBlock *bb)
266 {
267    Instruction *i, *next;
268    Function *func = bb->getFunction();
269    int j;
270    unsigned int nShort;
271 
272    for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j);
273 
274    for (; j >= 0; --j) {
275       BasicBlock *in = func->bbArray[j];
276       Instruction *exit = in->getExit();
277 
278       if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) {
279          in->binSize -= 8;
280          func->binSize -= 8;
281 
282          for (++j; j < func->bbCount; ++j)
283             func->bbArray[j]->binPos -= 8;
284 
285          in->remove(exit);
286       }
287       bb->binPos = in->binPos + in->binSize;
288       if (in->binSize) // no more no-op branches to bb
289          break;
290    }
291    func->bbArray[func->bbCount++] = bb;
292 
293    if (!bb->getExit())
294       return;
295 
296    // determine encoding size, try to group short instructions
297    nShort = 0;
298    for (i = bb->getEntry(); i; i = next) {
299       next = i->next;
300 
301       i->encSize = getMinEncodingSize(i);
302       if (next && i->encSize < 8)
303          ++nShort;
304       else
305       if ((nShort & 1) && next && getMinEncodingSize(next) == 4) {
306          if (i->isCommutationLegal(i->next)) {
307             bb->permuteAdjacent(i, next);
308             next->encSize = 4;
309             next = i;
310             i = i->prev;
311             ++nShort;
312          } else
313          if (i->isCommutationLegal(i->prev) && next->next) {
314             bb->permuteAdjacent(i->prev, i);
315             next->encSize = 4;
316             next = next->next;
317             bb->binSize += 4;
318             ++nShort;
319          } else {
320             i->encSize = 8;
321             i->prev->encSize = 8;
322             bb->binSize += 4;
323             nShort = 0;
324          }
325       } else {
326          i->encSize = 8;
327          if (nShort & 1) {
328             i->prev->encSize = 8;
329             bb->binSize += 4;
330          }
331          nShort = 0;
332       }
333       bb->binSize += i->encSize;
334    }
335 
336    if (bb->getExit()->encSize == 4) {
337       assert(nShort);
338       bb->getExit()->encSize = 8;
339       bb->binSize += 4;
340 
341       if ((bb->getExit()->prev->encSize == 4) && !(nShort & 1)) {
342          bb->binSize += 8;
343          bb->getExit()->prev->encSize = 8;
344       }
345    }
346    assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 8));
347 
348    func->binSize += bb->binSize;
349 }
350 
351 bool
emitBinary(struct nv50_ir_prog_info_out * info)352 Program::emitBinary(struct nv50_ir_prog_info_out *info)
353 {
354    CodeEmitter *emit = target->getCodeEmitter(progType);
355 
356    emit->prepareEmission(this);
357 
358    if (dbgFlags & NV50_IR_DEBUG_BASIC)
359       this->print();
360 
361    if (!binSize) {
362       code = NULL;
363       return false;
364    }
365    code = reinterpret_cast<uint32_t *>(MALLOC(binSize));
366    if (!code)
367       return false;
368    emit->setCodeLocation(code, binSize);
369    info->bin.instructions = 0;
370 
371    for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) {
372       Function *fn = reinterpret_cast<Function *>(fi.get());
373 
374       assert(emit->getCodeSize() == fn->binPos);
375 
376       for (int b = 0; b < fn->bbCount; ++b) {
377          for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next) {
378             emit->emitInstruction(i);
379             info->bin.instructions++;
380             if ((typeSizeof(i->sType) == 8 || typeSizeof(i->dType) == 8) &&
381                 (isFloatType(i->sType) || isFloatType(i->dType)))
382                info->io.fp64 = true;
383          }
384       }
385    }
386    info->io.fp64 |= fp64;
387    info->bin.relocData = emit->getRelocInfo();
388    info->bin.fixupData = emit->getFixupInfo();
389 
390    // the nvc0 driver will print the binary itself together with the header
391    if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0)
392       emit->printBinary();
393 
394    delete emit;
395    return true;
396 }
397 
398 #define RELOC_ALLOC_INCREMENT 8
399 
400 bool
addReloc(RelocEntry::Type ty,int w,uint32_t data,uint32_t m,int s)401 CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m,
402                       int s)
403 {
404    unsigned int n = relocInfo ? relocInfo->count : 0;
405 
406    if (!(n % RELOC_ALLOC_INCREMENT)) {
407       size_t size = sizeof(RelocInfo) + n * sizeof(RelocEntry);
408       relocInfo = reinterpret_cast<RelocInfo *>(
409          REALLOC(relocInfo, n ? size : 0,
410                  size + RELOC_ALLOC_INCREMENT * sizeof(RelocEntry)));
411       if (!relocInfo)
412          return false;
413       if (n == 0)
414          memset(relocInfo, 0, sizeof(RelocInfo));
415    }
416    ++relocInfo->count;
417 
418    relocInfo->entry[n].data = data;
419    relocInfo->entry[n].mask = m;
420    relocInfo->entry[n].offset = codeSize + w * 4;
421    relocInfo->entry[n].bitPos = s;
422    relocInfo->entry[n].type = ty;
423 
424    return true;
425 }
426 
427 bool
addInterp(int ipa,int reg,FixupApply apply)428 CodeEmitter::addInterp(int ipa, int reg, FixupApply apply)
429 {
430    unsigned int n = fixupInfo ? fixupInfo->count : 0;
431 
432    if (!(n % RELOC_ALLOC_INCREMENT)) {
433       size_t size = sizeof(FixupInfo) + n * sizeof(FixupEntry);
434       fixupInfo = reinterpret_cast<FixupInfo *>(
435          REALLOC(fixupInfo, n ? size : 0,
436                  size + RELOC_ALLOC_INCREMENT * sizeof(FixupEntry)));
437       if (!fixupInfo)
438          return false;
439       if (n == 0)
440          fixupInfo->count = 0;
441    }
442    ++fixupInfo->count;
443 
444    fixupInfo->entry[n] = FixupEntry(apply, ipa, reg, codeSize >> 2);
445 
446    return true;
447 }
448 
449 void
apply(uint32_t * binary,const RelocInfo * info) const450 RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const
451 {
452    uint32_t value = 0;
453 
454    switch (type) {
455    case TYPE_CODE: value = info->codePos; break;
456    case TYPE_BUILTIN: value = info->libPos; break;
457    case TYPE_DATA: value = info->dataPos; break;
458    default:
459       assert(0);
460       break;
461    }
462    value += data;
463    value = (bitPos < 0) ? (value >> -bitPos) : (value << bitPos);
464 
465    binary[offset / 4] &= ~mask;
466    binary[offset / 4] |= value & mask;
467 }
468 
469 } // namespace nv50_ir
470 
471 
472 #include "nv50_ir_driver.h"
473 
474 extern "C" {
475 
476 void
nv50_ir_relocate_code(void * relocData,uint32_t * code,uint32_t codePos,uint32_t libPos,uint32_t dataPos)477 nv50_ir_relocate_code(void *relocData, uint32_t *code,
478                       uint32_t codePos,
479                       uint32_t libPos,
480                       uint32_t dataPos)
481 {
482    nv50_ir::RelocInfo *info = reinterpret_cast<nv50_ir::RelocInfo *>(relocData);
483 
484    info->codePos = codePos;
485    info->libPos = libPos;
486    info->dataPos = dataPos;
487 
488    for (unsigned int i = 0; i < info->count; ++i)
489       info->entry[i].apply(code, info);
490 }
491 
492 void
nv50_ir_apply_fixups(void * fixupData,uint32_t * code,bool force_persample_interp,bool flatshade,uint8_t alphatest,bool msaa)493 nv50_ir_apply_fixups(void *fixupData, uint32_t *code,
494                      bool force_persample_interp, bool flatshade,
495                      uint8_t alphatest, bool msaa)
496 {
497    nv50_ir::FixupInfo *info = reinterpret_cast<nv50_ir::FixupInfo *>(
498       fixupData);
499 
500    // force_persample_interp: all non-flat -> per-sample
501    // flatshade: all color -> flat
502    // alphatest: PIPE_FUNC_* to use with alphatest
503    // msaa: false = sample id -> 0 for interpolateAtSample
504    nv50_ir::FixupData data(force_persample_interp, flatshade, alphatest, msaa);
505    for (unsigned i = 0; i < info->count; ++i)
506       info->entry[i].apply(&info->entry[i], code, data);
507 }
508 
509 void
nv50_ir_get_target_library(uint32_t chipset,const uint32_t ** code,uint32_t * size)510 nv50_ir_get_target_library(uint32_t chipset,
511                            const uint32_t **code, uint32_t *size)
512 {
513    nv50_ir::Target *targ = nv50_ir::Target::create(chipset);
514    targ->getBuiltinCode(code, size);
515    nv50_ir::Target::destroy(targ);
516 }
517 
518 }
519