1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "codegen/nv50_ir.h"
24 #include "codegen/nv50_ir_target.h"
25
26 namespace nv50_ir {
27
/*
 * Per-opcode table, one entry per operation in the order given by the
 * trailing comments (presumably the number of source operands each operation
 * takes -- confirm against the users of operationSrcNr).
 *
 * Target::create() statically asserts that this table has exactly
 * OP_LAST + 1 entries, so a new opcode needs a new entry at the matching
 * position here.
 */
const uint8_t Target::operationSrcNr[] =
{
   0, 0,                   // NOP, PHI
   0, 0, 0, 0,             // UNION, SPLIT, MERGE, CONSTRAINT
   1, 1, 2,                // MOV, LOAD, STORE
   2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
   3, 3,                   // SHLADD, XMAD
   1, 1, 1,                // ABS, NEG, NOT
   2, 2, 2, 3, 2, 2, 3,    // AND, OR, XOR, LOP3_LUT, SHL, SHR, SHF
   2, 2, 1,                // MAX, MIN, SAT
   1, 1, 1, 1,             // CEIL, FLOOR, TRUNC, CVT
   3, 3, 3, 2, 3, 3,       // SET_AND, SET_OR, SET_XOR, SET, SELP, SLCT
   1, 1, 1, 1, 1, 1,       // RCP, RSQ, LG2, SIN, COS, EX2
   1, 1, 1, 1, 1, 2,       // EXP, LOG, PRESIN, PREEX2, SQRT, POW
   0, 0, 0, 0, 0,          // BRA, CALL, RET, CONT, BREAK,
   0, 0, 0,                // PRERET, PRECONT, PREBREAK
   0, 0, 0, 0, 0, 0,       // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR
   1, 1, 1, 2, 1, 2,       // VFETCH, PFETCH, AFETCH, EXPORT, LINTERP, PINTERP
   1, 1, 1,                // EMIT, RESTART, FINAL
   1, 1, 1,                // TEX, TXB, TXL,
   1, 1, 1, 1, 1, 1, 2,    // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP
   1, 1, 2, 2, 2, 2, 2,    // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA
   3, 3, 3, 1, 3,          // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP
   0,                      // TEXBAR
   1, 1,                   // DFDX, DFDY
   1, 2, 1, 2, 0, 0,       // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
   2, 3, 2, 1, 1, 2, 3,    // POPCNT, INSBF, EXTBF, BFIND, BREV, BMSK, PERMT
   2,                      // SGXT
   2, 2,                   // ATOM, BAR
   2, 2, 2, 2, 3, 2,       // VADD, VAVG, VMIN, VMAX, VSAD, VSET,
   2, 2, 2, 1,             // VSHR, VSHL, VSEL, CCTL
   3,                      // SHFL
   1,                      // VOTE
   1,                      // BUFQ
   1,                      // WARPSYNC
   0                       // LAST
};
65
/*
 * Per-opcode classification table, one OpClass per operation in the order
 * given by the group comments below.
 *
 * Target::create() statically asserts that this table has exactly
 * OP_LAST + 1 entries, so it must be kept in step with the opcode list (and
 * with operationSrcNr, which is checked against the same bound).
 */
const OpClass Target::operationClass[] =
{
   // NOP; PHI; UNION, SPLIT, MERGE, CONSTRAINT
   OPCLASS_OTHER,
   OPCLASS_PSEUDO,
   OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO,
   // MOV; LOAD; STORE
   OPCLASS_MOVE,
   OPCLASS_LOAD,
   OPCLASS_STORE,
   // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD, XMAD
   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
   OPCLASS_ARITH, OPCLASS_ARITH,
   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
   // ABS, NEG; NOT, AND, OR, XOR, LOP3_LUT; SHL, SHR, SHF
   OPCLASS_CONVERT, OPCLASS_CONVERT,
   OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
   OPCLASS_SHIFT, OPCLASS_SHIFT, OPCLASS_SHIFT,
   // MAX, MIN
   OPCLASS_COMPARE, OPCLASS_COMPARE,
   // SAT, CEIL, FLOOR, TRUNC; CVT
   OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT,
   OPCLASS_CONVERT,
   // SET_AND, SET_OR, SET_XOR, SET; SELP, SLCT
   OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE,
   OPCLASS_COMPARE, OPCLASS_COMPARE,
   // RCP, RSQ, LG2, SIN, COS; EX2, EXP, LOG, PRESIN, PREEX2; SQRT, POW
   OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
   OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
   OPCLASS_SFU, OPCLASS_SFU,
   // BRA, CALL, RET; CONT, BREAK, PRE(RET,CONT,BREAK); BRKPT, JOINAT, JOIN
   OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
   OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
   OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
   // DISCARD, EXIT
   OPCLASS_FLOW, OPCLASS_FLOW,
   // MEMBAR
   OPCLASS_CONTROL,
   // VFETCH, PFETCH, AFETCH, EXPORT
   OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_STORE,
   // LINTERP, PINTERP
   OPCLASS_SFU, OPCLASS_SFU,
   // EMIT, RESTART, FINAL
   OPCLASS_CONTROL, OPCLASS_CONTROL, OPCLASS_CONTROL,
   // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TXLQ; TEXCSAA, TEXPREP
   OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
   OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
   OPCLASS_TEXTURE, OPCLASS_TEXTURE,
   // SULDB, SULDP, SUSTB, SUSTP; SUREDB, SUREDP, SULEA
   // NOTE(review): SUSTB is classed OPCLASS_ATOMIC while every other surface
   // op in this group is OPCLASS_SURFACE -- confirm this is intentional.
   OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_ATOMIC, OPCLASS_SURFACE,
   OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_SURFACE,
   // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP
   OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH,
   // TEXBAR
   OPCLASS_OTHER,
   // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
   OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
   OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL,
   // POPCNT, INSBF, EXTBF, BFIND, BREV, BMSK; PERMT, SGXT
   OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
   OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
   // ATOM, BAR
   OPCLASS_ATOMIC, OPCLASS_CONTROL,
   // VADD, VAVG, VMIN, VMAX
   OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
   // VSAD, VSET, VSHR, VSHL
   OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
   // VSEL, CCTL
   OPCLASS_VECTOR, OPCLASS_CONTROL,
   // SHFL
   OPCLASS_OTHER,
   // VOTE
   OPCLASS_OTHER,
   // BUFQ
   OPCLASS_OTHER,
   // WARPSYNC
   OPCLASS_OTHER,
   OPCLASS_PSEUDO // LAST
};
145
146
// Per-generation Target factories, defined outside this file.
// Target::create() selects one of them from the chipset id and forwards the
// full chipset value to it.
extern Target *getTargetGV100(unsigned int chipset);
extern Target *getTargetGM107(unsigned int chipset);
extern Target *getTargetNVC0(unsigned int chipset);
extern Target *getTargetNV50(unsigned int chipset);
151
create(unsigned int chipset)152 Target *Target::create(unsigned int chipset)
153 {
154 STATIC_ASSERT(ARRAY_SIZE(operationSrcNr) == OP_LAST + 1);
155 STATIC_ASSERT(ARRAY_SIZE(operationClass) == OP_LAST + 1);
156 switch (chipset & ~0xf) {
157 case 0x160:
158 case 0x140:
159 return getTargetGV100(chipset);
160 case 0x110:
161 case 0x120:
162 case 0x130:
163 return getTargetGM107(chipset);
164 case 0xc0:
165 case 0xd0:
166 case 0xe0:
167 case 0xf0:
168 case 0x100:
169 return getTargetNVC0(chipset);
170 case 0x50:
171 case 0x80:
172 case 0x90:
173 case 0xa0:
174 return getTargetNV50(chipset);
175 default:
176 ERROR("unsupported target: NV%x\n", chipset);
177 return 0;
178 }
179 }
180
destroy(Target * targ)181 void Target::destroy(Target *targ)
182 {
183 delete targ;
184 }
185
CodeEmitter(const Target * target)186 CodeEmitter::CodeEmitter(const Target *target) : targ(target), fixupInfo(NULL)
187 {
188 }
189
190 void
setCodeLocation(void * ptr,uint32_t size)191 CodeEmitter::setCodeLocation(void *ptr, uint32_t size)
192 {
193 code = reinterpret_cast<uint32_t *>(ptr);
194 codeSize = 0;
195 codeSizeLimit = size;
196 }
197
198 void
printBinary() const199 CodeEmitter::printBinary() const
200 {
201 uint32_t *bin = code - codeSize / 4;
202 INFO("program binary (%u bytes)", codeSize);
203 for (unsigned int pos = 0; pos < codeSize / 4; ++pos) {
204 if ((pos % 8) == 0)
205 INFO("\n");
206 INFO("%08x ", bin[pos]);
207 }
208 INFO("\n");
209 }
210
/**
 * Number of 56-byte units needed to hold @size bytes, rounded up.
 *
 * prepareEmission(Program *) adds 8 bytes to a block's size for every unit
 * counted here.
 */
static inline uint32_t sizeToBundlesNVE4(uint32_t size)
{
   const uint32_t bytesPerBundle = 56;

   return (size + (bytesPerBundle - 1)) / bytesPerBundle;
}
215
216 void
prepareEmission(Program * prog)217 CodeEmitter::prepareEmission(Program *prog)
218 {
219 for (ArrayList::Iterator fi = prog->allFuncs.iterator();
220 !fi.end(); fi.next()) {
221 Function *func = reinterpret_cast<Function *>(fi.get());
222 func->binPos = prog->binSize;
223 prepareEmission(func);
224
225 // adjust sizes & positions for schedulding info:
226 if (prog->getTarget()->hasSWSched) {
227 uint32_t adjPos = func->binPos;
228 BasicBlock *bb = NULL;
229 for (int i = 0; i < func->bbCount; ++i) {
230 bb = func->bbArray[i];
231 int32_t adjSize = bb->binSize;
232 if (adjPos % 64) {
233 adjSize -= 64 - adjPos % 64;
234 if (adjSize < 0)
235 adjSize = 0;
236 }
237 adjSize = bb->binSize + sizeToBundlesNVE4(adjSize) * 8;
238 bb->binPos = adjPos;
239 bb->binSize = adjSize;
240 adjPos += adjSize;
241 }
242 if (bb)
243 func->binSize = adjPos - func->binPos;
244 }
245
246 prog->binSize += func->binSize;
247 }
248 }
249
250 void
prepareEmission(Function * func)251 CodeEmitter::prepareEmission(Function *func)
252 {
253 func->bbCount = 0;
254 func->bbArray = new BasicBlock * [func->cfg.getSize()];
255
256 BasicBlock::get(func->cfg.getRoot())->binPos = func->binPos;
257
258 for (IteratorRef it = func->cfg.iteratorCFG(); !it->end(); it->next())
259 prepareEmission(BasicBlock::get(*it));
260 }
261
/**
 * Place @bb after the blocks already recorded in its function's bbArray and
 * compute the encoding size of each of its instructions.
 *
 * Steps, in order:
 *  - walk back over the previously placed blocks; a trailing OP_BRA whose
 *    target is @bb is removed (8 bytes less for that block and the
 *    function) and bb->binPos is set to the end of the preceding block;
 *  - append @bb to func->bbArray;
 *  - assign encSize to every instruction, trying to pair up instructions
 *    with a 4-byte minimum encoding by swapping adjacent instructions where
 *    isCommutationLegal() allows it, and accumulate bb->binSize;
 *  - force the exit instruction to 8 bytes;
 *  - add bb->binSize to func->binSize.
 *
 * Instructions may be removed from or reordered within @bb.
 */
void
CodeEmitter::prepareEmission(BasicBlock *bb)
{
   Instruction *i, *next;
   Function *func = bb->getFunction();
   int j;
   unsigned int nShort;

   // skip backwards over already placed blocks that have no size
   for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j);

   for (; j >= 0; --j) {
      BasicBlock *in = func->bbArray[j];
      Instruction *exit = in->getExit();

      // a branch at the end of 'in' that targets bb is dropped
      if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) {
         in->binSize -= 8;
         func->binSize -= 8;

         // shift every block placed after 'in' down by the removed 8 bytes;
         // note this reuses j, which ends up equal to func->bbCount, so if
         // the outer loop continues it restarts from the last placed block
         for (++j; j < func->bbCount; ++j)
            func->bbArray[j]->binPos -= 8;

         in->remove(exit);
      }
      bb->binPos = in->binPos + in->binSize;
      if (in->binSize) // no more no-op branches to bb
         break;
   }
   func->bbArray[func->bbCount++] = bb;

   // no exit instruction: nothing to size in this block
   if (!bb->getExit())
      return;

   // determine encoding size, try to group short instructions
   nShort = 0; // length of the current run of short (< 8 byte) instructions
   for (i = bb->getEntry(); i; i = next) {
      next = i->next;

      // drop memory barriers the target cannot encode
      if (i->op == OP_MEMBAR && !targ->isOpSupported(OP_MEMBAR, TYPE_NONE)) {
         bb->remove(i);
         continue;
      }

      i->encSize = getMinEncodingSize(i);
      // a short instruction that is not the last one extends the run
      if (next && i->encSize < 8)
         ++nShort;
      else
      // i is not counted as short, the run so far is odd and the following
      // instruction is short: try to reorder so the short ones pair up
      if ((nShort & 1) && next && getMinEncodingSize(next) == 4) {
         if (i->isCommutationLegal(i->next)) {
            // swap i and next; the short instruction (old next) is accounted
            // for in this iteration and i is revisited in the next one
            bb->permuteAdjacent(i, next);
            next->encSize = 4;
            next = i;
            i = i->prev;
            ++nShort;
         } else
         if (i->isCommutationLegal(i->prev) && next->next) {
            // swap i with its predecessor instead; next is sized here
            // (its 4 bytes are added below) and skipped by the loop
            bb->permuteAdjacent(i->prev, i);
            next->encSize = 4;
            next = next->next;
            bb->binSize += 4;
            ++nShort;
         } else {
            // no legal swap: widen both i and the unpaired short
            // instruction before it to 8 bytes
            i->encSize = 8;
            i->prev->encSize = 8;
            bb->binSize += 4;
            nShort = 0;
         }
      } else {
         // end of a run: i takes 8 bytes, and an unpaired short instruction
         // before it is widened from 4 to 8 bytes
         i->encSize = 8;
         if (nShort & 1) {
            i->prev->encSize = 8;
            bb->binSize += 4;
         }
         nShort = 0;
      }
      bb->binSize += i->encSize;
   }

   // the exit instruction must not use the 4-byte encoding
   if (bb->getExit()->encSize == 4) {
      assert(nShort);
      bb->getExit()->encSize = 8;
      bb->binSize += 4;

      // widening the exit may leave its predecessor as an unpaired short
      // instruction; widen that one too
      if ((bb->getExit()->prev->encSize == 4) && !(nShort & 1)) {
         bb->binSize += 8;
         bb->getExit()->prev->encSize = 8;
      }
   }
   assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 8));

   func->binSize += bb->binSize;
}
353
354 bool
emitBinary(struct nv50_ir_prog_info_out * info)355 Program::emitBinary(struct nv50_ir_prog_info_out *info)
356 {
357 CodeEmitter *emit = target->getCodeEmitter(progType);
358
359 emit->prepareEmission(this);
360
361 if (dbgFlags & NV50_IR_DEBUG_BASIC)
362 this->print();
363
364 if (!binSize) {
365 code = NULL;
366 return false;
367 }
368 code = reinterpret_cast<uint32_t *>(MALLOC(binSize));
369 if (!code)
370 return false;
371 emit->setCodeLocation(code, binSize);
372 info->bin.instructions = 0;
373
374 for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) {
375 Function *fn = reinterpret_cast<Function *>(fi.get());
376
377 assert(emit->getCodeSize() == fn->binPos);
378
379 for (int b = 0; b < fn->bbCount; ++b) {
380 for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next) {
381 emit->emitInstruction(i);
382 info->bin.instructions++;
383 if ((typeSizeof(i->sType) == 8 || typeSizeof(i->dType) == 8) &&
384 (isFloatType(i->sType) || isFloatType(i->dType)))
385 info->io.fp64 = true;
386 }
387 }
388 }
389 info->io.fp64 |= fp64;
390 info->bin.relocData = emit->getRelocInfo();
391 info->bin.fixupData = emit->getFixupInfo();
392
393 // the nvc0 driver will print the binary iself together with the header
394 if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0)
395 emit->printBinary();
396
397 delete emit;
398 return true;
399 }
400
401 #define RELOC_ALLOC_INCREMENT 8
402
403 bool
addReloc(RelocEntry::Type ty,int w,uint32_t data,uint32_t m,int s)404 CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m,
405 int s)
406 {
407 unsigned int n = relocInfo ? relocInfo->count : 0;
408
409 if (!(n % RELOC_ALLOC_INCREMENT)) {
410 size_t size = sizeof(RelocInfo) + n * sizeof(RelocEntry);
411 relocInfo = reinterpret_cast<RelocInfo *>(
412 REALLOC(relocInfo, n ? size : 0,
413 size + RELOC_ALLOC_INCREMENT * sizeof(RelocEntry)));
414 if (!relocInfo)
415 return false;
416 if (n == 0)
417 memset(relocInfo, 0, sizeof(RelocInfo));
418 }
419 ++relocInfo->count;
420
421 relocInfo->entry[n].data = data;
422 relocInfo->entry[n].mask = m;
423 relocInfo->entry[n].offset = codeSize + w * 4;
424 relocInfo->entry[n].bitPos = s;
425 relocInfo->entry[n].type = ty;
426
427 return true;
428 }
429
430 bool
addInterp(int ipa,int reg,FixupApply apply)431 CodeEmitter::addInterp(int ipa, int reg, FixupApply apply)
432 {
433 unsigned int n = fixupInfo ? fixupInfo->count : 0;
434
435 if (!(n % RELOC_ALLOC_INCREMENT)) {
436 size_t size = sizeof(FixupInfo) + n * sizeof(FixupEntry);
437 fixupInfo = reinterpret_cast<FixupInfo *>(
438 REALLOC(fixupInfo, n ? size : 0,
439 size + RELOC_ALLOC_INCREMENT * sizeof(FixupEntry)));
440 if (!fixupInfo)
441 return false;
442 if (n == 0)
443 fixupInfo->count = 0;
444 }
445 ++fixupInfo->count;
446
447 fixupInfo->entry[n] = FixupEntry(apply, ipa, reg, codeSize >> 2);
448
449 return true;
450 }
451
452 void
apply(uint32_t * binary,const RelocInfo * info) const453 RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const
454 {
455 uint32_t value = 0;
456
457 switch (type) {
458 case TYPE_CODE: value = info->codePos; break;
459 case TYPE_BUILTIN: value = info->libPos; break;
460 case TYPE_DATA: value = info->dataPos; break;
461 default:
462 assert(0);
463 break;
464 }
465 value += data;
466 value = (bitPos < 0) ? (value >> -bitPos) : (value << bitPos);
467
468 binary[offset / 4] &= ~mask;
469 binary[offset / 4] |= value & mask;
470 }
471
472 } // namespace nv50_ir
473
474
475 #include "codegen/nv50_ir_driver.h"
476
477 extern "C" {
478
479 void
nv50_ir_relocate_code(void * relocData,uint32_t * code,uint32_t codePos,uint32_t libPos,uint32_t dataPos)480 nv50_ir_relocate_code(void *relocData, uint32_t *code,
481 uint32_t codePos,
482 uint32_t libPos,
483 uint32_t dataPos)
484 {
485 nv50_ir::RelocInfo *info = reinterpret_cast<nv50_ir::RelocInfo *>(relocData);
486
487 info->codePos = codePos;
488 info->libPos = libPos;
489 info->dataPos = dataPos;
490
491 for (unsigned int i = 0; i < info->count; ++i)
492 info->entry[i].apply(code, info);
493 }
494
495 void
nv50_ir_apply_fixups(void * fixupData,uint32_t * code,bool force_persample_interp,bool flatshade,uint8_t alphatest)496 nv50_ir_apply_fixups(void *fixupData, uint32_t *code,
497 bool force_persample_interp, bool flatshade,
498 uint8_t alphatest)
499 {
500 nv50_ir::FixupInfo *info = reinterpret_cast<nv50_ir::FixupInfo *>(
501 fixupData);
502
503 // force_persample_interp: all non-flat -> per-sample
504 // flatshade: all color -> flat
505 // alphatest: PIPE_FUNC_* to use with alphatest
506 nv50_ir::FixupData data(force_persample_interp, flatshade, alphatest);
507 for (unsigned i = 0; i < info->count; ++i)
508 info->entry[i].apply(&info->entry[i], code, data);
509 }
510
511 void
nv50_ir_get_target_library(uint32_t chipset,const uint32_t ** code,uint32_t * size)512 nv50_ir_get_target_library(uint32_t chipset,
513 const uint32_t **code, uint32_t *size)
514 {
515 nv50_ir::Target *targ = nv50_ir::Target::create(chipset);
516 targ->getBuiltinCode(code, size);
517 nv50_ir::Target::destroy(targ);
518 }
519
520 }
521