1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "codegen/nv50_ir_target_nvc0.h"
24
25 namespace nv50_ir {
26
27 // Argh, all these assertions ...
28
29 class CodeEmitterNVC0 : public CodeEmitter
30 {
31 public:
32 CodeEmitterNVC0(const TargetNVC0 *);
33
34 virtual bool emitInstruction(Instruction *);
35 virtual uint32_t getMinEncodingSize(const Instruction *) const;
36 virtual void prepareEmission(Function *);
37
setProgramType(Program::Type pType)38 inline void setProgramType(Program::Type pType) { progType = pType; }
39
40 private:
41 const TargetNVC0 *targNVC0;
42
43 Program::Type progType;
44
45 const bool writeIssueDelays;
46
47 private:
48 void emitForm_A(const Instruction *, uint64_t);
49 void emitForm_B(const Instruction *, uint64_t);
50 void emitForm_S(const Instruction *, uint32_t, bool pred);
51
52 void emitPredicate(const Instruction *);
53
54 void setAddress16(const ValueRef&);
55 void setAddress24(const ValueRef&);
56 void setAddressByFile(const ValueRef&);
57 void setImmediate(const Instruction *, const int s); // needs op already set
58 void setImmediateS8(const ValueRef&);
59 void setSUConst16(const Instruction *, const int s);
60 void setSUPred(const Instruction *, const int s);
61 void setPDSTL(const Instruction *, const int d);
62
63 void emitCondCode(CondCode cc, int pos);
64 void emitInterpMode(const Instruction *);
65 void emitLoadStoreType(DataType ty);
66 void emitSUGType(DataType);
67 void emitSUAddr(const TexInstruction *);
68 void emitSUDim(const TexInstruction *);
69 void emitCachingMode(CacheMode c);
70
71 void emitShortSrc2(const ValueRef&);
72
73 inline uint8_t getSRegEncoding(const ValueRef&);
74
75 void roundMode_A(const Instruction *);
76 void roundMode_C(const Instruction *);
77 void roundMode_CS(const Instruction *);
78
79 void emitNegAbs12(const Instruction *);
80
81 void emitNOP(const Instruction *);
82
83 void emitLOAD(const Instruction *);
84 void emitSTORE(const Instruction *);
85 void emitMOV(const Instruction *);
86 void emitATOM(const Instruction *);
87 void emitMEMBAR(const Instruction *);
88 void emitCCTL(const Instruction *);
89
90 void emitINTERP(const Instruction *);
91 void emitAFETCH(const Instruction *);
92 void emitPFETCH(const Instruction *);
93 void emitVFETCH(const Instruction *);
94 void emitEXPORT(const Instruction *);
95 void emitOUT(const Instruction *);
96
97 void emitUADD(const Instruction *);
98 void emitFADD(const Instruction *);
99 void emitDADD(const Instruction *);
100 void emitUMUL(const Instruction *);
101 void emitFMUL(const Instruction *);
102 void emitDMUL(const Instruction *);
103 void emitIMAD(const Instruction *);
104 void emitISAD(const Instruction *);
105 void emitSHLADD(const Instruction *a);
106 void emitFMAD(const Instruction *);
107 void emitDMAD(const Instruction *);
108 void emitMADSP(const Instruction *);
109
110 void emitNOT(Instruction *);
111 void emitLogicOp(const Instruction *, uint8_t subOp);
112 void emitPOPC(const Instruction *);
113 void emitINSBF(const Instruction *);
114 void emitEXTBF(const Instruction *);
115 void emitBFIND(const Instruction *);
116 void emitPERMT(const Instruction *);
117 void emitShift(const Instruction *);
118
119 void emitSFnOp(const Instruction *, uint8_t subOp);
120
121 void emitCVT(Instruction *);
122 void emitMINMAX(const Instruction *);
123 void emitPreOp(const Instruction *);
124
125 void emitSET(const CmpInstruction *);
126 void emitSLCT(const CmpInstruction *);
127 void emitSELP(const Instruction *);
128
129 void emitTEXBAR(const Instruction *);
130 void emitTEX(const TexInstruction *);
131 void emitTEXCSAA(const TexInstruction *);
132 void emitTXQ(const TexInstruction *);
133
134 void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);
135
136 void emitFlow(const Instruction *);
137 void emitBAR(const Instruction *);
138
139 void emitSUCLAMPMode(uint16_t);
140 void emitSUCalc(Instruction *);
141 void emitSULDGB(const TexInstruction *);
142 void emitSUSTGx(const TexInstruction *);
143
144 void emitSULDB(const TexInstruction *);
145 void emitSUSTx(const TexInstruction *);
146 void emitSULEA(const TexInstruction *);
147
148 void emitVSHL(const Instruction *);
149 void emitVectorSubOp(const Instruction *);
150
151 void emitPIXLD(const Instruction *);
152
153 void emitSHFL(const Instruction *);
154
155 void emitVOTE(const Instruction *);
156
157 inline void defId(const ValueDef&, const int pos);
158 inline void defId(const Instruction *, int d, const int pos);
159 inline void srcId(const ValueRef&, const int pos);
160 inline void srcId(const ValueRef *, const int pos);
161 inline void srcId(const Instruction *, int s, const int pos);
162 inline void srcAddr32(const ValueRef&, int pos, int shr);
163
164 inline bool isLIMM(const ValueRef&, DataType ty);
165 };
166
// HEX64(h, l): paste two 8-digit hex halves into a single 64-bit literal,
// so that the high and low instruction words stay visually separated.
#define HEX64(h, l) 0x##h##l##ULL

// Register data of the representative value of a source (SDATA) or
// definition (DDATA) operand.
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) ((a).rep()->reg.data)
172
srcId(const ValueRef & src,const int pos)173 void CodeEmitterNVC0::srcId(const ValueRef& src, const int pos)
174 {
175 code[pos / 32] |= (src.get() ? SDATA(src).id : 63) << (pos % 32);
176 }
177
srcId(const ValueRef * src,const int pos)178 void CodeEmitterNVC0::srcId(const ValueRef *src, const int pos)
179 {
180 code[pos / 32] |= (src ? SDATA(*src).id : 63) << (pos % 32);
181 }
182
srcId(const Instruction * insn,int s,int pos)183 void CodeEmitterNVC0::srcId(const Instruction *insn, int s, int pos)
184 {
185 int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : 63;
186 code[pos / 32] |= r << (pos % 32);
187 }
188
189 void
srcAddr32(const ValueRef & src,int pos,int shr)190 CodeEmitterNVC0::srcAddr32(const ValueRef& src, int pos, int shr)
191 {
192 const uint32_t offset = SDATA(src).offset >> shr;
193
194 code[pos / 32] |= offset << (pos % 32);
195 if (pos && (pos < 32))
196 code[1] |= offset >> (32 - pos);
197 }
198
defId(const ValueDef & def,const int pos)199 void CodeEmitterNVC0::defId(const ValueDef& def, const int pos)
200 {
201 code[pos / 32] |= (def.get() && def.getFile() != FILE_FLAGS ? DDATA(def).id : 63) << (pos % 32);
202 }
203
defId(const Instruction * insn,int d,const int pos)204 void CodeEmitterNVC0::defId(const Instruction *insn, int d, const int pos)
205 {
206 if (insn->defExists(d))
207 defId(insn->def(d), pos);
208 else
209 code[pos / 32] |= 63 << (pos % 32);
210 }
211
isLIMM(const ValueRef & ref,DataType ty)212 bool CodeEmitterNVC0::isLIMM(const ValueRef& ref, DataType ty)
213 {
214 const ImmediateValue *imm = ref.get()->asImm();
215
216 if (ty == TYPE_F32)
217 return imm && imm->reg.data.u32 & 0xfff;
218 else
219 return imm && (imm->reg.data.s32 > 0x7ffff ||
220 imm->reg.data.s32 < -0x80000);
221 }
222
223 void
roundMode_A(const Instruction * insn)224 CodeEmitterNVC0::roundMode_A(const Instruction *insn)
225 {
226 switch (insn->rnd) {
227 case ROUND_M: code[1] |= 1 << 23; break;
228 case ROUND_P: code[1] |= 2 << 23; break;
229 case ROUND_Z: code[1] |= 3 << 23; break;
230 default:
231 assert(insn->rnd == ROUND_N);
232 break;
233 }
234 }
235
236 void
emitNegAbs12(const Instruction * i)237 CodeEmitterNVC0::emitNegAbs12(const Instruction *i)
238 {
239 if (i->src(1).mod.abs()) code[0] |= 1 << 6;
240 if (i->src(0).mod.abs()) code[0] |= 1 << 7;
241 if (i->src(1).mod.neg()) code[0] |= 1 << 8;
242 if (i->src(0).mod.neg()) code[0] |= 1 << 9;
243 }
244
emitCondCode(CondCode cc,int pos)245 void CodeEmitterNVC0::emitCondCode(CondCode cc, int pos)
246 {
247 uint8_t val;
248
249 switch (cc) {
250 case CC_LT: val = 0x1; break;
251 case CC_LTU: val = 0x9; break;
252 case CC_EQ: val = 0x2; break;
253 case CC_EQU: val = 0xa; break;
254 case CC_LE: val = 0x3; break;
255 case CC_LEU: val = 0xb; break;
256 case CC_GT: val = 0x4; break;
257 case CC_GTU: val = 0xc; break;
258 case CC_NE: val = 0x5; break;
259 case CC_NEU: val = 0xd; break;
260 case CC_GE: val = 0x6; break;
261 case CC_GEU: val = 0xe; break;
262 case CC_TR: val = 0xf; break;
263 case CC_FL: val = 0x0; break;
264
265 case CC_A: val = 0x14; break;
266 case CC_NA: val = 0x13; break;
267 case CC_S: val = 0x15; break;
268 case CC_NS: val = 0x12; break;
269 case CC_C: val = 0x16; break;
270 case CC_NC: val = 0x11; break;
271 case CC_O: val = 0x17; break;
272 case CC_NO: val = 0x10; break;
273
274 default:
275 val = 0;
276 assert(!"invalid condition code");
277 break;
278 }
279 code[pos / 32] |= val << (pos % 32);
280 }
281
282 void
emitPredicate(const Instruction * i)283 CodeEmitterNVC0::emitPredicate(const Instruction *i)
284 {
285 if (i->predSrc >= 0) {
286 assert(i->getPredicate()->reg.file == FILE_PREDICATE);
287 srcId(i->src(i->predSrc), 10);
288 if (i->cc == CC_NOT_P)
289 code[0] |= 0x2000; // negate
290 } else {
291 code[0] |= 0x1c00;
292 }
293 }
294
295 void
setAddressByFile(const ValueRef & src)296 CodeEmitterNVC0::setAddressByFile(const ValueRef& src)
297 {
298 switch (src.getFile()) {
299 case FILE_MEMORY_GLOBAL:
300 srcAddr32(src, 26, 0);
301 break;
302 case FILE_MEMORY_LOCAL:
303 case FILE_MEMORY_SHARED:
304 setAddress24(src);
305 break;
306 default:
307 assert(src.getFile() == FILE_MEMORY_CONST);
308 setAddress16(src);
309 break;
310 }
311 }
312
313 void
setAddress16(const ValueRef & src)314 CodeEmitterNVC0::setAddress16(const ValueRef& src)
315 {
316 Symbol *sym = src.get()->asSym();
317
318 assert(sym);
319
320 code[0] |= (sym->reg.data.offset & 0x003f) << 26;
321 code[1] |= (sym->reg.data.offset & 0xffc0) >> 6;
322 }
323
324 void
setAddress24(const ValueRef & src)325 CodeEmitterNVC0::setAddress24(const ValueRef& src)
326 {
327 Symbol *sym = src.get()->asSym();
328
329 assert(sym);
330
331 code[0] |= (sym->reg.data.offset & 0x00003f) << 26;
332 code[1] |= (sym->reg.data.offset & 0xffffc0) >> 6;
333 }
334
335 void
setImmediate(const Instruction * i,const int s)336 CodeEmitterNVC0::setImmediate(const Instruction *i, const int s)
337 {
338 const ImmediateValue *imm = i->src(s).get()->asImm();
339 uint32_t u32;
340
341 assert(imm);
342 u32 = imm->reg.data.u32;
343
344 if ((code[0] & 0xf) == 0x1) {
345 // double immediate
346 uint64_t u64 = imm->reg.data.u64;
347 assert(!(u64 & 0x00000fffffffffffULL));
348 assert(!(code[1] & 0xc000));
349 code[0] |= ((u64 >> 44) & 0x3f) << 26;
350 code[1] |= 0xc000 | (u64 >> 50);
351 } else
352 if ((code[0] & 0xf) == 0x2) {
353 // LIMM
354 code[0] |= (u32 & 0x3f) << 26;
355 code[1] |= u32 >> 6;
356 } else
357 if ((code[0] & 0xf) == 0x3 || (code[0] & 0xf) == 4) {
358 // integer immediate
359 assert((u32 & 0xfff80000) == 0 || (u32 & 0xfff80000) == 0xfff80000);
360 assert(!(code[1] & 0xc000));
361 u32 &= 0xfffff;
362 code[0] |= (u32 & 0x3f) << 26;
363 code[1] |= 0xc000 | (u32 >> 6);
364 } else {
365 // float immediate
366 assert(!(u32 & 0x00000fff));
367 assert(!(code[1] & 0xc000));
368 code[0] |= ((u32 >> 12) & 0x3f) << 26;
369 code[1] |= 0xc000 | (u32 >> 18);
370 }
371 }
372
setImmediateS8(const ValueRef & ref)373 void CodeEmitterNVC0::setImmediateS8(const ValueRef &ref)
374 {
375 const ImmediateValue *imm = ref.get()->asImm();
376
377 int8_t s8 = static_cast<int8_t>(imm->reg.data.s32);
378
379 assert(s8 == imm->reg.data.s32);
380
381 code[0] |= (s8 & 0x3f) << 26;
382 code[0] |= (s8 >> 6) << 8;
383 }
384
setPDSTL(const Instruction * i,const int d)385 void CodeEmitterNVC0::setPDSTL(const Instruction *i, const int d)
386 {
387 assert(d < 0 || (i->defExists(d) && i->def(d).getFile() == FILE_PREDICATE));
388
389 uint32_t pred = d >= 0 ? DDATA(i->def(d)).id : 7;
390
391 code[0] |= (pred & 3) << 8;
392 code[1] |= (pred & 4) << (26 - 2);
393 }
394
395 void
emitForm_A(const Instruction * i,uint64_t opc)396 CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc)
397 {
398 code[0] = opc;
399 code[1] = opc >> 32;
400
401 emitPredicate(i);
402
403 defId(i->def(0), 14);
404
405 int s1 = 26;
406 if (i->srcExists(2) && i->getSrc(2)->reg.file == FILE_MEMORY_CONST)
407 s1 = 49;
408
409 for (int s = 0; s < 3 && i->srcExists(s); ++s) {
410 switch (i->getSrc(s)->reg.file) {
411 case FILE_MEMORY_CONST:
412 assert(!(code[1] & 0xc000));
413 code[1] |= (s == 2) ? 0x8000 : 0x4000;
414 code[1] |= i->getSrc(s)->reg.fileIndex << 10;
415 setAddress16(i->src(s));
416 break;
417 case FILE_IMMEDIATE:
418 assert(s == 1 ||
419 i->op == OP_MOV || i->op == OP_PRESIN || i->op == OP_PREEX2);
420 assert(!(code[1] & 0xc000));
421 setImmediate(i, s);
422 break;
423 case FILE_GPR:
424 if ((s == 2) && ((code[0] & 0x7) == 2)) // LIMM: 3rd src == dst
425 break;
426 srcId(i->src(s), s ? ((s == 2) ? 49 : s1) : 20);
427 break;
428 default:
429 if (i->op == OP_SELP) {
430 // OP_SELP is used to implement shared+atomics on Fermi.
431 assert(s == 2 && i->src(s).getFile() == FILE_PREDICATE);
432 srcId(i->src(s), 49);
433 }
434 // ignore here, can be predicate or flags, but must not be address
435 break;
436 }
437 }
438 }
439
440 void
emitForm_B(const Instruction * i,uint64_t opc)441 CodeEmitterNVC0::emitForm_B(const Instruction *i, uint64_t opc)
442 {
443 code[0] = opc;
444 code[1] = opc >> 32;
445
446 emitPredicate(i);
447
448 defId(i->def(0), 14);
449
450 switch (i->src(0).getFile()) {
451 case FILE_MEMORY_CONST:
452 assert(!(code[1] & 0xc000));
453 code[1] |= 0x4000 | (i->src(0).get()->reg.fileIndex << 10);
454 setAddress16(i->src(0));
455 break;
456 case FILE_IMMEDIATE:
457 assert(!(code[1] & 0xc000));
458 setImmediate(i, 0);
459 break;
460 case FILE_GPR:
461 srcId(i->src(0), 26);
462 break;
463 default:
464 // ignore here, can be predicate or flags, but must not be address
465 break;
466 }
467 }
468
469 void
emitForm_S(const Instruction * i,uint32_t opc,bool pred)470 CodeEmitterNVC0::emitForm_S(const Instruction *i, uint32_t opc, bool pred)
471 {
472 code[0] = opc;
473
474 int ss2a = 0;
475 if (opc == 0x0d || opc == 0x0e)
476 ss2a = 2;
477
478 defId(i->def(0), 14);
479 srcId(i->src(0), 20);
480
481 assert(pred || (i->predSrc < 0));
482 if (pred)
483 emitPredicate(i);
484
485 for (int s = 1; s < 3 && i->srcExists(s); ++s) {
486 if (i->src(s).get()->reg.file == FILE_MEMORY_CONST) {
487 assert(!(code[0] & (0x300 >> ss2a)));
488 switch (i->src(s).get()->reg.fileIndex) {
489 case 0: code[0] |= 0x100 >> ss2a; break;
490 case 1: code[0] |= 0x200 >> ss2a; break;
491 case 16: code[0] |= 0x300 >> ss2a; break;
492 default:
493 ERROR("invalid c[] space for short form\n");
494 break;
495 }
496 if (s == 1)
497 code[0] |= i->getSrc(s)->reg.data.offset << 24;
498 else
499 code[0] |= i->getSrc(s)->reg.data.offset << 6;
500 } else
501 if (i->src(s).getFile() == FILE_IMMEDIATE) {
502 assert(s == 1);
503 setImmediateS8(i->src(s));
504 } else
505 if (i->src(s).getFile() == FILE_GPR) {
506 srcId(i->src(s), (s == 1) ? 26 : 8);
507 }
508 }
509 }
510
511 void
emitShortSrc2(const ValueRef & src)512 CodeEmitterNVC0::emitShortSrc2(const ValueRef &src)
513 {
514 if (src.getFile() == FILE_MEMORY_CONST) {
515 switch (src.get()->reg.fileIndex) {
516 case 0: code[0] |= 0x100; break;
517 case 1: code[0] |= 0x200; break;
518 case 16: code[0] |= 0x300; break;
519 default:
520 assert(!"unsupported file index for short op");
521 break;
522 }
523 srcAddr32(src, 20, 2);
524 } else {
525 srcId(src, 20);
526 assert(src.getFile() == FILE_GPR);
527 }
528 }
529
530 void
emitNOP(const Instruction * i)531 CodeEmitterNVC0::emitNOP(const Instruction *i)
532 {
533 code[0] = 0x000001e4;
534 code[1] = 0x40000000;
535 emitPredicate(i);
536 }
537
538 void
emitFMAD(const Instruction * i)539 CodeEmitterNVC0::emitFMAD(const Instruction *i)
540 {
541 bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
542
543 if (i->encSize == 8) {
544 if (isLIMM(i->src(1), TYPE_F32)) {
545 emitForm_A(i, HEX64(20000000, 00000002));
546 } else {
547 emitForm_A(i, HEX64(30000000, 00000000));
548
549 if (i->src(2).mod.neg())
550 code[0] |= 1 << 8;
551 }
552 roundMode_A(i);
553
554 if (neg1)
555 code[0] |= 1 << 9;
556
557 if (i->saturate)
558 code[0] |= 1 << 5;
559
560 if (i->dnz)
561 code[0] |= 1 << 7;
562 else
563 if (i->ftz)
564 code[0] |= 1 << 6;
565 } else {
566 assert(!i->saturate && !i->src(2).mod.neg());
567 emitForm_S(i, (i->src(2).getFile() == FILE_MEMORY_CONST) ? 0x2e : 0x0e,
568 false);
569 if (neg1)
570 code[0] |= 1 << 4;
571 }
572 }
573
574 void
emitDMAD(const Instruction * i)575 CodeEmitterNVC0::emitDMAD(const Instruction *i)
576 {
577 bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
578
579 emitForm_A(i, HEX64(20000000, 00000001));
580
581 if (i->src(2).mod.neg())
582 code[0] |= 1 << 8;
583
584 roundMode_A(i);
585
586 if (neg1)
587 code[0] |= 1 << 9;
588
589 assert(!i->saturate);
590 assert(!i->ftz);
591 }
592
593 void
emitFMUL(const Instruction * i)594 CodeEmitterNVC0::emitFMUL(const Instruction *i)
595 {
596 bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
597
598 assert(i->postFactor >= -3 && i->postFactor <= 3);
599
600 if (i->encSize == 8) {
601 if (isLIMM(i->src(1), TYPE_F32)) {
602 assert(i->postFactor == 0); // constant folded, hopefully
603 emitForm_A(i, HEX64(30000000, 00000002));
604 } else {
605 emitForm_A(i, HEX64(58000000, 00000000));
606 roundMode_A(i);
607 code[1] |= ((i->postFactor > 0) ?
608 (7 - i->postFactor) : (0 - i->postFactor)) << 17;
609 }
610 if (neg)
611 code[1] ^= 1 << 25; // aliases with LIMM sign bit
612
613 if (i->saturate)
614 code[0] |= 1 << 5;
615
616 if (i->dnz)
617 code[0] |= 1 << 7;
618 else
619 if (i->ftz)
620 code[0] |= 1 << 6;
621 } else {
622 assert(!neg && !i->saturate && !i->ftz && !i->postFactor);
623 emitForm_S(i, 0xa8, true);
624 }
625 }
626
627 void
emitDMUL(const Instruction * i)628 CodeEmitterNVC0::emitDMUL(const Instruction *i)
629 {
630 bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
631
632 emitForm_A(i, HEX64(50000000, 00000001));
633 roundMode_A(i);
634
635 if (neg)
636 code[0] |= 1 << 9;
637
638 assert(!i->saturate);
639 assert(!i->ftz);
640 assert(!i->dnz);
641 assert(!i->postFactor);
642 }
643
644 void
emitUMUL(const Instruction * i)645 CodeEmitterNVC0::emitUMUL(const Instruction *i)
646 {
647 if (i->encSize == 8) {
648 if (isLIMM(i->src(1), TYPE_U32)) {
649 emitForm_A(i, HEX64(10000000, 00000002));
650 } else {
651 emitForm_A(i, HEX64(50000000, 00000003));
652 }
653 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
654 code[0] |= 1 << 6;
655 if (i->sType == TYPE_S32)
656 code[0] |= 1 << 5;
657 if (i->dType == TYPE_S32)
658 code[0] |= 1 << 7;
659 } else {
660 emitForm_S(i, i->src(1).getFile() == FILE_IMMEDIATE ? 0xaa : 0x2a, true);
661
662 if (i->sType == TYPE_S32)
663 code[0] |= 1 << 6;
664 }
665 }
666
667 void
emitFADD(const Instruction * i)668 CodeEmitterNVC0::emitFADD(const Instruction *i)
669 {
670 if (i->encSize == 8) {
671 if (isLIMM(i->src(1), TYPE_F32)) {
672 assert(!i->saturate);
673 emitForm_A(i, HEX64(28000000, 00000002));
674
675 code[0] |= i->src(0).mod.abs() << 7;
676 code[0] |= i->src(0).mod.neg() << 9;
677
678 if (i->src(1).mod.abs())
679 code[1] &= 0xfdffffff;
680 if ((i->op == OP_SUB) != static_cast<bool>(i->src(1).mod.neg()))
681 code[1] ^= 0x02000000;
682 } else {
683 emitForm_A(i, HEX64(50000000, 00000000));
684
685 roundMode_A(i);
686 if (i->saturate)
687 code[1] |= 1 << 17;
688
689 emitNegAbs12(i);
690 if (i->op == OP_SUB) code[0] ^= 1 << 8;
691 }
692 if (i->ftz)
693 code[0] |= 1 << 5;
694 } else {
695 assert(!i->saturate && i->op != OP_SUB &&
696 !i->src(0).mod.abs() &&
697 !i->src(1).mod.neg() && !i->src(1).mod.abs());
698
699 emitForm_S(i, 0x49, true);
700
701 if (i->src(0).mod.neg())
702 code[0] |= 1 << 7;
703 }
704 }
705
706 void
emitDADD(const Instruction * i)707 CodeEmitterNVC0::emitDADD(const Instruction *i)
708 {
709 assert(i->encSize == 8);
710 emitForm_A(i, HEX64(48000000, 00000001));
711 roundMode_A(i);
712 assert(!i->saturate);
713 assert(!i->ftz);
714 emitNegAbs12(i);
715 if (i->op == OP_SUB)
716 code[0] ^= 1 << 8;
717 }
718
719 void
emitUADD(const Instruction * i)720 CodeEmitterNVC0::emitUADD(const Instruction *i)
721 {
722 uint32_t addOp = 0;
723
724 assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
725
726 if (i->src(0).mod.neg())
727 addOp |= 0x200;
728 if (i->src(1).mod.neg())
729 addOp |= 0x100;
730 if (i->op == OP_SUB)
731 addOp ^= 0x100;
732
733 assert(addOp != 0x300); // would be add-plus-one
734
735 if (i->encSize == 8) {
736 if (isLIMM(i->src(1), TYPE_U32)) {
737 emitForm_A(i, HEX64(08000000, 00000002));
738 if (i->flagsDef >= 0)
739 code[1] |= 1 << 26; // write carry
740 } else {
741 emitForm_A(i, HEX64(48000000, 00000003));
742 if (i->flagsDef >= 0)
743 code[1] |= 1 << 16; // write carry
744 }
745 code[0] |= addOp;
746
747 if (i->saturate)
748 code[0] |= 1 << 5;
749 if (i->flagsSrc >= 0) // add carry
750 code[0] |= 1 << 6;
751 } else {
752 assert(!(addOp & 0x100));
753 emitForm_S(i, (addOp >> 3) |
754 ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0xac : 0x2c), true);
755 }
756 }
757
758 void
emitIMAD(const Instruction * i)759 CodeEmitterNVC0::emitIMAD(const Instruction *i)
760 {
761 uint8_t addOp =
762 i->src(2).mod.neg() | ((i->src(0).mod.neg() ^ i->src(1).mod.neg()) << 1);
763
764 assert(i->encSize == 8);
765 emitForm_A(i, HEX64(20000000, 00000003));
766
767 assert(addOp != 3);
768 code[0] |= addOp << 8;
769
770 if (isSignedType(i->dType))
771 code[0] |= 1 << 7;
772 if (isSignedType(i->sType))
773 code[0] |= 1 << 5;
774
775 code[1] |= i->saturate << 24;
776
777 if (i->flagsDef >= 0) code[1] |= 1 << 16;
778 if (i->flagsSrc >= 0) code[1] |= 1 << 23;
779
780 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
781 code[0] |= 1 << 6;
782 }
783
784 void
emitSHLADD(const Instruction * i)785 CodeEmitterNVC0::emitSHLADD(const Instruction *i)
786 {
787 uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(2).mod.neg();
788 const ImmediateValue *imm = i->src(1).get()->asImm();
789 assert(imm);
790
791 code[0] = 0x00000003;
792 code[1] = 0x40000000 | addOp << 23;
793
794 emitPredicate(i);
795
796 defId(i->def(0), 14);
797 srcId(i->src(0), 20);
798
799 if (i->flagsDef >= 0)
800 code[1] |= 1 << 16;
801
802 assert(!(imm->reg.data.u32 & 0xffffffe0));
803 code[0] |= imm->reg.data.u32 << 5;
804
805 switch (i->src(2).getFile()) {
806 case FILE_GPR:
807 srcId(i->src(2), 26);
808 break;
809 case FILE_MEMORY_CONST:
810 code[1] |= 0x4000;
811 code[1] |= i->getSrc(2)->reg.fileIndex << 10;
812 setAddress16(i->src(2));
813 break;
814 case FILE_IMMEDIATE:
815 setImmediate(i, 2);
816 break;
817 default:
818 assert(!"bad src2 file");
819 break;
820 }
821 }
822
823 void
emitMADSP(const Instruction * i)824 CodeEmitterNVC0::emitMADSP(const Instruction *i)
825 {
826 assert(targ->getChipset() >= NVISA_GK104_CHIPSET);
827
828 emitForm_A(i, HEX64(00000000, 00000003));
829
830 if (i->subOp == NV50_IR_SUBOP_MADSP_SD) {
831 code[1] |= 0x01800000;
832 } else {
833 code[0] |= (i->subOp & 0x00f) << 7;
834 code[0] |= (i->subOp & 0x0f0) << 1;
835 code[0] |= (i->subOp & 0x100) >> 3;
836 code[0] |= (i->subOp & 0x200) >> 2;
837 code[1] |= (i->subOp & 0xc00) << 13;
838 }
839
840 if (i->flagsDef >= 0)
841 code[1] |= 1 << 16;
842 }
843
844 void
emitISAD(const Instruction * i)845 CodeEmitterNVC0::emitISAD(const Instruction *i)
846 {
847 assert(i->dType == TYPE_S32 || i->dType == TYPE_U32);
848 assert(i->encSize == 8);
849
850 emitForm_A(i, HEX64(38000000, 00000003));
851
852 if (i->dType == TYPE_S32)
853 code[0] |= 1 << 5;
854 }
855
856 void
emitNOT(Instruction * i)857 CodeEmitterNVC0::emitNOT(Instruction *i)
858 {
859 assert(i->encSize == 8);
860 if (i->getPredicate())
861 i->moveSources(1, 1);
862 i->setSrc(1, i->src(0));
863 emitForm_A(i, HEX64(68000000, 000001c3));
864 }
865
866 void
emitLogicOp(const Instruction * i,uint8_t subOp)867 CodeEmitterNVC0::emitLogicOp(const Instruction *i, uint8_t subOp)
868 {
869 if (i->def(0).getFile() == FILE_PREDICATE) {
870 code[0] = 0x00000004 | (subOp << 30);
871 code[1] = 0x0c000000;
872
873 emitPredicate(i);
874
875 defId(i->def(0), 17);
876 srcId(i->src(0), 20);
877 if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 23;
878 srcId(i->src(1), 26);
879 if (i->src(1).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 29;
880
881 if (i->defExists(1)) {
882 defId(i->def(1), 14);
883 } else {
884 code[0] |= 7 << 14;
885 }
886 // (a OP b) OP c
887 if (i->predSrc != 2 && i->srcExists(2)) {
888 code[1] |= subOp << 21;
889 srcId(i->src(2), 49);
890 if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 20;
891 } else {
892 code[1] |= 0x000e0000;
893 }
894 } else
895 if (i->encSize == 8) {
896 if (isLIMM(i->src(1), TYPE_U32)) {
897 emitForm_A(i, HEX64(38000000, 00000002));
898
899 if (i->flagsDef >= 0)
900 code[1] |= 1 << 26;
901 } else {
902 emitForm_A(i, HEX64(68000000, 00000003));
903
904 if (i->flagsDef >= 0)
905 code[1] |= 1 << 16;
906 }
907 code[0] |= subOp << 6;
908
909 if (i->flagsSrc >= 0) // carry
910 code[0] |= 1 << 5;
911
912 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
913 if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
914 } else {
915 emitForm_S(i, (subOp << 5) |
916 ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0x1d : 0x8d), true);
917 }
918 }
919
920 void
emitPOPC(const Instruction * i)921 CodeEmitterNVC0::emitPOPC(const Instruction *i)
922 {
923 emitForm_A(i, HEX64(54000000, 00000004));
924
925 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
926 if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
927 }
928
929 void
emitINSBF(const Instruction * i)930 CodeEmitterNVC0::emitINSBF(const Instruction *i)
931 {
932 emitForm_A(i, HEX64(28000000, 00000003));
933 }
934
935 void
emitEXTBF(const Instruction * i)936 CodeEmitterNVC0::emitEXTBF(const Instruction *i)
937 {
938 emitForm_A(i, HEX64(70000000, 00000003));
939
940 if (i->dType == TYPE_S32)
941 code[0] |= 1 << 5;
942 if (i->subOp == NV50_IR_SUBOP_EXTBF_REV)
943 code[0] |= 1 << 8;
944 }
945
946 void
emitBFIND(const Instruction * i)947 CodeEmitterNVC0::emitBFIND(const Instruction *i)
948 {
949 emitForm_B(i, HEX64(78000000, 00000003));
950
951 if (i->dType == TYPE_S32)
952 code[0] |= 1 << 5;
953 if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
954 code[0] |= 1 << 8;
955 if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT)
956 code[0] |= 1 << 6;
957 }
958
959 void
emitPERMT(const Instruction * i)960 CodeEmitterNVC0::emitPERMT(const Instruction *i)
961 {
962 emitForm_A(i, HEX64(24000000, 00000004));
963
964 code[0] |= i->subOp << 5;
965 }
966
967 void
emitShift(const Instruction * i)968 CodeEmitterNVC0::emitShift(const Instruction *i)
969 {
970 if (i->op == OP_SHR) {
971 emitForm_A(i, HEX64(58000000, 00000003)
972 | (isSignedType(i->dType) ? 0x20 : 0x00));
973 } else {
974 emitForm_A(i, HEX64(60000000, 00000003));
975 }
976
977 if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP)
978 code[0] |= 1 << 9;
979 }
980
981 void
emitPreOp(const Instruction * i)982 CodeEmitterNVC0::emitPreOp(const Instruction *i)
983 {
984 if (i->encSize == 8) {
985 emitForm_B(i, HEX64(60000000, 00000000));
986
987 if (i->op == OP_PREEX2)
988 code[0] |= 0x20;
989
990 if (i->src(0).mod.abs()) code[0] |= 1 << 6;
991 if (i->src(0).mod.neg()) code[0] |= 1 << 8;
992 } else {
993 emitForm_S(i, i->op == OP_PREEX2 ? 0x74000008 : 0x70000008, true);
994 }
995 }
996
997 void
emitSFnOp(const Instruction * i,uint8_t subOp)998 CodeEmitterNVC0::emitSFnOp(const Instruction *i, uint8_t subOp)
999 {
1000 if (i->encSize == 8) {
1001 code[0] = 0x00000000 | (subOp << 26);
1002 code[1] = 0xc8000000;
1003
1004 emitPredicate(i);
1005
1006 defId(i->def(0), 14);
1007 srcId(i->src(0), 20);
1008
1009 assert(i->src(0).getFile() == FILE_GPR);
1010
1011 if (i->saturate) code[0] |= 1 << 5;
1012
1013 if (i->src(0).mod.abs()) code[0] |= 1 << 7;
1014 if (i->src(0).mod.neg()) code[0] |= 1 << 9;
1015 } else {
1016 emitForm_S(i, 0x80000008 | (subOp << 26), true);
1017
1018 assert(!i->src(0).mod.neg());
1019 if (i->src(0).mod.abs()) code[0] |= 1 << 30;
1020 }
1021 }
1022
1023 void
emitMINMAX(const Instruction * i)1024 CodeEmitterNVC0::emitMINMAX(const Instruction *i)
1025 {
1026 uint64_t op;
1027
1028 assert(i->encSize == 8);
1029
1030 op = (i->op == OP_MIN) ? 0x080e000000000000ULL : 0x081e000000000000ULL;
1031
1032 if (i->ftz)
1033 op |= 1 << 5;
1034 else
1035 if (!isFloatType(i->dType)) {
1036 op |= isSignedType(i->dType) ? 0x23 : 0x03;
1037 op |= i->subOp << 6;
1038 }
1039 if (i->dType == TYPE_F64)
1040 op |= 0x01;
1041
1042 emitForm_A(i, op);
1043 emitNegAbs12(i);
1044
1045 if (i->flagsDef >= 0)
1046 code[1] |= 1 << 16;
1047 }
1048
1049 void
roundMode_C(const Instruction * i)1050 CodeEmitterNVC0::roundMode_C(const Instruction *i)
1051 {
1052 switch (i->rnd) {
1053 case ROUND_M: code[1] |= 1 << 17; break;
1054 case ROUND_P: code[1] |= 2 << 17; break;
1055 case ROUND_Z: code[1] |= 3 << 17; break;
1056 case ROUND_NI: code[0] |= 1 << 7; break;
1057 case ROUND_MI: code[0] |= 1 << 7; code[1] |= 1 << 17; break;
1058 case ROUND_PI: code[0] |= 1 << 7; code[1] |= 2 << 17; break;
1059 case ROUND_ZI: code[0] |= 1 << 7; code[1] |= 3 << 17; break;
1060 case ROUND_N: break;
1061 default:
1062 assert(!"invalid round mode");
1063 break;
1064 }
1065 }
1066
1067 void
roundMode_CS(const Instruction * i)1068 CodeEmitterNVC0::roundMode_CS(const Instruction *i)
1069 {
1070 switch (i->rnd) {
1071 case ROUND_M:
1072 case ROUND_MI: code[0] |= 1 << 16; break;
1073 case ROUND_P:
1074 case ROUND_PI: code[0] |= 2 << 16; break;
1075 case ROUND_Z:
1076 case ROUND_ZI: code[0] |= 3 << 16; break;
1077 default:
1078 break;
1079 }
1080 }
1081
1082 void
emitCVT(Instruction * i)1083 CodeEmitterNVC0::emitCVT(Instruction *i)
1084 {
1085 const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
1086 DataType dType;
1087
1088 switch (i->op) {
1089 case OP_CEIL: i->rnd = f2f ? ROUND_PI : ROUND_P; break;
1090 case OP_FLOOR: i->rnd = f2f ? ROUND_MI : ROUND_M; break;
1091 case OP_TRUNC: i->rnd = f2f ? ROUND_ZI : ROUND_Z; break;
1092 default:
1093 break;
1094 }
1095
1096 const bool sat = (i->op == OP_SAT) || i->saturate;
1097 const bool abs = (i->op == OP_ABS) || i->src(0).mod.abs();
1098 const bool neg = (i->op == OP_NEG) || i->src(0).mod.neg();
1099
1100 if (i->op == OP_NEG && i->dType == TYPE_U32)
1101 dType = TYPE_S32;
1102 else
1103 dType = i->dType;
1104
1105 if (i->encSize == 8) {
1106 emitForm_B(i, HEX64(10000000, 00000004));
1107
1108 roundMode_C(i);
1109
1110 // cvt u16 f32 sets high bits to 0, so we don't have to use Value::Size()
1111 code[0] |= util_logbase2(typeSizeof(dType)) << 20;
1112 code[0] |= util_logbase2(typeSizeof(i->sType)) << 23;
1113
1114 // for 8/16 source types, the byte/word is in subOp. word 1 is
1115 // represented as 2.
1116 if (!isFloatType(i->sType))
1117 code[1] |= i->subOp << 0x17;
1118 else
1119 code[1] |= i->subOp << 0x18;
1120
1121 if (sat)
1122 code[0] |= 0x20;
1123 if (abs)
1124 code[0] |= 1 << 6;
1125 if (neg && i->op != OP_ABS)
1126 code[0] |= 1 << 8;
1127
1128 if (i->ftz)
1129 code[1] |= 1 << 23;
1130
1131 if (isSignedIntType(dType))
1132 code[0] |= 0x080;
1133 if (isSignedIntType(i->sType))
1134 code[0] |= 0x200;
1135
1136 if (isFloatType(dType)) {
1137 if (!isFloatType(i->sType))
1138 code[1] |= 0x08000000;
1139 } else {
1140 if (isFloatType(i->sType))
1141 code[1] |= 0x04000000;
1142 else
1143 code[1] |= 0x0c000000;
1144 }
1145 } else {
1146 if (i->op == OP_CEIL || i->op == OP_FLOOR || i->op == OP_TRUNC) {
1147 code[0] = 0x298;
1148 } else
1149 if (isFloatType(dType)) {
1150 if (isFloatType(i->sType))
1151 code[0] = 0x098;
1152 else
1153 code[0] = 0x088 | (isSignedType(i->sType) ? (1 << 8) : 0);
1154 } else {
1155 assert(isFloatType(i->sType));
1156
1157 code[0] = 0x288 | (isSignedType(i->sType) ? (1 << 8) : 0);
1158 }
1159
1160 if (neg) code[0] |= 1 << 16;
1161 if (sat) code[0] |= 1 << 18;
1162 if (abs) code[0] |= 1 << 19;
1163
1164 roundMode_CS(i);
1165 }
1166 }
1167
1168 void
emitSET(const CmpInstruction * i)1169 CodeEmitterNVC0::emitSET(const CmpInstruction *i)
1170 {
1171 uint32_t hi;
1172 uint32_t lo = 0;
1173
1174 if (i->sType == TYPE_F64)
1175 lo = 0x1;
1176 else
1177 if (!isFloatType(i->sType))
1178 lo = 0x3;
1179
1180 if (isSignedIntType(i->sType))
1181 lo |= 0x20;
1182 if (isFloatType(i->dType)) {
1183 if (isFloatType(i->sType))
1184 lo |= 0x20;
1185 else
1186 lo |= 0x80;
1187 }
1188
1189 switch (i->op) {
1190 case OP_SET_AND: hi = 0x10000000; break;
1191 case OP_SET_OR: hi = 0x10200000; break;
1192 case OP_SET_XOR: hi = 0x10400000; break;
1193 default:
1194 hi = 0x100e0000;
1195 break;
1196 }
1197 emitForm_A(i, (static_cast<uint64_t>(hi) << 32) | lo);
1198
1199 if (i->op != OP_SET)
1200 srcId(i->src(2), 32 + 17);
1201
1202 if (i->def(0).getFile() == FILE_PREDICATE) {
1203 if (i->sType == TYPE_F32)
1204 code[1] += 0x10000000;
1205 else
1206 code[1] += 0x08000000;
1207
1208 code[0] &= ~0xfc000;
1209 defId(i->def(0), 17);
1210 if (i->defExists(1))
1211 defId(i->def(1), 14);
1212 else
1213 code[0] |= 0x1c000;
1214 }
1215
1216 if (i->ftz)
1217 code[1] |= 1 << 27;
1218 if (i->flagsSrc >= 0)
1219 code[0] |= 1 << 6;
1220
1221 emitCondCode(i->setCond, 32 + 23);
1222 emitNegAbs12(i);
1223 }
1224
1225 void
emitSLCT(const CmpInstruction * i)1226 CodeEmitterNVC0::emitSLCT(const CmpInstruction *i)
1227 {
1228 uint64_t op;
1229
1230 switch (i->dType) {
1231 case TYPE_S32:
1232 op = HEX64(30000000, 00000023);
1233 break;
1234 case TYPE_U32:
1235 op = HEX64(30000000, 00000003);
1236 break;
1237 case TYPE_F32:
1238 op = HEX64(38000000, 00000000);
1239 break;
1240 default:
1241 assert(!"invalid type for SLCT");
1242 op = 0;
1243 break;
1244 }
1245 emitForm_A(i, op);
1246
1247 CondCode cc = i->setCond;
1248
1249 if (i->src(2).mod.neg())
1250 cc = reverseCondCode(cc);
1251
1252 emitCondCode(cc, 32 + 23);
1253
1254 if (i->ftz)
1255 code[0] |= 1 << 5;
1256 }
1257
1258 void
nvc0_selpFlip(const FixupEntry * entry,uint32_t * code,const FixupData & data)1259 nvc0_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
1260 {
1261 int loc = entry->loc;
1262 if (data.force_persample_interp)
1263 code[loc + 1] |= 1 << 20;
1264 else
1265 code[loc + 1] &= ~(1 << 20);
1266 }
1267
emitSELP(const Instruction * i)1268 void CodeEmitterNVC0::emitSELP(const Instruction *i)
1269 {
1270 emitForm_A(i, HEX64(20000000, 00000004));
1271
1272 if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
1273 code[1] |= 1 << 20;
1274
1275 if (i->subOp == 1) {
1276 addInterp(0, 0, nvc0_selpFlip);
1277 }
1278 }
1279
emitTEXBAR(const Instruction * i)1280 void CodeEmitterNVC0::emitTEXBAR(const Instruction *i)
1281 {
1282 code[0] = 0x00000006 | (i->subOp << 26);
1283 code[1] = 0xf0000000;
1284 emitPredicate(i);
1285 emitCondCode(i->flagsSrc >= 0 ? i->cc : CC_ALWAYS, 5);
1286 }
1287
emitTEXCSAA(const TexInstruction * i)1288 void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i)
1289 {
1290 code[0] = 0x00000086;
1291 code[1] = 0xd0000000;
1292
1293 code[1] |= i->tex.r;
1294 code[1] |= i->tex.s << 8;
1295
1296 if (i->tex.liveOnly)
1297 code[0] |= 1 << 9;
1298
1299 defId(i->def(0), 14);
1300 srcId(i->src(0), 20);
1301 }
1302
1303 static inline bool
isNextIndependentTex(const TexInstruction * i)1304 isNextIndependentTex(const TexInstruction *i)
1305 {
1306 if (!i->next || !isTextureOp(i->next->op))
1307 return false;
1308 if (i->getDef(0)->interfers(i->next->getSrc(0)))
1309 return false;
1310 return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1));
1311 }
1312
1313 void
emitTEX(const TexInstruction * i)1314 CodeEmitterNVC0::emitTEX(const TexInstruction *i)
1315 {
1316 code[0] = 0x00000006;
1317
1318 if (isNextIndependentTex(i))
1319 code[0] |= 0x080; // t mode
1320 else
1321 code[0] |= 0x100; // p mode
1322
1323 if (i->tex.liveOnly)
1324 code[0] |= 1 << 9;
1325
1326 switch (i->op) {
1327 case OP_TEX: code[1] = 0x80000000; break;
1328 case OP_TXB: code[1] = 0x84000000; break;
1329 case OP_TXL: code[1] = 0x86000000; break;
1330 case OP_TXF: code[1] = 0x90000000; break;
1331 case OP_TXG: code[1] = 0xa0000000; break;
1332 case OP_TXLQ: code[1] = 0xb0000000; break;
1333 case OP_TXD: code[1] = 0xe0000000; break;
1334 default:
1335 assert(!"invalid texture op");
1336 break;
1337 }
1338 if (i->op == OP_TXF) {
1339 if (!i->tex.levelZero)
1340 code[1] |= 0x02000000;
1341 } else
1342 if (i->tex.levelZero) {
1343 code[1] |= 0x02000000;
1344 }
1345
1346 if (i->op != OP_TXD && i->tex.derivAll)
1347 code[1] |= 1 << 13;
1348
1349 defId(i->def(0), 14);
1350 srcId(i->src(0), 20);
1351
1352 emitPredicate(i);
1353
1354 if (i->op == OP_TXG) code[0] |= i->tex.gatherComp << 5;
1355
1356 code[1] |= i->tex.mask << 14;
1357
1358 code[1] |= i->tex.r;
1359 code[1] |= i->tex.s << 8;
1360 if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0)
1361 code[1] |= 1 << 18; // in 1st source (with array index)
1362
1363 // texture target:
1364 code[1] |= (i->tex.target.getDim() - 1) << 20;
1365 if (i->tex.target.isCube())
1366 code[1] += 2 << 20;
1367 if (i->tex.target.isArray())
1368 code[1] |= 1 << 19;
1369 if (i->tex.target.isShadow())
1370 code[1] |= 1 << 24;
1371
1372 const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1373
1374 if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) {
1375 // lzero
1376 if (i->op == OP_TXL)
1377 code[1] &= ~(1 << 26);
1378 else
1379 if (i->op == OP_TXF)
1380 code[1] &= ~(1 << 25);
1381 }
1382 if (i->tex.target == TEX_TARGET_2D_MS ||
1383 i->tex.target == TEX_TARGET_2D_MS_ARRAY)
1384 code[1] |= 1 << 23;
1385
1386 if (i->tex.useOffsets == 1)
1387 code[1] |= 1 << 22;
1388 if (i->tex.useOffsets == 4)
1389 code[1] |= 1 << 23;
1390
1391 srcId(i, src1, 26);
1392 }
1393
1394 void
emitTXQ(const TexInstruction * i)1395 CodeEmitterNVC0::emitTXQ(const TexInstruction *i)
1396 {
1397 code[0] = 0x00000086;
1398 code[1] = 0xc0000000;
1399
1400 switch (i->tex.query) {
1401 case TXQ_DIMS: code[1] |= 0 << 22; break;
1402 case TXQ_TYPE: code[1] |= 1 << 22; break;
1403 case TXQ_SAMPLE_POSITION: code[1] |= 2 << 22; break;
1404 case TXQ_FILTER: code[1] |= 3 << 22; break;
1405 case TXQ_LOD: code[1] |= 4 << 22; break;
1406 case TXQ_BORDER_COLOUR: code[1] |= 5 << 22; break;
1407 default:
1408 assert(!"invalid texture query");
1409 break;
1410 }
1411
1412 code[1] |= i->tex.mask << 14;
1413
1414 code[1] |= i->tex.r;
1415 code[1] |= i->tex.s << 8;
1416 if (i->tex.sIndirectSrc >= 0 || i->tex.rIndirectSrc >= 0)
1417 code[1] |= 1 << 18;
1418
1419 const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1420
1421 defId(i->def(0), 14);
1422 srcId(i->src(0), 20);
1423 srcId(i, src1, 26);
1424
1425 emitPredicate(i);
1426 }
1427
1428 void
emitQUADOP(const Instruction * i,uint8_t qOp,uint8_t laneMask)1429 CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
1430 {
1431 code[0] = 0x00000200 | (laneMask << 6); // dall
1432 code[1] = 0x48000000 | qOp;
1433
1434 defId(i->def(0), 14);
1435 srcId(i->src(0), 20);
1436 srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 26);
1437
1438 emitPredicate(i);
1439 }
1440
1441 void
emitFlow(const Instruction * i)1442 CodeEmitterNVC0::emitFlow(const Instruction *i)
1443 {
1444 const FlowInstruction *f = i->asFlow();
1445
1446 unsigned mask; // bit 0: predicate, bit 1: target
1447
1448 code[0] = 0x00000007;
1449
1450 switch (i->op) {
1451 case OP_BRA:
1452 code[1] = f->absolute ? 0x00000000 : 0x40000000;
1453 if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
1454 code[0] |= 0x4000;
1455 mask = 3;
1456 break;
1457 case OP_CALL:
1458 code[1] = f->absolute ? 0x10000000 : 0x50000000;
1459 if (f->indirect)
1460 code[0] |= 0x4000; // indirect calls always use c[] source
1461 mask = 2;
1462 break;
1463
1464 case OP_EXIT: code[1] = 0x80000000; mask = 1; break;
1465 case OP_RET: code[1] = 0x90000000; mask = 1; break;
1466 case OP_DISCARD: code[1] = 0x98000000; mask = 1; break;
1467 case OP_BREAK: code[1] = 0xa8000000; mask = 1; break;
1468 case OP_CONT: code[1] = 0xb0000000; mask = 1; break;
1469
1470 case OP_JOINAT: code[1] = 0x60000000; mask = 2; break;
1471 case OP_PREBREAK: code[1] = 0x68000000; mask = 2; break;
1472 case OP_PRECONT: code[1] = 0x70000000; mask = 2; break;
1473 case OP_PRERET: code[1] = 0x78000000; mask = 2; break;
1474
1475 case OP_QUADON: code[1] = 0xc0000000; mask = 0; break;
1476 case OP_QUADPOP: code[1] = 0xc8000000; mask = 0; break;
1477 case OP_BRKPT: code[1] = 0xd0000000; mask = 0; break;
1478 default:
1479 assert(!"invalid flow operation");
1480 return;
1481 }
1482
1483 if (mask & 1) {
1484 emitPredicate(i);
1485 if (i->flagsSrc < 0)
1486 code[0] |= 0x1e0;
1487 }
1488
1489 if (!f)
1490 return;
1491
1492 if (f->allWarp)
1493 code[0] |= 1 << 15;
1494 if (f->limit)
1495 code[0] |= 1 << 16;
1496
1497 if (f->indirect) {
1498 if (code[0] & 0x4000) {
1499 assert(i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST);
1500 setAddress16(i->src(0));
1501 code[1] |= i->getSrc(0)->reg.fileIndex << 10;
1502 if (f->op == OP_BRA)
1503 srcId(f->src(0).getIndirect(0), 20);
1504 } else {
1505 srcId(f, 0, 20);
1506 }
1507 }
1508
1509 if (f->op == OP_CALL) {
1510 if (f->indirect) {
1511 // nothing
1512 } else
1513 if (f->builtin) {
1514 assert(f->absolute);
1515 uint32_t pcAbs = targNVC0->getBuiltinOffset(f->target.builtin);
1516 addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfc000000, 26);
1517 addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x03ffffff, -6);
1518 } else {
1519 assert(!f->absolute);
1520 int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
1521 code[0] |= (pcRel & 0x3f) << 26;
1522 code[1] |= (pcRel >> 6) & 0x3ffff;
1523 }
1524 } else
1525 if (mask & 2) {
1526 int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
1527 if (writeIssueDelays && !(f->target.bb->binPos & 0x3f))
1528 pcRel += 8;
1529 // currently we don't want absolute branches
1530 assert(!f->absolute);
1531 code[0] |= (pcRel & 0x3f) << 26;
1532 code[1] |= (pcRel >> 6) & 0x3ffff;
1533 }
1534 }
1535
1536 void
emitBAR(const Instruction * i)1537 CodeEmitterNVC0::emitBAR(const Instruction *i)
1538 {
1539 Value *rDef = NULL, *pDef = NULL;
1540
1541 switch (i->subOp) {
1542 case NV50_IR_SUBOP_BAR_ARRIVE: code[0] = 0x84; break;
1543 case NV50_IR_SUBOP_BAR_RED_AND: code[0] = 0x24; break;
1544 case NV50_IR_SUBOP_BAR_RED_OR: code[0] = 0x44; break;
1545 case NV50_IR_SUBOP_BAR_RED_POPC: code[0] = 0x04; break;
1546 default:
1547 code[0] = 0x04;
1548 assert(i->subOp == NV50_IR_SUBOP_BAR_SYNC);
1549 break;
1550 }
1551 code[1] = 0x50000000;
1552
1553 code[0] |= 63 << 14;
1554 code[1] |= 7 << 21;
1555
1556 emitPredicate(i);
1557
1558 // barrier id
1559 if (i->src(0).getFile() == FILE_GPR) {
1560 srcId(i->src(0), 20);
1561 } else {
1562 ImmediateValue *imm = i->getSrc(0)->asImm();
1563 assert(imm);
1564 code[0] |= imm->reg.data.u32 << 20;
1565 code[1] |= 0x8000;
1566 }
1567
1568 // thread count
1569 if (i->src(1).getFile() == FILE_GPR) {
1570 srcId(i->src(1), 26);
1571 } else {
1572 ImmediateValue *imm = i->getSrc(1)->asImm();
1573 assert(imm);
1574 assert(imm->reg.data.u32 <= 0xfff);
1575 code[0] |= imm->reg.data.u32 << 26;
1576 code[1] |= imm->reg.data.u32 >> 6;
1577 code[1] |= 0x4000;
1578 }
1579
1580 if (i->srcExists(2) && (i->predSrc != 2)) {
1581 srcId(i->src(2), 32 + 17);
1582 if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
1583 code[1] |= 1 << 20;
1584 } else {
1585 code[1] |= 7 << 17;
1586 }
1587
1588 if (i->defExists(0)) {
1589 if (i->def(0).getFile() == FILE_GPR)
1590 rDef = i->getDef(0);
1591 else
1592 pDef = i->getDef(0);
1593
1594 if (i->defExists(1)) {
1595 if (i->def(1).getFile() == FILE_GPR)
1596 rDef = i->getDef(1);
1597 else
1598 pDef = i->getDef(1);
1599 }
1600 }
1601 if (rDef) {
1602 code[0] &= ~(63 << 14);
1603 defId(rDef, 14);
1604 }
1605 if (pDef) {
1606 code[1] &= ~(7 << 21);
1607 defId(pDef, 32 + 21);
1608 }
1609 }
1610
1611 void
emitAFETCH(const Instruction * i)1612 CodeEmitterNVC0::emitAFETCH(const Instruction *i)
1613 {
1614 code[0] = 0x00000006;
1615 code[1] = 0x0c000000 | (i->src(0).get()->reg.data.offset & 0x7ff);
1616
1617 if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1618 code[0] |= 0x200;
1619
1620 emitPredicate(i);
1621
1622 defId(i->def(0), 14);
1623 srcId(i->src(0).getIndirect(0), 20);
1624 }
1625
1626 void
emitPFETCH(const Instruction * i)1627 CodeEmitterNVC0::emitPFETCH(const Instruction *i)
1628 {
1629 uint32_t prim = i->src(0).get()->reg.data.u32;
1630
1631 code[0] = 0x00000006 | ((prim & 0x3f) << 26);
1632 code[1] = 0x00000000 | (prim >> 6);
1633
1634 emitPredicate(i);
1635
1636 const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1637
1638 defId(i->def(0), 14);
1639 srcId(i, src1, 20);
1640 }
1641
1642 void
emitVFETCH(const Instruction * i)1643 CodeEmitterNVC0::emitVFETCH(const Instruction *i)
1644 {
1645 code[0] = 0x00000006;
1646 code[1] = 0x06000000 | i->src(0).get()->reg.data.offset;
1647
1648 if (i->perPatch)
1649 code[0] |= 0x100;
1650 if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1651 code[0] |= 0x200; // yes, TCPs can read from *outputs* of other threads
1652
1653 emitPredicate(i);
1654
1655 code[0] |= ((i->getDef(0)->reg.size / 4) - 1) << 5;
1656
1657 defId(i->def(0), 14);
1658 srcId(i->src(0).getIndirect(0), 20);
1659 srcId(i->src(0).getIndirect(1), 26); // vertex address
1660 }
1661
1662 void
emitEXPORT(const Instruction * i)1663 CodeEmitterNVC0::emitEXPORT(const Instruction *i)
1664 {
1665 unsigned int size = typeSizeof(i->dType);
1666
1667 code[0] = 0x00000006 | ((size / 4 - 1) << 5);
1668 code[1] = 0x0a000000 | i->src(0).get()->reg.data.offset;
1669
1670 assert(!(code[1] & ((size == 12) ? 15 : (size - 1))));
1671
1672 if (i->perPatch)
1673 code[0] |= 0x100;
1674
1675 emitPredicate(i);
1676
1677 assert(i->src(1).getFile() == FILE_GPR);
1678
1679 srcId(i->src(0).getIndirect(0), 20);
1680 srcId(i->src(0).getIndirect(1), 32 + 17); // vertex base address
1681 srcId(i->src(1), 26);
1682 }
1683
1684 void
emitOUT(const Instruction * i)1685 CodeEmitterNVC0::emitOUT(const Instruction *i)
1686 {
1687 code[0] = 0x00000006;
1688 code[1] = 0x1c000000;
1689
1690 emitPredicate(i);
1691
1692 defId(i->def(0), 14); // new secret address
1693 srcId(i->src(0), 20); // old secret address, should be 0 initially
1694
1695 assert(i->src(0).getFile() == FILE_GPR);
1696
1697 if (i->op == OP_EMIT)
1698 code[0] |= 1 << 5;
1699 if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)
1700 code[0] |= 1 << 6;
1701
1702 // vertex stream
1703 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1704 unsigned int stream = SDATA(i->src(1)).u32;
1705 assert(stream < 4);
1706 if (stream) {
1707 code[1] |= 0xc000;
1708 code[0] |= stream << 26;
1709 } else {
1710 srcId(NULL, 26);
1711 }
1712 } else {
1713 srcId(i->src(1), 26);
1714 }
1715 }
1716
1717 void
emitInterpMode(const Instruction * i)1718 CodeEmitterNVC0::emitInterpMode(const Instruction *i)
1719 {
1720 if (i->encSize == 8) {
1721 code[0] |= i->ipa << 6; // TODO: INTERP_SAMPLEID
1722 } else {
1723 if (i->getInterpMode() == NV50_IR_INTERP_SC)
1724 code[0] |= 0x80;
1725 assert(i->op == OP_PINTERP && i->getSampleMode() == 0);
1726 }
1727 }
1728
1729 void
nvc0_interpApply(const FixupEntry * entry,uint32_t * code,const FixupData & data)1730 nvc0_interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
1731 {
1732 int ipa = entry->ipa;
1733 int reg = entry->reg;
1734 int loc = entry->loc;
1735
1736 if (data.flatshade &&
1737 (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
1738 ipa = NV50_IR_INTERP_FLAT;
1739 reg = 0x3f;
1740 } else if (data.force_persample_interp &&
1741 (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
1742 (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
1743 ipa |= NV50_IR_INTERP_CENTROID;
1744 }
1745 code[loc + 0] &= ~(0xf << 6);
1746 code[loc + 0] |= ipa << 6;
1747 code[loc + 0] &= ~(0x3f << 26);
1748 code[loc + 0] |= reg << 26;
1749 }
1750
1751 void
emitINTERP(const Instruction * i)1752 CodeEmitterNVC0::emitINTERP(const Instruction *i)
1753 {
1754 const uint32_t base = i->getSrc(0)->reg.data.offset;
1755
1756 if (i->encSize == 8) {
1757 code[0] = 0x00000000;
1758 code[1] = 0xc0000000 | (base & 0xffff);
1759
1760 if (i->saturate)
1761 code[0] |= 1 << 5;
1762
1763 if (i->op == OP_PINTERP) {
1764 srcId(i->src(1), 26);
1765 addInterp(i->ipa, SDATA(i->src(1)).id, nvc0_interpApply);
1766 } else {
1767 code[0] |= 0x3f << 26;
1768 addInterp(i->ipa, 0x3f, nvc0_interpApply);
1769 }
1770
1771 srcId(i->src(0).getIndirect(0), 20);
1772 } else {
1773 assert(i->op == OP_PINTERP);
1774 code[0] = 0x00000009 | ((base & 0xc) << 6) | ((base >> 4) << 26);
1775 srcId(i->src(1), 20);
1776 }
1777 emitInterpMode(i);
1778
1779 emitPredicate(i);
1780 defId(i->def(0), 14);
1781
1782 if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)
1783 srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 32 + 17);
1784 else
1785 code[1] |= 0x3f << 17;
1786 }
1787
1788 void
emitLoadStoreType(DataType ty)1789 CodeEmitterNVC0::emitLoadStoreType(DataType ty)
1790 {
1791 uint8_t val;
1792
1793 switch (ty) {
1794 case TYPE_U8:
1795 val = 0x00;
1796 break;
1797 case TYPE_S8:
1798 val = 0x20;
1799 break;
1800 case TYPE_F16:
1801 case TYPE_U16:
1802 val = 0x40;
1803 break;
1804 case TYPE_S16:
1805 val = 0x60;
1806 break;
1807 case TYPE_F32:
1808 case TYPE_U32:
1809 case TYPE_S32:
1810 val = 0x80;
1811 break;
1812 case TYPE_F64:
1813 case TYPE_U64:
1814 case TYPE_S64:
1815 val = 0xa0;
1816 break;
1817 case TYPE_B128:
1818 val = 0xc0;
1819 break;
1820 default:
1821 val = 0x80;
1822 assert(!"invalid type");
1823 break;
1824 }
1825 code[0] |= val;
1826 }
1827
1828 void
emitCachingMode(CacheMode c)1829 CodeEmitterNVC0::emitCachingMode(CacheMode c)
1830 {
1831 uint32_t val;
1832
1833 switch (c) {
1834 case CACHE_CA:
1835 // case CACHE_WB:
1836 val = 0x000;
1837 break;
1838 case CACHE_CG:
1839 val = 0x100;
1840 break;
1841 case CACHE_CS:
1842 val = 0x200;
1843 break;
1844 case CACHE_CV:
1845 // case CACHE_WT:
1846 val = 0x300;
1847 break;
1848 default:
1849 val = 0;
1850 assert(!"invalid caching mode");
1851 break;
1852 }
1853 code[0] |= val;
1854 }
1855
1856 static inline bool
uses64bitAddress(const Instruction * ldst)1857 uses64bitAddress(const Instruction *ldst)
1858 {
1859 return ldst->src(0).getFile() == FILE_MEMORY_GLOBAL &&
1860 ldst->src(0).isIndirect(0) &&
1861 ldst->getIndirect(0, 0)->reg.size == 8;
1862 }
1863
1864 void
emitSTORE(const Instruction * i)1865 CodeEmitterNVC0::emitSTORE(const Instruction *i)
1866 {
1867 uint32_t opc;
1868
1869 switch (i->src(0).getFile()) {
1870 case FILE_MEMORY_GLOBAL: opc = 0x90000000; break;
1871 case FILE_MEMORY_LOCAL: opc = 0xc8000000; break;
1872 case FILE_MEMORY_SHARED:
1873 if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
1874 if (targ->getChipset() >= NVISA_GK104_CHIPSET)
1875 opc = 0xb8000000;
1876 else
1877 opc = 0xcc000000;
1878 } else {
1879 opc = 0xc9000000;
1880 }
1881 break;
1882 default:
1883 assert(!"invalid memory file");
1884 opc = 0;
1885 break;
1886 }
1887 code[0] = 0x00000005;
1888 code[1] = opc;
1889
1890 if (targ->getChipset() >= NVISA_GK104_CHIPSET) {
1891 // Unlocked store on shared memory can fail.
1892 if (i->src(0).getFile() == FILE_MEMORY_SHARED &&
1893 i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
1894 assert(i->defExists(0));
1895 setPDSTL(i, 0);
1896 }
1897 }
1898
1899 setAddressByFile(i->src(0));
1900 srcId(i->src(1), 14);
1901 srcId(i->src(0).getIndirect(0), 20);
1902 if (uses64bitAddress(i))
1903 code[1] |= 1 << 26;
1904
1905 emitPredicate(i);
1906
1907 emitLoadStoreType(i->dType);
1908 emitCachingMode(i->cache);
1909 }
1910
1911 void
emitLOAD(const Instruction * i)1912 CodeEmitterNVC0::emitLOAD(const Instruction *i)
1913 {
1914 uint32_t opc;
1915
1916 code[0] = 0x00000005;
1917
1918 switch (i->src(0).getFile()) {
1919 case FILE_MEMORY_GLOBAL: opc = 0x80000000; break;
1920 case FILE_MEMORY_LOCAL: opc = 0xc0000000; break;
1921 case FILE_MEMORY_SHARED:
1922 if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
1923 if (targ->getChipset() >= NVISA_GK104_CHIPSET)
1924 opc = 0xa8000000;
1925 else
1926 opc = 0xc4000000;
1927 } else {
1928 opc = 0xc1000000;
1929 }
1930 break;
1931 case FILE_MEMORY_CONST:
1932 if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
1933 emitMOV(i); // not sure if this is any better
1934 return;
1935 }
1936 opc = 0x14000000 | (i->src(0).get()->reg.fileIndex << 10);
1937 code[0] = 0x00000006 | (i->subOp << 8);
1938 break;
1939 default:
1940 assert(!"invalid memory file");
1941 opc = 0;
1942 break;
1943 }
1944 code[1] = opc;
1945
1946 int r = 0, p = -1;
1947 if (i->src(0).getFile() == FILE_MEMORY_SHARED) {
1948 if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
1949 if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
1950 r = -1;
1951 p = 0;
1952 } else if (i->defExists(1)) { // r, p
1953 p = 1;
1954 } else {
1955 assert(!"Expected predicate dest for load locked");
1956 }
1957 }
1958 }
1959
1960 if (r >= 0)
1961 defId(i->def(r), 14);
1962 else
1963 code[0] |= 63 << 14;
1964
1965 if (p >= 0) {
1966 if (targ->getChipset() >= NVISA_GK104_CHIPSET)
1967 setPDSTL(i, p);
1968 else
1969 defId(i->def(p), 32 + 18);
1970 }
1971
1972 setAddressByFile(i->src(0));
1973 srcId(i->src(0).getIndirect(0), 20);
1974 if (uses64bitAddress(i))
1975 code[1] |= 1 << 26;
1976
1977 emitPredicate(i);
1978
1979 emitLoadStoreType(i->dType);
1980 emitCachingMode(i->cache);
1981 }
1982
1983 uint8_t
getSRegEncoding(const ValueRef & ref)1984 CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref)
1985 {
1986 switch (SDATA(ref).sv.sv) {
1987 case SV_LANEID: return 0x00;
1988 case SV_PHYSID: return 0x03;
1989 case SV_VERTEX_COUNT: return 0x10;
1990 case SV_INVOCATION_ID: return 0x11;
1991 case SV_YDIR: return 0x12;
1992 case SV_THREAD_KILL: return 0x13;
1993 case SV_COMBINED_TID: return 0x20;
1994 case SV_TID: return 0x21 + SDATA(ref).sv.index;
1995 case SV_CTAID: return 0x25 + SDATA(ref).sv.index;
1996 case SV_NTID: return 0x29 + SDATA(ref).sv.index;
1997 case SV_GRIDID: return 0x2c;
1998 case SV_NCTAID: return 0x2d + SDATA(ref).sv.index;
1999 case SV_LBASE: return 0x34;
2000 case SV_SBASE: return 0x30;
2001 case SV_LANEMASK_EQ: return 0x38;
2002 case SV_LANEMASK_LT: return 0x39;
2003 case SV_LANEMASK_LE: return 0x3a;
2004 case SV_LANEMASK_GT: return 0x3b;
2005 case SV_LANEMASK_GE: return 0x3c;
2006 case SV_CLOCK: return 0x50 + SDATA(ref).sv.index;
2007 default:
2008 assert(!"no sreg for system value");
2009 return 0;
2010 }
2011 }
2012
2013 void
emitMOV(const Instruction * i)2014 CodeEmitterNVC0::emitMOV(const Instruction *i)
2015 {
2016 assert(!i->saturate);
2017 if (i->def(0).getFile() == FILE_PREDICATE) {
2018 if (i->src(0).getFile() == FILE_GPR) {
2019 code[0] = 0xfc01c003;
2020 code[1] = 0x1a8e0000;
2021 srcId(i->src(0), 20);
2022 } else {
2023 code[0] = 0x0001c004;
2024 code[1] = 0x0c0e0000;
2025 if (i->src(0).getFile() == FILE_IMMEDIATE) {
2026 code[0] |= 7 << 20;
2027 if (!i->getSrc(0)->reg.data.u32)
2028 code[0] |= 1 << 23;
2029 } else {
2030 srcId(i->src(0), 20);
2031 }
2032 }
2033 defId(i->def(0), 17);
2034 emitPredicate(i);
2035 } else
2036 if (i->src(0).getFile() == FILE_SYSTEM_VALUE) {
2037 uint8_t sr = getSRegEncoding(i->src(0));
2038
2039 if (i->encSize == 8) {
2040 code[0] = 0x00000004 | (sr << 26);
2041 code[1] = 0x2c000000;
2042 } else {
2043 code[0] = 0x40000008 | (sr << 20);
2044 }
2045 defId(i->def(0), 14);
2046
2047 emitPredicate(i);
2048 } else
2049 if (i->encSize == 8) {
2050 uint64_t opc;
2051
2052 if (i->src(0).getFile() == FILE_IMMEDIATE)
2053 opc = HEX64(18000000, 000001e2);
2054 else
2055 if (i->src(0).getFile() == FILE_PREDICATE)
2056 opc = HEX64(080e0000, 1c000004);
2057 else
2058 opc = HEX64(28000000, 00000004);
2059
2060 if (i->src(0).getFile() != FILE_PREDICATE)
2061 opc |= i->lanes << 5;
2062
2063 emitForm_B(i, opc);
2064
2065 // Explicitly emit the predicate source as emitForm_B skips it.
2066 if (i->src(0).getFile() == FILE_PREDICATE)
2067 srcId(i->src(0), 20);
2068 } else {
2069 uint32_t imm;
2070
2071 if (i->src(0).getFile() == FILE_IMMEDIATE) {
2072 imm = SDATA(i->src(0)).u32;
2073 if (imm & 0xfff00000) {
2074 assert(!(imm & 0x000fffff));
2075 code[0] = 0x00000318 | imm;
2076 } else {
2077 assert(imm < 0x800 && ((int32_t)imm >= -0x800));
2078 code[0] = 0x00000118 | (imm << 20);
2079 }
2080 } else {
2081 code[0] = 0x0028;
2082 emitShortSrc2(i->src(0));
2083 }
2084 defId(i->def(0), 14);
2085
2086 emitPredicate(i);
2087 }
2088 }
2089
2090 void
emitATOM(const Instruction * i)2091 CodeEmitterNVC0::emitATOM(const Instruction *i)
2092 {
2093 const bool hasDst = i->defExists(0);
2094 const bool casOrExch =
2095 i->subOp == NV50_IR_SUBOP_ATOM_EXCH ||
2096 i->subOp == NV50_IR_SUBOP_ATOM_CAS;
2097
2098 if (i->dType == TYPE_U64) {
2099 switch (i->subOp) {
2100 case NV50_IR_SUBOP_ATOM_ADD:
2101 code[0] = 0x205;
2102 if (hasDst)
2103 code[1] = 0x507e0000;
2104 else
2105 code[1] = 0x10000000;
2106 break;
2107 case NV50_IR_SUBOP_ATOM_EXCH:
2108 code[0] = 0x305;
2109 code[1] = 0x507e0000;
2110 break;
2111 case NV50_IR_SUBOP_ATOM_CAS:
2112 code[0] = 0x325;
2113 code[1] = 0x50000000;
2114 break;
2115 default:
2116 assert(!"invalid u64 red op");
2117 break;
2118 }
2119 } else
2120 if (i->dType == TYPE_U32) {
2121 switch (i->subOp) {
2122 case NV50_IR_SUBOP_ATOM_EXCH:
2123 code[0] = 0x105;
2124 code[1] = 0x507e0000;
2125 break;
2126 case NV50_IR_SUBOP_ATOM_CAS:
2127 code[0] = 0x125;
2128 code[1] = 0x50000000;
2129 break;
2130 default:
2131 code[0] = 0x5 | (i->subOp << 5);
2132 if (hasDst)
2133 code[1] = 0x507e0000;
2134 else
2135 code[1] = 0x10000000;
2136 break;
2137 }
2138 } else
2139 if (i->dType == TYPE_S32) {
2140 assert(i->subOp <= 2);
2141 code[0] = 0x205 | (i->subOp << 5);
2142 if (hasDst)
2143 code[1] = 0x587e0000;
2144 else
2145 code[1] = 0x18000000;
2146 } else
2147 if (i->dType == TYPE_F32) {
2148 assert(i->subOp == NV50_IR_SUBOP_ATOM_ADD);
2149 code[0] = 0x205;
2150 if (hasDst)
2151 code[1] = 0x687e0000;
2152 else
2153 code[1] = 0x28000000;
2154 }
2155
2156 emitPredicate(i);
2157
2158 srcId(i->src(1), 14);
2159
2160 if (hasDst)
2161 defId(i->def(0), 32 + 11);
2162 else
2163 if (casOrExch)
2164 code[1] |= 63 << 11;
2165
2166 if (hasDst || casOrExch) {
2167 const int32_t offset = SDATA(i->src(0)).offset;
2168 assert(offset < 0x80000 && offset >= -0x80000);
2169 code[0] |= offset << 26;
2170 code[1] |= (offset & 0x1ffc0) >> 6;
2171 code[1] |= (offset & 0xe0000) << 6;
2172 } else {
2173 srcAddr32(i->src(0), 26, 0);
2174 }
2175 if (i->getIndirect(0, 0)) {
2176 srcId(i->getIndirect(0, 0), 20);
2177 if (i->getIndirect(0, 0)->reg.size == 8)
2178 code[1] |= 1 << 26;
2179 } else {
2180 code[0] |= 63 << 20;
2181 }
2182
2183 if (i->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2184 assert(i->src(1).getSize() == 2 * typeSizeof(i->sType));
2185 code[1] |= (SDATA(i->src(1)).id + 1) << 17;
2186 }
2187 }
2188
2189 void
emitMEMBAR(const Instruction * i)2190 CodeEmitterNVC0::emitMEMBAR(const Instruction *i)
2191 {
2192 switch (NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp)) {
2193 case NV50_IR_SUBOP_MEMBAR_CTA: code[0] = 0x05; break;
2194 case NV50_IR_SUBOP_MEMBAR_GL: code[0] = 0x25; break;
2195 default:
2196 code[0] = 0x45;
2197 assert(NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp) == NV50_IR_SUBOP_MEMBAR_SYS);
2198 break;
2199 }
2200 code[1] = 0xe0000000;
2201
2202 emitPredicate(i);
2203 }
2204
2205 void
emitCCTL(const Instruction * i)2206 CodeEmitterNVC0::emitCCTL(const Instruction *i)
2207 {
2208 code[0] = 0x00000005 | (i->subOp << 5);
2209
2210 if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
2211 code[1] = 0x98000000;
2212 srcAddr32(i->src(0), 28, 2);
2213 } else {
2214 code[1] = 0xd0000000;
2215 setAddress24(i->src(0));
2216 }
2217 if (uses64bitAddress(i))
2218 code[1] |= 1 << 26;
2219 srcId(i->src(0).getIndirect(0), 20);
2220
2221 emitPredicate(i);
2222
2223 defId(i, 0, 14);
2224 }
2225
2226 void
emitSUCLAMPMode(uint16_t subOp)2227 CodeEmitterNVC0::emitSUCLAMPMode(uint16_t subOp)
2228 {
2229 uint8_t m;
2230 switch (subOp & ~NV50_IR_SUBOP_SUCLAMP_2D) {
2231 case NV50_IR_SUBOP_SUCLAMP_SD(0, 1): m = 0; break;
2232 case NV50_IR_SUBOP_SUCLAMP_SD(1, 1): m = 1; break;
2233 case NV50_IR_SUBOP_SUCLAMP_SD(2, 1): m = 2; break;
2234 case NV50_IR_SUBOP_SUCLAMP_SD(3, 1): m = 3; break;
2235 case NV50_IR_SUBOP_SUCLAMP_SD(4, 1): m = 4; break;
2236 case NV50_IR_SUBOP_SUCLAMP_PL(0, 1): m = 5; break;
2237 case NV50_IR_SUBOP_SUCLAMP_PL(1, 1): m = 6; break;
2238 case NV50_IR_SUBOP_SUCLAMP_PL(2, 1): m = 7; break;
2239 case NV50_IR_SUBOP_SUCLAMP_PL(3, 1): m = 8; break;
2240 case NV50_IR_SUBOP_SUCLAMP_PL(4, 1): m = 9; break;
2241 case NV50_IR_SUBOP_SUCLAMP_BL(0, 1): m = 10; break;
2242 case NV50_IR_SUBOP_SUCLAMP_BL(1, 1): m = 11; break;
2243 case NV50_IR_SUBOP_SUCLAMP_BL(2, 1): m = 12; break;
2244 case NV50_IR_SUBOP_SUCLAMP_BL(3, 1): m = 13; break;
2245 case NV50_IR_SUBOP_SUCLAMP_BL(4, 1): m = 14; break;
2246 default:
2247 return;
2248 }
2249 code[0] |= m << 5;
2250 if (subOp & NV50_IR_SUBOP_SUCLAMP_2D)
2251 code[1] |= 1 << 16;
2252 }
2253
2254 void
emitSUCalc(Instruction * i)2255 CodeEmitterNVC0::emitSUCalc(Instruction *i)
2256 {
2257 ImmediateValue *imm = NULL;
2258 uint64_t opc;
2259
2260 if (i->srcExists(2)) {
2261 imm = i->getSrc(2)->asImm();
2262 if (imm)
2263 i->setSrc(2, NULL); // special case, make emitForm_A not assert
2264 }
2265
2266 switch (i->op) {
2267 case OP_SUCLAMP: opc = HEX64(58000000, 00000004); break;
2268 case OP_SUBFM: opc = HEX64(5c000000, 00000004); break;
2269 case OP_SUEAU: opc = HEX64(60000000, 00000004); break;
2270 default:
2271 assert(0);
2272 return;
2273 }
2274 emitForm_A(i, opc);
2275
2276 if (i->op == OP_SUCLAMP) {
2277 if (i->dType == TYPE_S32)
2278 code[0] |= 1 << 9;
2279 emitSUCLAMPMode(i->subOp);
2280 }
2281
2282 if (i->op == OP_SUBFM && i->subOp == NV50_IR_SUBOP_SUBFM_3D)
2283 code[1] |= 1 << 16;
2284
2285 if (i->op != OP_SUEAU) {
2286 if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
2287 code[0] |= 63 << 14;
2288 code[1] |= i->getDef(0)->reg.data.id << 23;
2289 } else
2290 if (i->defExists(1)) { // r, p
2291 assert(i->def(1).getFile() == FILE_PREDICATE);
2292 code[1] |= i->getDef(1)->reg.data.id << 23;
2293 } else { // r, #
2294 code[1] |= 7 << 23;
2295 }
2296 }
2297 if (imm) {
2298 assert(i->op == OP_SUCLAMP);
2299 i->setSrc(2, imm);
2300 code[1] |= (imm->reg.data.u32 & 0x3f) << 17; // sint6
2301 }
2302 }
2303
2304 void
emitSUGType(DataType ty)2305 CodeEmitterNVC0::emitSUGType(DataType ty)
2306 {
2307 switch (ty) {
2308 case TYPE_S32: code[1] |= 1 << 13; break;
2309 case TYPE_U8: code[1] |= 2 << 13; break;
2310 case TYPE_S8: code[1] |= 3 << 13; break;
2311 default:
2312 assert(ty == TYPE_U32);
2313 break;
2314 }
2315 }
2316
2317 void
setSUConst16(const Instruction * i,const int s)2318 CodeEmitterNVC0::setSUConst16(const Instruction *i, const int s)
2319 {
2320 const uint32_t offset = i->getSrc(s)->reg.data.offset;
2321
2322 assert(i->src(s).getFile() == FILE_MEMORY_CONST);
2323 assert(offset == (offset & 0xfffc));
2324
2325 code[1] |= 1 << 21;
2326 code[0] |= offset << 24;
2327 code[1] |= offset >> 8;
2328 code[1] |= i->getSrc(s)->reg.fileIndex << 8;
2329 }
2330
2331 void
setSUPred(const Instruction * i,const int s)2332 CodeEmitterNVC0::setSUPred(const Instruction *i, const int s)
2333 {
2334 if (!i->srcExists(s) || (i->predSrc == s)) {
2335 code[1] |= 0x7 << 17;
2336 } else {
2337 if (i->src(s).mod == Modifier(NV50_IR_MOD_NOT))
2338 code[1] |= 1 << 20;
2339 srcId(i->src(s), 32 + 17);
2340 }
2341 }
2342
2343 void
emitSULDGB(const TexInstruction * i)2344 CodeEmitterNVC0::emitSULDGB(const TexInstruction *i)
2345 {
2346 code[0] = 0x5;
2347 code[1] = 0xd4000000 | (i->subOp << 15);
2348
2349 emitLoadStoreType(i->dType);
2350 emitSUGType(i->sType);
2351 emitCachingMode(i->cache);
2352
2353 emitPredicate(i);
2354 defId(i->def(0), 14); // destination
2355 srcId(i->src(0), 20); // address
2356 // format
2357 if (i->src(1).getFile() == FILE_GPR)
2358 srcId(i->src(1), 26);
2359 else
2360 setSUConst16(i, 1);
2361 setSUPred(i, 2);
2362 }
2363
2364 void
emitSUSTGx(const TexInstruction * i)2365 CodeEmitterNVC0::emitSUSTGx(const TexInstruction *i)
2366 {
2367 code[0] = 0x5;
2368 code[1] = 0xdc000000 | (i->subOp << 15);
2369
2370 if (i->op == OP_SUSTP)
2371 code[1] |= i->tex.mask << 22;
2372 else
2373 emitLoadStoreType(i->dType);
2374 emitSUGType(i->sType);
2375 emitCachingMode(i->cache);
2376
2377 emitPredicate(i);
2378 srcId(i->src(0), 20); // address
2379 // format
2380 if (i->src(1).getFile() == FILE_GPR)
2381 srcId(i->src(1), 26);
2382 else
2383 setSUConst16(i, 1);
2384 srcId(i->src(3), 14); // values
2385 setSUPred(i, 2);
2386 }
2387
2388 void
emitSUAddr(const TexInstruction * i)2389 CodeEmitterNVC0::emitSUAddr(const TexInstruction *i)
2390 {
2391 assert(targ->getChipset() < NVISA_GK104_CHIPSET);
2392
2393 if (i->tex.rIndirectSrc < 0) {
2394 code[1] |= 0x00004000;
2395 code[0] |= i->tex.r << 26;
2396 } else {
2397 srcId(i, i->tex.rIndirectSrc, 26);
2398 }
2399 }
2400
2401 void
emitSUDim(const TexInstruction * i)2402 CodeEmitterNVC0::emitSUDim(const TexInstruction *i)
2403 {
2404 assert(targ->getChipset() < NVISA_GK104_CHIPSET);
2405
2406 code[1] |= (i->tex.target.getDim() - 1) << 12;
2407 if (i->tex.target.isArray() || i->tex.target.isCube() ||
2408 i->tex.target.getDim() == 3) {
2409 // use e2d mode for 3-dim images, arrays and cubes.
2410 code[1] |= 3 << 12;
2411 }
2412
2413 srcId(i->src(0), 20);
2414 }
2415
2416 void
emitSULEA(const TexInstruction * i)2417 CodeEmitterNVC0::emitSULEA(const TexInstruction *i)
2418 {
2419 assert(targ->getChipset() < NVISA_GK104_CHIPSET);
2420
2421 code[0] = 0x5;
2422 code[1] = 0xf0000000;
2423
2424 emitPredicate(i);
2425 emitLoadStoreType(i->sType);
2426
2427 defId(i->def(0), 14);
2428
2429 if (i->defExists(1)) {
2430 defId(i->def(1), 32 + 22);
2431 } else {
2432 code[1] |= 7 << 22;
2433 }
2434
2435 emitSUAddr(i);
2436 emitSUDim(i);
2437 }
2438
2439 void
emitSULDB(const TexInstruction * i)2440 CodeEmitterNVC0::emitSULDB(const TexInstruction *i)
2441 {
2442 assert(targ->getChipset() < NVISA_GK104_CHIPSET);
2443
2444 code[0] = 0x5;
2445 code[1] = 0xd4000000 | (i->subOp << 15);
2446
2447 emitPredicate(i);
2448 emitLoadStoreType(i->dType);
2449
2450 defId(i->def(0), 14);
2451
2452 emitCachingMode(i->cache);
2453 emitSUAddr(i);
2454 emitSUDim(i);
2455 }
2456
2457 void
emitSUSTx(const TexInstruction * i)2458 CodeEmitterNVC0::emitSUSTx(const TexInstruction *i)
2459 {
2460 assert(targ->getChipset() < NVISA_GK104_CHIPSET);
2461
2462 code[0] = 0x5;
2463 code[1] = 0xdc000000 | (i->subOp << 15);
2464
2465 if (i->op == OP_SUSTP)
2466 code[1] |= i->tex.mask << 17;
2467 else
2468 emitLoadStoreType(i->dType);
2469
2470 emitPredicate(i);
2471
2472 srcId(i->src(1), 14);
2473
2474 emitCachingMode(i->cache);
2475 emitSUAddr(i);
2476 emitSUDim(i);
2477 }
2478
2479 void
emitVectorSubOp(const Instruction * i)2480 CodeEmitterNVC0::emitVectorSubOp(const Instruction *i)
2481 {
2482 switch (NV50_IR_SUBOP_Vn(i->subOp)) {
2483 case 0:
2484 code[1] |= (i->subOp & 0x000f) << 12; // vsrc1
2485 code[1] |= (i->subOp & 0x00e0) >> 5; // vsrc2
2486 code[1] |= (i->subOp & 0x0100) << 7; // vsrc2
2487 code[1] |= (i->subOp & 0x3c00) << 13; // vdst
2488 break;
2489 case 1:
2490 code[1] |= (i->subOp & 0x000f) << 8; // v2src1
2491 code[1] |= (i->subOp & 0x0010) << 11; // v2src1
2492 code[1] |= (i->subOp & 0x01e0) >> 1; // v2src2
2493 code[1] |= (i->subOp & 0x0200) << 6; // v2src2
2494 code[1] |= (i->subOp & 0x3c00) << 2; // v4dst
2495 code[1] |= (i->mask & 0x3) << 2;
2496 break;
2497 case 2:
2498 code[1] |= (i->subOp & 0x000f) << 8; // v4src1
2499 code[1] |= (i->subOp & 0x01e0) >> 1; // v4src2
2500 code[1] |= (i->subOp & 0x3c00) << 2; // v4dst
2501 code[1] |= (i->mask & 0x3) << 2;
2502 code[1] |= (i->mask & 0xc) << 21;
2503 break;
2504 default:
2505 assert(0);
2506 break;
2507 }
2508 }
2509
2510 void
emitVSHL(const Instruction * i)2511 CodeEmitterNVC0::emitVSHL(const Instruction *i)
2512 {
2513 uint64_t opc = 0x4;
2514
2515 switch (NV50_IR_SUBOP_Vn(i->subOp)) {
2516 case 0: opc |= 0xe8ULL << 56; break;
2517 case 1: opc |= 0xb4ULL << 56; break;
2518 case 2: opc |= 0x94ULL << 56; break;
2519 default:
2520 assert(0);
2521 break;
2522 }
2523 if (NV50_IR_SUBOP_Vn(i->subOp) == 1) {
2524 if (isSignedType(i->dType)) opc |= 1ULL << 0x2a;
2525 if (isSignedType(i->sType)) opc |= (1 << 6) | (1 << 5);
2526 } else {
2527 if (isSignedType(i->dType)) opc |= 1ULL << 0x39;
2528 if (isSignedType(i->sType)) opc |= 1 << 6;
2529 }
2530 emitForm_A(i, opc);
2531 emitVectorSubOp(i);
2532
2533 if (i->saturate)
2534 code[0] |= 1 << 9;
2535 if (i->flagsDef >= 0)
2536 code[1] |= 1 << 16;
2537 }
2538
2539 void
emitPIXLD(const Instruction * i)2540 CodeEmitterNVC0::emitPIXLD(const Instruction *i)
2541 {
2542 assert(i->encSize == 8);
2543 emitForm_A(i, HEX64(10000000, 00000006));
2544 code[0] |= i->subOp << 5;
2545 code[1] |= 0x00e00000;
2546 }
2547
2548 void
emitSHFL(const Instruction * i)2549 CodeEmitterNVC0::emitSHFL(const Instruction *i)
2550 {
2551 const ImmediateValue *imm;
2552
2553 assert(targ->getChipset() >= NVISA_GK104_CHIPSET);
2554
2555 code[0] = 0x00000005;
2556 code[1] = 0x88000000 | (i->subOp << 23);
2557
2558 emitPredicate(i);
2559
2560 defId(i->def(0), 14);
2561 srcId(i->src(0), 20);
2562
2563 switch (i->src(1).getFile()) {
2564 case FILE_GPR:
2565 srcId(i->src(1), 26);
2566 break;
2567 case FILE_IMMEDIATE:
2568 imm = i->getSrc(1)->asImm();
2569 assert(imm && imm->reg.data.u32 < 0x20);
2570 code[0] |= imm->reg.data.u32 << 26;
2571 code[0] |= 1 << 5;
2572 break;
2573 default:
2574 assert(!"invalid src1 file");
2575 break;
2576 }
2577
2578 switch (i->src(2).getFile()) {
2579 case FILE_GPR:
2580 srcId(i->src(2), 49);
2581 break;
2582 case FILE_IMMEDIATE:
2583 imm = i->getSrc(2)->asImm();
2584 assert(imm && imm->reg.data.u32 < 0x2000);
2585 code[1] |= imm->reg.data.u32 << 10;
2586 code[0] |= 1 << 6;
2587 break;
2588 default:
2589 assert(!"invalid src2 file");
2590 break;
2591 }
2592
2593 setPDSTL(i, i->defExists(1) ? 1 : -1);
2594 }
2595
2596 void
emitVOTE(const Instruction * i)2597 CodeEmitterNVC0::emitVOTE(const Instruction *i)
2598 {
2599 const ImmediateValue *imm;
2600 uint32_t u32;
2601
2602 code[0] = 0x00000004 | (i->subOp << 5);
2603 code[1] = 0x48000000;
2604
2605 emitPredicate(i);
2606
2607 unsigned rp = 0;
2608 for (int d = 0; i->defExists(d); d++) {
2609 if (i->def(d).getFile() == FILE_PREDICATE) {
2610 assert(!(rp & 2));
2611 rp |= 2;
2612 defId(i->def(d), 32 + 22);
2613 } else if (i->def(d).getFile() == FILE_GPR) {
2614 assert(!(rp & 1));
2615 rp |= 1;
2616 defId(i->def(d), 14);
2617 } else {
2618 assert(!"Unhandled def");
2619 }
2620 }
2621 if (!(rp & 1))
2622 code[0] |= 63 << 14;
2623 if (!(rp & 2))
2624 code[1] |= 7 << 22;
2625
2626 switch (i->src(0).getFile()) {
2627 case FILE_PREDICATE:
2628 if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
2629 code[0] |= 1 << 23;
2630 srcId(i->src(0), 20);
2631 break;
2632 case FILE_IMMEDIATE:
2633 imm = i->getSrc(0)->asImm();
2634 assert(imm);
2635 u32 = imm->reg.data.u32;
2636 assert(u32 == 0 || u32 == 1);
2637 code[0] |= (u32 == 1 ? 0x7 : 0xf) << 20;
2638 break;
2639 default:
2640 assert(!"Unhandled src");
2641 break;
2642 }
2643 }
2644
2645 bool
emitInstruction(Instruction * insn)2646 CodeEmitterNVC0::emitInstruction(Instruction *insn)
2647 {
2648 unsigned int size = insn->encSize;
2649
2650 if (writeIssueDelays && !(codeSize & 0x3f))
2651 size += 8;
2652
2653 if (!insn->encSize) {
2654 ERROR("skipping unencodable instruction: "); insn->print();
2655 return false;
2656 } else
2657 if (codeSize + size > codeSizeLimit) {
2658 ERROR("code emitter output buffer too small\n");
2659 return false;
2660 }
2661
2662 if (writeIssueDelays) {
2663 if (!(codeSize & 0x3f)) {
2664 code[0] = 0x00000007; // cf issue delay "instruction"
2665 code[1] = 0x20000000;
2666 code += 2;
2667 codeSize += 8;
2668 }
2669 const unsigned int id = (codeSize & 0x3f) / 8 - 1;
2670 uint32_t *data = code - (id * 2 + 2);
2671 if (id <= 2) {
2672 data[0] |= insn->sched << (id * 8 + 4);
2673 } else
2674 if (id == 3) {
2675 data[0] |= insn->sched << 28;
2676 data[1] |= insn->sched >> 4;
2677 } else {
2678 data[1] |= insn->sched << ((id - 4) * 8 + 4);
2679 }
2680 }
2681
2682 // assert that instructions with multiple defs don't corrupt registers
2683 for (int d = 0; insn->defExists(d); ++d)
2684 assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0);
2685
2686 switch (insn->op) {
2687 case OP_MOV:
2688 case OP_RDSV:
2689 emitMOV(insn);
2690 break;
2691 case OP_NOP:
2692 break;
2693 case OP_LOAD:
2694 emitLOAD(insn);
2695 break;
2696 case OP_STORE:
2697 emitSTORE(insn);
2698 break;
2699 case OP_LINTERP:
2700 case OP_PINTERP:
2701 emitINTERP(insn);
2702 break;
2703 case OP_VFETCH:
2704 emitVFETCH(insn);
2705 break;
2706 case OP_EXPORT:
2707 emitEXPORT(insn);
2708 break;
2709 case OP_PFETCH:
2710 emitPFETCH(insn);
2711 break;
2712 case OP_AFETCH:
2713 emitAFETCH(insn);
2714 break;
2715 case OP_EMIT:
2716 case OP_RESTART:
2717 emitOUT(insn);
2718 break;
2719 case OP_ADD:
2720 case OP_SUB:
2721 if (insn->dType == TYPE_F64)
2722 emitDADD(insn);
2723 else if (isFloatType(insn->dType))
2724 emitFADD(insn);
2725 else
2726 emitUADD(insn);
2727 break;
2728 case OP_MUL:
2729 if (insn->dType == TYPE_F64)
2730 emitDMUL(insn);
2731 else if (isFloatType(insn->dType))
2732 emitFMUL(insn);
2733 else
2734 emitUMUL(insn);
2735 break;
2736 case OP_MAD:
2737 case OP_FMA:
2738 if (insn->dType == TYPE_F64)
2739 emitDMAD(insn);
2740 else if (isFloatType(insn->dType))
2741 emitFMAD(insn);
2742 else
2743 emitIMAD(insn);
2744 break;
2745 case OP_SAD:
2746 emitISAD(insn);
2747 break;
2748 case OP_SHLADD:
2749 emitSHLADD(insn);
2750 break;
2751 case OP_NOT:
2752 emitNOT(insn);
2753 break;
2754 case OP_AND:
2755 emitLogicOp(insn, 0);
2756 break;
2757 case OP_OR:
2758 emitLogicOp(insn, 1);
2759 break;
2760 case OP_XOR:
2761 emitLogicOp(insn, 2);
2762 break;
2763 case OP_SHL:
2764 case OP_SHR:
2765 emitShift(insn);
2766 break;
2767 case OP_SET:
2768 case OP_SET_AND:
2769 case OP_SET_OR:
2770 case OP_SET_XOR:
2771 emitSET(insn->asCmp());
2772 break;
2773 case OP_SELP:
2774 emitSELP(insn);
2775 break;
2776 case OP_SLCT:
2777 emitSLCT(insn->asCmp());
2778 break;
2779 case OP_MIN:
2780 case OP_MAX:
2781 emitMINMAX(insn);
2782 break;
2783 case OP_ABS:
2784 case OP_NEG:
2785 case OP_CEIL:
2786 case OP_FLOOR:
2787 case OP_TRUNC:
2788 case OP_SAT:
2789 emitCVT(insn);
2790 break;
2791 case OP_CVT:
2792 if (insn->def(0).getFile() == FILE_PREDICATE ||
2793 insn->src(0).getFile() == FILE_PREDICATE)
2794 emitMOV(insn);
2795 else
2796 emitCVT(insn);
2797 break;
2798 case OP_RSQ:
2799 emitSFnOp(insn, 5 + 2 * insn->subOp);
2800 break;
2801 case OP_RCP:
2802 emitSFnOp(insn, 4 + 2 * insn->subOp);
2803 break;
2804 case OP_LG2:
2805 emitSFnOp(insn, 3);
2806 break;
2807 case OP_EX2:
2808 emitSFnOp(insn, 2);
2809 break;
2810 case OP_SIN:
2811 emitSFnOp(insn, 1);
2812 break;
2813 case OP_COS:
2814 emitSFnOp(insn, 0);
2815 break;
2816 case OP_PRESIN:
2817 case OP_PREEX2:
2818 emitPreOp(insn);
2819 break;
2820 case OP_TEX:
2821 case OP_TXB:
2822 case OP_TXL:
2823 case OP_TXD:
2824 case OP_TXF:
2825 case OP_TXG:
2826 case OP_TXLQ:
2827 emitTEX(insn->asTex());
2828 break;
2829 case OP_TXQ:
2830 emitTXQ(insn->asTex());
2831 break;
2832 case OP_TEXBAR:
2833 emitTEXBAR(insn);
2834 break;
2835 case OP_SUBFM:
2836 case OP_SUCLAMP:
2837 case OP_SUEAU:
2838 emitSUCalc(insn);
2839 break;
2840 case OP_MADSP:
2841 emitMADSP(insn);
2842 break;
2843 case OP_SULDB:
2844 if (targ->getChipset() >= NVISA_GK104_CHIPSET)
2845 emitSULDGB(insn->asTex());
2846 else
2847 emitSULDB(insn->asTex());
2848 break;
2849 case OP_SUSTB:
2850 case OP_SUSTP:
2851 if (targ->getChipset() >= NVISA_GK104_CHIPSET)
2852 emitSUSTGx(insn->asTex());
2853 else
2854 emitSUSTx(insn->asTex());
2855 break;
2856 case OP_SULEA:
2857 emitSULEA(insn->asTex());
2858 break;
2859 case OP_ATOM:
2860 emitATOM(insn);
2861 break;
2862 case OP_BRA:
2863 case OP_CALL:
2864 case OP_PRERET:
2865 case OP_RET:
2866 case OP_DISCARD:
2867 case OP_EXIT:
2868 case OP_PRECONT:
2869 case OP_CONT:
2870 case OP_PREBREAK:
2871 case OP_BREAK:
2872 case OP_JOINAT:
2873 case OP_BRKPT:
2874 case OP_QUADON:
2875 case OP_QUADPOP:
2876 emitFlow(insn);
2877 break;
2878 case OP_QUADOP:
2879 emitQUADOP(insn, insn->subOp, insn->lanes);
2880 break;
2881 case OP_DFDX:
2882 emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4);
2883 break;
2884 case OP_DFDY:
2885 emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5);
2886 break;
2887 case OP_POPCNT:
2888 emitPOPC(insn);
2889 break;
2890 case OP_INSBF:
2891 emitINSBF(insn);
2892 break;
2893 case OP_EXTBF:
2894 emitEXTBF(insn);
2895 break;
2896 case OP_BFIND:
2897 emitBFIND(insn);
2898 break;
2899 case OP_PERMT:
2900 emitPERMT(insn);
2901 break;
2902 case OP_JOIN:
2903 emitNOP(insn);
2904 insn->join = 1;
2905 break;
2906 case OP_BAR:
2907 emitBAR(insn);
2908 break;
2909 case OP_MEMBAR:
2910 emitMEMBAR(insn);
2911 break;
2912 case OP_CCTL:
2913 emitCCTL(insn);
2914 break;
2915 case OP_VSHL:
2916 emitVSHL(insn);
2917 break;
2918 case OP_PIXLD:
2919 emitPIXLD(insn);
2920 break;
2921 case OP_SHFL:
2922 emitSHFL(insn);
2923 break;
2924 case OP_VOTE:
2925 emitVOTE(insn);
2926 break;
2927 case OP_PHI:
2928 case OP_UNION:
2929 case OP_CONSTRAINT:
2930 ERROR("operation should have been eliminated");
2931 return false;
2932 case OP_EXP:
2933 case OP_LOG:
2934 case OP_SQRT:
2935 case OP_POW:
2936 ERROR("operation should have been lowered\n");
2937 return false;
2938 default:
2939 ERROR("unknown op: %u\n", insn->op);
2940 return false;
2941 }
2942
2943 if (insn->join) {
2944 code[0] |= 0x10;
2945 assert(insn->encSize == 8);
2946 }
2947
2948 code += insn->encSize / 4;
2949 codeSize += insn->encSize;
2950 return true;
2951 }
2952
2953 uint32_t
getMinEncodingSize(const Instruction * i) const2954 CodeEmitterNVC0::getMinEncodingSize(const Instruction *i) const
2955 {
2956 const Target::OpInfo &info = targ->getOpInfo(i);
2957
2958 if (writeIssueDelays || info.minEncSize == 8 || 1)
2959 return 8;
2960
2961 if (i->ftz || i->saturate || i->join)
2962 return 8;
2963 if (i->rnd != ROUND_N)
2964 return 8;
2965 if (i->predSrc >= 0 && i->op == OP_MAD)
2966 return 8;
2967
2968 if (i->op == OP_PINTERP) {
2969 if (i->getSampleMode() || 1) // XXX: grr, short op doesn't work
2970 return 8;
2971 } else
2972 if (i->op == OP_MOV && i->lanes != 0xf) {
2973 return 8;
2974 }
2975
2976 for (int s = 0; i->srcExists(s); ++s) {
2977 if (i->src(s).isIndirect(0))
2978 return 8;
2979
2980 if (i->src(s).getFile() == FILE_MEMORY_CONST) {
2981 if (SDATA(i->src(s)).offset >= 0x100)
2982 return 8;
2983 if (i->getSrc(s)->reg.fileIndex > 1 &&
2984 i->getSrc(s)->reg.fileIndex != 16)
2985 return 8;
2986 } else
2987 if (i->src(s).getFile() == FILE_IMMEDIATE) {
2988 if (i->dType == TYPE_F32) {
2989 if (SDATA(i->src(s)).u32 >= 0x100)
2990 return 8;
2991 } else {
2992 if (SDATA(i->src(s)).u32 > 0xff)
2993 return 8;
2994 }
2995 }
2996
2997 if (i->op == OP_CVT)
2998 continue;
2999 if (i->src(s).mod != Modifier(0)) {
3000 if (i->src(s).mod == Modifier(NV50_IR_MOD_ABS))
3001 if (i->op != OP_RSQ)
3002 return 8;
3003 if (i->src(s).mod == Modifier(NV50_IR_MOD_NEG))
3004 if (i->op != OP_ADD || s != 0)
3005 return 8;
3006 }
3007 }
3008
3009 return 4;
3010 }
3011
3012 // Simplified, erring on safe side.
3013 class SchedDataCalculator : public Pass
3014 {
3015 public:
SchedDataCalculator(const Target * targ)3016 SchedDataCalculator(const Target *targ) : targ(targ) { }
3017
3018 private:
3019 struct RegScores
3020 {
3021 struct Resource {
3022 int st[DATA_FILE_COUNT]; // LD to LD delay 3
3023 int ld[DATA_FILE_COUNT]; // ST to ST delay 3
3024 int tex; // TEX to non-TEX delay 17 (0x11)
3025 int sfu; // SFU to SFU delay 3 (except PRE-ops)
3026 int imul; // integer MUL to MUL delay 3
3027 } res;
3028 struct ScoreData {
3029 int r[256];
3030 int p[8];
3031 int c;
3032 } rd, wr;
3033 int base;
3034 int regs;
3035
rebasenv50_ir::SchedDataCalculator::RegScores3036 void rebase(const int base)
3037 {
3038 const int delta = this->base - base;
3039 if (!delta)
3040 return;
3041 this->base = 0;
3042
3043 for (int i = 0; i < regs; ++i) {
3044 rd.r[i] += delta;
3045 wr.r[i] += delta;
3046 }
3047 for (int i = 0; i < 8; ++i) {
3048 rd.p[i] += delta;
3049 wr.p[i] += delta;
3050 }
3051 rd.c += delta;
3052 wr.c += delta;
3053
3054 for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
3055 res.ld[f] += delta;
3056 res.st[f] += delta;
3057 }
3058 res.sfu += delta;
3059 res.imul += delta;
3060 res.tex += delta;
3061 }
wipenv50_ir::SchedDataCalculator::RegScores3062 void wipe(int regs)
3063 {
3064 memset(&rd, 0, sizeof(rd));
3065 memset(&wr, 0, sizeof(wr));
3066 memset(&res, 0, sizeof(res));
3067 this->regs = regs;
3068 }
getLatestnv50_ir::SchedDataCalculator::RegScores3069 int getLatest(const ScoreData& d) const
3070 {
3071 int max = 0;
3072 for (int i = 0; i < regs; ++i)
3073 if (d.r[i] > max)
3074 max = d.r[i];
3075 for (int i = 0; i < 8; ++i)
3076 if (d.p[i] > max)
3077 max = d.p[i];
3078 if (d.c > max)
3079 max = d.c;
3080 return max;
3081 }
getLatestRdnv50_ir::SchedDataCalculator::RegScores3082 inline int getLatestRd() const
3083 {
3084 return getLatest(rd);
3085 }
getLatestWrnv50_ir::SchedDataCalculator::RegScores3086 inline int getLatestWr() const
3087 {
3088 return getLatest(wr);
3089 }
getLatestnv50_ir::SchedDataCalculator::RegScores3090 inline int getLatest() const
3091 {
3092 const int a = getLatestRd();
3093 const int b = getLatestWr();
3094
3095 int max = MAX2(a, b);
3096 for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
3097 max = MAX2(res.ld[f], max);
3098 max = MAX2(res.st[f], max);
3099 }
3100 max = MAX2(res.sfu, max);
3101 max = MAX2(res.imul, max);
3102 max = MAX2(res.tex, max);
3103 return max;
3104 }
setMaxnv50_ir::SchedDataCalculator::RegScores3105 void setMax(const RegScores *that)
3106 {
3107 for (int i = 0; i < regs; ++i) {
3108 rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
3109 wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
3110 }
3111 for (int i = 0; i < 8; ++i) {
3112 rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
3113 wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
3114 }
3115 rd.c = MAX2(rd.c, that->rd.c);
3116 wr.c = MAX2(wr.c, that->wr.c);
3117
3118 for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
3119 res.ld[f] = MAX2(res.ld[f], that->res.ld[f]);
3120 res.st[f] = MAX2(res.st[f], that->res.st[f]);
3121 }
3122 res.sfu = MAX2(res.sfu, that->res.sfu);
3123 res.imul = MAX2(res.imul, that->res.imul);
3124 res.tex = MAX2(res.tex, that->res.tex);
3125 }
printnv50_ir::SchedDataCalculator::RegScores3126 void print(int cycle)
3127 {
3128 for (int i = 0; i < regs; ++i) {
3129 if (rd.r[i] > cycle)
3130 INFO("rd $r%i @ %i\n", i, rd.r[i]);
3131 if (wr.r[i] > cycle)
3132 INFO("wr $r%i @ %i\n", i, wr.r[i]);
3133 }
3134 for (int i = 0; i < 8; ++i) {
3135 if (rd.p[i] > cycle)
3136 INFO("rd $p%i @ %i\n", i, rd.p[i]);
3137 if (wr.p[i] > cycle)
3138 INFO("wr $p%i @ %i\n", i, wr.p[i]);
3139 }
3140 if (rd.c > cycle)
3141 INFO("rd $c @ %i\n", rd.c);
3142 if (wr.c > cycle)
3143 INFO("wr $c @ %i\n", wr.c);
3144 if (res.sfu > cycle)
3145 INFO("sfu @ %i\n", res.sfu);
3146 if (res.imul > cycle)
3147 INFO("imul @ %i\n", res.imul);
3148 if (res.tex > cycle)
3149 INFO("tex @ %i\n", res.tex);
3150 }
3151 };
3152
3153 RegScores *score; // for current BB
3154 std::vector<RegScores> scoreBoards;
3155 int prevData;
3156 operation prevOp;
3157
3158 const Target *targ;
3159
3160 bool visit(Function *);
3161 bool visit(BasicBlock *);
3162
3163 void commitInsn(const Instruction *, int cycle);
3164 int calcDelay(const Instruction *, int cycle) const;
3165 void setDelay(Instruction *, int delay, Instruction *next);
3166
3167 void recordRd(const Value *, const int ready);
3168 void recordWr(const Value *, const int ready);
3169 void checkRd(const Value *, int cycle, int& delay) const;
3170 void checkWr(const Value *, int cycle, int& delay) const;
3171
3172 int getCycles(const Instruction *, int origDelay) const;
3173 };
3174
3175 void
setDelay(Instruction * insn,int delay,Instruction * next)3176 SchedDataCalculator::setDelay(Instruction *insn, int delay, Instruction *next)
3177 {
3178 if (insn->op == OP_EXIT || insn->op == OP_RET)
3179 delay = MAX2(delay, 14);
3180
3181 if (insn->op == OP_TEXBAR) {
3182 // TODO: except if results not used before EXIT
3183 insn->sched = 0xc2;
3184 } else
3185 if (insn->op == OP_JOIN || insn->join) {
3186 insn->sched = 0x00;
3187 } else
3188 if (delay >= 0 || prevData == 0x04 ||
3189 !next || !targ->canDualIssue(insn, next)) {
3190 insn->sched = static_cast<uint8_t>(MAX2(delay, 0));
3191 if (prevOp == OP_EXPORT)
3192 insn->sched |= 0x40;
3193 else
3194 insn->sched |= 0x20;
3195 } else {
3196 insn->sched = 0x04; // dual-issue
3197 }
3198
3199 if (prevData != 0x04 || prevOp != OP_EXPORT)
3200 if (insn->sched != 0x04 || insn->op == OP_EXPORT)
3201 prevOp = insn->op;
3202
3203 prevData = insn->sched;
3204 }
3205
3206 int
getCycles(const Instruction * insn,int origDelay) const3207 SchedDataCalculator::getCycles(const Instruction *insn, int origDelay) const
3208 {
3209 if (insn->sched & 0x80) {
3210 int c = (insn->sched & 0x0f) * 2 + 1;
3211 if (insn->op == OP_TEXBAR && origDelay > 0)
3212 c += origDelay;
3213 return c;
3214 }
3215 if (insn->sched & 0x60)
3216 return (insn->sched & 0x1f) + 1;
3217 return (insn->sched == 0x04) ? 0 : 32;
3218 }
3219
3220 bool
visit(Function * func)3221 SchedDataCalculator::visit(Function *func)
3222 {
3223 int regs = targ->getFileSize(FILE_GPR) + 1;
3224 scoreBoards.resize(func->cfg.getSize());
3225 for (size_t i = 0; i < scoreBoards.size(); ++i)
3226 scoreBoards[i].wipe(regs);
3227 return true;
3228 }
3229
3230 bool
visit(BasicBlock * bb)3231 SchedDataCalculator::visit(BasicBlock *bb)
3232 {
3233 Instruction *insn;
3234 Instruction *next = NULL;
3235
3236 int cycle = 0;
3237
3238 prevData = 0x00;
3239 prevOp = OP_NOP;
3240 score = &scoreBoards.at(bb->getId());
3241
3242 for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
3243 // back branches will wait until all target dependencies are satisfied
3244 if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
3245 continue;
3246 BasicBlock *in = BasicBlock::get(ei.getNode());
3247 if (in->getExit()) {
3248 if (prevData != 0x04)
3249 prevData = in->getExit()->sched;
3250 prevOp = in->getExit()->op;
3251 }
3252 score->setMax(&scoreBoards.at(in->getId()));
3253 }
3254 if (bb->cfg.incidentCount() > 1)
3255 prevOp = OP_NOP;
3256
3257 #ifdef NVC0_DEBUG_SCHED_DATA
3258 INFO("=== BB:%i initial scores\n", bb->getId());
3259 score->print(cycle);
3260 #endif
3261
3262 for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
3263 next = insn->next;
3264
3265 commitInsn(insn, cycle);
3266 int delay = calcDelay(next, cycle);
3267 setDelay(insn, delay, next);
3268 cycle += getCycles(insn, delay);
3269
3270 #ifdef NVC0_DEBUG_SCHED_DATA
3271 INFO("cycle %i, sched %02x\n", cycle, insn->sched);
3272 insn->print();
3273 next->print();
3274 #endif
3275 }
3276 if (!insn)
3277 return true;
3278 commitInsn(insn, cycle);
3279
3280 int bbDelay = -1;
3281
3282 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
3283 BasicBlock *out = BasicBlock::get(ei.getNode());
3284
3285 if (ei.getType() != Graph::Edge::BACK) {
3286 // only test the first instruction of the outgoing block
3287 next = out->getEntry();
3288 if (next)
3289 bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
3290 } else {
3291 // wait until all dependencies are satisfied
3292 const int regsFree = score->getLatest();
3293 next = out->getFirst();
3294 for (int c = cycle; next && c < regsFree; next = next->next) {
3295 bbDelay = MAX2(bbDelay, calcDelay(next, c));
3296 c += getCycles(next, bbDelay);
3297 }
3298 next = NULL;
3299 }
3300 }
3301 if (bb->cfg.outgoingCount() != 1)
3302 next = NULL;
3303 setDelay(insn, bbDelay, next);
3304 cycle += getCycles(insn, bbDelay);
3305
3306 score->rebase(cycle); // common base for initializing out blocks' scores
3307 return true;
3308 }
3309
3310 #define NVE4_MAX_ISSUE_DELAY 0x1f
3311 int
calcDelay(const Instruction * insn,int cycle) const3312 SchedDataCalculator::calcDelay(const Instruction *insn, int cycle) const
3313 {
3314 int delay = 0, ready = cycle;
3315
3316 for (int s = 0; insn->srcExists(s); ++s)
3317 checkRd(insn->getSrc(s), cycle, delay);
3318 // WAR & WAW don't seem to matter
3319 // for (int s = 0; insn->srcExists(s); ++s)
3320 // recordRd(insn->getSrc(s), cycle);
3321
3322 switch (Target::getOpClass(insn->op)) {
3323 case OPCLASS_SFU:
3324 ready = score->res.sfu;
3325 break;
3326 case OPCLASS_ARITH:
3327 if (insn->op == OP_MUL && !isFloatType(insn->dType))
3328 ready = score->res.imul;
3329 break;
3330 case OPCLASS_TEXTURE:
3331 ready = score->res.tex;
3332 break;
3333 case OPCLASS_LOAD:
3334 ready = score->res.ld[insn->src(0).getFile()];
3335 break;
3336 case OPCLASS_STORE:
3337 ready = score->res.st[insn->src(0).getFile()];
3338 break;
3339 default:
3340 break;
3341 }
3342 if (Target::getOpClass(insn->op) != OPCLASS_TEXTURE)
3343 ready = MAX2(ready, score->res.tex);
3344
3345 delay = MAX2(delay, ready - cycle);
3346
3347 // if can issue next cycle, delay is 0, not 1
3348 return MIN2(delay - 1, NVE4_MAX_ISSUE_DELAY);
3349 }
3350
3351 void
commitInsn(const Instruction * insn,int cycle)3352 SchedDataCalculator::commitInsn(const Instruction *insn, int cycle)
3353 {
3354 const int ready = cycle + targ->getLatency(insn);
3355
3356 for (int d = 0; insn->defExists(d); ++d)
3357 recordWr(insn->getDef(d), ready);
3358 // WAR & WAW don't seem to matter
3359 // for (int s = 0; insn->srcExists(s); ++s)
3360 // recordRd(insn->getSrc(s), cycle);
3361
3362 switch (Target::getOpClass(insn->op)) {
3363 case OPCLASS_SFU:
3364 score->res.sfu = cycle + 4;
3365 break;
3366 case OPCLASS_ARITH:
3367 if (insn->op == OP_MUL && !isFloatType(insn->dType))
3368 score->res.imul = cycle + 4;
3369 break;
3370 case OPCLASS_TEXTURE:
3371 score->res.tex = cycle + 18;
3372 break;
3373 case OPCLASS_LOAD:
3374 if (insn->src(0).getFile() == FILE_MEMORY_CONST)
3375 break;
3376 score->res.ld[insn->src(0).getFile()] = cycle + 4;
3377 score->res.st[insn->src(0).getFile()] = ready;
3378 break;
3379 case OPCLASS_STORE:
3380 score->res.st[insn->src(0).getFile()] = cycle + 4;
3381 score->res.ld[insn->src(0).getFile()] = ready;
3382 break;
3383 case OPCLASS_OTHER:
3384 if (insn->op == OP_TEXBAR)
3385 score->res.tex = cycle;
3386 break;
3387 default:
3388 break;
3389 }
3390
3391 #ifdef NVC0_DEBUG_SCHED_DATA
3392 score->print(cycle);
3393 #endif
3394 }
3395
3396 void
checkRd(const Value * v,int cycle,int & delay) const3397 SchedDataCalculator::checkRd(const Value *v, int cycle, int& delay) const
3398 {
3399 int ready = cycle;
3400 int a, b;
3401
3402 switch (v->reg.file) {
3403 case FILE_GPR:
3404 a = v->reg.data.id;
3405 b = a + v->reg.size / 4;
3406 for (int r = a; r < b; ++r)
3407 ready = MAX2(ready, score->rd.r[r]);
3408 break;
3409 case FILE_PREDICATE:
3410 ready = MAX2(ready, score->rd.p[v->reg.data.id]);
3411 break;
3412 case FILE_FLAGS:
3413 ready = MAX2(ready, score->rd.c);
3414 break;
3415 case FILE_SHADER_INPUT:
3416 case FILE_SHADER_OUTPUT: // yes, TCPs can read outputs
3417 case FILE_MEMORY_LOCAL:
3418 case FILE_MEMORY_CONST:
3419 case FILE_MEMORY_SHARED:
3420 case FILE_MEMORY_GLOBAL:
3421 case FILE_SYSTEM_VALUE:
3422 // TODO: any restrictions here ?
3423 break;
3424 case FILE_IMMEDIATE:
3425 break;
3426 default:
3427 assert(0);
3428 break;
3429 }
3430 if (cycle < ready)
3431 delay = MAX2(delay, ready - cycle);
3432 }
3433
3434 void
checkWr(const Value * v,int cycle,int & delay) const3435 SchedDataCalculator::checkWr(const Value *v, int cycle, int& delay) const
3436 {
3437 int ready = cycle;
3438 int a, b;
3439
3440 switch (v->reg.file) {
3441 case FILE_GPR:
3442 a = v->reg.data.id;
3443 b = a + v->reg.size / 4;
3444 for (int r = a; r < b; ++r)
3445 ready = MAX2(ready, score->wr.r[r]);
3446 break;
3447 case FILE_PREDICATE:
3448 ready = MAX2(ready, score->wr.p[v->reg.data.id]);
3449 break;
3450 default:
3451 assert(v->reg.file == FILE_FLAGS);
3452 ready = MAX2(ready, score->wr.c);
3453 break;
3454 }
3455 if (cycle < ready)
3456 delay = MAX2(delay, ready - cycle);
3457 }
3458
3459 void
recordWr(const Value * v,const int ready)3460 SchedDataCalculator::recordWr(const Value *v, const int ready)
3461 {
3462 int a = v->reg.data.id;
3463
3464 if (v->reg.file == FILE_GPR) {
3465 int b = a + v->reg.size / 4;
3466 for (int r = a; r < b; ++r)
3467 score->rd.r[r] = ready;
3468 } else
3469 // $c, $pX: shorter issue-to-read delay (at least as exec pred and carry)
3470 if (v->reg.file == FILE_PREDICATE) {
3471 score->rd.p[a] = ready + 4;
3472 } else {
3473 assert(v->reg.file == FILE_FLAGS);
3474 score->rd.c = ready + 4;
3475 }
3476 }
3477
3478 void
recordRd(const Value * v,const int ready)3479 SchedDataCalculator::recordRd(const Value *v, const int ready)
3480 {
3481 int a = v->reg.data.id;
3482
3483 if (v->reg.file == FILE_GPR) {
3484 int b = a + v->reg.size / 4;
3485 for (int r = a; r < b; ++r)
3486 score->wr.r[r] = ready;
3487 } else
3488 if (v->reg.file == FILE_PREDICATE) {
3489 score->wr.p[a] = ready;
3490 } else
3491 if (v->reg.file == FILE_FLAGS) {
3492 score->wr.c = ready;
3493 }
3494 }
3495
3496 bool
calculateSchedDataNVC0(const Target * targ,Function * func)3497 calculateSchedDataNVC0(const Target *targ, Function *func)
3498 {
3499 SchedDataCalculator sched(targ);
3500 return sched.run(func, true, true);
3501 }
3502
3503 void
prepareEmission(Function * func)3504 CodeEmitterNVC0::prepareEmission(Function *func)
3505 {
3506 CodeEmitter::prepareEmission(func);
3507
3508 if (targ->hasSWSched)
3509 calculateSchedDataNVC0(targ, func);
3510 }
3511
CodeEmitterNVC0(const TargetNVC0 * target)3512 CodeEmitterNVC0::CodeEmitterNVC0(const TargetNVC0 *target)
3513 : CodeEmitter(target),
3514 targNVC0(target),
3515 writeIssueDelays(target->hasSWSched)
3516 {
3517 code = NULL;
3518 codeSize = codeSizeLimit = 0;
3519 relocInfo = NULL;
3520 }
3521
3522 CodeEmitter *
createCodeEmitterNVC0(Program::Type type)3523 TargetNVC0::createCodeEmitterNVC0(Program::Type type)
3524 {
3525 CodeEmitterNVC0 *emit = new CodeEmitterNVC0(this);
3526 emit->setProgramType(type);
3527 return emit;
3528 }
3529
3530 CodeEmitter *
getCodeEmitter(Program::Type type)3531 TargetNVC0::getCodeEmitter(Program::Type type)
3532 {
3533 if (chipset >= NVISA_GK20A_CHIPSET)
3534 return createCodeEmitterGK110(type);
3535 return createCodeEmitterNVC0(type);
3536 }
3537
3538 } // namespace nv50_ir
3539