• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2011 Christoph Bumiller
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "codegen/nv50_ir_target_nvc0.h"
24 
25 namespace nv50_ir {
26 
27 // Argh, all these assertions ...
28 
29 class CodeEmitterNVC0 : public CodeEmitter
30 {
31 public:
32    CodeEmitterNVC0(const TargetNVC0 *);
33 
34    virtual bool emitInstruction(Instruction *);
35    virtual uint32_t getMinEncodingSize(const Instruction *) const;
36    virtual void prepareEmission(Function *);
37 
setProgramType(Program::Type pType)38    inline void setProgramType(Program::Type pType) { progType = pType; }
39 
40 private:
41    const TargetNVC0 *targNVC0;
42 
43    Program::Type progType;
44 
45    const bool writeIssueDelays;
46 
47 private:
48    void emitForm_A(const Instruction *, uint64_t);
49    void emitForm_B(const Instruction *, uint64_t);
50    void emitForm_S(const Instruction *, uint32_t, bool pred);
51 
52    void emitPredicate(const Instruction *);
53 
54    void setAddress16(const ValueRef&);
55    void setAddress24(const ValueRef&);
56    void setAddressByFile(const ValueRef&);
57    void setImmediate(const Instruction *, const int s); // needs op already set
58    void setImmediateS8(const ValueRef&);
59    void setSUConst16(const Instruction *, const int s);
60    void setSUPred(const Instruction *, const int s);
61 
62    void emitCondCode(CondCode cc, int pos);
63    void emitInterpMode(const Instruction *);
64    void emitLoadStoreType(DataType ty);
65    void emitSUGType(DataType);
66    void emitSUAddr(const TexInstruction *);
67    void emitSUDim(const TexInstruction *);
68    void emitCachingMode(CacheMode c);
69 
70    void emitShortSrc2(const ValueRef&);
71 
72    inline uint8_t getSRegEncoding(const ValueRef&);
73 
74    void roundMode_A(const Instruction *);
75    void roundMode_C(const Instruction *);
76    void roundMode_CS(const Instruction *);
77 
78    void emitNegAbs12(const Instruction *);
79 
80    void emitNOP(const Instruction *);
81 
82    void emitLOAD(const Instruction *);
83    void emitSTORE(const Instruction *);
84    void emitMOV(const Instruction *);
85    void emitATOM(const Instruction *);
86    void emitMEMBAR(const Instruction *);
87    void emitCCTL(const Instruction *);
88 
89    void emitINTERP(const Instruction *);
90    void emitAFETCH(const Instruction *);
91    void emitPFETCH(const Instruction *);
92    void emitVFETCH(const Instruction *);
93    void emitEXPORT(const Instruction *);
94    void emitOUT(const Instruction *);
95 
96    void emitUADD(const Instruction *);
97    void emitFADD(const Instruction *);
98    void emitDADD(const Instruction *);
99    void emitUMUL(const Instruction *);
100    void emitFMUL(const Instruction *);
101    void emitDMUL(const Instruction *);
102    void emitIMAD(const Instruction *);
103    void emitISAD(const Instruction *);
104    void emitSHLADD(const Instruction *a);
105    void emitFMAD(const Instruction *);
106    void emitDMAD(const Instruction *);
107    void emitMADSP(const Instruction *);
108 
109    void emitNOT(Instruction *);
110    void emitLogicOp(const Instruction *, uint8_t subOp);
111    void emitPOPC(const Instruction *);
112    void emitINSBF(const Instruction *);
113    void emitEXTBF(const Instruction *);
114    void emitBFIND(const Instruction *);
115    void emitPERMT(const Instruction *);
116    void emitShift(const Instruction *);
117 
118    void emitSFnOp(const Instruction *, uint8_t subOp);
119 
120    void emitCVT(Instruction *);
121    void emitMINMAX(const Instruction *);
122    void emitPreOp(const Instruction *);
123 
124    void emitSET(const CmpInstruction *);
125    void emitSLCT(const CmpInstruction *);
126    void emitSELP(const Instruction *);
127 
128    void emitTEXBAR(const Instruction *);
129    void emitTEX(const TexInstruction *);
130    void emitTEXCSAA(const TexInstruction *);
131    void emitTXQ(const TexInstruction *);
132 
133    void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);
134 
135    void emitFlow(const Instruction *);
136    void emitBAR(const Instruction *);
137 
138    void emitSUCLAMPMode(uint16_t);
139    void emitSUCalc(Instruction *);
140    void emitSULDGB(const TexInstruction *);
141    void emitSUSTGx(const TexInstruction *);
142 
143    void emitSULDB(const TexInstruction *);
144    void emitSUSTx(const TexInstruction *);
145    void emitSULEA(const TexInstruction *);
146 
147    void emitVSHL(const Instruction *);
148    void emitVectorSubOp(const Instruction *);
149 
150    void emitPIXLD(const Instruction *);
151 
152    void emitVOTE(const Instruction *);
153 
154    inline void defId(const ValueDef&, const int pos);
155    inline void defId(const Instruction *, int d, const int pos);
156    inline void srcId(const ValueRef&, const int pos);
157    inline void srcId(const ValueRef *, const int pos);
158    inline void srcId(const Instruction *, int s, const int pos);
159    inline void srcAddr32(const ValueRef&, int pos, int shr);
160 
161    inline bool isLIMM(const ValueRef&, DataType ty);
162 };
163 
// Builds a 64-bit opcode literal from two groups of hex digits so that the
// high and low code words are easy to tell apart when reading the source.
#define HEX64(hi, lo) 0x##hi##lo##ULL

// Storage data (reg.data) of the value returned by rep() for a source
// reference (SDATA) or a definition (DDATA); both expand to the same access.
#define SDATA(ref) ((ref).rep()->reg.data)
#define DDATA(def) SDATA(def)
169 
srcId(const ValueRef & src,const int pos)170 void CodeEmitterNVC0::srcId(const ValueRef& src, const int pos)
171 {
172    code[pos / 32] |= (src.get() ? SDATA(src).id : 63) << (pos % 32);
173 }
174 
srcId(const ValueRef * src,const int pos)175 void CodeEmitterNVC0::srcId(const ValueRef *src, const int pos)
176 {
177    code[pos / 32] |= (src ? SDATA(*src).id : 63) << (pos % 32);
178 }
179 
srcId(const Instruction * insn,int s,int pos)180 void CodeEmitterNVC0::srcId(const Instruction *insn, int s, int pos)
181 {
182    int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : 63;
183    code[pos / 32] |= r << (pos % 32);
184 }
185 
186 void
srcAddr32(const ValueRef & src,int pos,int shr)187 CodeEmitterNVC0::srcAddr32(const ValueRef& src, int pos, int shr)
188 {
189    const uint32_t offset = SDATA(src).offset >> shr;
190 
191    code[pos / 32] |= offset << (pos % 32);
192    if (pos && (pos < 32))
193       code[1] |= offset >> (32 - pos);
194 }
195 
defId(const ValueDef & def,const int pos)196 void CodeEmitterNVC0::defId(const ValueDef& def, const int pos)
197 {
198    code[pos / 32] |= (def.get() ? DDATA(def).id : 63) << (pos % 32);
199 }
200 
defId(const Instruction * insn,int d,int pos)201 void CodeEmitterNVC0::defId(const Instruction *insn, int d, int pos)
202 {
203    int r = insn->defExists(d) ? DDATA(insn->def(d)).id : 63;
204    code[pos / 32] |= r << (pos % 32);
205 }
206 
isLIMM(const ValueRef & ref,DataType ty)207 bool CodeEmitterNVC0::isLIMM(const ValueRef& ref, DataType ty)
208 {
209    const ImmediateValue *imm = ref.get()->asImm();
210 
211    return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000));
212 }
213 
214 void
roundMode_A(const Instruction * insn)215 CodeEmitterNVC0::roundMode_A(const Instruction *insn)
216 {
217    switch (insn->rnd) {
218    case ROUND_M: code[1] |= 1 << 23; break;
219    case ROUND_P: code[1] |= 2 << 23; break;
220    case ROUND_Z: code[1] |= 3 << 23; break;
221    default:
222       assert(insn->rnd == ROUND_N);
223       break;
224    }
225 }
226 
227 void
emitNegAbs12(const Instruction * i)228 CodeEmitterNVC0::emitNegAbs12(const Instruction *i)
229 {
230    if (i->src(1).mod.abs()) code[0] |= 1 << 6;
231    if (i->src(0).mod.abs()) code[0] |= 1 << 7;
232    if (i->src(1).mod.neg()) code[0] |= 1 << 8;
233    if (i->src(0).mod.neg()) code[0] |= 1 << 9;
234 }
235 
emitCondCode(CondCode cc,int pos)236 void CodeEmitterNVC0::emitCondCode(CondCode cc, int pos)
237 {
238    uint8_t val;
239 
240    switch (cc) {
241    case CC_LT:  val = 0x1; break;
242    case CC_LTU: val = 0x9; break;
243    case CC_EQ:  val = 0x2; break;
244    case CC_EQU: val = 0xa; break;
245    case CC_LE:  val = 0x3; break;
246    case CC_LEU: val = 0xb; break;
247    case CC_GT:  val = 0x4; break;
248    case CC_GTU: val = 0xc; break;
249    case CC_NE:  val = 0x5; break;
250    case CC_NEU: val = 0xd; break;
251    case CC_GE:  val = 0x6; break;
252    case CC_GEU: val = 0xe; break;
253    case CC_TR:  val = 0xf; break;
254    case CC_FL:  val = 0x0; break;
255 
256    case CC_A:  val = 0x14; break;
257    case CC_NA: val = 0x13; break;
258    case CC_S:  val = 0x15; break;
259    case CC_NS: val = 0x12; break;
260    case CC_C:  val = 0x16; break;
261    case CC_NC: val = 0x11; break;
262    case CC_O:  val = 0x17; break;
263    case CC_NO: val = 0x10; break;
264 
265    default:
266       val = 0;
267       assert(!"invalid condition code");
268       break;
269    }
270    code[pos / 32] |= val << (pos % 32);
271 }
272 
273 void
emitPredicate(const Instruction * i)274 CodeEmitterNVC0::emitPredicate(const Instruction *i)
275 {
276    if (i->predSrc >= 0) {
277       assert(i->getPredicate()->reg.file == FILE_PREDICATE);
278       srcId(i->src(i->predSrc), 10);
279       if (i->cc == CC_NOT_P)
280          code[0] |= 0x2000; // negate
281    } else {
282       code[0] |= 0x1c00;
283    }
284 }
285 
286 void
setAddressByFile(const ValueRef & src)287 CodeEmitterNVC0::setAddressByFile(const ValueRef& src)
288 {
289    switch (src.getFile()) {
290    case FILE_MEMORY_GLOBAL:
291       srcAddr32(src, 26, 0);
292       break;
293    case FILE_MEMORY_LOCAL:
294    case FILE_MEMORY_SHARED:
295       setAddress24(src);
296       break;
297    default:
298       assert(src.getFile() == FILE_MEMORY_CONST);
299       setAddress16(src);
300       break;
301    }
302 }
303 
304 void
setAddress16(const ValueRef & src)305 CodeEmitterNVC0::setAddress16(const ValueRef& src)
306 {
307    Symbol *sym = src.get()->asSym();
308 
309    assert(sym);
310 
311    code[0] |= (sym->reg.data.offset & 0x003f) << 26;
312    code[1] |= (sym->reg.data.offset & 0xffc0) >> 6;
313 }
314 
315 void
setAddress24(const ValueRef & src)316 CodeEmitterNVC0::setAddress24(const ValueRef& src)
317 {
318    Symbol *sym = src.get()->asSym();
319 
320    assert(sym);
321 
322    code[0] |= (sym->reg.data.offset & 0x00003f) << 26;
323    code[1] |= (sym->reg.data.offset & 0xffffc0) >> 6;
324 }
325 
326 void
setImmediate(const Instruction * i,const int s)327 CodeEmitterNVC0::setImmediate(const Instruction *i, const int s)
328 {
329    const ImmediateValue *imm = i->src(s).get()->asImm();
330    uint32_t u32;
331 
332    assert(imm);
333    u32 = imm->reg.data.u32;
334 
335    if ((code[0] & 0xf) == 0x1) {
336       // double immediate
337       uint64_t u64 = imm->reg.data.u64;
338       assert(!(u64 & 0x00000fffffffffffULL));
339       assert(!(code[1] & 0xc000));
340       code[0] |= ((u64 >> 44) & 0x3f) << 26;
341       code[1] |= 0xc000 | (u64 >> 50);
342    } else
343    if ((code[0] & 0xf) == 0x2) {
344       // LIMM
345       code[0] |= (u32 & 0x3f) << 26;
346       code[1] |= u32 >> 6;
347    } else
348    if ((code[0] & 0xf) == 0x3 || (code[0] & 0xf) == 4) {
349       // integer immediate
350       assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000);
351       assert(!(code[1] & 0xc000));
352       u32 &= 0xfffff;
353       code[0] |= (u32 & 0x3f) << 26;
354       code[1] |= 0xc000 | (u32 >> 6);
355    } else {
356       // float immediate
357       assert(!(u32 & 0x00000fff));
358       assert(!(code[1] & 0xc000));
359       code[0] |= ((u32 >> 12) & 0x3f) << 26;
360       code[1] |= 0xc000 | (u32 >> 18);
361    }
362 }
363 
setImmediateS8(const ValueRef & ref)364 void CodeEmitterNVC0::setImmediateS8(const ValueRef &ref)
365 {
366    const ImmediateValue *imm = ref.get()->asImm();
367 
368    int8_t s8 = static_cast<int8_t>(imm->reg.data.s32);
369 
370    assert(s8 == imm->reg.data.s32);
371 
372    code[0] |= (s8 & 0x3f) << 26;
373    code[0] |= (s8 >> 6) << 8;
374 }
375 
376 void
emitForm_A(const Instruction * i,uint64_t opc)377 CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc)
378 {
379    code[0] = opc;
380    code[1] = opc >> 32;
381 
382    emitPredicate(i);
383 
384    defId(i->def(0), 14);
385 
386    int s1 = 26;
387    if (i->srcExists(2) && i->getSrc(2)->reg.file == FILE_MEMORY_CONST)
388       s1 = 49;
389 
390    for (int s = 0; s < 3 && i->srcExists(s); ++s) {
391       switch (i->getSrc(s)->reg.file) {
392       case FILE_MEMORY_CONST:
393          assert(!(code[1] & 0xc000));
394          code[1] |= (s == 2) ? 0x8000 : 0x4000;
395          code[1] |= i->getSrc(s)->reg.fileIndex << 10;
396          setAddress16(i->src(s));
397          break;
398       case FILE_IMMEDIATE:
399          assert(s == 1 ||
400                 i->op == OP_MOV || i->op == OP_PRESIN || i->op == OP_PREEX2);
401          assert(!(code[1] & 0xc000));
402          setImmediate(i, s);
403          break;
404       case FILE_GPR:
405          if ((s == 2) && ((code[0] & 0x7) == 2)) // LIMM: 3rd src == dst
406             break;
407          srcId(i->src(s), s ? ((s == 2) ? 49 : s1) : 20);
408          break;
409       default:
410          if (i->op == OP_SELP) {
411             // OP_SELP is used to implement shared+atomics on Fermi.
412             assert(s == 2 && i->src(s).getFile() == FILE_PREDICATE);
413             srcId(i->src(s), 49);
414          }
415          // ignore here, can be predicate or flags, but must not be address
416          break;
417       }
418    }
419 }
420 
421 void
emitForm_B(const Instruction * i,uint64_t opc)422 CodeEmitterNVC0::emitForm_B(const Instruction *i, uint64_t opc)
423 {
424    code[0] = opc;
425    code[1] = opc >> 32;
426 
427    emitPredicate(i);
428 
429    defId(i->def(0), 14);
430 
431    switch (i->src(0).getFile()) {
432    case FILE_MEMORY_CONST:
433       assert(!(code[1] & 0xc000));
434       code[1] |= 0x4000 | (i->src(0).get()->reg.fileIndex << 10);
435       setAddress16(i->src(0));
436       break;
437    case FILE_IMMEDIATE:
438       assert(!(code[1] & 0xc000));
439       setImmediate(i, 0);
440       break;
441    case FILE_GPR:
442       srcId(i->src(0), 26);
443       break;
444    default:
445       // ignore here, can be predicate or flags, but must not be address
446       break;
447    }
448 }
449 
450 void
emitForm_S(const Instruction * i,uint32_t opc,bool pred)451 CodeEmitterNVC0::emitForm_S(const Instruction *i, uint32_t opc, bool pred)
452 {
453    code[0] = opc;
454 
455    int ss2a = 0;
456    if (opc == 0x0d || opc == 0x0e)
457       ss2a = 2;
458 
459    defId(i->def(0), 14);
460    srcId(i->src(0), 20);
461 
462    assert(pred || (i->predSrc < 0));
463    if (pred)
464       emitPredicate(i);
465 
466    for (int s = 1; s < 3 && i->srcExists(s); ++s) {
467       if (i->src(s).get()->reg.file == FILE_MEMORY_CONST) {
468          assert(!(code[0] & (0x300 >> ss2a)));
469          switch (i->src(s).get()->reg.fileIndex) {
470          case 0:  code[0] |= 0x100 >> ss2a; break;
471          case 1:  code[0] |= 0x200 >> ss2a; break;
472          case 16: code[0] |= 0x300 >> ss2a; break;
473          default:
474             ERROR("invalid c[] space for short form\n");
475             break;
476          }
477          if (s == 1)
478             code[0] |= i->getSrc(s)->reg.data.offset << 24;
479          else
480             code[0] |= i->getSrc(s)->reg.data.offset << 6;
481       } else
482       if (i->src(s).getFile() == FILE_IMMEDIATE) {
483          assert(s == 1);
484          setImmediateS8(i->src(s));
485       } else
486       if (i->src(s).getFile() == FILE_GPR) {
487          srcId(i->src(s), (s == 1) ? 26 : 8);
488       }
489    }
490 }
491 
492 void
emitShortSrc2(const ValueRef & src)493 CodeEmitterNVC0::emitShortSrc2(const ValueRef &src)
494 {
495    if (src.getFile() == FILE_MEMORY_CONST) {
496       switch (src.get()->reg.fileIndex) {
497       case 0:  code[0] |= 0x100; break;
498       case 1:  code[0] |= 0x200; break;
499       case 16: code[0] |= 0x300; break;
500       default:
501          assert(!"unsupported file index for short op");
502          break;
503       }
504       srcAddr32(src, 20, 2);
505    } else {
506       srcId(src, 20);
507       assert(src.getFile() == FILE_GPR);
508    }
509 }
510 
511 void
emitNOP(const Instruction * i)512 CodeEmitterNVC0::emitNOP(const Instruction *i)
513 {
514    code[0] = 0x000001e4;
515    code[1] = 0x40000000;
516    emitPredicate(i);
517 }
518 
519 void
emitFMAD(const Instruction * i)520 CodeEmitterNVC0::emitFMAD(const Instruction *i)
521 {
522    bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
523 
524    if (i->encSize == 8) {
525       if (isLIMM(i->src(1), TYPE_F32)) {
526          emitForm_A(i, HEX64(20000000, 00000002));
527       } else {
528          emitForm_A(i, HEX64(30000000, 00000000));
529 
530          if (i->src(2).mod.neg())
531             code[0] |= 1 << 8;
532       }
533       roundMode_A(i);
534 
535       if (neg1)
536          code[0] |= 1 << 9;
537 
538       if (i->saturate)
539          code[0] |= 1 << 5;
540 
541       if (i->dnz)
542          code[0] |= 1 << 7;
543       else
544       if (i->ftz)
545          code[0] |= 1 << 6;
546    } else {
547       assert(!i->saturate && !i->src(2).mod.neg());
548       emitForm_S(i, (i->src(2).getFile() == FILE_MEMORY_CONST) ? 0x2e : 0x0e,
549                  false);
550       if (neg1)
551          code[0] |= 1 << 4;
552    }
553 }
554 
555 void
emitDMAD(const Instruction * i)556 CodeEmitterNVC0::emitDMAD(const Instruction *i)
557 {
558    bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
559 
560    emitForm_A(i, HEX64(20000000, 00000001));
561 
562    if (i->src(2).mod.neg())
563       code[0] |= 1 << 8;
564 
565    roundMode_A(i);
566 
567    if (neg1)
568       code[0] |= 1 << 9;
569 
570    assert(!i->saturate);
571    assert(!i->ftz);
572 }
573 
574 void
emitFMUL(const Instruction * i)575 CodeEmitterNVC0::emitFMUL(const Instruction *i)
576 {
577    bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
578 
579    assert(i->postFactor >= -3 && i->postFactor <= 3);
580 
581    if (i->encSize == 8) {
582       if (isLIMM(i->src(1), TYPE_F32)) {
583          assert(i->postFactor == 0); // constant folded, hopefully
584          emitForm_A(i, HEX64(30000000, 00000002));
585       } else {
586          emitForm_A(i, HEX64(58000000, 00000000));
587          roundMode_A(i);
588          code[1] |= ((i->postFactor > 0) ?
589                      (7 - i->postFactor) : (0 - i->postFactor)) << 17;
590       }
591       if (neg)
592          code[1] ^= 1 << 25; // aliases with LIMM sign bit
593 
594       if (i->saturate)
595          code[0] |= 1 << 5;
596 
597       if (i->dnz)
598          code[0] |= 1 << 7;
599       else
600       if (i->ftz)
601          code[0] |= 1 << 6;
602    } else {
603       assert(!neg && !i->saturate && !i->ftz && !i->postFactor);
604       emitForm_S(i, 0xa8, true);
605    }
606 }
607 
608 void
emitDMUL(const Instruction * i)609 CodeEmitterNVC0::emitDMUL(const Instruction *i)
610 {
611    bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
612 
613    emitForm_A(i, HEX64(50000000, 00000001));
614    roundMode_A(i);
615 
616    if (neg)
617       code[0] |= 1 << 9;
618 
619    assert(!i->saturate);
620    assert(!i->ftz);
621    assert(!i->dnz);
622    assert(!i->postFactor);
623 }
624 
625 void
emitUMUL(const Instruction * i)626 CodeEmitterNVC0::emitUMUL(const Instruction *i)
627 {
628    if (i->encSize == 8) {
629       if (i->src(1).getFile() == FILE_IMMEDIATE) {
630          emitForm_A(i, HEX64(10000000, 00000002));
631       } else {
632          emitForm_A(i, HEX64(50000000, 00000003));
633       }
634       if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
635          code[0] |= 1 << 6;
636       if (i->sType == TYPE_S32)
637          code[0] |= 1 << 5;
638       if (i->dType == TYPE_S32)
639          code[0] |= 1 << 7;
640    } else {
641       emitForm_S(i, i->src(1).getFile() == FILE_IMMEDIATE ? 0xaa : 0x2a, true);
642 
643       if (i->sType == TYPE_S32)
644          code[0] |= 1 << 6;
645    }
646 }
647 
648 void
emitFADD(const Instruction * i)649 CodeEmitterNVC0::emitFADD(const Instruction *i)
650 {
651    if (i->encSize == 8) {
652       if (isLIMM(i->src(1), TYPE_F32)) {
653          assert(!i->saturate);
654          emitForm_A(i, HEX64(28000000, 00000002));
655 
656          code[0] |= i->src(0).mod.abs() << 7;
657          code[0] |= i->src(0).mod.neg() << 9;
658 
659          if (i->src(1).mod.abs())
660             code[1] &= 0xfdffffff;
661          if ((i->op == OP_SUB) != static_cast<bool>(i->src(1).mod.neg()))
662             code[1] ^= 0x02000000;
663       } else {
664          emitForm_A(i, HEX64(50000000, 00000000));
665 
666          roundMode_A(i);
667          if (i->saturate)
668             code[1] |= 1 << 17;
669 
670          emitNegAbs12(i);
671          if (i->op == OP_SUB) code[0] ^= 1 << 8;
672       }
673       if (i->ftz)
674          code[0] |= 1 << 5;
675    } else {
676       assert(!i->saturate && i->op != OP_SUB &&
677              !i->src(0).mod.abs() &&
678              !i->src(1).mod.neg() && !i->src(1).mod.abs());
679 
680       emitForm_S(i, 0x49, true);
681 
682       if (i->src(0).mod.neg())
683          code[0] |= 1 << 7;
684    }
685 }
686 
687 void
emitDADD(const Instruction * i)688 CodeEmitterNVC0::emitDADD(const Instruction *i)
689 {
690    assert(i->encSize == 8);
691    emitForm_A(i, HEX64(48000000, 00000001));
692    roundMode_A(i);
693    assert(!i->saturate);
694    assert(!i->ftz);
695    emitNegAbs12(i);
696    if (i->op == OP_SUB)
697       code[0] ^= 1 << 8;
698 }
699 
700 void
emitUADD(const Instruction * i)701 CodeEmitterNVC0::emitUADD(const Instruction *i)
702 {
703    uint32_t addOp = 0;
704 
705    assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
706 
707    if (i->src(0).mod.neg())
708       addOp |= 0x200;
709    if (i->src(1).mod.neg())
710       addOp |= 0x100;
711    if (i->op == OP_SUB)
712       addOp ^= 0x100;
713 
714    assert(addOp != 0x300); // would be add-plus-one
715 
716    if (i->encSize == 8) {
717       if (isLIMM(i->src(1), TYPE_U32)) {
718          emitForm_A(i, HEX64(08000000, 00000002));
719          if (i->defExists(1))
720             code[1] |= 1 << 26; // write carry
721       } else {
722          emitForm_A(i, HEX64(48000000, 00000003));
723          if (i->defExists(1))
724             code[1] |= 1 << 16; // write carry
725       }
726       code[0] |= addOp;
727 
728       if (i->saturate)
729          code[0] |= 1 << 5;
730       if (i->flagsSrc >= 0) // add carry
731          code[0] |= 1 << 6;
732    } else {
733       assert(!(addOp & 0x100));
734       emitForm_S(i, (addOp >> 3) |
735                  ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0xac : 0x2c), true);
736    }
737 }
738 
739 void
emitIMAD(const Instruction * i)740 CodeEmitterNVC0::emitIMAD(const Instruction *i)
741 {
742    uint8_t addOp =
743       i->src(2).mod.neg() | ((i->src(0).mod.neg() ^ i->src(1).mod.neg()) << 1);
744 
745    assert(i->encSize == 8);
746    emitForm_A(i, HEX64(20000000, 00000003));
747 
748    assert(addOp != 3);
749    code[0] |= addOp << 8;
750 
751    if (isSignedType(i->dType))
752       code[0] |= 1 << 7;
753    if (isSignedType(i->sType))
754       code[0] |= 1 << 5;
755 
756    code[1] |= i->saturate << 24;
757 
758    if (i->flagsDef >= 0) code[1] |= 1 << 16;
759    if (i->flagsSrc >= 0) code[1] |= 1 << 23;
760 
761    if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
762       code[0] |= 1 << 6;
763 }
764 
765 void
emitSHLADD(const Instruction * i)766 CodeEmitterNVC0::emitSHLADD(const Instruction *i)
767 {
768    uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(2).mod.neg();
769    const ImmediateValue *imm = i->src(1).get()->asImm();
770    assert(imm);
771 
772    code[0] = 0x00000003;
773    code[1] = 0x40000000 | addOp << 23;
774 
775    emitPredicate(i);
776 
777    defId(i->def(0), 14);
778    srcId(i->src(0), 20);
779 
780    if (i->flagsDef >= 0)
781       code[1] |= 1 << 16;
782 
783    assert(!(imm->reg.data.u32 & 0xffffffe0));
784    code[0] |= imm->reg.data.u32 << 5;
785 
786    switch (i->src(2).getFile()) {
787    case FILE_GPR:
788       srcId(i->src(2), 26);
789       break;
790    case FILE_MEMORY_CONST:
791       code[1] |= 0x4000;
792       code[1] |= i->getSrc(2)->reg.fileIndex << 10;
793       setAddress16(i->src(2));
794       break;
795    case FILE_IMMEDIATE:
796       setImmediate(i, 2);
797       break;
798    default:
799       assert(!"bad src2 file");
800       break;
801    }
802 }
803 
804 void
emitMADSP(const Instruction * i)805 CodeEmitterNVC0::emitMADSP(const Instruction *i)
806 {
807    assert(targ->getChipset() >= NVISA_GK104_CHIPSET);
808 
809    emitForm_A(i, HEX64(00000000, 00000003));
810 
811    if (i->subOp == NV50_IR_SUBOP_MADSP_SD) {
812       code[1] |= 0x01800000;
813    } else {
814       code[0] |= (i->subOp & 0x00f) << 7;
815       code[0] |= (i->subOp & 0x0f0) << 1;
816       code[0] |= (i->subOp & 0x100) >> 3;
817       code[0] |= (i->subOp & 0x200) >> 2;
818       code[1] |= (i->subOp & 0xc00) << 13;
819    }
820 
821    if (i->flagsDef >= 0)
822       code[1] |= 1 << 16;
823 }
824 
825 void
emitISAD(const Instruction * i)826 CodeEmitterNVC0::emitISAD(const Instruction *i)
827 {
828    assert(i->dType == TYPE_S32 || i->dType == TYPE_U32);
829    assert(i->encSize == 8);
830 
831    emitForm_A(i, HEX64(38000000, 00000003));
832 
833    if (i->dType == TYPE_S32)
834       code[0] |= 1 << 5;
835 }
836 
837 void
emitNOT(Instruction * i)838 CodeEmitterNVC0::emitNOT(Instruction *i)
839 {
840    assert(i->encSize == 8);
841    i->setSrc(1, i->src(0));
842    emitForm_A(i, HEX64(68000000, 000001c3));
843 }
844 
845 void
emitLogicOp(const Instruction * i,uint8_t subOp)846 CodeEmitterNVC0::emitLogicOp(const Instruction *i, uint8_t subOp)
847 {
848    if (i->def(0).getFile() == FILE_PREDICATE) {
849       code[0] = 0x00000004 | (subOp << 30);
850       code[1] = 0x0c000000;
851 
852       emitPredicate(i);
853 
854       defId(i->def(0), 17);
855       srcId(i->src(0), 20);
856       if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 23;
857       srcId(i->src(1), 26);
858       if (i->src(1).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 29;
859 
860       if (i->defExists(1)) {
861          defId(i->def(1), 14);
862       } else {
863          code[0] |= 7 << 14;
864       }
865       // (a OP b) OP c
866       if (i->predSrc != 2 && i->srcExists(2)) {
867          code[1] |= subOp << 21;
868          srcId(i->src(2), 49);
869          if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 20;
870       } else {
871          code[1] |= 0x000e0000;
872       }
873    } else
874    if (i->encSize == 8) {
875       if (isLIMM(i->src(1), TYPE_U32)) {
876          emitForm_A(i, HEX64(38000000, 00000002));
877 
878          if (i->flagsDef >= 0)
879             code[1] |= 1 << 26;
880       } else {
881          emitForm_A(i, HEX64(68000000, 00000003));
882 
883          if (i->flagsDef >= 0)
884             code[1] |= 1 << 16;
885       }
886       code[0] |= subOp << 6;
887 
888       if (i->flagsSrc >= 0) // carry
889          code[0] |= 1 << 5;
890 
891       if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
892       if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
893    } else {
894       emitForm_S(i, (subOp << 5) |
895                  ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0x1d : 0x8d), true);
896    }
897 }
898 
899 void
emitPOPC(const Instruction * i)900 CodeEmitterNVC0::emitPOPC(const Instruction *i)
901 {
902    emitForm_A(i, HEX64(54000000, 00000004));
903 
904    if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
905    if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
906 }
907 
908 void
emitINSBF(const Instruction * i)909 CodeEmitterNVC0::emitINSBF(const Instruction *i)
910 {
911    emitForm_A(i, HEX64(28000000, 00000003));
912 }
913 
914 void
emitEXTBF(const Instruction * i)915 CodeEmitterNVC0::emitEXTBF(const Instruction *i)
916 {
917    emitForm_A(i, HEX64(70000000, 00000003));
918 
919    if (i->dType == TYPE_S32)
920       code[0] |= 1 << 5;
921    if (i->subOp == NV50_IR_SUBOP_EXTBF_REV)
922       code[0] |= 1 << 8;
923 }
924 
925 void
emitBFIND(const Instruction * i)926 CodeEmitterNVC0::emitBFIND(const Instruction *i)
927 {
928    emitForm_B(i, HEX64(78000000, 00000003));
929 
930    if (i->dType == TYPE_S32)
931       code[0] |= 1 << 5;
932    if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
933       code[0] |= 1 << 8;
934    if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT)
935       code[0] |= 1 << 6;
936 }
937 
938 void
emitPERMT(const Instruction * i)939 CodeEmitterNVC0::emitPERMT(const Instruction *i)
940 {
941    emitForm_A(i, HEX64(24000000, 00000004));
942 
943    code[0] |= i->subOp << 5;
944 }
945 
946 void
emitShift(const Instruction * i)947 CodeEmitterNVC0::emitShift(const Instruction *i)
948 {
949    if (i->op == OP_SHR) {
950       emitForm_A(i, HEX64(58000000, 00000003)
951                  | (isSignedType(i->dType) ? 0x20 : 0x00));
952    } else {
953       emitForm_A(i, HEX64(60000000, 00000003));
954    }
955 
956    if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP)
957       code[0] |= 1 << 9;
958 }
959 
960 void
emitPreOp(const Instruction * i)961 CodeEmitterNVC0::emitPreOp(const Instruction *i)
962 {
963    if (i->encSize == 8) {
964       emitForm_B(i, HEX64(60000000, 00000000));
965 
966       if (i->op == OP_PREEX2)
967          code[0] |= 0x20;
968 
969       if (i->src(0).mod.abs()) code[0] |= 1 << 6;
970       if (i->src(0).mod.neg()) code[0] |= 1 << 8;
971    } else {
972       emitForm_S(i, i->op == OP_PREEX2 ? 0x74000008 : 0x70000008, true);
973    }
974 }
975 
976 void
emitSFnOp(const Instruction * i,uint8_t subOp)977 CodeEmitterNVC0::emitSFnOp(const Instruction *i, uint8_t subOp)
978 {
979    if (i->encSize == 8) {
980       code[0] = 0x00000000 | (subOp << 26);
981       code[1] = 0xc8000000;
982 
983       emitPredicate(i);
984 
985       defId(i->def(0), 14);
986       srcId(i->src(0), 20);
987 
988       assert(i->src(0).getFile() == FILE_GPR);
989 
990       if (i->saturate) code[0] |= 1 << 5;
991 
992       if (i->src(0).mod.abs()) code[0] |= 1 << 7;
993       if (i->src(0).mod.neg()) code[0] |= 1 << 9;
994    } else {
995       emitForm_S(i, 0x80000008 | (subOp << 26), true);
996 
997       assert(!i->src(0).mod.neg());
998       if (i->src(0).mod.abs()) code[0] |= 1 << 30;
999    }
1000 }
1001 
1002 void
emitMINMAX(const Instruction * i)1003 CodeEmitterNVC0::emitMINMAX(const Instruction *i)
1004 {
1005    uint64_t op;
1006 
1007    assert(i->encSize == 8);
1008 
1009    op = (i->op == OP_MIN) ? 0x080e000000000000ULL : 0x081e000000000000ULL;
1010 
1011    if (i->ftz)
1012       op |= 1 << 5;
1013    else
1014    if (!isFloatType(i->dType))
1015       op |= isSignedType(i->dType) ? 0x23 : 0x03;
1016    if (i->dType == TYPE_F64)
1017       op |= 0x01;
1018 
1019    emitForm_A(i, op);
1020    emitNegAbs12(i);
1021 }
1022 
1023 void
roundMode_C(const Instruction * i)1024 CodeEmitterNVC0::roundMode_C(const Instruction *i)
1025 {
1026    switch (i->rnd) {
1027    case ROUND_M:  code[1] |= 1 << 17; break;
1028    case ROUND_P:  code[1] |= 2 << 17; break;
1029    case ROUND_Z:  code[1] |= 3 << 17; break;
1030    case ROUND_NI: code[0] |= 1 << 7; break;
1031    case ROUND_MI: code[0] |= 1 << 7; code[1] |= 1 << 17; break;
1032    case ROUND_PI: code[0] |= 1 << 7; code[1] |= 2 << 17; break;
1033    case ROUND_ZI: code[0] |= 1 << 7; code[1] |= 3 << 17; break;
1034    case ROUND_N: break;
1035    default:
1036       assert(!"invalid round mode");
1037       break;
1038    }
1039 }
1040 
1041 void
roundMode_CS(const Instruction * i)1042 CodeEmitterNVC0::roundMode_CS(const Instruction *i)
1043 {
1044    switch (i->rnd) {
1045    case ROUND_M:
1046    case ROUND_MI: code[0] |= 1 << 16; break;
1047    case ROUND_P:
1048    case ROUND_PI: code[0] |= 2 << 16; break;
1049    case ROUND_Z:
1050    case ROUND_ZI: code[0] |= 3 << 16; break;
1051    default:
1052       break;
1053    }
1054 }
1055 
1056 void
emitCVT(Instruction * i)1057 CodeEmitterNVC0::emitCVT(Instruction *i)
1058 {
1059    const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
1060    DataType dType;
1061 
1062    switch (i->op) {
1063    case OP_CEIL:  i->rnd = f2f ? ROUND_PI : ROUND_P; break;
1064    case OP_FLOOR: i->rnd = f2f ? ROUND_MI : ROUND_M; break;
1065    case OP_TRUNC: i->rnd = f2f ? ROUND_ZI : ROUND_Z; break;
1066    default:
1067       break;
1068    }
1069 
1070    const bool sat = (i->op == OP_SAT) || i->saturate;
1071    const bool abs = (i->op == OP_ABS) || i->src(0).mod.abs();
1072    const bool neg = (i->op == OP_NEG) || i->src(0).mod.neg();
1073 
1074    if (i->op == OP_NEG && i->dType == TYPE_U32)
1075       dType = TYPE_S32;
1076    else
1077       dType = i->dType;
1078 
1079    if (i->encSize == 8) {
1080       emitForm_B(i, HEX64(10000000, 00000004));
1081 
1082       roundMode_C(i);
1083 
1084       // cvt u16 f32 sets high bits to 0, so we don't have to use Value::Size()
1085       code[0] |= util_logbase2(typeSizeof(dType)) << 20;
1086       code[0] |= util_logbase2(typeSizeof(i->sType)) << 23;
1087 
1088       // for 8/16 source types, the byte/word is in subOp. word 1 is
1089       // represented as 2.
1090       if (!isFloatType(i->sType))
1091          code[1] |= i->subOp << 0x17;
1092       else
1093          code[1] |= i->subOp << 0x18;
1094 
1095       if (sat)
1096          code[0] |= 0x20;
1097       if (abs)
1098          code[0] |= 1 << 6;
1099       if (neg && i->op != OP_ABS)
1100          code[0] |= 1 << 8;
1101 
1102       if (i->ftz)
1103          code[1] |= 1 << 23;
1104 
1105       if (isSignedIntType(dType))
1106          code[0] |= 0x080;
1107       if (isSignedIntType(i->sType))
1108          code[0] |= 0x200;
1109 
1110       if (isFloatType(dType)) {
1111          if (!isFloatType(i->sType))
1112             code[1] |= 0x08000000;
1113       } else {
1114          if (isFloatType(i->sType))
1115             code[1] |= 0x04000000;
1116          else
1117             code[1] |= 0x0c000000;
1118       }
1119    } else {
1120       if (i->op == OP_CEIL || i->op == OP_FLOOR || i->op == OP_TRUNC) {
1121          code[0] = 0x298;
1122       } else
1123       if (isFloatType(dType)) {
1124          if (isFloatType(i->sType))
1125             code[0] = 0x098;
1126          else
1127             code[0] = 0x088 | (isSignedType(i->sType) ? (1 << 8) : 0);
1128       } else {
1129          assert(isFloatType(i->sType));
1130 
1131          code[0] = 0x288 | (isSignedType(i->sType) ? (1 << 8) : 0);
1132       }
1133 
1134       if (neg) code[0] |= 1 << 16;
1135       if (sat) code[0] |= 1 << 18;
1136       if (abs) code[0] |= 1 << 19;
1137 
1138       roundMode_CS(i);
1139    }
1140 }
1141 
1142 void
emitSET(const CmpInstruction * i)1143 CodeEmitterNVC0::emitSET(const CmpInstruction *i)
1144 {
1145    uint32_t hi;
1146    uint32_t lo = 0;
1147 
1148    if (i->sType == TYPE_F64)
1149       lo = 0x1;
1150    else
1151    if (!isFloatType(i->sType))
1152       lo = 0x3;
1153 
1154    if (isSignedIntType(i->sType))
1155       lo |= 0x20;
1156    if (isFloatType(i->dType)) {
1157       if (isFloatType(i->sType))
1158          lo |= 0x20;
1159       else
1160          lo |= 0x80;
1161    }
1162 
1163    switch (i->op) {
1164    case OP_SET_AND: hi = 0x10000000; break;
1165    case OP_SET_OR:  hi = 0x10200000; break;
1166    case OP_SET_XOR: hi = 0x10400000; break;
1167    default:
1168       hi = 0x100e0000;
1169       break;
1170    }
1171    emitForm_A(i, (static_cast<uint64_t>(hi) << 32) | lo);
1172 
1173    if (i->op != OP_SET)
1174       srcId(i->src(2), 32 + 17);
1175 
1176    if (i->def(0).getFile() == FILE_PREDICATE) {
1177       if (i->sType == TYPE_F32)
1178          code[1] += 0x10000000;
1179       else
1180          code[1] += 0x08000000;
1181 
1182       code[0] &= ~0xfc000;
1183       defId(i->def(0), 17);
1184       if (i->defExists(1))
1185          defId(i->def(1), 14);
1186       else
1187          code[0] |= 0x1c000;
1188    }
1189 
1190    if (i->ftz)
1191       code[1] |= 1 << 27;
1192 
1193    emitCondCode(i->setCond, 32 + 23);
1194    emitNegAbs12(i);
1195 }
1196 
1197 void
emitSLCT(const CmpInstruction * i)1198 CodeEmitterNVC0::emitSLCT(const CmpInstruction *i)
1199 {
1200    uint64_t op;
1201 
1202    switch (i->dType) {
1203    case TYPE_S32:
1204       op = HEX64(30000000, 00000023);
1205       break;
1206    case TYPE_U32:
1207       op = HEX64(30000000, 00000003);
1208       break;
1209    case TYPE_F32:
1210       op = HEX64(38000000, 00000000);
1211       break;
1212    default:
1213       assert(!"invalid type for SLCT");
1214       op = 0;
1215       break;
1216    }
1217    emitForm_A(i, op);
1218 
1219    CondCode cc = i->setCond;
1220 
1221    if (i->src(2).mod.neg())
1222       cc = reverseCondCode(cc);
1223 
1224    emitCondCode(cc, 32 + 23);
1225 
1226    if (i->ftz)
1227       code[0] |= 1 << 5;
1228 }
1229 
1230 static void
selpFlip(const FixupEntry * entry,uint32_t * code,const FixupData & data)1231 selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
1232 {
1233    int loc = entry->loc;
1234    if (data.force_persample_interp)
1235       code[loc + 1] |= 1 << 20;
1236    else
1237       code[loc + 1] &= ~(1 << 20);
1238 }
1239 
emitSELP(const Instruction * i)1240 void CodeEmitterNVC0::emitSELP(const Instruction *i)
1241 {
1242    emitForm_A(i, HEX64(20000000, 00000004));
1243 
1244    if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
1245       code[1] |= 1 << 20;
1246 
1247    if (i->subOp == 1) {
1248       addInterp(0, 0, selpFlip);
1249    }
1250 }
1251 
emitTEXBAR(const Instruction * i)1252 void CodeEmitterNVC0::emitTEXBAR(const Instruction *i)
1253 {
1254    code[0] = 0x00000006 | (i->subOp << 26);
1255    code[1] = 0xf0000000;
1256    emitPredicate(i);
1257    emitCondCode(i->flagsSrc >= 0 ? i->cc : CC_ALWAYS, 5);
1258 }
1259 
emitTEXCSAA(const TexInstruction * i)1260 void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i)
1261 {
1262    code[0] = 0x00000086;
1263    code[1] = 0xd0000000;
1264 
1265    code[1] |= i->tex.r;
1266    code[1] |= i->tex.s << 8;
1267 
1268    if (i->tex.liveOnly)
1269       code[0] |= 1 << 9;
1270 
1271    defId(i->def(0), 14);
1272    srcId(i->src(0), 20);
1273 }
1274 
1275 static inline bool
isNextIndependentTex(const TexInstruction * i)1276 isNextIndependentTex(const TexInstruction *i)
1277 {
1278    if (!i->next || !isTextureOp(i->next->op))
1279       return false;
1280    if (i->getDef(0)->interfers(i->next->getSrc(0)))
1281       return false;
1282    return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1));
1283 }
1284 
1285 void
emitTEX(const TexInstruction * i)1286 CodeEmitterNVC0::emitTEX(const TexInstruction *i)
1287 {
1288    code[0] = 0x00000006;
1289 
1290    if (isNextIndependentTex(i))
1291       code[0] |= 0x080; // t mode
1292    else
1293       code[0] |= 0x100; // p mode
1294 
1295    if (i->tex.liveOnly)
1296       code[0] |= 1 << 9;
1297 
1298    switch (i->op) {
1299    case OP_TEX: code[1] = 0x80000000; break;
1300    case OP_TXB: code[1] = 0x84000000; break;
1301    case OP_TXL: code[1] = 0x86000000; break;
1302    case OP_TXF: code[1] = 0x90000000; break;
1303    case OP_TXG: code[1] = 0xa0000000; break;
1304    case OP_TXLQ: code[1] = 0xb0000000; break;
1305    case OP_TXD: code[1] = 0xe0000000; break;
1306    default:
1307       assert(!"invalid texture op");
1308       break;
1309    }
1310    if (i->op == OP_TXF) {
1311       if (!i->tex.levelZero)
1312          code[1] |= 0x02000000;
1313    } else
1314    if (i->tex.levelZero) {
1315       code[1] |= 0x02000000;
1316    }
1317 
1318    if (i->op != OP_TXD && i->tex.derivAll)
1319       code[1] |= 1 << 13;
1320 
1321    defId(i->def(0), 14);
1322    srcId(i->src(0), 20);
1323 
1324    emitPredicate(i);
1325 
1326    if (i->op == OP_TXG) code[0] |= i->tex.gatherComp << 5;
1327 
1328    code[1] |= i->tex.mask << 14;
1329 
1330    code[1] |= i->tex.r;
1331    code[1] |= i->tex.s << 8;
1332    if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0)
1333       code[1] |= 1 << 18; // in 1st source (with array index)
1334 
1335    // texture target:
1336    code[1] |= (i->tex.target.getDim() - 1) << 20;
1337    if (i->tex.target.isCube())
1338       code[1] += 2 << 20;
1339    if (i->tex.target.isArray())
1340       code[1] |= 1 << 19;
1341    if (i->tex.target.isShadow())
1342       code[1] |= 1 << 24;
1343 
1344    const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1345 
1346    if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) {
1347       // lzero
1348       if (i->op == OP_TXL)
1349          code[1] &= ~(1 << 26);
1350       else
1351       if (i->op == OP_TXF)
1352          code[1] &= ~(1 << 25);
1353    }
1354    if (i->tex.target == TEX_TARGET_2D_MS ||
1355        i->tex.target == TEX_TARGET_2D_MS_ARRAY)
1356       code[1] |= 1 << 23;
1357 
1358    if (i->tex.useOffsets == 1)
1359       code[1] |= 1 << 22;
1360    if (i->tex.useOffsets == 4)
1361       code[1] |= 1 << 23;
1362 
1363    srcId(i, src1, 26);
1364 }
1365 
1366 void
emitTXQ(const TexInstruction * i)1367 CodeEmitterNVC0::emitTXQ(const TexInstruction *i)
1368 {
1369    code[0] = 0x00000086;
1370    code[1] = 0xc0000000;
1371 
1372    switch (i->tex.query) {
1373    case TXQ_DIMS:            code[1] |= 0 << 22; break;
1374    case TXQ_TYPE:            code[1] |= 1 << 22; break;
1375    case TXQ_SAMPLE_POSITION: code[1] |= 2 << 22; break;
1376    case TXQ_FILTER:          code[1] |= 3 << 22; break;
1377    case TXQ_LOD:             code[1] |= 4 << 22; break;
1378    case TXQ_BORDER_COLOUR:   code[1] |= 5 << 22; break;
1379    default:
1380       assert(!"invalid texture query");
1381       break;
1382    }
1383 
1384    code[1] |= i->tex.mask << 14;
1385 
1386    code[1] |= i->tex.r;
1387    code[1] |= i->tex.s << 8;
1388    if (i->tex.sIndirectSrc >= 0 || i->tex.rIndirectSrc >= 0)
1389       code[1] |= 1 << 18;
1390 
1391    const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1392 
1393    defId(i->def(0), 14);
1394    srcId(i->src(0), 20);
1395    srcId(i, src1, 26);
1396 
1397    emitPredicate(i);
1398 }
1399 
1400 void
emitQUADOP(const Instruction * i,uint8_t qOp,uint8_t laneMask)1401 CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
1402 {
1403    code[0] = 0x00000200 | (laneMask << 6); // dall
1404    code[1] = 0x48000000 | qOp;
1405 
1406    defId(i->def(0), 14);
1407    srcId(i->src(0), 20);
1408    srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 26);
1409 
1410    emitPredicate(i);
1411 }
1412 
1413 void
emitFlow(const Instruction * i)1414 CodeEmitterNVC0::emitFlow(const Instruction *i)
1415 {
1416    const FlowInstruction *f = i->asFlow();
1417 
1418    unsigned mask; // bit 0: predicate, bit 1: target
1419 
1420    code[0] = 0x00000007;
1421 
1422    switch (i->op) {
1423    case OP_BRA:
1424       code[1] = f->absolute ? 0x00000000 : 0x40000000;
1425       if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
1426          code[0] |= 0x4000;
1427       mask = 3;
1428       break;
1429    case OP_CALL:
1430       code[1] = f->absolute ? 0x10000000 : 0x50000000;
1431       if (f->indirect)
1432          code[0] |= 0x4000; // indirect calls always use c[] source
1433       mask = 2;
1434       break;
1435 
1436    case OP_EXIT:    code[1] = 0x80000000; mask = 1; break;
1437    case OP_RET:     code[1] = 0x90000000; mask = 1; break;
1438    case OP_DISCARD: code[1] = 0x98000000; mask = 1; break;
1439    case OP_BREAK:   code[1] = 0xa8000000; mask = 1; break;
1440    case OP_CONT:    code[1] = 0xb0000000; mask = 1; break;
1441 
1442    case OP_JOINAT:   code[1] = 0x60000000; mask = 2; break;
1443    case OP_PREBREAK: code[1] = 0x68000000; mask = 2; break;
1444    case OP_PRECONT:  code[1] = 0x70000000; mask = 2; break;
1445    case OP_PRERET:   code[1] = 0x78000000; mask = 2; break;
1446 
1447    case OP_QUADON:  code[1] = 0xc0000000; mask = 0; break;
1448    case OP_QUADPOP: code[1] = 0xc8000000; mask = 0; break;
1449    case OP_BRKPT:   code[1] = 0xd0000000; mask = 0; break;
1450    default:
1451       assert(!"invalid flow operation");
1452       return;
1453    }
1454 
1455    if (mask & 1) {
1456       emitPredicate(i);
1457       if (i->flagsSrc < 0)
1458          code[0] |= 0x1e0;
1459    }
1460 
1461    if (!f)
1462       return;
1463 
1464    if (f->allWarp)
1465       code[0] |= 1 << 15;
1466    if (f->limit)
1467       code[0] |= 1 << 16;
1468 
1469    if (f->indirect) {
1470       if (code[0] & 0x4000) {
1471          assert(i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST);
1472          setAddress16(i->src(0));
1473          code[1] |= i->getSrc(0)->reg.fileIndex << 10;
1474          if (f->op == OP_BRA)
1475             srcId(f->src(0).getIndirect(0), 20);
1476       } else {
1477          srcId(f, 0, 20);
1478       }
1479    }
1480 
1481    if (f->op == OP_CALL) {
1482       if (f->indirect) {
1483          // nothing
1484       } else
1485       if (f->builtin) {
1486          assert(f->absolute);
1487          uint32_t pcAbs = targNVC0->getBuiltinOffset(f->target.builtin);
1488          addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfc000000, 26);
1489          addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x03ffffff, -6);
1490       } else {
1491          assert(!f->absolute);
1492          int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
1493          code[0] |= (pcRel & 0x3f) << 26;
1494          code[1] |= (pcRel >> 6) & 0x3ffff;
1495       }
1496    } else
1497    if (mask & 2) {
1498       int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
1499       if (writeIssueDelays && !(f->target.bb->binPos & 0x3f))
1500          pcRel += 8;
1501       // currently we don't want absolute branches
1502       assert(!f->absolute);
1503       code[0] |= (pcRel & 0x3f) << 26;
1504       code[1] |= (pcRel >> 6) & 0x3ffff;
1505    }
1506 }
1507 
1508 void
emitBAR(const Instruction * i)1509 CodeEmitterNVC0::emitBAR(const Instruction *i)
1510 {
1511    Value *rDef = NULL, *pDef = NULL;
1512 
1513    switch (i->subOp) {
1514    case NV50_IR_SUBOP_BAR_ARRIVE:   code[0] = 0x84; break;
1515    case NV50_IR_SUBOP_BAR_RED_AND:  code[0] = 0x24; break;
1516    case NV50_IR_SUBOP_BAR_RED_OR:   code[0] = 0x44; break;
1517    case NV50_IR_SUBOP_BAR_RED_POPC: code[0] = 0x04; break;
1518    default:
1519       code[0] = 0x04;
1520       assert(i->subOp == NV50_IR_SUBOP_BAR_SYNC);
1521       break;
1522    }
1523    code[1] = 0x50000000;
1524 
1525    code[0] |= 63 << 14;
1526    code[1] |= 7 << 21;
1527 
1528    emitPredicate(i);
1529 
1530    // barrier id
1531    if (i->src(0).getFile() == FILE_GPR) {
1532       srcId(i->src(0), 20);
1533    } else {
1534       ImmediateValue *imm = i->getSrc(0)->asImm();
1535       assert(imm);
1536       code[0] |= imm->reg.data.u32 << 20;
1537       code[1] |= 0x8000;
1538    }
1539 
1540    // thread count
1541    if (i->src(1).getFile() == FILE_GPR) {
1542       srcId(i->src(1), 26);
1543    } else {
1544       ImmediateValue *imm = i->getSrc(1)->asImm();
1545       assert(imm);
1546       assert(imm->reg.data.u32 <= 0xfff);
1547       code[0] |= imm->reg.data.u32 << 26;
1548       code[1] |= imm->reg.data.u32 >> 6;
1549       code[1] |= 0x4000;
1550    }
1551 
1552    if (i->srcExists(2) && (i->predSrc != 2)) {
1553       srcId(i->src(2), 32 + 17);
1554       if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
1555          code[1] |= 1 << 20;
1556    } else {
1557       code[1] |= 7 << 17;
1558    }
1559 
1560    if (i->defExists(0)) {
1561       if (i->def(0).getFile() == FILE_GPR)
1562          rDef = i->getDef(0);
1563       else
1564          pDef = i->getDef(0);
1565 
1566       if (i->defExists(1)) {
1567          if (i->def(1).getFile() == FILE_GPR)
1568             rDef = i->getDef(1);
1569          else
1570             pDef = i->getDef(1);
1571       }
1572    }
1573    if (rDef) {
1574       code[0] &= ~(63 << 14);
1575       defId(rDef, 14);
1576    }
1577    if (pDef) {
1578       code[1] &= ~(7 << 21);
1579       defId(pDef, 32 + 21);
1580    }
1581 }
1582 
1583 void
emitAFETCH(const Instruction * i)1584 CodeEmitterNVC0::emitAFETCH(const Instruction *i)
1585 {
1586    code[0] = 0x00000006;
1587    code[1] = 0x0c000000 | (i->src(0).get()->reg.data.offset & 0x7ff);
1588 
1589    if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1590       code[0] |= 0x200;
1591 
1592    emitPredicate(i);
1593 
1594    defId(i->def(0), 14);
1595    srcId(i->src(0).getIndirect(0), 20);
1596 }
1597 
1598 void
emitPFETCH(const Instruction * i)1599 CodeEmitterNVC0::emitPFETCH(const Instruction *i)
1600 {
1601    uint32_t prim = i->src(0).get()->reg.data.u32;
1602 
1603    code[0] = 0x00000006 | ((prim & 0x3f) << 26);
1604    code[1] = 0x00000000 | (prim >> 6);
1605 
1606    emitPredicate(i);
1607 
1608    const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1609 
1610    defId(i->def(0), 14);
1611    srcId(i, src1, 20);
1612 }
1613 
1614 void
emitVFETCH(const Instruction * i)1615 CodeEmitterNVC0::emitVFETCH(const Instruction *i)
1616 {
1617    code[0] = 0x00000006;
1618    code[1] = 0x06000000 | i->src(0).get()->reg.data.offset;
1619 
1620    if (i->perPatch)
1621       code[0] |= 0x100;
1622    if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1623       code[0] |= 0x200; // yes, TCPs can read from *outputs* of other threads
1624 
1625    emitPredicate(i);
1626 
1627    code[0] |= ((i->getDef(0)->reg.size / 4) - 1) << 5;
1628 
1629    defId(i->def(0), 14);
1630    srcId(i->src(0).getIndirect(0), 20);
1631    srcId(i->src(0).getIndirect(1), 26); // vertex address
1632 }
1633 
1634 void
emitEXPORT(const Instruction * i)1635 CodeEmitterNVC0::emitEXPORT(const Instruction *i)
1636 {
1637    unsigned int size = typeSizeof(i->dType);
1638 
1639    code[0] = 0x00000006 | ((size / 4 - 1) << 5);
1640    code[1] = 0x0a000000 | i->src(0).get()->reg.data.offset;
1641 
1642    assert(!(code[1] & ((size == 12) ? 15 : (size - 1))));
1643 
1644    if (i->perPatch)
1645       code[0] |= 0x100;
1646 
1647    emitPredicate(i);
1648 
1649    assert(i->src(1).getFile() == FILE_GPR);
1650 
1651    srcId(i->src(0).getIndirect(0), 20);
1652    srcId(i->src(0).getIndirect(1), 32 + 17); // vertex base address
1653    srcId(i->src(1), 26);
1654 }
1655 
1656 void
emitOUT(const Instruction * i)1657 CodeEmitterNVC0::emitOUT(const Instruction *i)
1658 {
1659    code[0] = 0x00000006;
1660    code[1] = 0x1c000000;
1661 
1662    emitPredicate(i);
1663 
1664    defId(i->def(0), 14); // new secret address
1665    srcId(i->src(0), 20); // old secret address, should be 0 initially
1666 
1667    assert(i->src(0).getFile() == FILE_GPR);
1668 
1669    if (i->op == OP_EMIT)
1670       code[0] |= 1 << 5;
1671    if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)
1672       code[0] |= 1 << 6;
1673 
1674    // vertex stream
1675    if (i->src(1).getFile() == FILE_IMMEDIATE) {
1676       unsigned int stream = SDATA(i->src(1)).u32;
1677       assert(stream < 4);
1678       if (stream) {
1679          code[1] |= 0xc000;
1680          code[0] |= stream << 26;
1681       } else {
1682          srcId(NULL, 26);
1683       }
1684    } else {
1685       srcId(i->src(1), 26);
1686    }
1687 }
1688 
1689 void
emitInterpMode(const Instruction * i)1690 CodeEmitterNVC0::emitInterpMode(const Instruction *i)
1691 {
1692    if (i->encSize == 8) {
1693       code[0] |= i->ipa << 6; // TODO: INTERP_SAMPLEID
1694    } else {
1695       if (i->getInterpMode() == NV50_IR_INTERP_SC)
1696          code[0] |= 0x80;
1697       assert(i->op == OP_PINTERP && i->getSampleMode() == 0);
1698    }
1699 }
1700 
1701 static void
interpApply(const FixupEntry * entry,uint32_t * code,const FixupData & data)1702 interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
1703 {
1704    int ipa = entry->ipa;
1705    int reg = entry->reg;
1706    int loc = entry->loc;
1707 
1708    if (data.flatshade &&
1709        (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
1710       ipa = NV50_IR_INTERP_FLAT;
1711       reg = 0x3f;
1712    } else if (data.force_persample_interp &&
1713               (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
1714               (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
1715       ipa |= NV50_IR_INTERP_CENTROID;
1716    }
1717    code[loc + 0] &= ~(0xf << 6);
1718    code[loc + 0] |= ipa << 6;
1719    code[loc + 0] &= ~(0x3f << 26);
1720    code[loc + 0] |= reg << 26;
1721 }
1722 
1723 void
emitINTERP(const Instruction * i)1724 CodeEmitterNVC0::emitINTERP(const Instruction *i)
1725 {
1726    const uint32_t base = i->getSrc(0)->reg.data.offset;
1727 
1728    if (i->encSize == 8) {
1729       code[0] = 0x00000000;
1730       code[1] = 0xc0000000 | (base & 0xffff);
1731 
1732       if (i->saturate)
1733          code[0] |= 1 << 5;
1734 
1735       if (i->op == OP_PINTERP) {
1736          srcId(i->src(1), 26);
1737          addInterp(i->ipa, SDATA(i->src(1)).id, interpApply);
1738       } else {
1739          code[0] |= 0x3f << 26;
1740          addInterp(i->ipa, 0x3f, interpApply);
1741       }
1742 
1743       srcId(i->src(0).getIndirect(0), 20);
1744    } else {
1745       assert(i->op == OP_PINTERP);
1746       code[0] = 0x00000009 | ((base & 0xc) << 6) | ((base >> 4) << 26);
1747       srcId(i->src(1), 20);
1748    }
1749    emitInterpMode(i);
1750 
1751    emitPredicate(i);
1752    defId(i->def(0), 14);
1753 
1754    if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)
1755       srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 32 + 17);
1756    else
1757       code[1] |= 0x3f << 17;
1758 }
1759 
1760 void
emitLoadStoreType(DataType ty)1761 CodeEmitterNVC0::emitLoadStoreType(DataType ty)
1762 {
1763    uint8_t val;
1764 
1765    switch (ty) {
1766    case TYPE_U8:
1767       val = 0x00;
1768       break;
1769    case TYPE_S8:
1770       val = 0x20;
1771       break;
1772    case TYPE_F16:
1773    case TYPE_U16:
1774       val = 0x40;
1775       break;
1776    case TYPE_S16:
1777       val = 0x60;
1778       break;
1779    case TYPE_F32:
1780    case TYPE_U32:
1781    case TYPE_S32:
1782       val = 0x80;
1783       break;
1784    case TYPE_F64:
1785    case TYPE_U64:
1786    case TYPE_S64:
1787       val = 0xa0;
1788       break;
1789    case TYPE_B128:
1790       val = 0xc0;
1791       break;
1792    default:
1793       val = 0x80;
1794       assert(!"invalid type");
1795       break;
1796    }
1797    code[0] |= val;
1798 }
1799 
1800 void
emitCachingMode(CacheMode c)1801 CodeEmitterNVC0::emitCachingMode(CacheMode c)
1802 {
1803    uint32_t val;
1804 
1805    switch (c) {
1806    case CACHE_CA:
1807 // case CACHE_WB:
1808       val = 0x000;
1809       break;
1810    case CACHE_CG:
1811       val = 0x100;
1812       break;
1813    case CACHE_CS:
1814       val = 0x200;
1815       break;
1816    case CACHE_CV:
1817 // case CACHE_WT:
1818       val = 0x300;
1819       break;
1820    default:
1821       val = 0;
1822       assert(!"invalid caching mode");
1823       break;
1824    }
1825    code[0] |= val;
1826 }
1827 
1828 static inline bool
uses64bitAddress(const Instruction * ldst)1829 uses64bitAddress(const Instruction *ldst)
1830 {
1831    return ldst->src(0).getFile() == FILE_MEMORY_GLOBAL &&
1832       ldst->src(0).isIndirect(0) &&
1833       ldst->getIndirect(0, 0)->reg.size == 8;
1834 }
1835 
1836 void
emitSTORE(const Instruction * i)1837 CodeEmitterNVC0::emitSTORE(const Instruction *i)
1838 {
1839    uint32_t opc;
1840 
1841    switch (i->src(0).getFile()) {
1842    case FILE_MEMORY_GLOBAL: opc = 0x90000000; break;
1843    case FILE_MEMORY_LOCAL:  opc = 0xc8000000; break;
1844    case FILE_MEMORY_SHARED:
1845       if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
1846          if (targ->getChipset() >= NVISA_GK104_CHIPSET)
1847             opc = 0xb8000000;
1848          else
1849             opc = 0xcc000000;
1850       } else {
1851          opc = 0xc9000000;
1852       }
1853       break;
1854    default:
1855       assert(!"invalid memory file");
1856       opc = 0;
1857       break;
1858    }
1859    code[0] = 0x00000005;
1860    code[1] = opc;
1861 
1862    if (targ->getChipset() >= NVISA_GK104_CHIPSET) {
1863       // Unlocked store on shared memory can fail.
1864       if (i->src(0).getFile() == FILE_MEMORY_SHARED &&
1865           i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
1866          assert(i->defExists(0));
1867          defId(i->def(0), 8);
1868       }
1869    }
1870 
1871    setAddressByFile(i->src(0));
1872    srcId(i->src(1), 14);
1873    srcId(i->src(0).getIndirect(0), 20);
1874    if (uses64bitAddress(i))
1875       code[1] |= 1 << 26;
1876 
1877    emitPredicate(i);
1878 
1879    emitLoadStoreType(i->dType);
1880    emitCachingMode(i->cache);
1881 }
1882 
1883 void
emitLOAD(const Instruction * i)1884 CodeEmitterNVC0::emitLOAD(const Instruction *i)
1885 {
1886    uint32_t opc;
1887 
1888    code[0] = 0x00000005;
1889 
1890    switch (i->src(0).getFile()) {
1891    case FILE_MEMORY_GLOBAL: opc = 0x80000000; break;
1892    case FILE_MEMORY_LOCAL:  opc = 0xc0000000; break;
1893    case FILE_MEMORY_SHARED:
1894       if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
1895          if (targ->getChipset() >= NVISA_GK104_CHIPSET)
1896             opc = 0xa8000000;
1897          else
1898             opc = 0xc4000000;
1899       } else {
1900          opc = 0xc1000000;
1901       }
1902       break;
1903    case FILE_MEMORY_CONST:
1904       if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
1905          emitMOV(i); // not sure if this is any better
1906          return;
1907       }
1908       opc = 0x14000000 | (i->src(0).get()->reg.fileIndex << 10);
1909       code[0] = 0x00000006 | (i->subOp << 8);
1910       break;
1911    default:
1912       assert(!"invalid memory file");
1913       opc = 0;
1914       break;
1915    }
1916    code[1] = opc;
1917 
1918    int r = 0, p = -1;
1919    if (i->src(0).getFile() == FILE_MEMORY_SHARED) {
1920       if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
1921          if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
1922             r = -1;
1923             p = 0;
1924          } else if (i->defExists(1)) { // r, p
1925             p = 1;
1926          } else {
1927             assert(!"Expected predicate dest for load locked");
1928          }
1929       }
1930    }
1931 
1932    if (r >= 0)
1933       defId(i->def(r), 14);
1934    else
1935       code[0] |= 63 << 14;
1936 
1937    if (p >= 0) {
1938       if (targ->getChipset() >= NVISA_GK104_CHIPSET)
1939          defId(i->def(p), 8);
1940       else
1941          defId(i->def(p), 32 + 18);
1942    }
1943 
1944    setAddressByFile(i->src(0));
1945    srcId(i->src(0).getIndirect(0), 20);
1946    if (uses64bitAddress(i))
1947       code[1] |= 1 << 26;
1948 
1949    emitPredicate(i);
1950 
1951    emitLoadStoreType(i->dType);
1952    emitCachingMode(i->cache);
1953 }
1954 
1955 uint8_t
getSRegEncoding(const ValueRef & ref)1956 CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref)
1957 {
1958    switch (SDATA(ref).sv.sv) {
1959    case SV_LANEID:        return 0x00;
1960    case SV_PHYSID:        return 0x03;
1961    case SV_VERTEX_COUNT:  return 0x10;
1962    case SV_INVOCATION_ID: return 0x11;
1963    case SV_YDIR:          return 0x12;
1964    case SV_THREAD_KILL:   return 0x13;
1965    case SV_TID:           return 0x21 + SDATA(ref).sv.index;
1966    case SV_CTAID:         return 0x25 + SDATA(ref).sv.index;
1967    case SV_NTID:          return 0x29 + SDATA(ref).sv.index;
1968    case SV_GRIDID:        return 0x2c;
1969    case SV_NCTAID:        return 0x2d + SDATA(ref).sv.index;
1970    case SV_LBASE:         return 0x34;
1971    case SV_SBASE:         return 0x30;
1972    case SV_CLOCK:         return 0x50 + SDATA(ref).sv.index;
1973    default:
1974       assert(!"no sreg for system value");
1975       return 0;
1976    }
1977 }
1978 
1979 void
emitMOV(const Instruction * i)1980 CodeEmitterNVC0::emitMOV(const Instruction *i)
1981 {
1982    if (i->def(0).getFile() == FILE_PREDICATE) {
1983       if (i->src(0).getFile() == FILE_GPR) {
1984          code[0] = 0xfc01c003;
1985          code[1] = 0x1a8e0000;
1986          srcId(i->src(0), 20);
1987       } else {
1988          code[0] = 0x0001c004;
1989          code[1] = 0x0c0e0000;
1990          if (i->src(0).getFile() == FILE_IMMEDIATE) {
1991             code[0] |= 7 << 20;
1992             if (!i->getSrc(0)->reg.data.u32)
1993                code[0] |= 1 << 23;
1994          } else {
1995             srcId(i->src(0), 20);
1996          }
1997       }
1998       defId(i->def(0), 17);
1999       emitPredicate(i);
2000    } else
2001    if (i->src(0).getFile() == FILE_SYSTEM_VALUE) {
2002       uint8_t sr = getSRegEncoding(i->src(0));
2003 
2004       if (i->encSize == 8) {
2005          code[0] = 0x00000004 | (sr << 26);
2006          code[1] = 0x2c000000;
2007       } else {
2008          code[0] = 0x40000008 | (sr << 20);
2009       }
2010       defId(i->def(0), 14);
2011 
2012       emitPredicate(i);
2013    } else
2014    if (i->encSize == 8) {
2015       uint64_t opc;
2016 
2017       if (i->src(0).getFile() == FILE_IMMEDIATE)
2018          opc = HEX64(18000000, 000001e2);
2019       else
2020       if (i->src(0).getFile() == FILE_PREDICATE)
2021          opc = HEX64(080e0000, 1c000004);
2022       else
2023          opc = HEX64(28000000, 00000004);
2024 
2025       if (i->src(0).getFile() != FILE_PREDICATE)
2026          opc |= i->lanes << 5;
2027 
2028       emitForm_B(i, opc);
2029 
2030       // Explicitly emit the predicate source as emitForm_B skips it.
2031       if (i->src(0).getFile() == FILE_PREDICATE)
2032          srcId(i->src(0), 20);
2033    } else {
2034       uint32_t imm;
2035 
2036       if (i->src(0).getFile() == FILE_IMMEDIATE) {
2037          imm = SDATA(i->src(0)).u32;
2038          if (imm & 0xfff00000) {
2039             assert(!(imm & 0x000fffff));
2040             code[0] = 0x00000318 | imm;
2041          } else {
2042             assert(imm < 0x800 || ((int32_t)imm >= -0x800));
2043             code[0] = 0x00000118 | (imm << 20);
2044          }
2045       } else {
2046          code[0] = 0x0028;
2047          emitShortSrc2(i->src(0));
2048       }
2049       defId(i->def(0), 14);
2050 
2051       emitPredicate(i);
2052    }
2053 }
2054 
2055 void
emitATOM(const Instruction * i)2056 CodeEmitterNVC0::emitATOM(const Instruction *i)
2057 {
2058    const bool hasDst = i->defExists(0);
2059    const bool casOrExch =
2060       i->subOp == NV50_IR_SUBOP_ATOM_EXCH ||
2061       i->subOp == NV50_IR_SUBOP_ATOM_CAS;
2062 
2063    if (i->dType == TYPE_U64) {
2064       switch (i->subOp) {
2065       case NV50_IR_SUBOP_ATOM_ADD:
2066          code[0] = 0x205;
2067          if (hasDst)
2068             code[1] = 0x507e0000;
2069          else
2070             code[1] = 0x10000000;
2071          break;
2072       case NV50_IR_SUBOP_ATOM_EXCH:
2073          code[0] = 0x305;
2074          code[1] = 0x507e0000;
2075          break;
2076       case NV50_IR_SUBOP_ATOM_CAS:
2077          code[0] = 0x325;
2078          code[1] = 0x50000000;
2079          break;
2080       default:
2081          assert(!"invalid u64 red op");
2082          break;
2083       }
2084    } else
2085    if (i->dType == TYPE_U32) {
2086       switch (i->subOp) {
2087       case NV50_IR_SUBOP_ATOM_EXCH:
2088          code[0] = 0x105;
2089          code[1] = 0x507e0000;
2090          break;
2091       case NV50_IR_SUBOP_ATOM_CAS:
2092          code[0] = 0x125;
2093          code[1] = 0x50000000;
2094          break;
2095       default:
2096          code[0] = 0x5 | (i->subOp << 5);
2097          if (hasDst)
2098             code[1] = 0x507e0000;
2099          else
2100             code[1] = 0x10000000;
2101          break;
2102       }
2103    } else
2104    if (i->dType == TYPE_S32) {
2105       assert(i->subOp <= 2);
2106       code[0] = 0x205 | (i->subOp << 5);
2107       if (hasDst)
2108          code[1] = 0x587e0000;
2109       else
2110          code[1] = 0x18000000;
2111    } else
2112    if (i->dType == TYPE_F32) {
2113       assert(i->subOp == NV50_IR_SUBOP_ATOM_ADD);
2114       code[0] = 0x205;
2115       if (hasDst)
2116          code[1] = 0x687e0000;
2117       else
2118          code[1] = 0x28000000;
2119    }
2120 
2121    emitPredicate(i);
2122 
2123    srcId(i->src(1), 14);
2124 
2125    if (hasDst)
2126       defId(i->def(0), 32 + 11);
2127    else
2128    if (casOrExch)
2129       code[1] |= 63 << 11;
2130 
2131    if (hasDst || casOrExch) {
2132       const int32_t offset = SDATA(i->src(0)).offset;
2133       assert(offset < 0x80000 && offset >= -0x80000);
2134       code[0] |= offset << 26;
2135       code[1] |= (offset & 0x1ffc0) >> 6;
2136       code[1] |= (offset & 0xe0000) << 6;
2137    } else {
2138       srcAddr32(i->src(0), 26, 0);
2139    }
2140    if (i->getIndirect(0, 0)) {
2141       srcId(i->getIndirect(0, 0), 20);
2142       if (i->getIndirect(0, 0)->reg.size == 8)
2143          code[1] |= 1 << 26;
2144    } else {
2145       code[0] |= 63 << 20;
2146    }
2147 
2148    if (i->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2149       assert(i->src(1).getSize() == 2 * typeSizeof(i->sType));
2150       code[1] |= (SDATA(i->src(1)).id + 1) << 17;
2151    }
2152 }
2153 
2154 void
emitMEMBAR(const Instruction * i)2155 CodeEmitterNVC0::emitMEMBAR(const Instruction *i)
2156 {
2157    switch (NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp)) {
2158    case NV50_IR_SUBOP_MEMBAR_CTA: code[0] = 0x05; break;
2159    case NV50_IR_SUBOP_MEMBAR_GL:  code[0] = 0x25; break;
2160    default:
2161       code[0] = 0x45;
2162       assert(NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp) == NV50_IR_SUBOP_MEMBAR_SYS);
2163       break;
2164    }
2165    code[1] = 0xe0000000;
2166 
2167    emitPredicate(i);
2168 }
2169 
2170 void
emitCCTL(const Instruction * i)2171 CodeEmitterNVC0::emitCCTL(const Instruction *i)
2172 {
2173    code[0] = 0x00000005 | (i->subOp << 5);
2174 
2175    if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
2176       code[1] = 0x98000000;
2177       srcAddr32(i->src(0), 28, 2);
2178    } else {
2179       code[1] = 0xd0000000;
2180       setAddress24(i->src(0));
2181    }
2182    if (uses64bitAddress(i))
2183       code[1] |= 1 << 26;
2184    srcId(i->src(0).getIndirect(0), 20);
2185 
2186    emitPredicate(i);
2187 
2188    defId(i, 0, 14);
2189 }
2190 
2191 void
emitSUCLAMPMode(uint16_t subOp)2192 CodeEmitterNVC0::emitSUCLAMPMode(uint16_t subOp)
2193 {
2194    uint8_t m;
2195    switch (subOp & ~NV50_IR_SUBOP_SUCLAMP_2D) {
2196    case NV50_IR_SUBOP_SUCLAMP_SD(0, 1): m = 0; break;
2197    case NV50_IR_SUBOP_SUCLAMP_SD(1, 1): m = 1; break;
2198    case NV50_IR_SUBOP_SUCLAMP_SD(2, 1): m = 2; break;
2199    case NV50_IR_SUBOP_SUCLAMP_SD(3, 1): m = 3; break;
2200    case NV50_IR_SUBOP_SUCLAMP_SD(4, 1): m = 4; break;
2201    case NV50_IR_SUBOP_SUCLAMP_PL(0, 1): m = 5; break;
2202    case NV50_IR_SUBOP_SUCLAMP_PL(1, 1): m = 6; break;
2203    case NV50_IR_SUBOP_SUCLAMP_PL(2, 1): m = 7; break;
2204    case NV50_IR_SUBOP_SUCLAMP_PL(3, 1): m = 8; break;
2205    case NV50_IR_SUBOP_SUCLAMP_PL(4, 1): m = 9; break;
2206    case NV50_IR_SUBOP_SUCLAMP_BL(0, 1): m = 10; break;
2207    case NV50_IR_SUBOP_SUCLAMP_BL(1, 1): m = 11; break;
2208    case NV50_IR_SUBOP_SUCLAMP_BL(2, 1): m = 12; break;
2209    case NV50_IR_SUBOP_SUCLAMP_BL(3, 1): m = 13; break;
2210    case NV50_IR_SUBOP_SUCLAMP_BL(4, 1): m = 14; break;
2211    default:
2212       return;
2213    }
2214    code[0] |= m << 5;
2215    if (subOp & NV50_IR_SUBOP_SUCLAMP_2D)
2216       code[1] |= 1 << 16;
2217 }
2218 
2219 void
emitSUCalc(Instruction * i)2220 CodeEmitterNVC0::emitSUCalc(Instruction *i)
2221 {
2222    ImmediateValue *imm = NULL;
2223    uint64_t opc;
2224 
2225    if (i->srcExists(2)) {
2226       imm = i->getSrc(2)->asImm();
2227       if (imm)
2228          i->setSrc(2, NULL); // special case, make emitForm_A not assert
2229    }
2230 
2231    switch (i->op) {
2232    case OP_SUCLAMP: opc = HEX64(58000000, 00000004); break;
2233    case OP_SUBFM: opc = HEX64(5c000000, 00000004); break;
2234    case OP_SUEAU: opc = HEX64(60000000, 00000004); break;
2235    default:
2236       assert(0);
2237       return;
2238    }
2239    emitForm_A(i, opc);
2240 
2241    if (i->op == OP_SUCLAMP) {
2242       if (i->dType == TYPE_S32)
2243          code[0] |= 1 << 9;
2244       emitSUCLAMPMode(i->subOp);
2245    }
2246 
2247    if (i->op == OP_SUBFM && i->subOp == NV50_IR_SUBOP_SUBFM_3D)
2248          code[1] |= 1 << 16;
2249 
2250    if (i->op != OP_SUEAU) {
2251       if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
2252          code[0] |= 63 << 14;
2253          code[1] |= i->getDef(0)->reg.data.id << 23;
2254       } else
2255       if (i->defExists(1)) { // r, p
2256          assert(i->def(1).getFile() == FILE_PREDICATE);
2257          code[1] |= i->getDef(1)->reg.data.id << 23;
2258       } else { // r, #
2259          code[1] |= 7 << 23;
2260       }
2261    }
2262    if (imm) {
2263       assert(i->op == OP_SUCLAMP);
2264       i->setSrc(2, imm);
2265       code[1] |= (imm->reg.data.u32 & 0x3f) << 17; // sint6
2266    }
2267 }
2268 
2269 void
emitSUGType(DataType ty)2270 CodeEmitterNVC0::emitSUGType(DataType ty)
2271 {
2272    switch (ty) {
2273    case TYPE_S32: code[1] |= 1 << 13; break;
2274    case TYPE_U8:  code[1] |= 2 << 13; break;
2275    case TYPE_S8:  code[1] |= 3 << 13; break;
2276    default:
2277       assert(ty == TYPE_U32);
2278       break;
2279    }
2280 }
2281 
2282 void
setSUConst16(const Instruction * i,const int s)2283 CodeEmitterNVC0::setSUConst16(const Instruction *i, const int s)
2284 {
2285    const uint32_t offset = i->getSrc(s)->reg.data.offset;
2286 
2287    assert(i->src(s).getFile() == FILE_MEMORY_CONST);
2288    assert(offset == (offset & 0xfffc));
2289 
2290    code[1] |= 1 << 21;
2291    code[0] |= offset << 24;
2292    code[1] |= offset >> 8;
2293    code[1] |= i->getSrc(s)->reg.fileIndex << 8;
2294 }
2295 
2296 void
setSUPred(const Instruction * i,const int s)2297 CodeEmitterNVC0::setSUPred(const Instruction *i, const int s)
2298 {
2299    if (!i->srcExists(s) || (i->predSrc == s)) {
2300       code[1] |= 0x7 << 17;
2301    } else {
2302       if (i->src(s).mod == Modifier(NV50_IR_MOD_NOT))
2303          code[1] |= 1 << 20;
2304       srcId(i->src(s), 32 + 17);
2305    }
2306 }
2307 
2308 void
emitSULDGB(const TexInstruction * i)2309 CodeEmitterNVC0::emitSULDGB(const TexInstruction *i)
2310 {
2311    code[0] = 0x5;
2312    code[1] = 0xd4000000 | (i->subOp << 15);
2313 
2314    emitLoadStoreType(i->dType);
2315    emitSUGType(i->sType);
2316    emitCachingMode(i->cache);
2317 
2318    emitPredicate(i);
2319    defId(i->def(0), 14); // destination
2320    srcId(i->src(0), 20); // address
2321    // format
2322    if (i->src(1).getFile() == FILE_GPR)
2323       srcId(i->src(1), 26);
2324    else
2325       setSUConst16(i, 1);
2326    setSUPred(i, 2);
2327 }
2328 
2329 void
emitSUSTGx(const TexInstruction * i)2330 CodeEmitterNVC0::emitSUSTGx(const TexInstruction *i)
2331 {
2332    code[0] = 0x5;
2333    code[1] = 0xdc000000 | (i->subOp << 15);
2334 
2335    if (i->op == OP_SUSTP)
2336       code[1] |= i->tex.mask << 22;
2337    else
2338       emitLoadStoreType(i->dType);
2339    emitSUGType(i->sType);
2340    emitCachingMode(i->cache);
2341 
2342    emitPredicate(i);
2343    srcId(i->src(0), 20); // address
2344    // format
2345    if (i->src(1).getFile() == FILE_GPR)
2346       srcId(i->src(1), 26);
2347    else
2348       setSUConst16(i, 1);
2349    srcId(i->src(3), 14); // values
2350    setSUPred(i, 2);
2351 }
2352 
2353 void
emitSUAddr(const TexInstruction * i)2354 CodeEmitterNVC0::emitSUAddr(const TexInstruction *i)
2355 {
2356    assert(targ->getChipset() < NVISA_GK104_CHIPSET);
2357 
2358    if (i->tex.rIndirectSrc < 0) {
2359       code[1] |= 0x00004000;
2360       code[0] |= i->tex.r << 26;
2361    } else {
2362       srcId(i, i->tex.rIndirectSrc, 26);
2363    }
2364 }
2365 
2366 void
emitSUDim(const TexInstruction * i)2367 CodeEmitterNVC0::emitSUDim(const TexInstruction *i)
2368 {
2369    assert(targ->getChipset() < NVISA_GK104_CHIPSET);
2370 
2371    code[1] |= (i->tex.target.getDim() - 1) << 12;
2372    if (i->tex.target.isArray() || i->tex.target.isCube() ||
2373        i->tex.target.getDim() == 3) {
2374       // use e2d mode for 3-dim images, arrays and cubes.
2375       code[1] |= 3 << 12;
2376    }
2377 
2378    srcId(i->src(0), 20);
2379 }
2380 
2381 void
emitSULEA(const TexInstruction * i)2382 CodeEmitterNVC0::emitSULEA(const TexInstruction *i)
2383 {
2384    assert(targ->getChipset() < NVISA_GK104_CHIPSET);
2385 
2386    code[0] = 0x5;
2387    code[1] = 0xf0000000;
2388 
2389    emitPredicate(i);
2390    emitLoadStoreType(i->sType);
2391 
2392    defId(i->def(0), 14);
2393 
2394    if (i->defExists(1)) {
2395       defId(i->def(1), 32 + 22);
2396    } else {
2397       code[1] |= 7 << 22;
2398    }
2399 
2400    emitSUAddr(i);
2401    emitSUDim(i);
2402 }
2403 
2404 void
emitSULDB(const TexInstruction * i)2405 CodeEmitterNVC0::emitSULDB(const TexInstruction *i)
2406 {
2407    assert(targ->getChipset() < NVISA_GK104_CHIPSET);
2408 
2409    code[0] = 0x5;
2410    code[1] = 0xd4000000 | (i->subOp << 15);
2411 
2412    emitPredicate(i);
2413    emitLoadStoreType(i->dType);
2414 
2415    defId(i->def(0), 14);
2416 
2417    emitCachingMode(i->cache);
2418    emitSUAddr(i);
2419    emitSUDim(i);
2420 }
2421 
2422 void
emitSUSTx(const TexInstruction * i)2423 CodeEmitterNVC0::emitSUSTx(const TexInstruction *i)
2424 {
2425    assert(targ->getChipset() < NVISA_GK104_CHIPSET);
2426 
2427    code[0] = 0x5;
2428    code[1] = 0xdc000000 | (i->subOp << 15);
2429 
2430    if (i->op == OP_SUSTP)
2431       code[1] |= i->tex.mask << 17;
2432    else
2433       emitLoadStoreType(i->dType);
2434 
2435    emitPredicate(i);
2436 
2437    srcId(i->src(1), 14);
2438 
2439    emitCachingMode(i->cache);
2440    emitSUAddr(i);
2441    emitSUDim(i);
2442 }
2443 
2444 void
emitVectorSubOp(const Instruction * i)2445 CodeEmitterNVC0::emitVectorSubOp(const Instruction *i)
2446 {
2447    switch (NV50_IR_SUBOP_Vn(i->subOp)) {
2448    case 0:
2449       code[1] |= (i->subOp & 0x000f) << 12; // vsrc1
2450       code[1] |= (i->subOp & 0x00e0) >> 5;  // vsrc2
2451       code[1] |= (i->subOp & 0x0100) << 7;  // vsrc2
2452       code[1] |= (i->subOp & 0x3c00) << 13; // vdst
2453       break;
2454    case 1:
2455       code[1] |= (i->subOp & 0x000f) << 8;  // v2src1
2456       code[1] |= (i->subOp & 0x0010) << 11; // v2src1
2457       code[1] |= (i->subOp & 0x01e0) >> 1;  // v2src2
2458       code[1] |= (i->subOp & 0x0200) << 6;  // v2src2
2459       code[1] |= (i->subOp & 0x3c00) << 2;  // v4dst
2460       code[1] |= (i->mask & 0x3) << 2;
2461       break;
2462    case 2:
2463       code[1] |= (i->subOp & 0x000f) << 8; // v4src1
2464       code[1] |= (i->subOp & 0x01e0) >> 1; // v4src2
2465       code[1] |= (i->subOp & 0x3c00) << 2; // v4dst
2466       code[1] |= (i->mask & 0x3) << 2;
2467       code[1] |= (i->mask & 0xc) << 21;
2468       break;
2469    default:
2470       assert(0);
2471       break;
2472    }
2473 }
2474 
2475 void
emitVSHL(const Instruction * i)2476 CodeEmitterNVC0::emitVSHL(const Instruction *i)
2477 {
2478    uint64_t opc = 0x4;
2479 
2480    switch (NV50_IR_SUBOP_Vn(i->subOp)) {
2481    case 0: opc |= 0xe8ULL << 56; break;
2482    case 1: opc |= 0xb4ULL << 56; break;
2483    case 2: opc |= 0x94ULL << 56; break;
2484    default:
2485       assert(0);
2486       break;
2487    }
2488    if (NV50_IR_SUBOP_Vn(i->subOp) == 1) {
2489       if (isSignedType(i->dType)) opc |= 1ULL << 0x2a;
2490       if (isSignedType(i->sType)) opc |= (1 << 6) | (1 << 5);
2491    } else {
2492       if (isSignedType(i->dType)) opc |= 1ULL << 0x39;
2493       if (isSignedType(i->sType)) opc |= 1 << 6;
2494    }
2495    emitForm_A(i, opc);
2496    emitVectorSubOp(i);
2497 
2498    if (i->saturate)
2499       code[0] |= 1 << 9;
2500    if (i->flagsDef >= 0)
2501       code[1] |= 1 << 16;
2502 }
2503 
2504 void
emitPIXLD(const Instruction * i)2505 CodeEmitterNVC0::emitPIXLD(const Instruction *i)
2506 {
2507    assert(i->encSize == 8);
2508    emitForm_A(i, HEX64(10000000, 00000006));
2509    code[0] |= i->subOp << 5;
2510    code[1] |= 0x00e00000;
2511 }
2512 
2513 void
emitVOTE(const Instruction * i)2514 CodeEmitterNVC0::emitVOTE(const Instruction *i)
2515 {
2516    assert(i->src(0).getFile() == FILE_PREDICATE);
2517 
2518    code[0] = 0x00000004 | (i->subOp << 5);
2519    code[1] = 0x48000000;
2520 
2521    emitPredicate(i);
2522 
2523    unsigned rp = 0;
2524    for (int d = 0; i->defExists(d); d++) {
2525       if (i->def(d).getFile() == FILE_PREDICATE) {
2526          assert(!(rp & 2));
2527          rp |= 2;
2528          defId(i->def(d), 32 + 22);
2529       } else if (i->def(d).getFile() == FILE_GPR) {
2530          assert(!(rp & 1));
2531          rp |= 1;
2532          defId(i->def(d), 14);
2533       } else {
2534          assert(!"Unhandled def");
2535       }
2536    }
2537    if (!(rp & 1))
2538       code[0] |= 63 << 14;
2539    if (!(rp & 2))
2540       code[1] |= 7 << 22;
2541    if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
2542       code[0] |= 1 << 23;
2543    srcId(i->src(0), 20);
2544 }
2545 
2546 bool
emitInstruction(Instruction * insn)2547 CodeEmitterNVC0::emitInstruction(Instruction *insn)
2548 {
2549    unsigned int size = insn->encSize;
2550 
2551    if (writeIssueDelays && !(codeSize & 0x3f))
2552       size += 8;
2553 
2554    if (!insn->encSize) {
2555       ERROR("skipping unencodable instruction: "); insn->print();
2556       return false;
2557    } else
2558    if (codeSize + size > codeSizeLimit) {
2559       ERROR("code emitter output buffer too small\n");
2560       return false;
2561    }
2562 
2563    if (writeIssueDelays) {
2564       if (!(codeSize & 0x3f)) {
2565          code[0] = 0x00000007; // cf issue delay "instruction"
2566          code[1] = 0x20000000;
2567          code += 2;
2568          codeSize += 8;
2569       }
2570       const unsigned int id = (codeSize & 0x3f) / 8 - 1;
2571       uint32_t *data = code - (id * 2 + 2);
2572       if (id <= 2) {
2573          data[0] |= insn->sched << (id * 8 + 4);
2574       } else
2575       if (id == 3) {
2576          data[0] |= insn->sched << 28;
2577          data[1] |= insn->sched >> 4;
2578       } else {
2579          data[1] |= insn->sched << ((id - 4) * 8 + 4);
2580       }
2581    }
2582 
2583    // assert that instructions with multiple defs don't corrupt registers
2584    for (int d = 0; insn->defExists(d); ++d)
2585       assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0);
2586 
2587    switch (insn->op) {
2588    case OP_MOV:
2589    case OP_RDSV:
2590       emitMOV(insn);
2591       break;
2592    case OP_NOP:
2593       break;
2594    case OP_LOAD:
2595       emitLOAD(insn);
2596       break;
2597    case OP_STORE:
2598       emitSTORE(insn);
2599       break;
2600    case OP_LINTERP:
2601    case OP_PINTERP:
2602       emitINTERP(insn);
2603       break;
2604    case OP_VFETCH:
2605       emitVFETCH(insn);
2606       break;
2607    case OP_EXPORT:
2608       emitEXPORT(insn);
2609       break;
2610    case OP_PFETCH:
2611       emitPFETCH(insn);
2612       break;
2613    case OP_AFETCH:
2614       emitAFETCH(insn);
2615       break;
2616    case OP_EMIT:
2617    case OP_RESTART:
2618       emitOUT(insn);
2619       break;
2620    case OP_ADD:
2621    case OP_SUB:
2622       if (insn->dType == TYPE_F64)
2623          emitDADD(insn);
2624       else if (isFloatType(insn->dType))
2625          emitFADD(insn);
2626       else
2627          emitUADD(insn);
2628       break;
2629    case OP_MUL:
2630       if (insn->dType == TYPE_F64)
2631          emitDMUL(insn);
2632       else if (isFloatType(insn->dType))
2633          emitFMUL(insn);
2634       else
2635          emitUMUL(insn);
2636       break;
2637    case OP_MAD:
2638    case OP_FMA:
2639       if (insn->dType == TYPE_F64)
2640          emitDMAD(insn);
2641       else if (isFloatType(insn->dType))
2642          emitFMAD(insn);
2643       else
2644          emitIMAD(insn);
2645       break;
2646    case OP_SAD:
2647       emitISAD(insn);
2648       break;
2649    case OP_SHLADD:
2650       emitSHLADD(insn);
2651       break;
2652    case OP_NOT:
2653       emitNOT(insn);
2654       break;
2655    case OP_AND:
2656       emitLogicOp(insn, 0);
2657       break;
2658    case OP_OR:
2659       emitLogicOp(insn, 1);
2660       break;
2661    case OP_XOR:
2662       emitLogicOp(insn, 2);
2663       break;
2664    case OP_SHL:
2665    case OP_SHR:
2666       emitShift(insn);
2667       break;
2668    case OP_SET:
2669    case OP_SET_AND:
2670    case OP_SET_OR:
2671    case OP_SET_XOR:
2672       emitSET(insn->asCmp());
2673       break;
2674    case OP_SELP:
2675       emitSELP(insn);
2676       break;
2677    case OP_SLCT:
2678       emitSLCT(insn->asCmp());
2679       break;
2680    case OP_MIN:
2681    case OP_MAX:
2682       emitMINMAX(insn);
2683       break;
2684    case OP_ABS:
2685    case OP_NEG:
2686    case OP_CEIL:
2687    case OP_FLOOR:
2688    case OP_TRUNC:
2689    case OP_SAT:
2690       emitCVT(insn);
2691       break;
2692    case OP_CVT:
2693       if (insn->def(0).getFile() == FILE_PREDICATE ||
2694           insn->src(0).getFile() == FILE_PREDICATE)
2695          emitMOV(insn);
2696       else
2697          emitCVT(insn);
2698       break;
2699    case OP_RSQ:
2700       emitSFnOp(insn, 5 + 2 * insn->subOp);
2701       break;
2702    case OP_RCP:
2703       emitSFnOp(insn, 4 + 2 * insn->subOp);
2704       break;
2705    case OP_LG2:
2706       emitSFnOp(insn, 3);
2707       break;
2708    case OP_EX2:
2709       emitSFnOp(insn, 2);
2710       break;
2711    case OP_SIN:
2712       emitSFnOp(insn, 1);
2713       break;
2714    case OP_COS:
2715       emitSFnOp(insn, 0);
2716       break;
2717    case OP_PRESIN:
2718    case OP_PREEX2:
2719       emitPreOp(insn);
2720       break;
2721    case OP_TEX:
2722    case OP_TXB:
2723    case OP_TXL:
2724    case OP_TXD:
2725    case OP_TXF:
2726    case OP_TXG:
2727    case OP_TXLQ:
2728       emitTEX(insn->asTex());
2729       break;
2730    case OP_TXQ:
2731       emitTXQ(insn->asTex());
2732       break;
2733    case OP_TEXBAR:
2734       emitTEXBAR(insn);
2735       break;
2736    case OP_SUBFM:
2737    case OP_SUCLAMP:
2738    case OP_SUEAU:
2739       emitSUCalc(insn);
2740       break;
2741    case OP_MADSP:
2742       emitMADSP(insn);
2743       break;
2744    case OP_SULDB:
2745       if (targ->getChipset() >= NVISA_GK104_CHIPSET)
2746          emitSULDGB(insn->asTex());
2747       else
2748          emitSULDB(insn->asTex());
2749       break;
2750    case OP_SUSTB:
2751    case OP_SUSTP:
2752       if (targ->getChipset() >= NVISA_GK104_CHIPSET)
2753          emitSUSTGx(insn->asTex());
2754       else
2755          emitSUSTx(insn->asTex());
2756       break;
2757    case OP_SULEA:
2758       emitSULEA(insn->asTex());
2759       break;
2760    case OP_ATOM:
2761       emitATOM(insn);
2762       break;
2763    case OP_BRA:
2764    case OP_CALL:
2765    case OP_PRERET:
2766    case OP_RET:
2767    case OP_DISCARD:
2768    case OP_EXIT:
2769    case OP_PRECONT:
2770    case OP_CONT:
2771    case OP_PREBREAK:
2772    case OP_BREAK:
2773    case OP_JOINAT:
2774    case OP_BRKPT:
2775    case OP_QUADON:
2776    case OP_QUADPOP:
2777       emitFlow(insn);
2778       break;
2779    case OP_QUADOP:
2780       emitQUADOP(insn, insn->subOp, insn->lanes);
2781       break;
2782    case OP_DFDX:
2783       emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4);
2784       break;
2785    case OP_DFDY:
2786       emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5);
2787       break;
2788    case OP_POPCNT:
2789       emitPOPC(insn);
2790       break;
2791    case OP_INSBF:
2792       emitINSBF(insn);
2793       break;
2794    case OP_EXTBF:
2795       emitEXTBF(insn);
2796       break;
2797    case OP_BFIND:
2798       emitBFIND(insn);
2799       break;
2800    case OP_PERMT:
2801       emitPERMT(insn);
2802       break;
2803    case OP_JOIN:
2804       emitNOP(insn);
2805       insn->join = 1;
2806       break;
2807    case OP_BAR:
2808       emitBAR(insn);
2809       break;
2810    case OP_MEMBAR:
2811       emitMEMBAR(insn);
2812       break;
2813    case OP_CCTL:
2814       emitCCTL(insn);
2815       break;
2816    case OP_VSHL:
2817       emitVSHL(insn);
2818       break;
2819    case OP_PIXLD:
2820       emitPIXLD(insn);
2821       break;
2822    case OP_VOTE:
2823       emitVOTE(insn);
2824       break;
2825    case OP_PHI:
2826    case OP_UNION:
2827    case OP_CONSTRAINT:
2828       ERROR("operation should have been eliminated");
2829       return false;
2830    case OP_EXP:
2831    case OP_LOG:
2832    case OP_SQRT:
2833    case OP_POW:
2834       ERROR("operation should have been lowered\n");
2835       return false;
2836    default:
2837       ERROR("unknown op: %u\n", insn->op);
2838       return false;
2839    }
2840 
2841    if (insn->join) {
2842       code[0] |= 0x10;
2843       assert(insn->encSize == 8);
2844    }
2845 
2846    code += insn->encSize / 4;
2847    codeSize += insn->encSize;
2848    return true;
2849 }
2850 
2851 uint32_t
getMinEncodingSize(const Instruction * i) const2852 CodeEmitterNVC0::getMinEncodingSize(const Instruction *i) const
2853 {
2854    const Target::OpInfo &info = targ->getOpInfo(i);
2855 
2856    if (writeIssueDelays || info.minEncSize == 8 || 1)
2857       return 8;
2858 
2859    if (i->ftz || i->saturate || i->join)
2860       return 8;
2861    if (i->rnd != ROUND_N)
2862       return 8;
2863    if (i->predSrc >= 0 && i->op == OP_MAD)
2864       return 8;
2865 
2866    if (i->op == OP_PINTERP) {
2867       if (i->getSampleMode() || 1) // XXX: grr, short op doesn't work
2868          return 8;
2869    } else
2870    if (i->op == OP_MOV && i->lanes != 0xf) {
2871       return 8;
2872    }
2873 
2874    for (int s = 0; i->srcExists(s); ++s) {
2875       if (i->src(s).isIndirect(0))
2876          return 8;
2877 
2878       if (i->src(s).getFile() == FILE_MEMORY_CONST) {
2879          if (SDATA(i->src(s)).offset >= 0x100)
2880             return 8;
2881          if (i->getSrc(s)->reg.fileIndex > 1 &&
2882              i->getSrc(s)->reg.fileIndex != 16)
2883              return 8;
2884       } else
2885       if (i->src(s).getFile() == FILE_IMMEDIATE) {
2886          if (i->dType == TYPE_F32) {
2887             if (SDATA(i->src(s)).u32 >= 0x100)
2888                return 8;
2889          } else {
2890             if (SDATA(i->src(s)).u32 > 0xff)
2891                return 8;
2892          }
2893       }
2894 
2895       if (i->op == OP_CVT)
2896          continue;
2897       if (i->src(s).mod != Modifier(0)) {
2898          if (i->src(s).mod == Modifier(NV50_IR_MOD_ABS))
2899             if (i->op != OP_RSQ)
2900                return 8;
2901          if (i->src(s).mod == Modifier(NV50_IR_MOD_NEG))
2902             if (i->op != OP_ADD || s != 0)
2903                return 8;
2904       }
2905    }
2906 
2907    return 4;
2908 }
2909 
2910 // Simplified, erring on safe side.
2911 class SchedDataCalculator : public Pass
2912 {
2913 public:
SchedDataCalculator(const Target * targ)2914    SchedDataCalculator(const Target *targ) : targ(targ) { }
2915 
2916 private:
2917    struct RegScores
2918    {
2919       struct Resource {
2920          int st[DATA_FILE_COUNT]; // LD to LD delay 3
2921          int ld[DATA_FILE_COUNT]; // ST to ST delay 3
2922          int tex; // TEX to non-TEX delay 17 (0x11)
2923          int sfu; // SFU to SFU delay 3 (except PRE-ops)
2924          int imul; // integer MUL to MUL delay 3
2925       } res;
2926       struct ScoreData {
2927          int r[256];
2928          int p[8];
2929          int c;
2930       } rd, wr;
2931       int base;
2932       int regs;
2933 
rebasenv50_ir::SchedDataCalculator::RegScores2934       void rebase(const int base)
2935       {
2936          const int delta = this->base - base;
2937          if (!delta)
2938             return;
2939          this->base = 0;
2940 
2941          for (int i = 0; i < regs; ++i) {
2942             rd.r[i] += delta;
2943             wr.r[i] += delta;
2944          }
2945          for (int i = 0; i < 8; ++i) {
2946             rd.p[i] += delta;
2947             wr.p[i] += delta;
2948          }
2949          rd.c += delta;
2950          wr.c += delta;
2951 
2952          for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
2953             res.ld[f] += delta;
2954             res.st[f] += delta;
2955          }
2956          res.sfu += delta;
2957          res.imul += delta;
2958          res.tex += delta;
2959       }
wipenv50_ir::SchedDataCalculator::RegScores2960       void wipe(int regs)
2961       {
2962          memset(&rd, 0, sizeof(rd));
2963          memset(&wr, 0, sizeof(wr));
2964          memset(&res, 0, sizeof(res));
2965          this->regs = regs;
2966       }
getLatestnv50_ir::SchedDataCalculator::RegScores2967       int getLatest(const ScoreData& d) const
2968       {
2969          int max = 0;
2970          for (int i = 0; i < regs; ++i)
2971             if (d.r[i] > max)
2972                max = d.r[i];
2973          for (int i = 0; i < 8; ++i)
2974             if (d.p[i] > max)
2975                max = d.p[i];
2976          if (d.c > max)
2977             max = d.c;
2978          return max;
2979       }
getLatestRdnv50_ir::SchedDataCalculator::RegScores2980       inline int getLatestRd() const
2981       {
2982          return getLatest(rd);
2983       }
getLatestWrnv50_ir::SchedDataCalculator::RegScores2984       inline int getLatestWr() const
2985       {
2986          return getLatest(wr);
2987       }
getLatestnv50_ir::SchedDataCalculator::RegScores2988       inline int getLatest() const
2989       {
2990          const int a = getLatestRd();
2991          const int b = getLatestWr();
2992 
2993          int max = MAX2(a, b);
2994          for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
2995             max = MAX2(res.ld[f], max);
2996             max = MAX2(res.st[f], max);
2997          }
2998          max = MAX2(res.sfu, max);
2999          max = MAX2(res.imul, max);
3000          max = MAX2(res.tex, max);
3001          return max;
3002       }
setMaxnv50_ir::SchedDataCalculator::RegScores3003       void setMax(const RegScores *that)
3004       {
3005          for (int i = 0; i < regs; ++i) {
3006             rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
3007             wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
3008          }
3009          for (int i = 0; i < 8; ++i) {
3010             rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
3011             wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
3012          }
3013          rd.c = MAX2(rd.c, that->rd.c);
3014          wr.c = MAX2(wr.c, that->wr.c);
3015 
3016          for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
3017             res.ld[f] = MAX2(res.ld[f], that->res.ld[f]);
3018             res.st[f] = MAX2(res.st[f], that->res.st[f]);
3019          }
3020          res.sfu = MAX2(res.sfu, that->res.sfu);
3021          res.imul = MAX2(res.imul, that->res.imul);
3022          res.tex = MAX2(res.tex, that->res.tex);
3023       }
printnv50_ir::SchedDataCalculator::RegScores3024       void print(int cycle)
3025       {
3026          for (int i = 0; i < regs; ++i) {
3027             if (rd.r[i] > cycle)
3028                INFO("rd $r%i @ %i\n", i, rd.r[i]);
3029             if (wr.r[i] > cycle)
3030                INFO("wr $r%i @ %i\n", i, wr.r[i]);
3031          }
3032          for (int i = 0; i < 8; ++i) {
3033             if (rd.p[i] > cycle)
3034                INFO("rd $p%i @ %i\n", i, rd.p[i]);
3035             if (wr.p[i] > cycle)
3036                INFO("wr $p%i @ %i\n", i, wr.p[i]);
3037          }
3038          if (rd.c > cycle)
3039             INFO("rd $c @ %i\n", rd.c);
3040          if (wr.c > cycle)
3041             INFO("wr $c @ %i\n", wr.c);
3042          if (res.sfu > cycle)
3043             INFO("sfu @ %i\n", res.sfu);
3044          if (res.imul > cycle)
3045             INFO("imul @ %i\n", res.imul);
3046          if (res.tex > cycle)
3047             INFO("tex @ %i\n", res.tex);
3048       }
3049    };
3050 
3051    RegScores *score; // for current BB
3052    std::vector<RegScores> scoreBoards;
3053    int prevData;
3054    operation prevOp;
3055 
3056    const Target *targ;
3057 
3058    bool visit(Function *);
3059    bool visit(BasicBlock *);
3060 
3061    void commitInsn(const Instruction *, int cycle);
3062    int calcDelay(const Instruction *, int cycle) const;
3063    void setDelay(Instruction *, int delay, Instruction *next);
3064 
3065    void recordRd(const Value *, const int ready);
3066    void recordWr(const Value *, const int ready);
3067    void checkRd(const Value *, int cycle, int& delay) const;
3068    void checkWr(const Value *, int cycle, int& delay) const;
3069 
3070    int getCycles(const Instruction *, int origDelay) const;
3071 };
3072 
3073 void
setDelay(Instruction * insn,int delay,Instruction * next)3074 SchedDataCalculator::setDelay(Instruction *insn, int delay, Instruction *next)
3075 {
3076    if (insn->op == OP_EXIT || insn->op == OP_RET)
3077       delay = MAX2(delay, 14);
3078 
3079    if (insn->op == OP_TEXBAR) {
3080       // TODO: except if results not used before EXIT
3081       insn->sched = 0xc2;
3082    } else
3083    if (insn->op == OP_JOIN || insn->join) {
3084       insn->sched = 0x00;
3085    } else
3086    if (delay >= 0 || prevData == 0x04 ||
3087        !next || !targ->canDualIssue(insn, next)) {
3088       insn->sched = static_cast<uint8_t>(MAX2(delay, 0));
3089       if (prevOp == OP_EXPORT)
3090          insn->sched |= 0x40;
3091       else
3092          insn->sched |= 0x20;
3093    } else {
3094       insn->sched = 0x04; // dual-issue
3095    }
3096 
3097    if (prevData != 0x04 || prevOp != OP_EXPORT)
3098       if (insn->sched != 0x04 || insn->op == OP_EXPORT)
3099          prevOp = insn->op;
3100 
3101    prevData = insn->sched;
3102 }
3103 
3104 int
getCycles(const Instruction * insn,int origDelay) const3105 SchedDataCalculator::getCycles(const Instruction *insn, int origDelay) const
3106 {
3107    if (insn->sched & 0x80) {
3108       int c = (insn->sched & 0x0f) * 2 + 1;
3109       if (insn->op == OP_TEXBAR && origDelay > 0)
3110          c += origDelay;
3111       return c;
3112    }
3113    if (insn->sched & 0x60)
3114       return (insn->sched & 0x1f) + 1;
3115    return (insn->sched == 0x04) ? 0 : 32;
3116 }
3117 
3118 bool
visit(Function * func)3119 SchedDataCalculator::visit(Function *func)
3120 {
3121    int regs = targ->getFileSize(FILE_GPR) + 1;
3122    scoreBoards.resize(func->cfg.getSize());
3123    for (size_t i = 0; i < scoreBoards.size(); ++i)
3124       scoreBoards[i].wipe(regs);
3125    return true;
3126 }
3127 
3128 bool
visit(BasicBlock * bb)3129 SchedDataCalculator::visit(BasicBlock *bb)
3130 {
3131    Instruction *insn;
3132    Instruction *next = NULL;
3133 
3134    int cycle = 0;
3135 
3136    prevData = 0x00;
3137    prevOp = OP_NOP;
3138    score = &scoreBoards.at(bb->getId());
3139 
3140    for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
3141       // back branches will wait until all target dependencies are satisfied
3142       if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
3143          continue;
3144       BasicBlock *in = BasicBlock::get(ei.getNode());
3145       if (in->getExit()) {
3146          if (prevData != 0x04)
3147             prevData = in->getExit()->sched;
3148          prevOp = in->getExit()->op;
3149       }
3150       score->setMax(&scoreBoards.at(in->getId()));
3151    }
3152    if (bb->cfg.incidentCount() > 1)
3153       prevOp = OP_NOP;
3154 
3155 #ifdef NVC0_DEBUG_SCHED_DATA
3156    INFO("=== BB:%i initial scores\n", bb->getId());
3157    score->print(cycle);
3158 #endif
3159 
3160    for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
3161       next = insn->next;
3162 
3163       commitInsn(insn, cycle);
3164       int delay = calcDelay(next, cycle);
3165       setDelay(insn, delay, next);
3166       cycle += getCycles(insn, delay);
3167 
3168 #ifdef NVC0_DEBUG_SCHED_DATA
3169       INFO("cycle %i, sched %02x\n", cycle, insn->sched);
3170       insn->print();
3171       next->print();
3172 #endif
3173    }
3174    if (!insn)
3175       return true;
3176    commitInsn(insn, cycle);
3177 
3178    int bbDelay = -1;
3179 
3180    for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
3181       BasicBlock *out = BasicBlock::get(ei.getNode());
3182 
3183       if (ei.getType() != Graph::Edge::BACK) {
3184          // only test the first instruction of the outgoing block
3185          next = out->getEntry();
3186          if (next)
3187             bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
3188       } else {
3189          // wait until all dependencies are satisfied
3190          const int regsFree = score->getLatest();
3191          next = out->getFirst();
3192          for (int c = cycle; next && c < regsFree; next = next->next) {
3193             bbDelay = MAX2(bbDelay, calcDelay(next, c));
3194             c += getCycles(next, bbDelay);
3195          }
3196          next = NULL;
3197       }
3198    }
3199    if (bb->cfg.outgoingCount() != 1)
3200       next = NULL;
3201    setDelay(insn, bbDelay, next);
3202    cycle += getCycles(insn, bbDelay);
3203 
3204    score->rebase(cycle); // common base for initializing out blocks' scores
3205    return true;
3206 }
3207 
3208 #define NVE4_MAX_ISSUE_DELAY 0x1f
3209 int
calcDelay(const Instruction * insn,int cycle) const3210 SchedDataCalculator::calcDelay(const Instruction *insn, int cycle) const
3211 {
3212    int delay = 0, ready = cycle;
3213 
3214    for (int s = 0; insn->srcExists(s); ++s)
3215       checkRd(insn->getSrc(s), cycle, delay);
3216    // WAR & WAW don't seem to matter
3217    // for (int s = 0; insn->srcExists(s); ++s)
3218    //   recordRd(insn->getSrc(s), cycle);
3219 
3220    switch (Target::getOpClass(insn->op)) {
3221    case OPCLASS_SFU:
3222       ready = score->res.sfu;
3223       break;
3224    case OPCLASS_ARITH:
3225       if (insn->op == OP_MUL && !isFloatType(insn->dType))
3226          ready = score->res.imul;
3227       break;
3228    case OPCLASS_TEXTURE:
3229       ready = score->res.tex;
3230       break;
3231    case OPCLASS_LOAD:
3232       ready = score->res.ld[insn->src(0).getFile()];
3233       break;
3234    case OPCLASS_STORE:
3235       ready = score->res.st[insn->src(0).getFile()];
3236       break;
3237    default:
3238       break;
3239    }
3240    if (Target::getOpClass(insn->op) != OPCLASS_TEXTURE)
3241       ready = MAX2(ready, score->res.tex);
3242 
3243    delay = MAX2(delay, ready - cycle);
3244 
3245    // if can issue next cycle, delay is 0, not 1
3246    return MIN2(delay - 1, NVE4_MAX_ISSUE_DELAY);
3247 }
3248 
3249 void
commitInsn(const Instruction * insn,int cycle)3250 SchedDataCalculator::commitInsn(const Instruction *insn, int cycle)
3251 {
3252    const int ready = cycle + targ->getLatency(insn);
3253 
3254    for (int d = 0; insn->defExists(d); ++d)
3255       recordWr(insn->getDef(d), ready);
3256    // WAR & WAW don't seem to matter
3257    // for (int s = 0; insn->srcExists(s); ++s)
3258    //   recordRd(insn->getSrc(s), cycle);
3259 
3260    switch (Target::getOpClass(insn->op)) {
3261    case OPCLASS_SFU:
3262       score->res.sfu = cycle + 4;
3263       break;
3264    case OPCLASS_ARITH:
3265       if (insn->op == OP_MUL && !isFloatType(insn->dType))
3266          score->res.imul = cycle + 4;
3267       break;
3268    case OPCLASS_TEXTURE:
3269       score->res.tex = cycle + 18;
3270       break;
3271    case OPCLASS_LOAD:
3272       if (insn->src(0).getFile() == FILE_MEMORY_CONST)
3273          break;
3274       score->res.ld[insn->src(0).getFile()] = cycle + 4;
3275       score->res.st[insn->src(0).getFile()] = ready;
3276       break;
3277    case OPCLASS_STORE:
3278       score->res.st[insn->src(0).getFile()] = cycle + 4;
3279       score->res.ld[insn->src(0).getFile()] = ready;
3280       break;
3281    case OPCLASS_OTHER:
3282       if (insn->op == OP_TEXBAR)
3283          score->res.tex = cycle;
3284       break;
3285    default:
3286       break;
3287    }
3288 
3289 #ifdef NVC0_DEBUG_SCHED_DATA
3290    score->print(cycle);
3291 #endif
3292 }
3293 
3294 void
checkRd(const Value * v,int cycle,int & delay) const3295 SchedDataCalculator::checkRd(const Value *v, int cycle, int& delay) const
3296 {
3297    int ready = cycle;
3298    int a, b;
3299 
3300    switch (v->reg.file) {
3301    case FILE_GPR:
3302       a = v->reg.data.id;
3303       b = a + v->reg.size / 4;
3304       for (int r = a; r < b; ++r)
3305          ready = MAX2(ready, score->rd.r[r]);
3306       break;
3307    case FILE_PREDICATE:
3308       ready = MAX2(ready, score->rd.p[v->reg.data.id]);
3309       break;
3310    case FILE_FLAGS:
3311       ready = MAX2(ready, score->rd.c);
3312       break;
3313    case FILE_SHADER_INPUT:
3314    case FILE_SHADER_OUTPUT: // yes, TCPs can read outputs
3315    case FILE_MEMORY_LOCAL:
3316    case FILE_MEMORY_CONST:
3317    case FILE_MEMORY_SHARED:
3318    case FILE_MEMORY_GLOBAL:
3319    case FILE_SYSTEM_VALUE:
3320       // TODO: any restrictions here ?
3321       break;
3322    case FILE_IMMEDIATE:
3323       break;
3324    default:
3325       assert(0);
3326       break;
3327    }
3328    if (cycle < ready)
3329       delay = MAX2(delay, ready - cycle);
3330 }
3331 
3332 void
checkWr(const Value * v,int cycle,int & delay) const3333 SchedDataCalculator::checkWr(const Value *v, int cycle, int& delay) const
3334 {
3335    int ready = cycle;
3336    int a, b;
3337 
3338    switch (v->reg.file) {
3339    case FILE_GPR:
3340       a = v->reg.data.id;
3341       b = a + v->reg.size / 4;
3342       for (int r = a; r < b; ++r)
3343          ready = MAX2(ready, score->wr.r[r]);
3344       break;
3345    case FILE_PREDICATE:
3346       ready = MAX2(ready, score->wr.p[v->reg.data.id]);
3347       break;
3348    default:
3349       assert(v->reg.file == FILE_FLAGS);
3350       ready = MAX2(ready, score->wr.c);
3351       break;
3352    }
3353    if (cycle < ready)
3354       delay = MAX2(delay, ready - cycle);
3355 }
3356 
3357 void
recordWr(const Value * v,const int ready)3358 SchedDataCalculator::recordWr(const Value *v, const int ready)
3359 {
3360    int a = v->reg.data.id;
3361 
3362    if (v->reg.file == FILE_GPR) {
3363       int b = a + v->reg.size / 4;
3364       for (int r = a; r < b; ++r)
3365          score->rd.r[r] = ready;
3366    } else
3367    // $c, $pX: shorter issue-to-read delay (at least as exec pred and carry)
3368    if (v->reg.file == FILE_PREDICATE) {
3369       score->rd.p[a] = ready + 4;
3370    } else {
3371       assert(v->reg.file == FILE_FLAGS);
3372       score->rd.c = ready + 4;
3373    }
3374 }
3375 
3376 void
recordRd(const Value * v,const int ready)3377 SchedDataCalculator::recordRd(const Value *v, const int ready)
3378 {
3379    int a = v->reg.data.id;
3380 
3381    if (v->reg.file == FILE_GPR) {
3382       int b = a + v->reg.size / 4;
3383       for (int r = a; r < b; ++r)
3384          score->wr.r[r] = ready;
3385    } else
3386    if (v->reg.file == FILE_PREDICATE) {
3387       score->wr.p[a] = ready;
3388    } else
3389    if (v->reg.file == FILE_FLAGS) {
3390       score->wr.c = ready;
3391    }
3392 }
3393 
3394 bool
calculateSchedDataNVC0(const Target * targ,Function * func)3395 calculateSchedDataNVC0(const Target *targ, Function *func)
3396 {
3397    SchedDataCalculator sched(targ);
3398    return sched.run(func, true, true);
3399 }
3400 
3401 void
prepareEmission(Function * func)3402 CodeEmitterNVC0::prepareEmission(Function *func)
3403 {
3404    CodeEmitter::prepareEmission(func);
3405 
3406    if (targ->hasSWSched)
3407       calculateSchedDataNVC0(targ, func);
3408 }
3409 
CodeEmitterNVC0(const TargetNVC0 * target)3410 CodeEmitterNVC0::CodeEmitterNVC0(const TargetNVC0 *target)
3411    : CodeEmitter(target),
3412      targNVC0(target),
3413      writeIssueDelays(target->hasSWSched)
3414 {
3415    code = NULL;
3416    codeSize = codeSizeLimit = 0;
3417    relocInfo = NULL;
3418 }
3419 
3420 CodeEmitter *
createCodeEmitterNVC0(Program::Type type)3421 TargetNVC0::createCodeEmitterNVC0(Program::Type type)
3422 {
3423    CodeEmitterNVC0 *emit = new CodeEmitterNVC0(this);
3424    emit->setProgramType(type);
3425    return emit;
3426 }
3427 
3428 CodeEmitter *
getCodeEmitter(Program::Type type)3429 TargetNVC0::getCodeEmitter(Program::Type type)
3430 {
3431    if (chipset >= NVISA_GK20A_CHIPSET)
3432       return createCodeEmitterGK110(type);
3433    return createCodeEmitterNVC0(type);
3434 }
3435 
3436 } // namespace nv50_ir
3437