• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2012 Christoph Bumiller
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "codegen/nv50_ir_target_nvc0.h"
24 
25 // CodeEmitter for GK110 encoding of the Fermi/Kepler ISA.
26 
27 namespace nv50_ir {
28 
// Code emitter producing the GK110 encoding of the Fermi/Kepler ISA.
//
// Each emitXXX() member writes one instruction into the `code` word buffer
// (code[0] / code[1]); the buffer itself is not declared in this class and is
// presumably inherited from CodeEmitter -- confirm against the base class.
class CodeEmitterGK110 : public CodeEmitter
{
public:
   CodeEmitterGK110(const TargetNVC0 *);

   // Virtual entry points (presumably overriding CodeEmitter -- confirm).
   virtual bool emitInstruction(Instruction *);
   virtual uint32_t getMinEncodingSize(const Instruction *) const;
   virtual void prepareEmission(Function *);

   // Records the program type in progType.
   inline void setProgramType(Program::Type pType) { progType = pType; }

private:
   const TargetNVC0 *targNVC0;

   Program::Type progType;

   // NOTE(review): initialised by the constructor, which is not visible here.
   const bool writeIssueDelays;

private:
   // Generic instruction forms shared by several opcodes.
   void emitForm_21(const Instruction *, uint32_t opc2, uint32_t opc1);
   void emitForm_C(const Instruction *, uint32_t opc, uint8_t ctg);
   void emitForm_L(const Instruction *, uint32_t opc, uint8_t ctg, Modifier);

   void emitPredicate(const Instruction *);

   // Operand field helpers: constant-buffer addresses and immediates.
   void setCAddress14(const ValueRef&);
   void setShortImmediate(const Instruction *, const int s);
   void setImmediate32(const Instruction *, const int s, Modifier);
   void setSUConst16(const Instruction *, const int s);

   void modNegAbsF32_3b(const Instruction *, const int s);

   // Small field encoders; `pos` is a bit position across code[0]/code[1]
   // (word = pos / 32, bit = pos % 32) in the definitions visible in this file.
   void emitCondCode(CondCode cc, int pos, uint8_t mask);
   void emitInterpMode(const Instruction *);
   void emitLoadStoreType(DataType ty, const int pos);
   void emitCachingMode(CacheMode c, const int pos);
   void emitSUGType(DataType, const int pos);
   void emitSUCachingMode(CacheMode c);

   inline uint8_t getSRegEncoding(const ValueRef&);

   void emitRoundMode(RoundMode, const int pos, const int rintPos);
   void emitRoundModeF(RoundMode, const int pos);
   void emitRoundModeI(RoundMode, const int pos);

   void emitNegAbs12(const Instruction *);

   // Per-opcode emitters.
   void emitNOP(const Instruction *);

   void emitLOAD(const Instruction *);
   void emitSTORE(const Instruction *);
   void emitMOV(const Instruction *);
   void emitATOM(const Instruction *);
   void emitCCTL(const Instruction *);

   void emitINTERP(const Instruction *);
   void emitAFETCH(const Instruction *);
   void emitPFETCH(const Instruction *);
   void emitVFETCH(const Instruction *);
   void emitEXPORT(const Instruction *);
   void emitOUT(const Instruction *);

   void emitUADD(const Instruction *);
   void emitFADD(const Instruction *);
   void emitDADD(const Instruction *);
   void emitIMUL(const Instruction *);
   void emitFMUL(const Instruction *);
   void emitDMUL(const Instruction *);
   void emitIMAD(const Instruction *);
   void emitISAD(const Instruction *);
   void emitSHLADD(const Instruction *);
   void emitFMAD(const Instruction *);
   void emitDMAD(const Instruction *);
   void emitMADSP(const Instruction *i);

   void emitNOT(const Instruction *);
   void emitLogicOp(const Instruction *, uint8_t subOp);
   void emitPOPC(const Instruction *);
   void emitINSBF(const Instruction *);
   void emitEXTBF(const Instruction *);
   void emitBFIND(const Instruction *);
   void emitPERMT(const Instruction *);
   void emitShift(const Instruction *);

   void emitSFnOp(const Instruction *, uint8_t subOp);

   void emitCVT(const Instruction *);
   void emitMINMAX(const Instruction *);
   void emitPreOp(const Instruction *);

   void emitSET(const CmpInstruction *);
   void emitSLCT(const CmpInstruction *);
   void emitSELP(const Instruction *);

   void emitTEXBAR(const Instruction *);
   void emitTEX(const TexInstruction *);
   void emitTEXCSAA(const TexInstruction *);
   void emitTXQ(const TexInstruction *);

   void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);

   void emitPIXLD(const Instruction *);

   void emitBAR(const Instruction *);
   void emitMEMBAR(const Instruction *);

   void emitFlow(const Instruction *);

   void emitVOTE(const Instruction *);

   void emitSULDGB(const TexInstruction *);
   void emitSUSTGx(const TexInstruction *);
   void emitSUCLAMPMode(uint16_t);
   void emitSUCalc(Instruction *);

   void emitVSHL(const Instruction *);
   void emitVectorSubOp(const Instruction *);

   // Register id field writers; a missing value is encoded as GK110_GPR_ZERO.
   inline void defId(const ValueDef&, const int pos);
   inline void srcId(const ValueRef&, const int pos);
   inline void srcId(const ValueRef *, const int pos);
   inline void srcId(const Instruction *, int s, const int pos);

   inline void srcAddr32(const ValueRef&, const int pos); // address / 4

   // True when the referenced immediate has bits set under the type-dependent
   // mask tested in the definition (0xfff for F32, 0xfff00000 otherwise).
   inline bool isLIMM(const ValueRef&, DataType ty, bool mod = false);
};
156 
// Register id written into an operand field when the value is absent.
#define GK110_GPR_ZERO 255

// Bit-setting helper macros.
//
// `b` is a bit position written as bare hex digits (no 0x prefix): the macros
// paste it after "0x", then set bit (b % 32) of code[b / 32].  They expect an
// instruction pointer named `i` and the `code` buffer to be in scope.
//
// Each statement macro is wrapped in do { } while (0) so it behaves as a single
// statement.  With a bare `if` expansion, writing
//    if (cond) NEG_(3b, 0); else ...
// would silently attach the `else` to the macro's hidden `if`.

// Set bit `b` when source `s` carries a negate modifier.
#define NEG_(b, s) \
   do { \
      if (i->src(s).mod.neg()) \
         code[(0x##b) / 32] |= 1 << ((0x##b) % 32); \
   } while (0)

// Set bit `b` when source `s` carries an absolute-value modifier.
#define ABS_(b, s) \
   do { \
      if (i->src(s).mod.abs()) \
         code[(0x##b) / 32] |= 1 << ((0x##b) % 32); \
   } while (0)

// Set bit `b` when source `s` carries the NV50_IR_MOD_NOT modifier.
#define NOT_(b, s) \
   do { \
      if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT)) \
         code[(0x##b) / 32] |= 1 << ((0x##b) % 32); \
   } while (0)

// Set bit `b` when the instruction's ftz / dnz flag is set.
#define FTZ_(b) \
   do { \
      if (i->ftz) \
         code[(0x##b) / 32] |= 1 << ((0x##b) % 32); \
   } while (0)
#define DNZ_(b) \
   do { \
      if (i->dnz) \
         code[(0x##b) / 32] |= 1 << ((0x##b) % 32); \
   } while (0)

// Set bit `b` when the instruction saturates its result.
#define SAT_(b) \
   do { \
      if (i->saturate) \
         code[(0x##b) / 32] |= 1 << ((0x##b) % 32); \
   } while (0)

// Emit the rounding mode at bit `b` via emitRoundMode<t>() (t is F or I).
#define RND_(b, t) emitRoundMode##t(i->rnd, 0x##b)

// Storage data of the representative value behind a source / definition.
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) ((a).rep()->reg.data)
176 
srcId(const ValueRef & src,const int pos)177 void CodeEmitterGK110::srcId(const ValueRef& src, const int pos)
178 {
179    code[pos / 32] |= (src.get() ? SDATA(src).id : GK110_GPR_ZERO) << (pos % 32);
180 }
181 
srcId(const ValueRef * src,const int pos)182 void CodeEmitterGK110::srcId(const ValueRef *src, const int pos)
183 {
184    code[pos / 32] |= (src ? SDATA(*src).id : GK110_GPR_ZERO) << (pos % 32);
185 }
186 
srcId(const Instruction * insn,int s,int pos)187 void CodeEmitterGK110::srcId(const Instruction *insn, int s, int pos)
188 {
189    int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : GK110_GPR_ZERO;
190    code[pos / 32] |= r << (pos % 32);
191 }
192 
srcAddr32(const ValueRef & src,const int pos)193 void CodeEmitterGK110::srcAddr32(const ValueRef& src, const int pos)
194 {
195    code[pos / 32] |= (SDATA(src).offset >> 2) << (pos % 32);
196 }
197 
defId(const ValueDef & def,const int pos)198 void CodeEmitterGK110::defId(const ValueDef& def, const int pos)
199 {
200    code[pos / 32] |= (def.get() ? DDATA(def).id : GK110_GPR_ZERO) << (pos % 32);
201 }
202 
isLIMM(const ValueRef & ref,DataType ty,bool mod)203 bool CodeEmitterGK110::isLIMM(const ValueRef& ref, DataType ty, bool mod)
204 {
205    const ImmediateValue *imm = ref.get()->asImm();
206 
207    return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000));
208 }
209 
210 void
emitRoundMode(RoundMode rnd,const int pos,const int rintPos)211 CodeEmitterGK110::emitRoundMode(RoundMode rnd, const int pos, const int rintPos)
212 {
213    bool rint = false;
214    uint8_t n;
215 
216    switch (rnd) {
217    case ROUND_MI: rint = true; /* fall through */ case ROUND_M: n = 1; break;
218    case ROUND_PI: rint = true; /* fall through */ case ROUND_P: n = 2; break;
219    case ROUND_ZI: rint = true; /* fall through */ case ROUND_Z: n = 3; break;
220    default:
221       rint = rnd == ROUND_NI;
222       n = 0;
223       assert(rnd == ROUND_N || rnd == ROUND_NI);
224       break;
225    }
226    code[pos / 32] |= n << (pos % 32);
227    if (rint && rintPos >= 0)
228       code[rintPos / 32] |= 1 << (rintPos % 32);
229 }
230 
231 void
emitRoundModeF(RoundMode rnd,const int pos)232 CodeEmitterGK110::emitRoundModeF(RoundMode rnd, const int pos)
233 {
234    uint8_t n;
235 
236    switch (rnd) {
237    case ROUND_M: n = 1; break;
238    case ROUND_P: n = 2; break;
239    case ROUND_Z: n = 3; break;
240    default:
241       n = 0;
242       assert(rnd == ROUND_N);
243       break;
244    }
245    code[pos / 32] |= n << (pos % 32);
246 }
247 
248 void
emitRoundModeI(RoundMode rnd,const int pos)249 CodeEmitterGK110::emitRoundModeI(RoundMode rnd, const int pos)
250 {
251    uint8_t n;
252 
253    switch (rnd) {
254    case ROUND_MI: n = 1; break;
255    case ROUND_PI: n = 2; break;
256    case ROUND_ZI: n = 3; break;
257    default:
258       n = 0;
259       assert(rnd == ROUND_NI);
260       break;
261    }
262    code[pos / 32] |= n << (pos % 32);
263 }
264 
emitCondCode(CondCode cc,int pos,uint8_t mask)265 void CodeEmitterGK110::emitCondCode(CondCode cc, int pos, uint8_t mask)
266 {
267    uint8_t n;
268 
269    switch (cc) {
270    case CC_FL:  n = 0x00; break;
271    case CC_LT:  n = 0x01; break;
272    case CC_EQ:  n = 0x02; break;
273    case CC_LE:  n = 0x03; break;
274    case CC_GT:  n = 0x04; break;
275    case CC_NE:  n = 0x05; break;
276    case CC_GE:  n = 0x06; break;
277    case CC_LTU: n = 0x09; break;
278    case CC_EQU: n = 0x0a; break;
279    case CC_LEU: n = 0x0b; break;
280    case CC_GTU: n = 0x0c; break;
281    case CC_NEU: n = 0x0d; break;
282    case CC_GEU: n = 0x0e; break;
283    case CC_TR:  n = 0x0f; break;
284    case CC_NO:  n = 0x10; break;
285    case CC_NC:  n = 0x11; break;
286    case CC_NS:  n = 0x12; break;
287    case CC_NA:  n = 0x13; break;
288    case CC_A:   n = 0x14; break;
289    case CC_S:   n = 0x15; break;
290    case CC_C:   n = 0x16; break;
291    case CC_O:   n = 0x17; break;
292    default:
293       n = 0;
294       assert(!"invalid condition code");
295       break;
296    }
297    code[pos / 32] |= (n & mask) << (pos % 32);
298 }
299 
300 void
emitPredicate(const Instruction * i)301 CodeEmitterGK110::emitPredicate(const Instruction *i)
302 {
303    if (i->predSrc >= 0) {
304       srcId(i->src(i->predSrc), 18);
305       if (i->cc == CC_NOT_P)
306          code[0] |= 8 << 18; // negate
307       assert(i->getPredicate()->reg.file == FILE_PREDICATE);
308    } else {
309       code[0] |= 7 << 18;
310    }
311 }
312 
313 void
setCAddress14(const ValueRef & src)314 CodeEmitterGK110::setCAddress14(const ValueRef& src)
315 {
316    const Storage& res = src.get()->asSym()->reg;
317    const int32_t addr = res.data.offset / 4;
318 
319    code[0] |= (addr & 0x01ff) << 23;
320    code[1] |= (addr & 0x3e00) >> 9;
321    code[1] |= res.fileIndex << 5;
322 }
323 
324 void
setShortImmediate(const Instruction * i,const int s)325 CodeEmitterGK110::setShortImmediate(const Instruction *i, const int s)
326 {
327    const uint32_t u32 = i->getSrc(s)->asImm()->reg.data.u32;
328    const uint64_t u64 = i->getSrc(s)->asImm()->reg.data.u64;
329 
330    if (i->sType == TYPE_F32) {
331       assert(!(u32 & 0x00000fff));
332       code[0] |= ((u32 & 0x001ff000) >> 12) << 23;
333       code[1] |= ((u32 & 0x7fe00000) >> 21);
334       code[1] |= ((u32 & 0x80000000) >> 4);
335    } else
336    if (i->sType == TYPE_F64) {
337       assert(!(u64 & 0x00000fffffffffffULL));
338       code[0] |= ((u64 & 0x001ff00000000000ULL) >> 44) << 23;
339       code[1] |= ((u64 & 0x7fe0000000000000ULL) >> 53);
340       code[1] |= ((u64 & 0x8000000000000000ULL) >> 36);
341    } else {
342       assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000);
343       code[0] |= (u32 & 0x001ff) << 23;
344       code[1] |= (u32 & 0x7fe00) >> 9;
345       code[1] |= (u32 & 0x80000) << 8;
346    }
347 }
348 
349 void
setImmediate32(const Instruction * i,const int s,Modifier mod)350 CodeEmitterGK110::setImmediate32(const Instruction *i, const int s,
351                                  Modifier mod)
352 {
353    uint32_t u32 = i->getSrc(s)->asImm()->reg.data.u32;
354 
355    if (mod) {
356       ImmediateValue imm(i->getSrc(s)->asImm(), i->sType);
357       mod.applyTo(imm);
358       u32 = imm.reg.data.u32;
359    }
360 
361    code[0] |= u32 << 23;
362    code[1] |= u32 >> 9;
363 }
364 
365 void
emitForm_L(const Instruction * i,uint32_t opc,uint8_t ctg,Modifier mod)366 CodeEmitterGK110::emitForm_L(const Instruction *i, uint32_t opc, uint8_t ctg,
367                              Modifier mod)
368 {
369    code[0] = ctg;
370    code[1] = opc << 20;
371 
372    emitPredicate(i);
373 
374    defId(i->def(0), 2);
375 
376    for (int s = 0; s < 3 && i->srcExists(s); ++s) {
377       switch (i->src(s).getFile()) {
378       case FILE_GPR:
379          srcId(i->src(s), s ? 42 : 10);
380          break;
381       case FILE_IMMEDIATE:
382          setImmediate32(i, s, mod);
383          break;
384       default:
385          break;
386       }
387    }
388 }
389 
390 
391 void
emitForm_C(const Instruction * i,uint32_t opc,uint8_t ctg)392 CodeEmitterGK110::emitForm_C(const Instruction *i, uint32_t opc, uint8_t ctg)
393 {
394    code[0] = ctg;
395    code[1] = opc << 20;
396 
397    emitPredicate(i);
398 
399    defId(i->def(0), 2);
400 
401    switch (i->src(0).getFile()) {
402    case FILE_MEMORY_CONST:
403       code[1] |= 0x4 << 28;
404       setCAddress14(i->src(0));
405       break;
406    case FILE_GPR:
407       code[1] |= 0xc << 28;
408       srcId(i->src(0), 23);
409       break;
410    default:
411       assert(0);
412       break;
413    }
414 }
415 
416 // 0x2 for GPR, c[] and 0x1 for short immediate
417 void
emitForm_21(const Instruction * i,uint32_t opc2,uint32_t opc1)418 CodeEmitterGK110::emitForm_21(const Instruction *i, uint32_t opc2,
419                               uint32_t opc1)
420 {
421    const bool imm = i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE;
422 
423    int s1 = 23;
424    if (i->srcExists(2) && i->src(2).getFile() == FILE_MEMORY_CONST)
425       s1 = 42;
426 
427    if (imm) {
428       code[0] = 0x1;
429       code[1] = opc1 << 20;
430    } else {
431       code[0] = 0x2;
432       code[1] = (0xc << 28) | (opc2 << 20);
433    }
434 
435    emitPredicate(i);
436 
437    defId(i->def(0), 2);
438 
439    for (int s = 0; s < 3 && i->srcExists(s); ++s) {
440       switch (i->src(s).getFile()) {
441       case FILE_MEMORY_CONST:
442          code[1] &= (s == 2) ? ~(0x4 << 28) : ~(0x8 << 28);
443          setCAddress14(i->src(s));
444          break;
445       case FILE_IMMEDIATE:
446          setShortImmediate(i, s);
447          break;
448       case FILE_GPR:
449          srcId(i->src(s), s ? ((s == 2) ? 42 : s1) : 10);
450          break;
451       default:
452          if (i->op == OP_SELP) {
453             assert(s == 2 && i->src(s).getFile() == FILE_PREDICATE);
454             srcId(i->src(s), 42);
455          }
456          // ignore here, can be predicate or flags, but must not be address
457          break;
458       }
459    }
460    // 0x0 = invalid
461    // 0xc = rrr
462    // 0x8 = rrc
463    // 0x4 = rcr
464    assert(imm || (code[1] & (0xc << 28)));
465 }
466 
467 inline void
modNegAbsF32_3b(const Instruction * i,const int s)468 CodeEmitterGK110::modNegAbsF32_3b(const Instruction *i, const int s)
469 {
470    if (i->src(s).mod.abs()) code[1] &= ~(1 << 27);
471    if (i->src(s).mod.neg()) code[1] ^=  (1 << 27);
472 }
473 
474 void
emitNOP(const Instruction * i)475 CodeEmitterGK110::emitNOP(const Instruction *i)
476 {
477    code[0] = 0x00003c02;
478    code[1] = 0x85800000;
479 
480    if (i)
481       emitPredicate(i);
482    else
483       code[0] = 0x001c3c02;
484 }
485 
486 void
emitFMAD(const Instruction * i)487 CodeEmitterGK110::emitFMAD(const Instruction *i)
488 {
489    assert(!isLIMM(i->src(1), TYPE_F32));
490 
491    emitForm_21(i, 0x0c0, 0x940);
492 
493    NEG_(34, 2);
494    SAT_(35);
495    RND_(36, F);
496    FTZ_(38);
497    DNZ_(39);
498 
499    bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
500 
501    if (code[0] & 0x1) {
502       if (neg1)
503          code[1] ^= 1 << 27;
504    } else
505    if (neg1) {
506       code[1] |= 1 << 19;
507    }
508 }
509 
510 void
emitDMAD(const Instruction * i)511 CodeEmitterGK110::emitDMAD(const Instruction *i)
512 {
513    assert(!i->saturate);
514    assert(!i->ftz);
515 
516    emitForm_21(i, 0x1b8, 0xb38);
517 
518    NEG_(34, 2);
519    RND_(36, F);
520 
521    bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
522 
523    if (code[0] & 0x1) {
524       if (neg1)
525          code[1] ^= 1 << 27;
526    } else
527    if (neg1) {
528       code[1] |= 1 << 19;
529    }
530 }
531 
532 void
emitMADSP(const Instruction * i)533 CodeEmitterGK110::emitMADSP(const Instruction *i)
534 {
535    emitForm_21(i, 0x140, 0xa40);
536 
537    if (i->subOp == NV50_IR_SUBOP_MADSP_SD) {
538       code[1] |= 0x00c00000;
539    } else {
540       code[1] |= (i->subOp & 0x00f) << 19; // imadp1
541       code[1] |= (i->subOp & 0x0f0) << 20; // imadp2
542       code[1] |= (i->subOp & 0x100) << 11; // imadp3
543       code[1] |= (i->subOp & 0x200) << 15; // imadp3
544       code[1] |= (i->subOp & 0xc00) << 12; // imadp3
545    }
546 
547    if (i->flagsDef >= 0)
548       code[1] |= 1 << 18;
549 }
550 
551 void
emitFMUL(const Instruction * i)552 CodeEmitterGK110::emitFMUL(const Instruction *i)
553 {
554    bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
555 
556    assert(i->postFactor >= -3 && i->postFactor <= 3);
557 
558    if (isLIMM(i->src(1), TYPE_F32)) {
559       emitForm_L(i, 0x200, 0x2, Modifier(0));
560 
561       FTZ_(38);
562       DNZ_(39);
563       SAT_(3a);
564       if (neg)
565          code[1] ^= 1 << 22;
566 
567       assert(i->postFactor == 0);
568    } else {
569       emitForm_21(i, 0x234, 0xc34);
570       code[1] |= ((i->postFactor > 0) ?
571                   (7 - i->postFactor) : (0 - i->postFactor)) << 12;
572 
573       RND_(2a, F);
574       FTZ_(2f);
575       DNZ_(30);
576       SAT_(35);
577 
578       if (code[0] & 0x1) {
579          if (neg)
580             code[1] ^= 1 << 27;
581       } else
582       if (neg) {
583          code[1] |= 1 << 19;
584       }
585    }
586 }
587 
588 void
emitDMUL(const Instruction * i)589 CodeEmitterGK110::emitDMUL(const Instruction *i)
590 {
591    bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
592 
593    assert(!i->postFactor);
594    assert(!i->saturate);
595    assert(!i->ftz);
596    assert(!i->dnz);
597 
598    emitForm_21(i, 0x240, 0xc40);
599 
600    RND_(2a, F);
601 
602    if (code[0] & 0x1) {
603       if (neg)
604          code[1] ^= 1 << 27;
605    } else
606    if (neg) {
607       code[1] |= 1 << 19;
608    }
609 }
610 
611 void
emitIMUL(const Instruction * i)612 CodeEmitterGK110::emitIMUL(const Instruction *i)
613 {
614    assert(!i->src(0).mod.neg() && !i->src(1).mod.neg());
615    assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
616 
617    if (i->src(1).getFile() == FILE_IMMEDIATE) {
618       emitForm_L(i, 0x280, 2, Modifier(0));
619 
620       if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
621          code[1] |= 1 << 24;
622       if (i->sType == TYPE_S32)
623          code[1] |= 3 << 25;
624    } else {
625       emitForm_21(i, 0x21c, 0xc1c);
626 
627       if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
628          code[1] |= 1 << 10;
629       if (i->sType == TYPE_S32)
630          code[1] |= 3 << 11;
631    }
632 }
633 
634 void
emitFADD(const Instruction * i)635 CodeEmitterGK110::emitFADD(const Instruction *i)
636 {
637    if (isLIMM(i->src(1), TYPE_F32)) {
638       assert(i->rnd == ROUND_N);
639       assert(!i->saturate);
640 
641       Modifier mod = i->src(1).mod ^
642          Modifier(i->op == OP_SUB ? NV50_IR_MOD_NEG : 0);
643 
644       emitForm_L(i, 0x400, 0, mod);
645 
646       FTZ_(3a);
647       NEG_(3b, 0);
648       ABS_(39, 0);
649    } else {
650       emitForm_21(i, 0x22c, 0xc2c);
651 
652       FTZ_(2f);
653       RND_(2a, F);
654       ABS_(31, 0);
655       NEG_(33, 0);
656       SAT_(35);
657 
658       if (code[0] & 0x1) {
659          modNegAbsF32_3b(i, 1);
660          if (i->op == OP_SUB) code[1] ^= 1 << 27;
661       } else {
662          ABS_(34, 1);
663          NEG_(30, 1);
664          if (i->op == OP_SUB) code[1] ^= 1 << 16;
665       }
666    }
667 }
668 
669 void
emitDADD(const Instruction * i)670 CodeEmitterGK110::emitDADD(const Instruction *i)
671 {
672    assert(!i->saturate);
673    assert(!i->ftz);
674 
675    emitForm_21(i, 0x238, 0xc38);
676    RND_(2a, F);
677    ABS_(31, 0);
678    NEG_(33, 0);
679    if (code[0] & 0x1) {
680       modNegAbsF32_3b(i, 1);
681       if (i->op == OP_SUB) code[1] ^= 1 << 27;
682    } else {
683       NEG_(30, 1);
684       ABS_(34, 1);
685       if (i->op == OP_SUB) code[1] ^= 1 << 16;
686    }
687 }
688 
689 void
emitUADD(const Instruction * i)690 CodeEmitterGK110::emitUADD(const Instruction *i)
691 {
692    uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(1).mod.neg();
693 
694    if (i->op == OP_SUB)
695       addOp ^= 1;
696 
697    assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
698 
699    if (isLIMM(i->src(1), TYPE_S32)) {
700       emitForm_L(i, 0x400, 1, Modifier((addOp & 1) ? NV50_IR_MOD_NEG : 0));
701 
702       if (addOp & 2)
703          code[1] |= 1 << 27;
704 
705       assert(!i->defExists(1));
706       assert(i->flagsSrc < 0);
707 
708       SAT_(39);
709    } else {
710       emitForm_21(i, 0x208, 0xc08);
711 
712       assert(addOp != 3); // would be add-plus-one
713 
714       code[1] |= addOp << 19;
715 
716       if (i->defExists(1))
717          code[1] |= 1 << 18; // write carry
718       if (i->flagsSrc >= 0)
719          code[1] |= 1 << 14; // add carry
720 
721       SAT_(35);
722    }
723 }
724 
725 void
emitIMAD(const Instruction * i)726 CodeEmitterGK110::emitIMAD(const Instruction *i)
727 {
728    uint8_t addOp =
729       i->src(2).mod.neg() | ((i->src(0).mod.neg() ^ i->src(1).mod.neg()) << 1);
730 
731    emitForm_21(i, 0x100, 0xa00);
732 
733    assert(addOp != 3);
734    code[1] |= addOp << 26;
735 
736    if (i->sType == TYPE_S32)
737       code[1] |= (1 << 19) | (1 << 24);
738 
739    if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
740       code[1] |= 1 << 25;
741 
742    if (i->flagsDef >= 0) code[1] |= 1 << 18;
743    if (i->flagsSrc >= 0) code[1] |= 1 << 20;
744 
745    SAT_(35);
746 }
747 
748 void
emitISAD(const Instruction * i)749 CodeEmitterGK110::emitISAD(const Instruction *i)
750 {
751    assert(i->dType == TYPE_S32 || i->dType == TYPE_U32);
752 
753    emitForm_21(i, 0x1f4, 0xb74);
754 
755    if (i->dType == TYPE_S32)
756       code[1] |= 1 << 19;
757 }
758 
759 void
emitSHLADD(const Instruction * i)760 CodeEmitterGK110::emitSHLADD(const Instruction *i)
761 {
762    uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(2).mod.neg();
763    const ImmediateValue *imm = i->src(1).get()->asImm();
764    assert(imm);
765 
766    if (i->src(2).getFile() == FILE_IMMEDIATE) {
767       code[0] = 0x1;
768       code[1] = 0xc0c << 20;
769    } else {
770       code[0] = 0x2;
771       code[1] = 0x20c << 20;
772    }
773    code[1] |= addOp << 19;
774 
775    emitPredicate(i);
776 
777    defId(i->def(0), 2);
778    srcId(i->src(0), 10);
779 
780    if (i->flagsDef >= 0)
781       code[1] |= 1 << 18;
782 
783    assert(!(imm->reg.data.u32 & 0xffffffe0));
784    code[1] |= imm->reg.data.u32 << 10;
785 
786    switch (i->src(2).getFile()) {
787    case FILE_GPR:
788       assert(code[0] & 0x2);
789       code[1] |= 0xc << 28;
790       srcId(i->src(2), 23);
791       break;
792    case FILE_MEMORY_CONST:
793       assert(code[0] & 0x2);
794       code[1] |= 0x4 << 28;
795       setCAddress14(i->src(2));
796       break;
797    case FILE_IMMEDIATE:
798       assert(code[0] & 0x1);
799       setShortImmediate(i, 2);
800       break;
801    default:
802       assert(!"bad src2 file");
803       break;
804    }
805 }
806 
807 void
emitNOT(const Instruction * i)808 CodeEmitterGK110::emitNOT(const Instruction *i)
809 {
810    code[0] = 0x0003fc02; // logop(mov2) dst, 0, not src
811    code[1] = 0x22003800;
812 
813    emitPredicate(i);
814 
815    defId(i->def(0), 2);
816 
817    switch (i->src(0).getFile()) {
818    case FILE_GPR:
819       code[1] |= 0xc << 28;
820       srcId(i->src(0), 23);
821       break;
822    case FILE_MEMORY_CONST:
823       code[1] |= 0x4 << 28;
824       setCAddress14(i->src(0));
825       break;
826    default:
827       assert(0);
828       break;
829    }
830 }
831 
832 void
emitLogicOp(const Instruction * i,uint8_t subOp)833 CodeEmitterGK110::emitLogicOp(const Instruction *i, uint8_t subOp)
834 {
835    if (i->def(0).getFile() == FILE_PREDICATE) {
836       code[0] = 0x00000002 | (subOp << 27);
837       code[1] = 0x84800000;
838 
839       emitPredicate(i);
840 
841       defId(i->def(0), 5);
842       srcId(i->src(0), 14);
843       if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 17;
844       srcId(i->src(1), 32);
845       if (i->src(1).mod == Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 3;
846 
847       if (i->defExists(1)) {
848          defId(i->def(1), 2);
849       } else {
850          code[0] |= 7 << 2;
851       }
852       // (a OP b) OP c
853       if (i->predSrc != 2 && i->srcExists(2)) {
854          code[1] |= subOp << 16;
855          srcId(i->src(2), 42);
856          if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 13;
857       } else {
858          code[1] |= 7 << 10;
859       }
860    } else
861    if (isLIMM(i->src(1), TYPE_S32)) {
862       emitForm_L(i, 0x200, 0, i->src(1).mod);
863       code[1] |= subOp << 24;
864       NOT_(3a, 0);
865    } else {
866       emitForm_21(i, 0x220, 0xc20);
867       code[1] |= subOp << 12;
868       NOT_(2a, 0);
869       NOT_(2b, 1);
870    }
871 }
872 
873 void
emitPOPC(const Instruction * i)874 CodeEmitterGK110::emitPOPC(const Instruction *i)
875 {
876    assert(!isLIMM(i->src(1), TYPE_S32, true));
877 
878    emitForm_21(i, 0x204, 0xc04);
879 
880    NOT_(2a, 0);
881    if (!(code[0] & 0x1))
882       NOT_(2b, 1);
883 }
884 
885 void
emitINSBF(const Instruction * i)886 CodeEmitterGK110::emitINSBF(const Instruction *i)
887 {
888    emitForm_21(i, 0x1f8, 0xb78);
889 }
890 
891 void
emitEXTBF(const Instruction * i)892 CodeEmitterGK110::emitEXTBF(const Instruction *i)
893 {
894    emitForm_21(i, 0x600, 0xc00);
895 
896    if (i->dType == TYPE_S32)
897       code[1] |= 0x80000;
898    if (i->subOp == NV50_IR_SUBOP_EXTBF_REV)
899       code[1] |= 0x800;
900 }
901 
902 void
emitBFIND(const Instruction * i)903 CodeEmitterGK110::emitBFIND(const Instruction *i)
904 {
905    emitForm_C(i, 0x218, 0x2);
906 
907    if (i->dType == TYPE_S32)
908       code[1] |= 0x80000;
909    if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
910       code[1] |= 0x800;
911    if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT)
912       code[1] |= 0x1000;
913 }
914 
915 void
emitPERMT(const Instruction * i)916 CodeEmitterGK110::emitPERMT(const Instruction *i)
917 {
918    emitForm_21(i, 0x1e0, 0xb60);
919 
920    code[1] |= i->subOp << 19;
921 }
922 
923 void
emitShift(const Instruction * i)924 CodeEmitterGK110::emitShift(const Instruction *i)
925 {
926    if (i->op == OP_SHR) {
927       emitForm_21(i, 0x214, 0xc14);
928       if (isSignedType(i->dType))
929          code[1] |= 1 << 19;
930    } else {
931       emitForm_21(i, 0x224, 0xc24);
932    }
933 
934    if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP)
935       code[1] |= 1 << 10;
936 }
937 
938 void
emitPreOp(const Instruction * i)939 CodeEmitterGK110::emitPreOp(const Instruction *i)
940 {
941    emitForm_C(i, 0x248, 0x2);
942 
943    if (i->op == OP_PREEX2)
944       code[1] |= 1 << 10;
945 
946    NEG_(30, 0);
947    ABS_(34, 0);
948 }
949 
950 void
emitSFnOp(const Instruction * i,uint8_t subOp)951 CodeEmitterGK110::emitSFnOp(const Instruction *i, uint8_t subOp)
952 {
953    code[0] = 0x00000002 | (subOp << 23);
954    code[1] = 0x84000000;
955 
956    emitPredicate(i);
957 
958    defId(i->def(0), 2);
959    srcId(i->src(0), 10);
960 
961    NEG_(33, 0);
962    ABS_(31, 0);
963    SAT_(35);
964 }
965 
966 void
emitMINMAX(const Instruction * i)967 CodeEmitterGK110::emitMINMAX(const Instruction *i)
968 {
969    uint32_t op2, op1;
970 
971    switch (i->dType) {
972    case TYPE_U32:
973    case TYPE_S32:
974       op2 = 0x210;
975       op1 = 0xc10;
976       break;
977    case TYPE_F32:
978       op2 = 0x230;
979       op1 = 0xc30;
980       break;
981    case TYPE_F64:
982       op2 = 0x228;
983       op1 = 0xc28;
984       break;
985    default:
986       assert(0);
987       op2 = 0;
988       op1 = 0;
989       break;
990    }
991    emitForm_21(i, op2, op1);
992 
993    if (i->dType == TYPE_S32)
994       code[1] |= 1 << 19;
995    code[1] |= (i->op == OP_MIN) ? 0x1c00 : 0x3c00; // [!]pt
996 
997    FTZ_(2f);
998    ABS_(31, 0);
999    NEG_(33, 0);
1000    if (code[0] & 0x1) {
1001       modNegAbsF32_3b(i, 1);
1002    } else {
1003       ABS_(34, 1);
1004       NEG_(30, 1);
1005    }
1006 }
1007 
1008 void
emitCVT(const Instruction * i)1009 CodeEmitterGK110::emitCVT(const Instruction *i)
1010 {
1011    const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
1012    const bool f2i = !isFloatType(i->dType) && isFloatType(i->sType);
1013    const bool i2f = isFloatType(i->dType) && !isFloatType(i->sType);
1014 
1015    bool sat = i->saturate;
1016    bool abs = i->src(0).mod.abs();
1017    bool neg = i->src(0).mod.neg();
1018 
1019    RoundMode rnd = i->rnd;
1020 
1021    switch (i->op) {
1022    case OP_CEIL:  rnd = f2f ? ROUND_PI : ROUND_P; break;
1023    case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;
1024    case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;
1025    case OP_SAT: sat = true; break;
1026    case OP_NEG: neg = !neg; break;
1027    case OP_ABS: abs = true; neg = false; break;
1028    default:
1029       break;
1030    }
1031 
1032    DataType dType;
1033 
1034    if (i->op == OP_NEG && i->dType == TYPE_U32)
1035       dType = TYPE_S32;
1036    else
1037       dType = i->dType;
1038 
1039 
1040    uint32_t op;
1041 
1042    if      (f2f) op = 0x254;
1043    else if (f2i) op = 0x258;
1044    else if (i2f) op = 0x25c;
1045    else          op = 0x260;
1046 
1047    emitForm_C(i, op, 0x2);
1048 
1049    FTZ_(2f);
1050    if (neg) code[1] |= 1 << 16;
1051    if (abs) code[1] |= 1 << 20;
1052    if (sat) code[1] |= 1 << 21;
1053 
1054    emitRoundMode(rnd, 32 + 10, f2f ? (32 + 13) : -1);
1055 
1056    code[0] |= typeSizeofLog2(dType) << 10;
1057    code[0] |= typeSizeofLog2(i->sType) << 12;
1058    code[1] |= i->subOp << 12;
1059 
1060    if (isSignedIntType(dType))
1061       code[0] |= 0x4000;
1062    if (isSignedIntType(i->sType))
1063       code[0] |= 0x8000;
1064 }
1065 
// Emit a SET-family comparison (OP_SET, OP_SET_AND, OP_SET_OR, OP_SET_XOR).
//
// Two encodings are used depending on the register file of def(0): one that
// writes predicates and one for every other destination. Within each, the
// opcode pair passed to emitForm_21() is chosen by the source type (F32, F64,
// or a default used for all remaining types).
void
CodeEmitterGK110::emitSET(const CmpInstruction *i)
{
   uint16_t op1, op2;

   if (i->def(0).getFile() == FILE_PREDICATE) {
      // predicate destination
      switch (i->sType) {
      case TYPE_F32: op2 = 0x1d8; op1 = 0xb58; break;
      case TYPE_F64: op2 = 0x1c0; op1 = 0xb40; break;
      default:
         op2 = 0x1b0;
         op1 = 0xb30;
         break;
      }
      emitForm_21(i, op2, op1);

      NEG_(2e, 0);
      ABS_(9, 0);
      // Bit 0 of code[0], as left by emitForm_21(), decides how the
      // modifiers of source 1 are encoded.
      if (!(code[0] & 0x1)) {
         NEG_(8, 1);
         ABS_(2f, 1);
      } else {
         modNegAbsF32_3b(i, 1);
      }
      FTZ_(32);

      // normal DST field is negated predicate result
      // Move bits 2..4 up to bits 5..7 and clear bits 2..7 beforehand, then
      // fill bits 2..4 with the second def, or with 0x1c (all three bits set)
      // when there is none.
      code[0] = (code[0] & ~0xfc) | ((code[0] << 3) & 0xe0);
      if (i->defExists(1))
         defId(i->def(1), 2);
      else
         code[0] |= 0x1c;
   } else {
      // non-predicate destination
      switch (i->sType) {
      case TYPE_F32: op2 = 0x000; op1 = 0x800; break;
      case TYPE_F64: op2 = 0x080; op1 = 0x900; break;
      default:
         op2 = 0x1a8;
         op1 = 0xb28;
         break;
      }
      emitForm_21(i, op2, op1);

      NEG_(2e, 0);
      ABS_(39, 0);
      // Same source 1 modifier selection as above, at different positions.
      if (!(code[0] & 0x1)) {
         NEG_(38, 1);
         ABS_(2f, 1);
      } else {
         modNegAbsF32_3b(i, 1);
      }
      FTZ_(3a);

      // An F32 result sets one extra bit whose position depends on whether
      // the compared sources are floats (bit 23) or not (bit 15).
      if (i->dType == TYPE_F32) {
         if (isFloatType(i->sType))
            code[1] |= 1 << 23;
         else
            code[1] |= 1 << 15;
      }
   }
   // signed integer comparison
   if (i->sType == TYPE_S32)
      code[1] |= 1 << 19;

   if (i->op != OP_SET) {
      // Combining variants: encode the logic op at bit 16 of code[1] and
      // emit source 2 at position 0x2a.
      switch (i->op) {
      case OP_SET_AND: code[1] |= 0x0 << 16; break;
      case OP_SET_OR:  code[1] |= 0x1 << 16; break;
      case OP_SET_XOR: code[1] |= 0x2 << 16; break;
      default:
         assert(0);
         break;
      }
      srcId(i->src(2), 0x2a);
   } else {
      // Plain SET: no source 2, so bits 10..12 of code[1] are all set.
      code[1] |= 0x7 << 10;
   }
   // Float comparisons use position 0x33 with mask 0xf; all other types use
   // position 0x34 with mask 0x7.
   emitCondCode(i->setCond,
                isFloatType(i->sType) ? 0x33 : 0x34,
                isFloatType(i->sType) ? 0xf : 0x7);
}
1146 
1147 void
emitSLCT(const CmpInstruction * i)1148 CodeEmitterGK110::emitSLCT(const CmpInstruction *i)
1149 {
1150    CondCode cc = i->setCond;
1151    if (i->src(2).mod.neg())
1152       cc = reverseCondCode(cc);
1153 
1154    if (i->dType == TYPE_F32) {
1155       emitForm_21(i, 0x1d0, 0xb50);
1156       FTZ_(32);
1157       emitCondCode(cc, 0x33, 0xf);
1158    } else {
1159       emitForm_21(i, 0x1a0, 0xb20);
1160       emitCondCode(cc, 0x34, 0x7);
1161    }
1162 }
1163 
1164 static void
selpFlip(const FixupEntry * entry,uint32_t * code,const FixupData & data)1165 selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
1166 {
1167    int loc = entry->loc;
1168    if (data.force_persample_interp)
1169       code[loc + 1] |= 1 << 13;
1170    else
1171       code[loc + 1] &= ~(1 << 13);
1172 }
1173 
emitSELP(const Instruction * i)1174 void CodeEmitterGK110::emitSELP(const Instruction *i)
1175 {
1176    emitForm_21(i, 0x250, 0x050);
1177 
1178    if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
1179       code[1] |= 1 << 13;
1180 
1181    if (i->subOp == 1) {
1182       addInterp(0, 0, selpFlip);
1183    }
1184 }
1185 
emitTEXBAR(const Instruction * i)1186 void CodeEmitterGK110::emitTEXBAR(const Instruction *i)
1187 {
1188    code[0] = 0x0000003e | (i->subOp << 23);
1189    code[1] = 0x77000000;
1190 
1191    emitPredicate(i);
1192 }
1193 
emitTEXCSAA(const TexInstruction * i)1194 void CodeEmitterGK110::emitTEXCSAA(const TexInstruction *i)
1195 {
1196    code[0] = 0x00000002;
1197    code[1] = 0x76c00000;
1198 
1199    code[1] |= i->tex.r << 9;
1200    // code[1] |= i->tex.s << (9 + 8);
1201 
1202    if (i->tex.liveOnly)
1203       code[0] |= 0x80000000;
1204 
1205    defId(i->def(0), 2);
1206    srcId(i->src(0), 10);
1207 }
1208 
1209 static inline bool
isNextIndependentTex(const TexInstruction * i)1210 isNextIndependentTex(const TexInstruction *i)
1211 {
1212    if (!i->next || !isTextureOp(i->next->op))
1213       return false;
1214    if (i->getDef(0)->interfers(i->next->getSrc(0)))
1215       return false;
1216    return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1));
1217 }
1218 
// Emit a texture instruction (TEX, TXB, TXL, TXF, TXG, TXD, TXLQ).
//
// The base opcode depends on the op and on whether the resource index is
// indirect (tex.rIndirectSrc >= 0). For the direct forms, tex.r is stored in
// the instruction word at an op-specific position.
void
CodeEmitterGK110::emitTEX(const TexInstruction *i)
{
   const bool ind = i->tex.rIndirectSrc >= 0;

   if (ind) {
      // indirect forms: tex.r is not encoded
      code[0] = 0x00000002;
      switch (i->op) {
      case OP_TXD:
         code[1] = 0x7e000000;
         break;
      case OP_TXLQ:
         code[1] = 0x7e800000;
         break;
      case OP_TXF:
         code[1] = 0x78000000;
         break;
      case OP_TXG:
         code[1] = 0x7dc00000;
         break;
      default:
         code[1] = 0x7d800000;
         break;
      }
   } else {
      // direct forms: tex.r is encoded at bit 9, 13 or 15 depending on the op
      switch (i->op) {
      case OP_TXD:
         code[0] = 0x00000002;
         code[1] = 0x76000000;
         code[1] |= i->tex.r << 9;
         break;
      case OP_TXLQ:
         code[0] = 0x00000002;
         code[1] = 0x76800000;
         code[1] |= i->tex.r << 9;
         break;
      case OP_TXF:
         code[0] = 0x00000002;
         code[1] = 0x70000000;
         code[1] |= i->tex.r << 13;
         break;
      case OP_TXG:
         code[0] = 0x00000001;
         code[1] = 0x70000000;
         code[1] |= i->tex.r << 15;
         break;
      default:
         code[0] = 0x00000001;
         code[1] = 0x60000000;
         code[1] |= i->tex.r << 15;
         break;
      }
   }

   code[1] |= isNextIndependentTex(i) ? 0x1 : 0x2; // t : p mode

   if (i->tex.liveOnly)
      code[0] |= 0x80000000;

   // Only TXB and TXL add op-specific bits here; the remaining cases exist to
   // keep valid texture ops out of the assertion.
   switch (i->op) {
   case OP_TEX: break;
   case OP_TXB: code[1] |= 0x2000; break;
   case OP_TXL: code[1] |= 0x3000; break;
   case OP_TXF: break;
   case OP_TXG: break;
   case OP_TXD: break;
   case OP_TXLQ: break;
   default:
      assert(!"invalid texture op");
      break;
   }

   // Bit 0x1000 has inverted meaning for TXF: it is set when levelZero is
   // NOT requested, while for all other ops it is set when it is.
   if (i->op == OP_TXF) {
      if (!i->tex.levelZero)
         code[1] |= 0x1000;
   } else
   if (i->tex.levelZero) {
      code[1] |= 0x1000;
   }

   if (i->op != OP_TXD && i->tex.derivAll)
      code[1] |= 0x200;

   emitPredicate(i);

   code[1] |= i->tex.mask << 2;

   const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)

   defId(i->def(0), 2);
   srcId(i->src(0), 10);
   srcId(i, src1, 23);

   if (i->op == OP_TXG) code[1] |= i->tex.gatherComp << 13;

   // texture target:
   // cube targets are encoded as 3, everything else as (dimensions - 1)
   code[1] |= (i->tex.target.isCube() ? 3 : (i->tex.target.getDim() - 1)) << 7;
   if (i->tex.target.isArray())
      code[1] |= 0x40;
   if (i->tex.target.isShadow())
      code[1] |= 0x400;
   if (i->tex.target == TEX_TARGET_2D_MS ||
       i->tex.target == TEX_TARGET_2D_MS_ARRAY)
      code[1] |= 0x800;

   // NOTE(review): intentionally empty; an immediate second source gets no
   // special encoding here.
   if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) {
      // ?
   }

   // The "uses offsets" flag lives at an op-specific bit.
   if (i->tex.useOffsets == 1) {
      switch (i->op) {
      case OP_TXF: code[1] |= 0x200; break;
      case OP_TXD: code[1] |= 0x00400000; break;
      default: code[1] |= 0x800; break;
      }
   }
   if (i->tex.useOffsets == 4)
      code[1] |= 0x1000;
}
1338 
1339 void
emitTXQ(const TexInstruction * i)1340 CodeEmitterGK110::emitTXQ(const TexInstruction *i)
1341 {
1342    code[0] = 0x00000002;
1343    code[1] = 0x75400001;
1344 
1345    switch (i->tex.query) {
1346    case TXQ_DIMS:            code[0] |= 0x01 << 25; break;
1347    case TXQ_TYPE:            code[0] |= 0x02 << 25; break;
1348    case TXQ_SAMPLE_POSITION: code[0] |= 0x05 << 25; break;
1349    case TXQ_FILTER:          code[0] |= 0x10 << 25; break;
1350    case TXQ_LOD:             code[0] |= 0x12 << 25; break;
1351    case TXQ_BORDER_COLOUR:   code[0] |= 0x16 << 25; break;
1352    default:
1353       assert(!"invalid texture query");
1354       break;
1355    }
1356 
1357    code[1] |= i->tex.mask << 2;
1358    code[1] |= i->tex.r << 9;
1359    if (/*i->tex.sIndirectSrc >= 0 || */i->tex.rIndirectSrc >= 0)
1360       code[1] |= 0x08000000;
1361 
1362    defId(i->def(0), 2);
1363    srcId(i->src(0), 10);
1364 
1365    emitPredicate(i);
1366 }
1367 
1368 void
emitQUADOP(const Instruction * i,uint8_t qOp,uint8_t laneMask)1369 CodeEmitterGK110::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
1370 {
1371    code[0] = 0x00000002 | ((qOp & 1) << 31);
1372    code[1] = 0x7fc00200 | (qOp >> 1) | (laneMask << 12); // dall
1373 
1374    defId(i->def(0), 2);
1375    srcId(i->src(0), 10);
1376    srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 23);
1377 
1378    emitPredicate(i);
1379 }
1380 
1381 void
emitPIXLD(const Instruction * i)1382 CodeEmitterGK110::emitPIXLD(const Instruction *i)
1383 {
1384    emitForm_L(i, 0x7f4, 2, Modifier(0));
1385    code[1] |= i->subOp << 2;
1386    code[1] |= 0x00070000;
1387 }
1388 
1389 void
emitBAR(const Instruction * i)1390 CodeEmitterGK110::emitBAR(const Instruction *i)
1391 {
1392    code[0] = 0x00000002;
1393    code[1] = 0x85400000;
1394 
1395    switch (i->subOp) {
1396    case NV50_IR_SUBOP_BAR_ARRIVE:   code[1] |= 0x08; break;
1397    case NV50_IR_SUBOP_BAR_RED_AND:  code[1] |= 0x50; break;
1398    case NV50_IR_SUBOP_BAR_RED_OR:   code[1] |= 0x90; break;
1399    case NV50_IR_SUBOP_BAR_RED_POPC: code[1] |= 0x10; break;
1400    default:
1401       assert(i->subOp == NV50_IR_SUBOP_BAR_SYNC);
1402       break;
1403    }
1404 
1405    emitPredicate(i);
1406 
1407    // barrier id
1408    if (i->src(0).getFile() == FILE_GPR) {
1409       srcId(i->src(0), 10);
1410    } else {
1411       ImmediateValue *imm = i->getSrc(0)->asImm();
1412       assert(imm);
1413       code[0] |= imm->reg.data.u32 << 10;
1414       code[1] |= 0x8000;
1415    }
1416 
1417    // thread count
1418    if (i->src(1).getFile() == FILE_GPR) {
1419       srcId(i->src(1), 23);
1420    } else {
1421       ImmediateValue *imm = i->getSrc(0)->asImm();
1422       assert(imm);
1423       assert(imm->reg.data.u32 <= 0xfff);
1424       code[0] |= imm->reg.data.u32 << 23;
1425       code[1] |= imm->reg.data.u32 >> 9;
1426       code[1] |= 0x4000;
1427    }
1428 
1429    if (i->srcExists(2) && (i->predSrc != 2)) {
1430       srcId(i->src(2), 32 + 10);
1431       if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
1432          code[1] |= 1 << 13;
1433    } else {
1434       code[1] |= 7 << 10;
1435    }
1436 }
1437 
emitMEMBAR(const Instruction * i)1438 void CodeEmitterGK110::emitMEMBAR(const Instruction *i)
1439 {
1440    code[0] = 0x00000002 | NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp) << 8;
1441    code[1] = 0x7cc00000;
1442 
1443    emitPredicate(i);
1444 }
1445 
// Emit a control-flow instruction (branches, calls, exits, the PRE*/JOINAT
// family, quad on/pop and breakpoints).
//
// Each op selects a base opcode and a mask describing which optional parts
// are encoded: the predicate and/or a branch target.
void
CodeEmitterGK110::emitFlow(const Instruction *i)
{
   const FlowInstruction *f = i->asFlow();

   unsigned mask; // bit 0: predicate, bit 1: target

   code[0] = 0x00000000;

   switch (i->op) {
   case OP_BRA:
      code[1] = f->absolute ? 0x10800000 : 0x12000000;
      // flag a target that is read from constant memory
      if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
         code[0] |= 0x80;
      mask = 3;
      break;
   case OP_CALL:
      code[1] = f->absolute ? 0x11000000 : 0x13000000;
      if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
         code[0] |= 0x80;
      mask = 2;
      break;

   case OP_EXIT:    code[1] = 0x18000000; mask = 1; break;
   case OP_RET:     code[1] = 0x19000000; mask = 1; break;
   case OP_DISCARD: code[1] = 0x19800000; mask = 1; break;
   case OP_BREAK:   code[1] = 0x1a000000; mask = 1; break;
   case OP_CONT:    code[1] = 0x1a800000; mask = 1; break;

   case OP_JOINAT:   code[1] = 0x14800000; mask = 2; break;
   case OP_PREBREAK: code[1] = 0x15000000; mask = 2; break;
   case OP_PRECONT:  code[1] = 0x15800000; mask = 2; break;
   case OP_PRERET:   code[1] = 0x13800000; mask = 2; break;

   case OP_QUADON:  code[1] = 0x1b800000; mask = 0; break;
   case OP_QUADPOP: code[1] = 0x1c000000; mask = 0; break;
   case OP_BRKPT:   code[1] = 0x00000000; mask = 0; break;
   default:
      assert(!"invalid flow operation");
      return;
   }

   if (mask & 1) {
      emitPredicate(i);
      // no flags source: set bits 2..5 of the first word
      if (i->flagsSrc < 0)
         code[0] |= 0x3c;
   }

   // Everything below needs the FlowInstruction view of @i.
   if (!f)
      return;

   if (f->allWarp)
      code[0] |= 1 << 9;
   if (f->limit)
      code[0] |= 1 << 8;

   if (f->op == OP_CALL) {
      if (f->builtin) {
         // Builtin calls are absolute; the address is filled in through two
         // relocations, one per instruction word.
         assert(f->absolute);
         uint32_t pcAbs = targNVC0->getBuiltinOffset(f->target.builtin);
         addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xff800000, 23);
         addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x007fffff, -9);
      } else {
         // Calls to functions are relative to the end of this instruction
         // (codeSize + 8); the offset is split across both words.
         assert(!f->absolute);
         int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
         code[0] |= (pcRel & 0x1ff) << 23;
         code[1] |= (pcRel >> 9) & 0x7fff;
      }
   } else
   if (mask & 2) {
      int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
      // When issue delays are written and the target block starts on a
      // 0x40-byte boundary, skip 8 bytes at the target (presumably the
      // scheduling word placed there -- TODO confirm).
      if (writeIssueDelays && !(f->target.bb->binPos & 0x3f))
         pcRel += 8;
      // currently we don't want absolute branches
      assert(!f->absolute);
      code[0] |= (pcRel & 0x1ff) << 23;
      code[1] |= (pcRel >> 9) & 0x7fff;
   }
}
1525 
1526 void
emitVOTE(const Instruction * i)1527 CodeEmitterGK110::emitVOTE(const Instruction *i)
1528 {
1529    assert(i->src(0).getFile() == FILE_PREDICATE);
1530 
1531    code[0] = 0x00000002;
1532    code[1] = 0x86c00000 | (i->subOp << 19);
1533 
1534    emitPredicate(i);
1535 
1536    unsigned rp = 0;
1537    for (int d = 0; i->defExists(d); d++) {
1538       if (i->def(d).getFile() == FILE_PREDICATE) {
1539          assert(!(rp & 2));
1540          rp |= 2;
1541          defId(i->def(d), 48);
1542       } else if (i->def(d).getFile() == FILE_GPR) {
1543          assert(!(rp & 1));
1544          rp |= 1;
1545          defId(i->def(d), 2);
1546       } else {
1547          assert(!"Unhandled def");
1548       }
1549    }
1550    if (!(rp & 1))
1551       code[0] |= 255 << 2;
1552    if (!(rp & 2))
1553       code[1] |= 7 << 16;
1554    if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
1555       code[1] |= 1 << 13;
1556    srcId(i->src(0), 42);
1557 }
1558 
1559 void
emitSUGType(DataType ty,const int pos)1560 CodeEmitterGK110::emitSUGType(DataType ty, const int pos)
1561 {
1562    uint8_t n = 0;
1563 
1564    switch (ty) {
1565    case TYPE_S32: n = 1; break;
1566    case TYPE_U8:  n = 2; break;
1567    case TYPE_S8:  n = 3; break;
1568    default:
1569       assert(ty == TYPE_U32);
1570       break;
1571    }
1572    code[pos / 32] |= n << (pos % 32);
1573 }
1574 
1575 void
emitSUCachingMode(CacheMode c)1576 CodeEmitterGK110::emitSUCachingMode(CacheMode c)
1577 {
1578    uint8_t n = 0;
1579 
1580    switch (c) {
1581    case CACHE_CA:
1582 // case CACHE_WB:
1583       n = 0;
1584       break;
1585    case CACHE_CG:
1586       n = 1;
1587       break;
1588    case CACHE_CS:
1589       n = 2;
1590       break;
1591    case CACHE_CV:
1592 // case CACHE_WT:
1593       n = 3;
1594       break;
1595    default:
1596       assert(!"invalid caching mode");
1597       break;
1598    }
1599    code[0] |= (n & 1) << 31;
1600    code[1] |= (n & 2) >> 1;
1601 }
1602 
1603 void
setSUConst16(const Instruction * i,const int s)1604 CodeEmitterGK110::setSUConst16(const Instruction *i, const int s)
1605 {
1606    const uint32_t offset = i->getSrc(s)->reg.data.offset;
1607 
1608    assert(offset == (offset & 0xfffc));
1609 
1610    code[0] |= offset << 21;
1611    code[1] |= offset >> 11;
1612    code[1] |= i->getSrc(s)->reg.fileIndex << 5;
1613 }
1614 
1615 void
emitSULDGB(const TexInstruction * i)1616 CodeEmitterGK110::emitSULDGB(const TexInstruction *i)
1617 {
1618    code[0] = 0x00000002;
1619    code[1] = 0x30000000 | (i->subOp << 14);
1620 
1621    if (i->src(1).getFile() == FILE_MEMORY_CONST) {
1622       emitLoadStoreType(i->dType, 0x38);
1623       emitCachingMode(i->cache, 0x36);
1624 
1625       // format
1626       setSUConst16(i, 1);
1627    } else {
1628       assert(i->src(1).getFile() == FILE_GPR);
1629       code[1] |= 0x49800000;
1630 
1631       emitLoadStoreType(i->dType, 0x21);
1632       emitSUCachingMode(i->cache);
1633 
1634       srcId(i->src(1), 23);
1635    }
1636 
1637    emitSUGType(i->sType, 0x34);
1638 
1639    emitPredicate(i);
1640    defId(i->def(0), 2); // destination
1641    srcId(i->src(0), 10); // address
1642 
1643    // surface predicate
1644    if (!i->srcExists(2) || (i->predSrc == 2)) {
1645       code[1] |= 0x7 << 10;
1646    } else {
1647       if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
1648          code[1] |= 1 << 13;
1649       srcId(i->src(2), 32 + 10);
1650    }
1651 }
1652 
1653 void
emitSUSTGx(const TexInstruction * i)1654 CodeEmitterGK110::emitSUSTGx(const TexInstruction *i)
1655 {
1656    assert(i->op == OP_SUSTP);
1657 
1658    code[0] = 0x00000002;
1659    code[1] = 0x38000000;
1660 
1661    if (i->src(1).getFile() == FILE_MEMORY_CONST) {
1662       code[0] |= i->subOp << 2;
1663 
1664       if (i->op == OP_SUSTP)
1665          code[0] |= i->tex.mask << 4;
1666 
1667       emitSUGType(i->sType, 0x8);
1668       emitCachingMode(i->cache, 0x36);
1669 
1670       // format
1671       setSUConst16(i, 1);
1672    } else {
1673       assert(i->src(1).getFile() == FILE_GPR);
1674 
1675       code[0] |= i->subOp << 23;
1676       code[1] |= 0x41c00000;
1677 
1678       if (i->op == OP_SUSTP)
1679          code[0] |= i->tex.mask << 25;
1680 
1681       emitSUGType(i->sType, 0x1d);
1682       emitSUCachingMode(i->cache);
1683 
1684       srcId(i->src(1), 2);
1685    }
1686 
1687    emitPredicate(i);
1688    srcId(i->src(0), 10); // address
1689    srcId(i->src(3), 42); // values
1690 
1691    // surface predicate
1692    if (!i->srcExists(2) || (i->predSrc == 2)) {
1693       code[1] |= 0x7 << 18;
1694    } else {
1695       if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
1696          code[1] |= 1 << 21;
1697       srcId(i->src(2), 32 + 18);
1698    }
1699 }
1700 
1701 void
emitSUCLAMPMode(uint16_t subOp)1702 CodeEmitterGK110::emitSUCLAMPMode(uint16_t subOp)
1703 {
1704    uint8_t m;
1705    switch (subOp & ~NV50_IR_SUBOP_SUCLAMP_2D) {
1706    case NV50_IR_SUBOP_SUCLAMP_SD(0, 1): m = 0; break;
1707    case NV50_IR_SUBOP_SUCLAMP_SD(1, 1): m = 1; break;
1708    case NV50_IR_SUBOP_SUCLAMP_SD(2, 1): m = 2; break;
1709    case NV50_IR_SUBOP_SUCLAMP_SD(3, 1): m = 3; break;
1710    case NV50_IR_SUBOP_SUCLAMP_SD(4, 1): m = 4; break;
1711    case NV50_IR_SUBOP_SUCLAMP_PL(0, 1): m = 5; break;
1712    case NV50_IR_SUBOP_SUCLAMP_PL(1, 1): m = 6; break;
1713    case NV50_IR_SUBOP_SUCLAMP_PL(2, 1): m = 7; break;
1714    case NV50_IR_SUBOP_SUCLAMP_PL(3, 1): m = 8; break;
1715    case NV50_IR_SUBOP_SUCLAMP_PL(4, 1): m = 9; break;
1716    case NV50_IR_SUBOP_SUCLAMP_BL(0, 1): m = 10; break;
1717    case NV50_IR_SUBOP_SUCLAMP_BL(1, 1): m = 11; break;
1718    case NV50_IR_SUBOP_SUCLAMP_BL(2, 1): m = 12; break;
1719    case NV50_IR_SUBOP_SUCLAMP_BL(3, 1): m = 13; break;
1720    case NV50_IR_SUBOP_SUCLAMP_BL(4, 1): m = 14; break;
1721    default:
1722       return;
1723    }
1724    code[1] |= m << 20;
1725    if (subOp & NV50_IR_SUBOP_SUCLAMP_2D)
1726       code[1] |= 1 << 24;
1727 }
1728 
1729 void
emitSUCalc(Instruction * i)1730 CodeEmitterGK110::emitSUCalc(Instruction *i)
1731 {
1732    ImmediateValue *imm = NULL;
1733    uint64_t opc1, opc2;
1734 
1735    if (i->srcExists(2)) {
1736       imm = i->getSrc(2)->asImm();
1737       if (imm)
1738          i->setSrc(2, NULL); // special case, make emitForm_21 not assert
1739    }
1740 
1741    switch (i->op) {
1742    case OP_SUCLAMP:  opc1 = 0xb00; opc2 = 0x580; break;
1743    case OP_SUBFM:    opc1 = 0xb68; opc2 = 0x1e8; break;
1744    case OP_SUEAU:    opc1 = 0xb6c; opc2 = 0x1ec; break;
1745    default:
1746       assert(0);
1747       return;
1748    }
1749    emitForm_21(i, opc2, opc1);
1750 
1751    if (i->op == OP_SUCLAMP) {
1752       if (i->dType == TYPE_S32)
1753          code[1] |= 1 << 19;
1754       emitSUCLAMPMode(i->subOp);
1755    }
1756 
1757    if (i->op == OP_SUBFM && i->subOp == NV50_IR_SUBOP_SUBFM_3D)
1758       code[1] |= 1 << 18;
1759 
1760    if (i->op != OP_SUEAU) {
1761       const uint8_t pos = i->op == OP_SUBFM ? 19 : 16;
1762       if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
1763          code[0] |= 255 << 2;
1764          code[1] |= i->getDef(1)->reg.data.id << pos;
1765       } else
1766       if (i->defExists(1)) { // r, p
1767          assert(i->def(1).getFile() == FILE_PREDICATE);
1768          code[1] |= i->getDef(1)->reg.data.id << pos;
1769       } else { // r, #
1770          code[1] |= 7 << pos;
1771       }
1772    }
1773 
1774    if (imm) {
1775       assert(i->op == OP_SUCLAMP);
1776       i->setSrc(2, imm);
1777       code[1] |= (imm->reg.data.u32 & 0x3f) << 10; // sint6
1778    }
1779 }
1780 
1781 
1782 void
emitVectorSubOp(const Instruction * i)1783 CodeEmitterGK110::emitVectorSubOp(const Instruction *i)
1784 {
1785    switch (NV50_IR_SUBOP_Vn(i->subOp)) {
1786    case 0:
1787       code[1] |= (i->subOp & 0x000f) << 7;  // vsrc1
1788       code[1] |= (i->subOp & 0x00e0) >> 6;  // vsrc2
1789       code[1] |= (i->subOp & 0x0100) << 13; // vsrc2
1790       code[1] |= (i->subOp & 0x3c00) << 12; // vdst
1791       break;
1792    default:
1793       assert(0);
1794       break;
1795    }
1796 }
1797 
1798 void
emitVSHL(const Instruction * i)1799 CodeEmitterGK110::emitVSHL(const Instruction *i)
1800 {
1801    code[0] = 0x00000002;
1802    code[1] = 0xb8000000;
1803 
1804    assert(NV50_IR_SUBOP_Vn(i->subOp) == 0);
1805 
1806    if (isSignedType(i->dType)) code[1] |= 1 << 25;
1807    if (isSignedType(i->sType)) code[1] |= 1 << 19;
1808 
1809    emitVectorSubOp(i);
1810 
1811    emitPredicate(i);
1812    defId(i->def(0), 2);
1813    srcId(i->src(0), 10);
1814 
1815    if (i->getSrc(1)->reg.file == FILE_IMMEDIATE) {
1816       ImmediateValue *imm = i->getSrc(1)->asImm();
1817       assert(imm);
1818       code[0] |= (imm->reg.data.u32 & 0x01ff) << 23;
1819       code[1] |= (imm->reg.data.u32 & 0xfe00) >> 9;
1820    } else {
1821       assert(i->getSrc(1)->reg.file == FILE_GPR);
1822       code[1] |= 1 << 21;
1823       srcId(i->src(1), 23);
1824    }
1825    srcId(i->src(2), 42);
1826 
1827    if (i->saturate)
1828       code[0] |= 1 << 22;
1829    if (i->flagsDef >= 0)
1830       code[1] |= 1 << 18;
1831 }
1832 
1833 void
emitAFETCH(const Instruction * i)1834 CodeEmitterGK110::emitAFETCH(const Instruction *i)
1835 {
1836    uint32_t offset = i->src(0).get()->reg.data.offset & 0x7ff;
1837 
1838    code[0] = 0x00000002 | (offset << 23);
1839    code[1] = 0x7d000000 | (offset >> 9);
1840 
1841    if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1842       code[1] |= 0x8;
1843 
1844    emitPredicate(i);
1845 
1846    defId(i->def(0), 2);
1847    srcId(i->src(0).getIndirect(0), 10);
1848 }
1849 
1850 void
emitPFETCH(const Instruction * i)1851 CodeEmitterGK110::emitPFETCH(const Instruction *i)
1852 {
1853    uint32_t prim = i->src(0).get()->reg.data.u32;
1854 
1855    code[0] = 0x00000002 | ((prim & 0xff) << 23);
1856    code[1] = 0x7f800000;
1857 
1858    emitPredicate(i);
1859 
1860    const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1861 
1862    defId(i->def(0), 2);
1863    srcId(i, src1, 10);
1864 }
1865 
1866 void
emitVFETCH(const Instruction * i)1867 CodeEmitterGK110::emitVFETCH(const Instruction *i)
1868 {
1869    unsigned int size = typeSizeof(i->dType);
1870    uint32_t offset = i->src(0).get()->reg.data.offset;
1871 
1872    code[0] = 0x00000002 | (offset << 23);
1873    code[1] = 0x7ec00000 | (offset >> 9);
1874    code[1] |= (size / 4 - 1) << 18;
1875 
1876    if (i->perPatch)
1877       code[1] |= 0x4;
1878    if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1879       code[1] |= 0x8; // yes, TCPs can read from *outputs* of other threads
1880 
1881    emitPredicate(i);
1882 
1883    defId(i->def(0), 2);
1884    srcId(i->src(0).getIndirect(0), 10);
1885    srcId(i->src(0).getIndirect(1), 32 + 10); // vertex address
1886 }
1887 
1888 void
emitEXPORT(const Instruction * i)1889 CodeEmitterGK110::emitEXPORT(const Instruction *i)
1890 {
1891    unsigned int size = typeSizeof(i->dType);
1892    uint32_t offset = i->src(0).get()->reg.data.offset;
1893 
1894    code[0] = 0x00000002 | (offset << 23);
1895    code[1] = 0x7f000000 | (offset >> 9);
1896    code[1] |= (size / 4 - 1) << 18;
1897 
1898    if (i->perPatch)
1899       code[1] |= 0x4;
1900 
1901    emitPredicate(i);
1902 
1903    assert(i->src(1).getFile() == FILE_GPR);
1904 
1905    srcId(i->src(0).getIndirect(0), 10);
1906    srcId(i->src(0).getIndirect(1), 32 + 10); // vertex base address
1907    srcId(i->src(1), 2);
1908 }
1909 
1910 void
emitOUT(const Instruction * i)1911 CodeEmitterGK110::emitOUT(const Instruction *i)
1912 {
1913    assert(i->src(0).getFile() == FILE_GPR);
1914 
1915    emitForm_21(i, 0x1f0, 0xb70);
1916 
1917    if (i->op == OP_EMIT)
1918       code[1] |= 1 << 10;
1919    if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)
1920       code[1] |= 1 << 11;
1921 }
1922 
1923 void
emitInterpMode(const Instruction * i)1924 CodeEmitterGK110::emitInterpMode(const Instruction *i)
1925 {
1926    code[1] |= (i->ipa & 0x3) << 21; // TODO: INTERP_SAMPLEID
1927    code[1] |= (i->ipa & 0xc) << (19 - 2);
1928 }
1929 
1930 static void
interpApply(const FixupEntry * entry,uint32_t * code,const FixupData & data)1931 interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
1932 {
1933    int ipa = entry->ipa;
1934    int reg = entry->reg;
1935    int loc = entry->loc;
1936 
1937    if (data.flatshade &&
1938        (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
1939       ipa = NV50_IR_INTERP_FLAT;
1940       reg = 0xff;
1941    } else if (data.force_persample_interp &&
1942               (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
1943               (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
1944       ipa |= NV50_IR_INTERP_CENTROID;
1945    }
1946    code[loc + 1] &= ~(0xf << 19);
1947    code[loc + 1] |= (ipa & 0x3) << 21;
1948    code[loc + 1] |= (ipa & 0xc) << (19 - 2);
1949    code[loc + 0] &= ~(0xff << 23);
1950    code[loc + 0] |= reg << 23;
1951 }
1952 
1953 void
emitINTERP(const Instruction * i)1954 CodeEmitterGK110::emitINTERP(const Instruction *i)
1955 {
1956    const uint32_t base = i->getSrc(0)->reg.data.offset;
1957 
1958    code[0] = 0x00000002 | (base << 31);
1959    code[1] = 0x74800000 | (base >> 1);
1960 
1961    if (i->saturate)
1962       code[1] |= 1 << 18;
1963 
1964    if (i->op == OP_PINTERP) {
1965       srcId(i->src(1), 23);
1966       addInterp(i->ipa, SDATA(i->src(1)).id, interpApply);
1967    } else {
1968       code[0] |= 0xff << 23;
1969       addInterp(i->ipa, 0xff, interpApply);
1970    }
1971 
1972    srcId(i->src(0).getIndirect(0), 10);
1973    emitInterpMode(i);
1974 
1975    emitPredicate(i);
1976    defId(i->def(0), 2);
1977 
1978    if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)
1979       srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 32 + 10);
1980    else
1981       code[1] |= 0xff << 10;
1982 }
1983 
1984 void
emitLoadStoreType(DataType ty,const int pos)1985 CodeEmitterGK110::emitLoadStoreType(DataType ty, const int pos)
1986 {
1987    uint8_t n;
1988 
1989    switch (ty) {
1990    case TYPE_U8:
1991       n = 0;
1992       break;
1993    case TYPE_S8:
1994       n = 1;
1995       break;
1996    case TYPE_U16:
1997       n = 2;
1998       break;
1999    case TYPE_S16:
2000       n = 3;
2001       break;
2002    case TYPE_F32:
2003    case TYPE_U32:
2004    case TYPE_S32:
2005       n = 4;
2006       break;
2007    case TYPE_F64:
2008    case TYPE_U64:
2009    case TYPE_S64:
2010       n = 5;
2011       break;
2012    case TYPE_B128:
2013       n = 6;
2014       break;
2015    default:
2016       n = 0;
2017       assert(!"invalid ld/st type");
2018       break;
2019    }
2020    code[pos / 32] |= n << (pos % 32);
2021 }
2022 
2023 void
emitCachingMode(CacheMode c,const int pos)2024 CodeEmitterGK110::emitCachingMode(CacheMode c, const int pos)
2025 {
2026    uint8_t n;
2027 
2028    switch (c) {
2029    case CACHE_CA:
2030 // case CACHE_WB:
2031       n = 0;
2032       break;
2033    case CACHE_CG:
2034       n = 1;
2035       break;
2036    case CACHE_CS:
2037       n = 2;
2038       break;
2039    case CACHE_CV:
2040 // case CACHE_WT:
2041       n = 3;
2042       break;
2043    default:
2044       n = 0;
2045       assert(!"invalid caching mode");
2046       break;
2047    }
2048    code[pos / 32] |= n << (pos % 32);
2049 }
2050 
2051 void
emitSTORE(const Instruction * i)2052 CodeEmitterGK110::emitSTORE(const Instruction *i)
2053 {
2054    int32_t offset = SDATA(i->src(0)).offset;
2055 
2056    switch (i->src(0).getFile()) {
2057    case FILE_MEMORY_GLOBAL: code[1] = 0xe0000000; code[0] = 0x00000000; break;
2058    case FILE_MEMORY_LOCAL:  code[1] = 0x7a800000; code[0] = 0x00000002; break;
2059    case FILE_MEMORY_SHARED:
2060       code[0] = 0x00000002;
2061       if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED)
2062          code[1] = 0x78400000;
2063       else
2064          code[1] = 0x7ac00000;
2065       break;
2066    default:
2067       assert(!"invalid memory file");
2068       break;
2069    }
2070 
2071    if (code[0] & 0x2) {
2072       offset &= 0xffffff;
2073       emitLoadStoreType(i->dType, 0x33);
2074       if (i->src(0).getFile() == FILE_MEMORY_LOCAL)
2075          emitCachingMode(i->cache, 0x2f);
2076    } else {
2077       emitLoadStoreType(i->dType, 0x38);
2078       emitCachingMode(i->cache, 0x3b);
2079    }
2080    code[0] |= offset << 23;
2081    code[1] |= offset >> 9;
2082 
2083    // Unlocked store on shared memory can fail.
2084    if (i->src(0).getFile() == FILE_MEMORY_SHARED &&
2085        i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
2086       assert(i->defExists(0));
2087       defId(i->def(0), 32 + 16);
2088    }
2089 
2090    emitPredicate(i);
2091 
2092    srcId(i->src(1), 2);
2093    srcId(i->src(0).getIndirect(0), 10);
2094    if (i->src(0).getFile() == FILE_MEMORY_GLOBAL &&
2095        i->src(0).isIndirect(0) &&
2096        i->getIndirect(0, 0)->reg.size == 8)
2097       code[1] |= 1 << 23;
2098 }
2099 
// Emit a load from global, local, shared or constant memory.
//
// A direct (non-indirect) 4-byte load from constant memory is emitted through
// emitMOV() instead. A locked load from shared memory additionally writes a
// predicate, either as def(0) (no register result) or as def(1).
void
CodeEmitterGK110::emitLOAD(const Instruction *i)
{
   int32_t offset = SDATA(i->src(0)).offset;

   switch (i->src(0).getFile()) {
   case FILE_MEMORY_GLOBAL: code[1] = 0xc0000000; code[0] = 0x00000000; break;
   case FILE_MEMORY_LOCAL:  code[1] = 0x7a000000; code[0] = 0x00000002; break;
   case FILE_MEMORY_SHARED:
      code[0] = 0x00000002;
      if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED)
         code[1] = 0x77400000;
      else
         code[1] = 0x7a400000;
      break;
   case FILE_MEMORY_CONST:
      if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
         emitMOV(i);
         return;
      }
      // constant buffer offsets are limited to 16 bits; the buffer index is
      // stored at bit 7 and the subOp at bit 15 of the second word
      offset &= 0xffff;
      code[0] = 0x00000002;
      code[1] = 0x7c800000 | (i->src(0).get()->reg.fileIndex << 7);
      code[1] |= i->subOp << 15;
      break;
   default:
      assert(!"invalid memory file");
      break;
   }

   // Bit 1 of the first word distinguishes the two layouts, which place the
   // type and cache mode fields differently.
   if (code[0] & 0x2) {
      offset &= 0xffffff;
      emitLoadStoreType(i->dType, 0x33);
      if (i->src(0).getFile() == FILE_MEMORY_LOCAL)
         emitCachingMode(i->cache, 0x2f);
   } else {
      emitLoadStoreType(i->dType, 0x38);
      emitCachingMode(i->cache, 0x3b);
   }
   // the offset straddles both words starting at bit 23
   code[0] |= offset << 23;
   code[1] |= offset >> 9;

   // Locked load on shared memory can fail.
   // r / p are the def indices of the register and predicate results;
   // -1 means "not present".
   int r = 0, p = -1;
   if (i->src(0).getFile() == FILE_MEMORY_SHARED &&
       i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
      if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
         r = -1;
         p = 0;
      } else if (i->defExists(1)) { // r, p
         p = 1;
      } else {
         assert(!"Expected predicate dest for load locked");
      }
   }

   emitPredicate(i);

   // without a register result the destination field is filled with 255
   if (r >= 0)
      defId(i->def(r), 2);
   else
      code[0] |= 255 << 2;

   if (p >= 0)
      defId(i->def(p), 32 + 16);

   if (i->getIndirect(0, 0)) {
      srcId(i->src(0).getIndirect(0), 10);
      // flag an 8-byte indirect address register
      if (i->getIndirect(0, 0)->reg.size == 8)
         code[1] |= 1 << 23;
   } else {
      // no indirect register: fill the address register field with 255
      code[0] |= 255 << 10;
   }
}
2174 
2175 uint8_t
getSRegEncoding(const ValueRef & ref)2176 CodeEmitterGK110::getSRegEncoding(const ValueRef& ref)
2177 {
2178    switch (SDATA(ref).sv.sv) {
2179    case SV_LANEID:        return 0x00;
2180    case SV_PHYSID:        return 0x03;
2181    case SV_VERTEX_COUNT:  return 0x10;
2182    case SV_INVOCATION_ID: return 0x11;
2183    case SV_YDIR:          return 0x12;
2184    case SV_THREAD_KILL:   return 0x13;
2185    case SV_TID:           return 0x21 + SDATA(ref).sv.index;
2186    case SV_CTAID:         return 0x25 + SDATA(ref).sv.index;
2187    case SV_NTID:          return 0x29 + SDATA(ref).sv.index;
2188    case SV_GRIDID:        return 0x2c;
2189    case SV_NCTAID:        return 0x2d + SDATA(ref).sv.index;
2190    case SV_LBASE:         return 0x34;
2191    case SV_SBASE:         return 0x30;
2192    case SV_CLOCK:         return 0x50 + SDATA(ref).sv.index;
2193    default:
2194       assert(!"no sreg for system value");
2195       return 0;
2196    }
2197 }
2198 
2199 void
emitMOV(const Instruction * i)2200 CodeEmitterGK110::emitMOV(const Instruction *i)
2201 {
2202    if (i->def(0).getFile() == FILE_PREDICATE) {
2203       if (i->src(0).getFile() == FILE_GPR) {
2204          // Use ISETP.NE.AND dst, PT, src, RZ, PT
2205          code[0] = 0x00000002;
2206          code[1] = 0xdb500000;
2207 
2208          code[0] |= 0x7 << 2;
2209          code[0] |= 0xff << 23;
2210          code[1] |= 0x7 << 10;
2211          srcId(i->src(0), 10);
2212       } else
2213       if (i->src(0).getFile() == FILE_PREDICATE) {
2214          // Use PSETP.AND.AND dst, PT, src, PT, PT
2215          code[0] = 0x00000002;
2216          code[1] = 0x84800000;
2217 
2218          code[0] |= 0x7 << 2;
2219          code[1] |= 0x7 << 0;
2220          code[1] |= 0x7 << 10;
2221 
2222          srcId(i->src(0), 14);
2223       } else {
2224          assert(!"Unexpected source for predicate destination");
2225          emitNOP(i);
2226       }
2227       emitPredicate(i);
2228       defId(i->def(0), 5);
2229    } else
2230    if (i->src(0).getFile() == FILE_SYSTEM_VALUE) {
2231       code[0] = 0x00000002 | (getSRegEncoding(i->src(0)) << 23);
2232       code[1] = 0x86400000;
2233       emitPredicate(i);
2234       defId(i->def(0), 2);
2235    } else
2236    if (i->src(0).getFile() == FILE_IMMEDIATE) {
2237       code[0] = 0x00000002 | (i->lanes << 14);
2238       code[1] = 0x74000000;
2239       emitPredicate(i);
2240       defId(i->def(0), 2);
2241       setImmediate32(i, 0, Modifier(0));
2242    } else
2243    if (i->src(0).getFile() == FILE_PREDICATE) {
2244       code[0] = 0x00000002;
2245       code[1] = 0x84401c07;
2246       emitPredicate(i);
2247       defId(i->def(0), 2);
2248       srcId(i->src(0), 14);
2249    } else {
2250       emitForm_C(i, 0x24c, 2);
2251       code[1] |= i->lanes << 10;
2252    }
2253 }
2254 
2255 static inline bool
uses64bitAddress(const Instruction * ldst)2256 uses64bitAddress(const Instruction *ldst)
2257 {
2258    return ldst->src(0).getFile() == FILE_MEMORY_GLOBAL &&
2259       ldst->src(0).isIndirect(0) &&
2260       ldst->getIndirect(0, 0)->reg.size == 8;
2261 }
2262 
2263 void
emitATOM(const Instruction * i)2264 CodeEmitterGK110::emitATOM(const Instruction *i)
2265 {
2266    const bool hasDst = i->defExists(0);
2267    const bool exch = i->subOp == NV50_IR_SUBOP_ATOM_EXCH;
2268 
2269    code[0] = 0x00000002;
2270    if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
2271       code[1] = 0x77800000;
2272    else
2273       code[1] = 0x68000000;
2274 
2275    switch (i->subOp) {
2276    case NV50_IR_SUBOP_ATOM_CAS: break;
2277    case NV50_IR_SUBOP_ATOM_EXCH: code[1] |= 0x04000000; break;
2278    default: code[1] |= i->subOp << 23; break;
2279    }
2280 
2281    switch (i->dType) {
2282    case TYPE_U32: break;
2283    case TYPE_S32: code[1] |= 0x00100000; break;
2284    case TYPE_U64: code[1] |= 0x00200000; break;
2285    case TYPE_F32: code[1] |= 0x00300000; break;
2286    case TYPE_B128: code[1] |= 0x00400000; break; /* TODO: U128 */
2287    case TYPE_S64: code[1] |= 0x00500000; break;
2288    default: assert(!"unsupported type"); break;
2289    }
2290 
2291    emitPredicate(i);
2292 
2293    /* TODO: cas: check that src regs line up */
2294    /* TODO: cas: flip bits if $r255 is used */
2295    srcId(i->src(1), 23);
2296 
2297    if (hasDst) {
2298       defId(i->def(0), 2);
2299    } else
2300    if (!exch) {
2301       code[0] |= 255 << 2;
2302    }
2303 
2304    if (hasDst || !exch) {
2305       const int32_t offset = SDATA(i->src(0)).offset;
2306       assert(offset < 0x80000 && offset >= -0x80000);
2307       code[0] |= (offset & 1) << 31;
2308       code[1] |= (offset & 0xffffe) >> 1;
2309    } else {
2310       srcAddr32(i->src(0), 31);
2311    }
2312 
2313    if (i->getIndirect(0, 0)) {
2314       srcId(i->getIndirect(0, 0), 10);
2315       if (i->getIndirect(0, 0)->reg.size == 8)
2316          code[1] |= 1 << 19;
2317    } else {
2318       code[0] |= 255 << 10;
2319    }
2320 }
2321 
2322 void
emitCCTL(const Instruction * i)2323 CodeEmitterGK110::emitCCTL(const Instruction *i)
2324 {
2325    int32_t offset = SDATA(i->src(0)).offset;
2326 
2327    code[0] = 0x00000002 | (i->subOp << 2);
2328 
2329    if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
2330       code[1] = 0x7b000000;
2331    } else {
2332       code[1] = 0x7c000000;
2333       offset &= 0xffffff;
2334    }
2335    code[0] |= offset << 23;
2336    code[1] |= offset >> 9;
2337 
2338    if (uses64bitAddress(i))
2339       code[1] |= 1 << 23;
2340    srcId(i->src(0).getIndirect(0), 10);
2341 
2342    emitPredicate(i);
2343 }
2344 
2345 bool
emitInstruction(Instruction * insn)2346 CodeEmitterGK110::emitInstruction(Instruction *insn)
2347 {
2348    const unsigned int size = (writeIssueDelays && !(codeSize & 0x3f)) ? 16 : 8;
2349 
2350    if (insn->encSize != 8) {
2351       ERROR("skipping unencodable instruction: ");
2352       insn->print();
2353       return false;
2354    } else
2355    if (codeSize + size > codeSizeLimit) {
2356       ERROR("code emitter output buffer too small\n");
2357       return false;
2358    }
2359 
2360    if (writeIssueDelays) {
2361       int id = (codeSize & 0x3f) / 8 - 1;
2362       if (id < 0) {
2363          id += 1;
2364          code[0] = 0x00000000; // cf issue delay "instruction"
2365          code[1] = 0x08000000;
2366          code += 2;
2367          codeSize += 8;
2368       }
2369       uint32_t *data = code - (id * 2 + 2);
2370 
2371       switch (id) {
2372       case 0: data[0] |= insn->sched << 2; break;
2373       case 1: data[0] |= insn->sched << 10; break;
2374       case 2: data[0] |= insn->sched << 18; break;
2375       case 3: data[0] |= insn->sched << 26; data[1] |= insn->sched >> 6; break;
2376       case 4: data[1] |= insn->sched << 2; break;
2377       case 5: data[1] |= insn->sched << 10; break;
2378       case 6: data[1] |= insn->sched << 18; break;
2379       default:
2380          assert(0);
2381          break;
2382       }
2383    }
2384 
2385    // assert that instructions with multiple defs don't corrupt registers
2386    for (int d = 0; insn->defExists(d); ++d)
2387       assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0);
2388 
2389    switch (insn->op) {
2390    case OP_MOV:
2391    case OP_RDSV:
2392       emitMOV(insn);
2393       break;
2394    case OP_NOP:
2395       break;
2396    case OP_LOAD:
2397       emitLOAD(insn);
2398       break;
2399    case OP_STORE:
2400       emitSTORE(insn);
2401       break;
2402    case OP_LINTERP:
2403    case OP_PINTERP:
2404       emitINTERP(insn);
2405       break;
2406    case OP_VFETCH:
2407       emitVFETCH(insn);
2408       break;
2409    case OP_EXPORT:
2410       emitEXPORT(insn);
2411       break;
2412    case OP_AFETCH:
2413       emitAFETCH(insn);
2414       break;
2415    case OP_PFETCH:
2416       emitPFETCH(insn);
2417       break;
2418    case OP_EMIT:
2419    case OP_RESTART:
2420       emitOUT(insn);
2421       break;
2422    case OP_ADD:
2423    case OP_SUB:
2424       if (insn->dType == TYPE_F64)
2425          emitDADD(insn);
2426       else if (isFloatType(insn->dType))
2427          emitFADD(insn);
2428       else
2429          emitUADD(insn);
2430       break;
2431    case OP_MUL:
2432       if (insn->dType == TYPE_F64)
2433          emitDMUL(insn);
2434       else if (isFloatType(insn->dType))
2435          emitFMUL(insn);
2436       else
2437          emitIMUL(insn);
2438       break;
2439    case OP_MAD:
2440    case OP_FMA:
2441       if (insn->dType == TYPE_F64)
2442          emitDMAD(insn);
2443       else if (isFloatType(insn->dType))
2444          emitFMAD(insn);
2445       else
2446          emitIMAD(insn);
2447       break;
2448    case OP_MADSP:
2449       emitMADSP(insn);
2450       break;
2451    case OP_SAD:
2452       emitISAD(insn);
2453       break;
2454    case OP_SHLADD:
2455       emitSHLADD(insn);
2456       break;
2457    case OP_NOT:
2458       emitNOT(insn);
2459       break;
2460    case OP_AND:
2461       emitLogicOp(insn, 0);
2462       break;
2463    case OP_OR:
2464       emitLogicOp(insn, 1);
2465       break;
2466    case OP_XOR:
2467       emitLogicOp(insn, 2);
2468       break;
2469    case OP_SHL:
2470    case OP_SHR:
2471       emitShift(insn);
2472       break;
2473    case OP_SET:
2474    case OP_SET_AND:
2475    case OP_SET_OR:
2476    case OP_SET_XOR:
2477       emitSET(insn->asCmp());
2478       break;
2479    case OP_SELP:
2480       emitSELP(insn);
2481       break;
2482    case OP_SLCT:
2483       emitSLCT(insn->asCmp());
2484       break;
2485    case OP_MIN:
2486    case OP_MAX:
2487       emitMINMAX(insn);
2488       break;
2489    case OP_ABS:
2490    case OP_NEG:
2491    case OP_CEIL:
2492    case OP_FLOOR:
2493    case OP_TRUNC:
2494    case OP_SAT:
2495       emitCVT(insn);
2496       break;
2497    case OP_CVT:
2498       if (insn->def(0).getFile() == FILE_PREDICATE ||
2499           insn->src(0).getFile() == FILE_PREDICATE)
2500          emitMOV(insn);
2501       else
2502          emitCVT(insn);
2503       break;
2504    case OP_RSQ:
2505       emitSFnOp(insn, 5 + 2 * insn->subOp);
2506       break;
2507    case OP_RCP:
2508       emitSFnOp(insn, 4 + 2 * insn->subOp);
2509       break;
2510    case OP_LG2:
2511       emitSFnOp(insn, 3);
2512       break;
2513    case OP_EX2:
2514       emitSFnOp(insn, 2);
2515       break;
2516    case OP_SIN:
2517       emitSFnOp(insn, 1);
2518       break;
2519    case OP_COS:
2520       emitSFnOp(insn, 0);
2521       break;
2522    case OP_PRESIN:
2523    case OP_PREEX2:
2524       emitPreOp(insn);
2525       break;
2526    case OP_TEX:
2527    case OP_TXB:
2528    case OP_TXL:
2529    case OP_TXD:
2530    case OP_TXF:
2531    case OP_TXG:
2532    case OP_TXLQ:
2533       emitTEX(insn->asTex());
2534       break;
2535    case OP_TXQ:
2536       emitTXQ(insn->asTex());
2537       break;
2538    case OP_TEXBAR:
2539       emitTEXBAR(insn);
2540       break;
2541    case OP_PIXLD:
2542       emitPIXLD(insn);
2543       break;
2544    case OP_BRA:
2545    case OP_CALL:
2546    case OP_PRERET:
2547    case OP_RET:
2548    case OP_DISCARD:
2549    case OP_EXIT:
2550    case OP_PRECONT:
2551    case OP_CONT:
2552    case OP_PREBREAK:
2553    case OP_BREAK:
2554    case OP_JOINAT:
2555    case OP_BRKPT:
2556    case OP_QUADON:
2557    case OP_QUADPOP:
2558       emitFlow(insn);
2559       break;
2560    case OP_QUADOP:
2561       emitQUADOP(insn, insn->subOp, insn->lanes);
2562       break;
2563    case OP_DFDX:
2564       emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4);
2565       break;
2566    case OP_DFDY:
2567       emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5);
2568       break;
2569    case OP_POPCNT:
2570       emitPOPC(insn);
2571       break;
2572    case OP_INSBF:
2573       emitINSBF(insn);
2574       break;
2575    case OP_EXTBF:
2576       emitEXTBF(insn);
2577       break;
2578    case OP_BFIND:
2579       emitBFIND(insn);
2580       break;
2581    case OP_PERMT:
2582       emitPERMT(insn);
2583       break;
2584    case OP_JOIN:
2585       emitNOP(insn);
2586       insn->join = 1;
2587       break;
2588    case OP_BAR:
2589       emitBAR(insn);
2590       break;
2591    case OP_MEMBAR:
2592       emitMEMBAR(insn);
2593       break;
2594    case OP_ATOM:
2595       emitATOM(insn);
2596       break;
2597    case OP_CCTL:
2598       emitCCTL(insn);
2599       break;
2600    case OP_VOTE:
2601       emitVOTE(insn);
2602       break;
2603    case OP_SULDB:
2604       emitSULDGB(insn->asTex());
2605       break;
2606    case OP_SUSTB:
2607    case OP_SUSTP:
2608       emitSUSTGx(insn->asTex());
2609       break;
2610    case OP_SUBFM:
2611    case OP_SUCLAMP:
2612    case OP_SUEAU:
2613       emitSUCalc(insn);
2614       break;
2615    case OP_VSHL:
2616       emitVSHL(insn);
2617       break;
2618    case OP_PHI:
2619    case OP_UNION:
2620    case OP_CONSTRAINT:
2621       ERROR("operation should have been eliminated");
2622       return false;
2623    case OP_EXP:
2624    case OP_LOG:
2625    case OP_SQRT:
2626    case OP_POW:
2627       ERROR("operation should have been lowered\n");
2628       return false;
2629    default:
2630       ERROR("unknown op: %u\n", insn->op);
2631       return false;
2632    }
2633 
2634    if (insn->join)
2635       code[0] |= 1 << 22;
2636 
2637    code += 2;
2638    codeSize += 8;
2639    return true;
2640 }
2641 
2642 uint32_t
getMinEncodingSize(const Instruction * i) const2643 CodeEmitterGK110::getMinEncodingSize(const Instruction *i) const
2644 {
2645    // No more short instruction encodings.
2646    return 8;
2647 }
2648 
2649 void
prepareEmission(Function * func)2650 CodeEmitterGK110::prepareEmission(Function *func)
2651 {
2652    const Target *targ = func->getProgram()->getTarget();
2653 
2654    CodeEmitter::prepareEmission(func);
2655 
2656    if (targ->hasSWSched)
2657       calculateSchedDataNVC0(targ, func);
2658 }
2659 
CodeEmitterGK110(const TargetNVC0 * target)2660 CodeEmitterGK110::CodeEmitterGK110(const TargetNVC0 *target)
2661    : CodeEmitter(target),
2662      targNVC0(target),
2663      writeIssueDelays(target->hasSWSched)
2664 {
2665    code = NULL;
2666    codeSize = codeSizeLimit = 0;
2667    relocInfo = NULL;
2668 }
2669 
2670 CodeEmitter *
createCodeEmitterGK110(Program::Type type)2671 TargetNVC0::createCodeEmitterGK110(Program::Type type)
2672 {
2673    CodeEmitterGK110 *emit = new CodeEmitterGK110(this);
2674    emit->setProgramType(type);
2675    return emit;
2676 }
2677 
2678 } // namespace nv50_ir
2679