• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2012 Christoph Bumiller
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "codegen/nv50_ir_target_nvc0.h"
24 
25 // CodeEmitter for GK110 encoding of the Fermi/Kepler ISA.
26 
27 namespace nv50_ir {
28 
29 class CodeEmitterGK110 : public CodeEmitter
30 {
31 public:
32    CodeEmitterGK110(const TargetNVC0 *);
33 
34    virtual bool emitInstruction(Instruction *);
35    virtual uint32_t getMinEncodingSize(const Instruction *) const;
36    virtual void prepareEmission(Function *);
37 
setProgramType(Program::Type pType)38    inline void setProgramType(Program::Type pType) { progType = pType; }
39 
40 private:
41    const TargetNVC0 *targNVC0;
42 
43    Program::Type progType;
44 
45    const bool writeIssueDelays;
46 
47 private:
48    void emitForm_21(const Instruction *, uint32_t opc2, uint32_t opc1);
49    void emitForm_C(const Instruction *, uint32_t opc, uint8_t ctg);
50    void emitForm_L(const Instruction *, uint32_t opc, uint8_t ctg, Modifier, int sCount = 3);
51 
52    void emitPredicate(const Instruction *);
53 
54    void setCAddress14(const ValueRef&);
55    void setShortImmediate(const Instruction *, const int s);
56    void setImmediate32(const Instruction *, const int s, Modifier);
57    void setSUConst16(const Instruction *, const int s);
58 
59    void modNegAbsF32_3b(const Instruction *, const int s);
60 
61    void emitCondCode(CondCode cc, int pos, uint8_t mask);
62    void emitInterpMode(const Instruction *);
63    void emitLoadStoreType(DataType ty, const int pos);
64    void emitCachingMode(CacheMode c, const int pos);
65    void emitSUGType(DataType, const int pos);
66    void emitSUCachingMode(CacheMode c);
67 
68    inline uint8_t getSRegEncoding(const ValueRef&);
69 
70    void emitRoundMode(RoundMode, const int pos, const int rintPos);
71    void emitRoundModeF(RoundMode, const int pos);
72    void emitRoundModeI(RoundMode, const int pos);
73 
74    void emitNegAbs12(const Instruction *);
75 
76    void emitNOP(const Instruction *);
77 
78    void emitLOAD(const Instruction *);
79    void emitSTORE(const Instruction *);
80    void emitMOV(const Instruction *);
81    void emitATOM(const Instruction *);
82    void emitCCTL(const Instruction *);
83 
84    void emitINTERP(const Instruction *);
85    void emitAFETCH(const Instruction *);
86    void emitPFETCH(const Instruction *);
87    void emitVFETCH(const Instruction *);
88    void emitEXPORT(const Instruction *);
89    void emitOUT(const Instruction *);
90 
91    void emitUADD(const Instruction *);
92    void emitFADD(const Instruction *);
93    void emitDADD(const Instruction *);
94    void emitIMUL(const Instruction *);
95    void emitFMUL(const Instruction *);
96    void emitDMUL(const Instruction *);
97    void emitIMAD(const Instruction *);
98    void emitISAD(const Instruction *);
99    void emitSHLADD(const Instruction *);
100    void emitFMAD(const Instruction *);
101    void emitDMAD(const Instruction *);
102    void emitMADSP(const Instruction *i);
103 
104    void emitNOT(const Instruction *);
105    void emitLogicOp(const Instruction *, uint8_t subOp);
106    void emitPOPC(const Instruction *);
107    void emitINSBF(const Instruction *);
108    void emitEXTBF(const Instruction *);
109    void emitBFIND(const Instruction *);
110    void emitPERMT(const Instruction *);
111    void emitShift(const Instruction *);
112    void emitShift64(const Instruction *);
113 
114    void emitSFnOp(const Instruction *, uint8_t subOp);
115 
116    void emitCVT(const Instruction *);
117    void emitMINMAX(const Instruction *);
118    void emitPreOp(const Instruction *);
119 
120    void emitSET(const CmpInstruction *);
121    void emitSLCT(const CmpInstruction *);
122    void emitSELP(const Instruction *);
123 
124    void emitTEXBAR(const Instruction *);
125    void emitTEX(const TexInstruction *);
126    void emitTEXCSAA(const TexInstruction *);
127    void emitTXQ(const TexInstruction *);
128 
129    void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);
130 
131    void emitPIXLD(const Instruction *);
132 
133    void emitBAR(const Instruction *);
134    void emitMEMBAR(const Instruction *);
135 
136    void emitFlow(const Instruction *);
137 
138    void emitSHFL(const Instruction *);
139 
140    void emitVOTE(const Instruction *);
141 
142    void emitSULDGB(const TexInstruction *);
143    void emitSUSTGx(const TexInstruction *);
144    void emitSUCLAMPMode(uint16_t);
145    void emitSUCalc(Instruction *);
146 
147    void emitVSHL(const Instruction *);
148    void emitVectorSubOp(const Instruction *);
149 
150    inline void defId(const ValueDef&, const int pos);
151    inline void srcId(const ValueRef&, const int pos);
152    inline void srcId(const ValueRef *, const int pos);
153    inline void srcId(const Instruction *, int s, const int pos);
154 
155    inline void srcAddr32(const ValueRef&, const int pos); // address / 4
156 
157    inline bool isLIMM(const ValueRef&, DataType ty, bool mod = false);
158 };
159 
// Register id written into a register field when no value is present.
#define GK110_GPR_ZERO 255

// Set the single bit whose absolute position within the instruction is the
// hexadecimal number @b (written without the 0x prefix). Expects an array
// named `code` to be in scope at the point of use.
#define GK110_SET_BIT_(b) code[(0x##b) / 32] |= 1 << ((0x##b) % 32)

// The helpers below each expand to one unbraced `if` statement and expect a
// variable named `i` (the instruction) to be in scope at the point of use.

// Set bit @b if source @s carries the neg / abs modifier.
#define NEG_(b, s) if (i->src(s).mod.neg()) GK110_SET_BIT_(b)
#define ABS_(b, s) if (i->src(s).mod.abs()) GK110_SET_BIT_(b)

// Set bit @b if source @s carries the NOT modifier.
#define NOT_(b, s) \
   if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT)) GK110_SET_BIT_(b)

// Set bit @b if the instruction has the ftz / dnz / saturate flag.
#define FTZ_(b) if (i->ftz) GK110_SET_BIT_(b)
#define DNZ_(b) if (i->dnz) GK110_SET_BIT_(b)

#define SAT_(b) if (i->saturate) GK110_SET_BIT_(b)

// Emit the instruction's rounding mode at bit @b through emitRoundMode<t>().
#define RND_(b, t) emitRoundMode##t(i->rnd, 0x##b)

// Storage data of the representative value behind a source / definition.
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) ((a).rep()->reg.data)
179 
srcId(const ValueRef & src,const int pos)180 void CodeEmitterGK110::srcId(const ValueRef& src, const int pos)
181 {
182    code[pos / 32] |= (src.get() ? SDATA(src).id : GK110_GPR_ZERO) << (pos % 32);
183 }
184 
srcId(const ValueRef * src,const int pos)185 void CodeEmitterGK110::srcId(const ValueRef *src, const int pos)
186 {
187    code[pos / 32] |= (src ? SDATA(*src).id : GK110_GPR_ZERO) << (pos % 32);
188 }
189 
srcId(const Instruction * insn,int s,int pos)190 void CodeEmitterGK110::srcId(const Instruction *insn, int s, int pos)
191 {
192    int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : GK110_GPR_ZERO;
193    code[pos / 32] |= r << (pos % 32);
194 }
195 
srcAddr32(const ValueRef & src,const int pos)196 void CodeEmitterGK110::srcAddr32(const ValueRef& src, const int pos)
197 {
198    code[pos / 32] |= (SDATA(src).offset >> 2) << (pos % 32);
199 }
200 
defId(const ValueDef & def,const int pos)201 void CodeEmitterGK110::defId(const ValueDef& def, const int pos)
202 {
203    code[pos / 32] |= (def.get() && def.getFile() != FILE_FLAGS ? DDATA(def).id : GK110_GPR_ZERO) << (pos % 32);
204 }
205 
isLIMM(const ValueRef & ref,DataType ty,bool mod)206 bool CodeEmitterGK110::isLIMM(const ValueRef& ref, DataType ty, bool mod)
207 {
208    const ImmediateValue *imm = ref.get()->asImm();
209 
210    if (ty == TYPE_F32)
211       return imm && imm->reg.data.u32 & 0xfff;
212    else
213       return imm && (imm->reg.data.s32 > 0x7ffff ||
214                      imm->reg.data.s32 < -0x80000);
215 }
216 
217 void
emitRoundMode(RoundMode rnd,const int pos,const int rintPos)218 CodeEmitterGK110::emitRoundMode(RoundMode rnd, const int pos, const int rintPos)
219 {
220    bool rint = false;
221    uint8_t n;
222 
223    switch (rnd) {
224    case ROUND_MI: rint = true; /* fall through */ case ROUND_M: n = 1; break;
225    case ROUND_PI: rint = true; /* fall through */ case ROUND_P: n = 2; break;
226    case ROUND_ZI: rint = true; /* fall through */ case ROUND_Z: n = 3; break;
227    default:
228       rint = rnd == ROUND_NI;
229       n = 0;
230       assert(rnd == ROUND_N || rnd == ROUND_NI);
231       break;
232    }
233    code[pos / 32] |= n << (pos % 32);
234    if (rint && rintPos >= 0)
235       code[rintPos / 32] |= 1 << (rintPos % 32);
236 }
237 
238 void
emitRoundModeF(RoundMode rnd,const int pos)239 CodeEmitterGK110::emitRoundModeF(RoundMode rnd, const int pos)
240 {
241    uint8_t n;
242 
243    switch (rnd) {
244    case ROUND_M: n = 1; break;
245    case ROUND_P: n = 2; break;
246    case ROUND_Z: n = 3; break;
247    default:
248       n = 0;
249       assert(rnd == ROUND_N);
250       break;
251    }
252    code[pos / 32] |= n << (pos % 32);
253 }
254 
255 void
emitRoundModeI(RoundMode rnd,const int pos)256 CodeEmitterGK110::emitRoundModeI(RoundMode rnd, const int pos)
257 {
258    uint8_t n;
259 
260    switch (rnd) {
261    case ROUND_MI: n = 1; break;
262    case ROUND_PI: n = 2; break;
263    case ROUND_ZI: n = 3; break;
264    default:
265       n = 0;
266       assert(rnd == ROUND_NI);
267       break;
268    }
269    code[pos / 32] |= n << (pos % 32);
270 }
271 
emitCondCode(CondCode cc,int pos,uint8_t mask)272 void CodeEmitterGK110::emitCondCode(CondCode cc, int pos, uint8_t mask)
273 {
274    uint8_t n;
275 
276    switch (cc) {
277    case CC_FL:  n = 0x00; break;
278    case CC_LT:  n = 0x01; break;
279    case CC_EQ:  n = 0x02; break;
280    case CC_LE:  n = 0x03; break;
281    case CC_GT:  n = 0x04; break;
282    case CC_NE:  n = 0x05; break;
283    case CC_GE:  n = 0x06; break;
284    case CC_LTU: n = 0x09; break;
285    case CC_EQU: n = 0x0a; break;
286    case CC_LEU: n = 0x0b; break;
287    case CC_GTU: n = 0x0c; break;
288    case CC_NEU: n = 0x0d; break;
289    case CC_GEU: n = 0x0e; break;
290    case CC_TR:  n = 0x0f; break;
291    case CC_NO:  n = 0x10; break;
292    case CC_NC:  n = 0x11; break;
293    case CC_NS:  n = 0x12; break;
294    case CC_NA:  n = 0x13; break;
295    case CC_A:   n = 0x14; break;
296    case CC_S:   n = 0x15; break;
297    case CC_C:   n = 0x16; break;
298    case CC_O:   n = 0x17; break;
299    default:
300       n = 0;
301       assert(!"invalid condition code");
302       break;
303    }
304    code[pos / 32] |= (n & mask) << (pos % 32);
305 }
306 
307 void
emitPredicate(const Instruction * i)308 CodeEmitterGK110::emitPredicate(const Instruction *i)
309 {
310    if (i->predSrc >= 0) {
311       srcId(i->src(i->predSrc), 18);
312       if (i->cc == CC_NOT_P)
313          code[0] |= 8 << 18; // negate
314       assert(i->getPredicate()->reg.file == FILE_PREDICATE);
315    } else {
316       code[0] |= 7 << 18;
317    }
318 }
319 
320 void
setCAddress14(const ValueRef & src)321 CodeEmitterGK110::setCAddress14(const ValueRef& src)
322 {
323    const Storage& res = src.get()->asSym()->reg;
324    const int32_t addr = res.data.offset / 4;
325 
326    code[0] |= (addr & 0x01ff) << 23;
327    code[1] |= (addr & 0x3e00) >> 9;
328    code[1] |= res.fileIndex << 5;
329 }
330 
331 void
setShortImmediate(const Instruction * i,const int s)332 CodeEmitterGK110::setShortImmediate(const Instruction *i, const int s)
333 {
334    const uint32_t u32 = i->getSrc(s)->asImm()->reg.data.u32;
335    const uint64_t u64 = i->getSrc(s)->asImm()->reg.data.u64;
336 
337    if (i->sType == TYPE_F32) {
338       assert(!(u32 & 0x00000fff));
339       code[0] |= ((u32 & 0x001ff000) >> 12) << 23;
340       code[1] |= ((u32 & 0x7fe00000) >> 21);
341       code[1] |= ((u32 & 0x80000000) >> 4);
342    } else
343    if (i->sType == TYPE_F64) {
344       assert(!(u64 & 0x00000fffffffffffULL));
345       code[0] |= ((u64 & 0x001ff00000000000ULL) >> 44) << 23;
346       code[1] |= ((u64 & 0x7fe0000000000000ULL) >> 53);
347       code[1] |= ((u64 & 0x8000000000000000ULL) >> 36);
348    } else {
349       assert((u32 & 0xfff80000) == 0 || (u32 & 0xfff80000) == 0xfff80000);
350       code[0] |= (u32 & 0x001ff) << 23;
351       code[1] |= (u32 & 0x7fe00) >> 9;
352       code[1] |= (u32 & 0x80000) << 8;
353    }
354 }
355 
356 void
setImmediate32(const Instruction * i,const int s,Modifier mod)357 CodeEmitterGK110::setImmediate32(const Instruction *i, const int s,
358                                  Modifier mod)
359 {
360    uint32_t u32 = i->getSrc(s)->asImm()->reg.data.u32;
361 
362    if (mod) {
363       ImmediateValue imm(i->getSrc(s)->asImm(), i->sType);
364       mod.applyTo(imm);
365       u32 = imm.reg.data.u32;
366    }
367 
368    code[0] |= u32 << 23;
369    code[1] |= u32 >> 9;
370 }
371 
372 void
emitForm_L(const Instruction * i,uint32_t opc,uint8_t ctg,Modifier mod,int sCount)373 CodeEmitterGK110::emitForm_L(const Instruction *i, uint32_t opc, uint8_t ctg,
374                              Modifier mod, int sCount)
375 {
376    code[0] = ctg;
377    code[1] = opc << 20;
378 
379    emitPredicate(i);
380 
381    defId(i->def(0), 2);
382 
383    for (int s = 0; s < sCount && i->srcExists(s); ++s) {
384       switch (i->src(s).getFile()) {
385       case FILE_GPR:
386          srcId(i->src(s), s ? 42 : 10);
387          break;
388       case FILE_IMMEDIATE:
389          setImmediate32(i, s, mod);
390          break;
391       default:
392          break;
393       }
394    }
395 }
396 
397 
398 void
emitForm_C(const Instruction * i,uint32_t opc,uint8_t ctg)399 CodeEmitterGK110::emitForm_C(const Instruction *i, uint32_t opc, uint8_t ctg)
400 {
401    code[0] = ctg;
402    code[1] = opc << 20;
403 
404    emitPredicate(i);
405 
406    defId(i->def(0), 2);
407 
408    switch (i->src(0).getFile()) {
409    case FILE_MEMORY_CONST:
410       code[1] |= 0x4 << 28;
411       setCAddress14(i->src(0));
412       break;
413    case FILE_GPR:
414       code[1] |= 0xc << 28;
415       srcId(i->src(0), 23);
416       break;
417    default:
418       assert(0);
419       break;
420    }
421 }
422 
423 // 0x2 for GPR, c[] and 0x1 for short immediate
424 void
emitForm_21(const Instruction * i,uint32_t opc2,uint32_t opc1)425 CodeEmitterGK110::emitForm_21(const Instruction *i, uint32_t opc2,
426                               uint32_t opc1)
427 {
428    const bool imm = i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE;
429 
430    int s1 = 23;
431    if (i->srcExists(2) && i->src(2).getFile() == FILE_MEMORY_CONST)
432       s1 = 42;
433 
434    if (imm) {
435       code[0] = 0x1;
436       code[1] = opc1 << 20;
437    } else {
438       code[0] = 0x2;
439       code[1] = (0xc << 28) | (opc2 << 20);
440    }
441 
442    emitPredicate(i);
443 
444    defId(i->def(0), 2);
445 
446    for (int s = 0; s < 3 && i->srcExists(s); ++s) {
447       switch (i->src(s).getFile()) {
448       case FILE_MEMORY_CONST:
449          code[1] &= (s == 2) ? ~(0x4 << 28) : ~(0x8 << 28);
450          setCAddress14(i->src(s));
451          break;
452       case FILE_IMMEDIATE:
453          setShortImmediate(i, s);
454          break;
455       case FILE_GPR:
456          srcId(i->src(s), s ? ((s == 2) ? 42 : s1) : 10);
457          break;
458       default:
459          if (i->op == OP_SELP) {
460             assert(s == 2 && i->src(s).getFile() == FILE_PREDICATE);
461             srcId(i->src(s), 42);
462          }
463          // ignore here, can be predicate or flags, but must not be address
464          break;
465       }
466    }
467    // 0x0 = invalid
468    // 0xc = rrr
469    // 0x8 = rrc
470    // 0x4 = rcr
471    assert(imm || (code[1] & (0xc << 28)));
472 }
473 
474 inline void
modNegAbsF32_3b(const Instruction * i,const int s)475 CodeEmitterGK110::modNegAbsF32_3b(const Instruction *i, const int s)
476 {
477    if (i->src(s).mod.abs()) code[1] &= ~(1 << 27);
478    if (i->src(s).mod.neg()) code[1] ^=  (1 << 27);
479 }
480 
481 void
emitNOP(const Instruction * i)482 CodeEmitterGK110::emitNOP(const Instruction *i)
483 {
484    code[0] = 0x00003c02;
485    code[1] = 0x85800000;
486 
487    if (i)
488       emitPredicate(i);
489    else
490       code[0] = 0x001c3c02;
491 }
492 
493 void
emitFMAD(const Instruction * i)494 CodeEmitterGK110::emitFMAD(const Instruction *i)
495 {
496    bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
497 
498    if (isLIMM(i->src(1), TYPE_F32)) {
499       assert(i->getDef(0)->reg.data.id == i->getSrc(2)->reg.data.id);
500 
501       // last source is dst, so force 2 sources
502       emitForm_L(i, 0x600, 0x0, 0, 2);
503 
504       if (i->flagsDef >= 0)
505          code[1] |= 1 << 23;
506 
507       SAT_(3a);
508       NEG_(3c, 2);
509 
510       if (neg1) {
511          code[1] |= 1 << 27;
512       }
513    } else {
514       emitForm_21(i, 0x0c0, 0x940);
515 
516       NEG_(34, 2);
517       SAT_(35);
518       RND_(36, F);
519 
520       if (code[0] & 0x1) {
521          if (neg1)
522             code[1] ^= 1 << 27;
523       } else
524       if (neg1) {
525          code[1] |= 1 << 19;
526       }
527    }
528 
529    FTZ_(38);
530    DNZ_(39);
531 }
532 
533 void
emitDMAD(const Instruction * i)534 CodeEmitterGK110::emitDMAD(const Instruction *i)
535 {
536    assert(!i->saturate);
537    assert(!i->ftz);
538 
539    emitForm_21(i, 0x1b8, 0xb38);
540 
541    NEG_(34, 2);
542    RND_(36, F);
543 
544    bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
545 
546    if (code[0] & 0x1) {
547       if (neg1)
548          code[1] ^= 1 << 27;
549    } else
550    if (neg1) {
551       code[1] |= 1 << 19;
552    }
553 }
554 
555 void
emitMADSP(const Instruction * i)556 CodeEmitterGK110::emitMADSP(const Instruction *i)
557 {
558    emitForm_21(i, 0x140, 0xa40);
559 
560    if (i->subOp == NV50_IR_SUBOP_MADSP_SD) {
561       code[1] |= 0x00c00000;
562    } else {
563       code[1] |= (i->subOp & 0x00f) << 19; // imadp1
564       code[1] |= (i->subOp & 0x0f0) << 20; // imadp2
565       code[1] |= (i->subOp & 0x100) << 11; // imadp3
566       code[1] |= (i->subOp & 0x200) << 15; // imadp3
567       code[1] |= (i->subOp & 0xc00) << 12; // imadp3
568    }
569 
570    if (i->flagsDef >= 0)
571       code[1] |= 1 << 18;
572 }
573 
574 void
emitFMUL(const Instruction * i)575 CodeEmitterGK110::emitFMUL(const Instruction *i)
576 {
577    bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
578 
579    assert(i->postFactor >= -3 && i->postFactor <= 3);
580 
581    if (isLIMM(i->src(1), TYPE_F32)) {
582       emitForm_L(i, 0x200, 0x2, Modifier(0));
583 
584       FTZ_(38);
585       DNZ_(39);
586       SAT_(3a);
587       if (neg)
588          code[1] ^= 1 << 22;
589 
590       assert(i->postFactor == 0);
591    } else {
592       emitForm_21(i, 0x234, 0xc34);
593       code[1] |= ((i->postFactor > 0) ?
594                   (7 - i->postFactor) : (0 - i->postFactor)) << 12;
595 
596       RND_(2a, F);
597       FTZ_(2f);
598       DNZ_(30);
599       SAT_(35);
600 
601       if (code[0] & 0x1) {
602          if (neg)
603             code[1] ^= 1 << 27;
604       } else
605       if (neg) {
606          code[1] |= 1 << 19;
607       }
608    }
609 }
610 
611 void
emitDMUL(const Instruction * i)612 CodeEmitterGK110::emitDMUL(const Instruction *i)
613 {
614    bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
615 
616    assert(!i->postFactor);
617    assert(!i->saturate);
618    assert(!i->ftz);
619    assert(!i->dnz);
620 
621    emitForm_21(i, 0x240, 0xc40);
622 
623    RND_(2a, F);
624 
625    if (code[0] & 0x1) {
626       if (neg)
627          code[1] ^= 1 << 27;
628    } else
629    if (neg) {
630       code[1] |= 1 << 19;
631    }
632 }
633 
634 void
emitIMUL(const Instruction * i)635 CodeEmitterGK110::emitIMUL(const Instruction *i)
636 {
637    assert(!i->src(0).mod.neg() && !i->src(1).mod.neg());
638    assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
639 
640    if (isLIMM(i->src(1), TYPE_S32)) {
641       emitForm_L(i, 0x280, 2, Modifier(0));
642 
643       if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
644          code[1] |= 1 << 24;
645       if (i->sType == TYPE_S32)
646          code[1] |= 3 << 25;
647    } else {
648       emitForm_21(i, 0x21c, 0xc1c);
649 
650       if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
651          code[1] |= 1 << 10;
652       if (i->sType == TYPE_S32)
653          code[1] |= 3 << 11;
654    }
655 }
656 
657 void
emitFADD(const Instruction * i)658 CodeEmitterGK110::emitFADD(const Instruction *i)
659 {
660    if (isLIMM(i->src(1), TYPE_F32)) {
661       assert(i->rnd == ROUND_N);
662       assert(!i->saturate);
663 
664       Modifier mod = i->src(1).mod ^
665          Modifier(i->op == OP_SUB ? NV50_IR_MOD_NEG : 0);
666 
667       emitForm_L(i, 0x400, 0, mod);
668 
669       FTZ_(3a);
670       NEG_(3b, 0);
671       ABS_(39, 0);
672    } else {
673       emitForm_21(i, 0x22c, 0xc2c);
674 
675       FTZ_(2f);
676       RND_(2a, F);
677       ABS_(31, 0);
678       NEG_(33, 0);
679       SAT_(35);
680 
681       if (code[0] & 0x1) {
682          modNegAbsF32_3b(i, 1);
683          if (i->op == OP_SUB) code[1] ^= 1 << 27;
684       } else {
685          ABS_(34, 1);
686          NEG_(30, 1);
687          if (i->op == OP_SUB) code[1] ^= 1 << 16;
688       }
689    }
690 }
691 
692 void
emitDADD(const Instruction * i)693 CodeEmitterGK110::emitDADD(const Instruction *i)
694 {
695    assert(!i->saturate);
696    assert(!i->ftz);
697 
698    emitForm_21(i, 0x238, 0xc38);
699    RND_(2a, F);
700    ABS_(31, 0);
701    NEG_(33, 0);
702    if (code[0] & 0x1) {
703       modNegAbsF32_3b(i, 1);
704       if (i->op == OP_SUB) code[1] ^= 1 << 27;
705    } else {
706       NEG_(30, 1);
707       ABS_(34, 1);
708       if (i->op == OP_SUB) code[1] ^= 1 << 16;
709    }
710 }
711 
712 void
emitUADD(const Instruction * i)713 CodeEmitterGK110::emitUADD(const Instruction *i)
714 {
715    uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(1).mod.neg();
716 
717    if (i->op == OP_SUB)
718       addOp ^= 1;
719 
720    assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
721 
722    if (isLIMM(i->src(1), TYPE_S32)) {
723       emitForm_L(i, 0x400, 1, Modifier((addOp & 1) ? NV50_IR_MOD_NEG : 0));
724 
725       if (addOp & 2)
726          code[1] |= 1 << 27;
727 
728       assert(i->flagsDef < 0);
729       assert(i->flagsSrc < 0);
730 
731       SAT_(39);
732    } else {
733       emitForm_21(i, 0x208, 0xc08);
734 
735       assert(addOp != 3); // would be add-plus-one
736 
737       code[1] |= addOp << 19;
738 
739       if (i->flagsDef >= 0)
740          code[1] |= 1 << 18; // write carry
741       if (i->flagsSrc >= 0)
742          code[1] |= 1 << 14; // add carry
743 
744       SAT_(35);
745    }
746 }
747 
748 void
emitIMAD(const Instruction * i)749 CodeEmitterGK110::emitIMAD(const Instruction *i)
750 {
751    uint8_t addOp =
752       i->src(2).mod.neg() | ((i->src(0).mod.neg() ^ i->src(1).mod.neg()) << 1);
753 
754    emitForm_21(i, 0x100, 0xa00);
755 
756    assert(addOp != 3);
757    code[1] |= addOp << 26;
758 
759    if (i->sType == TYPE_S32)
760       code[1] |= (1 << 19) | (1 << 24);
761 
762    if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
763       code[1] |= 1 << 25;
764 
765    if (i->flagsDef >= 0) code[1] |= 1 << 18;
766    if (i->flagsSrc >= 0) code[1] |= 1 << 20;
767 
768    SAT_(35);
769 }
770 
771 void
emitISAD(const Instruction * i)772 CodeEmitterGK110::emitISAD(const Instruction *i)
773 {
774    assert(i->dType == TYPE_S32 || i->dType == TYPE_U32);
775 
776    emitForm_21(i, 0x1f4, 0xb74);
777 
778    if (i->dType == TYPE_S32)
779       code[1] |= 1 << 19;
780 }
781 
782 void
emitSHLADD(const Instruction * i)783 CodeEmitterGK110::emitSHLADD(const Instruction *i)
784 {
785    uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(2).mod.neg();
786    const ImmediateValue *imm = i->src(1).get()->asImm();
787    assert(imm);
788 
789    if (i->src(2).getFile() == FILE_IMMEDIATE) {
790       code[0] = 0x1;
791       code[1] = 0xc0c << 20;
792    } else {
793       code[0] = 0x2;
794       code[1] = 0x20c << 20;
795    }
796    code[1] |= addOp << 19;
797 
798    emitPredicate(i);
799 
800    defId(i->def(0), 2);
801    srcId(i->src(0), 10);
802 
803    if (i->flagsDef >= 0)
804       code[1] |= 1 << 18;
805 
806    assert(!(imm->reg.data.u32 & 0xffffffe0));
807    code[1] |= imm->reg.data.u32 << 10;
808 
809    switch (i->src(2).getFile()) {
810    case FILE_GPR:
811       assert(code[0] & 0x2);
812       code[1] |= 0xc << 28;
813       srcId(i->src(2), 23);
814       break;
815    case FILE_MEMORY_CONST:
816       assert(code[0] & 0x2);
817       code[1] |= 0x4 << 28;
818       setCAddress14(i->src(2));
819       break;
820    case FILE_IMMEDIATE:
821       assert(code[0] & 0x1);
822       setShortImmediate(i, 2);
823       break;
824    default:
825       assert(!"bad src2 file");
826       break;
827    }
828 }
829 
830 void
emitNOT(const Instruction * i)831 CodeEmitterGK110::emitNOT(const Instruction *i)
832 {
833    code[0] = 0x0003fc02; // logop(mov2) dst, 0, not src
834    code[1] = 0x22003800;
835 
836    emitPredicate(i);
837 
838    defId(i->def(0), 2);
839 
840    switch (i->src(0).getFile()) {
841    case FILE_GPR:
842       code[1] |= 0xc << 28;
843       srcId(i->src(0), 23);
844       break;
845    case FILE_MEMORY_CONST:
846       code[1] |= 0x4 << 28;
847       setCAddress14(i->src(0));
848       break;
849    default:
850       assert(0);
851       break;
852    }
853 }
854 
855 void
emitLogicOp(const Instruction * i,uint8_t subOp)856 CodeEmitterGK110::emitLogicOp(const Instruction *i, uint8_t subOp)
857 {
858    if (i->def(0).getFile() == FILE_PREDICATE) {
859       code[0] = 0x00000002 | (subOp << 27);
860       code[1] = 0x84800000;
861 
862       emitPredicate(i);
863 
864       defId(i->def(0), 5);
865       srcId(i->src(0), 14);
866       if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 17;
867       srcId(i->src(1), 32);
868       if (i->src(1).mod == Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 3;
869 
870       if (i->defExists(1)) {
871          defId(i->def(1), 2);
872       } else {
873          code[0] |= 7 << 2;
874       }
875       // (a OP b) OP c
876       if (i->predSrc != 2 && i->srcExists(2)) {
877          code[1] |= subOp << 16;
878          srcId(i->src(2), 42);
879          if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 13;
880       } else {
881          code[1] |= 7 << 10;
882       }
883    } else
884    if (isLIMM(i->src(1), TYPE_S32)) {
885       emitForm_L(i, 0x200, 0, i->src(1).mod);
886       code[1] |= subOp << 24;
887       NOT_(3a, 0);
888    } else {
889       emitForm_21(i, 0x220, 0xc20);
890       code[1] |= subOp << 12;
891       NOT_(2a, 0);
892       NOT_(2b, 1);
893    }
894 }
895 
896 void
emitPOPC(const Instruction * i)897 CodeEmitterGK110::emitPOPC(const Instruction *i)
898 {
899    assert(!isLIMM(i->src(1), TYPE_S32, true));
900 
901    emitForm_21(i, 0x204, 0xc04);
902 
903    NOT_(2a, 0);
904    if (!(code[0] & 0x1))
905       NOT_(2b, 1);
906 }
907 
908 void
emitINSBF(const Instruction * i)909 CodeEmitterGK110::emitINSBF(const Instruction *i)
910 {
911    emitForm_21(i, 0x1f8, 0xb78);
912 }
913 
914 void
emitEXTBF(const Instruction * i)915 CodeEmitterGK110::emitEXTBF(const Instruction *i)
916 {
917    emitForm_21(i, 0x600, 0xc00);
918 
919    if (i->dType == TYPE_S32)
920       code[1] |= 0x80000;
921    if (i->subOp == NV50_IR_SUBOP_EXTBF_REV)
922       code[1] |= 0x800;
923 }
924 
925 void
emitBFIND(const Instruction * i)926 CodeEmitterGK110::emitBFIND(const Instruction *i)
927 {
928    emitForm_C(i, 0x218, 0x2);
929 
930    if (i->dType == TYPE_S32)
931       code[1] |= 0x80000;
932    if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
933       code[1] |= 0x800;
934    if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT)
935       code[1] |= 0x1000;
936 }
937 
938 void
emitPERMT(const Instruction * i)939 CodeEmitterGK110::emitPERMT(const Instruction *i)
940 {
941    emitForm_21(i, 0x1e0, 0xb60);
942 
943    code[1] |= i->subOp << 19;
944 }
945 
946 void
emitShift(const Instruction * i)947 CodeEmitterGK110::emitShift(const Instruction *i)
948 {
949    if (i->op == OP_SHR) {
950       emitForm_21(i, 0x214, 0xc14);
951       if (isSignedType(i->dType))
952          code[1] |= 1 << 19;
953    } else {
954       emitForm_21(i, 0x224, 0xc24);
955    }
956 
957    if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP)
958       code[1] |= 1 << 10;
959 }
960 
961 void
emitShift64(const Instruction * i)962 CodeEmitterGK110::emitShift64(const Instruction *i)
963 {
964    if (i->op == OP_SHR) {
965       emitForm_21(i, 0x27c, 0xc7c);
966       if (isSignedType(i->sType))
967          code[1] |= 0x100;
968       if (i->subOp & NV50_IR_SUBOP_SHIFT_HIGH)
969          code[1] |= 1 << 19;
970    } else {
971       emitForm_21(i, 0xdfc, 0xf7c);
972    }
973    code[1] |= 0x200;
974 
975    if (i->subOp & NV50_IR_SUBOP_SHIFT_WRAP)
976       code[1] |= 1 << 21;
977 }
978 
979 void
emitPreOp(const Instruction * i)980 CodeEmitterGK110::emitPreOp(const Instruction *i)
981 {
982    emitForm_C(i, 0x248, 0x2);
983 
984    if (i->op == OP_PREEX2)
985       code[1] |= 1 << 10;
986 
987    NEG_(30, 0);
988    ABS_(34, 0);
989 }
990 
991 void
emitSFnOp(const Instruction * i,uint8_t subOp)992 CodeEmitterGK110::emitSFnOp(const Instruction *i, uint8_t subOp)
993 {
994    code[0] = 0x00000002 | (subOp << 23);
995    code[1] = 0x84000000;
996 
997    emitPredicate(i);
998 
999    defId(i->def(0), 2);
1000    srcId(i->src(0), 10);
1001 
1002    NEG_(33, 0);
1003    ABS_(31, 0);
1004    SAT_(35);
1005 }
1006 
1007 void
emitMINMAX(const Instruction * i)1008 CodeEmitterGK110::emitMINMAX(const Instruction *i)
1009 {
1010    uint32_t op2, op1;
1011 
1012    switch (i->dType) {
1013    case TYPE_U32:
1014    case TYPE_S32:
1015       op2 = 0x210;
1016       op1 = 0xc10;
1017       break;
1018    case TYPE_F32:
1019       op2 = 0x230;
1020       op1 = 0xc30;
1021       break;
1022    case TYPE_F64:
1023       op2 = 0x228;
1024       op1 = 0xc28;
1025       break;
1026    default:
1027       assert(0);
1028       op2 = 0;
1029       op1 = 0;
1030       break;
1031    }
1032    emitForm_21(i, op2, op1);
1033 
1034    if (i->dType == TYPE_S32)
1035       code[1] |= 1 << 19;
1036    code[1] |= (i->op == OP_MIN) ? 0x1c00 : 0x3c00; // [!]pt
1037    code[1] |= i->subOp << 14;
1038    if (i->flagsDef >= 0)
1039       code[1] |= i->subOp << 18;
1040 
1041    FTZ_(2f);
1042    ABS_(31, 0);
1043    NEG_(33, 0);
1044    if (code[0] & 0x1) {
1045       modNegAbsF32_3b(i, 1);
1046    } else {
1047       ABS_(34, 1);
1048       NEG_(30, 1);
1049    }
1050 }
1051 
1052 void
emitCVT(const Instruction * i)1053 CodeEmitterGK110::emitCVT(const Instruction *i)
1054 {
1055    const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
1056    const bool f2i = !isFloatType(i->dType) && isFloatType(i->sType);
1057    const bool i2f = isFloatType(i->dType) && !isFloatType(i->sType);
1058 
1059    bool sat = i->saturate;
1060    bool abs = i->src(0).mod.abs();
1061    bool neg = i->src(0).mod.neg();
1062 
1063    RoundMode rnd = i->rnd;
1064 
1065    switch (i->op) {
1066    case OP_CEIL:  rnd = f2f ? ROUND_PI : ROUND_P; break;
1067    case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;
1068    case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;
1069    case OP_SAT: sat = true; break;
1070    case OP_NEG: neg = !neg; break;
1071    case OP_ABS: abs = true; neg = false; break;
1072    default:
1073       break;
1074    }
1075 
1076    DataType dType;
1077 
1078    if (i->op == OP_NEG && i->dType == TYPE_U32)
1079       dType = TYPE_S32;
1080    else
1081       dType = i->dType;
1082 
1083 
1084    uint32_t op;
1085 
1086    if      (f2f) op = 0x254;
1087    else if (f2i) op = 0x258;
1088    else if (i2f) op = 0x25c;
1089    else          op = 0x260;
1090 
1091    emitForm_C(i, op, 0x2);
1092 
1093    FTZ_(2f);
1094    if (neg) code[1] |= 1 << 16;
1095    if (abs) code[1] |= 1 << 20;
1096    if (sat) code[1] |= 1 << 21;
1097 
1098    emitRoundMode(rnd, 32 + 10, f2f ? (32 + 13) : -1);
1099 
1100    code[0] |= typeSizeofLog2(dType) << 10;
1101    code[0] |= typeSizeofLog2(i->sType) << 12;
1102    code[1] |= i->subOp << 12;
1103 
1104    if (isSignedIntType(dType))
1105       code[0] |= 0x4000;
1106    if (isSignedIntType(i->sType))
1107       code[0] |= 0x8000;
1108 }
1109 
1110 void
emitSET(const CmpInstruction * i)1111 CodeEmitterGK110::emitSET(const CmpInstruction *i)
1112 {
1113    uint16_t op1, op2;
1114 
1115    if (i->def(0).getFile() == FILE_PREDICATE) {
1116       switch (i->sType) {
1117       case TYPE_F32: op2 = 0x1d8; op1 = 0xb58; break;
1118       case TYPE_F64: op2 = 0x1c0; op1 = 0xb40; break;
1119       default:
1120          op2 = 0x1b0;
1121          op1 = 0xb30;
1122          break;
1123       }
1124       emitForm_21(i, op2, op1);
1125 
1126       NEG_(2e, 0);
1127       ABS_(9, 0);
1128       if (!(code[0] & 0x1)) {
1129          NEG_(8, 1);
1130          ABS_(2f, 1);
1131       } else {
1132          modNegAbsF32_3b(i, 1);
1133       }
1134       FTZ_(32);
1135 
1136       // normal DST field is negated predicate result
1137       code[0] = (code[0] & ~0xfc) | ((code[0] << 3) & 0xe0);
1138       if (i->defExists(1))
1139          defId(i->def(1), 2);
1140       else
1141          code[0] |= 0x1c;
1142    } else {
1143       switch (i->sType) {
1144       case TYPE_F32: op2 = 0x000; op1 = 0x800; break;
1145       case TYPE_F64: op2 = 0x080; op1 = 0x900; break;
1146       default:
1147          op2 = 0x1a8;
1148          op1 = 0xb28;
1149          break;
1150       }
1151       emitForm_21(i, op2, op1);
1152 
1153       NEG_(2e, 0);
1154       ABS_(39, 0);
1155       if (!(code[0] & 0x1)) {
1156          NEG_(38, 1);
1157          ABS_(2f, 1);
1158       } else {
1159          modNegAbsF32_3b(i, 1);
1160       }
1161       FTZ_(3a);
1162 
1163       if (i->dType == TYPE_F32) {
1164          if (isFloatType(i->sType))
1165             code[1] |= 1 << 23;
1166          else
1167             code[1] |= 1 << 15;
1168       }
1169    }
1170    if (i->sType == TYPE_S32)
1171       code[1] |= 1 << 19;
1172 
1173    if (i->op != OP_SET) {
1174       switch (i->op) {
1175       case OP_SET_AND: code[1] |= 0x0 << 16; break;
1176       case OP_SET_OR:  code[1] |= 0x1 << 16; break;
1177       case OP_SET_XOR: code[1] |= 0x2 << 16; break;
1178       default:
1179          assert(0);
1180          break;
1181       }
1182       srcId(i->src(2), 0x2a);
1183    } else {
1184       code[1] |= 0x7 << 10;
1185    }
1186    if (i->flagsSrc >= 0)
1187       code[1] |= 1 << 14;
1188    emitCondCode(i->setCond,
1189                 isFloatType(i->sType) ? 0x33 : 0x34,
1190                 isFloatType(i->sType) ? 0xf : 0x7);
1191 }
1192 
1193 void
emitSLCT(const CmpInstruction * i)1194 CodeEmitterGK110::emitSLCT(const CmpInstruction *i)
1195 {
1196    CondCode cc = i->setCond;
1197    if (i->src(2).mod.neg())
1198       cc = reverseCondCode(cc);
1199 
1200    if (i->dType == TYPE_F32) {
1201       emitForm_21(i, 0x1d0, 0xb50);
1202       FTZ_(32);
1203       emitCondCode(cc, 0x33, 0xf);
1204    } else {
1205       emitForm_21(i, 0x1a0, 0xb20);
1206       emitCondCode(cc, 0x34, 0x7);
1207       if (i->dType == TYPE_S32)
1208          code[1] |= 1 << 19;
1209    }
1210 }
1211 
1212 void
gk110_selpFlip(const FixupEntry * entry,uint32_t * code,const FixupData & data)1213 gk110_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
1214 {
1215    int loc = entry->loc;
1216    if (data.force_persample_interp)
1217       code[loc + 1] |= 1 << 13;
1218    else
1219       code[loc + 1] &= ~(1 << 13);
1220 }
1221 
emitSELP(const Instruction * i)1222 void CodeEmitterGK110::emitSELP(const Instruction *i)
1223 {
1224    emitForm_21(i, 0x250, 0x050);
1225 
1226    if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
1227       code[1] |= 1 << 13;
1228 
1229    if (i->subOp == 1) {
1230       addInterp(0, 0, gk110_selpFlip);
1231    }
1232 }
1233 
emitTEXBAR(const Instruction * i)1234 void CodeEmitterGK110::emitTEXBAR(const Instruction *i)
1235 {
1236    code[0] = 0x0000003e | (i->subOp << 23);
1237    code[1] = 0x77000000;
1238 
1239    emitPredicate(i);
1240 }
1241 
emitTEXCSAA(const TexInstruction * i)1242 void CodeEmitterGK110::emitTEXCSAA(const TexInstruction *i)
1243 {
1244    code[0] = 0x00000002;
1245    code[1] = 0x76c00000;
1246 
1247    code[1] |= i->tex.r << 9;
1248    // code[1] |= i->tex.s << (9 + 8);
1249 
1250    if (i->tex.liveOnly)
1251       code[0] |= 0x80000000;
1252 
1253    defId(i->def(0), 2);
1254    srcId(i->src(0), 10);
1255 }
1256 
1257 static inline bool
isNextIndependentTex(const TexInstruction * i)1258 isNextIndependentTex(const TexInstruction *i)
1259 {
1260    if (!i->next || !isTextureOp(i->next->op))
1261       return false;
1262    if (i->getDef(0)->interfers(i->next->getSrc(0)))
1263       return false;
1264    return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1));
1265 }
1266 
1267 void
emitTEX(const TexInstruction * i)1268 CodeEmitterGK110::emitTEX(const TexInstruction *i)
1269 {
1270    const bool ind = i->tex.rIndirectSrc >= 0;
1271 
1272    if (ind) {
1273       code[0] = 0x00000002;
1274       switch (i->op) {
1275       case OP_TXD:
1276          code[1] = 0x7e000000;
1277          break;
1278       case OP_TXLQ:
1279          code[1] = 0x7e800000;
1280          break;
1281       case OP_TXF:
1282          code[1] = 0x78000000;
1283          break;
1284       case OP_TXG:
1285          code[1] = 0x7dc00000;
1286          break;
1287       default:
1288          code[1] = 0x7d800000;
1289          break;
1290       }
1291    } else {
1292       switch (i->op) {
1293       case OP_TXD:
1294          code[0] = 0x00000002;
1295          code[1] = 0x76000000;
1296          code[1] |= i->tex.r << 9;
1297          break;
1298       case OP_TXLQ:
1299          code[0] = 0x00000002;
1300          code[1] = 0x76800000;
1301          code[1] |= i->tex.r << 9;
1302          break;
1303       case OP_TXF:
1304          code[0] = 0x00000002;
1305          code[1] = 0x70000000;
1306          code[1] |= i->tex.r << 13;
1307          break;
1308       case OP_TXG:
1309          code[0] = 0x00000001;
1310          code[1] = 0x70000000;
1311          code[1] |= i->tex.r << 15;
1312          break;
1313       default:
1314          code[0] = 0x00000001;
1315          code[1] = 0x60000000;
1316          code[1] |= i->tex.r << 15;
1317          break;
1318       }
1319    }
1320 
1321    code[1] |= isNextIndependentTex(i) ? 0x1 : 0x2; // t : p mode
1322 
1323    if (i->tex.liveOnly)
1324       code[0] |= 0x80000000;
1325 
1326    switch (i->op) {
1327    case OP_TEX: break;
1328    case OP_TXB: code[1] |= 0x2000; break;
1329    case OP_TXL: code[1] |= 0x3000; break;
1330    case OP_TXF: break;
1331    case OP_TXG: break;
1332    case OP_TXD: break;
1333    case OP_TXLQ: break;
1334    default:
1335       assert(!"invalid texture op");
1336       break;
1337    }
1338 
1339    if (i->op == OP_TXF) {
1340       if (!i->tex.levelZero)
1341          code[1] |= 0x1000;
1342    } else
1343    if (i->tex.levelZero) {
1344       code[1] |= 0x1000;
1345    }
1346 
1347    if (i->op != OP_TXD && i->tex.derivAll)
1348       code[1] |= 0x200;
1349 
1350    emitPredicate(i);
1351 
1352    code[1] |= i->tex.mask << 2;
1353 
1354    const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1355 
1356    defId(i->def(0), 2);
1357    srcId(i->src(0), 10);
1358    srcId(i, src1, 23);
1359 
1360    if (i->op == OP_TXG) code[1] |= i->tex.gatherComp << 13;
1361 
1362    // texture target:
1363    code[1] |= (i->tex.target.isCube() ? 3 : (i->tex.target.getDim() - 1)) << 7;
1364    if (i->tex.target.isArray())
1365       code[1] |= 0x40;
1366    if (i->tex.target.isShadow())
1367       code[1] |= 0x400;
1368    if (i->tex.target == TEX_TARGET_2D_MS ||
1369        i->tex.target == TEX_TARGET_2D_MS_ARRAY)
1370       code[1] |= 0x800;
1371 
1372    if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) {
1373       // ?
1374    }
1375 
1376    if (i->tex.useOffsets == 1) {
1377       switch (i->op) {
1378       case OP_TXF: code[1] |= 0x200; break;
1379       case OP_TXD: code[1] |= 0x00400000; break;
1380       default: code[1] |= 0x800; break;
1381       }
1382    }
1383    if (i->tex.useOffsets == 4)
1384       code[1] |= 0x1000;
1385 }
1386 
1387 void
emitTXQ(const TexInstruction * i)1388 CodeEmitterGK110::emitTXQ(const TexInstruction *i)
1389 {
1390    code[0] = 0x00000002;
1391    code[1] = 0x75400001;
1392 
1393    switch (i->tex.query) {
1394    case TXQ_DIMS:            code[0] |= 0x01 << 25; break;
1395    case TXQ_TYPE:            code[0] |= 0x02 << 25; break;
1396    case TXQ_SAMPLE_POSITION: code[0] |= 0x05 << 25; break;
1397    case TXQ_FILTER:          code[0] |= 0x10 << 25; break;
1398    case TXQ_LOD:             code[0] |= 0x12 << 25; break;
1399    case TXQ_BORDER_COLOUR:   code[0] |= 0x16 << 25; break;
1400    default:
1401       assert(!"invalid texture query");
1402       break;
1403    }
1404 
1405    code[1] |= i->tex.mask << 2;
1406    code[1] |= i->tex.r << 9;
1407    if (/*i->tex.sIndirectSrc >= 0 || */i->tex.rIndirectSrc >= 0)
1408       code[1] |= 0x08000000;
1409 
1410    defId(i->def(0), 2);
1411    srcId(i->src(0), 10);
1412 
1413    emitPredicate(i);
1414 }
1415 
1416 void
emitQUADOP(const Instruction * i,uint8_t qOp,uint8_t laneMask)1417 CodeEmitterGK110::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
1418 {
1419    code[0] = 0x00000002 | ((qOp & 1) << 31);
1420    code[1] = 0x7fc00200 | (qOp >> 1) | (laneMask << 12); // dall
1421 
1422    defId(i->def(0), 2);
1423    srcId(i->src(0), 10);
1424    srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 23);
1425 
1426    emitPredicate(i);
1427 }
1428 
1429 void
emitPIXLD(const Instruction * i)1430 CodeEmitterGK110::emitPIXLD(const Instruction *i)
1431 {
1432    emitForm_L(i, 0x7f4, 2, Modifier(0));
1433    code[1] |= i->subOp << 2;
1434    code[1] |= 0x00070000;
1435 }
1436 
1437 void
emitBAR(const Instruction * i)1438 CodeEmitterGK110::emitBAR(const Instruction *i)
1439 {
1440    code[0] = 0x00000002;
1441    code[1] = 0x85400000;
1442 
1443    switch (i->subOp) {
1444    case NV50_IR_SUBOP_BAR_ARRIVE:   code[1] |= 0x08; break;
1445    case NV50_IR_SUBOP_BAR_RED_AND:  code[1] |= 0x50; break;
1446    case NV50_IR_SUBOP_BAR_RED_OR:   code[1] |= 0x90; break;
1447    case NV50_IR_SUBOP_BAR_RED_POPC: code[1] |= 0x10; break;
1448    default:
1449       assert(i->subOp == NV50_IR_SUBOP_BAR_SYNC);
1450       break;
1451    }
1452 
1453    emitPredicate(i);
1454 
1455    // barrier id
1456    if (i->src(0).getFile() == FILE_GPR) {
1457       srcId(i->src(0), 10);
1458    } else {
1459       ImmediateValue *imm = i->getSrc(0)->asImm();
1460       assert(imm);
1461       code[0] |= imm->reg.data.u32 << 10;
1462       code[1] |= 0x8000;
1463    }
1464 
1465    // thread count
1466    if (i->src(1).getFile() == FILE_GPR) {
1467       srcId(i->src(1), 23);
1468    } else {
1469       ImmediateValue *imm = i->getSrc(0)->asImm();
1470       assert(imm);
1471       assert(imm->reg.data.u32 <= 0xfff);
1472       code[0] |= imm->reg.data.u32 << 23;
1473       code[1] |= imm->reg.data.u32 >> 9;
1474       code[1] |= 0x4000;
1475    }
1476 
1477    if (i->srcExists(2) && (i->predSrc != 2)) {
1478       srcId(i->src(2), 32 + 10);
1479       if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
1480          code[1] |= 1 << 13;
1481    } else {
1482       code[1] |= 7 << 10;
1483    }
1484 }
1485 
emitMEMBAR(const Instruction * i)1486 void CodeEmitterGK110::emitMEMBAR(const Instruction *i)
1487 {
1488    code[0] = 0x00000002 | NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp) << 8;
1489    code[1] = 0x7cc00000;
1490 
1491    emitPredicate(i);
1492 }
1493 
1494 void
emitFlow(const Instruction * i)1495 CodeEmitterGK110::emitFlow(const Instruction *i)
1496 {
1497    const FlowInstruction *f = i->asFlow();
1498 
1499    unsigned mask; // bit 0: predicate, bit 1: target
1500 
1501    code[0] = 0x00000000;
1502 
1503    switch (i->op) {
1504    case OP_BRA:
1505       code[1] = f->absolute ? 0x10800000 : 0x12000000;
1506       if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
1507          code[0] |= 0x80;
1508       mask = 3;
1509       break;
1510    case OP_CALL:
1511       code[1] = f->absolute ? 0x11000000 : 0x13000000;
1512       if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
1513          code[0] |= 0x80;
1514       mask = 2;
1515       break;
1516 
1517    case OP_EXIT:    code[1] = 0x18000000; mask = 1; break;
1518    case OP_RET:     code[1] = 0x19000000; mask = 1; break;
1519    case OP_DISCARD: code[1] = 0x19800000; mask = 1; break;
1520    case OP_BREAK:   code[1] = 0x1a000000; mask = 1; break;
1521    case OP_CONT:    code[1] = 0x1a800000; mask = 1; break;
1522 
1523    case OP_JOINAT:   code[1] = 0x14800000; mask = 2; break;
1524    case OP_PREBREAK: code[1] = 0x15000000; mask = 2; break;
1525    case OP_PRECONT:  code[1] = 0x15800000; mask = 2; break;
1526    case OP_PRERET:   code[1] = 0x13800000; mask = 2; break;
1527 
1528    case OP_QUADON:  code[1] = 0x1b800000; mask = 0; break;
1529    case OP_QUADPOP: code[1] = 0x1c000000; mask = 0; break;
1530    case OP_BRKPT:   code[1] = 0x00000000; mask = 0; break;
1531    default:
1532       assert(!"invalid flow operation");
1533       return;
1534    }
1535 
1536    if (mask & 1) {
1537       emitPredicate(i);
1538       if (i->flagsSrc < 0)
1539          code[0] |= 0x3c;
1540    }
1541 
1542    if (!f)
1543       return;
1544 
1545    if (f->allWarp)
1546       code[0] |= 1 << 9;
1547    if (f->limit)
1548       code[0] |= 1 << 8;
1549 
1550    if (f->op == OP_CALL) {
1551       if (f->builtin) {
1552          assert(f->absolute);
1553          uint32_t pcAbs = targNVC0->getBuiltinOffset(f->target.builtin);
1554          addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xff800000, 23);
1555          addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x007fffff, -9);
1556       } else {
1557          assert(!f->absolute);
1558          int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
1559          code[0] |= (pcRel & 0x1ff) << 23;
1560          code[1] |= (pcRel >> 9) & 0x7fff;
1561       }
1562    } else
1563    if (mask & 2) {
1564       int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
1565       if (writeIssueDelays && !(f->target.bb->binPos & 0x3f))
1566          pcRel += 8;
1567       // currently we don't want absolute branches
1568       assert(!f->absolute);
1569       code[0] |= (pcRel & 0x1ff) << 23;
1570       code[1] |= (pcRel >> 9) & 0x7fff;
1571    }
1572 }
1573 
1574 void
emitSHFL(const Instruction * i)1575 CodeEmitterGK110::emitSHFL(const Instruction *i)
1576 {
1577    const ImmediateValue *imm;
1578 
1579    code[0] = 0x00000002;
1580    code[1] = 0x78800000 | (i->subOp << 1);
1581 
1582    emitPredicate(i);
1583 
1584    defId(i->def(0), 2);
1585    srcId(i->src(0), 10);
1586 
1587    switch (i->src(1).getFile()) {
1588    case FILE_GPR:
1589       srcId(i->src(1), 23);
1590       break;
1591    case FILE_IMMEDIATE:
1592       imm = i->getSrc(1)->asImm();
1593       assert(imm && imm->reg.data.u32 < 0x20);
1594       code[0] |= imm->reg.data.u32 << 23;
1595       code[0] |= 1 << 31;
1596       break;
1597    default:
1598       assert(!"invalid src1 file");
1599       break;
1600    }
1601 
1602    switch (i->src(2).getFile()) {
1603    case FILE_GPR:
1604       srcId(i->src(2), 42);
1605       break;
1606    case FILE_IMMEDIATE:
1607       imm = i->getSrc(2)->asImm();
1608       assert(imm && imm->reg.data.u32 < 0x2000);
1609       code[1] |= imm->reg.data.u32 << 5;
1610       code[1] |= 1;
1611       break;
1612    default:
1613       assert(!"invalid src2 file");
1614       break;
1615    }
1616 
1617    if (!i->defExists(1))
1618       code[1] |= 7 << 19;
1619    else {
1620       assert(i->def(1).getFile() == FILE_PREDICATE);
1621       defId(i->def(1), 51);
1622    }
1623 }
1624 
1625 void
emitVOTE(const Instruction * i)1626 CodeEmitterGK110::emitVOTE(const Instruction *i)
1627 {
1628    const ImmediateValue *imm;
1629    uint32_t u32;
1630 
1631    code[0] = 0x00000002;
1632    code[1] = 0x86c00000 | (i->subOp << 19);
1633 
1634    emitPredicate(i);
1635 
1636    unsigned rp = 0;
1637    for (int d = 0; i->defExists(d); d++) {
1638       if (i->def(d).getFile() == FILE_PREDICATE) {
1639          assert(!(rp & 2));
1640          rp |= 2;
1641          defId(i->def(d), 48);
1642       } else if (i->def(d).getFile() == FILE_GPR) {
1643          assert(!(rp & 1));
1644          rp |= 1;
1645          defId(i->def(d), 2);
1646       } else {
1647          assert(!"Unhandled def");
1648       }
1649    }
1650    if (!(rp & 1))
1651       code[0] |= 255 << 2;
1652    if (!(rp & 2))
1653       code[1] |= 7 << 16;
1654 
1655    switch (i->src(0).getFile()) {
1656    case FILE_PREDICATE:
1657       if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
1658          code[0] |= 1 << 13;
1659       srcId(i->src(0), 42);
1660       break;
1661    case FILE_IMMEDIATE:
1662       imm = i->getSrc(0)->asImm();
1663       assert(imm);
1664       u32 = imm->reg.data.u32;
1665       assert(u32 == 0 || u32 == 1);
1666       code[1] |= (u32 == 1 ? 0x7 : 0xf) << 10;
1667       break;
1668    default:
1669       assert(!"Unhandled src");
1670       break;
1671    }
1672 }
1673 
1674 void
emitSUGType(DataType ty,const int pos)1675 CodeEmitterGK110::emitSUGType(DataType ty, const int pos)
1676 {
1677    uint8_t n = 0;
1678 
1679    switch (ty) {
1680    case TYPE_S32: n = 1; break;
1681    case TYPE_U8:  n = 2; break;
1682    case TYPE_S8:  n = 3; break;
1683    default:
1684       assert(ty == TYPE_U32);
1685       break;
1686    }
1687    code[pos / 32] |= n << (pos % 32);
1688 }
1689 
1690 void
emitSUCachingMode(CacheMode c)1691 CodeEmitterGK110::emitSUCachingMode(CacheMode c)
1692 {
1693    uint8_t n = 0;
1694 
1695    switch (c) {
1696    case CACHE_CA:
1697 // case CACHE_WB:
1698       n = 0;
1699       break;
1700    case CACHE_CG:
1701       n = 1;
1702       break;
1703    case CACHE_CS:
1704       n = 2;
1705       break;
1706    case CACHE_CV:
1707 // case CACHE_WT:
1708       n = 3;
1709       break;
1710    default:
1711       assert(!"invalid caching mode");
1712       break;
1713    }
1714    code[0] |= (n & 1) << 31;
1715    code[1] |= (n & 2) >> 1;
1716 }
1717 
1718 void
setSUConst16(const Instruction * i,const int s)1719 CodeEmitterGK110::setSUConst16(const Instruction *i, const int s)
1720 {
1721    const uint32_t offset = i->getSrc(s)->reg.data.offset;
1722 
1723    assert(offset == (offset & 0xfffc));
1724 
1725    code[0] |= offset << 21;
1726    code[1] |= offset >> 11;
1727    code[1] |= i->getSrc(s)->reg.fileIndex << 5;
1728 }
1729 
1730 void
emitSULDGB(const TexInstruction * i)1731 CodeEmitterGK110::emitSULDGB(const TexInstruction *i)
1732 {
1733    code[0] = 0x00000002;
1734    code[1] = 0x30000000 | (i->subOp << 14);
1735 
1736    if (i->src(1).getFile() == FILE_MEMORY_CONST) {
1737       emitLoadStoreType(i->dType, 0x38);
1738       emitCachingMode(i->cache, 0x36);
1739 
1740       // format
1741       setSUConst16(i, 1);
1742    } else {
1743       assert(i->src(1).getFile() == FILE_GPR);
1744       code[1] |= 0x49800000;
1745 
1746       emitLoadStoreType(i->dType, 0x21);
1747       emitSUCachingMode(i->cache);
1748 
1749       srcId(i->src(1), 23);
1750    }
1751 
1752    emitSUGType(i->sType, 0x34);
1753 
1754    emitPredicate(i);
1755    defId(i->def(0), 2); // destination
1756    srcId(i->src(0), 10); // address
1757 
1758    // surface predicate
1759    if (!i->srcExists(2) || (i->predSrc == 2)) {
1760       code[1] |= 0x7 << 10;
1761    } else {
1762       if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
1763          code[1] |= 1 << 13;
1764       srcId(i->src(2), 32 + 10);
1765    }
1766 }
1767 
1768 void
emitSUSTGx(const TexInstruction * i)1769 CodeEmitterGK110::emitSUSTGx(const TexInstruction *i)
1770 {
1771    assert(i->op == OP_SUSTP);
1772 
1773    code[0] = 0x00000002;
1774    code[1] = 0x38000000;
1775 
1776    if (i->src(1).getFile() == FILE_MEMORY_CONST) {
1777       code[0] |= i->subOp << 2;
1778 
1779       if (i->op == OP_SUSTP)
1780          code[0] |= i->tex.mask << 4;
1781 
1782       emitSUGType(i->sType, 0x8);
1783       emitCachingMode(i->cache, 0x36);
1784 
1785       // format
1786       setSUConst16(i, 1);
1787    } else {
1788       assert(i->src(1).getFile() == FILE_GPR);
1789 
1790       code[0] |= i->subOp << 23;
1791       code[1] |= 0x41c00000;
1792 
1793       if (i->op == OP_SUSTP)
1794          code[0] |= i->tex.mask << 25;
1795 
1796       emitSUGType(i->sType, 0x1d);
1797       emitSUCachingMode(i->cache);
1798 
1799       srcId(i->src(1), 2);
1800    }
1801 
1802    emitPredicate(i);
1803    srcId(i->src(0), 10); // address
1804    srcId(i->src(3), 42); // values
1805 
1806    // surface predicate
1807    if (!i->srcExists(2) || (i->predSrc == 2)) {
1808       code[1] |= 0x7 << 18;
1809    } else {
1810       if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
1811          code[1] |= 1 << 21;
1812       srcId(i->src(2), 32 + 18);
1813    }
1814 }
1815 
1816 void
emitSUCLAMPMode(uint16_t subOp)1817 CodeEmitterGK110::emitSUCLAMPMode(uint16_t subOp)
1818 {
1819    uint8_t m;
1820    switch (subOp & ~NV50_IR_SUBOP_SUCLAMP_2D) {
1821    case NV50_IR_SUBOP_SUCLAMP_SD(0, 1): m = 0; break;
1822    case NV50_IR_SUBOP_SUCLAMP_SD(1, 1): m = 1; break;
1823    case NV50_IR_SUBOP_SUCLAMP_SD(2, 1): m = 2; break;
1824    case NV50_IR_SUBOP_SUCLAMP_SD(3, 1): m = 3; break;
1825    case NV50_IR_SUBOP_SUCLAMP_SD(4, 1): m = 4; break;
1826    case NV50_IR_SUBOP_SUCLAMP_PL(0, 1): m = 5; break;
1827    case NV50_IR_SUBOP_SUCLAMP_PL(1, 1): m = 6; break;
1828    case NV50_IR_SUBOP_SUCLAMP_PL(2, 1): m = 7; break;
1829    case NV50_IR_SUBOP_SUCLAMP_PL(3, 1): m = 8; break;
1830    case NV50_IR_SUBOP_SUCLAMP_PL(4, 1): m = 9; break;
1831    case NV50_IR_SUBOP_SUCLAMP_BL(0, 1): m = 10; break;
1832    case NV50_IR_SUBOP_SUCLAMP_BL(1, 1): m = 11; break;
1833    case NV50_IR_SUBOP_SUCLAMP_BL(2, 1): m = 12; break;
1834    case NV50_IR_SUBOP_SUCLAMP_BL(3, 1): m = 13; break;
1835    case NV50_IR_SUBOP_SUCLAMP_BL(4, 1): m = 14; break;
1836    default:
1837       return;
1838    }
1839    code[1] |= m << 20;
1840    if (subOp & NV50_IR_SUBOP_SUCLAMP_2D)
1841       code[1] |= 1 << 24;
1842 }
1843 
1844 void
emitSUCalc(Instruction * i)1845 CodeEmitterGK110::emitSUCalc(Instruction *i)
1846 {
1847    ImmediateValue *imm = NULL;
1848    uint64_t opc1, opc2;
1849 
1850    if (i->srcExists(2)) {
1851       imm = i->getSrc(2)->asImm();
1852       if (imm)
1853          i->setSrc(2, NULL); // special case, make emitForm_21 not assert
1854    }
1855 
1856    switch (i->op) {
1857    case OP_SUCLAMP:  opc1 = 0xb00; opc2 = 0x580; break;
1858    case OP_SUBFM:    opc1 = 0xb68; opc2 = 0x1e8; break;
1859    case OP_SUEAU:    opc1 = 0xb6c; opc2 = 0x1ec; break;
1860    default:
1861       assert(0);
1862       return;
1863    }
1864    emitForm_21(i, opc2, opc1);
1865 
1866    if (i->op == OP_SUCLAMP) {
1867       if (i->dType == TYPE_S32)
1868          code[1] |= 1 << 19;
1869       emitSUCLAMPMode(i->subOp);
1870    }
1871 
1872    if (i->op == OP_SUBFM && i->subOp == NV50_IR_SUBOP_SUBFM_3D)
1873       code[1] |= 1 << 18;
1874 
1875    if (i->op != OP_SUEAU) {
1876       const uint8_t pos = i->op == OP_SUBFM ? 19 : 16;
1877       if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
1878          code[0] |= 255 << 2;
1879          code[1] |= i->getDef(1)->reg.data.id << pos;
1880       } else
1881       if (i->defExists(1)) { // r, p
1882          assert(i->def(1).getFile() == FILE_PREDICATE);
1883          code[1] |= i->getDef(1)->reg.data.id << pos;
1884       } else { // r, #
1885          code[1] |= 7 << pos;
1886       }
1887    }
1888 
1889    if (imm) {
1890       assert(i->op == OP_SUCLAMP);
1891       i->setSrc(2, imm);
1892       code[1] |= (imm->reg.data.u32 & 0x3f) << 10; // sint6
1893    }
1894 }
1895 
1896 
1897 void
emitVectorSubOp(const Instruction * i)1898 CodeEmitterGK110::emitVectorSubOp(const Instruction *i)
1899 {
1900    switch (NV50_IR_SUBOP_Vn(i->subOp)) {
1901    case 0:
1902       code[1] |= (i->subOp & 0x000f) << 7;  // vsrc1
1903       code[1] |= (i->subOp & 0x00e0) >> 6;  // vsrc2
1904       code[1] |= (i->subOp & 0x0100) << 13; // vsrc2
1905       code[1] |= (i->subOp & 0x3c00) << 12; // vdst
1906       break;
1907    default:
1908       assert(0);
1909       break;
1910    }
1911 }
1912 
1913 void
emitVSHL(const Instruction * i)1914 CodeEmitterGK110::emitVSHL(const Instruction *i)
1915 {
1916    code[0] = 0x00000002;
1917    code[1] = 0xb8000000;
1918 
1919    assert(NV50_IR_SUBOP_Vn(i->subOp) == 0);
1920 
1921    if (isSignedType(i->dType)) code[1] |= 1 << 25;
1922    if (isSignedType(i->sType)) code[1] |= 1 << 19;
1923 
1924    emitVectorSubOp(i);
1925 
1926    emitPredicate(i);
1927    defId(i->def(0), 2);
1928    srcId(i->src(0), 10);
1929 
1930    if (i->getSrc(1)->reg.file == FILE_IMMEDIATE) {
1931       ImmediateValue *imm = i->getSrc(1)->asImm();
1932       assert(imm);
1933       code[0] |= (imm->reg.data.u32 & 0x01ff) << 23;
1934       code[1] |= (imm->reg.data.u32 & 0xfe00) >> 9;
1935    } else {
1936       assert(i->getSrc(1)->reg.file == FILE_GPR);
1937       code[1] |= 1 << 21;
1938       srcId(i->src(1), 23);
1939    }
1940    srcId(i->src(2), 42);
1941 
1942    if (i->saturate)
1943       code[0] |= 1 << 22;
1944    if (i->flagsDef >= 0)
1945       code[1] |= 1 << 18;
1946 }
1947 
1948 void
emitAFETCH(const Instruction * i)1949 CodeEmitterGK110::emitAFETCH(const Instruction *i)
1950 {
1951    uint32_t offset = i->src(0).get()->reg.data.offset & 0x7ff;
1952 
1953    code[0] = 0x00000002 | (offset << 23);
1954    code[1] = 0x7d000000 | (offset >> 9);
1955 
1956    if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1957       code[1] |= 0x8;
1958 
1959    emitPredicate(i);
1960 
1961    defId(i->def(0), 2);
1962    srcId(i->src(0).getIndirect(0), 10);
1963 }
1964 
1965 void
emitPFETCH(const Instruction * i)1966 CodeEmitterGK110::emitPFETCH(const Instruction *i)
1967 {
1968    uint32_t prim = i->src(0).get()->reg.data.u32;
1969 
1970    code[0] = 0x00000002 | ((prim & 0xff) << 23);
1971    code[1] = 0x7f800000;
1972 
1973    emitPredicate(i);
1974 
1975    const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1976 
1977    defId(i->def(0), 2);
1978    srcId(i, src1, 10);
1979 }
1980 
1981 void
emitVFETCH(const Instruction * i)1982 CodeEmitterGK110::emitVFETCH(const Instruction *i)
1983 {
1984    unsigned int size = typeSizeof(i->dType);
1985    uint32_t offset = i->src(0).get()->reg.data.offset;
1986 
1987    code[0] = 0x00000002 | (offset << 23);
1988    code[1] = 0x7ec00000 | (offset >> 9);
1989    code[1] |= (size / 4 - 1) << 18;
1990 
1991    if (i->perPatch)
1992       code[1] |= 0x4;
1993    if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1994       code[1] |= 0x8; // yes, TCPs can read from *outputs* of other threads
1995 
1996    emitPredicate(i);
1997 
1998    defId(i->def(0), 2);
1999    srcId(i->src(0).getIndirect(0), 10);
2000    srcId(i->src(0).getIndirect(1), 32 + 10); // vertex address
2001 }
2002 
2003 void
emitEXPORT(const Instruction * i)2004 CodeEmitterGK110::emitEXPORT(const Instruction *i)
2005 {
2006    unsigned int size = typeSizeof(i->dType);
2007    uint32_t offset = i->src(0).get()->reg.data.offset;
2008 
2009    code[0] = 0x00000002 | (offset << 23);
2010    code[1] = 0x7f000000 | (offset >> 9);
2011    code[1] |= (size / 4 - 1) << 18;
2012 
2013    if (i->perPatch)
2014       code[1] |= 0x4;
2015 
2016    emitPredicate(i);
2017 
2018    assert(i->src(1).getFile() == FILE_GPR);
2019 
2020    srcId(i->src(0).getIndirect(0), 10);
2021    srcId(i->src(0).getIndirect(1), 32 + 10); // vertex base address
2022    srcId(i->src(1), 2);
2023 }
2024 
2025 void
emitOUT(const Instruction * i)2026 CodeEmitterGK110::emitOUT(const Instruction *i)
2027 {
2028    assert(i->src(0).getFile() == FILE_GPR);
2029 
2030    emitForm_21(i, 0x1f0, 0xb70);
2031 
2032    if (i->op == OP_EMIT)
2033       code[1] |= 1 << 10;
2034    if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)
2035       code[1] |= 1 << 11;
2036 }
2037 
2038 void
emitInterpMode(const Instruction * i)2039 CodeEmitterGK110::emitInterpMode(const Instruction *i)
2040 {
2041    code[1] |= (i->ipa & 0x3) << 21; // TODO: INTERP_SAMPLEID
2042    code[1] |= (i->ipa & 0xc) << (19 - 2);
2043 }
2044 
2045 void
gk110_interpApply(const struct FixupEntry * entry,uint32_t * code,const FixupData & data)2046 gk110_interpApply(const struct FixupEntry *entry, uint32_t *code, const FixupData& data)
2047 {
2048    int ipa = entry->ipa;
2049    int reg = entry->reg;
2050    int loc = entry->loc;
2051 
2052    if (data.flatshade &&
2053        (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
2054       ipa = NV50_IR_INTERP_FLAT;
2055       reg = 0xff;
2056    } else if (data.force_persample_interp &&
2057               (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
2058               (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
2059       ipa |= NV50_IR_INTERP_CENTROID;
2060    }
2061    code[loc + 1] &= ~(0xf << 19);
2062    code[loc + 1] |= (ipa & 0x3) << 21;
2063    code[loc + 1] |= (ipa & 0xc) << (19 - 2);
2064    code[loc + 0] &= ~(0xff << 23);
2065    code[loc + 0] |= reg << 23;
2066 }
2067 
2068 void
emitINTERP(const Instruction * i)2069 CodeEmitterGK110::emitINTERP(const Instruction *i)
2070 {
2071    const uint32_t base = i->getSrc(0)->reg.data.offset;
2072 
2073    code[0] = 0x00000002 | (base << 31);
2074    code[1] = 0x74800000 | (base >> 1);
2075 
2076    if (i->saturate)
2077       code[1] |= 1 << 18;
2078 
2079    if (i->op == OP_PINTERP) {
2080       srcId(i->src(1), 23);
2081       addInterp(i->ipa, SDATA(i->src(1)).id, gk110_interpApply);
2082    } else {
2083       code[0] |= 0xff << 23;
2084       addInterp(i->ipa, 0xff, gk110_interpApply);
2085    }
2086 
2087    srcId(i->src(0).getIndirect(0), 10);
2088    emitInterpMode(i);
2089 
2090    emitPredicate(i);
2091    defId(i->def(0), 2);
2092 
2093    if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)
2094       srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 32 + 10);
2095    else
2096       code[1] |= 0xff << 10;
2097 }
2098 
2099 void
emitLoadStoreType(DataType ty,const int pos)2100 CodeEmitterGK110::emitLoadStoreType(DataType ty, const int pos)
2101 {
2102    uint8_t n;
2103 
2104    switch (ty) {
2105    case TYPE_U8:
2106       n = 0;
2107       break;
2108    case TYPE_S8:
2109       n = 1;
2110       break;
2111    case TYPE_U16:
2112       n = 2;
2113       break;
2114    case TYPE_S16:
2115       n = 3;
2116       break;
2117    case TYPE_F32:
2118    case TYPE_U32:
2119    case TYPE_S32:
2120       n = 4;
2121       break;
2122    case TYPE_F64:
2123    case TYPE_U64:
2124    case TYPE_S64:
2125       n = 5;
2126       break;
2127    case TYPE_B128:
2128       n = 6;
2129       break;
2130    default:
2131       n = 0;
2132       assert(!"invalid ld/st type");
2133       break;
2134    }
2135    code[pos / 32] |= n << (pos % 32);
2136 }
2137 
2138 void
emitCachingMode(CacheMode c,const int pos)2139 CodeEmitterGK110::emitCachingMode(CacheMode c, const int pos)
2140 {
2141    uint8_t n;
2142 
2143    switch (c) {
2144    case CACHE_CA:
2145 // case CACHE_WB:
2146       n = 0;
2147       break;
2148    case CACHE_CG:
2149       n = 1;
2150       break;
2151    case CACHE_CS:
2152       n = 2;
2153       break;
2154    case CACHE_CV:
2155 // case CACHE_WT:
2156       n = 3;
2157       break;
2158    default:
2159       n = 0;
2160       assert(!"invalid caching mode");
2161       break;
2162    }
2163    code[pos / 32] |= n << (pos % 32);
2164 }
2165 
2166 void
emitSTORE(const Instruction * i)2167 CodeEmitterGK110::emitSTORE(const Instruction *i)
2168 {
2169    int32_t offset = SDATA(i->src(0)).offset;
2170 
2171    switch (i->src(0).getFile()) {
2172    case FILE_MEMORY_GLOBAL: code[1] = 0xe0000000; code[0] = 0x00000000; break;
2173    case FILE_MEMORY_LOCAL:  code[1] = 0x7a800000; code[0] = 0x00000002; break;
2174    case FILE_MEMORY_SHARED:
2175       code[0] = 0x00000002;
2176       if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED)
2177          code[1] = 0x78400000;
2178       else
2179          code[1] = 0x7ac00000;
2180       break;
2181    default:
2182       assert(!"invalid memory file");
2183       break;
2184    }
2185 
2186    if (code[0] & 0x2) {
2187       offset &= 0xffffff;
2188       emitLoadStoreType(i->dType, 0x33);
2189       if (i->src(0).getFile() == FILE_MEMORY_LOCAL)
2190          emitCachingMode(i->cache, 0x2f);
2191    } else {
2192       emitLoadStoreType(i->dType, 0x38);
2193       emitCachingMode(i->cache, 0x3b);
2194    }
2195    code[0] |= offset << 23;
2196    code[1] |= offset >> 9;
2197 
2198    // Unlocked store on shared memory can fail.
2199    if (i->src(0).getFile() == FILE_MEMORY_SHARED &&
2200        i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
2201       assert(i->defExists(0));
2202       defId(i->def(0), 32 + 16);
2203    }
2204 
2205    emitPredicate(i);
2206 
2207    srcId(i->src(1), 2);
2208    srcId(i->src(0).getIndirect(0), 10);
2209    if (i->src(0).getFile() == FILE_MEMORY_GLOBAL &&
2210        i->src(0).isIndirect(0) &&
2211        i->getIndirect(0, 0)->reg.size == 8)
2212       code[1] |= 1 << 23;
2213 }
2214 
2215 void
emitLOAD(const Instruction * i)2216 CodeEmitterGK110::emitLOAD(const Instruction *i)
2217 {
2218    int32_t offset = SDATA(i->src(0)).offset;
2219 
2220    switch (i->src(0).getFile()) {
2221    case FILE_MEMORY_GLOBAL: code[1] = 0xc0000000; code[0] = 0x00000000; break;
2222    case FILE_MEMORY_LOCAL:  code[1] = 0x7a000000; code[0] = 0x00000002; break;
2223    case FILE_MEMORY_SHARED:
2224       code[0] = 0x00000002;
2225       if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED)
2226          code[1] = 0x77400000;
2227       else
2228          code[1] = 0x7a400000;
2229       break;
2230    case FILE_MEMORY_CONST:
2231       if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
2232          emitMOV(i);
2233          return;
2234       }
2235       offset &= 0xffff;
2236       code[0] = 0x00000002;
2237       code[1] = 0x7c800000 | (i->src(0).get()->reg.fileIndex << 7);
2238       code[1] |= i->subOp << 15;
2239       break;
2240    default:
2241       assert(!"invalid memory file");
2242       break;
2243    }
2244 
2245    if (code[0] & 0x2) {
2246       offset &= 0xffffff;
2247       emitLoadStoreType(i->dType, 0x33);
2248       if (i->src(0).getFile() == FILE_MEMORY_LOCAL)
2249          emitCachingMode(i->cache, 0x2f);
2250    } else {
2251       emitLoadStoreType(i->dType, 0x38);
2252       emitCachingMode(i->cache, 0x3b);
2253    }
2254    code[0] |= offset << 23;
2255    code[1] |= offset >> 9;
2256 
2257    // Locked store on shared memory can fail.
2258    int r = 0, p = -1;
2259    if (i->src(0).getFile() == FILE_MEMORY_SHARED &&
2260        i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
2261       if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
2262          r = -1;
2263          p = 0;
2264       } else if (i->defExists(1)) { // r, p
2265          p = 1;
2266       } else {
2267          assert(!"Expected predicate dest for load locked");
2268       }
2269    }
2270 
2271    emitPredicate(i);
2272 
2273    if (r >= 0)
2274       defId(i->def(r), 2);
2275    else
2276       code[0] |= 255 << 2;
2277 
2278    if (p >= 0)
2279       defId(i->def(p), 32 + 16);
2280 
2281    if (i->getIndirect(0, 0)) {
2282       srcId(i->src(0).getIndirect(0), 10);
2283       if (i->getIndirect(0, 0)->reg.size == 8)
2284          code[1] |= 1 << 23;
2285    } else {
2286       code[0] |= 255 << 10;
2287    }
2288 }
2289 
2290 uint8_t
getSRegEncoding(const ValueRef & ref)2291 CodeEmitterGK110::getSRegEncoding(const ValueRef& ref)
2292 {
2293    switch (SDATA(ref).sv.sv) {
2294    case SV_LANEID:        return 0x00;
2295    case SV_PHYSID:        return 0x03;
2296    case SV_VERTEX_COUNT:  return 0x10;
2297    case SV_INVOCATION_ID: return 0x11;
2298    case SV_YDIR:          return 0x12;
2299    case SV_THREAD_KILL:   return 0x13;
2300    case SV_COMBINED_TID:  return 0x20;
2301    case SV_TID:           return 0x21 + SDATA(ref).sv.index;
2302    case SV_CTAID:         return 0x25 + SDATA(ref).sv.index;
2303    case SV_NTID:          return 0x29 + SDATA(ref).sv.index;
2304    case SV_GRIDID:        return 0x2c;
2305    case SV_NCTAID:        return 0x2d + SDATA(ref).sv.index;
2306    case SV_LBASE:         return 0x34;
2307    case SV_SBASE:         return 0x30;
2308    case SV_LANEMASK_EQ:   return 0x38;
2309    case SV_LANEMASK_LT:   return 0x39;
2310    case SV_LANEMASK_LE:   return 0x3a;
2311    case SV_LANEMASK_GT:   return 0x3b;
2312    case SV_LANEMASK_GE:   return 0x3c;
2313    case SV_CLOCK:         return 0x50 + SDATA(ref).sv.index;
2314    default:
2315       assert(!"no sreg for system value");
2316       return 0;
2317    }
2318 }
2319 
2320 void
emitMOV(const Instruction * i)2321 CodeEmitterGK110::emitMOV(const Instruction *i)
2322 {
2323    if (i->def(0).getFile() == FILE_PREDICATE) {
2324       if (i->src(0).getFile() == FILE_GPR) {
2325          // Use ISETP.NE.AND dst, PT, src, RZ, PT
2326          code[0] = 0x00000002;
2327          code[1] = 0xdb500000;
2328 
2329          code[0] |= 0x7 << 2;
2330          code[0] |= 0xff << 23;
2331          code[1] |= 0x7 << 10;
2332          srcId(i->src(0), 10);
2333       } else
2334       if (i->src(0).getFile() == FILE_PREDICATE) {
2335          // Use PSETP.AND.AND dst, PT, src, PT, PT
2336          code[0] = 0x00000002;
2337          code[1] = 0x84800000;
2338 
2339          code[0] |= 0x7 << 2;
2340          code[1] |= 0x7 << 0;
2341          code[1] |= 0x7 << 10;
2342 
2343          srcId(i->src(0), 14);
2344       } else {
2345          assert(!"Unexpected source for predicate destination");
2346          emitNOP(i);
2347       }
2348       emitPredicate(i);
2349       defId(i->def(0), 5);
2350    } else
2351    if (i->src(0).getFile() == FILE_SYSTEM_VALUE) {
2352       code[0] = 0x00000002 | (getSRegEncoding(i->src(0)) << 23);
2353       code[1] = 0x86400000;
2354       emitPredicate(i);
2355       defId(i->def(0), 2);
2356    } else
2357    if (i->src(0).getFile() == FILE_IMMEDIATE) {
2358       code[0] = 0x00000002 | (i->lanes << 14);
2359       code[1] = 0x74000000;
2360       emitPredicate(i);
2361       defId(i->def(0), 2);
2362       setImmediate32(i, 0, Modifier(0));
2363    } else
2364    if (i->src(0).getFile() == FILE_PREDICATE) {
2365       code[0] = 0x00000002;
2366       code[1] = 0x84401c07;
2367       emitPredicate(i);
2368       defId(i->def(0), 2);
2369       srcId(i->src(0), 14);
2370    } else {
2371       emitForm_C(i, 0x24c, 2);
2372       code[1] |= i->lanes << 10;
2373    }
2374 }
2375 
2376 static inline bool
uses64bitAddress(const Instruction * ldst)2377 uses64bitAddress(const Instruction *ldst)
2378 {
2379    return ldst->src(0).getFile() == FILE_MEMORY_GLOBAL &&
2380       ldst->src(0).isIndirect(0) &&
2381       ldst->getIndirect(0, 0)->reg.size == 8;
2382 }
2383 
2384 void
emitATOM(const Instruction * i)2385 CodeEmitterGK110::emitATOM(const Instruction *i)
2386 {
2387    const bool hasDst = i->defExists(0);
2388    const bool exch = i->subOp == NV50_IR_SUBOP_ATOM_EXCH;
2389 
2390    code[0] = 0x00000002;
2391    if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
2392       code[1] = 0x77800000;
2393    else
2394       code[1] = 0x68000000;
2395 
2396    switch (i->subOp) {
2397    case NV50_IR_SUBOP_ATOM_CAS: break;
2398    case NV50_IR_SUBOP_ATOM_EXCH: code[1] |= 0x04000000; break;
2399    default: code[1] |= i->subOp << 23; break;
2400    }
2401 
2402    switch (i->dType) {
2403    case TYPE_U32: break;
2404    case TYPE_S32: code[1] |= 0x00100000; break;
2405    case TYPE_U64: code[1] |= 0x00200000; break;
2406    case TYPE_F32: code[1] |= 0x00300000; break;
2407    case TYPE_B128: code[1] |= 0x00400000; break; /* TODO: U128 */
2408    case TYPE_S64: code[1] |= 0x00500000; break;
2409    default: assert(!"unsupported type"); break;
2410    }
2411 
2412    emitPredicate(i);
2413 
2414    /* TODO: cas: check that src regs line up */
2415    /* TODO: cas: flip bits if $r255 is used */
2416    srcId(i->src(1), 23);
2417 
2418    if (hasDst) {
2419       defId(i->def(0), 2);
2420    } else
2421    if (!exch) {
2422       code[0] |= 255 << 2;
2423    }
2424 
2425    if (hasDst || !exch) {
2426       const int32_t offset = SDATA(i->src(0)).offset;
2427       assert(offset < 0x80000 && offset >= -0x80000);
2428       code[0] |= (offset & 1) << 31;
2429       code[1] |= (offset & 0xffffe) >> 1;
2430    } else {
2431       srcAddr32(i->src(0), 31);
2432    }
2433 
2434    if (i->getIndirect(0, 0)) {
2435       srcId(i->getIndirect(0, 0), 10);
2436       if (i->getIndirect(0, 0)->reg.size == 8)
2437          code[1] |= 1 << 19;
2438    } else {
2439       code[0] |= 255 << 10;
2440    }
2441 }
2442 
2443 void
emitCCTL(const Instruction * i)2444 CodeEmitterGK110::emitCCTL(const Instruction *i)
2445 {
2446    int32_t offset = SDATA(i->src(0)).offset;
2447 
2448    code[0] = 0x00000002 | (i->subOp << 2);
2449 
2450    if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
2451       code[1] = 0x7b000000;
2452    } else {
2453       code[1] = 0x7c000000;
2454       offset &= 0xffffff;
2455    }
2456    code[0] |= offset << 23;
2457    code[1] |= offset >> 9;
2458 
2459    if (uses64bitAddress(i))
2460       code[1] |= 1 << 23;
2461    srcId(i->src(0).getIndirect(0), 10);
2462 
2463    emitPredicate(i);
2464 }
2465 
2466 bool
emitInstruction(Instruction * insn)2467 CodeEmitterGK110::emitInstruction(Instruction *insn)
2468 {
2469    const unsigned int size = (writeIssueDelays && !(codeSize & 0x3f)) ? 16 : 8;
2470 
2471    if (insn->encSize != 8) {
2472       ERROR("skipping unencodable instruction: ");
2473       insn->print();
2474       return false;
2475    } else
2476    if (codeSize + size > codeSizeLimit) {
2477       ERROR("code emitter output buffer too small\n");
2478       return false;
2479    }
2480 
2481    if (writeIssueDelays) {
2482       int id = (codeSize & 0x3f) / 8 - 1;
2483       if (id < 0) {
2484          id += 1;
2485          code[0] = 0x00000000; // cf issue delay "instruction"
2486          code[1] = 0x08000000;
2487          code += 2;
2488          codeSize += 8;
2489       }
2490       uint32_t *data = code - (id * 2 + 2);
2491 
2492       switch (id) {
2493       case 0: data[0] |= insn->sched << 2; break;
2494       case 1: data[0] |= insn->sched << 10; break;
2495       case 2: data[0] |= insn->sched << 18; break;
2496       case 3: data[0] |= insn->sched << 26; data[1] |= insn->sched >> 6; break;
2497       case 4: data[1] |= insn->sched << 2; break;
2498       case 5: data[1] |= insn->sched << 10; break;
2499       case 6: data[1] |= insn->sched << 18; break;
2500       default:
2501          assert(0);
2502          break;
2503       }
2504    }
2505 
2506    // assert that instructions with multiple defs don't corrupt registers
2507    for (int d = 0; insn->defExists(d); ++d)
2508       assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0);
2509 
2510    switch (insn->op) {
2511    case OP_MOV:
2512    case OP_RDSV:
2513       emitMOV(insn);
2514       break;
2515    case OP_NOP:
2516       break;
2517    case OP_LOAD:
2518       emitLOAD(insn);
2519       break;
2520    case OP_STORE:
2521       emitSTORE(insn);
2522       break;
2523    case OP_LINTERP:
2524    case OP_PINTERP:
2525       emitINTERP(insn);
2526       break;
2527    case OP_VFETCH:
2528       emitVFETCH(insn);
2529       break;
2530    case OP_EXPORT:
2531       emitEXPORT(insn);
2532       break;
2533    case OP_AFETCH:
2534       emitAFETCH(insn);
2535       break;
2536    case OP_PFETCH:
2537       emitPFETCH(insn);
2538       break;
2539    case OP_EMIT:
2540    case OP_RESTART:
2541       emitOUT(insn);
2542       break;
2543    case OP_ADD:
2544    case OP_SUB:
2545       if (insn->dType == TYPE_F64)
2546          emitDADD(insn);
2547       else if (isFloatType(insn->dType))
2548          emitFADD(insn);
2549       else
2550          emitUADD(insn);
2551       break;
2552    case OP_MUL:
2553       if (insn->dType == TYPE_F64)
2554          emitDMUL(insn);
2555       else if (isFloatType(insn->dType))
2556          emitFMUL(insn);
2557       else
2558          emitIMUL(insn);
2559       break;
2560    case OP_MAD:
2561    case OP_FMA:
2562       if (insn->dType == TYPE_F64)
2563          emitDMAD(insn);
2564       else if (isFloatType(insn->dType))
2565          emitFMAD(insn);
2566       else
2567          emitIMAD(insn);
2568       break;
2569    case OP_MADSP:
2570       emitMADSP(insn);
2571       break;
2572    case OP_SAD:
2573       emitISAD(insn);
2574       break;
2575    case OP_SHLADD:
2576       emitSHLADD(insn);
2577       break;
2578    case OP_NOT:
2579       emitNOT(insn);
2580       break;
2581    case OP_AND:
2582       emitLogicOp(insn, 0);
2583       break;
2584    case OP_OR:
2585       emitLogicOp(insn, 1);
2586       break;
2587    case OP_XOR:
2588       emitLogicOp(insn, 2);
2589       break;
2590    case OP_SHL:
2591    case OP_SHR:
2592       if (typeSizeof(insn->sType) == 8)
2593          emitShift64(insn);
2594       else
2595          emitShift(insn);
2596       break;
2597    case OP_SET:
2598    case OP_SET_AND:
2599    case OP_SET_OR:
2600    case OP_SET_XOR:
2601       emitSET(insn->asCmp());
2602       break;
2603    case OP_SELP:
2604       emitSELP(insn);
2605       break;
2606    case OP_SLCT:
2607       emitSLCT(insn->asCmp());
2608       break;
2609    case OP_MIN:
2610    case OP_MAX:
2611       emitMINMAX(insn);
2612       break;
2613    case OP_ABS:
2614    case OP_NEG:
2615    case OP_CEIL:
2616    case OP_FLOOR:
2617    case OP_TRUNC:
2618    case OP_SAT:
2619       emitCVT(insn);
2620       break;
2621    case OP_CVT:
2622       if (insn->def(0).getFile() == FILE_PREDICATE ||
2623           insn->src(0).getFile() == FILE_PREDICATE)
2624          emitMOV(insn);
2625       else
2626          emitCVT(insn);
2627       break;
2628    case OP_RSQ:
2629       emitSFnOp(insn, 5 + 2 * insn->subOp);
2630       break;
2631    case OP_RCP:
2632       emitSFnOp(insn, 4 + 2 * insn->subOp);
2633       break;
2634    case OP_LG2:
2635       emitSFnOp(insn, 3);
2636       break;
2637    case OP_EX2:
2638       emitSFnOp(insn, 2);
2639       break;
2640    case OP_SIN:
2641       emitSFnOp(insn, 1);
2642       break;
2643    case OP_COS:
2644       emitSFnOp(insn, 0);
2645       break;
2646    case OP_PRESIN:
2647    case OP_PREEX2:
2648       emitPreOp(insn);
2649       break;
2650    case OP_TEX:
2651    case OP_TXB:
2652    case OP_TXL:
2653    case OP_TXD:
2654    case OP_TXF:
2655    case OP_TXG:
2656    case OP_TXLQ:
2657       emitTEX(insn->asTex());
2658       break;
2659    case OP_TXQ:
2660       emitTXQ(insn->asTex());
2661       break;
2662    case OP_TEXBAR:
2663       emitTEXBAR(insn);
2664       break;
2665    case OP_PIXLD:
2666       emitPIXLD(insn);
2667       break;
2668    case OP_BRA:
2669    case OP_CALL:
2670    case OP_PRERET:
2671    case OP_RET:
2672    case OP_DISCARD:
2673    case OP_EXIT:
2674    case OP_PRECONT:
2675    case OP_CONT:
2676    case OP_PREBREAK:
2677    case OP_BREAK:
2678    case OP_JOINAT:
2679    case OP_BRKPT:
2680    case OP_QUADON:
2681    case OP_QUADPOP:
2682       emitFlow(insn);
2683       break;
2684    case OP_QUADOP:
2685       emitQUADOP(insn, insn->subOp, insn->lanes);
2686       break;
2687    case OP_DFDX:
2688       emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4);
2689       break;
2690    case OP_DFDY:
2691       emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5);
2692       break;
2693    case OP_POPCNT:
2694       emitPOPC(insn);
2695       break;
2696    case OP_INSBF:
2697       emitINSBF(insn);
2698       break;
2699    case OP_EXTBF:
2700       emitEXTBF(insn);
2701       break;
2702    case OP_BFIND:
2703       emitBFIND(insn);
2704       break;
2705    case OP_PERMT:
2706       emitPERMT(insn);
2707       break;
2708    case OP_JOIN:
2709       emitNOP(insn);
2710       insn->join = 1;
2711       break;
2712    case OP_BAR:
2713       emitBAR(insn);
2714       break;
2715    case OP_MEMBAR:
2716       emitMEMBAR(insn);
2717       break;
2718    case OP_ATOM:
2719       emitATOM(insn);
2720       break;
2721    case OP_CCTL:
2722       emitCCTL(insn);
2723       break;
2724    case OP_SHFL:
2725       emitSHFL(insn);
2726       break;
2727    case OP_VOTE:
2728       emitVOTE(insn);
2729       break;
2730    case OP_SULDB:
2731       emitSULDGB(insn->asTex());
2732       break;
2733    case OP_SUSTB:
2734    case OP_SUSTP:
2735       emitSUSTGx(insn->asTex());
2736       break;
2737    case OP_SUBFM:
2738    case OP_SUCLAMP:
2739    case OP_SUEAU:
2740       emitSUCalc(insn);
2741       break;
2742    case OP_VSHL:
2743       emitVSHL(insn);
2744       break;
2745    case OP_PHI:
2746    case OP_UNION:
2747    case OP_CONSTRAINT:
2748       ERROR("operation should have been eliminated");
2749       return false;
2750    case OP_EXP:
2751    case OP_LOG:
2752    case OP_SQRT:
2753    case OP_POW:
2754       ERROR("operation should have been lowered\n");
2755       return false;
2756    default:
2757       ERROR("unknown op: %u\n", insn->op);
2758       return false;
2759    }
2760 
2761    if (insn->join)
2762       code[0] |= 1 << 22;
2763 
2764    code += 2;
2765    codeSize += 8;
2766    return true;
2767 }
2768 
2769 uint32_t
getMinEncodingSize(const Instruction * i) const2770 CodeEmitterGK110::getMinEncodingSize(const Instruction *i) const
2771 {
2772    // No more short instruction encodings.
2773    return 8;
2774 }
2775 
2776 void
prepareEmission(Function * func)2777 CodeEmitterGK110::prepareEmission(Function *func)
2778 {
2779    const Target *targ = func->getProgram()->getTarget();
2780 
2781    CodeEmitter::prepareEmission(func);
2782 
2783    if (targ->hasSWSched)
2784       calculateSchedDataNVC0(targ, func);
2785 }
2786 
CodeEmitterGK110(const TargetNVC0 * target)2787 CodeEmitterGK110::CodeEmitterGK110(const TargetNVC0 *target)
2788    : CodeEmitter(target),
2789      targNVC0(target),
2790      writeIssueDelays(target->hasSWSched)
2791 {
2792    code = NULL;
2793    codeSize = codeSizeLimit = 0;
2794    relocInfo = NULL;
2795 }
2796 
2797 CodeEmitter *
createCodeEmitterGK110(Program::Type type)2798 TargetNVC0::createCodeEmitterGK110(Program::Type type)
2799 {
2800    CodeEmitterGK110 *emit = new CodeEmitterGK110(this);
2801    emit->setProgramType(type);
2802    return emit;
2803 }
2804 
2805 } // namespace nv50_ir
2806