• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2012 Christoph Bumiller
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "codegen/nv50_ir_target_nvc0.h"
24 
25 // CodeEmitter for GK110 encoding of the Fermi/Kepler ISA.
26 
27 namespace nv50_ir {
28 
29 class CodeEmitterGK110 : public CodeEmitter
30 {
31 public:
32    CodeEmitterGK110(const TargetNVC0 *);
33 
34    virtual bool emitInstruction(Instruction *);
35    virtual uint32_t getMinEncodingSize(const Instruction *) const;
36    virtual void prepareEmission(Function *);
37 
setProgramType(Program::Type pType)38    inline void setProgramType(Program::Type pType) { progType = pType; }
39 
40 private:
41    const TargetNVC0 *targNVC0;
42 
43    Program::Type progType;
44 
45    const bool writeIssueDelays;
46 
47 private:
48    void emitForm_21(const Instruction *, uint32_t opc2, uint32_t opc1);
49    void emitForm_C(const Instruction *, uint32_t opc, uint8_t ctg);
50    void emitForm_L(const Instruction *, uint32_t opc, uint8_t ctg, Modifier, int sCount = 3);
51 
52    void emitPredicate(const Instruction *);
53 
54    void setCAddress14(const ValueRef&);
55    void setShortImmediate(const Instruction *, const int s);
56    void setImmediate32(const Instruction *, const int s, Modifier);
57    void setSUConst16(const Instruction *, const int s);
58 
59    void modNegAbsF32_3b(const Instruction *, const int s);
60 
61    void emitCondCode(CondCode cc, int pos, uint8_t mask);
62    void emitInterpMode(const Instruction *);
63    void emitLoadStoreType(DataType ty, const int pos);
64    void emitCachingMode(CacheMode c, const int pos);
65    void emitSUGType(DataType, const int pos);
66    void emitSUCachingMode(CacheMode c);
67 
68    inline uint8_t getSRegEncoding(const ValueRef&);
69 
70    void emitRoundMode(RoundMode, const int pos, const int rintPos);
71    void emitRoundModeF(RoundMode, const int pos);
72    void emitRoundModeI(RoundMode, const int pos);
73 
74    void emitNegAbs12(const Instruction *);
75 
76    void emitNOP(const Instruction *);
77 
78    void emitLOAD(const Instruction *);
79    void emitSTORE(const Instruction *);
80    void emitMOV(const Instruction *);
81    void emitATOM(const Instruction *);
82    void emitCCTL(const Instruction *);
83 
84    void emitINTERP(const Instruction *);
85    void emitAFETCH(const Instruction *);
86    void emitPFETCH(const Instruction *);
87    void emitVFETCH(const Instruction *);
88    void emitEXPORT(const Instruction *);
89    void emitOUT(const Instruction *);
90 
91    void emitUADD(const Instruction *);
92    void emitFADD(const Instruction *);
93    void emitDADD(const Instruction *);
94    void emitIMUL(const Instruction *);
95    void emitFMUL(const Instruction *);
96    void emitDMUL(const Instruction *);
97    void emitIMAD(const Instruction *);
98    void emitISAD(const Instruction *);
99    void emitSHLADD(const Instruction *);
100    void emitFMAD(const Instruction *);
101    void emitDMAD(const Instruction *);
102    void emitMADSP(const Instruction *i);
103 
104    void emitNOT(const Instruction *);
105    void emitLogicOp(const Instruction *, uint8_t subOp);
106    void emitPOPC(const Instruction *);
107    void emitINSBF(const Instruction *);
108    void emitEXTBF(const Instruction *);
109    void emitBFIND(const Instruction *);
110    void emitPERMT(const Instruction *);
111    void emitShift(const Instruction *);
112    void emitShift64(const Instruction *);
113 
114    void emitSFnOp(const Instruction *, uint8_t subOp);
115 
116    void emitCVT(const Instruction *);
117    void emitMINMAX(const Instruction *);
118    void emitPreOp(const Instruction *);
119 
120    void emitSET(const CmpInstruction *);
121    void emitSLCT(const CmpInstruction *);
122    void emitSELP(const Instruction *);
123 
124    void emitTEXBAR(const Instruction *);
125    void emitTEX(const TexInstruction *);
126    void emitTEXCSAA(const TexInstruction *);
127    void emitTXQ(const TexInstruction *);
128 
129    void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);
130 
131    void emitPIXLD(const Instruction *);
132 
133    void emitBAR(const Instruction *);
134    void emitMEMBAR(const Instruction *);
135 
136    void emitFlow(const Instruction *);
137 
138    void emitSHFL(const Instruction *);
139 
140    void emitVOTE(const Instruction *);
141 
142    void emitSULDGB(const TexInstruction *);
143    void emitSUSTGx(const TexInstruction *);
144    void emitSUCLAMPMode(uint16_t);
145    void emitSUCalc(Instruction *);
146 
147    void emitVSHL(const Instruction *);
148    void emitVectorSubOp(const Instruction *);
149 
150    inline void defId(const ValueDef&, const int pos);
151    inline void srcId(const ValueRef&, const int pos);
152    inline void srcId(const ValueRef *, const int pos);
153    inline void srcId(const Instruction *, int s, const int pos);
154 
155    inline void srcAddr32(const ValueRef&, const int pos); // address / 4
156 
157    inline bool isLIMM(const ValueRef&, DataType ty, bool mod = false);
158 };
159 
// Register id written into an operand field when no value is bound to it
// (see srcId() / defId()).
#define GK110_GPR_ZERO 255

// The helpers below expect two names to be in scope at the point of use:
//   i    - pointer to the instruction being encoded
//   code - array of 32-bit instruction words
// The first argument @b is a bit position written as bare hex digits
// (NEG_(3c, 2) means bit 0x3c, i.e. bit 28 of code[1]).
//
// Each helper is wrapped in do { } while (0) so that it behaves as a single
// statement: a following `else` can no longer bind to the macro's internal
// `if`. The bit is built from an unsigned 1 so that bit 31 of a word can be
// set without shifting into the sign bit of an int.

// Set bit 0x<b> if source @s carries a negate modifier.
#define NEG_(b, s)                                                      \
   do {                                                                 \
      if (i->src(s).mod.neg())                                          \
         code[(0x##b) / 32] |= 1u << ((0x##b) % 32);                    \
   } while (0)

// Set bit 0x<b> if source @s carries an absolute-value modifier.
#define ABS_(b, s)                                                      \
   do {                                                                 \
      if (i->src(s).mod.abs())                                          \
         code[(0x##b) / 32] |= 1u << ((0x##b) % 32);                    \
   } while (0)

// Set bit 0x<b> if source @s carries the NV50_IR_MOD_NOT modifier.
#define NOT_(b, s)                                                      \
   do {                                                                 \
      if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT))                    \
         code[(0x##b) / 32] |= 1u << ((0x##b) % 32);                    \
   } while (0)

// Set bit 0x<b> if the instruction's ftz / dnz flag is set.
#define FTZ_(b)                                                         \
   do {                                                                 \
      if (i->ftz)                                                       \
         code[(0x##b) / 32] |= 1u << ((0x##b) % 32);                    \
   } while (0)
#define DNZ_(b)                                                         \
   do {                                                                 \
      if (i->dnz)                                                       \
         code[(0x##b) / 32] |= 1u << ((0x##b) % 32);                    \
   } while (0)

// Set bit 0x<b> if the instruction saturates its result.
#define SAT_(b)                                                         \
   do {                                                                 \
      if (i->saturate)                                                  \
         code[(0x##b) / 32] |= 1u << ((0x##b) % 32);                    \
   } while (0)

// Emit the instruction's rounding mode at bit 0x<b>; @t selects the
// emitRoundMode<t>() variant (e.g. F or I).
#define RND_(b, t) emitRoundMode##t(i->rnd, 0x##b)

// Storage data of the representative value behind a source / definition.
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) ((a).rep()->reg.data)
179 
srcId(const ValueRef & src,const int pos)180 void CodeEmitterGK110::srcId(const ValueRef& src, const int pos)
181 {
182    code[pos / 32] |= (src.get() ? SDATA(src).id : GK110_GPR_ZERO) << (pos % 32);
183 }
184 
srcId(const ValueRef * src,const int pos)185 void CodeEmitterGK110::srcId(const ValueRef *src, const int pos)
186 {
187    code[pos / 32] |= (src ? SDATA(*src).id : GK110_GPR_ZERO) << (pos % 32);
188 }
189 
srcId(const Instruction * insn,int s,int pos)190 void CodeEmitterGK110::srcId(const Instruction *insn, int s, int pos)
191 {
192    int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : GK110_GPR_ZERO;
193    code[pos / 32] |= r << (pos % 32);
194 }
195 
srcAddr32(const ValueRef & src,const int pos)196 void CodeEmitterGK110::srcAddr32(const ValueRef& src, const int pos)
197 {
198    code[pos / 32] |= (SDATA(src).offset >> 2) << (pos % 32);
199 }
200 
defId(const ValueDef & def,const int pos)201 void CodeEmitterGK110::defId(const ValueDef& def, const int pos)
202 {
203    code[pos / 32] |= (def.get() && def.getFile() != FILE_FLAGS ? DDATA(def).id : GK110_GPR_ZERO) << (pos % 32);
204 }
205 
isLIMM(const ValueRef & ref,DataType ty,bool mod)206 bool CodeEmitterGK110::isLIMM(const ValueRef& ref, DataType ty, bool mod)
207 {
208    const ImmediateValue *imm = ref.get()->asImm();
209 
210    return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000));
211 }
212 
213 void
emitRoundMode(RoundMode rnd,const int pos,const int rintPos)214 CodeEmitterGK110::emitRoundMode(RoundMode rnd, const int pos, const int rintPos)
215 {
216    bool rint = false;
217    uint8_t n;
218 
219    switch (rnd) {
220    case ROUND_MI: rint = true; /* fall through */ case ROUND_M: n = 1; break;
221    case ROUND_PI: rint = true; /* fall through */ case ROUND_P: n = 2; break;
222    case ROUND_ZI: rint = true; /* fall through */ case ROUND_Z: n = 3; break;
223    default:
224       rint = rnd == ROUND_NI;
225       n = 0;
226       assert(rnd == ROUND_N || rnd == ROUND_NI);
227       break;
228    }
229    code[pos / 32] |= n << (pos % 32);
230    if (rint && rintPos >= 0)
231       code[rintPos / 32] |= 1 << (rintPos % 32);
232 }
233 
234 void
emitRoundModeF(RoundMode rnd,const int pos)235 CodeEmitterGK110::emitRoundModeF(RoundMode rnd, const int pos)
236 {
237    uint8_t n;
238 
239    switch (rnd) {
240    case ROUND_M: n = 1; break;
241    case ROUND_P: n = 2; break;
242    case ROUND_Z: n = 3; break;
243    default:
244       n = 0;
245       assert(rnd == ROUND_N);
246       break;
247    }
248    code[pos / 32] |= n << (pos % 32);
249 }
250 
251 void
emitRoundModeI(RoundMode rnd,const int pos)252 CodeEmitterGK110::emitRoundModeI(RoundMode rnd, const int pos)
253 {
254    uint8_t n;
255 
256    switch (rnd) {
257    case ROUND_MI: n = 1; break;
258    case ROUND_PI: n = 2; break;
259    case ROUND_ZI: n = 3; break;
260    default:
261       n = 0;
262       assert(rnd == ROUND_NI);
263       break;
264    }
265    code[pos / 32] |= n << (pos % 32);
266 }
267 
emitCondCode(CondCode cc,int pos,uint8_t mask)268 void CodeEmitterGK110::emitCondCode(CondCode cc, int pos, uint8_t mask)
269 {
270    uint8_t n;
271 
272    switch (cc) {
273    case CC_FL:  n = 0x00; break;
274    case CC_LT:  n = 0x01; break;
275    case CC_EQ:  n = 0x02; break;
276    case CC_LE:  n = 0x03; break;
277    case CC_GT:  n = 0x04; break;
278    case CC_NE:  n = 0x05; break;
279    case CC_GE:  n = 0x06; break;
280    case CC_LTU: n = 0x09; break;
281    case CC_EQU: n = 0x0a; break;
282    case CC_LEU: n = 0x0b; break;
283    case CC_GTU: n = 0x0c; break;
284    case CC_NEU: n = 0x0d; break;
285    case CC_GEU: n = 0x0e; break;
286    case CC_TR:  n = 0x0f; break;
287    case CC_NO:  n = 0x10; break;
288    case CC_NC:  n = 0x11; break;
289    case CC_NS:  n = 0x12; break;
290    case CC_NA:  n = 0x13; break;
291    case CC_A:   n = 0x14; break;
292    case CC_S:   n = 0x15; break;
293    case CC_C:   n = 0x16; break;
294    case CC_O:   n = 0x17; break;
295    default:
296       n = 0;
297       assert(!"invalid condition code");
298       break;
299    }
300    code[pos / 32] |= (n & mask) << (pos % 32);
301 }
302 
303 void
emitPredicate(const Instruction * i)304 CodeEmitterGK110::emitPredicate(const Instruction *i)
305 {
306    if (i->predSrc >= 0) {
307       srcId(i->src(i->predSrc), 18);
308       if (i->cc == CC_NOT_P)
309          code[0] |= 8 << 18; // negate
310       assert(i->getPredicate()->reg.file == FILE_PREDICATE);
311    } else {
312       code[0] |= 7 << 18;
313    }
314 }
315 
316 void
setCAddress14(const ValueRef & src)317 CodeEmitterGK110::setCAddress14(const ValueRef& src)
318 {
319    const Storage& res = src.get()->asSym()->reg;
320    const int32_t addr = res.data.offset / 4;
321 
322    code[0] |= (addr & 0x01ff) << 23;
323    code[1] |= (addr & 0x3e00) >> 9;
324    code[1] |= res.fileIndex << 5;
325 }
326 
327 void
setShortImmediate(const Instruction * i,const int s)328 CodeEmitterGK110::setShortImmediate(const Instruction *i, const int s)
329 {
330    const uint32_t u32 = i->getSrc(s)->asImm()->reg.data.u32;
331    const uint64_t u64 = i->getSrc(s)->asImm()->reg.data.u64;
332 
333    if (i->sType == TYPE_F32) {
334       assert(!(u32 & 0x00000fff));
335       code[0] |= ((u32 & 0x001ff000) >> 12) << 23;
336       code[1] |= ((u32 & 0x7fe00000) >> 21);
337       code[1] |= ((u32 & 0x80000000) >> 4);
338    } else
339    if (i->sType == TYPE_F64) {
340       assert(!(u64 & 0x00000fffffffffffULL));
341       code[0] |= ((u64 & 0x001ff00000000000ULL) >> 44) << 23;
342       code[1] |= ((u64 & 0x7fe0000000000000ULL) >> 53);
343       code[1] |= ((u64 & 0x8000000000000000ULL) >> 36);
344    } else {
345       assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000);
346       code[0] |= (u32 & 0x001ff) << 23;
347       code[1] |= (u32 & 0x7fe00) >> 9;
348       code[1] |= (u32 & 0x80000) << 8;
349    }
350 }
351 
352 void
setImmediate32(const Instruction * i,const int s,Modifier mod)353 CodeEmitterGK110::setImmediate32(const Instruction *i, const int s,
354                                  Modifier mod)
355 {
356    uint32_t u32 = i->getSrc(s)->asImm()->reg.data.u32;
357 
358    if (mod) {
359       ImmediateValue imm(i->getSrc(s)->asImm(), i->sType);
360       mod.applyTo(imm);
361       u32 = imm.reg.data.u32;
362    }
363 
364    code[0] |= u32 << 23;
365    code[1] |= u32 >> 9;
366 }
367 
368 void
emitForm_L(const Instruction * i,uint32_t opc,uint8_t ctg,Modifier mod,int sCount)369 CodeEmitterGK110::emitForm_L(const Instruction *i, uint32_t opc, uint8_t ctg,
370                              Modifier mod, int sCount)
371 {
372    code[0] = ctg;
373    code[1] = opc << 20;
374 
375    emitPredicate(i);
376 
377    defId(i->def(0), 2);
378 
379    for (int s = 0; s < sCount && i->srcExists(s); ++s) {
380       switch (i->src(s).getFile()) {
381       case FILE_GPR:
382          srcId(i->src(s), s ? 42 : 10);
383          break;
384       case FILE_IMMEDIATE:
385          setImmediate32(i, s, mod);
386          break;
387       default:
388          break;
389       }
390    }
391 }
392 
393 
394 void
emitForm_C(const Instruction * i,uint32_t opc,uint8_t ctg)395 CodeEmitterGK110::emitForm_C(const Instruction *i, uint32_t opc, uint8_t ctg)
396 {
397    code[0] = ctg;
398    code[1] = opc << 20;
399 
400    emitPredicate(i);
401 
402    defId(i->def(0), 2);
403 
404    switch (i->src(0).getFile()) {
405    case FILE_MEMORY_CONST:
406       code[1] |= 0x4 << 28;
407       setCAddress14(i->src(0));
408       break;
409    case FILE_GPR:
410       code[1] |= 0xc << 28;
411       srcId(i->src(0), 23);
412       break;
413    default:
414       assert(0);
415       break;
416    }
417 }
418 
419 // 0x2 for GPR, c[] and 0x1 for short immediate
420 void
emitForm_21(const Instruction * i,uint32_t opc2,uint32_t opc1)421 CodeEmitterGK110::emitForm_21(const Instruction *i, uint32_t opc2,
422                               uint32_t opc1)
423 {
424    const bool imm = i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE;
425 
426    int s1 = 23;
427    if (i->srcExists(2) && i->src(2).getFile() == FILE_MEMORY_CONST)
428       s1 = 42;
429 
430    if (imm) {
431       code[0] = 0x1;
432       code[1] = opc1 << 20;
433    } else {
434       code[0] = 0x2;
435       code[1] = (0xc << 28) | (opc2 << 20);
436    }
437 
438    emitPredicate(i);
439 
440    defId(i->def(0), 2);
441 
442    for (int s = 0; s < 3 && i->srcExists(s); ++s) {
443       switch (i->src(s).getFile()) {
444       case FILE_MEMORY_CONST:
445          code[1] &= (s == 2) ? ~(0x4 << 28) : ~(0x8 << 28);
446          setCAddress14(i->src(s));
447          break;
448       case FILE_IMMEDIATE:
449          setShortImmediate(i, s);
450          break;
451       case FILE_GPR:
452          srcId(i->src(s), s ? ((s == 2) ? 42 : s1) : 10);
453          break;
454       default:
455          if (i->op == OP_SELP) {
456             assert(s == 2 && i->src(s).getFile() == FILE_PREDICATE);
457             srcId(i->src(s), 42);
458          }
459          // ignore here, can be predicate or flags, but must not be address
460          break;
461       }
462    }
463    // 0x0 = invalid
464    // 0xc = rrr
465    // 0x8 = rrc
466    // 0x4 = rcr
467    assert(imm || (code[1] & (0xc << 28)));
468 }
469 
470 inline void
modNegAbsF32_3b(const Instruction * i,const int s)471 CodeEmitterGK110::modNegAbsF32_3b(const Instruction *i, const int s)
472 {
473    if (i->src(s).mod.abs()) code[1] &= ~(1 << 27);
474    if (i->src(s).mod.neg()) code[1] ^=  (1 << 27);
475 }
476 
477 void
emitNOP(const Instruction * i)478 CodeEmitterGK110::emitNOP(const Instruction *i)
479 {
480    code[0] = 0x00003c02;
481    code[1] = 0x85800000;
482 
483    if (i)
484       emitPredicate(i);
485    else
486       code[0] = 0x001c3c02;
487 }
488 
489 void
emitFMAD(const Instruction * i)490 CodeEmitterGK110::emitFMAD(const Instruction *i)
491 {
492    bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
493 
494    if (isLIMM(i->src(1), TYPE_F32)) {
495       assert(i->getDef(0)->reg.data.id == i->getSrc(2)->reg.data.id);
496 
497       // last source is dst, so force 2 sources
498       emitForm_L(i, 0x600, 0x0, 0, 2);
499 
500       if (i->flagsDef >= 0)
501          code[1] |= 1 << 23;
502 
503       SAT_(3a);
504       NEG_(3c, 2);
505 
506       if (neg1) {
507          code[1] |= 1 << 27;
508       }
509    } else {
510       emitForm_21(i, 0x0c0, 0x940);
511 
512       NEG_(34, 2);
513       SAT_(35);
514       RND_(36, F);
515 
516       if (code[0] & 0x1) {
517          if (neg1)
518             code[1] ^= 1 << 27;
519       } else
520       if (neg1) {
521          code[1] |= 1 << 19;
522       }
523    }
524 
525    FTZ_(38);
526    DNZ_(39);
527 }
528 
529 void
emitDMAD(const Instruction * i)530 CodeEmitterGK110::emitDMAD(const Instruction *i)
531 {
532    assert(!i->saturate);
533    assert(!i->ftz);
534 
535    emitForm_21(i, 0x1b8, 0xb38);
536 
537    NEG_(34, 2);
538    RND_(36, F);
539 
540    bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
541 
542    if (code[0] & 0x1) {
543       if (neg1)
544          code[1] ^= 1 << 27;
545    } else
546    if (neg1) {
547       code[1] |= 1 << 19;
548    }
549 }
550 
551 void
emitMADSP(const Instruction * i)552 CodeEmitterGK110::emitMADSP(const Instruction *i)
553 {
554    emitForm_21(i, 0x140, 0xa40);
555 
556    if (i->subOp == NV50_IR_SUBOP_MADSP_SD) {
557       code[1] |= 0x00c00000;
558    } else {
559       code[1] |= (i->subOp & 0x00f) << 19; // imadp1
560       code[1] |= (i->subOp & 0x0f0) << 20; // imadp2
561       code[1] |= (i->subOp & 0x100) << 11; // imadp3
562       code[1] |= (i->subOp & 0x200) << 15; // imadp3
563       code[1] |= (i->subOp & 0xc00) << 12; // imadp3
564    }
565 
566    if (i->flagsDef >= 0)
567       code[1] |= 1 << 18;
568 }
569 
570 void
emitFMUL(const Instruction * i)571 CodeEmitterGK110::emitFMUL(const Instruction *i)
572 {
573    bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
574 
575    assert(i->postFactor >= -3 && i->postFactor <= 3);
576 
577    if (isLIMM(i->src(1), TYPE_F32)) {
578       emitForm_L(i, 0x200, 0x2, Modifier(0));
579 
580       FTZ_(38);
581       DNZ_(39);
582       SAT_(3a);
583       if (neg)
584          code[1] ^= 1 << 22;
585 
586       assert(i->postFactor == 0);
587    } else {
588       emitForm_21(i, 0x234, 0xc34);
589       code[1] |= ((i->postFactor > 0) ?
590                   (7 - i->postFactor) : (0 - i->postFactor)) << 12;
591 
592       RND_(2a, F);
593       FTZ_(2f);
594       DNZ_(30);
595       SAT_(35);
596 
597       if (code[0] & 0x1) {
598          if (neg)
599             code[1] ^= 1 << 27;
600       } else
601       if (neg) {
602          code[1] |= 1 << 19;
603       }
604    }
605 }
606 
607 void
emitDMUL(const Instruction * i)608 CodeEmitterGK110::emitDMUL(const Instruction *i)
609 {
610    bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
611 
612    assert(!i->postFactor);
613    assert(!i->saturate);
614    assert(!i->ftz);
615    assert(!i->dnz);
616 
617    emitForm_21(i, 0x240, 0xc40);
618 
619    RND_(2a, F);
620 
621    if (code[0] & 0x1) {
622       if (neg)
623          code[1] ^= 1 << 27;
624    } else
625    if (neg) {
626       code[1] |= 1 << 19;
627    }
628 }
629 
630 void
emitIMUL(const Instruction * i)631 CodeEmitterGK110::emitIMUL(const Instruction *i)
632 {
633    assert(!i->src(0).mod.neg() && !i->src(1).mod.neg());
634    assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
635 
636    if (i->src(1).getFile() == FILE_IMMEDIATE) {
637       emitForm_L(i, 0x280, 2, Modifier(0));
638 
639       if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
640          code[1] |= 1 << 24;
641       if (i->sType == TYPE_S32)
642          code[1] |= 3 << 25;
643    } else {
644       emitForm_21(i, 0x21c, 0xc1c);
645 
646       if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
647          code[1] |= 1 << 10;
648       if (i->sType == TYPE_S32)
649          code[1] |= 3 << 11;
650    }
651 }
652 
653 void
emitFADD(const Instruction * i)654 CodeEmitterGK110::emitFADD(const Instruction *i)
655 {
656    if (isLIMM(i->src(1), TYPE_F32)) {
657       assert(i->rnd == ROUND_N);
658       assert(!i->saturate);
659 
660       Modifier mod = i->src(1).mod ^
661          Modifier(i->op == OP_SUB ? NV50_IR_MOD_NEG : 0);
662 
663       emitForm_L(i, 0x400, 0, mod);
664 
665       FTZ_(3a);
666       NEG_(3b, 0);
667       ABS_(39, 0);
668    } else {
669       emitForm_21(i, 0x22c, 0xc2c);
670 
671       FTZ_(2f);
672       RND_(2a, F);
673       ABS_(31, 0);
674       NEG_(33, 0);
675       SAT_(35);
676 
677       if (code[0] & 0x1) {
678          modNegAbsF32_3b(i, 1);
679          if (i->op == OP_SUB) code[1] ^= 1 << 27;
680       } else {
681          ABS_(34, 1);
682          NEG_(30, 1);
683          if (i->op == OP_SUB) code[1] ^= 1 << 16;
684       }
685    }
686 }
687 
688 void
emitDADD(const Instruction * i)689 CodeEmitterGK110::emitDADD(const Instruction *i)
690 {
691    assert(!i->saturate);
692    assert(!i->ftz);
693 
694    emitForm_21(i, 0x238, 0xc38);
695    RND_(2a, F);
696    ABS_(31, 0);
697    NEG_(33, 0);
698    if (code[0] & 0x1) {
699       modNegAbsF32_3b(i, 1);
700       if (i->op == OP_SUB) code[1] ^= 1 << 27;
701    } else {
702       NEG_(30, 1);
703       ABS_(34, 1);
704       if (i->op == OP_SUB) code[1] ^= 1 << 16;
705    }
706 }
707 
708 void
emitUADD(const Instruction * i)709 CodeEmitterGK110::emitUADD(const Instruction *i)
710 {
711    uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(1).mod.neg();
712 
713    if (i->op == OP_SUB)
714       addOp ^= 1;
715 
716    assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
717 
718    if (isLIMM(i->src(1), TYPE_S32)) {
719       emitForm_L(i, 0x400, 1, Modifier((addOp & 1) ? NV50_IR_MOD_NEG : 0));
720 
721       if (addOp & 2)
722          code[1] |= 1 << 27;
723 
724       assert(i->flagsDef < 0);
725       assert(i->flagsSrc < 0);
726 
727       SAT_(39);
728    } else {
729       emitForm_21(i, 0x208, 0xc08);
730 
731       assert(addOp != 3); // would be add-plus-one
732 
733       code[1] |= addOp << 19;
734 
735       if (i->flagsDef >= 0)
736          code[1] |= 1 << 18; // write carry
737       if (i->flagsSrc >= 0)
738          code[1] |= 1 << 14; // add carry
739 
740       SAT_(35);
741    }
742 }
743 
744 void
emitIMAD(const Instruction * i)745 CodeEmitterGK110::emitIMAD(const Instruction *i)
746 {
747    uint8_t addOp =
748       i->src(2).mod.neg() | ((i->src(0).mod.neg() ^ i->src(1).mod.neg()) << 1);
749 
750    emitForm_21(i, 0x100, 0xa00);
751 
752    assert(addOp != 3);
753    code[1] |= addOp << 26;
754 
755    if (i->sType == TYPE_S32)
756       code[1] |= (1 << 19) | (1 << 24);
757 
758    if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
759       code[1] |= 1 << 25;
760 
761    if (i->flagsDef >= 0) code[1] |= 1 << 18;
762    if (i->flagsSrc >= 0) code[1] |= 1 << 20;
763 
764    SAT_(35);
765 }
766 
767 void
emitISAD(const Instruction * i)768 CodeEmitterGK110::emitISAD(const Instruction *i)
769 {
770    assert(i->dType == TYPE_S32 || i->dType == TYPE_U32);
771 
772    emitForm_21(i, 0x1f4, 0xb74);
773 
774    if (i->dType == TYPE_S32)
775       code[1] |= 1 << 19;
776 }
777 
778 void
emitSHLADD(const Instruction * i)779 CodeEmitterGK110::emitSHLADD(const Instruction *i)
780 {
781    uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(2).mod.neg();
782    const ImmediateValue *imm = i->src(1).get()->asImm();
783    assert(imm);
784 
785    if (i->src(2).getFile() == FILE_IMMEDIATE) {
786       code[0] = 0x1;
787       code[1] = 0xc0c << 20;
788    } else {
789       code[0] = 0x2;
790       code[1] = 0x20c << 20;
791    }
792    code[1] |= addOp << 19;
793 
794    emitPredicate(i);
795 
796    defId(i->def(0), 2);
797    srcId(i->src(0), 10);
798 
799    if (i->flagsDef >= 0)
800       code[1] |= 1 << 18;
801 
802    assert(!(imm->reg.data.u32 & 0xffffffe0));
803    code[1] |= imm->reg.data.u32 << 10;
804 
805    switch (i->src(2).getFile()) {
806    case FILE_GPR:
807       assert(code[0] & 0x2);
808       code[1] |= 0xc << 28;
809       srcId(i->src(2), 23);
810       break;
811    case FILE_MEMORY_CONST:
812       assert(code[0] & 0x2);
813       code[1] |= 0x4 << 28;
814       setCAddress14(i->src(2));
815       break;
816    case FILE_IMMEDIATE:
817       assert(code[0] & 0x1);
818       setShortImmediate(i, 2);
819       break;
820    default:
821       assert(!"bad src2 file");
822       break;
823    }
824 }
825 
826 void
emitNOT(const Instruction * i)827 CodeEmitterGK110::emitNOT(const Instruction *i)
828 {
829    code[0] = 0x0003fc02; // logop(mov2) dst, 0, not src
830    code[1] = 0x22003800;
831 
832    emitPredicate(i);
833 
834    defId(i->def(0), 2);
835 
836    switch (i->src(0).getFile()) {
837    case FILE_GPR:
838       code[1] |= 0xc << 28;
839       srcId(i->src(0), 23);
840       break;
841    case FILE_MEMORY_CONST:
842       code[1] |= 0x4 << 28;
843       setCAddress14(i->src(0));
844       break;
845    default:
846       assert(0);
847       break;
848    }
849 }
850 
851 void
emitLogicOp(const Instruction * i,uint8_t subOp)852 CodeEmitterGK110::emitLogicOp(const Instruction *i, uint8_t subOp)
853 {
854    if (i->def(0).getFile() == FILE_PREDICATE) {
855       code[0] = 0x00000002 | (subOp << 27);
856       code[1] = 0x84800000;
857 
858       emitPredicate(i);
859 
860       defId(i->def(0), 5);
861       srcId(i->src(0), 14);
862       if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 17;
863       srcId(i->src(1), 32);
864       if (i->src(1).mod == Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 3;
865 
866       if (i->defExists(1)) {
867          defId(i->def(1), 2);
868       } else {
869          code[0] |= 7 << 2;
870       }
871       // (a OP b) OP c
872       if (i->predSrc != 2 && i->srcExists(2)) {
873          code[1] |= subOp << 16;
874          srcId(i->src(2), 42);
875          if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 13;
876       } else {
877          code[1] |= 7 << 10;
878       }
879    } else
880    if (isLIMM(i->src(1), TYPE_S32)) {
881       emitForm_L(i, 0x200, 0, i->src(1).mod);
882       code[1] |= subOp << 24;
883       NOT_(3a, 0);
884    } else {
885       emitForm_21(i, 0x220, 0xc20);
886       code[1] |= subOp << 12;
887       NOT_(2a, 0);
888       NOT_(2b, 1);
889    }
890 }
891 
892 void
emitPOPC(const Instruction * i)893 CodeEmitterGK110::emitPOPC(const Instruction *i)
894 {
895    assert(!isLIMM(i->src(1), TYPE_S32, true));
896 
897    emitForm_21(i, 0x204, 0xc04);
898 
899    NOT_(2a, 0);
900    if (!(code[0] & 0x1))
901       NOT_(2b, 1);
902 }
903 
904 void
emitINSBF(const Instruction * i)905 CodeEmitterGK110::emitINSBF(const Instruction *i)
906 {
907    emitForm_21(i, 0x1f8, 0xb78);
908 }
909 
910 void
emitEXTBF(const Instruction * i)911 CodeEmitterGK110::emitEXTBF(const Instruction *i)
912 {
913    emitForm_21(i, 0x600, 0xc00);
914 
915    if (i->dType == TYPE_S32)
916       code[1] |= 0x80000;
917    if (i->subOp == NV50_IR_SUBOP_EXTBF_REV)
918       code[1] |= 0x800;
919 }
920 
921 void
emitBFIND(const Instruction * i)922 CodeEmitterGK110::emitBFIND(const Instruction *i)
923 {
924    emitForm_C(i, 0x218, 0x2);
925 
926    if (i->dType == TYPE_S32)
927       code[1] |= 0x80000;
928    if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
929       code[1] |= 0x800;
930    if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT)
931       code[1] |= 0x1000;
932 }
933 
934 void
emitPERMT(const Instruction * i)935 CodeEmitterGK110::emitPERMT(const Instruction *i)
936 {
937    emitForm_21(i, 0x1e0, 0xb60);
938 
939    code[1] |= i->subOp << 19;
940 }
941 
942 void
emitShift(const Instruction * i)943 CodeEmitterGK110::emitShift(const Instruction *i)
944 {
945    if (i->op == OP_SHR) {
946       emitForm_21(i, 0x214, 0xc14);
947       if (isSignedType(i->dType))
948          code[1] |= 1 << 19;
949    } else {
950       emitForm_21(i, 0x224, 0xc24);
951    }
952 
953    if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP)
954       code[1] |= 1 << 10;
955 }
956 
957 void
emitShift64(const Instruction * i)958 CodeEmitterGK110::emitShift64(const Instruction *i)
959 {
960    if (i->op == OP_SHR) {
961       emitForm_21(i, 0x27c, 0xc7c);
962       if (isSignedType(i->sType))
963          code[1] |= 0x100;
964       if (i->subOp & NV50_IR_SUBOP_SHIFT_HIGH)
965          code[1] |= 1 << 19;
966    } else {
967       emitForm_21(i, 0xdfc, 0xf7c);
968    }
969    code[1] |= 0x200;
970 
971    if (i->subOp & NV50_IR_SUBOP_SHIFT_WRAP)
972       code[1] |= 1 << 21;
973 }
974 
975 void
emitPreOp(const Instruction * i)976 CodeEmitterGK110::emitPreOp(const Instruction *i)
977 {
978    emitForm_C(i, 0x248, 0x2);
979 
980    if (i->op == OP_PREEX2)
981       code[1] |= 1 << 10;
982 
983    NEG_(30, 0);
984    ABS_(34, 0);
985 }
986 
987 void
emitSFnOp(const Instruction * i,uint8_t subOp)988 CodeEmitterGK110::emitSFnOp(const Instruction *i, uint8_t subOp)
989 {
990    code[0] = 0x00000002 | (subOp << 23);
991    code[1] = 0x84000000;
992 
993    emitPredicate(i);
994 
995    defId(i->def(0), 2);
996    srcId(i->src(0), 10);
997 
998    NEG_(33, 0);
999    ABS_(31, 0);
1000    SAT_(35);
1001 }
1002 
1003 void
emitMINMAX(const Instruction * i)1004 CodeEmitterGK110::emitMINMAX(const Instruction *i)
1005 {
1006    uint32_t op2, op1;
1007 
1008    switch (i->dType) {
1009    case TYPE_U32:
1010    case TYPE_S32:
1011       op2 = 0x210;
1012       op1 = 0xc10;
1013       break;
1014    case TYPE_F32:
1015       op2 = 0x230;
1016       op1 = 0xc30;
1017       break;
1018    case TYPE_F64:
1019       op2 = 0x228;
1020       op1 = 0xc28;
1021       break;
1022    default:
1023       assert(0);
1024       op2 = 0;
1025       op1 = 0;
1026       break;
1027    }
1028    emitForm_21(i, op2, op1);
1029 
1030    if (i->dType == TYPE_S32)
1031       code[1] |= 1 << 19;
1032    code[1] |= (i->op == OP_MIN) ? 0x1c00 : 0x3c00; // [!]pt
1033    code[1] |= i->subOp << 14;
1034    if (i->flagsDef >= 0)
1035       code[1] |= i->subOp << 18;
1036 
1037    FTZ_(2f);
1038    ABS_(31, 0);
1039    NEG_(33, 0);
1040    if (code[0] & 0x1) {
1041       modNegAbsF32_3b(i, 1);
1042    } else {
1043       ABS_(34, 1);
1044       NEG_(30, 1);
1045    }
1046 }
1047 
1048 void
emitCVT(const Instruction * i)1049 CodeEmitterGK110::emitCVT(const Instruction *i)
1050 {
1051    const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
1052    const bool f2i = !isFloatType(i->dType) && isFloatType(i->sType);
1053    const bool i2f = isFloatType(i->dType) && !isFloatType(i->sType);
1054 
1055    bool sat = i->saturate;
1056    bool abs = i->src(0).mod.abs();
1057    bool neg = i->src(0).mod.neg();
1058 
1059    RoundMode rnd = i->rnd;
1060 
1061    switch (i->op) {
1062    case OP_CEIL:  rnd = f2f ? ROUND_PI : ROUND_P; break;
1063    case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;
1064    case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;
1065    case OP_SAT: sat = true; break;
1066    case OP_NEG: neg = !neg; break;
1067    case OP_ABS: abs = true; neg = false; break;
1068    default:
1069       break;
1070    }
1071 
1072    DataType dType;
1073 
1074    if (i->op == OP_NEG && i->dType == TYPE_U32)
1075       dType = TYPE_S32;
1076    else
1077       dType = i->dType;
1078 
1079 
1080    uint32_t op;
1081 
1082    if      (f2f) op = 0x254;
1083    else if (f2i) op = 0x258;
1084    else if (i2f) op = 0x25c;
1085    else          op = 0x260;
1086 
1087    emitForm_C(i, op, 0x2);
1088 
1089    FTZ_(2f);
1090    if (neg) code[1] |= 1 << 16;
1091    if (abs) code[1] |= 1 << 20;
1092    if (sat) code[1] |= 1 << 21;
1093 
1094    emitRoundMode(rnd, 32 + 10, f2f ? (32 + 13) : -1);
1095 
1096    code[0] |= typeSizeofLog2(dType) << 10;
1097    code[0] |= typeSizeofLog2(i->sType) << 12;
1098    code[1] |= i->subOp << 12;
1099 
1100    if (isSignedIntType(dType))
1101       code[0] |= 0x4000;
1102    if (isSignedIntType(i->sType))
1103       code[0] |= 0x8000;
1104 }
1105 
1106 void
emitSET(const CmpInstruction * i)1107 CodeEmitterGK110::emitSET(const CmpInstruction *i)
1108 {
1109    uint16_t op1, op2;
1110 
1111    if (i->def(0).getFile() == FILE_PREDICATE) {
1112       switch (i->sType) {
1113       case TYPE_F32: op2 = 0x1d8; op1 = 0xb58; break;
1114       case TYPE_F64: op2 = 0x1c0; op1 = 0xb40; break;
1115       default:
1116          op2 = 0x1b0;
1117          op1 = 0xb30;
1118          break;
1119       }
1120       emitForm_21(i, op2, op1);
1121 
1122       NEG_(2e, 0);
1123       ABS_(9, 0);
1124       if (!(code[0] & 0x1)) {
1125          NEG_(8, 1);
1126          ABS_(2f, 1);
1127       } else {
1128          modNegAbsF32_3b(i, 1);
1129       }
1130       FTZ_(32);
1131 
1132       // normal DST field is negated predicate result
1133       code[0] = (code[0] & ~0xfc) | ((code[0] << 3) & 0xe0);
1134       if (i->defExists(1))
1135          defId(i->def(1), 2);
1136       else
1137          code[0] |= 0x1c;
1138    } else {
1139       switch (i->sType) {
1140       case TYPE_F32: op2 = 0x000; op1 = 0x800; break;
1141       case TYPE_F64: op2 = 0x080; op1 = 0x900; break;
1142       default:
1143          op2 = 0x1a8;
1144          op1 = 0xb28;
1145          break;
1146       }
1147       emitForm_21(i, op2, op1);
1148 
1149       NEG_(2e, 0);
1150       ABS_(39, 0);
1151       if (!(code[0] & 0x1)) {
1152          NEG_(38, 1);
1153          ABS_(2f, 1);
1154       } else {
1155          modNegAbsF32_3b(i, 1);
1156       }
1157       FTZ_(3a);
1158 
1159       if (i->dType == TYPE_F32) {
1160          if (isFloatType(i->sType))
1161             code[1] |= 1 << 23;
1162          else
1163             code[1] |= 1 << 15;
1164       }
1165    }
1166    if (i->sType == TYPE_S32)
1167       code[1] |= 1 << 19;
1168 
1169    if (i->op != OP_SET) {
1170       switch (i->op) {
1171       case OP_SET_AND: code[1] |= 0x0 << 16; break;
1172       case OP_SET_OR:  code[1] |= 0x1 << 16; break;
1173       case OP_SET_XOR: code[1] |= 0x2 << 16; break;
1174       default:
1175          assert(0);
1176          break;
1177       }
1178       srcId(i->src(2), 0x2a);
1179    } else {
1180       code[1] |= 0x7 << 10;
1181    }
1182    if (i->flagsSrc >= 0)
1183       code[1] |= 1 << 14;
1184    emitCondCode(i->setCond,
1185                 isFloatType(i->sType) ? 0x33 : 0x34,
1186                 isFloatType(i->sType) ? 0xf : 0x7);
1187 }
1188 
1189 void
emitSLCT(const CmpInstruction * i)1190 CodeEmitterGK110::emitSLCT(const CmpInstruction *i)
1191 {
1192    CondCode cc = i->setCond;
1193    if (i->src(2).mod.neg())
1194       cc = reverseCondCode(cc);
1195 
1196    if (i->dType == TYPE_F32) {
1197       emitForm_21(i, 0x1d0, 0xb50);
1198       FTZ_(32);
1199       emitCondCode(cc, 0x33, 0xf);
1200    } else {
1201       emitForm_21(i, 0x1a0, 0xb20);
1202       emitCondCode(cc, 0x34, 0x7);
1203       if (i->dType == TYPE_S32)
1204          code[1] |= 1 << 19;
1205    }
1206 }
1207 
1208 static void
selpFlip(const FixupEntry * entry,uint32_t * code,const FixupData & data)1209 selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
1210 {
1211    int loc = entry->loc;
1212    if (data.force_persample_interp)
1213       code[loc + 1] |= 1 << 13;
1214    else
1215       code[loc + 1] &= ~(1 << 13);
1216 }
1217 
emitSELP(const Instruction * i)1218 void CodeEmitterGK110::emitSELP(const Instruction *i)
1219 {
1220    emitForm_21(i, 0x250, 0x050);
1221 
1222    if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
1223       code[1] |= 1 << 13;
1224 
1225    if (i->subOp == 1) {
1226       addInterp(0, 0, selpFlip);
1227    }
1228 }
1229 
emitTEXBAR(const Instruction * i)1230 void CodeEmitterGK110::emitTEXBAR(const Instruction *i)
1231 {
1232    code[0] = 0x0000003e | (i->subOp << 23);
1233    code[1] = 0x77000000;
1234 
1235    emitPredicate(i);
1236 }
1237 
emitTEXCSAA(const TexInstruction * i)1238 void CodeEmitterGK110::emitTEXCSAA(const TexInstruction *i)
1239 {
1240    code[0] = 0x00000002;
1241    code[1] = 0x76c00000;
1242 
1243    code[1] |= i->tex.r << 9;
1244    // code[1] |= i->tex.s << (9 + 8);
1245 
1246    if (i->tex.liveOnly)
1247       code[0] |= 0x80000000;
1248 
1249    defId(i->def(0), 2);
1250    srcId(i->src(0), 10);
1251 }
1252 
1253 static inline bool
isNextIndependentTex(const TexInstruction * i)1254 isNextIndependentTex(const TexInstruction *i)
1255 {
1256    if (!i->next || !isTextureOp(i->next->op))
1257       return false;
1258    if (i->getDef(0)->interfers(i->next->getSrc(0)))
1259       return false;
1260    return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1));
1261 }
1262 
1263 void
emitTEX(const TexInstruction * i)1264 CodeEmitterGK110::emitTEX(const TexInstruction *i)
1265 {
1266    const bool ind = i->tex.rIndirectSrc >= 0;
1267 
1268    if (ind) {
1269       code[0] = 0x00000002;
1270       switch (i->op) {
1271       case OP_TXD:
1272          code[1] = 0x7e000000;
1273          break;
1274       case OP_TXLQ:
1275          code[1] = 0x7e800000;
1276          break;
1277       case OP_TXF:
1278          code[1] = 0x78000000;
1279          break;
1280       case OP_TXG:
1281          code[1] = 0x7dc00000;
1282          break;
1283       default:
1284          code[1] = 0x7d800000;
1285          break;
1286       }
1287    } else {
1288       switch (i->op) {
1289       case OP_TXD:
1290          code[0] = 0x00000002;
1291          code[1] = 0x76000000;
1292          code[1] |= i->tex.r << 9;
1293          break;
1294       case OP_TXLQ:
1295          code[0] = 0x00000002;
1296          code[1] = 0x76800000;
1297          code[1] |= i->tex.r << 9;
1298          break;
1299       case OP_TXF:
1300          code[0] = 0x00000002;
1301          code[1] = 0x70000000;
1302          code[1] |= i->tex.r << 13;
1303          break;
1304       case OP_TXG:
1305          code[0] = 0x00000001;
1306          code[1] = 0x70000000;
1307          code[1] |= i->tex.r << 15;
1308          break;
1309       default:
1310          code[0] = 0x00000001;
1311          code[1] = 0x60000000;
1312          code[1] |= i->tex.r << 15;
1313          break;
1314       }
1315    }
1316 
1317    code[1] |= isNextIndependentTex(i) ? 0x1 : 0x2; // t : p mode
1318 
1319    if (i->tex.liveOnly)
1320       code[0] |= 0x80000000;
1321 
1322    switch (i->op) {
1323    case OP_TEX: break;
1324    case OP_TXB: code[1] |= 0x2000; break;
1325    case OP_TXL: code[1] |= 0x3000; break;
1326    case OP_TXF: break;
1327    case OP_TXG: break;
1328    case OP_TXD: break;
1329    case OP_TXLQ: break;
1330    default:
1331       assert(!"invalid texture op");
1332       break;
1333    }
1334 
1335    if (i->op == OP_TXF) {
1336       if (!i->tex.levelZero)
1337          code[1] |= 0x1000;
1338    } else
1339    if (i->tex.levelZero) {
1340       code[1] |= 0x1000;
1341    }
1342 
1343    if (i->op != OP_TXD && i->tex.derivAll)
1344       code[1] |= 0x200;
1345 
1346    emitPredicate(i);
1347 
1348    code[1] |= i->tex.mask << 2;
1349 
1350    const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1351 
1352    defId(i->def(0), 2);
1353    srcId(i->src(0), 10);
1354    srcId(i, src1, 23);
1355 
1356    if (i->op == OP_TXG) code[1] |= i->tex.gatherComp << 13;
1357 
1358    // texture target:
1359    code[1] |= (i->tex.target.isCube() ? 3 : (i->tex.target.getDim() - 1)) << 7;
1360    if (i->tex.target.isArray())
1361       code[1] |= 0x40;
1362    if (i->tex.target.isShadow())
1363       code[1] |= 0x400;
1364    if (i->tex.target == TEX_TARGET_2D_MS ||
1365        i->tex.target == TEX_TARGET_2D_MS_ARRAY)
1366       code[1] |= 0x800;
1367 
1368    if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) {
1369       // ?
1370    }
1371 
1372    if (i->tex.useOffsets == 1) {
1373       switch (i->op) {
1374       case OP_TXF: code[1] |= 0x200; break;
1375       case OP_TXD: code[1] |= 0x00400000; break;
1376       default: code[1] |= 0x800; break;
1377       }
1378    }
1379    if (i->tex.useOffsets == 4)
1380       code[1] |= 0x1000;
1381 }
1382 
1383 void
emitTXQ(const TexInstruction * i)1384 CodeEmitterGK110::emitTXQ(const TexInstruction *i)
1385 {
1386    code[0] = 0x00000002;
1387    code[1] = 0x75400001;
1388 
1389    switch (i->tex.query) {
1390    case TXQ_DIMS:            code[0] |= 0x01 << 25; break;
1391    case TXQ_TYPE:            code[0] |= 0x02 << 25; break;
1392    case TXQ_SAMPLE_POSITION: code[0] |= 0x05 << 25; break;
1393    case TXQ_FILTER:          code[0] |= 0x10 << 25; break;
1394    case TXQ_LOD:             code[0] |= 0x12 << 25; break;
1395    case TXQ_BORDER_COLOUR:   code[0] |= 0x16 << 25; break;
1396    default:
1397       assert(!"invalid texture query");
1398       break;
1399    }
1400 
1401    code[1] |= i->tex.mask << 2;
1402    code[1] |= i->tex.r << 9;
1403    if (/*i->tex.sIndirectSrc >= 0 || */i->tex.rIndirectSrc >= 0)
1404       code[1] |= 0x08000000;
1405 
1406    defId(i->def(0), 2);
1407    srcId(i->src(0), 10);
1408 
1409    emitPredicate(i);
1410 }
1411 
1412 void
emitQUADOP(const Instruction * i,uint8_t qOp,uint8_t laneMask)1413 CodeEmitterGK110::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
1414 {
1415    code[0] = 0x00000002 | ((qOp & 1) << 31);
1416    code[1] = 0x7fc00200 | (qOp >> 1) | (laneMask << 12); // dall
1417 
1418    defId(i->def(0), 2);
1419    srcId(i->src(0), 10);
1420    srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 23);
1421 
1422    emitPredicate(i);
1423 }
1424 
1425 void
emitPIXLD(const Instruction * i)1426 CodeEmitterGK110::emitPIXLD(const Instruction *i)
1427 {
1428    emitForm_L(i, 0x7f4, 2, Modifier(0));
1429    code[1] |= i->subOp << 2;
1430    code[1] |= 0x00070000;
1431 }
1432 
1433 void
emitBAR(const Instruction * i)1434 CodeEmitterGK110::emitBAR(const Instruction *i)
1435 {
1436    code[0] = 0x00000002;
1437    code[1] = 0x85400000;
1438 
1439    switch (i->subOp) {
1440    case NV50_IR_SUBOP_BAR_ARRIVE:   code[1] |= 0x08; break;
1441    case NV50_IR_SUBOP_BAR_RED_AND:  code[1] |= 0x50; break;
1442    case NV50_IR_SUBOP_BAR_RED_OR:   code[1] |= 0x90; break;
1443    case NV50_IR_SUBOP_BAR_RED_POPC: code[1] |= 0x10; break;
1444    default:
1445       assert(i->subOp == NV50_IR_SUBOP_BAR_SYNC);
1446       break;
1447    }
1448 
1449    emitPredicate(i);
1450 
1451    // barrier id
1452    if (i->src(0).getFile() == FILE_GPR) {
1453       srcId(i->src(0), 10);
1454    } else {
1455       ImmediateValue *imm = i->getSrc(0)->asImm();
1456       assert(imm);
1457       code[0] |= imm->reg.data.u32 << 10;
1458       code[1] |= 0x8000;
1459    }
1460 
1461    // thread count
1462    if (i->src(1).getFile() == FILE_GPR) {
1463       srcId(i->src(1), 23);
1464    } else {
1465       ImmediateValue *imm = i->getSrc(0)->asImm();
1466       assert(imm);
1467       assert(imm->reg.data.u32 <= 0xfff);
1468       code[0] |= imm->reg.data.u32 << 23;
1469       code[1] |= imm->reg.data.u32 >> 9;
1470       code[1] |= 0x4000;
1471    }
1472 
1473    if (i->srcExists(2) && (i->predSrc != 2)) {
1474       srcId(i->src(2), 32 + 10);
1475       if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
1476          code[1] |= 1 << 13;
1477    } else {
1478       code[1] |= 7 << 10;
1479    }
1480 }
1481 
emitMEMBAR(const Instruction * i)1482 void CodeEmitterGK110::emitMEMBAR(const Instruction *i)
1483 {
1484    code[0] = 0x00000002 | NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp) << 8;
1485    code[1] = 0x7cc00000;
1486 
1487    emitPredicate(i);
1488 }
1489 
1490 void
emitFlow(const Instruction * i)1491 CodeEmitterGK110::emitFlow(const Instruction *i)
1492 {
1493    const FlowInstruction *f = i->asFlow();
1494 
1495    unsigned mask; // bit 0: predicate, bit 1: target
1496 
1497    code[0] = 0x00000000;
1498 
1499    switch (i->op) {
1500    case OP_BRA:
1501       code[1] = f->absolute ? 0x10800000 : 0x12000000;
1502       if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
1503          code[0] |= 0x80;
1504       mask = 3;
1505       break;
1506    case OP_CALL:
1507       code[1] = f->absolute ? 0x11000000 : 0x13000000;
1508       if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
1509          code[0] |= 0x80;
1510       mask = 2;
1511       break;
1512 
1513    case OP_EXIT:    code[1] = 0x18000000; mask = 1; break;
1514    case OP_RET:     code[1] = 0x19000000; mask = 1; break;
1515    case OP_DISCARD: code[1] = 0x19800000; mask = 1; break;
1516    case OP_BREAK:   code[1] = 0x1a000000; mask = 1; break;
1517    case OP_CONT:    code[1] = 0x1a800000; mask = 1; break;
1518 
1519    case OP_JOINAT:   code[1] = 0x14800000; mask = 2; break;
1520    case OP_PREBREAK: code[1] = 0x15000000; mask = 2; break;
1521    case OP_PRECONT:  code[1] = 0x15800000; mask = 2; break;
1522    case OP_PRERET:   code[1] = 0x13800000; mask = 2; break;
1523 
1524    case OP_QUADON:  code[1] = 0x1b800000; mask = 0; break;
1525    case OP_QUADPOP: code[1] = 0x1c000000; mask = 0; break;
1526    case OP_BRKPT:   code[1] = 0x00000000; mask = 0; break;
1527    default:
1528       assert(!"invalid flow operation");
1529       return;
1530    }
1531 
1532    if (mask & 1) {
1533       emitPredicate(i);
1534       if (i->flagsSrc < 0)
1535          code[0] |= 0x3c;
1536    }
1537 
1538    if (!f)
1539       return;
1540 
1541    if (f->allWarp)
1542       code[0] |= 1 << 9;
1543    if (f->limit)
1544       code[0] |= 1 << 8;
1545 
1546    if (f->op == OP_CALL) {
1547       if (f->builtin) {
1548          assert(f->absolute);
1549          uint32_t pcAbs = targNVC0->getBuiltinOffset(f->target.builtin);
1550          addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xff800000, 23);
1551          addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x007fffff, -9);
1552       } else {
1553          assert(!f->absolute);
1554          int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
1555          code[0] |= (pcRel & 0x1ff) << 23;
1556          code[1] |= (pcRel >> 9) & 0x7fff;
1557       }
1558    } else
1559    if (mask & 2) {
1560       int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
1561       if (writeIssueDelays && !(f->target.bb->binPos & 0x3f))
1562          pcRel += 8;
1563       // currently we don't want absolute branches
1564       assert(!f->absolute);
1565       code[0] |= (pcRel & 0x1ff) << 23;
1566       code[1] |= (pcRel >> 9) & 0x7fff;
1567    }
1568 }
1569 
1570 void
emitSHFL(const Instruction * i)1571 CodeEmitterGK110::emitSHFL(const Instruction *i)
1572 {
1573    const ImmediateValue *imm;
1574 
1575    code[0] = 0x00000002;
1576    code[1] = 0x78800000 | (i->subOp << 1);
1577 
1578    emitPredicate(i);
1579 
1580    defId(i->def(0), 2);
1581    srcId(i->src(0), 10);
1582 
1583    switch (i->src(1).getFile()) {
1584    case FILE_GPR:
1585       srcId(i->src(1), 23);
1586       break;
1587    case FILE_IMMEDIATE:
1588       imm = i->getSrc(1)->asImm();
1589       assert(imm && imm->reg.data.u32 < 0x20);
1590       code[0] |= imm->reg.data.u32 << 23;
1591       code[0] |= 1 << 31;
1592       break;
1593    default:
1594       assert(!"invalid src1 file");
1595       break;
1596    }
1597 
1598    switch (i->src(2).getFile()) {
1599    case FILE_GPR:
1600       srcId(i->src(2), 42);
1601       break;
1602    case FILE_IMMEDIATE:
1603       imm = i->getSrc(2)->asImm();
1604       assert(imm && imm->reg.data.u32 < 0x2000);
1605       code[1] |= imm->reg.data.u32 << 5;
1606       code[1] |= 1;
1607       break;
1608    default:
1609       assert(!"invalid src2 file");
1610       break;
1611    }
1612 
1613    if (!i->defExists(1))
1614       code[1] |= 7 << 19;
1615    else {
1616       assert(i->def(1).getFile() == FILE_PREDICATE);
1617       defId(i->def(1), 51);
1618    }
1619 }
1620 
1621 void
emitVOTE(const Instruction * i)1622 CodeEmitterGK110::emitVOTE(const Instruction *i)
1623 {
1624    const ImmediateValue *imm;
1625    uint32_t u32;
1626 
1627    code[0] = 0x00000002;
1628    code[1] = 0x86c00000 | (i->subOp << 19);
1629 
1630    emitPredicate(i);
1631 
1632    unsigned rp = 0;
1633    for (int d = 0; i->defExists(d); d++) {
1634       if (i->def(d).getFile() == FILE_PREDICATE) {
1635          assert(!(rp & 2));
1636          rp |= 2;
1637          defId(i->def(d), 48);
1638       } else if (i->def(d).getFile() == FILE_GPR) {
1639          assert(!(rp & 1));
1640          rp |= 1;
1641          defId(i->def(d), 2);
1642       } else {
1643          assert(!"Unhandled def");
1644       }
1645    }
1646    if (!(rp & 1))
1647       code[0] |= 255 << 2;
1648    if (!(rp & 2))
1649       code[1] |= 7 << 16;
1650 
1651    switch (i->src(0).getFile()) {
1652    case FILE_PREDICATE:
1653       if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
1654          code[0] |= 1 << 13;
1655       srcId(i->src(0), 42);
1656       break;
1657    case FILE_IMMEDIATE:
1658       imm = i->getSrc(0)->asImm();
1659       assert(imm);
1660       u32 = imm->reg.data.u32;
1661       assert(u32 == 0 || u32 == 1);
1662       code[1] |= (u32 == 1 ? 0x7 : 0xf) << 10;
1663       break;
1664    default:
1665       assert(!"Unhandled src");
1666       break;
1667    }
1668 }
1669 
1670 void
emitSUGType(DataType ty,const int pos)1671 CodeEmitterGK110::emitSUGType(DataType ty, const int pos)
1672 {
1673    uint8_t n = 0;
1674 
1675    switch (ty) {
1676    case TYPE_S32: n = 1; break;
1677    case TYPE_U8:  n = 2; break;
1678    case TYPE_S8:  n = 3; break;
1679    default:
1680       assert(ty == TYPE_U32);
1681       break;
1682    }
1683    code[pos / 32] |= n << (pos % 32);
1684 }
1685 
1686 void
emitSUCachingMode(CacheMode c)1687 CodeEmitterGK110::emitSUCachingMode(CacheMode c)
1688 {
1689    uint8_t n = 0;
1690 
1691    switch (c) {
1692    case CACHE_CA:
1693 // case CACHE_WB:
1694       n = 0;
1695       break;
1696    case CACHE_CG:
1697       n = 1;
1698       break;
1699    case CACHE_CS:
1700       n = 2;
1701       break;
1702    case CACHE_CV:
1703 // case CACHE_WT:
1704       n = 3;
1705       break;
1706    default:
1707       assert(!"invalid caching mode");
1708       break;
1709    }
1710    code[0] |= (n & 1) << 31;
1711    code[1] |= (n & 2) >> 1;
1712 }
1713 
1714 void
setSUConst16(const Instruction * i,const int s)1715 CodeEmitterGK110::setSUConst16(const Instruction *i, const int s)
1716 {
1717    const uint32_t offset = i->getSrc(s)->reg.data.offset;
1718 
1719    assert(offset == (offset & 0xfffc));
1720 
1721    code[0] |= offset << 21;
1722    code[1] |= offset >> 11;
1723    code[1] |= i->getSrc(s)->reg.fileIndex << 5;
1724 }
1725 
1726 void
emitSULDGB(const TexInstruction * i)1727 CodeEmitterGK110::emitSULDGB(const TexInstruction *i)
1728 {
1729    code[0] = 0x00000002;
1730    code[1] = 0x30000000 | (i->subOp << 14);
1731 
1732    if (i->src(1).getFile() == FILE_MEMORY_CONST) {
1733       emitLoadStoreType(i->dType, 0x38);
1734       emitCachingMode(i->cache, 0x36);
1735 
1736       // format
1737       setSUConst16(i, 1);
1738    } else {
1739       assert(i->src(1).getFile() == FILE_GPR);
1740       code[1] |= 0x49800000;
1741 
1742       emitLoadStoreType(i->dType, 0x21);
1743       emitSUCachingMode(i->cache);
1744 
1745       srcId(i->src(1), 23);
1746    }
1747 
1748    emitSUGType(i->sType, 0x34);
1749 
1750    emitPredicate(i);
1751    defId(i->def(0), 2); // destination
1752    srcId(i->src(0), 10); // address
1753 
1754    // surface predicate
1755    if (!i->srcExists(2) || (i->predSrc == 2)) {
1756       code[1] |= 0x7 << 10;
1757    } else {
1758       if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
1759          code[1] |= 1 << 13;
1760       srcId(i->src(2), 32 + 10);
1761    }
1762 }
1763 
1764 void
emitSUSTGx(const TexInstruction * i)1765 CodeEmitterGK110::emitSUSTGx(const TexInstruction *i)
1766 {
1767    assert(i->op == OP_SUSTP);
1768 
1769    code[0] = 0x00000002;
1770    code[1] = 0x38000000;
1771 
1772    if (i->src(1).getFile() == FILE_MEMORY_CONST) {
1773       code[0] |= i->subOp << 2;
1774 
1775       if (i->op == OP_SUSTP)
1776          code[0] |= i->tex.mask << 4;
1777 
1778       emitSUGType(i->sType, 0x8);
1779       emitCachingMode(i->cache, 0x36);
1780 
1781       // format
1782       setSUConst16(i, 1);
1783    } else {
1784       assert(i->src(1).getFile() == FILE_GPR);
1785 
1786       code[0] |= i->subOp << 23;
1787       code[1] |= 0x41c00000;
1788 
1789       if (i->op == OP_SUSTP)
1790          code[0] |= i->tex.mask << 25;
1791 
1792       emitSUGType(i->sType, 0x1d);
1793       emitSUCachingMode(i->cache);
1794 
1795       srcId(i->src(1), 2);
1796    }
1797 
1798    emitPredicate(i);
1799    srcId(i->src(0), 10); // address
1800    srcId(i->src(3), 42); // values
1801 
1802    // surface predicate
1803    if (!i->srcExists(2) || (i->predSrc == 2)) {
1804       code[1] |= 0x7 << 18;
1805    } else {
1806       if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
1807          code[1] |= 1 << 21;
1808       srcId(i->src(2), 32 + 18);
1809    }
1810 }
1811 
1812 void
emitSUCLAMPMode(uint16_t subOp)1813 CodeEmitterGK110::emitSUCLAMPMode(uint16_t subOp)
1814 {
1815    uint8_t m;
1816    switch (subOp & ~NV50_IR_SUBOP_SUCLAMP_2D) {
1817    case NV50_IR_SUBOP_SUCLAMP_SD(0, 1): m = 0; break;
1818    case NV50_IR_SUBOP_SUCLAMP_SD(1, 1): m = 1; break;
1819    case NV50_IR_SUBOP_SUCLAMP_SD(2, 1): m = 2; break;
1820    case NV50_IR_SUBOP_SUCLAMP_SD(3, 1): m = 3; break;
1821    case NV50_IR_SUBOP_SUCLAMP_SD(4, 1): m = 4; break;
1822    case NV50_IR_SUBOP_SUCLAMP_PL(0, 1): m = 5; break;
1823    case NV50_IR_SUBOP_SUCLAMP_PL(1, 1): m = 6; break;
1824    case NV50_IR_SUBOP_SUCLAMP_PL(2, 1): m = 7; break;
1825    case NV50_IR_SUBOP_SUCLAMP_PL(3, 1): m = 8; break;
1826    case NV50_IR_SUBOP_SUCLAMP_PL(4, 1): m = 9; break;
1827    case NV50_IR_SUBOP_SUCLAMP_BL(0, 1): m = 10; break;
1828    case NV50_IR_SUBOP_SUCLAMP_BL(1, 1): m = 11; break;
1829    case NV50_IR_SUBOP_SUCLAMP_BL(2, 1): m = 12; break;
1830    case NV50_IR_SUBOP_SUCLAMP_BL(3, 1): m = 13; break;
1831    case NV50_IR_SUBOP_SUCLAMP_BL(4, 1): m = 14; break;
1832    default:
1833       return;
1834    }
1835    code[1] |= m << 20;
1836    if (subOp & NV50_IR_SUBOP_SUCLAMP_2D)
1837       code[1] |= 1 << 24;
1838 }
1839 
1840 void
emitSUCalc(Instruction * i)1841 CodeEmitterGK110::emitSUCalc(Instruction *i)
1842 {
1843    ImmediateValue *imm = NULL;
1844    uint64_t opc1, opc2;
1845 
1846    if (i->srcExists(2)) {
1847       imm = i->getSrc(2)->asImm();
1848       if (imm)
1849          i->setSrc(2, NULL); // special case, make emitForm_21 not assert
1850    }
1851 
1852    switch (i->op) {
1853    case OP_SUCLAMP:  opc1 = 0xb00; opc2 = 0x580; break;
1854    case OP_SUBFM:    opc1 = 0xb68; opc2 = 0x1e8; break;
1855    case OP_SUEAU:    opc1 = 0xb6c; opc2 = 0x1ec; break;
1856    default:
1857       assert(0);
1858       return;
1859    }
1860    emitForm_21(i, opc2, opc1);
1861 
1862    if (i->op == OP_SUCLAMP) {
1863       if (i->dType == TYPE_S32)
1864          code[1] |= 1 << 19;
1865       emitSUCLAMPMode(i->subOp);
1866    }
1867 
1868    if (i->op == OP_SUBFM && i->subOp == NV50_IR_SUBOP_SUBFM_3D)
1869       code[1] |= 1 << 18;
1870 
1871    if (i->op != OP_SUEAU) {
1872       const uint8_t pos = i->op == OP_SUBFM ? 19 : 16;
1873       if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
1874          code[0] |= 255 << 2;
1875          code[1] |= i->getDef(1)->reg.data.id << pos;
1876       } else
1877       if (i->defExists(1)) { // r, p
1878          assert(i->def(1).getFile() == FILE_PREDICATE);
1879          code[1] |= i->getDef(1)->reg.data.id << pos;
1880       } else { // r, #
1881          code[1] |= 7 << pos;
1882       }
1883    }
1884 
1885    if (imm) {
1886       assert(i->op == OP_SUCLAMP);
1887       i->setSrc(2, imm);
1888       code[1] |= (imm->reg.data.u32 & 0x3f) << 10; // sint6
1889    }
1890 }
1891 
1892 
1893 void
emitVectorSubOp(const Instruction * i)1894 CodeEmitterGK110::emitVectorSubOp(const Instruction *i)
1895 {
1896    switch (NV50_IR_SUBOP_Vn(i->subOp)) {
1897    case 0:
1898       code[1] |= (i->subOp & 0x000f) << 7;  // vsrc1
1899       code[1] |= (i->subOp & 0x00e0) >> 6;  // vsrc2
1900       code[1] |= (i->subOp & 0x0100) << 13; // vsrc2
1901       code[1] |= (i->subOp & 0x3c00) << 12; // vdst
1902       break;
1903    default:
1904       assert(0);
1905       break;
1906    }
1907 }
1908 
1909 void
emitVSHL(const Instruction * i)1910 CodeEmitterGK110::emitVSHL(const Instruction *i)
1911 {
1912    code[0] = 0x00000002;
1913    code[1] = 0xb8000000;
1914 
1915    assert(NV50_IR_SUBOP_Vn(i->subOp) == 0);
1916 
1917    if (isSignedType(i->dType)) code[1] |= 1 << 25;
1918    if (isSignedType(i->sType)) code[1] |= 1 << 19;
1919 
1920    emitVectorSubOp(i);
1921 
1922    emitPredicate(i);
1923    defId(i->def(0), 2);
1924    srcId(i->src(0), 10);
1925 
1926    if (i->getSrc(1)->reg.file == FILE_IMMEDIATE) {
1927       ImmediateValue *imm = i->getSrc(1)->asImm();
1928       assert(imm);
1929       code[0] |= (imm->reg.data.u32 & 0x01ff) << 23;
1930       code[1] |= (imm->reg.data.u32 & 0xfe00) >> 9;
1931    } else {
1932       assert(i->getSrc(1)->reg.file == FILE_GPR);
1933       code[1] |= 1 << 21;
1934       srcId(i->src(1), 23);
1935    }
1936    srcId(i->src(2), 42);
1937 
1938    if (i->saturate)
1939       code[0] |= 1 << 22;
1940    if (i->flagsDef >= 0)
1941       code[1] |= 1 << 18;
1942 }
1943 
1944 void
emitAFETCH(const Instruction * i)1945 CodeEmitterGK110::emitAFETCH(const Instruction *i)
1946 {
1947    uint32_t offset = i->src(0).get()->reg.data.offset & 0x7ff;
1948 
1949    code[0] = 0x00000002 | (offset << 23);
1950    code[1] = 0x7d000000 | (offset >> 9);
1951 
1952    if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1953       code[1] |= 0x8;
1954 
1955    emitPredicate(i);
1956 
1957    defId(i->def(0), 2);
1958    srcId(i->src(0).getIndirect(0), 10);
1959 }
1960 
1961 void
emitPFETCH(const Instruction * i)1962 CodeEmitterGK110::emitPFETCH(const Instruction *i)
1963 {
1964    uint32_t prim = i->src(0).get()->reg.data.u32;
1965 
1966    code[0] = 0x00000002 | ((prim & 0xff) << 23);
1967    code[1] = 0x7f800000;
1968 
1969    emitPredicate(i);
1970 
1971    const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1972 
1973    defId(i->def(0), 2);
1974    srcId(i, src1, 10);
1975 }
1976 
1977 void
emitVFETCH(const Instruction * i)1978 CodeEmitterGK110::emitVFETCH(const Instruction *i)
1979 {
1980    unsigned int size = typeSizeof(i->dType);
1981    uint32_t offset = i->src(0).get()->reg.data.offset;
1982 
1983    code[0] = 0x00000002 | (offset << 23);
1984    code[1] = 0x7ec00000 | (offset >> 9);
1985    code[1] |= (size / 4 - 1) << 18;
1986 
1987    if (i->perPatch)
1988       code[1] |= 0x4;
1989    if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1990       code[1] |= 0x8; // yes, TCPs can read from *outputs* of other threads
1991 
1992    emitPredicate(i);
1993 
1994    defId(i->def(0), 2);
1995    srcId(i->src(0).getIndirect(0), 10);
1996    srcId(i->src(0).getIndirect(1), 32 + 10); // vertex address
1997 }
1998 
1999 void
emitEXPORT(const Instruction * i)2000 CodeEmitterGK110::emitEXPORT(const Instruction *i)
2001 {
2002    unsigned int size = typeSizeof(i->dType);
2003    uint32_t offset = i->src(0).get()->reg.data.offset;
2004 
2005    code[0] = 0x00000002 | (offset << 23);
2006    code[1] = 0x7f000000 | (offset >> 9);
2007    code[1] |= (size / 4 - 1) << 18;
2008 
2009    if (i->perPatch)
2010       code[1] |= 0x4;
2011 
2012    emitPredicate(i);
2013 
2014    assert(i->src(1).getFile() == FILE_GPR);
2015 
2016    srcId(i->src(0).getIndirect(0), 10);
2017    srcId(i->src(0).getIndirect(1), 32 + 10); // vertex base address
2018    srcId(i->src(1), 2);
2019 }
2020 
2021 void
emitOUT(const Instruction * i)2022 CodeEmitterGK110::emitOUT(const Instruction *i)
2023 {
2024    assert(i->src(0).getFile() == FILE_GPR);
2025 
2026    emitForm_21(i, 0x1f0, 0xb70);
2027 
2028    if (i->op == OP_EMIT)
2029       code[1] |= 1 << 10;
2030    if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)
2031       code[1] |= 1 << 11;
2032 }
2033 
2034 void
emitInterpMode(const Instruction * i)2035 CodeEmitterGK110::emitInterpMode(const Instruction *i)
2036 {
2037    code[1] |= (i->ipa & 0x3) << 21; // TODO: INTERP_SAMPLEID
2038    code[1] |= (i->ipa & 0xc) << (19 - 2);
2039 }
2040 
2041 static void
interpApply(const FixupEntry * entry,uint32_t * code,const FixupData & data)2042 interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
2043 {
2044    int ipa = entry->ipa;
2045    int reg = entry->reg;
2046    int loc = entry->loc;
2047 
2048    if (data.flatshade &&
2049        (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
2050       ipa = NV50_IR_INTERP_FLAT;
2051       reg = 0xff;
2052    } else if (data.force_persample_interp &&
2053               (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
2054               (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
2055       ipa |= NV50_IR_INTERP_CENTROID;
2056    }
2057    code[loc + 1] &= ~(0xf << 19);
2058    code[loc + 1] |= (ipa & 0x3) << 21;
2059    code[loc + 1] |= (ipa & 0xc) << (19 - 2);
2060    code[loc + 0] &= ~(0xff << 23);
2061    code[loc + 0] |= reg << 23;
2062 }
2063 
2064 void
emitINTERP(const Instruction * i)2065 CodeEmitterGK110::emitINTERP(const Instruction *i)
2066 {
2067    const uint32_t base = i->getSrc(0)->reg.data.offset;
2068 
2069    code[0] = 0x00000002 | (base << 31);
2070    code[1] = 0x74800000 | (base >> 1);
2071 
2072    if (i->saturate)
2073       code[1] |= 1 << 18;
2074 
2075    if (i->op == OP_PINTERP) {
2076       srcId(i->src(1), 23);
2077       addInterp(i->ipa, SDATA(i->src(1)).id, interpApply);
2078    } else {
2079       code[0] |= 0xff << 23;
2080       addInterp(i->ipa, 0xff, interpApply);
2081    }
2082 
2083    srcId(i->src(0).getIndirect(0), 10);
2084    emitInterpMode(i);
2085 
2086    emitPredicate(i);
2087    defId(i->def(0), 2);
2088 
2089    if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)
2090       srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 32 + 10);
2091    else
2092       code[1] |= 0xff << 10;
2093 }
2094 
2095 void
emitLoadStoreType(DataType ty,const int pos)2096 CodeEmitterGK110::emitLoadStoreType(DataType ty, const int pos)
2097 {
2098    uint8_t n;
2099 
2100    switch (ty) {
2101    case TYPE_U8:
2102       n = 0;
2103       break;
2104    case TYPE_S8:
2105       n = 1;
2106       break;
2107    case TYPE_U16:
2108       n = 2;
2109       break;
2110    case TYPE_S16:
2111       n = 3;
2112       break;
2113    case TYPE_F32:
2114    case TYPE_U32:
2115    case TYPE_S32:
2116       n = 4;
2117       break;
2118    case TYPE_F64:
2119    case TYPE_U64:
2120    case TYPE_S64:
2121       n = 5;
2122       break;
2123    case TYPE_B128:
2124       n = 6;
2125       break;
2126    default:
2127       n = 0;
2128       assert(!"invalid ld/st type");
2129       break;
2130    }
2131    code[pos / 32] |= n << (pos % 32);
2132 }
2133 
2134 void
emitCachingMode(CacheMode c,const int pos)2135 CodeEmitterGK110::emitCachingMode(CacheMode c, const int pos)
2136 {
2137    uint8_t n;
2138 
2139    switch (c) {
2140    case CACHE_CA:
2141 // case CACHE_WB:
2142       n = 0;
2143       break;
2144    case CACHE_CG:
2145       n = 1;
2146       break;
2147    case CACHE_CS:
2148       n = 2;
2149       break;
2150    case CACHE_CV:
2151 // case CACHE_WT:
2152       n = 3;
2153       break;
2154    default:
2155       n = 0;
2156       assert(!"invalid caching mode");
2157       break;
2158    }
2159    code[pos / 32] |= n << (pos % 32);
2160 }
2161 
2162 void
emitSTORE(const Instruction * i)2163 CodeEmitterGK110::emitSTORE(const Instruction *i)
2164 {
2165    int32_t offset = SDATA(i->src(0)).offset;
2166 
2167    switch (i->src(0).getFile()) {
2168    case FILE_MEMORY_GLOBAL: code[1] = 0xe0000000; code[0] = 0x00000000; break;
2169    case FILE_MEMORY_LOCAL:  code[1] = 0x7a800000; code[0] = 0x00000002; break;
2170    case FILE_MEMORY_SHARED:
2171       code[0] = 0x00000002;
2172       if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED)
2173          code[1] = 0x78400000;
2174       else
2175          code[1] = 0x7ac00000;
2176       break;
2177    default:
2178       assert(!"invalid memory file");
2179       break;
2180    }
2181 
2182    if (code[0] & 0x2) {
2183       offset &= 0xffffff;
2184       emitLoadStoreType(i->dType, 0x33);
2185       if (i->src(0).getFile() == FILE_MEMORY_LOCAL)
2186          emitCachingMode(i->cache, 0x2f);
2187    } else {
2188       emitLoadStoreType(i->dType, 0x38);
2189       emitCachingMode(i->cache, 0x3b);
2190    }
2191    code[0] |= offset << 23;
2192    code[1] |= offset >> 9;
2193 
2194    // Unlocked store on shared memory can fail.
2195    if (i->src(0).getFile() == FILE_MEMORY_SHARED &&
2196        i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
2197       assert(i->defExists(0));
2198       defId(i->def(0), 32 + 16);
2199    }
2200 
2201    emitPredicate(i);
2202 
2203    srcId(i->src(1), 2);
2204    srcId(i->src(0).getIndirect(0), 10);
2205    if (i->src(0).getFile() == FILE_MEMORY_GLOBAL &&
2206        i->src(0).isIndirect(0) &&
2207        i->getIndirect(0, 0)->reg.size == 8)
2208       code[1] |= 1 << 23;
2209 }
2210 
2211 void
emitLOAD(const Instruction * i)2212 CodeEmitterGK110::emitLOAD(const Instruction *i)
2213 {
2214    int32_t offset = SDATA(i->src(0)).offset;
2215 
2216    switch (i->src(0).getFile()) {
2217    case FILE_MEMORY_GLOBAL: code[1] = 0xc0000000; code[0] = 0x00000000; break;
2218    case FILE_MEMORY_LOCAL:  code[1] = 0x7a000000; code[0] = 0x00000002; break;
2219    case FILE_MEMORY_SHARED:
2220       code[0] = 0x00000002;
2221       if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED)
2222          code[1] = 0x77400000;
2223       else
2224          code[1] = 0x7a400000;
2225       break;
2226    case FILE_MEMORY_CONST:
2227       if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
2228          emitMOV(i);
2229          return;
2230       }
2231       offset &= 0xffff;
2232       code[0] = 0x00000002;
2233       code[1] = 0x7c800000 | (i->src(0).get()->reg.fileIndex << 7);
2234       code[1] |= i->subOp << 15;
2235       break;
2236    default:
2237       assert(!"invalid memory file");
2238       break;
2239    }
2240 
2241    if (code[0] & 0x2) {
2242       offset &= 0xffffff;
2243       emitLoadStoreType(i->dType, 0x33);
2244       if (i->src(0).getFile() == FILE_MEMORY_LOCAL)
2245          emitCachingMode(i->cache, 0x2f);
2246    } else {
2247       emitLoadStoreType(i->dType, 0x38);
2248       emitCachingMode(i->cache, 0x3b);
2249    }
2250    code[0] |= offset << 23;
2251    code[1] |= offset >> 9;
2252 
2253    // Locked store on shared memory can fail.
2254    int r = 0, p = -1;
2255    if (i->src(0).getFile() == FILE_MEMORY_SHARED &&
2256        i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
2257       if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
2258          r = -1;
2259          p = 0;
2260       } else if (i->defExists(1)) { // r, p
2261          p = 1;
2262       } else {
2263          assert(!"Expected predicate dest for load locked");
2264       }
2265    }
2266 
2267    emitPredicate(i);
2268 
2269    if (r >= 0)
2270       defId(i->def(r), 2);
2271    else
2272       code[0] |= 255 << 2;
2273 
2274    if (p >= 0)
2275       defId(i->def(p), 32 + 16);
2276 
2277    if (i->getIndirect(0, 0)) {
2278       srcId(i->src(0).getIndirect(0), 10);
2279       if (i->getIndirect(0, 0)->reg.size == 8)
2280          code[1] |= 1 << 23;
2281    } else {
2282       code[0] |= 255 << 10;
2283    }
2284 }
2285 
2286 uint8_t
getSRegEncoding(const ValueRef & ref)2287 CodeEmitterGK110::getSRegEncoding(const ValueRef& ref)
2288 {
2289    switch (SDATA(ref).sv.sv) {
2290    case SV_LANEID:        return 0x00;
2291    case SV_PHYSID:        return 0x03;
2292    case SV_VERTEX_COUNT:  return 0x10;
2293    case SV_INVOCATION_ID: return 0x11;
2294    case SV_YDIR:          return 0x12;
2295    case SV_THREAD_KILL:   return 0x13;
2296    case SV_TID:           return 0x21 + SDATA(ref).sv.index;
2297    case SV_CTAID:         return 0x25 + SDATA(ref).sv.index;
2298    case SV_NTID:          return 0x29 + SDATA(ref).sv.index;
2299    case SV_GRIDID:        return 0x2c;
2300    case SV_NCTAID:        return 0x2d + SDATA(ref).sv.index;
2301    case SV_LBASE:         return 0x34;
2302    case SV_SBASE:         return 0x30;
2303    case SV_LANEMASK_EQ:   return 0x38;
2304    case SV_LANEMASK_LT:   return 0x39;
2305    case SV_LANEMASK_LE:   return 0x3a;
2306    case SV_LANEMASK_GT:   return 0x3b;
2307    case SV_LANEMASK_GE:   return 0x3c;
2308    case SV_CLOCK:         return 0x50 + SDATA(ref).sv.index;
2309    default:
2310       assert(!"no sreg for system value");
2311       return 0;
2312    }
2313 }
2314 
2315 void
emitMOV(const Instruction * i)2316 CodeEmitterGK110::emitMOV(const Instruction *i)
2317 {
2318    if (i->def(0).getFile() == FILE_PREDICATE) {
2319       if (i->src(0).getFile() == FILE_GPR) {
2320          // Use ISETP.NE.AND dst, PT, src, RZ, PT
2321          code[0] = 0x00000002;
2322          code[1] = 0xdb500000;
2323 
2324          code[0] |= 0x7 << 2;
2325          code[0] |= 0xff << 23;
2326          code[1] |= 0x7 << 10;
2327          srcId(i->src(0), 10);
2328       } else
2329       if (i->src(0).getFile() == FILE_PREDICATE) {
2330          // Use PSETP.AND.AND dst, PT, src, PT, PT
2331          code[0] = 0x00000002;
2332          code[1] = 0x84800000;
2333 
2334          code[0] |= 0x7 << 2;
2335          code[1] |= 0x7 << 0;
2336          code[1] |= 0x7 << 10;
2337 
2338          srcId(i->src(0), 14);
2339       } else {
2340          assert(!"Unexpected source for predicate destination");
2341          emitNOP(i);
2342       }
2343       emitPredicate(i);
2344       defId(i->def(0), 5);
2345    } else
2346    if (i->src(0).getFile() == FILE_SYSTEM_VALUE) {
2347       code[0] = 0x00000002 | (getSRegEncoding(i->src(0)) << 23);
2348       code[1] = 0x86400000;
2349       emitPredicate(i);
2350       defId(i->def(0), 2);
2351    } else
2352    if (i->src(0).getFile() == FILE_IMMEDIATE) {
2353       code[0] = 0x00000002 | (i->lanes << 14);
2354       code[1] = 0x74000000;
2355       emitPredicate(i);
2356       defId(i->def(0), 2);
2357       setImmediate32(i, 0, Modifier(0));
2358    } else
2359    if (i->src(0).getFile() == FILE_PREDICATE) {
2360       code[0] = 0x00000002;
2361       code[1] = 0x84401c07;
2362       emitPredicate(i);
2363       defId(i->def(0), 2);
2364       srcId(i->src(0), 14);
2365    } else {
2366       emitForm_C(i, 0x24c, 2);
2367       code[1] |= i->lanes << 10;
2368    }
2369 }
2370 
2371 static inline bool
uses64bitAddress(const Instruction * ldst)2372 uses64bitAddress(const Instruction *ldst)
2373 {
2374    return ldst->src(0).getFile() == FILE_MEMORY_GLOBAL &&
2375       ldst->src(0).isIndirect(0) &&
2376       ldst->getIndirect(0, 0)->reg.size == 8;
2377 }
2378 
2379 void
emitATOM(const Instruction * i)2380 CodeEmitterGK110::emitATOM(const Instruction *i)
2381 {
2382    const bool hasDst = i->defExists(0);
2383    const bool exch = i->subOp == NV50_IR_SUBOP_ATOM_EXCH;
2384 
2385    code[0] = 0x00000002;
2386    if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
2387       code[1] = 0x77800000;
2388    else
2389       code[1] = 0x68000000;
2390 
2391    switch (i->subOp) {
2392    case NV50_IR_SUBOP_ATOM_CAS: break;
2393    case NV50_IR_SUBOP_ATOM_EXCH: code[1] |= 0x04000000; break;
2394    default: code[1] |= i->subOp << 23; break;
2395    }
2396 
2397    switch (i->dType) {
2398    case TYPE_U32: break;
2399    case TYPE_S32: code[1] |= 0x00100000; break;
2400    case TYPE_U64: code[1] |= 0x00200000; break;
2401    case TYPE_F32: code[1] |= 0x00300000; break;
2402    case TYPE_B128: code[1] |= 0x00400000; break; /* TODO: U128 */
2403    case TYPE_S64: code[1] |= 0x00500000; break;
2404    default: assert(!"unsupported type"); break;
2405    }
2406 
2407    emitPredicate(i);
2408 
2409    /* TODO: cas: check that src regs line up */
2410    /* TODO: cas: flip bits if $r255 is used */
2411    srcId(i->src(1), 23);
2412 
2413    if (hasDst) {
2414       defId(i->def(0), 2);
2415    } else
2416    if (!exch) {
2417       code[0] |= 255 << 2;
2418    }
2419 
2420    if (hasDst || !exch) {
2421       const int32_t offset = SDATA(i->src(0)).offset;
2422       assert(offset < 0x80000 && offset >= -0x80000);
2423       code[0] |= (offset & 1) << 31;
2424       code[1] |= (offset & 0xffffe) >> 1;
2425    } else {
2426       srcAddr32(i->src(0), 31);
2427    }
2428 
2429    if (i->getIndirect(0, 0)) {
2430       srcId(i->getIndirect(0, 0), 10);
2431       if (i->getIndirect(0, 0)->reg.size == 8)
2432          code[1] |= 1 << 19;
2433    } else {
2434       code[0] |= 255 << 10;
2435    }
2436 }
2437 
2438 void
emitCCTL(const Instruction * i)2439 CodeEmitterGK110::emitCCTL(const Instruction *i)
2440 {
2441    int32_t offset = SDATA(i->src(0)).offset;
2442 
2443    code[0] = 0x00000002 | (i->subOp << 2);
2444 
2445    if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
2446       code[1] = 0x7b000000;
2447    } else {
2448       code[1] = 0x7c000000;
2449       offset &= 0xffffff;
2450    }
2451    code[0] |= offset << 23;
2452    code[1] |= offset >> 9;
2453 
2454    if (uses64bitAddress(i))
2455       code[1] |= 1 << 23;
2456    srcId(i->src(0).getIndirect(0), 10);
2457 
2458    emitPredicate(i);
2459 }
2460 
2461 bool
emitInstruction(Instruction * insn)2462 CodeEmitterGK110::emitInstruction(Instruction *insn)
2463 {
2464    const unsigned int size = (writeIssueDelays && !(codeSize & 0x3f)) ? 16 : 8;
2465 
2466    if (insn->encSize != 8) {
2467       ERROR("skipping unencodable instruction: ");
2468       insn->print();
2469       return false;
2470    } else
2471    if (codeSize + size > codeSizeLimit) {
2472       ERROR("code emitter output buffer too small\n");
2473       return false;
2474    }
2475 
2476    if (writeIssueDelays) {
2477       int id = (codeSize & 0x3f) / 8 - 1;
2478       if (id < 0) {
2479          id += 1;
2480          code[0] = 0x00000000; // cf issue delay "instruction"
2481          code[1] = 0x08000000;
2482          code += 2;
2483          codeSize += 8;
2484       }
2485       uint32_t *data = code - (id * 2 + 2);
2486 
2487       switch (id) {
2488       case 0: data[0] |= insn->sched << 2; break;
2489       case 1: data[0] |= insn->sched << 10; break;
2490       case 2: data[0] |= insn->sched << 18; break;
2491       case 3: data[0] |= insn->sched << 26; data[1] |= insn->sched >> 6; break;
2492       case 4: data[1] |= insn->sched << 2; break;
2493       case 5: data[1] |= insn->sched << 10; break;
2494       case 6: data[1] |= insn->sched << 18; break;
2495       default:
2496          assert(0);
2497          break;
2498       }
2499    }
2500 
2501    // assert that instructions with multiple defs don't corrupt registers
2502    for (int d = 0; insn->defExists(d); ++d)
2503       assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0);
2504 
2505    switch (insn->op) {
2506    case OP_MOV:
2507    case OP_RDSV:
2508       emitMOV(insn);
2509       break;
2510    case OP_NOP:
2511       break;
2512    case OP_LOAD:
2513       emitLOAD(insn);
2514       break;
2515    case OP_STORE:
2516       emitSTORE(insn);
2517       break;
2518    case OP_LINTERP:
2519    case OP_PINTERP:
2520       emitINTERP(insn);
2521       break;
2522    case OP_VFETCH:
2523       emitVFETCH(insn);
2524       break;
2525    case OP_EXPORT:
2526       emitEXPORT(insn);
2527       break;
2528    case OP_AFETCH:
2529       emitAFETCH(insn);
2530       break;
2531    case OP_PFETCH:
2532       emitPFETCH(insn);
2533       break;
2534    case OP_EMIT:
2535    case OP_RESTART:
2536       emitOUT(insn);
2537       break;
2538    case OP_ADD:
2539    case OP_SUB:
2540       if (insn->dType == TYPE_F64)
2541          emitDADD(insn);
2542       else if (isFloatType(insn->dType))
2543          emitFADD(insn);
2544       else
2545          emitUADD(insn);
2546       break;
2547    case OP_MUL:
2548       if (insn->dType == TYPE_F64)
2549          emitDMUL(insn);
2550       else if (isFloatType(insn->dType))
2551          emitFMUL(insn);
2552       else
2553          emitIMUL(insn);
2554       break;
2555    case OP_MAD:
2556    case OP_FMA:
2557       if (insn->dType == TYPE_F64)
2558          emitDMAD(insn);
2559       else if (isFloatType(insn->dType))
2560          emitFMAD(insn);
2561       else
2562          emitIMAD(insn);
2563       break;
2564    case OP_MADSP:
2565       emitMADSP(insn);
2566       break;
2567    case OP_SAD:
2568       emitISAD(insn);
2569       break;
2570    case OP_SHLADD:
2571       emitSHLADD(insn);
2572       break;
2573    case OP_NOT:
2574       emitNOT(insn);
2575       break;
2576    case OP_AND:
2577       emitLogicOp(insn, 0);
2578       break;
2579    case OP_OR:
2580       emitLogicOp(insn, 1);
2581       break;
2582    case OP_XOR:
2583       emitLogicOp(insn, 2);
2584       break;
2585    case OP_SHL:
2586    case OP_SHR:
2587       if (typeSizeof(insn->sType) == 8)
2588          emitShift64(insn);
2589       else
2590          emitShift(insn);
2591       break;
2592    case OP_SET:
2593    case OP_SET_AND:
2594    case OP_SET_OR:
2595    case OP_SET_XOR:
2596       emitSET(insn->asCmp());
2597       break;
2598    case OP_SELP:
2599       emitSELP(insn);
2600       break;
2601    case OP_SLCT:
2602       emitSLCT(insn->asCmp());
2603       break;
2604    case OP_MIN:
2605    case OP_MAX:
2606       emitMINMAX(insn);
2607       break;
2608    case OP_ABS:
2609    case OP_NEG:
2610    case OP_CEIL:
2611    case OP_FLOOR:
2612    case OP_TRUNC:
2613    case OP_SAT:
2614       emitCVT(insn);
2615       break;
2616    case OP_CVT:
2617       if (insn->def(0).getFile() == FILE_PREDICATE ||
2618           insn->src(0).getFile() == FILE_PREDICATE)
2619          emitMOV(insn);
2620       else
2621          emitCVT(insn);
2622       break;
2623    case OP_RSQ:
2624       emitSFnOp(insn, 5 + 2 * insn->subOp);
2625       break;
2626    case OP_RCP:
2627       emitSFnOp(insn, 4 + 2 * insn->subOp);
2628       break;
2629    case OP_LG2:
2630       emitSFnOp(insn, 3);
2631       break;
2632    case OP_EX2:
2633       emitSFnOp(insn, 2);
2634       break;
2635    case OP_SIN:
2636       emitSFnOp(insn, 1);
2637       break;
2638    case OP_COS:
2639       emitSFnOp(insn, 0);
2640       break;
2641    case OP_PRESIN:
2642    case OP_PREEX2:
2643       emitPreOp(insn);
2644       break;
2645    case OP_TEX:
2646    case OP_TXB:
2647    case OP_TXL:
2648    case OP_TXD:
2649    case OP_TXF:
2650    case OP_TXG:
2651    case OP_TXLQ:
2652       emitTEX(insn->asTex());
2653       break;
2654    case OP_TXQ:
2655       emitTXQ(insn->asTex());
2656       break;
2657    case OP_TEXBAR:
2658       emitTEXBAR(insn);
2659       break;
2660    case OP_PIXLD:
2661       emitPIXLD(insn);
2662       break;
2663    case OP_BRA:
2664    case OP_CALL:
2665    case OP_PRERET:
2666    case OP_RET:
2667    case OP_DISCARD:
2668    case OP_EXIT:
2669    case OP_PRECONT:
2670    case OP_CONT:
2671    case OP_PREBREAK:
2672    case OP_BREAK:
2673    case OP_JOINAT:
2674    case OP_BRKPT:
2675    case OP_QUADON:
2676    case OP_QUADPOP:
2677       emitFlow(insn);
2678       break;
2679    case OP_QUADOP:
2680       emitQUADOP(insn, insn->subOp, insn->lanes);
2681       break;
2682    case OP_DFDX:
2683       emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4);
2684       break;
2685    case OP_DFDY:
2686       emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5);
2687       break;
2688    case OP_POPCNT:
2689       emitPOPC(insn);
2690       break;
2691    case OP_INSBF:
2692       emitINSBF(insn);
2693       break;
2694    case OP_EXTBF:
2695       emitEXTBF(insn);
2696       break;
2697    case OP_BFIND:
2698       emitBFIND(insn);
2699       break;
2700    case OP_PERMT:
2701       emitPERMT(insn);
2702       break;
2703    case OP_JOIN:
2704       emitNOP(insn);
2705       insn->join = 1;
2706       break;
2707    case OP_BAR:
2708       emitBAR(insn);
2709       break;
2710    case OP_MEMBAR:
2711       emitMEMBAR(insn);
2712       break;
2713    case OP_ATOM:
2714       emitATOM(insn);
2715       break;
2716    case OP_CCTL:
2717       emitCCTL(insn);
2718       break;
2719    case OP_SHFL:
2720       emitSHFL(insn);
2721       break;
2722    case OP_VOTE:
2723       emitVOTE(insn);
2724       break;
2725    case OP_SULDB:
2726       emitSULDGB(insn->asTex());
2727       break;
2728    case OP_SUSTB:
2729    case OP_SUSTP:
2730       emitSUSTGx(insn->asTex());
2731       break;
2732    case OP_SUBFM:
2733    case OP_SUCLAMP:
2734    case OP_SUEAU:
2735       emitSUCalc(insn);
2736       break;
2737    case OP_VSHL:
2738       emitVSHL(insn);
2739       break;
2740    case OP_PHI:
2741    case OP_UNION:
2742    case OP_CONSTRAINT:
2743       ERROR("operation should have been eliminated");
2744       return false;
2745    case OP_EXP:
2746    case OP_LOG:
2747    case OP_SQRT:
2748    case OP_POW:
2749       ERROR("operation should have been lowered\n");
2750       return false;
2751    default:
2752       ERROR("unknown op: %u\n", insn->op);
2753       return false;
2754    }
2755 
2756    if (insn->join)
2757       code[0] |= 1 << 22;
2758 
2759    code += 2;
2760    codeSize += 8;
2761    return true;
2762 }
2763 
2764 uint32_t
getMinEncodingSize(const Instruction * i) const2765 CodeEmitterGK110::getMinEncodingSize(const Instruction *i) const
2766 {
2767    // No more short instruction encodings.
2768    return 8;
2769 }
2770 
2771 void
prepareEmission(Function * func)2772 CodeEmitterGK110::prepareEmission(Function *func)
2773 {
2774    const Target *targ = func->getProgram()->getTarget();
2775 
2776    CodeEmitter::prepareEmission(func);
2777 
2778    if (targ->hasSWSched)
2779       calculateSchedDataNVC0(targ, func);
2780 }
2781 
CodeEmitterGK110(const TargetNVC0 * target)2782 CodeEmitterGK110::CodeEmitterGK110(const TargetNVC0 *target)
2783    : CodeEmitter(target),
2784      targNVC0(target),
2785      writeIssueDelays(target->hasSWSched)
2786 {
2787    code = NULL;
2788    codeSize = codeSizeLimit = 0;
2789    relocInfo = NULL;
2790 }
2791 
2792 CodeEmitter *
createCodeEmitterGK110(Program::Type type)2793 TargetNVC0::createCodeEmitterGK110(Program::Type type)
2794 {
2795    CodeEmitterGK110 *emit = new CodeEmitterGK110(this);
2796    emit->setProgramType(type);
2797    return emit;
2798 }
2799 
2800 } // namespace nv50_ir
2801