• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2014 Red Hat Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Ben Skeggs <bskeggs@redhat.com>
23  */
24 
25 #include "codegen/nv50_ir_target_gm107.h"
26 #include "codegen/nv50_ir_sched_gm107.h"
27 
28 //#define GM107_DEBUG_SCHED_DATA
29 
30 namespace nv50_ir {
31 
32 class CodeEmitterGM107 : public CodeEmitter
33 {
34 public:
35    CodeEmitterGM107(const TargetGM107 *);
36 
37    virtual bool emitInstruction(Instruction *);
38    virtual uint32_t getMinEncodingSize(const Instruction *) const;
39 
40    virtual void prepareEmission(Program *);
41    virtual void prepareEmission(Function *);
42 
setProgramType(Program::Type pType)43    inline void setProgramType(Program::Type pType) { progType = pType; }
44 
45 private:
46    const TargetGM107 *targGM107;
47 
48    Program::Type progType;
49 
50    const Instruction *insn;
51    const bool writeIssueDelays;
52    uint32_t *data;
53 
54 private:
55    inline void emitField(uint32_t *, int, int, uint32_t);
emitField(int b,int s,uint32_t v)56    inline void emitField(int b, int s, uint32_t v) { emitField(code, b, s, v); }
57 
58    inline void emitInsn(uint32_t, bool);
emitInsn(uint32_t o)59    inline void emitInsn(uint32_t o) { emitInsn(o, true); }
60    inline void emitPred();
61    inline void emitGPR(int, const Value *);
emitGPR(int pos)62    inline void emitGPR(int pos) {
63       emitGPR(pos, (const Value *)NULL);
64    }
emitGPR(int pos,const ValueRef & ref)65    inline void emitGPR(int pos, const ValueRef &ref) {
66       emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL);
67    }
emitGPR(int pos,const ValueRef * ref)68    inline void emitGPR(int pos, const ValueRef *ref) {
69       emitGPR(pos, ref ? ref->rep() : (const Value *)NULL);
70    }
emitGPR(int pos,const ValueDef & def)71    inline void emitGPR(int pos, const ValueDef &def) {
72       emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL);
73    }
74    inline void emitSYS(int, const Value *);
emitSYS(int pos,const ValueRef & ref)75    inline void emitSYS(int pos, const ValueRef &ref) {
76       emitSYS(pos, ref.get() ? ref.rep() : (const Value *)NULL);
77    }
78    inline void emitPRED(int, const Value *);
emitPRED(int pos)79    inline void emitPRED(int pos) {
80       emitPRED(pos, (const Value *)NULL);
81    }
emitPRED(int pos,const ValueRef & ref)82    inline void emitPRED(int pos, const ValueRef &ref) {
83       emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL);
84    }
emitPRED(int pos,const ValueDef & def)85    inline void emitPRED(int pos, const ValueDef &def) {
86       emitPRED(pos, def.get() ? def.rep() : (const Value *)NULL);
87    }
88    inline void emitADDR(int, int, int, int, const ValueRef &);
89    inline void emitCBUF(int, int, int, int, int, const ValueRef &);
90    inline bool longIMMD(const ValueRef &);
91    inline void emitIMMD(int, int, const ValueRef &);
92 
93    void emitCond3(int, CondCode);
94    void emitCond4(int, CondCode);
emitCond5(int pos,CondCode cc)95    void emitCond5(int pos, CondCode cc) { emitCond4(pos, cc); }
96    inline void emitO(int);
97    inline void emitP(int);
98    inline void emitSAT(int);
99    inline void emitCC(int);
100    inline void emitX(int);
101    inline void emitABS(int, const ValueRef &);
102    inline void emitNEG(int, const ValueRef &);
103    inline void emitNEG2(int, const ValueRef &, const ValueRef &);
104    inline void emitFMZ(int, int);
105    inline void emitRND(int, RoundMode, int);
emitRND(int pos)106    inline void emitRND(int pos) {
107       emitRND(pos, insn->rnd, -1);
108    }
109    inline void emitPDIV(int);
110    inline void emitINV(int, const ValueRef &);
111 
112    void emitEXIT();
113    void emitBRA();
114    void emitCAL();
115    void emitPCNT();
116    void emitCONT();
117    void emitPBK();
118    void emitBRK();
119    void emitPRET();
120    void emitRET();
121    void emitSSY();
122    void emitSYNC();
123    void emitSAM();
124    void emitRAM();
125 
126    void emitPSETP();
127 
128    void emitMOV();
129    void emitS2R();
130    void emitCS2R();
131    void emitF2F();
132    void emitF2I();
133    void emitI2F();
134    void emitI2I();
135    void emitSEL();
136    void emitSHFL();
137 
138    void emitDADD();
139    void emitDMUL();
140    void emitDFMA();
141    void emitDMNMX();
142    void emitDSET();
143    void emitDSETP();
144 
145    void emitFADD();
146    void emitFMUL();
147    void emitFFMA();
148    void emitMUFU();
149    void emitFMNMX();
150    void emitRRO();
151    void emitFCMP();
152    void emitFSET();
153    void emitFSETP();
154    void emitFSWZADD();
155 
156    void emitLOP();
157    void emitNOT();
158    void emitIADD();
159    void emitIMUL();
160    void emitIMAD();
161    void emitISCADD();
162    void emitXMAD();
163    void emitIMNMX();
164    void emitICMP();
165    void emitISET();
166    void emitISETP();
167    void emitSHL();
168    void emitSHR();
169    void emitSHF();
170    void emitPOPC();
171    void emitBFI();
172    void emitBFE();
173    void emitFLO();
174    void emitPRMT();
175 
176    void emitLDSTs(int, DataType);
177    void emitLDSTc(int);
178    void emitLDC();
179    void emitLDL();
180    void emitLDS();
181    void emitLD();
182    void emitSTL();
183    void emitSTS();
184    void emitST();
185    void emitALD();
186    void emitAST();
187    void emitISBERD();
188    void emitAL2P();
189    void emitIPA();
190    void emitATOM();
191    void emitATOMS();
192    void emitRED();
193    void emitCCTL();
194 
195    void emitPIXLD();
196 
197    void emitTEXs(int);
198    void emitTEX();
199    void emitTEXS();
200    void emitTLD();
201    void emitTLD4();
202    void emitTXD();
203    void emitTXQ();
204    void emitTMML();
205    void emitDEPBAR();
206 
207    void emitNOP();
208    void emitKIL();
209    void emitOUT();
210 
211    void emitBAR();
212    void emitMEMBAR();
213 
214    void emitVOTE();
215 
216    void emitSUTarget();
217    void emitSUHandle(const int s);
218    void emitSUSTx();
219    void emitSULDx();
220    void emitSUREDx();
221 };
222 
223 /*******************************************************************************
224  * general instruction layout/fields
225  ******************************************************************************/
226 
227 void
emitField(uint32_t * data,int b,int s,uint32_t v)228 CodeEmitterGM107::emitField(uint32_t *data, int b, int s, uint32_t v)
229 {
230    if (b >= 0) {
231       uint32_t m = ((1ULL << s) - 1);
232       uint64_t d = (uint64_t)(v & m) << b;
233       assert(!(v & ~m) || (v & ~m) == ~m);
234       data[1] |= d >> 32;
235       data[0] |= d;
236    }
237 }
238 
239 void
emitPred()240 CodeEmitterGM107::emitPred()
241 {
242    if (insn->predSrc >= 0) {
243       emitField(16, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id);
244       emitField(19, 1, insn->cc == CC_NOT_P);
245    } else {
246       emitField(16, 3, 7);
247    }
248 }
249 
250 void
emitInsn(uint32_t hi,bool pred)251 CodeEmitterGM107::emitInsn(uint32_t hi, bool pred)
252 {
253    code[0] = 0x00000000;
254    code[1] = hi;
255    if (pred)
256       emitPred();
257 }
258 
259 void
emitGPR(int pos,const Value * val)260 CodeEmitterGM107::emitGPR(int pos, const Value *val)
261 {
262    emitField(pos, 8, val && !val->inFile(FILE_FLAGS) ?
263              val->reg.data.id : 255);
264 }
265 
266 void
emitSYS(int pos,const Value * val)267 CodeEmitterGM107::emitSYS(int pos, const Value *val)
268 {
269    int id = val ? val->reg.data.id : -1;
270 
271    switch (id) {
272    case SV_LANEID         : id = 0x00; break;
273    case SV_VERTEX_COUNT   : id = 0x10; break;
274    case SV_INVOCATION_ID  : id = 0x11; break;
275    case SV_THREAD_KILL    : id = 0x13; break;
276    case SV_INVOCATION_INFO: id = 0x1d; break;
277    case SV_COMBINED_TID   : id = 0x20; break;
278    case SV_TID            : id = 0x21 + val->reg.data.sv.index; break;
279    case SV_CTAID          : id = 0x25 + val->reg.data.sv.index; break;
280    case SV_LANEMASK_EQ    : id = 0x38; break;
281    case SV_LANEMASK_LT    : id = 0x39; break;
282    case SV_LANEMASK_LE    : id = 0x3a; break;
283    case SV_LANEMASK_GT    : id = 0x3b; break;
284    case SV_LANEMASK_GE    : id = 0x3c; break;
285    case SV_CLOCK          : id = 0x50 + val->reg.data.sv.index; break;
286    default:
287       assert(!"invalid system value");
288       id = 0;
289       break;
290    }
291 
292    emitField(pos, 8, id);
293 }
294 
295 void
emitPRED(int pos,const Value * val)296 CodeEmitterGM107::emitPRED(int pos, const Value *val)
297 {
298    emitField(pos, 3, val ? val->reg.data.id : 7);
299 }
300 
301 void
emitADDR(int gpr,int off,int len,int shr,const ValueRef & ref)302 CodeEmitterGM107::emitADDR(int gpr, int off, int len, int shr,
303                            const ValueRef &ref)
304 {
305    const Value *v = ref.get();
306    assert(!(v->reg.data.offset & ((1 << shr) - 1)));
307    if (gpr >= 0)
308       emitGPR(gpr, ref.getIndirect(0));
309    emitField(off, len, v->reg.data.offset >> shr);
310 }
311 
312 void
emitCBUF(int buf,int gpr,int off,int len,int shr,const ValueRef & ref)313 CodeEmitterGM107::emitCBUF(int buf, int gpr, int off, int len, int shr,
314                            const ValueRef &ref)
315 {
316    const Value *v = ref.get();
317    const Symbol *s = v->asSym();
318 
319    assert(!(s->reg.data.offset & ((1 << shr) - 1)));
320 
321    emitField(buf,  5, v->reg.fileIndex);
322    if (gpr >= 0)
323       emitGPR(gpr, ref.getIndirect(0));
324    emitField(off, 16, s->reg.data.offset >> shr);
325 }
326 
327 bool
longIMMD(const ValueRef & ref)328 CodeEmitterGM107::longIMMD(const ValueRef &ref)
329 {
330    if (ref.getFile() == FILE_IMMEDIATE) {
331       const ImmediateValue *imm = ref.get()->asImm();
332       if (isFloatType(insn->sType))
333          return imm->reg.data.u32 & 0xfff;
334       else
335          return imm->reg.data.s32 > 0x7ffff || imm->reg.data.s32 < -0x80000;
336    }
337    return false;
338 }
339 
340 void
emitIMMD(int pos,int len,const ValueRef & ref)341 CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef &ref)
342 {
343    const ImmediateValue *imm = ref.get()->asImm();
344    uint32_t val = imm->reg.data.u32;
345 
346    if (len == 19) {
347       if (insn->sType == TYPE_F32 || insn->sType == TYPE_F16) {
348          assert(!(val & 0x00000fff));
349          val >>= 12;
350       } else if (insn->sType == TYPE_F64) {
351          assert(!(imm->reg.data.u64 & 0x00000fffffffffffULL));
352          val = imm->reg.data.u64 >> 44;
353       } else {
354          assert(!(val & 0xfff80000) || (val & 0xfff80000) == 0xfff80000);
355       }
356       emitField( 56,   1, (val & 0x80000) >> 19);
357       emitField(pos, len, (val & 0x7ffff));
358    } else {
359       emitField(pos, len, val);
360    }
361 }
362 
363 /*******************************************************************************
364  * modifiers
365  ******************************************************************************/
366 
367 void
emitCond3(int pos,CondCode code)368 CodeEmitterGM107::emitCond3(int pos, CondCode code)
369 {
370    int data = 0;
371 
372    switch (code) {
373    case CC_FL : data = 0x00; break;
374    case CC_LTU:
375    case CC_LT : data = 0x01; break;
376    case CC_EQU:
377    case CC_EQ : data = 0x02; break;
378    case CC_LEU:
379    case CC_LE : data = 0x03; break;
380    case CC_GTU:
381    case CC_GT : data = 0x04; break;
382    case CC_NEU:
383    case CC_NE : data = 0x05; break;
384    case CC_GEU:
385    case CC_GE : data = 0x06; break;
386    case CC_TR : data = 0x07; break;
387    default:
388       assert(!"invalid cond3");
389       break;
390    }
391 
392    emitField(pos, 3, data);
393 }
394 
395 void
emitCond4(int pos,CondCode code)396 CodeEmitterGM107::emitCond4(int pos, CondCode code)
397 {
398    int data = 0;
399 
400    switch (code) {
401    case CC_FL: data = 0x00; break;
402    case CC_LT: data = 0x01; break;
403    case CC_EQ: data = 0x02; break;
404    case CC_LE: data = 0x03; break;
405    case CC_GT: data = 0x04; break;
406    case CC_NE: data = 0x05; break;
407    case CC_GE: data = 0x06; break;
408 //   case CC_NUM: data = 0x07; break;
409 //   case CC_NAN: data = 0x08; break;
410    case CC_LTU: data = 0x09; break;
411    case CC_EQU: data = 0x0a; break;
412    case CC_LEU: data = 0x0b; break;
413    case CC_GTU: data = 0x0c; break;
414    case CC_NEU: data = 0x0d; break;
415    case CC_GEU: data = 0x0e; break;
416    case CC_TR:  data = 0x0f; break;
417    default:
418       assert(!"invalid cond4");
419       break;
420    }
421 
422    emitField(pos, 4, data);
423 }
424 
425 void
emitO(int pos)426 CodeEmitterGM107::emitO(int pos)
427 {
428    emitField(pos, 1, insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT);
429 }
430 
431 void
emitP(int pos)432 CodeEmitterGM107::emitP(int pos)
433 {
434    emitField(pos, 1, insn->perPatch);
435 }
436 
437 void
emitSAT(int pos)438 CodeEmitterGM107::emitSAT(int pos)
439 {
440    emitField(pos, 1, insn->saturate);
441 }
442 
443 void
emitCC(int pos)444 CodeEmitterGM107::emitCC(int pos)
445 {
446    emitField(pos, 1, insn->flagsDef >= 0);
447 }
448 
449 void
emitX(int pos)450 CodeEmitterGM107::emitX(int pos)
451 {
452    emitField(pos, 1, insn->flagsSrc >= 0);
453 }
454 
455 void
emitABS(int pos,const ValueRef & ref)456 CodeEmitterGM107::emitABS(int pos, const ValueRef &ref)
457 {
458    emitField(pos, 1, ref.mod.abs());
459 }
460 
461 void
emitNEG(int pos,const ValueRef & ref)462 CodeEmitterGM107::emitNEG(int pos, const ValueRef &ref)
463 {
464    emitField(pos, 1, ref.mod.neg());
465 }
466 
467 void
emitNEG2(int pos,const ValueRef & a,const ValueRef & b)468 CodeEmitterGM107::emitNEG2(int pos, const ValueRef &a, const ValueRef &b)
469 {
470    emitField(pos, 1, a.mod.neg() ^ b.mod.neg());
471 }
472 
473 void
emitFMZ(int pos,int len)474 CodeEmitterGM107::emitFMZ(int pos, int len)
475 {
476    emitField(pos, len, insn->dnz << 1 | insn->ftz);
477 }
478 
479 void
emitRND(int rmp,RoundMode rnd,int rip)480 CodeEmitterGM107::emitRND(int rmp, RoundMode rnd, int rip)
481 {
482    int rm = 0, ri = 0;
483    switch (rnd) {
484    case ROUND_NI: ri = 1;
485    case ROUND_N : rm = 0; break;
486    case ROUND_MI: ri = 1;
487    case ROUND_M : rm = 1; break;
488    case ROUND_PI: ri = 1;
489    case ROUND_P : rm = 2; break;
490    case ROUND_ZI: ri = 1;
491    case ROUND_Z : rm = 3; break;
492    default:
493       assert(!"invalid round mode");
494       break;
495    }
496    emitField(rip, 1, ri);
497    emitField(rmp, 2, rm);
498 }
499 
500 void
emitPDIV(int pos)501 CodeEmitterGM107::emitPDIV(int pos)
502 {
503    assert(insn->postFactor >= -3 && insn->postFactor <= 3);
504    if (insn->postFactor > 0)
505       emitField(pos, 3, 7 - insn->postFactor);
506    else
507       emitField(pos, 3, 0 - insn->postFactor);
508 }
509 
510 void
emitINV(int pos,const ValueRef & ref)511 CodeEmitterGM107::emitINV(int pos, const ValueRef &ref)
512 {
513    emitField(pos, 1, !!(ref.mod & Modifier(NV50_IR_MOD_NOT)));
514 }
515 
516 /*******************************************************************************
517  * control flow
518  ******************************************************************************/
519 
520 void
emitEXIT()521 CodeEmitterGM107::emitEXIT()
522 {
523    emitInsn (0xe3000000);
524    emitCond5(0x00, CC_TR);
525 }
526 
527 void
emitBRA()528 CodeEmitterGM107::emitBRA()
529 {
530    const FlowInstruction *insn = this->insn->asFlow();
531    int gpr = -1;
532 
533    if (insn->indirect) {
534       if (insn->absolute)
535          emitInsn(0xe2000000); // JMX
536       else
537          emitInsn(0xe2500000); // BRX
538       gpr = 0x08;
539    } else {
540       if (insn->absolute)
541          emitInsn(0xe2100000); // JMP
542       else
543          emitInsn(0xe2400000); // BRA
544       emitField(0x07, 1, insn->allWarp);
545    }
546 
547    emitField(0x06, 1, insn->limit);
548    emitCond5(0x00, CC_TR);
549 
550    if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
551       int32_t pos = insn->target.bb->binPos;
552       if (writeIssueDelays && !(pos & 0x1f))
553          pos += 8;
554       if (!insn->absolute)
555          emitField(0x14, 24, pos - (codeSize + 8));
556       else
557          emitField(0x14, 32, pos);
558    } else {
559       emitCBUF (0x24, gpr, 20, 16, 0, insn->src(0));
560       emitField(0x05, 1, 1);
561    }
562 }
563 
564 void
emitCAL()565 CodeEmitterGM107::emitCAL()
566 {
567    const FlowInstruction *insn = this->insn->asFlow();
568 
569    if (insn->absolute) {
570       emitInsn(0xe2200000, 0); // JCAL
571    } else {
572       emitInsn(0xe2600000, 0); // CAL
573    }
574 
575    if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
576       if (!insn->absolute)
577          emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
578       else {
579          if (insn->builtin) {
580             int pcAbs = targGM107->getBuiltinOffset(insn->target.builtin);
581             addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfff00000,  20);
582             addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x000fffff, -12);
583          } else {
584             emitField(0x14, 32, insn->target.bb->binPos);
585          }
586       }
587    } else {
588       emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
589       emitField(0x05, 1, 1);
590    }
591 }
592 
593 void
emitPCNT()594 CodeEmitterGM107::emitPCNT()
595 {
596    const FlowInstruction *insn = this->insn->asFlow();
597 
598    emitInsn(0xe2b00000, 0);
599 
600    if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
601       emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
602    } else {
603       emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
604       emitField(0x05, 1, 1);
605    }
606 }
607 
608 void
emitCONT()609 CodeEmitterGM107::emitCONT()
610 {
611    emitInsn (0xe3500000);
612    emitCond5(0x00, CC_TR);
613 }
614 
615 void
emitPBK()616 CodeEmitterGM107::emitPBK()
617 {
618    const FlowInstruction *insn = this->insn->asFlow();
619 
620    emitInsn(0xe2a00000, 0);
621 
622    if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
623       emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
624    } else {
625       emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
626       emitField(0x05, 1, 1);
627    }
628 }
629 
630 void
emitBRK()631 CodeEmitterGM107::emitBRK()
632 {
633    emitInsn (0xe3400000);
634    emitCond5(0x00, CC_TR);
635 }
636 
637 void
emitPRET()638 CodeEmitterGM107::emitPRET()
639 {
640    const FlowInstruction *insn = this->insn->asFlow();
641 
642    emitInsn(0xe2700000, 0);
643 
644    if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
645       emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
646    } else {
647       emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
648       emitField(0x05, 1, 1);
649    }
650 }
651 
652 void
emitRET()653 CodeEmitterGM107::emitRET()
654 {
655    emitInsn (0xe3200000);
656    emitCond5(0x00, CC_TR);
657 }
658 
659 void
emitSSY()660 CodeEmitterGM107::emitSSY()
661 {
662    const FlowInstruction *insn = this->insn->asFlow();
663 
664    emitInsn(0xe2900000, 0);
665 
666    if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
667       emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
668    } else {
669       emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
670       emitField(0x05, 1, 1);
671    }
672 }
673 
674 void
emitSYNC()675 CodeEmitterGM107::emitSYNC()
676 {
677    emitInsn (0xf0f80000);
678    emitCond5(0x00, CC_TR);
679 }
680 
681 void
emitSAM()682 CodeEmitterGM107::emitSAM()
683 {
684    emitInsn(0xe3700000, 0);
685 }
686 
687 void
emitRAM()688 CodeEmitterGM107::emitRAM()
689 {
690    emitInsn(0xe3800000, 0);
691 }
692 
693 /*******************************************************************************
694  * predicate/cc
695  ******************************************************************************/
696 
697 void
emitPSETP()698 CodeEmitterGM107::emitPSETP()
699 {
700 
701    emitInsn(0x50900000);
702 
703    switch (insn->op) {
704    case OP_AND: emitField(0x18, 3, 0); break;
705    case OP_OR:  emitField(0x18, 3, 1); break;
706    case OP_XOR: emitField(0x18, 3, 2); break;
707    default:
708       assert(!"unexpected operation");
709       break;
710    }
711 
712    // emitINV (0x2a);
713    emitPRED(0x27); // TODO: support 3-arg
714    emitINV (0x20, insn->src(1));
715    emitPRED(0x1d, insn->src(1));
716    emitINV (0x0f, insn->src(0));
717    emitPRED(0x0c, insn->src(0));
718    emitPRED(0x03, insn->def(0));
719    emitPRED(0x00);
720 }
721 
722 /*******************************************************************************
723  * movement / conversion
724  ******************************************************************************/
725 
726 void
emitMOV()727 CodeEmitterGM107::emitMOV()
728 {
729    if (insn->src(0).getFile() != FILE_IMMEDIATE) {
730       switch (insn->src(0).getFile()) {
731       case FILE_GPR:
732          if (insn->def(0).getFile() == FILE_PREDICATE) {
733             emitInsn(0x5b6a0000);
734             emitGPR (0x08);
735          } else {
736             emitInsn(0x5c980000);
737          }
738          emitGPR (0x14, insn->src(0));
739          break;
740       case FILE_MEMORY_CONST:
741          emitInsn(0x4c980000);
742          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
743          break;
744       case FILE_IMMEDIATE:
745          emitInsn(0x38980000);
746          emitIMMD(0x14, 19, insn->src(0));
747          break;
748       case FILE_PREDICATE:
749          emitInsn(0x50880000);
750          emitPRED(0x0c, insn->src(0));
751          emitPRED(0x1d);
752          emitPRED(0x27);
753          break;
754       default:
755          assert(!"bad src file");
756          break;
757       }
758       if (insn->def(0).getFile() != FILE_PREDICATE &&
759           insn->src(0).getFile() != FILE_PREDICATE)
760          emitField(0x27, 4, insn->lanes);
761    } else {
762       emitInsn (0x01000000);
763       emitIMMD (0x14, 32, insn->src(0));
764       emitField(0x0c, 4, insn->lanes);
765    }
766 
767    if (insn->def(0).getFile() == FILE_PREDICATE) {
768       emitPRED(0x27);
769       emitPRED(0x03, insn->def(0));
770       emitPRED(0x00);
771    } else {
772       emitGPR(0x00, insn->def(0));
773    }
774 }
775 
776 void
emitS2R()777 CodeEmitterGM107::emitS2R()
778 {
779    emitInsn(0xf0c80000);
780    emitSYS (0x14, insn->src(0));
781    emitGPR (0x00, insn->def(0));
782 }
783 
784 void
emitCS2R()785 CodeEmitterGM107::emitCS2R()
786 {
787    emitInsn(0x50c80000);
788    emitSYS (0x14, insn->src(0));
789    emitGPR (0x00, insn->def(0));
790 }
791 
792 void
emitF2F()793 CodeEmitterGM107::emitF2F()
794 {
795    RoundMode rnd = insn->rnd;
796 
797    switch (insn->op) {
798    case OP_FLOOR: rnd = ROUND_MI; break;
799    case OP_CEIL : rnd = ROUND_PI; break;
800    case OP_TRUNC: rnd = ROUND_ZI; break;
801    default:
802       break;
803    }
804 
805    switch (insn->src(0).getFile()) {
806    case FILE_GPR:
807       emitInsn(0x5ca80000);
808       emitGPR (0x14, insn->src(0));
809       break;
810    case FILE_MEMORY_CONST:
811       emitInsn(0x4ca80000);
812       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
813       break;
814    case FILE_IMMEDIATE:
815       emitInsn(0x38a80000);
816       emitIMMD(0x14, 19, insn->src(0));
817       break;
818    default:
819       assert(!"bad src0 file");
820       break;
821    }
822 
823    emitField(0x32, 1, (insn->op == OP_SAT) || insn->saturate);
824    emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
825    emitCC   (0x2f);
826    emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
827    emitFMZ  (0x2c, 1);
828    emitField(0x29, 1, insn->subOp);
829    emitRND  (0x27, rnd, 0x2a);
830    emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
831    emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
832    emitGPR  (0x00, insn->def(0));
833 }
834 
835 void
emitF2I()836 CodeEmitterGM107::emitF2I()
837 {
838    RoundMode rnd = insn->rnd;
839 
840    switch (insn->op) {
841    case OP_FLOOR: rnd = ROUND_M; break;
842    case OP_CEIL : rnd = ROUND_P; break;
843    case OP_TRUNC: rnd = ROUND_Z; break;
844    default:
845       break;
846    }
847 
848    switch (insn->src(0).getFile()) {
849    case FILE_GPR:
850       emitInsn(0x5cb00000);
851       emitGPR (0x14, insn->src(0));
852       break;
853    case FILE_MEMORY_CONST:
854       emitInsn(0x4cb00000);
855       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
856       break;
857    case FILE_IMMEDIATE:
858       emitInsn(0x38b00000);
859       emitIMMD(0x14, 19, insn->src(0));
860       break;
861    default:
862       assert(!"bad src0 file");
863       break;
864    }
865 
866    emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
867    emitCC   (0x2f);
868    emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
869    emitFMZ  (0x2c, 1);
870    emitRND  (0x27, rnd, 0x2a);
871    emitField(0x0c, 1, isSignedType(insn->dType));
872    emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
873    emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
874    emitGPR  (0x00, insn->def(0));
875 }
876 
877 void
emitI2F()878 CodeEmitterGM107::emitI2F()
879 {
880    RoundMode rnd = insn->rnd;
881 
882    switch (insn->op) {
883    case OP_FLOOR: rnd = ROUND_M; break;
884    case OP_CEIL : rnd = ROUND_P; break;
885    case OP_TRUNC: rnd = ROUND_Z; break;
886    default:
887       break;
888    }
889 
890    switch (insn->src(0).getFile()) {
891    case FILE_GPR:
892       emitInsn(0x5cb80000);
893       emitGPR (0x14, insn->src(0));
894       break;
895    case FILE_MEMORY_CONST:
896       emitInsn(0x4cb80000);
897       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
898       break;
899    case FILE_IMMEDIATE:
900       emitInsn(0x38b80000);
901       emitIMMD(0x14, 19, insn->src(0));
902       break;
903    default:
904       assert(!"bad src0 file");
905       break;
906    }
907 
908    emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
909    emitCC   (0x2f);
910    emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
911    emitField(0x29, 2, insn->subOp);
912    emitRND  (0x27, rnd, -1);
913    emitField(0x0d, 1, isSignedType(insn->sType));
914    emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
915    emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
916    emitGPR  (0x00, insn->def(0));
917 }
918 
919 void
emitI2I()920 CodeEmitterGM107::emitI2I()
921 {
922    switch (insn->src(0).getFile()) {
923    case FILE_GPR:
924       emitInsn(0x5ce00000);
925       emitGPR (0x14, insn->src(0));
926       break;
927    case FILE_MEMORY_CONST:
928       emitInsn(0x4ce00000);
929       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
930       break;
931    case FILE_IMMEDIATE:
932       emitInsn(0x38e00000);
933       emitIMMD(0x14, 19, insn->src(0));
934       break;
935    default:
936       assert(!"bad src0 file");
937       break;
938    }
939 
940    emitSAT  (0x32);
941    emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
942    emitCC   (0x2f);
943    emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
944    emitField(0x29, 2, insn->subOp);
945    emitField(0x0d, 1, isSignedType(insn->sType));
946    emitField(0x0c, 1, isSignedType(insn->dType));
947    emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
948    emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
949    emitGPR  (0x00, insn->def(0));
950 }
951 
952 void
gm107_selpFlip(const FixupEntry * entry,uint32_t * code,const FixupData & data)953 gm107_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
954 {
955    int loc = entry->loc;
956    if (data.force_persample_interp)
957       code[loc + 1] |= 1 << 10;
958    else
959       code[loc + 1] &= ~(1 << 10);
960 }
961 
962 void
emitSEL()963 CodeEmitterGM107::emitSEL()
964 {
965    switch (insn->src(1).getFile()) {
966    case FILE_GPR:
967       emitInsn(0x5ca00000);
968       emitGPR (0x14, insn->src(1));
969       break;
970    case FILE_MEMORY_CONST:
971       emitInsn(0x4ca00000);
972       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
973       break;
974    case FILE_IMMEDIATE:
975       emitInsn(0x38a00000);
976       emitIMMD(0x14, 19, insn->src(1));
977       break;
978    default:
979       assert(!"bad src1 file");
980       break;
981    }
982 
983    emitINV (0x2a, insn->src(2));
984    emitPRED(0x27, insn->src(2));
985    emitGPR (0x08, insn->src(0));
986    emitGPR (0x00, insn->def(0));
987 
988    if (insn->subOp == 1) {
989       addInterp(0, 0, gm107_selpFlip);
990    }
991 }
992 
993 void
emitSHFL()994 CodeEmitterGM107::emitSHFL()
995 {
996    int type = 0;
997 
998    emitInsn (0xef100000);
999 
1000    switch (insn->src(1).getFile()) {
1001    case FILE_GPR:
1002       emitGPR(0x14, insn->src(1));
1003       break;
1004    case FILE_IMMEDIATE:
1005       emitIMMD(0x14, 5, insn->src(1));
1006       type |= 1;
1007       break;
1008    default:
1009       assert(!"invalid src1 file");
1010       break;
1011    }
1012 
1013    switch (insn->src(2).getFile()) {
1014    case FILE_GPR:
1015       emitGPR(0x27, insn->src(2));
1016       break;
1017    case FILE_IMMEDIATE:
1018       emitIMMD(0x22, 13, insn->src(2));
1019       type |= 2;
1020       break;
1021    default:
1022       assert(!"invalid src2 file");
1023       break;
1024    }
1025 
1026    if (!insn->defExists(1))
1027       emitPRED(0x30);
1028    else {
1029       assert(insn->def(1).getFile() == FILE_PREDICATE);
1030       emitPRED(0x30, insn->def(1));
1031    }
1032 
1033    emitField(0x1e, 2, insn->subOp);
1034    emitField(0x1c, 2, type);
1035    emitGPR  (0x08, insn->src(0));
1036    emitGPR  (0x00, insn->def(0));
1037 }
1038 
1039 /*******************************************************************************
1040  * double
1041  ******************************************************************************/
1042 
1043 void
emitDADD()1044 CodeEmitterGM107::emitDADD()
1045 {
1046    switch (insn->src(1).getFile()) {
1047    case FILE_GPR:
1048       emitInsn(0x5c700000);
1049       emitGPR (0x14, insn->src(1));
1050       break;
1051    case FILE_MEMORY_CONST:
1052       emitInsn(0x4c700000);
1053       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1054       break;
1055    case FILE_IMMEDIATE:
1056       emitInsn(0x38700000);
1057       emitIMMD(0x14, 19, insn->src(1));
1058       break;
1059    default:
1060       assert(!"bad src1 file");
1061       break;
1062    }
1063    emitABS(0x31, insn->src(1));
1064    emitNEG(0x30, insn->src(0));
1065    emitCC (0x2f);
1066    emitABS(0x2e, insn->src(0));
1067    emitNEG(0x2d, insn->src(1));
1068 
1069    if (insn->op == OP_SUB)
1070       code[1] ^= 0x00002000;
1071 
1072    emitGPR(0x08, insn->src(0));
1073    emitGPR(0x00, insn->def(0));
1074 }
1075 
1076 void
emitDMUL()1077 CodeEmitterGM107::emitDMUL()
1078 {
1079    switch (insn->src(1).getFile()) {
1080    case FILE_GPR:
1081       emitInsn(0x5c800000);
1082       emitGPR (0x14, insn->src(1));
1083       break;
1084    case FILE_MEMORY_CONST:
1085       emitInsn(0x4c800000);
1086       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1087       break;
1088    case FILE_IMMEDIATE:
1089       emitInsn(0x38800000);
1090       emitIMMD(0x14, 19, insn->src(1));
1091       break;
1092    default:
1093       assert(!"bad src1 file");
1094       break;
1095    }
1096 
1097    emitNEG2(0x30, insn->src(0), insn->src(1));
1098    emitCC  (0x2f);
1099    emitRND (0x27);
1100    emitGPR (0x08, insn->src(0));
1101    emitGPR (0x00, insn->def(0));
1102 }
1103 
1104 void
emitDFMA()1105 CodeEmitterGM107::emitDFMA()
1106 {
1107    switch(insn->src(2).getFile()) {
1108    case FILE_GPR:
1109       switch (insn->src(1).getFile()) {
1110       case FILE_GPR:
1111          emitInsn(0x5b700000);
1112          emitGPR (0x14, insn->src(1));
1113          break;
1114       case FILE_MEMORY_CONST:
1115          emitInsn(0x4b700000);
1116          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1117          break;
1118       case FILE_IMMEDIATE:
1119          emitInsn(0x36700000);
1120          emitIMMD(0x14, 19, insn->src(1));
1121          break;
1122       default:
1123          assert(!"bad src1 file");
1124          break;
1125       }
1126       emitGPR (0x27, insn->src(2));
1127       break;
1128    case FILE_MEMORY_CONST:
1129       emitInsn(0x53700000);
1130       emitGPR (0x27, insn->src(1));
1131       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1132       break;
1133    default:
1134       assert(!"bad src2 file");
1135       break;
1136    }
1137 
1138    emitRND (0x32);
1139    emitNEG (0x31, insn->src(2));
1140    emitNEG2(0x30, insn->src(0), insn->src(1));
1141    emitCC  (0x2f);
1142    emitGPR (0x08, insn->src(0));
1143    emitGPR (0x00, insn->def(0));
1144 }
1145 
1146 void
emitDMNMX()1147 CodeEmitterGM107::emitDMNMX()
1148 {
1149    switch (insn->src(1).getFile()) {
1150    case FILE_GPR:
1151       emitInsn(0x5c500000);
1152       emitGPR (0x14, insn->src(1));
1153       break;
1154    case FILE_MEMORY_CONST:
1155       emitInsn(0x4c500000);
1156       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1157       break;
1158    case FILE_IMMEDIATE:
1159       emitInsn(0x38500000);
1160       emitIMMD(0x14, 19, insn->src(1));
1161       break;
1162    default:
1163       assert(!"bad src1 file");
1164       break;
1165    }
1166 
1167    emitABS  (0x31, insn->src(1));
1168    emitNEG  (0x30, insn->src(0));
1169    emitCC   (0x2f);
1170    emitABS  (0x2e, insn->src(0));
1171    emitNEG  (0x2d, insn->src(1));
1172    emitField(0x2a, 1, insn->op == OP_MAX);
1173    emitPRED (0x27);
1174    emitGPR  (0x08, insn->src(0));
1175    emitGPR  (0x00, insn->def(0));
1176 }
1177 
1178 void
emitDSET()1179 CodeEmitterGM107::emitDSET()
1180 {
1181    const CmpInstruction *insn = this->insn->asCmp();
1182 
1183    switch (insn->src(1).getFile()) {
1184    case FILE_GPR:
1185       emitInsn(0x59000000);
1186       emitGPR (0x14, insn->src(1));
1187       break;
1188    case FILE_MEMORY_CONST:
1189       emitInsn(0x49000000);
1190       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1191       break;
1192    case FILE_IMMEDIATE:
1193       emitInsn(0x32000000);
1194       emitIMMD(0x14, 19, insn->src(1));
1195       break;
1196    default:
1197       assert(!"bad src1 file");
1198       break;
1199    }
1200 
1201    if (insn->op != OP_SET) {
1202       switch (insn->op) {
1203       case OP_SET_AND: emitField(0x2d, 2, 0); break;
1204       case OP_SET_OR : emitField(0x2d, 2, 1); break;
1205       case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1206       default:
1207          assert(!"invalid set op");
1208          break;
1209       }
1210       emitPRED(0x27, insn->src(2));
1211    } else {
1212       emitPRED(0x27);
1213    }
1214 
1215    emitABS  (0x36, insn->src(0));
1216    emitNEG  (0x35, insn->src(1));
1217    emitField(0x34, 1, insn->dType == TYPE_F32);
1218    emitCond4(0x30, insn->setCond);
1219    emitCC   (0x2f);
1220    emitABS  (0x2c, insn->src(1));
1221    emitNEG  (0x2b, insn->src(0));
1222    emitGPR  (0x08, insn->src(0));
1223    emitGPR  (0x00, insn->def(0));
1224 }
1225 
1226 void
emitDSETP()1227 CodeEmitterGM107::emitDSETP()
1228 {
1229    const CmpInstruction *insn = this->insn->asCmp();
1230 
1231    switch (insn->src(1).getFile()) {
1232    case FILE_GPR:
1233       emitInsn(0x5b800000);
1234       emitGPR (0x14, insn->src(1));
1235       break;
1236    case FILE_MEMORY_CONST:
1237       emitInsn(0x4b800000);
1238       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1239       break;
1240    case FILE_IMMEDIATE:
1241       emitInsn(0x36800000);
1242       emitIMMD(0x14, 19, insn->src(1));
1243       break;
1244    default:
1245       assert(!"bad src1 file");
1246       break;
1247    }
1248 
1249    if (insn->op != OP_SET) {
1250       switch (insn->op) {
1251       case OP_SET_AND: emitField(0x2d, 2, 0); break;
1252       case OP_SET_OR : emitField(0x2d, 2, 1); break;
1253       case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1254       default:
1255          assert(!"invalid set op");
1256          break;
1257       }
1258       emitPRED(0x27, insn->src(2));
1259    } else {
1260       emitPRED(0x27);
1261    }
1262 
1263    emitCond4(0x30, insn->setCond);
1264    emitABS  (0x2c, insn->src(1));
1265    emitNEG  (0x2b, insn->src(0));
1266    emitGPR  (0x08, insn->src(0));
1267    emitABS  (0x07, insn->src(0));
1268    emitNEG  (0x06, insn->src(1));
1269    emitPRED (0x03, insn->def(0));
1270    if (insn->defExists(1))
1271       emitPRED(0x00, insn->def(1));
1272    else
1273       emitPRED(0x00);
1274 }
1275 
1276 /*******************************************************************************
1277  * float
1278  ******************************************************************************/
1279 
1280 void
emitFADD()1281 CodeEmitterGM107::emitFADD()
1282 {
1283    if (!longIMMD(insn->src(1))) {
1284       switch (insn->src(1).getFile()) {
1285       case FILE_GPR:
1286          emitInsn(0x5c580000);
1287          emitGPR (0x14, insn->src(1));
1288          break;
1289       case FILE_MEMORY_CONST:
1290          emitInsn(0x4c580000);
1291          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1292          break;
1293       case FILE_IMMEDIATE:
1294          emitInsn(0x38580000);
1295          emitIMMD(0x14, 19, insn->src(1));
1296          break;
1297       default:
1298          assert(!"bad src1 file");
1299          break;
1300       }
1301       emitSAT(0x32);
1302       emitABS(0x31, insn->src(1));
1303       emitNEG(0x30, insn->src(0));
1304       emitCC (0x2f);
1305       emitABS(0x2e, insn->src(0));
1306       emitNEG(0x2d, insn->src(1));
1307       emitFMZ(0x2c, 1);
1308 
1309       if (insn->op == OP_SUB)
1310          code[1] ^= 0x00002000;
1311    } else {
1312       emitInsn(0x08000000);
1313       emitABS(0x39, insn->src(1));
1314       emitNEG(0x38, insn->src(0));
1315       emitFMZ(0x37, 1);
1316       emitABS(0x36, insn->src(0));
1317       emitNEG(0x35, insn->src(1));
1318       emitCC  (0x34);
1319       emitIMMD(0x14, 32, insn->src(1));
1320 
1321       if (insn->op == OP_SUB)
1322          code[1] ^= 0x00080000;
1323    }
1324 
1325    emitGPR(0x08, insn->src(0));
1326    emitGPR(0x00, insn->def(0));
1327 }
1328 
1329 void
emitFMUL()1330 CodeEmitterGM107::emitFMUL()
1331 {
1332    if (!longIMMD(insn->src(1))) {
1333       switch (insn->src(1).getFile()) {
1334       case FILE_GPR:
1335          emitInsn(0x5c680000);
1336          emitGPR (0x14, insn->src(1));
1337          break;
1338       case FILE_MEMORY_CONST:
1339          emitInsn(0x4c680000);
1340          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1341          break;
1342       case FILE_IMMEDIATE:
1343          emitInsn(0x38680000);
1344          emitIMMD(0x14, 19, insn->src(1));
1345          break;
1346       default:
1347          assert(!"bad src1 file");
1348          break;
1349       }
1350       emitSAT (0x32);
1351       emitNEG2(0x30, insn->src(0), insn->src(1));
1352       emitCC  (0x2f);
1353       emitFMZ (0x2c, 2);
1354       emitPDIV(0x29);
1355       emitRND (0x27);
1356    } else {
1357       emitInsn(0x1e000000);
1358       emitSAT (0x37);
1359       emitFMZ (0x35, 2);
1360       emitCC  (0x34);
1361       emitIMMD(0x14, 32, insn->src(1));
1362       if (insn->src(0).mod.neg() ^ insn->src(1).mod.neg())
1363          code[1] ^= 0x00080000; /* flip immd sign bit */
1364    }
1365 
1366    emitGPR(0x08, insn->src(0));
1367    emitGPR(0x00, insn->def(0));
1368 }
1369 
1370 void
emitFFMA()1371 CodeEmitterGM107::emitFFMA()
1372 {
1373    bool isLongIMMD = false;
1374    switch(insn->src(2).getFile()) {
1375    case FILE_GPR:
1376       switch (insn->src(1).getFile()) {
1377       case FILE_GPR:
1378          emitInsn(0x59800000);
1379          emitGPR (0x14, insn->src(1));
1380          break;
1381       case FILE_MEMORY_CONST:
1382          emitInsn(0x49800000);
1383          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1384          break;
1385       case FILE_IMMEDIATE:
1386          if (longIMMD(insn->getSrc(1))) {
1387             assert(insn->getDef(0)->reg.data.id == insn->getSrc(2)->reg.data.id);
1388             isLongIMMD = true;
1389             emitInsn(0x0c000000);
1390             emitIMMD(0x14, 32, insn->src(1));
1391          } else {
1392             emitInsn(0x32800000);
1393             emitIMMD(0x14, 19, insn->src(1));
1394          }
1395          break;
1396       default:
1397          assert(!"bad src1 file");
1398          break;
1399       }
1400       if (!isLongIMMD)
1401          emitGPR (0x27, insn->src(2));
1402       break;
1403    case FILE_MEMORY_CONST:
1404       emitInsn(0x51800000);
1405       emitGPR (0x27, insn->src(1));
1406       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1407       break;
1408    default:
1409       assert(!"bad src2 file");
1410       break;
1411    }
1412 
1413    if (isLongIMMD) {
1414       emitNEG (0x39, insn->src(2));
1415       emitNEG2(0x38, insn->src(0), insn->src(1));
1416       emitSAT (0x37);
1417       emitCC  (0x34);
1418    } else {
1419       emitRND (0x33);
1420       emitSAT (0x32);
1421       emitNEG (0x31, insn->src(2));
1422       emitNEG2(0x30, insn->src(0), insn->src(1));
1423       emitCC  (0x2f);
1424    }
1425 
1426    emitFMZ(0x35, 2);
1427    emitGPR(0x08, insn->src(0));
1428    emitGPR(0x00, insn->def(0));
1429 }
1430 
1431 void
emitMUFU()1432 CodeEmitterGM107::emitMUFU()
1433 {
1434    int mufu = 0;
1435 
1436    switch (insn->op) {
1437    case OP_COS: mufu = 0; break;
1438    case OP_SIN: mufu = 1; break;
1439    case OP_EX2: mufu = 2; break;
1440    case OP_LG2: mufu = 3; break;
1441    case OP_RCP: mufu = 4 + 2 * insn->subOp; break;
1442    case OP_RSQ: mufu = 5 + 2 * insn->subOp; break;
1443    case OP_SQRT: mufu = 8; break;
1444    default:
1445       assert(!"invalid mufu");
1446       break;
1447    }
1448 
1449    emitInsn (0x50800000);
1450    emitSAT  (0x32);
1451    emitNEG  (0x30, insn->src(0));
1452    emitABS  (0x2e, insn->src(0));
1453    emitField(0x14, 4, mufu);
1454    emitGPR  (0x08, insn->src(0));
1455    emitGPR  (0x00, insn->def(0));
1456 }
1457 
1458 void
emitFMNMX()1459 CodeEmitterGM107::emitFMNMX()
1460 {
1461    switch (insn->src(1).getFile()) {
1462    case FILE_GPR:
1463       emitInsn(0x5c600000);
1464       emitGPR (0x14, insn->src(1));
1465       break;
1466    case FILE_MEMORY_CONST:
1467       emitInsn(0x4c600000);
1468       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1469       break;
1470    case FILE_IMMEDIATE:
1471       emitInsn(0x38600000);
1472       emitIMMD(0x14, 19, insn->src(1));
1473       break;
1474    default:
1475       assert(!"bad src1 file");
1476       break;
1477    }
1478 
1479    emitField(0x2a, 1, insn->op == OP_MAX);
1480    emitPRED (0x27);
1481 
1482    emitABS(0x31, insn->src(1));
1483    emitNEG(0x30, insn->src(0));
1484    emitCC (0x2f);
1485    emitABS(0x2e, insn->src(0));
1486    emitNEG(0x2d, insn->src(1));
1487    emitFMZ(0x2c, 1);
1488    emitGPR(0x08, insn->src(0));
1489    emitGPR(0x00, insn->def(0));
1490 }
1491 
1492 void
emitRRO()1493 CodeEmitterGM107::emitRRO()
1494 {
1495    switch (insn->src(0).getFile()) {
1496    case FILE_GPR:
1497       emitInsn(0x5c900000);
1498       emitGPR (0x14, insn->src(0));
1499       break;
1500    case FILE_MEMORY_CONST:
1501       emitInsn(0x4c900000);
1502       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1503       break;
1504    case FILE_IMMEDIATE:
1505       emitInsn(0x38900000);
1506       emitIMMD(0x14, 19, insn->src(0));
1507       break;
1508    default:
1509       assert(!"bad src file");
1510       break;
1511    }
1512 
1513    emitABS  (0x31, insn->src(0));
1514    emitNEG  (0x2d, insn->src(0));
1515    emitField(0x27, 1, insn->op == OP_PREEX2);
1516    emitGPR  (0x00, insn->def(0));
1517 }
1518 
1519 void
emitFCMP()1520 CodeEmitterGM107::emitFCMP()
1521 {
1522    const CmpInstruction *insn = this->insn->asCmp();
1523    CondCode cc = insn->setCond;
1524 
1525    if (insn->src(2).mod.neg())
1526       cc = reverseCondCode(cc);
1527 
1528    switch(insn->src(2).getFile()) {
1529    case FILE_GPR:
1530       switch (insn->src(1).getFile()) {
1531       case FILE_GPR:
1532          emitInsn(0x5ba00000);
1533          emitGPR (0x14, insn->src(1));
1534          break;
1535       case FILE_MEMORY_CONST:
1536          emitInsn(0x4ba00000);
1537          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1538          break;
1539       case FILE_IMMEDIATE:
1540          emitInsn(0x36a00000);
1541          emitIMMD(0x14, 19, insn->src(1));
1542          break;
1543       default:
1544          assert(!"bad src1 file");
1545          break;
1546       }
1547       emitGPR (0x27, insn->src(2));
1548       break;
1549    case FILE_MEMORY_CONST:
1550       emitInsn(0x53a00000);
1551       emitGPR (0x27, insn->src(1));
1552       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1553       break;
1554    default:
1555       assert(!"bad src2 file");
1556       break;
1557    }
1558 
1559    emitCond4(0x30, cc);
1560    emitFMZ  (0x2f, 1);
1561    emitGPR  (0x08, insn->src(0));
1562    emitGPR  (0x00, insn->def(0));
1563 }
1564 
1565 void
emitFSET()1566 CodeEmitterGM107::emitFSET()
1567 {
1568    const CmpInstruction *insn = this->insn->asCmp();
1569 
1570    switch (insn->src(1).getFile()) {
1571    case FILE_GPR:
1572       emitInsn(0x58000000);
1573       emitGPR (0x14, insn->src(1));
1574       break;
1575    case FILE_MEMORY_CONST:
1576       emitInsn(0x48000000);
1577       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1578       break;
1579    case FILE_IMMEDIATE:
1580       emitInsn(0x30000000);
1581       emitIMMD(0x14, 19, insn->src(1));
1582       break;
1583    default:
1584       assert(!"bad src1 file");
1585       break;
1586    }
1587 
1588    if (insn->op != OP_SET) {
1589       switch (insn->op) {
1590       case OP_SET_AND: emitField(0x2d, 2, 0); break;
1591       case OP_SET_OR : emitField(0x2d, 2, 1); break;
1592       case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1593       default:
1594          assert(!"invalid set op");
1595          break;
1596       }
1597       emitPRED(0x27, insn->src(2));
1598    } else {
1599       emitPRED(0x27);
1600    }
1601 
1602    emitFMZ  (0x37, 1);
1603    emitABS  (0x36, insn->src(0));
1604    emitNEG  (0x35, insn->src(1));
1605    emitField(0x34, 1, insn->dType == TYPE_F32);
1606    emitCond4(0x30, insn->setCond);
1607    emitCC   (0x2f);
1608    emitABS  (0x2c, insn->src(1));
1609    emitNEG  (0x2b, insn->src(0));
1610    emitGPR  (0x08, insn->src(0));
1611    emitGPR  (0x00, insn->def(0));
1612 }
1613 
1614 void
emitFSETP()1615 CodeEmitterGM107::emitFSETP()
1616 {
1617    const CmpInstruction *insn = this->insn->asCmp();
1618 
1619    switch (insn->src(1).getFile()) {
1620    case FILE_GPR:
1621       emitInsn(0x5bb00000);
1622       emitGPR (0x14, insn->src(1));
1623       break;
1624    case FILE_MEMORY_CONST:
1625       emitInsn(0x4bb00000);
1626       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1627       break;
1628    case FILE_IMMEDIATE:
1629       emitInsn(0x36b00000);
1630       emitIMMD(0x14, 19, insn->src(1));
1631       break;
1632    default:
1633       assert(!"bad src1 file");
1634       break;
1635    }
1636 
1637    if (insn->op != OP_SET) {
1638       switch (insn->op) {
1639       case OP_SET_AND: emitField(0x2d, 2, 0); break;
1640       case OP_SET_OR : emitField(0x2d, 2, 1); break;
1641       case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1642       default:
1643          assert(!"invalid set op");
1644          break;
1645       }
1646       emitPRED(0x27, insn->src(2));
1647    } else {
1648       emitPRED(0x27);
1649    }
1650 
1651    emitCond4(0x30, insn->setCond);
1652    emitFMZ  (0x2f, 1);
1653    emitABS  (0x2c, insn->src(1));
1654    emitNEG  (0x2b, insn->src(0));
1655    emitGPR  (0x08, insn->src(0));
1656    emitABS  (0x07, insn->src(0));
1657    emitNEG  (0x06, insn->src(1));
1658    emitPRED (0x03, insn->def(0));
1659    if (insn->defExists(1))
1660       emitPRED(0x00, insn->def(1));
1661    else
1662       emitPRED(0x00);
1663 }
1664 
1665 void
emitFSWZADD()1666 CodeEmitterGM107::emitFSWZADD()
1667 {
1668    emitInsn (0x50f80000);
1669    emitCC   (0x2f);
1670    emitFMZ  (0x2c, 1);
1671    emitRND  (0x27);
1672    emitField(0x26, 1, insn->lanes); /* abused for .ndv */
1673    emitField(0x1c, 8, insn->subOp);
1674    if (insn->predSrc != 1)
1675       emitGPR  (0x14, insn->src(1));
1676    else
1677       emitGPR  (0x14);
1678    emitGPR  (0x08, insn->src(0));
1679    emitGPR  (0x00, insn->def(0));
1680 }
1681 
1682 /*******************************************************************************
1683  * integer
1684  ******************************************************************************/
1685 
1686 void
emitLOP()1687 CodeEmitterGM107::emitLOP()
1688 {
1689    int lop = 0;
1690 
1691    switch (insn->op) {
1692    case OP_AND: lop = 0; break;
1693    case OP_OR : lop = 1; break;
1694    case OP_XOR: lop = 2; break;
1695    default:
1696       assert(!"invalid lop");
1697       break;
1698    }
1699 
1700    if (!longIMMD(insn->src(1))) {
1701       switch (insn->src(1).getFile()) {
1702       case FILE_GPR:
1703          emitInsn(0x5c400000);
1704          emitGPR (0x14, insn->src(1));
1705          break;
1706       case FILE_MEMORY_CONST:
1707          emitInsn(0x4c400000);
1708          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1709          break;
1710       case FILE_IMMEDIATE:
1711          emitInsn(0x38400000);
1712          emitIMMD(0x14, 19, insn->src(1));
1713          break;
1714       default:
1715          assert(!"bad src1 file");
1716          break;
1717       }
1718       emitPRED (0x30);
1719       emitCC   (0x2f);
1720       emitX    (0x2b);
1721       emitField(0x29, 2, lop);
1722       emitINV  (0x28, insn->src(1));
1723       emitINV  (0x27, insn->src(0));
1724    } else {
1725       emitInsn (0x04000000);
1726       emitX    (0x39);
1727       emitINV  (0x38, insn->src(1));
1728       emitINV  (0x37, insn->src(0));
1729       emitField(0x35, 2, lop);
1730       emitCC   (0x34);
1731       emitIMMD (0x14, 32, insn->src(1));
1732    }
1733 
1734    emitGPR  (0x08, insn->src(0));
1735    emitGPR  (0x00, insn->def(0));
1736 }
1737 
1738 /* special-case of emitLOP(): lop pass_b dst 0 ~src */
1739 void
emitNOT()1740 CodeEmitterGM107::emitNOT()
1741 {
1742    if (!longIMMD(insn->src(0))) {
1743       switch (insn->src(0).getFile()) {
1744       case FILE_GPR:
1745          emitInsn(0x5c400700);
1746          emitGPR (0x14, insn->src(0));
1747          break;
1748       case FILE_MEMORY_CONST:
1749          emitInsn(0x4c400700);
1750          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1751          break;
1752       case FILE_IMMEDIATE:
1753          emitInsn(0x38400700);
1754          emitIMMD(0x14, 19, insn->src(0));
1755          break;
1756       default:
1757          assert(!"bad src1 file");
1758          break;
1759       }
1760       emitPRED (0x30);
1761    } else {
1762       emitInsn (0x05600000);
1763       emitIMMD (0x14, 32, insn->src(1));
1764    }
1765 
1766    emitGPR(0x08);
1767    emitGPR(0x00, insn->def(0));
1768 }
1769 
1770 void
emitIADD()1771 CodeEmitterGM107::emitIADD()
1772 {
1773    if (!longIMMD(insn->src(1))) {
1774       switch (insn->src(1).getFile()) {
1775       case FILE_GPR:
1776          emitInsn(0x5c100000);
1777          emitGPR (0x14, insn->src(1));
1778          break;
1779       case FILE_MEMORY_CONST:
1780          emitInsn(0x4c100000);
1781          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1782          break;
1783       case FILE_IMMEDIATE:
1784          emitInsn(0x38100000);
1785          emitIMMD(0x14, 19, insn->src(1));
1786          break;
1787       default:
1788          assert(!"bad src1 file");
1789          break;
1790       }
1791       emitSAT(0x32);
1792       emitNEG(0x31, insn->src(0));
1793       emitNEG(0x30, insn->src(1));
1794       emitCC (0x2f);
1795       emitX  (0x2b);
1796    } else {
1797       emitInsn(0x1c000000);
1798       emitNEG (0x38, insn->src(0));
1799       emitSAT (0x36);
1800       emitX   (0x35);
1801       emitCC  (0x34);
1802       emitIMMD(0x14, 32, insn->src(1));
1803    }
1804 
1805    if (insn->op == OP_SUB)
1806       code[1] ^= 0x00010000;
1807 
1808    emitGPR(0x08, insn->src(0));
1809    emitGPR(0x00, insn->def(0));
1810 }
1811 
1812 void
emitIMUL()1813 CodeEmitterGM107::emitIMUL()
1814 {
1815    if (!longIMMD(insn->src(1))) {
1816       switch (insn->src(1).getFile()) {
1817       case FILE_GPR:
1818          emitInsn(0x5c380000);
1819          emitGPR (0x14, insn->src(1));
1820          break;
1821       case FILE_MEMORY_CONST:
1822          emitInsn(0x4c380000);
1823          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1824          break;
1825       case FILE_IMMEDIATE:
1826          emitInsn(0x38380000);
1827          emitIMMD(0x14, 19, insn->src(1));
1828          break;
1829       default:
1830          assert(!"bad src1 file");
1831          break;
1832       }
1833       emitCC   (0x2f);
1834       emitField(0x29, 1, isSignedType(insn->sType));
1835       emitField(0x28, 1, isSignedType(insn->dType));
1836       emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1837    } else {
1838       emitInsn (0x1f000000);
1839       emitField(0x37, 1, isSignedType(insn->sType));
1840       emitField(0x36, 1, isSignedType(insn->dType));
1841       emitField(0x35, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1842       emitCC   (0x34);
1843       emitIMMD (0x14, 32, insn->src(1));
1844    }
1845 
1846    emitGPR(0x08, insn->src(0));
1847    emitGPR(0x00, insn->def(0));
1848 }
1849 
1850 void
emitIMAD()1851 CodeEmitterGM107::emitIMAD()
1852 {
1853    /*XXX: imad32i exists, but not using it as third src overlaps dst */
1854    switch(insn->src(2).getFile()) {
1855    case FILE_GPR:
1856       switch (insn->src(1).getFile()) {
1857       case FILE_GPR:
1858          emitInsn(0x5a000000);
1859          emitGPR (0x14, insn->src(1));
1860          break;
1861       case FILE_MEMORY_CONST:
1862          emitInsn(0x4a000000);
1863          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1864          break;
1865       case FILE_IMMEDIATE:
1866          emitInsn(0x34000000);
1867          emitIMMD(0x14, 19, insn->src(1));
1868          break;
1869       default:
1870          assert(!"bad src1 file");
1871          break;
1872       }
1873       emitGPR (0x27, insn->src(2));
1874       break;
1875    case FILE_MEMORY_CONST:
1876       emitInsn(0x52000000);
1877       emitGPR (0x27, insn->src(1));
1878       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1879       break;
1880    default:
1881       assert(!"bad src2 file");
1882       break;
1883    }
1884 
1885    emitField(0x36, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1886    emitField(0x35, 1, isSignedType(insn->sType));
1887    emitNEG  (0x34, insn->src(2));
1888    emitNEG2 (0x33, insn->src(0), insn->src(1));
1889    emitSAT  (0x32);
1890    emitX    (0x31);
1891    emitField(0x30, 1, isSignedType(insn->dType));
1892    emitCC   (0x2f);
1893    emitGPR  (0x08, insn->src(0));
1894    emitGPR  (0x00, insn->def(0));
1895 }
1896 
1897 void
emitISCADD()1898 CodeEmitterGM107::emitISCADD()
1899 {
1900    assert(insn->src(1).get()->asImm());
1901 
1902    switch (insn->src(2).getFile()) {
1903    case FILE_GPR:
1904       emitInsn(0x5c180000);
1905       emitGPR (0x14, insn->src(2));
1906       break;
1907    case FILE_MEMORY_CONST:
1908       emitInsn(0x4c180000);
1909       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1910       break;
1911    case FILE_IMMEDIATE:
1912       emitInsn(0x38180000);
1913       emitIMMD(0x14, 19, insn->src(2));
1914       break;
1915    default:
1916       assert(!"bad src1 file");
1917       break;
1918    }
1919    emitNEG (0x31, insn->src(0));
1920    emitNEG (0x30, insn->src(2));
1921    emitCC  (0x2f);
1922    emitIMMD(0x27, 5, insn->src(1));
1923    emitGPR (0x08, insn->src(0));
1924    emitGPR (0x00, insn->def(0));
1925 }
1926 
1927 void
emitXMAD()1928 CodeEmitterGM107::emitXMAD()
1929 {
1930    assert(insn->src(0).getFile() == FILE_GPR);
1931 
1932    bool constbuf = false;
1933    bool psl_mrg = true;
1934    bool immediate = false;
1935    if (insn->src(2).getFile() == FILE_MEMORY_CONST) {
1936       assert(insn->src(1).getFile() == FILE_GPR);
1937       constbuf = true;
1938       psl_mrg = false;
1939       emitInsn(0x51000000);
1940       emitGPR(0x27, insn->src(1));
1941       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1942    } else if (insn->src(1).getFile() == FILE_MEMORY_CONST) {
1943       assert(insn->src(2).getFile() == FILE_GPR);
1944       constbuf = true;
1945       emitInsn(0x4e000000);
1946       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1947       emitGPR(0x27, insn->src(2));
1948    } else if (insn->src(1).getFile() == FILE_IMMEDIATE) {
1949       assert(insn->src(2).getFile() == FILE_GPR);
1950       assert(!(insn->subOp & NV50_IR_SUBOP_XMAD_H1(1)));
1951       immediate = true;
1952       emitInsn(0x36000000);
1953       emitIMMD(0x14, 16, insn->src(1));
1954       emitGPR(0x27, insn->src(2));
1955    } else {
1956       assert(insn->src(1).getFile() == FILE_GPR);
1957       assert(insn->src(2).getFile() == FILE_GPR);
1958       emitInsn(0x5b000000);
1959       emitGPR(0x14, insn->src(1));
1960       emitGPR(0x27, insn->src(2));
1961    }
1962 
1963    if (psl_mrg)
1964       emitField(constbuf ? 0x37 : 0x24, 2, insn->subOp & 0x3);
1965 
1966    unsigned cmode = (insn->subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK);
1967    cmode >>= NV50_IR_SUBOP_XMAD_CMODE_SHIFT;
1968    emitField(0x32, constbuf ? 2 : 3, cmode);
1969 
1970    emitX(constbuf ? 0x36 : 0x26);
1971    emitCC(0x2f);
1972 
1973    emitGPR(0x0, insn->def(0));
1974    emitGPR(0x8, insn->src(0));
1975 
1976    // source flags
1977    if (isSignedType(insn->sType)) {
1978       uint16_t h1s = insn->subOp & NV50_IR_SUBOP_XMAD_H1_MASK;
1979       emitField(0x30, 2, h1s >> NV50_IR_SUBOP_XMAD_H1_SHIFT);
1980    }
1981    emitField(0x35, 1, insn->subOp & NV50_IR_SUBOP_XMAD_H1(0) ? 1 : 0);
1982    if (!immediate) {
1983       bool h1 = insn->subOp & NV50_IR_SUBOP_XMAD_H1(1);
1984       emitField(constbuf ? 0x34 : 0x23, 1, h1);
1985    }
1986 }
1987 
1988 void
emitIMNMX()1989 CodeEmitterGM107::emitIMNMX()
1990 {
1991    switch (insn->src(1).getFile()) {
1992    case FILE_GPR:
1993       emitInsn(0x5c200000);
1994       emitGPR (0x14, insn->src(1));
1995       break;
1996    case FILE_MEMORY_CONST:
1997       emitInsn(0x4c200000);
1998       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1999       break;
2000    case FILE_IMMEDIATE:
2001       emitInsn(0x38200000);
2002       emitIMMD(0x14, 19, insn->src(1));
2003       break;
2004    default:
2005       assert(!"bad src1 file");
2006       break;
2007    }
2008 
2009    emitField(0x30, 1, isSignedType(insn->dType));
2010    emitCC   (0x2f);
2011    emitField(0x2b, 2, insn->subOp);
2012    emitField(0x2a, 1, insn->op == OP_MAX);
2013    emitPRED (0x27);
2014    emitGPR  (0x08, insn->src(0));
2015    emitGPR  (0x00, insn->def(0));
2016 }
2017 
2018 void
emitICMP()2019 CodeEmitterGM107::emitICMP()
2020 {
2021    const CmpInstruction *insn = this->insn->asCmp();
2022    CondCode cc = insn->setCond;
2023 
2024    if (insn->src(2).mod.neg())
2025       cc = reverseCondCode(cc);
2026 
2027    switch(insn->src(2).getFile()) {
2028    case FILE_GPR:
2029       switch (insn->src(1).getFile()) {
2030       case FILE_GPR:
2031          emitInsn(0x5b400000);
2032          emitGPR (0x14, insn->src(1));
2033          break;
2034       case FILE_MEMORY_CONST:
2035          emitInsn(0x4b400000);
2036          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2037          break;
2038       case FILE_IMMEDIATE:
2039          emitInsn(0x36400000);
2040          emitIMMD(0x14, 19, insn->src(1));
2041          break;
2042       default:
2043          assert(!"bad src1 file");
2044          break;
2045       }
2046       emitGPR (0x27, insn->src(2));
2047       break;
2048    case FILE_MEMORY_CONST:
2049       emitInsn(0x53400000);
2050       emitGPR (0x27, insn->src(1));
2051       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
2052       break;
2053    default:
2054       assert(!"bad src2 file");
2055       break;
2056    }
2057 
2058    emitCond3(0x31, cc);
2059    emitField(0x30, 1, isSignedType(insn->sType));
2060    emitGPR  (0x08, insn->src(0));
2061    emitGPR  (0x00, insn->def(0));
2062 }
2063 
2064 void
emitISET()2065 CodeEmitterGM107::emitISET()
2066 {
2067    const CmpInstruction *insn = this->insn->asCmp();
2068 
2069    switch (insn->src(1).getFile()) {
2070    case FILE_GPR:
2071       emitInsn(0x5b500000);
2072       emitGPR (0x14, insn->src(1));
2073       break;
2074    case FILE_MEMORY_CONST:
2075       emitInsn(0x4b500000);
2076       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2077       break;
2078    case FILE_IMMEDIATE:
2079       emitInsn(0x36500000);
2080       emitIMMD(0x14, 19, insn->src(1));
2081       break;
2082    default:
2083       assert(!"bad src1 file");
2084       break;
2085    }
2086 
2087    if (insn->op != OP_SET) {
2088       switch (insn->op) {
2089       case OP_SET_AND: emitField(0x2d, 2, 0); break;
2090       case OP_SET_OR : emitField(0x2d, 2, 1); break;
2091       case OP_SET_XOR: emitField(0x2d, 2, 2); break;
2092       default:
2093          assert(!"invalid set op");
2094          break;
2095       }
2096       emitPRED(0x27, insn->src(2));
2097    } else {
2098       emitPRED(0x27);
2099    }
2100 
2101    emitCond3(0x31, insn->setCond);
2102    emitField(0x30, 1, isSignedType(insn->sType));
2103    emitCC   (0x2f);
2104    emitField(0x2c, 1, insn->dType == TYPE_F32);
2105    emitX    (0x2b);
2106    emitGPR  (0x08, insn->src(0));
2107    emitGPR  (0x00, insn->def(0));
2108 }
2109 
2110 void
emitISETP()2111 CodeEmitterGM107::emitISETP()
2112 {
2113    const CmpInstruction *insn = this->insn->asCmp();
2114 
2115    switch (insn->src(1).getFile()) {
2116    case FILE_GPR:
2117       emitInsn(0x5b600000);
2118       emitGPR (0x14, insn->src(1));
2119       break;
2120    case FILE_MEMORY_CONST:
2121       emitInsn(0x4b600000);
2122       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2123       break;
2124    case FILE_IMMEDIATE:
2125       emitInsn(0x36600000);
2126       emitIMMD(0x14, 19, insn->src(1));
2127       break;
2128    default:
2129       assert(!"bad src1 file");
2130       break;
2131    }
2132 
2133    if (insn->op != OP_SET) {
2134       switch (insn->op) {
2135       case OP_SET_AND: emitField(0x2d, 2, 0); break;
2136       case OP_SET_OR : emitField(0x2d, 2, 1); break;
2137       case OP_SET_XOR: emitField(0x2d, 2, 2); break;
2138       default:
2139          assert(!"invalid set op");
2140          break;
2141       }
2142       emitPRED(0x27, insn->src(2));
2143    } else {
2144       emitPRED(0x27);
2145    }
2146 
2147    emitCond3(0x31, insn->setCond);
2148    emitField(0x30, 1, isSignedType(insn->sType));
2149    emitX    (0x2b);
2150    emitGPR  (0x08, insn->src(0));
2151    emitPRED (0x03, insn->def(0));
2152    if (insn->defExists(1))
2153       emitPRED(0x00, insn->def(1));
2154    else
2155       emitPRED(0x00);
2156 }
2157 
2158 void
emitSHL()2159 CodeEmitterGM107::emitSHL()
2160 {
2161    switch (insn->src(1).getFile()) {
2162    case FILE_GPR:
2163       emitInsn(0x5c480000);
2164       emitGPR (0x14, insn->src(1));
2165       break;
2166    case FILE_MEMORY_CONST:
2167       emitInsn(0x4c480000);
2168       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2169       break;
2170    case FILE_IMMEDIATE:
2171       emitInsn(0x38480000);
2172       emitIMMD(0x14, 19, insn->src(1));
2173       break;
2174    default:
2175       assert(!"bad src1 file");
2176       break;
2177    }
2178 
2179    emitCC   (0x2f);
2180    emitX    (0x2b);
2181    emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2182    emitGPR  (0x08, insn->src(0));
2183    emitGPR  (0x00, insn->def(0));
2184 }
2185 
2186 void
emitSHR()2187 CodeEmitterGM107::emitSHR()
2188 {
2189    switch (insn->src(1).getFile()) {
2190    case FILE_GPR:
2191       emitInsn(0x5c280000);
2192       emitGPR (0x14, insn->src(1));
2193       break;
2194    case FILE_MEMORY_CONST:
2195       emitInsn(0x4c280000);
2196       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2197       break;
2198    case FILE_IMMEDIATE:
2199       emitInsn(0x38280000);
2200       emitIMMD(0x14, 19, insn->src(1));
2201       break;
2202    default:
2203       assert(!"bad src1 file");
2204       break;
2205    }
2206 
2207    emitField(0x30, 1, isSignedType(insn->dType));
2208    emitCC   (0x2f);
2209    emitX    (0x2c);
2210    emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2211    emitGPR  (0x08, insn->src(0));
2212    emitGPR  (0x00, insn->def(0));
2213 }
2214 
2215 void
emitSHF()2216 CodeEmitterGM107::emitSHF()
2217 {
2218    unsigned type;
2219 
2220    switch (insn->src(1).getFile()) {
2221    case FILE_GPR:
2222       emitInsn(insn->op == OP_SHL ? 0x5bf80000 : 0x5cf80000);
2223       emitGPR(0x14, insn->src(1));
2224       break;
2225    case FILE_IMMEDIATE:
2226       emitInsn(insn->op == OP_SHL ? 0x36f80000 : 0x38f80000);
2227       emitIMMD(0x14, 19, insn->src(1));
2228       break;
2229    default:
2230       assert(!"bad src1 file");
2231       break;
2232    }
2233 
2234    switch (insn->sType) {
2235    case TYPE_U64:
2236       type = 2;
2237       break;
2238    case TYPE_S64:
2239       type = 3;
2240       break;
2241    default:
2242       type = 0;
2243       break;
2244    }
2245 
2246    emitField(0x32, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_WRAP));
2247    emitX    (0x31);
2248    emitField(0x30, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_HIGH));
2249    emitCC   (0x2f);
2250    emitGPR  (0x27, insn->src(2));
2251    emitField(0x25, 2, type);
2252    emitGPR  (0x08, insn->src(0));
2253    emitGPR  (0x00, insn->def(0));
2254 }
2255 
2256 void
emitPOPC()2257 CodeEmitterGM107::emitPOPC()
2258 {
2259    switch (insn->src(0).getFile()) {
2260    case FILE_GPR:
2261       emitInsn(0x5c080000);
2262       emitGPR (0x14, insn->src(0));
2263       break;
2264    case FILE_MEMORY_CONST:
2265       emitInsn(0x4c080000);
2266       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2267       break;
2268    case FILE_IMMEDIATE:
2269       emitInsn(0x38080000);
2270       emitIMMD(0x14, 19, insn->src(0));
2271       break;
2272    default:
2273       assert(!"bad src1 file");
2274       break;
2275    }
2276 
2277    emitINV(0x28, insn->src(0));
2278    emitGPR(0x00, insn->def(0));
2279 }
2280 
2281 void
emitBFI()2282 CodeEmitterGM107::emitBFI()
2283 {
2284    switch(insn->src(2).getFile()) {
2285    case FILE_GPR:
2286       switch (insn->src(1).getFile()) {
2287       case FILE_GPR:
2288          emitInsn(0x5bf00000);
2289          emitGPR (0x14, insn->src(1));
2290          break;
2291       case FILE_MEMORY_CONST:
2292          emitInsn(0x4bf00000);
2293          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2294          break;
2295       case FILE_IMMEDIATE:
2296          emitInsn(0x36f00000);
2297          emitIMMD(0x14, 19, insn->src(1));
2298          break;
2299       default:
2300          assert(!"bad src1 file");
2301          break;
2302       }
2303       emitGPR (0x27, insn->src(2));
2304       break;
2305    case FILE_MEMORY_CONST:
2306       emitInsn(0x53f00000);
2307       emitGPR (0x27, insn->src(1));
2308       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
2309       break;
2310    default:
2311       assert(!"bad src2 file");
2312       break;
2313    }
2314 
2315    emitCC   (0x2f);
2316    emitGPR  (0x08, insn->src(0));
2317    emitGPR  (0x00, insn->def(0));
2318 }
2319 
2320 void
emitBFE()2321 CodeEmitterGM107::emitBFE()
2322 {
2323    switch (insn->src(1).getFile()) {
2324    case FILE_GPR:
2325       emitInsn(0x5c000000);
2326       emitGPR (0x14, insn->src(1));
2327       break;
2328    case FILE_MEMORY_CONST:
2329       emitInsn(0x4c000000);
2330       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2331       break;
2332    case FILE_IMMEDIATE:
2333       emitInsn(0x38000000);
2334       emitIMMD(0x14, 19, insn->src(1));
2335       break;
2336    default:
2337       assert(!"bad src1 file");
2338       break;
2339    }
2340 
2341    emitField(0x30, 1, isSignedType(insn->dType));
2342    emitCC   (0x2f);
2343    emitField(0x28, 1, insn->subOp == NV50_IR_SUBOP_EXTBF_REV);
2344    emitGPR  (0x08, insn->src(0));
2345    emitGPR  (0x00, insn->def(0));
2346 }
2347 
2348 void
emitFLO()2349 CodeEmitterGM107::emitFLO()
2350 {
2351    switch (insn->src(0).getFile()) {
2352    case FILE_GPR:
2353       emitInsn(0x5c300000);
2354       emitGPR (0x14, insn->src(0));
2355       break;
2356    case FILE_MEMORY_CONST:
2357       emitInsn(0x4c300000);
2358       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2359       break;
2360    case FILE_IMMEDIATE:
2361       emitInsn(0x38300000);
2362       emitIMMD(0x14, 19, insn->src(0));
2363       break;
2364    default:
2365       assert(!"bad src1 file");
2366       break;
2367    }
2368 
2369    emitField(0x30, 1, isSignedType(insn->dType));
2370    emitCC   (0x2f);
2371    emitField(0x29, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT);
2372    emitINV  (0x28, insn->src(0));
2373    emitGPR  (0x00, insn->def(0));
2374 }
2375 
2376 void
emitPRMT()2377 CodeEmitterGM107::emitPRMT()
2378 {
2379    switch (insn->src(1).getFile()) {
2380    case FILE_GPR:
2381       emitInsn(0x5bc00000);
2382       emitGPR (0x14, insn->src(1));
2383       break;
2384    case FILE_MEMORY_CONST:
2385       emitInsn(0x4bc00000);
2386       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2387       break;
2388    case FILE_IMMEDIATE:
2389       emitInsn(0x36c00000);
2390       emitIMMD(0x14, 19, insn->src(1));
2391       break;
2392    default:
2393       assert(!"bad src1 file");
2394       break;
2395    }
2396 
2397    emitField(0x30, 3, insn->subOp);
2398    emitGPR  (0x27, insn->src(2));
2399    emitGPR  (0x08, insn->src(0));
2400    emitGPR  (0x00, insn->def(0));
2401 }
2402 
2403 /*******************************************************************************
2404  * memory
2405  ******************************************************************************/
2406 
2407 void
emitLDSTs(int pos,DataType type)2408 CodeEmitterGM107::emitLDSTs(int pos, DataType type)
2409 {
2410    int data = 0;
2411 
2412    switch (typeSizeof(type)) {
2413    case  1: data = isSignedType(type) ? 1 : 0; break;
2414    case  2: data = isSignedType(type) ? 3 : 2; break;
2415    case  4: data = 4; break;
2416    case  8: data = 5; break;
2417    case 16: data = 6; break;
2418    default:
2419       assert(!"bad type");
2420       break;
2421    }
2422 
2423    emitField(pos, 3, data);
2424 }
2425 
2426 void
emitLDSTc(int pos)2427 CodeEmitterGM107::emitLDSTc(int pos)
2428 {
2429    int mode = 0;
2430 
2431    switch (insn->cache) {
2432    case CACHE_CA: mode = 0; break;
2433    case CACHE_CG: mode = 1; break;
2434    case CACHE_CS: mode = 2; break;
2435    case CACHE_CV: mode = 3; break;
2436    default:
2437       assert(!"invalid caching mode");
2438       break;
2439    }
2440 
2441    emitField(pos, 2, mode);
2442 }
2443 
2444 void
emitLDC()2445 CodeEmitterGM107::emitLDC()
2446 {
2447    emitInsn (0xef900000);
2448    emitLDSTs(0x30, insn->dType);
2449    emitField(0x2c, 2, insn->subOp);
2450    emitCBUF (0x24, 0x08, 0x14, 16, 0, insn->src(0));
2451    emitGPR  (0x00, insn->def(0));
2452 }
2453 
2454 void
emitLDL()2455 CodeEmitterGM107::emitLDL()
2456 {
2457    emitInsn (0xef400000);
2458    emitLDSTs(0x30, insn->dType);
2459    emitLDSTc(0x2c);
2460    emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2461    emitGPR  (0x00, insn->def(0));
2462 }
2463 
2464 void
emitLDS()2465 CodeEmitterGM107::emitLDS()
2466 {
2467    emitInsn (0xef480000);
2468    emitLDSTs(0x30, insn->dType);
2469    emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2470    emitGPR  (0x00, insn->def(0));
2471 }
2472 
2473 void
emitLD()2474 CodeEmitterGM107::emitLD()
2475 {
2476    emitInsn (0x80000000);
2477    emitPRED (0x3a);
2478    emitLDSTc(0x38);
2479    emitLDSTs(0x35, insn->dType);
2480    emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2481    emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2482    emitGPR  (0x00, insn->def(0));
2483 }
2484 
2485 void
emitSTL()2486 CodeEmitterGM107::emitSTL()
2487 {
2488    emitInsn (0xef500000);
2489    emitLDSTs(0x30, insn->dType);
2490    emitLDSTc(0x2c);
2491    emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2492    emitGPR  (0x00, insn->src(1));
2493 }
2494 
2495 void
emitSTS()2496 CodeEmitterGM107::emitSTS()
2497 {
2498    emitInsn (0xef580000);
2499    emitLDSTs(0x30, insn->dType);
2500    emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2501    emitGPR  (0x00, insn->src(1));
2502 }
2503 
2504 void
emitST()2505 CodeEmitterGM107::emitST()
2506 {
2507    emitInsn (0xa0000000);
2508    emitPRED (0x3a);
2509    emitLDSTc(0x38);
2510    emitLDSTs(0x35, insn->dType);
2511    emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2512    emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2513    emitGPR  (0x00, insn->src(1));
2514 }
2515 
2516 void
emitALD()2517 CodeEmitterGM107::emitALD()
2518 {
2519    emitInsn (0xefd80000);
2520    emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2521    emitGPR  (0x27, insn->src(0).getIndirect(1));
2522    emitO    (0x20);
2523    emitP    (0x1f);
2524    emitADDR (0x08, 20, 10, 0, insn->src(0));
2525    emitGPR  (0x00, insn->def(0));
2526 }
2527 
2528 void
emitAST()2529 CodeEmitterGM107::emitAST()
2530 {
2531    emitInsn (0xeff00000);
2532    emitField(0x2f, 2, (typeSizeof(insn->dType) / 4) - 1);
2533    emitGPR  (0x27, insn->src(0).getIndirect(1));
2534    emitP    (0x1f);
2535    emitADDR (0x08, 20, 10, 0, insn->src(0));
2536    emitGPR  (0x00, insn->src(1));
2537 }
2538 
2539 void
emitISBERD()2540 CodeEmitterGM107::emitISBERD()
2541 {
2542    emitInsn(0xefd00000);
2543    emitGPR (0x08, insn->src(0));
2544    emitGPR (0x00, insn->def(0));
2545 }
2546 
2547 void
emitAL2P()2548 CodeEmitterGM107::emitAL2P()
2549 {
2550    emitInsn (0xefa00000);
2551    emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2552    emitPRED (0x2c);
2553    emitO    (0x20);
2554    emitField(0x14, 11, insn->src(0).get()->reg.data.offset);
2555    emitGPR  (0x08, insn->src(0).getIndirect(0));
2556    emitGPR  (0x00, insn->def(0));
2557 }
2558 
2559 void
gm107_interpApply(const FixupEntry * entry,uint32_t * code,const FixupData & data)2560 gm107_interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
2561 {
2562    int ipa = entry->ipa;
2563    int reg = entry->reg;
2564    int loc = entry->loc;
2565 
2566    if (data.flatshade &&
2567        (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
2568       ipa = NV50_IR_INTERP_FLAT;
2569       reg = 0xff;
2570    } else if (data.force_persample_interp &&
2571               (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
2572               (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
2573       ipa |= NV50_IR_INTERP_CENTROID;
2574    }
2575    code[loc + 1] &= ~(0xf << 0x14);
2576    code[loc + 1] |= (ipa & 0x3) << 0x16;
2577    code[loc + 1] |= (ipa & 0xc) << (0x14 - 2);
2578    code[loc + 0] &= ~(0xff << 0x14);
2579    code[loc + 0] |= reg << 0x14;
2580 }
2581 
/* Emit an interpolation instruction (opcode 0xe0000000).
 *
 * The interpolation mode and sample mode of the instruction are translated
 * into the 2-bit fields at 0x36 and 0x34.  OP_PINTERP takes an extra GPR
 * (source 1) at 0x14; for the OFFSET sample mode the offset GPR at 0x27 is
 * the next source after that.  In every case a fixup is registered through
 * addInterp() with gm107_interpApply as the callback.
 */
void
CodeEmitterGM107::emitIPA()
{
   int ipam = 0, ipas = 0;

   switch (insn->getInterpMode()) {
   case NV50_IR_INTERP_LINEAR     : ipam = 0; break;
   case NV50_IR_INTERP_PERSPECTIVE: ipam = 1; break;
   case NV50_IR_INTERP_FLAT       : ipam = 2; break;
   case NV50_IR_INTERP_SC         : ipam = 3; break;
   default:
      assert(!"invalid ipa mode");
      break;
   }

   switch (insn->getSampleMode()) {
   case NV50_IR_INTERP_DEFAULT : ipas = 0; break;
   case NV50_IR_INTERP_CENTROID: ipas = 1; break;
   case NV50_IR_INTERP_OFFSET  : ipas = 2; break;
   default:
      assert(!"invalid ipa sample mode");
      break;
   }

   emitInsn (0xe0000000);
   emitField(0x36, 2, ipam);
   emitField(0x34, 2, ipas);
   emitSAT  (0x33);
   emitField(0x2f, 3, 7);
   emitADDR (0x08, 0x1c, 10, 0, insn->src(0));
   /* Reads back bits 8..15 of the word just written; presumably this is the
    * address register emitted by emitADDR() at 0x08, with 0xff meaning
    * "no register" -- TODO confirm.  Must stay directly after emitADDR(). */
   if ((code[0] & 0x0000ff00) != 0x0000ff00)
      code[1] |= 0x00000040; /* .idx */
   emitGPR(0x00, insn->def(0));

   if (insn->op == OP_PINTERP) {
      emitGPR(0x14, insn->src(1));
      if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
         emitGPR(0x27, insn->src(2));
      /* fixup records the register id of source 1 */
      addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, gm107_interpApply);
   } else {
      if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
         emitGPR(0x27, insn->src(1));
      emitGPR(0x14);
      /* no extra GPR operand: fixup records 0xff as the register */
      addInterp(insn->ipa, 0xff, gm107_interpApply);
   }

   /* without an offset operand the 0x27 slot gets the argument-less GPR */
   if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET)
      emitGPR(0x27);
}
2631 
2632 void
emitATOM()2633 CodeEmitterGM107::emitATOM()
2634 {
2635    unsigned dType, subOp;
2636 
2637    if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2638       switch (insn->dType) {
2639       case TYPE_U32: dType = 0; break;
2640       case TYPE_U64: dType = 1; break;
2641       default: assert(!"unexpected dType"); dType = 0; break;
2642       }
2643       subOp = 15;
2644 
2645       emitInsn (0xee000000);
2646    } else {
2647       switch (insn->dType) {
2648       case TYPE_U32: dType = 0; break;
2649       case TYPE_S32: dType = 1; break;
2650       case TYPE_U64: dType = 2; break;
2651       case TYPE_F32: dType = 3; break;
2652       case TYPE_B128: dType = 4; break;
2653       case TYPE_S64: dType = 5; break;
2654       default: assert(!"unexpected dType"); dType = 0; break;
2655       }
2656       if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2657          subOp = 8;
2658       else
2659          subOp = insn->subOp;
2660 
2661       emitInsn (0xed000000);
2662    }
2663 
2664    emitField(0x34, 4, subOp);
2665    emitField(0x31, 3, dType);
2666    emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2667    emitGPR  (0x14, insn->src(1));
2668    emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2669    emitGPR  (0x00, insn->def(0));
2670 }
2671 
2672 void
emitATOMS()2673 CodeEmitterGM107::emitATOMS()
2674 {
2675    unsigned dType, subOp;
2676 
2677    if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2678       switch (insn->dType) {
2679       case TYPE_U32: dType = 0; break;
2680       case TYPE_U64: dType = 1; break;
2681       default: assert(!"unexpected dType"); dType = 0; break;
2682       }
2683       subOp = 4;
2684 
2685       emitInsn (0xee000000);
2686       emitField(0x34, 1, dType);
2687    } else {
2688       switch (insn->dType) {
2689       case TYPE_U32: dType = 0; break;
2690       case TYPE_S32: dType = 1; break;
2691       case TYPE_U64: dType = 2; break;
2692       case TYPE_S64: dType = 3; break;
2693       default: assert(!"unexpected dType"); dType = 0; break;
2694       }
2695 
2696       if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2697          subOp = 8;
2698       else
2699          subOp = insn->subOp;
2700 
2701       emitInsn (0xec000000);
2702       emitField(0x1c, 3, dType);
2703    }
2704 
2705    emitField(0x34, 4, subOp);
2706    emitGPR  (0x14, insn->src(1));
2707    emitADDR (0x08, 0x1e, 22, 2, insn->src(0));
2708    emitGPR  (0x00, insn->def(0));
2709 }
2710 
2711 void
emitRED()2712 CodeEmitterGM107::emitRED()
2713 {
2714    unsigned dType;
2715 
2716    switch (insn->dType) {
2717    case TYPE_U32: dType = 0; break;
2718    case TYPE_S32: dType = 1; break;
2719    case TYPE_U64: dType = 2; break;
2720    case TYPE_F32: dType = 3; break;
2721    case TYPE_B128: dType = 4; break;
2722    case TYPE_S64: dType = 5; break;
2723    default: assert(!"unexpected dType"); dType = 0; break;
2724    }
2725 
2726    emitInsn (0xebf80000);
2727    emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2728    emitField(0x17, 3, insn->subOp);
2729    emitField(0x14, 3, dType);
2730    emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2731    emitGPR  (0x00, insn->src(1));
2732 }
2733 
2734 void
emitCCTL()2735 CodeEmitterGM107::emitCCTL()
2736 {
2737    unsigned width;
2738    if (insn->src(0).getFile() == FILE_MEMORY_GLOBAL) {
2739       emitInsn(0xef600000);
2740       width = 30;
2741    } else {
2742       emitInsn(0xef800000);
2743       width = 22;
2744    }
2745    emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2746    emitADDR (0x08, 0x16, width, 2, insn->src(0));
2747    emitField(0x00, 4, insn->subOp);
2748 }
2749 
2750 /*******************************************************************************
2751  * surface
2752  ******************************************************************************/
2753 
2754 void
emitPIXLD()2755 CodeEmitterGM107::emitPIXLD()
2756 {
2757    emitInsn (0xefe80000);
2758    emitPRED (0x2d);
2759    emitField(0x1f, 3, insn->subOp);
2760    emitGPR  (0x08, insn->src(0));
2761    emitGPR  (0x00, insn->def(0));
2762 }
2763 
2764 /*******************************************************************************
2765  * texture
2766  ******************************************************************************/
2767 
2768 void
emitTEXs(int pos)2769 CodeEmitterGM107::emitTEXs(int pos)
2770 {
2771    int src1 = insn->predSrc == 1 ? 2 : 1;
2772    if (insn->srcExists(src1))
2773       emitGPR(pos, insn->src(src1));
2774    else
2775       emitGPR(pos);
2776 }
2777 
/* Translate a 4-bit component write mask into the 3-bit mask encoding used
 * by emitTEXS().  Masks 0x0, 0x5, 0x6 and anything above 0xf have no
 * encoding: they assert and return 0.
 */
static uint8_t
getTEXSMask(uint8_t mask)
{
   /* indexed by component mask; -1 marks masks without an encoding */
   static const int8_t encoding[16] = {
      -1,  0x0, 0x1, 0x4,
      0x2, -1,  -1,  0x0,
      0x3, 0x5, 0x6, 0x1,
      0x7, 0x2, 0x3, 0x4,
   };

   if (mask > 0xf || encoding[mask] < 0) {
      assert(!"invalid mask");
      return 0;
   }
   return static_cast<uint8_t>(encoding[mask]);
}
2800 
2801 static uint8_t
getTEXSTarget(const TexInstruction * tex)2802 getTEXSTarget(const TexInstruction *tex)
2803 {
2804    assert(tex->op == OP_TEX || tex->op == OP_TXL);
2805 
2806    switch (tex->tex.target.getEnum()) {
2807    case TEX_TARGET_1D:
2808       assert(tex->tex.levelZero);
2809       return 0x0;
2810    case TEX_TARGET_2D:
2811    case TEX_TARGET_RECT:
2812       if (tex->tex.levelZero)
2813          return 0x2;
2814       if (tex->op == OP_TXL)
2815          return 0x3;
2816       return 0x1;
2817    case TEX_TARGET_2D_SHADOW:
2818    case TEX_TARGET_RECT_SHADOW:
2819       if (tex->tex.levelZero)
2820          return 0x6;
2821       if (tex->op == OP_TXL)
2822          return 0x5;
2823       return 0x4;
2824    case TEX_TARGET_2D_ARRAY:
2825       if (tex->tex.levelZero)
2826          return 0x8;
2827       return 0x7;
2828    case TEX_TARGET_2D_ARRAY_SHADOW:
2829       assert(tex->tex.levelZero);
2830       return 0x9;
2831    case TEX_TARGET_3D:
2832       if (tex->tex.levelZero)
2833          return 0xb;
2834       assert(tex->op != OP_TXL);
2835       return 0xa;
2836    case TEX_TARGET_CUBE:
2837       assert(!tex->tex.levelZero);
2838       if (tex->op == OP_TXL)
2839          return 0xd;
2840       return 0xc;
2841    default:
2842       assert(false);
2843       return 0x0;
2844    }
2845 }
2846 
2847 static uint8_t
getTLDSTarget(const TexInstruction * tex)2848 getTLDSTarget(const TexInstruction *tex)
2849 {
2850    switch (tex->tex.target.getEnum()) {
2851    case TEX_TARGET_1D:
2852       if (tex->tex.levelZero)
2853          return 0x0;
2854       return 0x1;
2855    case TEX_TARGET_2D:
2856    case TEX_TARGET_RECT:
2857       if (tex->tex.levelZero)
2858          return tex->tex.useOffsets ? 0x4 : 0x2;
2859       return tex->tex.useOffsets ? 0xc : 0x5;
2860    case TEX_TARGET_2D_MS:
2861       assert(tex->tex.levelZero);
2862       return 0x6;
2863    case TEX_TARGET_3D:
2864       assert(tex->tex.levelZero);
2865       return 0x7;
2866    case TEX_TARGET_2D_ARRAY:
2867       assert(tex->tex.levelZero);
2868       return 0x8;
2869 
2870    default:
2871       assert(false);
2872       return 0x0;
2873    }
2874 }
2875 
2876 void
emitTEX()2877 CodeEmitterGM107::emitTEX()
2878 {
2879    const TexInstruction *insn = this->insn->asTex();
2880    int lodm = 0;
2881 
2882    if (!insn->tex.levelZero) {
2883       switch (insn->op) {
2884       case OP_TEX: lodm = 0; break;
2885       case OP_TXB: lodm = 2; break;
2886       case OP_TXL: lodm = 3; break;
2887       default:
2888          assert(!"invalid tex op");
2889          break;
2890       }
2891    } else {
2892       lodm = 1;
2893    }
2894 
2895    if (insn->tex.rIndirectSrc >= 0) {
2896       emitInsn (0xdeb80000);
2897       emitField(0x25, 2, lodm);
2898       emitField(0x24, 1, insn->tex.useOffsets == 1);
2899    } else {
2900       emitInsn (0xc0380000);
2901       emitField(0x37, 2, lodm);
2902       emitField(0x36, 1, insn->tex.useOffsets == 1);
2903       emitField(0x24, 13, insn->tex.r);
2904    }
2905 
2906    emitField(0x32, 1, insn->tex.target.isShadow());
2907    emitField(0x31, 1, insn->tex.liveOnly);
2908    emitField(0x23, 1, insn->tex.derivAll);
2909    emitField(0x1f, 4, insn->tex.mask);
2910    emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2911                       insn->tex.target.getDim() - 1);
2912    emitField(0x1c, 1, insn->tex.target.isArray());
2913    emitTEXs (0x14);
2914    emitGPR  (0x08, insn->src(0));
2915    emitGPR  (0x00, insn->def(0));
2916 }
2917 
2918 void
emitTEXS()2919 CodeEmitterGM107::emitTEXS()
2920 {
2921    const TexInstruction *insn = this->insn->asTex();
2922    assert(!insn->tex.derivAll);
2923 
2924    switch (insn->op) {
2925    case OP_TEX:
2926    case OP_TXL:
2927       emitInsn (0xd8000000);
2928       emitField(0x35, 4, getTEXSTarget(insn));
2929       emitField(0x32, 3, getTEXSMask(insn->tex.mask));
2930       break;
2931    case OP_TXF:
2932       emitInsn (0xda000000);
2933       emitField(0x35, 4, getTLDSTarget(insn));
2934       emitField(0x32, 3, getTEXSMask(insn->tex.mask));
2935       break;
2936    case OP_TXG:
2937       assert(insn->tex.useOffsets != 4);
2938       emitInsn (0xdf000000);
2939       emitField(0x34, 2, insn->tex.gatherComp);
2940       emitField(0x33, 1, insn->tex.useOffsets == 1);
2941       emitField(0x32, 1, insn->tex.target.isShadow());
2942       break;
2943    default:
2944       unreachable("unknown op in emitTEXS()");
2945       break;
2946    }
2947 
2948    emitField(0x31, 1, insn->tex.liveOnly);
2949    emitField(0x24, 13, insn->tex.r);
2950    if (insn->defExists(1))
2951       emitGPR(0x1c, insn->def(1));
2952    else
2953       emitGPR(0x1c);
2954    if (insn->srcExists(1))
2955       emitGPR(0x14, insn->getSrc(1));
2956    else
2957       emitGPR(0x14);
2958    emitGPR  (0x08, insn->src(0));
2959    emitGPR  (0x00, insn->def(0));
2960 }
2961 
2962 void
emitTLD()2963 CodeEmitterGM107::emitTLD()
2964 {
2965    const TexInstruction *insn = this->insn->asTex();
2966 
2967    if (insn->tex.rIndirectSrc >= 0) {
2968       emitInsn (0xdd380000);
2969    } else {
2970       emitInsn (0xdc380000);
2971       emitField(0x24, 13, insn->tex.r);
2972    }
2973 
2974    emitField(0x37, 1, insn->tex.levelZero == 0);
2975    emitField(0x32, 1, insn->tex.target.isMS());
2976    emitField(0x31, 1, insn->tex.liveOnly);
2977    emitField(0x23, 1, insn->tex.useOffsets == 1);
2978    emitField(0x1f, 4, insn->tex.mask);
2979    emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2980                       insn->tex.target.getDim() - 1);
2981    emitField(0x1c, 1, insn->tex.target.isArray());
2982    emitTEXs (0x14);
2983    emitGPR  (0x08, insn->src(0));
2984    emitGPR  (0x00, insn->def(0));
2985 }
2986 
2987 void
emitTLD4()2988 CodeEmitterGM107::emitTLD4()
2989 {
2990    const TexInstruction *insn = this->insn->asTex();
2991 
2992    if (insn->tex.rIndirectSrc >= 0) {
2993       emitInsn (0xdef80000);
2994       emitField(0x26, 2, insn->tex.gatherComp);
2995       emitField(0x25, 2, insn->tex.useOffsets == 4);
2996       emitField(0x24, 2, insn->tex.useOffsets == 1);
2997    } else {
2998       emitInsn (0xc8380000);
2999       emitField(0x38, 2, insn->tex.gatherComp);
3000       emitField(0x37, 2, insn->tex.useOffsets == 4);
3001       emitField(0x36, 2, insn->tex.useOffsets == 1);
3002       emitField(0x24, 13, insn->tex.r);
3003    }
3004 
3005    emitField(0x32, 1, insn->tex.target.isShadow());
3006    emitField(0x31, 1, insn->tex.liveOnly);
3007    emitField(0x23, 1, insn->tex.derivAll);
3008    emitField(0x1f, 4, insn->tex.mask);
3009    emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
3010                       insn->tex.target.getDim() - 1);
3011    emitField(0x1c, 1, insn->tex.target.isArray());
3012    emitTEXs (0x14);
3013    emitGPR  (0x08, insn->src(0));
3014    emitGPR  (0x00, insn->def(0));
3015 }
3016 
3017 void
emitTXD()3018 CodeEmitterGM107::emitTXD()
3019 {
3020    const TexInstruction *insn = this->insn->asTex();
3021 
3022    if (insn->tex.rIndirectSrc >= 0) {
3023       emitInsn (0xde780000);
3024    } else {
3025       emitInsn (0xde380000);
3026       emitField(0x24, 13, insn->tex.r);
3027    }
3028 
3029    emitField(0x31, 1, insn->tex.liveOnly);
3030    emitField(0x23, 1, insn->tex.useOffsets == 1);
3031    emitField(0x1f, 4, insn->tex.mask);
3032    emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
3033                       insn->tex.target.getDim() - 1);
3034    emitField(0x1c, 1, insn->tex.target.isArray());
3035    emitTEXs (0x14);
3036    emitGPR  (0x08, insn->src(0));
3037    emitGPR  (0x00, insn->def(0));
3038 }
3039 
3040 void
emitTMML()3041 CodeEmitterGM107::emitTMML()
3042 {
3043    const TexInstruction *insn = this->insn->asTex();
3044 
3045    if (insn->tex.rIndirectSrc >= 0) {
3046       emitInsn (0xdf600000);
3047    } else {
3048       emitInsn (0xdf580000);
3049       emitField(0x24, 13, insn->tex.r);
3050    }
3051 
3052    emitField(0x31, 1, insn->tex.liveOnly);
3053    emitField(0x23, 1, insn->tex.derivAll);
3054    emitField(0x1f, 4, insn->tex.mask);
3055    emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
3056                       insn->tex.target.getDim() - 1);
3057    emitField(0x1c, 1, insn->tex.target.isArray());
3058    emitTEXs (0x14);
3059    emitGPR  (0x08, insn->src(0));
3060    emitGPR  (0x00, insn->def(0));
3061 }
3062 
3063 void
emitTXQ()3064 CodeEmitterGM107::emitTXQ()
3065 {
3066    const TexInstruction *insn = this->insn->asTex();
3067    int type = 0;
3068 
3069    switch (insn->tex.query) {
3070    case TXQ_DIMS           : type = 0x01; break;
3071    case TXQ_TYPE           : type = 0x02; break;
3072    case TXQ_SAMPLE_POSITION: type = 0x05; break;
3073    case TXQ_FILTER         : type = 0x10; break;
3074    case TXQ_LOD            : type = 0x12; break;
3075    case TXQ_WRAP           : type = 0x14; break;
3076    case TXQ_BORDER_COLOUR  : type = 0x16; break;
3077    default:
3078       assert(!"invalid txq query");
3079       break;
3080    }
3081 
3082    if (insn->tex.rIndirectSrc >= 0) {
3083       emitInsn (0xdf500000);
3084    } else {
3085       emitInsn (0xdf480000);
3086       emitField(0x24, 13, insn->tex.r);
3087    }
3088 
3089    emitField(0x31, 1, insn->tex.liveOnly);
3090    emitField(0x1f, 4, insn->tex.mask);
3091    emitField(0x16, 6, type);
3092    emitGPR  (0x08, insn->src(0));
3093    emitGPR  (0x00, insn->def(0));
3094 }
3095 
3096 void
emitDEPBAR()3097 CodeEmitterGM107::emitDEPBAR()
3098 {
3099    emitInsn (0xf0f00000);
3100    emitField(0x1d, 1, 1); /* le */
3101    emitField(0x1a, 3, 5);
3102    emitField(0x14, 6, insn->subOp);
3103    emitField(0x00, 6, insn->subOp);
3104 }
3105 
3106 /*******************************************************************************
3107  * misc
3108  ******************************************************************************/
3109 
3110 void
emitNOP()3111 CodeEmitterGM107::emitNOP()
3112 {
3113    emitInsn(0x50b00000);
3114 }
3115 
3116 void
emitKIL()3117 CodeEmitterGM107::emitKIL()
3118 {
3119    emitInsn (0xe3300000);
3120    emitCond5(0x00, CC_TR);
3121 }
3122 
3123 void
emitOUT()3124 CodeEmitterGM107::emitOUT()
3125 {
3126    const int cut  = insn->op == OP_RESTART || insn->subOp;
3127    const int emit = insn->op == OP_EMIT;
3128 
3129    switch (insn->src(1).getFile()) {
3130    case FILE_GPR:
3131       emitInsn(0xfbe00000);
3132       emitGPR (0x14, insn->src(1));
3133       break;
3134    case FILE_IMMEDIATE:
3135       emitInsn(0xf6e00000);
3136       emitIMMD(0x14, 19, insn->src(1));
3137       break;
3138    case FILE_MEMORY_CONST:
3139       emitInsn(0xebe00000);
3140       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
3141       break;
3142    default:
3143       assert(!"bad src1 file");
3144       break;
3145    }
3146 
3147    emitField(0x27, 2, (cut << 1) | emit);
3148    emitGPR  (0x08, insn->src(0));
3149    emitGPR  (0x00, insn->def(0));
3150 }
3151 
3152 void
emitBAR()3153 CodeEmitterGM107::emitBAR()
3154 {
3155    uint8_t subop;
3156 
3157    emitInsn (0xf0a80000);
3158 
3159    switch (insn->subOp) {
3160    case NV50_IR_SUBOP_BAR_RED_POPC: subop = 0x02; break;
3161    case NV50_IR_SUBOP_BAR_RED_AND:  subop = 0x0a; break;
3162    case NV50_IR_SUBOP_BAR_RED_OR:   subop = 0x12; break;
3163    case NV50_IR_SUBOP_BAR_ARRIVE:   subop = 0x81; break;
3164    default:
3165       subop = 0x80;
3166       assert(insn->subOp == NV50_IR_SUBOP_BAR_SYNC);
3167       break;
3168    }
3169 
3170    emitField(0x20, 8, subop);
3171 
3172    // barrier id
3173    if (insn->src(0).getFile() == FILE_GPR) {
3174       emitGPR(0x08, insn->src(0));
3175    } else {
3176       ImmediateValue *imm = insn->getSrc(0)->asImm();
3177       assert(imm);
3178       emitField(0x08, 8, imm->reg.data.u32);
3179       emitField(0x2b, 1, 1);
3180    }
3181 
3182    // thread count
3183    if (insn->src(1).getFile() == FILE_GPR) {
3184       emitGPR(0x14, insn->src(1));
3185    } else {
3186       ImmediateValue *imm = insn->getSrc(0)->asImm();
3187       assert(imm);
3188       emitField(0x14, 12, imm->reg.data.u32);
3189       emitField(0x2c, 1, 1);
3190    }
3191 
3192    if (insn->srcExists(2) && (insn->predSrc != 2)) {
3193       emitPRED (0x27, insn->src(2));
3194       emitField(0x2a, 1, insn->src(2).mod == Modifier(NV50_IR_MOD_NOT));
3195    } else {
3196       emitField(0x27, 3, 7);
3197    }
3198 }
3199 
3200 void
emitMEMBAR()3201 CodeEmitterGM107::emitMEMBAR()
3202 {
3203    emitInsn (0xef980000);
3204    emitField(0x08, 2, insn->subOp >> 2);
3205 }
3206 
3207 void
emitVOTE()3208 CodeEmitterGM107::emitVOTE()
3209 {
3210    const ImmediateValue *imm;
3211    uint32_t u32;
3212 
3213    int r = -1, p = -1;
3214    for (int i = 0; insn->defExists(i); i++) {
3215       if (insn->def(i).getFile() == FILE_GPR)
3216          r = i;
3217       else if (insn->def(i).getFile() == FILE_PREDICATE)
3218          p = i;
3219    }
3220 
3221    emitInsn (0x50d80000);
3222    emitField(0x30, 2, insn->subOp);
3223    if (r >= 0)
3224       emitGPR  (0x00, insn->def(r));
3225    else
3226       emitGPR  (0x00);
3227    if (p >= 0)
3228       emitPRED (0x2d, insn->def(p));
3229    else
3230       emitPRED (0x2d);
3231 
3232    switch (insn->src(0).getFile()) {
3233    case FILE_PREDICATE:
3234       emitField(0x2a, 1, insn->src(0).mod == Modifier(NV50_IR_MOD_NOT));
3235       emitPRED (0x27, insn->src(0));
3236       break;
3237    case FILE_IMMEDIATE:
3238       imm = insn->getSrc(0)->asImm();
3239       assert(imm);
3240       u32 = imm->reg.data.u32;
3241       assert(u32 == 0 || u32 == 1);
3242       emitPRED(0x27);
3243       emitField(0x2a, 1, u32 == 0);
3244       break;
3245    default:
3246       assert(!"Unhandled src");
3247       break;
3248    }
3249 }
3250 
3251 void
emitSUTarget()3252 CodeEmitterGM107::emitSUTarget()
3253 {
3254    const TexInstruction *insn = this->insn->asTex();
3255    int target = 0;
3256 
3257    assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
3258 
3259    if (insn->tex.target == TEX_TARGET_BUFFER) {
3260       target = 2;
3261    } else if (insn->tex.target == TEX_TARGET_1D_ARRAY) {
3262       target = 4;
3263    } else if (insn->tex.target == TEX_TARGET_2D ||
3264               insn->tex.target == TEX_TARGET_RECT) {
3265       target = 6;
3266    } else if (insn->tex.target == TEX_TARGET_2D_ARRAY ||
3267               insn->tex.target == TEX_TARGET_CUBE ||
3268               insn->tex.target == TEX_TARGET_CUBE_ARRAY) {
3269       target = 8;
3270    } else if (insn->tex.target == TEX_TARGET_3D) {
3271       target = 10;
3272    } else {
3273       assert(insn->tex.target == TEX_TARGET_1D);
3274    }
3275    emitField(0x20, 4, target);
3276 }
3277 
3278 void
emitSUHandle(const int s)3279 CodeEmitterGM107::emitSUHandle(const int s)
3280 {
3281    const TexInstruction *insn = this->insn->asTex();
3282 
3283    assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
3284 
3285    if (insn->src(s).getFile() == FILE_GPR) {
3286       emitGPR(0x27, insn->src(s));
3287    } else {
3288       ImmediateValue *imm = insn->getSrc(s)->asImm();
3289       assert(imm);
3290       emitField(0x33, 1, 1);
3291       emitField(0x24, 13, imm->reg.data.u32);
3292    }
3293 }
3294 
3295 void
emitSUSTx()3296 CodeEmitterGM107::emitSUSTx()
3297 {
3298    const TexInstruction *insn = this->insn->asTex();
3299 
3300    emitInsn(0xeb200000);
3301    if (insn->op == OP_SUSTB)
3302       emitField(0x34, 1, 1);
3303    emitSUTarget();
3304 
3305    emitLDSTc(0x18);
3306    emitField(0x14, 4, 0xf); // rgba
3307    emitGPR  (0x08, insn->src(0));
3308    emitGPR  (0x00, insn->src(1));
3309 
3310    emitSUHandle(2);
3311 }
3312 
3313 void
emitSULDx()3314 CodeEmitterGM107::emitSULDx()
3315 {
3316    const TexInstruction *insn = this->insn->asTex();
3317    int type = 0;
3318 
3319    emitInsn(0xeb000000);
3320    if (insn->op == OP_SULDB)
3321       emitField(0x34, 1, 1);
3322    emitSUTarget();
3323 
3324    switch (insn->dType) {
3325    case TYPE_S8:   type = 1; break;
3326    case TYPE_U16:  type = 2; break;
3327    case TYPE_S16:  type = 3; break;
3328    case TYPE_U32:  type = 4; break;
3329    case TYPE_U64:  type = 5; break;
3330    case TYPE_B128: type = 6; break;
3331    default:
3332       assert(insn->dType == TYPE_U8);
3333       break;
3334    }
3335    emitLDSTc(0x18);
3336    emitField(0x14, 3, type);
3337    emitGPR  (0x00, insn->def(0));
3338    emitGPR  (0x08, insn->src(0));
3339 
3340    emitSUHandle(1);
3341 }
3342 
3343 void
emitSUREDx()3344 CodeEmitterGM107::emitSUREDx()
3345 {
3346    const TexInstruction *insn = this->insn->asTex();
3347    uint8_t type = 0, subOp;
3348 
3349    if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS)
3350       emitInsn(0xeac00000);
3351    else
3352       emitInsn(0xea600000);
3353 
3354    if (insn->op == OP_SUREDB)
3355       emitField(0x34, 1, 1);
3356    emitSUTarget();
3357 
3358    // destination type
3359    switch (insn->dType) {
3360    case TYPE_S32: type = 1; break;
3361    case TYPE_U64: type = 2; break;
3362    case TYPE_F32: type = 3; break;
3363    case TYPE_S64: type = 5; break;
3364    default:
3365       assert(insn->dType == TYPE_U32);
3366       break;
3367    }
3368 
3369    // atomic operation
3370    if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
3371       subOp = 0;
3372    } else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
3373       subOp = 8;
3374    } else {
3375       subOp = insn->subOp;
3376    }
3377 
3378    emitField(0x24, 3, type);
3379    emitField(0x1d, 4, subOp);
3380    emitGPR  (0x14, insn->src(1));
3381    emitGPR  (0x08, insn->src(0));
3382    emitGPR  (0x00, insn->def(0));
3383 
3384    emitSUHandle(2);
3385 }
3386 
3387 /*******************************************************************************
3388  * assembler front-end
3389  ******************************************************************************/
3390 
3391 bool
emitInstruction(Instruction * i)3392 CodeEmitterGM107::emitInstruction(Instruction *i)
3393 {
3394    const unsigned int size = (writeIssueDelays && !(codeSize & 0x1f)) ? 16 : 8;
3395    bool ret = true;
3396 
3397    insn = i;
3398 
3399    if (insn->encSize != 8) {
3400       ERROR("skipping undecodable instruction: "); insn->print();
3401       return false;
3402    } else
3403    if (codeSize + size > codeSizeLimit) {
3404       ERROR("code emitter output buffer too small\n");
3405       return false;
3406    }
3407 
3408    if (writeIssueDelays) {
3409       int n = ((codeSize & 0x1f) / 8) - 1;
3410       if (n < 0) {
3411          data = code;
3412          data[0] = 0x00000000;
3413          data[1] = 0x00000000;
3414          code += 2;
3415          codeSize += 8;
3416          n++;
3417       }
3418 
3419       emitField(data, n * 21, 21, insn->sched);
3420    }
3421 
3422    switch (insn->op) {
3423    case OP_EXIT:
3424       emitEXIT();
3425       break;
3426    case OP_BRA:
3427       emitBRA();
3428       break;
3429    case OP_CALL:
3430       emitCAL();
3431       break;
3432    case OP_PRECONT:
3433       emitPCNT();
3434       break;
3435    case OP_CONT:
3436       emitCONT();
3437       break;
3438    case OP_PREBREAK:
3439       emitPBK();
3440       break;
3441    case OP_BREAK:
3442       emitBRK();
3443       break;
3444    case OP_PRERET:
3445       emitPRET();
3446       break;
3447    case OP_RET:
3448       emitRET();
3449       break;
3450    case OP_JOINAT:
3451       emitSSY();
3452       break;
3453    case OP_JOIN:
3454       emitSYNC();
3455       break;
3456    case OP_QUADON:
3457       emitSAM();
3458       break;
3459    case OP_QUADPOP:
3460       emitRAM();
3461       break;
3462    case OP_MOV:
3463       emitMOV();
3464       break;
3465    case OP_RDSV:
3466       if (targGM107->isCS2RSV(insn->getSrc(0)->reg.data.sv.sv))
3467          emitCS2R();
3468       else
3469          emitS2R();
3470       break;
3471    case OP_ABS:
3472    case OP_NEG:
3473    case OP_SAT:
3474    case OP_FLOOR:
3475    case OP_CEIL:
3476    case OP_TRUNC:
3477    case OP_CVT:
3478       if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
3479                                  insn->src(0).getFile() == FILE_PREDICATE)) {
3480          emitMOV();
3481       } else if (isFloatType(insn->dType)) {
3482          if (isFloatType(insn->sType))
3483             emitF2F();
3484          else
3485             emitI2F();
3486       } else {
3487          if (isFloatType(insn->sType))
3488             emitF2I();
3489          else
3490             emitI2I();
3491       }
3492       break;
3493    case OP_SHFL:
3494       emitSHFL();
3495       break;
3496    case OP_ADD:
3497    case OP_SUB:
3498       if (isFloatType(insn->dType)) {
3499          if (insn->dType == TYPE_F64)
3500             emitDADD();
3501          else
3502             emitFADD();
3503       } else {
3504          emitIADD();
3505       }
3506       break;
3507    case OP_MUL:
3508       if (isFloatType(insn->dType)) {
3509          if (insn->dType == TYPE_F64)
3510             emitDMUL();
3511          else
3512             emitFMUL();
3513       } else {
3514          emitIMUL();
3515       }
3516       break;
3517    case OP_MAD:
3518    case OP_FMA:
3519       if (isFloatType(insn->dType)) {
3520          if (insn->dType == TYPE_F64)
3521             emitDFMA();
3522          else
3523             emitFFMA();
3524       } else {
3525          emitIMAD();
3526       }
3527       break;
3528    case OP_SHLADD:
3529       emitISCADD();
3530       break;
3531    case OP_XMAD:
3532       emitXMAD();
3533       break;
3534    case OP_MIN:
3535    case OP_MAX:
3536       if (isFloatType(insn->dType)) {
3537          if (insn->dType == TYPE_F64)
3538             emitDMNMX();
3539          else
3540             emitFMNMX();
3541       } else {
3542          emitIMNMX();
3543       }
3544       break;
3545    case OP_SHL:
3546       if (typeSizeof(insn->sType) == 8)
3547          emitSHF();
3548       else
3549          emitSHL();
3550       break;
3551    case OP_SHR:
3552       if (typeSizeof(insn->sType) == 8)
3553          emitSHF();
3554       else
3555          emitSHR();
3556       break;
3557    case OP_POPCNT:
3558       emitPOPC();
3559       break;
3560    case OP_INSBF:
3561       emitBFI();
3562       break;
3563    case OP_EXTBF:
3564       emitBFE();
3565       break;
3566    case OP_BFIND:
3567       emitFLO();
3568       break;
3569    case OP_PERMT:
3570       emitPRMT();
3571       break;
3572    case OP_SLCT:
3573       if (isFloatType(insn->dType))
3574          emitFCMP();
3575       else
3576          emitICMP();
3577       break;
3578    case OP_SET:
3579    case OP_SET_AND:
3580    case OP_SET_OR:
3581    case OP_SET_XOR:
3582       if (insn->def(0).getFile() != FILE_PREDICATE) {
3583          if (isFloatType(insn->sType))
3584             if (insn->sType == TYPE_F64)
3585                emitDSET();
3586             else
3587                emitFSET();
3588          else
3589             emitISET();
3590       } else {
3591          if (isFloatType(insn->sType))
3592             if (insn->sType == TYPE_F64)
3593                emitDSETP();
3594             else
3595                emitFSETP();
3596          else
3597             emitISETP();
3598       }
3599       break;
3600    case OP_SELP:
3601       emitSEL();
3602       break;
3603    case OP_PRESIN:
3604    case OP_PREEX2:
3605       emitRRO();
3606       break;
3607    case OP_COS:
3608    case OP_SIN:
3609    case OP_EX2:
3610    case OP_LG2:
3611    case OP_RCP:
3612    case OP_RSQ:
3613    case OP_SQRT:
3614       emitMUFU();
3615       break;
3616    case OP_AND:
3617    case OP_OR:
3618    case OP_XOR:
3619       switch (insn->def(0).getFile()) {
3620       case FILE_GPR: emitLOP(); break;
3621       case FILE_PREDICATE: emitPSETP(); break;
3622       default:
3623          assert(!"invalid bool op");
3624       }
3625       break;
3626    case OP_NOT:
3627       emitNOT();
3628       break;
3629    case OP_LOAD:
3630       switch (insn->src(0).getFile()) {
3631       case FILE_MEMORY_CONST : emitLDC(); break;
3632       case FILE_MEMORY_LOCAL : emitLDL(); break;
3633       case FILE_MEMORY_SHARED: emitLDS(); break;
3634       case FILE_MEMORY_GLOBAL: emitLD(); break;
3635       default:
3636          assert(!"invalid load");
3637          emitNOP();
3638          break;
3639       }
3640       break;
3641    case OP_STORE:
3642       switch (insn->src(0).getFile()) {
3643       case FILE_MEMORY_LOCAL : emitSTL(); break;
3644       case FILE_MEMORY_SHARED: emitSTS(); break;
3645       case FILE_MEMORY_GLOBAL: emitST(); break;
3646       default:
3647          assert(!"invalid store");
3648          emitNOP();
3649          break;
3650       }
3651       break;
3652    case OP_ATOM:
3653       if (insn->src(0).getFile() == FILE_MEMORY_SHARED)
3654          emitATOMS();
3655       else
3656          if (!insn->defExists(0) && insn->subOp < NV50_IR_SUBOP_ATOM_CAS)
3657             emitRED();
3658          else
3659             emitATOM();
3660       break;
3661    case OP_CCTL:
3662       emitCCTL();
3663       break;
3664    case OP_VFETCH:
3665       emitALD();
3666       break;
3667    case OP_EXPORT:
3668       emitAST();
3669       break;
3670    case OP_PFETCH:
3671       emitISBERD();
3672       break;
3673    case OP_AFETCH:
3674       emitAL2P();
3675       break;
3676    case OP_LINTERP:
3677    case OP_PINTERP:
3678       emitIPA();
3679       break;
3680    case OP_PIXLD:
3681       emitPIXLD();
3682       break;
3683    case OP_TEX:
3684    case OP_TXL:
3685       if (insn->asTex()->tex.scalar)
3686          emitTEXS();
3687       else
3688          emitTEX();
3689       break;
3690    case OP_TXB:
3691       emitTEX();
3692       break;
3693    case OP_TXF:
3694       if (insn->asTex()->tex.scalar)
3695          emitTEXS();
3696       else
3697          emitTLD();
3698       break;
3699    case OP_TXG:
3700       if (insn->asTex()->tex.scalar)
3701          emitTEXS();
3702       else
3703          emitTLD4();
3704       break;
3705    case OP_TXD:
3706       emitTXD();
3707       break;
3708    case OP_TXQ:
3709       emitTXQ();
3710       break;
3711    case OP_TXLQ:
3712       emitTMML();
3713       break;
3714    case OP_TEXBAR:
3715       emitDEPBAR();
3716       break;
3717    case OP_QUADOP:
3718       emitFSWZADD();
3719       break;
3720    case OP_NOP:
3721       emitNOP();
3722       break;
3723    case OP_DISCARD:
3724       emitKIL();
3725       break;
3726    case OP_EMIT:
3727    case OP_RESTART:
3728       emitOUT();
3729       break;
3730    case OP_BAR:
3731       emitBAR();
3732       break;
3733    case OP_MEMBAR:
3734       emitMEMBAR();
3735       break;
3736    case OP_VOTE:
3737       emitVOTE();
3738       break;
3739    case OP_SUSTB:
3740    case OP_SUSTP:
3741       emitSUSTx();
3742       break;
3743    case OP_SULDB:
3744    case OP_SULDP:
3745       emitSULDx();
3746       break;
3747    case OP_SUREDB:
3748    case OP_SUREDP:
3749       emitSUREDx();
3750       break;
3751    default:
3752       assert(!"invalid opcode");
3753       emitNOP();
3754       ret = false;
3755       break;
3756    }
3757 
3758    if (insn->join) {
3759       /*XXX*/
3760    }
3761 
3762    code += 2;
3763    codeSize += 8;
3764    return ret;
3765 }
3766 
3767 uint32_t
getMinEncodingSize(const Instruction * i) const3768 CodeEmitterGM107::getMinEncodingSize(const Instruction *i) const
3769 {
3770    return 8;
3771 }
3772 
3773 /*******************************************************************************
3774  * sched data calculator
3775  ******************************************************************************/
3776 
3777 inline void
emitStall(Instruction * insn,uint8_t cnt)3778 SchedDataCalculatorGM107::emitStall(Instruction *insn, uint8_t cnt)
3779 {
3780    assert(cnt < 16);
3781    insn->sched |= cnt;
3782 }
3783 
3784 inline void
emitYield(Instruction * insn)3785 SchedDataCalculatorGM107::emitYield(Instruction *insn)
3786 {
3787    insn->sched |= 1 << 4;
3788 }
3789 
3790 inline void
emitWrDepBar(Instruction * insn,uint8_t id)3791 SchedDataCalculatorGM107::emitWrDepBar(Instruction *insn, uint8_t id)
3792 {
3793    assert(id < 6);
3794    if ((insn->sched & 0xe0) == 0xe0)
3795       insn->sched ^= 0xe0;
3796    insn->sched |= id << 5;
3797 }
3798 
3799 inline void
emitRdDepBar(Instruction * insn,uint8_t id)3800 SchedDataCalculatorGM107::emitRdDepBar(Instruction *insn, uint8_t id)
3801 {
3802    assert(id < 6);
3803    if ((insn->sched & 0x700) == 0x700)
3804       insn->sched ^= 0x700;
3805    insn->sched |= id << 8;
3806 }
3807 
3808 inline void
emitWtDepBar(Instruction * insn,uint8_t id)3809 SchedDataCalculatorGM107::emitWtDepBar(Instruction *insn, uint8_t id)
3810 {
3811    assert(id < 6);
3812    insn->sched |= 1 << (11 + id);
3813 }
3814 
3815 inline void
emitReuse(Instruction * insn,uint8_t id)3816 SchedDataCalculatorGM107::emitReuse(Instruction *insn, uint8_t id)
3817 {
3818    assert(id < 4);
3819    insn->sched |= 1 << (17 + id);
3820 }
3821 
3822 inline void
printSchedInfo(int cycle,const Instruction * insn) const3823 SchedDataCalculatorGM107::printSchedInfo(int cycle,
3824                                          const Instruction *insn) const
3825 {
3826    uint8_t st, yl, wr, rd, wt, ru;
3827 
3828    st = (insn->sched & 0x00000f) >> 0;
3829    yl = (insn->sched & 0x000010) >> 4;
3830    wr = (insn->sched & 0x0000e0) >> 5;
3831    rd = (insn->sched & 0x000700) >> 8;
3832    wt = (insn->sched & 0x01f800) >> 11;
3833    ru = (insn->sched & 0x1e0000) >> 17;
3834 
3835    INFO("cycle %i, (st 0x%x, yl 0x%x, wr 0x%x, rd 0x%x, wt 0x%x, ru 0x%x)\n",
3836         cycle, st, yl, wr, rd, wt, ru);
3837 }
3838 
3839 inline int
getStall(const Instruction * insn) const3840 SchedDataCalculatorGM107::getStall(const Instruction *insn) const
3841 {
3842    return insn->sched & 0xf;
3843 }
3844 
3845 inline int
getWrDepBar(const Instruction * insn) const3846 SchedDataCalculatorGM107::getWrDepBar(const Instruction *insn) const
3847 {
3848    return (insn->sched & 0x0000e0) >> 5;
3849 }
3850 
3851 inline int
getRdDepBar(const Instruction * insn) const3852 SchedDataCalculatorGM107::getRdDepBar(const Instruction *insn) const
3853 {
3854    return (insn->sched & 0x000700) >> 8;
3855 }
3856 
3857 inline int
getWtDepBar(const Instruction * insn) const3858 SchedDataCalculatorGM107::getWtDepBar(const Instruction *insn) const
3859 {
3860    return (insn->sched & 0x01f800) >> 11;
3861 }
3862 
3863 // Emit the reuse flag which allows to make use of the new memory hierarchy
3864 // introduced since Maxwell, the operand reuse cache.
3865 //
3866 // It allows to reduce bank conflicts by caching operands. Each time you issue
3867 // an instruction, that flag can tell the hw which operands are going to be
3868 // re-used by the next instruction. Note that the next instruction has to use
3869 // the same GPR id in the same operand slot.
3870 void
setReuseFlag(Instruction * insn)3871 SchedDataCalculatorGM107::setReuseFlag(Instruction *insn)
3872 {
3873    Instruction *next = insn->next;
3874    BitSet defs(255, 1);
3875 
3876    if (!targ->isReuseSupported(insn))
3877       return;
3878 
3879    for (int d = 0; insn->defExists(d); ++d) {
3880       const Value *def = insn->def(d).rep();
3881       if (insn->def(d).getFile() != FILE_GPR)
3882          continue;
3883       if (typeSizeof(insn->dType) != 4 || def->reg.data.id == 255)
3884          continue;
3885       defs.set(def->reg.data.id);
3886    }
3887 
3888    for (int s = 0; insn->srcExists(s); s++) {
3889       const Value *src = insn->src(s).rep();
3890       if (insn->src(s).getFile() != FILE_GPR)
3891          continue;
3892       if (typeSizeof(insn->sType) != 4 || src->reg.data.id == 255)
3893          continue;
3894       if (defs.test(src->reg.data.id))
3895          continue;
3896       if (!next->srcExists(s) || next->src(s).getFile() != FILE_GPR)
3897          continue;
3898       if (src->reg.data.id != next->getSrc(s)->reg.data.id)
3899          continue;
3900       assert(s < 4);
3901       emitReuse(insn, s);
3902    }
3903 }
3904 
3905 void
recordWr(const Value * v,int cycle,int ready)3906 SchedDataCalculatorGM107::recordWr(const Value *v, int cycle, int ready)
3907 {
3908    int a = v->reg.data.id, b;
3909 
3910    switch (v->reg.file) {
3911    case FILE_GPR:
3912       b = a + v->reg.size / 4;
3913       for (int r = a; r < b; ++r)
3914          score->rd.r[r] = ready;
3915       break;
3916    case FILE_PREDICATE:
3917       // To immediately use a predicate set by any instructions, the minimum
3918       // number of stall counts is 13.
3919       score->rd.p[a] = cycle + 13;
3920       break;
3921    case FILE_FLAGS:
3922       score->rd.c = ready;
3923       break;
3924    default:
3925       break;
3926    }
3927 }
3928 
3929 void
checkRd(const Value * v,int cycle,int & delay) const3930 SchedDataCalculatorGM107::checkRd(const Value *v, int cycle, int &delay) const
3931 {
3932    int a = v->reg.data.id, b;
3933    int ready = cycle;
3934 
3935    switch (v->reg.file) {
3936    case FILE_GPR:
3937       b = a + v->reg.size / 4;
3938       for (int r = a; r < b; ++r)
3939          ready = MAX2(ready, score->rd.r[r]);
3940       break;
3941    case FILE_PREDICATE:
3942       ready = MAX2(ready, score->rd.p[a]);
3943       break;
3944    case FILE_FLAGS:
3945       ready = MAX2(ready, score->rd.c);
3946       break;
3947    default:
3948       break;
3949    }
3950    if (cycle < ready)
3951       delay = MAX2(delay, ready - cycle);
3952 }
3953 
3954 void
commitInsn(const Instruction * insn,int cycle)3955 SchedDataCalculatorGM107::commitInsn(const Instruction *insn, int cycle)
3956 {
3957    const int ready = cycle + targ->getLatency(insn);
3958 
3959    for (int d = 0; insn->defExists(d); ++d)
3960       recordWr(insn->getDef(d), cycle, ready);
3961 
3962 #ifdef GM107_DEBUG_SCHED_DATA
3963    score->print(cycle);
3964 #endif
3965 }
3966 
3967 #define GM107_MIN_ISSUE_DELAY 0x1
3968 #define GM107_MAX_ISSUE_DELAY 0xf
3969 
3970 int
calcDelay(const Instruction * insn,int cycle) const3971 SchedDataCalculatorGM107::calcDelay(const Instruction *insn, int cycle) const
3972 {
3973    int delay = 0, ready = cycle;
3974 
3975    for (int s = 0; insn->srcExists(s); ++s)
3976       checkRd(insn->getSrc(s), cycle, delay);
3977 
3978    // TODO: make use of getReadLatency()!
3979 
3980    return MAX2(delay, ready - cycle);
3981 }
3982 
3983 void
setDelay(Instruction * insn,int delay,const Instruction * next)3984 SchedDataCalculatorGM107::setDelay(Instruction *insn, int delay,
3985                                    const Instruction *next)
3986 {
3987    const OpClass cl = targ->getOpClass(insn->op);
3988    int wr, rd;
3989 
3990    if (insn->op == OP_EXIT ||
3991        insn->op == OP_BAR ||
3992        insn->op == OP_MEMBAR) {
3993       delay = GM107_MAX_ISSUE_DELAY;
3994    } else
3995    if (insn->op == OP_QUADON ||
3996        insn->op == OP_QUADPOP) {
3997       delay = 0xd;
3998    } else
3999    if (cl == OPCLASS_FLOW || insn->join) {
4000       delay = 0xd;
4001    }
4002 
4003    if (!next || !targ->canDualIssue(insn, next)) {
4004       delay = CLAMP(delay, GM107_MIN_ISSUE_DELAY, GM107_MAX_ISSUE_DELAY);
4005    } else {
4006       delay = 0x0; // dual-issue
4007    }
4008 
4009    wr = getWrDepBar(insn);
4010    rd = getRdDepBar(insn);
4011 
4012    if (delay == GM107_MIN_ISSUE_DELAY && (wr & rd) != 7) {
4013       // Barriers take one additional clock cycle to become active on top of
4014       // the clock consumed by the instruction producing it.
4015       if (!next || insn->bb != next->bb) {
4016          delay = 0x2;
4017       } else {
4018          int wt = getWtDepBar(next);
4019          if ((wt & (1 << wr)) | (wt & (1 << rd)))
4020             delay = 0x2;
4021       }
4022    }
4023 
4024    emitStall(insn, delay);
4025 }
4026 
4027 
4028 // Return true when the given instruction needs to emit a read dependency
4029 // barrier (for WaR hazards) because it doesn't operate at a fixed latency, and
4030 // setting the maximum number of stall counts is not enough.
4031 bool
needRdDepBar(const Instruction * insn) const4032 SchedDataCalculatorGM107::needRdDepBar(const Instruction *insn) const
4033 {
4034    BitSet srcs(255, 1), defs(255, 1);
4035    int a, b;
4036 
4037    if (!targ->isBarrierRequired(insn))
4038       return false;
4039 
4040    // Do not emit a read dependency barrier when the instruction doesn't use
4041    // any GPR (like st s[0x4] 0x0) as input because it's unnecessary.
4042    for (int s = 0; insn->srcExists(s); ++s) {
4043       const Value *src = insn->src(s).rep();
4044       if (insn->src(s).getFile() != FILE_GPR)
4045          continue;
4046       if (src->reg.data.id == 255)
4047          continue;
4048 
4049       a = src->reg.data.id;
4050       b = a + src->reg.size / 4;
4051       for (int r = a; r < b; ++r)
4052          srcs.set(r);
4053    }
4054 
4055    if (!srcs.popCount())
4056       return false;
4057 
4058    // Do not emit a read dependency barrier when the output GPRs are equal to
4059    // the input GPRs (like rcp $r0 $r0) because a write dependency barrier will
4060    // be produced and WaR hazards are prevented.
4061    for (int d = 0; insn->defExists(d); ++d) {
4062       const Value *def = insn->def(d).rep();
4063       if (insn->def(d).getFile() != FILE_GPR)
4064          continue;
4065       if (def->reg.data.id == 255)
4066          continue;
4067 
4068       a = def->reg.data.id;
4069       b = a + def->reg.size / 4;
4070       for (int r = a; r < b; ++r)
4071          defs.set(r);
4072    }
4073 
4074    srcs.andNot(defs);
4075    if (!srcs.popCount())
4076       return false;
4077 
4078    return true;
4079 }
4080 
4081 // Return true when the given instruction needs to emit a write dependency
4082 // barrier (for RaW hazards) because it doesn't operate at a fixed latency, and
4083 // setting the maximum number of stall counts is not enough. This is only legal
4084 // if the instruction output something.
4085 bool
needWrDepBar(const Instruction * insn) const4086 SchedDataCalculatorGM107::needWrDepBar(const Instruction *insn) const
4087 {
4088    if (!targ->isBarrierRequired(insn))
4089       return false;
4090 
4091    for (int d = 0; insn->defExists(d); ++d) {
4092       if (insn->def(d).getFile() == FILE_GPR ||
4093           insn->def(d).getFile() == FILE_FLAGS ||
4094           insn->def(d).getFile() == FILE_PREDICATE)
4095          return true;
4096    }
4097    return false;
4098 }
4099 
4100 // Helper function for findFirstUse() and findFirstDef()
4101 bool
doesInsnWriteTo(const Instruction * insn,const Value * val) const4102 SchedDataCalculatorGM107::doesInsnWriteTo(const Instruction *insn,
4103                                           const Value *val) const
4104 {
4105    if (val->reg.file != FILE_GPR &&
4106        val->reg.file != FILE_PREDICATE &&
4107        val->reg.file != FILE_FLAGS)
4108       return false;
4109 
4110    for (int d = 0; insn->defExists(d); ++d) {
4111       const Value* def = insn->getDef(d);
4112       int minGPR = def->reg.data.id;
4113       int maxGPR = minGPR + def->reg.size / 4 - 1;
4114 
4115       if (def->reg.file != val->reg.file)
4116          continue;
4117 
4118       if (def->reg.file == FILE_GPR) {
4119          if (val->reg.data.id + val->reg.size / 4 - 1 < minGPR ||
4120              val->reg.data.id > maxGPR)
4121             continue;
4122          return true;
4123       } else
4124       if (def->reg.file == FILE_PREDICATE) {
4125          if (val->reg.data.id != minGPR)
4126             continue;
4127          return true;
4128       } else
4129       if (def->reg.file == FILE_FLAGS) {
4130          if (val->reg.data.id != minGPR)
4131             continue;
4132          return true;
4133       }
4134    }
4135 
4136    return false;
4137 }
4138 
4139 // Find the next instruction inside the same basic block which uses (reads or
4140 // writes from) the output of the given instruction in order to avoid RaW and
4141 // WaW hazards.
4142 Instruction *
findFirstUse(const Instruction * bari) const4143 SchedDataCalculatorGM107::findFirstUse(const Instruction *bari) const
4144 {
4145    Instruction *insn, *next;
4146 
4147    if (!bari->defExists(0))
4148       return NULL;
4149 
4150    for (insn = bari->next; insn != NULL; insn = next) {
4151       next = insn->next;
4152 
4153       for (int s = 0; insn->srcExists(s); ++s)
4154          if (doesInsnWriteTo(bari, insn->getSrc(s)))
4155             return insn;
4156 
4157       for (int d = 0; insn->defExists(d); ++d)
4158          if (doesInsnWriteTo(bari, insn->getDef(d)))
4159             return insn;
4160    }
4161    return NULL;
4162 }
4163 
4164 // Find the next instruction inside the same basic block which overwrites, at
4165 // least, one source of the given instruction in order to avoid WaR hazards.
4166 Instruction *
findFirstDef(const Instruction * bari) const4167 SchedDataCalculatorGM107::findFirstDef(const Instruction *bari) const
4168 {
4169    Instruction *insn, *next;
4170 
4171    if (!bari->srcExists(0))
4172       return NULL;
4173 
4174    for (insn = bari->next; insn != NULL; insn = next) {
4175       next = insn->next;
4176 
4177       for (int s = 0; bari->srcExists(s); ++s)
4178          if (doesInsnWriteTo(insn, bari->getSrc(s)))
4179             return insn;
4180    }
4181    return NULL;
4182 }
4183 
4184 // Dependency barriers:
4185 // This pass is a bit ugly and could probably be improved by performing a
4186 // better allocation.
4187 //
4188 // The main idea is to avoid WaR and RaW hazards by emitting read/write
4189 // dependency barriers using the control codes.
4190 bool
insertBarriers(BasicBlock * bb)4191 SchedDataCalculatorGM107::insertBarriers(BasicBlock *bb)
4192 {
4193    std::list<LiveBarUse> live_uses;
4194    std::list<LiveBarDef> live_defs;
4195    Instruction *insn, *next;
4196    BitSet bars(6, 1);
4197    int bar_id;
4198 
4199    for (insn = bb->getEntry(); insn != NULL; insn = next) {
4200       Instruction *usei = NULL, *defi = NULL;
4201       bool need_wr_bar, need_rd_bar;
4202 
4203       next = insn->next;
4204 
4205       // Expire old barrier uses.
4206       for (std::list<LiveBarUse>::iterator it = live_uses.begin();
4207            it != live_uses.end();) {
4208          if (insn->serial >= it->usei->serial) {
4209             int wr = getWrDepBar(it->insn);
4210             emitWtDepBar(insn, wr);
4211             bars.clr(wr); // free barrier
4212             it = live_uses.erase(it);
4213             continue;
4214          }
4215          ++it;
4216       }
4217 
4218       // Expire old barrier defs.
4219       for (std::list<LiveBarDef>::iterator it = live_defs.begin();
4220            it != live_defs.end();) {
4221          if (insn->serial >= it->defi->serial) {
4222             int rd = getRdDepBar(it->insn);
4223             emitWtDepBar(insn, rd);
4224             bars.clr(rd); // free barrier
4225             it = live_defs.erase(it);
4226             continue;
4227          }
4228          ++it;
4229       }
4230 
4231       need_wr_bar = needWrDepBar(insn);
4232       need_rd_bar = needRdDepBar(insn);
4233 
4234       if (need_wr_bar) {
4235          // When the instruction requires to emit a write dependency barrier
4236          // (all which write something at a variable latency), find the next
4237          // instruction which reads the outputs (or writes to them, potentially
4238          // completing before this insn.
4239          usei = findFirstUse(insn);
4240 
4241          // Allocate and emit a new barrier.
4242          bar_id = bars.findFreeRange(1);
4243          if (bar_id == -1)
4244             bar_id = 5;
4245          bars.set(bar_id);
4246          emitWrDepBar(insn, bar_id);
4247          if (usei)
4248             live_uses.push_back(LiveBarUse(insn, usei));
4249       }
4250 
4251       if (need_rd_bar) {
4252          // When the instruction requires to emit a read dependency barrier
4253          // (all which read something at a variable latency), find the next
4254          // instruction which will write the inputs.
4255          defi = findFirstDef(insn);
4256 
4257          if (usei && defi && usei->serial <= defi->serial)
4258             continue;
4259 
4260          // Allocate and emit a new barrier.
4261          bar_id = bars.findFreeRange(1);
4262          if (bar_id == -1)
4263             bar_id = 5;
4264          bars.set(bar_id);
4265          emitRdDepBar(insn, bar_id);
4266          if (defi)
4267             live_defs.push_back(LiveBarDef(insn, defi));
4268       }
4269    }
4270 
4271    // Remove unnecessary barrier waits.
4272    BitSet alive_bars(6, 1);
4273    for (insn = bb->getEntry(); insn != NULL; insn = next) {
4274       int wr, rd, wt;
4275 
4276       next = insn->next;
4277 
4278       wr = getWrDepBar(insn);
4279       rd = getRdDepBar(insn);
4280       wt = getWtDepBar(insn);
4281 
4282       for (int idx = 0; idx < 6; ++idx) {
4283          if (!(wt & (1 << idx)))
4284             continue;
4285          if (!alive_bars.test(idx)) {
4286             insn->sched &= ~(1 << (11  + idx));
4287          } else {
4288             alive_bars.clr(idx);
4289          }
4290       }
4291 
4292       if (wr < 6)
4293          alive_bars.set(wr);
4294       if (rd < 6)
4295          alive_bars.set(rd);
4296    }
4297 
4298    return true;
4299 }
4300 
4301 bool
visit(Function * func)4302 SchedDataCalculatorGM107::visit(Function *func)
4303 {
4304    ArrayList insns;
4305 
4306    func->orderInstructions(insns);
4307 
4308    scoreBoards.resize(func->cfg.getSize());
4309    for (size_t i = 0; i < scoreBoards.size(); ++i)
4310       scoreBoards[i].wipe();
4311    return true;
4312 }
4313 
4314 bool
visit(BasicBlock * bb)4315 SchedDataCalculatorGM107::visit(BasicBlock *bb)
4316 {
4317    Instruction *insn, *next = NULL;
4318    int cycle = 0;
4319 
4320    for (Instruction *insn = bb->getEntry(); insn; insn = insn->next) {
4321       /*XXX*/
4322       insn->sched = 0x7e0;
4323    }
4324 
4325    if (!debug_get_bool_option("NV50_PROG_SCHED", true))
4326       return true;
4327 
4328    // Insert read/write dependency barriers for instructions which don't
4329    // operate at a fixed latency.
4330    insertBarriers(bb);
4331 
4332    score = &scoreBoards.at(bb->getId());
4333 
4334    for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
4335       // back branches will wait until all target dependencies are satisfied
4336       if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
4337          continue;
4338       BasicBlock *in = BasicBlock::get(ei.getNode());
4339       score->setMax(&scoreBoards.at(in->getId()));
4340    }
4341 
4342 #ifdef GM107_DEBUG_SCHED_DATA
4343    INFO("=== BB:%i initial scores\n", bb->getId());
4344    score->print(cycle);
4345 #endif
4346 
4347    // Because barriers are allocated locally (intra-BB), we have to make sure
4348    // that all produced barriers have been consumed before entering inside a
4349    // new basic block. The best way is to do a global allocation pre RA but
4350    // it's really more difficult, especially because of the phi nodes. Anyways,
4351    // it seems like that waiting on a barrier which has already been consumed
4352    // doesn't add any additional cost, it's just not elegant!
4353    Instruction *start = bb->getEntry();
4354    if (start && bb->cfg.incidentCount() > 0) {
4355       for (int b = 0; b < 6; b++)
4356          emitWtDepBar(start, b);
4357    }
4358 
4359    for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
4360       next = insn->next;
4361 
4362       commitInsn(insn, cycle);
4363       int delay = calcDelay(next, cycle);
4364       setDelay(insn, delay, next);
4365       cycle += getStall(insn);
4366 
4367       setReuseFlag(insn);
4368 
4369       // XXX: The yield flag seems to destroy a bunch of things when it is
4370       // set on every instruction, need investigation.
4371       //emitYield(insn);
4372 
4373 #ifdef GM107_DEBUG_SCHED_DATA
4374       printSchedInfo(cycle, insn);
4375       insn->print();
4376       next->print();
4377 #endif
4378    }
4379 
4380    if (!insn)
4381       return true;
4382    commitInsn(insn, cycle);
4383 
4384    int bbDelay = -1;
4385 
4386 #ifdef GM107_DEBUG_SCHED_DATA
4387    fprintf(stderr, "last instruction is : ");
4388    insn->print();
4389    fprintf(stderr, "cycle=%d\n", cycle);
4390 #endif
4391 
4392    for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
4393       BasicBlock *out = BasicBlock::get(ei.getNode());
4394 
4395       if (ei.getType() != Graph::Edge::BACK) {
4396          // Only test the first instruction of the outgoing block.
4397          next = out->getEntry();
4398          if (next) {
4399             bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
4400          } else {
4401             // When the outgoing BB is empty, make sure to set the number of
4402             // stall counts needed by the instruction because we don't know the
4403             // next instruction.
4404             bbDelay = MAX2(bbDelay, targ->getLatency(insn));
4405          }
4406       } else {
4407          // Wait until all dependencies are satisfied.
4408          const int regsFree = score->getLatest();
4409          next = out->getFirst();
4410          for (int c = cycle; next && c < regsFree; next = next->next) {
4411             bbDelay = MAX2(bbDelay, calcDelay(next, c));
4412             c += getStall(next);
4413          }
4414          next = NULL;
4415       }
4416    }
4417    if (bb->cfg.outgoingCount() != 1)
4418       next = NULL;
4419    setDelay(insn, bbDelay, next);
4420    cycle += getStall(insn);
4421 
4422    score->rebase(cycle); // common base for initializing out blocks' scores
4423    return true;
4424 }
4425 
4426 /*******************************************************************************
4427  * main
4428  ******************************************************************************/
4429 
4430 void
prepareEmission(Function * func)4431 CodeEmitterGM107::prepareEmission(Function *func)
4432 {
4433    SchedDataCalculatorGM107 sched(targGM107);
4434    CodeEmitter::prepareEmission(func);
4435    sched.run(func, true, true);
4436 }
4437 
// Returns how many 24-byte groups are needed to hold `size` bytes, rounding
// up (0 -> 0, 1..24 -> 1, 25..48 -> 2, ...).
static inline uint32_t sizeToBundlesGM107(uint32_t size)
{
   const uint32_t groupBytes = 24;

   return (size + (groupBytes - 1)) / groupBytes;
}
4442 
4443 void
prepareEmission(Program * prog)4444 CodeEmitterGM107::prepareEmission(Program *prog)
4445 {
4446    for (ArrayList::Iterator fi = prog->allFuncs.iterator();
4447         !fi.end(); fi.next()) {
4448       Function *func = reinterpret_cast<Function *>(fi.get());
4449       func->binPos = prog->binSize;
4450       prepareEmission(func);
4451 
4452       // adjust sizes & positions for schedulding info:
4453       if (prog->getTarget()->hasSWSched) {
4454          uint32_t adjPos = func->binPos;
4455          BasicBlock *bb = NULL;
4456          for (int i = 0; i < func->bbCount; ++i) {
4457             bb = func->bbArray[i];
4458             int32_t adjSize = bb->binSize;
4459             if (adjPos % 32) {
4460                adjSize -= 32 - adjPos % 32;
4461                if (adjSize < 0)
4462                   adjSize = 0;
4463             }
4464             adjSize = bb->binSize + sizeToBundlesGM107(adjSize) * 8;
4465             bb->binPos = adjPos;
4466             bb->binSize = adjSize;
4467             adjPos += adjSize;
4468          }
4469          if (bb)
4470             func->binSize = adjPos - func->binPos;
4471       }
4472 
4473       prog->binSize += func->binSize;
4474    }
4475 }
4476 
CodeEmitterGM107(const TargetGM107 * target)4477 CodeEmitterGM107::CodeEmitterGM107(const TargetGM107 *target)
4478    : CodeEmitter(target),
4479      targGM107(target),
4480      writeIssueDelays(target->hasSWSched)
4481 {
4482    code = NULL;
4483    codeSize = codeSizeLimit = 0;
4484    relocInfo = NULL;
4485 }
4486 
4487 CodeEmitter *
createCodeEmitterGM107(Program::Type type)4488 TargetGM107::createCodeEmitterGM107(Program::Type type)
4489 {
4490    CodeEmitterGM107 *emit = new CodeEmitterGM107(this);
4491    emit->setProgramType(type);
4492    return emit;
4493 }
4494 
4495 } // namespace nv50_ir
4496