• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2014 Red Hat Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Ben Skeggs <bskeggs@redhat.com>
23  */
24 
25 #include "codegen/nv50_ir_target_gm107.h"
26 
27 //#define GM107_DEBUG_SCHED_DATA
28 
29 namespace nv50_ir {
30 
31 class CodeEmitterGM107 : public CodeEmitter
32 {
33 public:
34    CodeEmitterGM107(const TargetGM107 *);
35 
36    virtual bool emitInstruction(Instruction *);
37    virtual uint32_t getMinEncodingSize(const Instruction *) const;
38 
39    virtual void prepareEmission(Program *);
40    virtual void prepareEmission(Function *);
41 
setProgramType(Program::Type pType)42    inline void setProgramType(Program::Type pType) { progType = pType; }
43 
44 private:
45    const TargetGM107 *targGM107;
46 
47    Program::Type progType;
48 
49    const Instruction *insn;
50    const bool writeIssueDelays;
51    uint32_t *data;
52 
53 private:
54    inline void emitField(uint32_t *, int, int, uint32_t);
emitField(int b,int s,uint32_t v)55    inline void emitField(int b, int s, uint32_t v) { emitField(code, b, s, v); }
56 
57    inline void emitInsn(uint32_t, bool);
emitInsn(uint32_t o)58    inline void emitInsn(uint32_t o) { emitInsn(o, true); }
59    inline void emitPred();
60    inline void emitGPR(int, const Value *);
emitGPR(int pos)61    inline void emitGPR(int pos) {
62       emitGPR(pos, (const Value *)NULL);
63    }
emitGPR(int pos,const ValueRef & ref)64    inline void emitGPR(int pos, const ValueRef &ref) {
65       emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL);
66    }
emitGPR(int pos,const ValueRef * ref)67    inline void emitGPR(int pos, const ValueRef *ref) {
68       emitGPR(pos, ref ? ref->rep() : (const Value *)NULL);
69    }
emitGPR(int pos,const ValueDef & def)70    inline void emitGPR(int pos, const ValueDef &def) {
71       emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL);
72    }
73    inline void emitSYS(int, const Value *);
emitSYS(int pos,const ValueRef & ref)74    inline void emitSYS(int pos, const ValueRef &ref) {
75       emitSYS(pos, ref.get() ? ref.rep() : (const Value *)NULL);
76    }
77    inline void emitPRED(int, const Value *);
emitPRED(int pos)78    inline void emitPRED(int pos) {
79       emitPRED(pos, (const Value *)NULL);
80    }
emitPRED(int pos,const ValueRef & ref)81    inline void emitPRED(int pos, const ValueRef &ref) {
82       emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL);
83    }
emitPRED(int pos,const ValueDef & def)84    inline void emitPRED(int pos, const ValueDef &def) {
85       emitPRED(pos, def.get() ? def.rep() : (const Value *)NULL);
86    }
87    inline void emitADDR(int, int, int, int, const ValueRef &);
88    inline void emitCBUF(int, int, int, int, int, const ValueRef &);
89    inline bool longIMMD(const ValueRef &);
90    inline void emitIMMD(int, int, const ValueRef &);
91 
92    void emitCond3(int, CondCode);
93    void emitCond4(int, CondCode);
emitCond5(int pos,CondCode cc)94    void emitCond5(int pos, CondCode cc) { emitCond4(pos, cc); }
95    inline void emitO(int);
96    inline void emitP(int);
97    inline void emitSAT(int);
98    inline void emitCC(int);
99    inline void emitX(int);
100    inline void emitABS(int, const ValueRef &);
101    inline void emitNEG(int, const ValueRef &);
102    inline void emitNEG2(int, const ValueRef &, const ValueRef &);
103    inline void emitFMZ(int, int);
104    inline void emitRND(int, RoundMode, int);
emitRND(int pos)105    inline void emitRND(int pos) {
106       emitRND(pos, insn->rnd, -1);
107    }
108    inline void emitPDIV(int);
109    inline void emitINV(int, const ValueRef &);
110 
111    void emitEXIT();
112    void emitBRA();
113    void emitCAL();
114    void emitPCNT();
115    void emitCONT();
116    void emitPBK();
117    void emitBRK();
118    void emitPRET();
119    void emitRET();
120    void emitSSY();
121    void emitSYNC();
122    void emitSAM();
123    void emitRAM();
124 
125    void emitMOV();
126    void emitS2R();
127    void emitF2F();
128    void emitF2I();
129    void emitI2F();
130    void emitI2I();
131    void emitSEL();
132    void emitSHFL();
133 
134    void emitDADD();
135    void emitDMUL();
136    void emitDFMA();
137    void emitDMNMX();
138    void emitDSET();
139    void emitDSETP();
140 
141    void emitFADD();
142    void emitFMUL();
143    void emitFFMA();
144    void emitMUFU();
145    void emitFMNMX();
146    void emitRRO();
147    void emitFCMP();
148    void emitFSET();
149    void emitFSETP();
150    void emitFSWZADD();
151 
152    void emitLOP();
153    void emitNOT();
154    void emitIADD();
155    void emitIMUL();
156    void emitIMAD();
157    void emitISCADD();
158    void emitIMNMX();
159    void emitICMP();
160    void emitISET();
161    void emitISETP();
162    void emitSHL();
163    void emitSHR();
164    void emitPOPC();
165    void emitBFI();
166    void emitBFE();
167    void emitFLO();
168 
169    void emitLDSTs(int, DataType);
170    void emitLDSTc(int);
171    void emitLDC();
172    void emitLDL();
173    void emitLDS();
174    void emitLD();
175    void emitSTL();
176    void emitSTS();
177    void emitST();
178    void emitALD();
179    void emitAST();
180    void emitISBERD();
181    void emitAL2P();
182    void emitIPA();
183    void emitATOM();
184    void emitATOMS();
185    void emitRED();
186    void emitCCTL();
187 
188    void emitPIXLD();
189 
190    void emitTEXs(int);
191    void emitTEX();
192    void emitTLD();
193    void emitTLD4();
194    void emitTXD();
195    void emitTXQ();
196    void emitTMML();
197    void emitDEPBAR();
198 
199    void emitNOP();
200    void emitKIL();
201    void emitOUT();
202 
203    void emitBAR();
204    void emitMEMBAR();
205 
206    void emitVOTE();
207 
208    void emitSUTarget();
209    void emitSUHandle(const int s);
210    void emitSUSTx();
211    void emitSULDx();
212    void emitSUREDx();
213 };
214 
215 /*******************************************************************************
216  * general instruction layout/fields
217  ******************************************************************************/
218 
219 void
emitField(uint32_t * data,int b,int s,uint32_t v)220 CodeEmitterGM107::emitField(uint32_t *data, int b, int s, uint32_t v)
221 {
222    if (b >= 0) {
223       uint32_t m = ((1ULL << s) - 1);
224       uint64_t d = (uint64_t)(v & m) << b;
225       assert(!(v & ~m) || (v & ~m) == ~m);
226       data[1] |= d >> 32;
227       data[0] |= d;
228    }
229 }
230 
231 void
emitPred()232 CodeEmitterGM107::emitPred()
233 {
234    if (insn->predSrc >= 0) {
235       emitField(16, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id);
236       emitField(19, 1, insn->cc == CC_NOT_P);
237    } else {
238       emitField(16, 3, 7);
239    }
240 }
241 
242 void
emitInsn(uint32_t hi,bool pred)243 CodeEmitterGM107::emitInsn(uint32_t hi, bool pred)
244 {
245    code[0] = 0x00000000;
246    code[1] = hi;
247    if (pred)
248       emitPred();
249 }
250 
251 void
emitGPR(int pos,const Value * val)252 CodeEmitterGM107::emitGPR(int pos, const Value *val)
253 {
254    emitField(pos, 8, val ? val->reg.data.id : 255);
255 }
256 
257 void
emitSYS(int pos,const Value * val)258 CodeEmitterGM107::emitSYS(int pos, const Value *val)
259 {
260    int id = val ? val->reg.data.id : -1;
261 
262    switch (id) {
263    case SV_LANEID         : id = 0x00; break;
264    case SV_VERTEX_COUNT   : id = 0x10; break;
265    case SV_INVOCATION_ID  : id = 0x11; break;
266    case SV_THREAD_KILL    : id = 0x13; break;
267    case SV_INVOCATION_INFO: id = 0x1d; break;
268    case SV_TID            : id = 0x21 + val->reg.data.sv.index; break;
269    case SV_CTAID          : id = 0x25 + val->reg.data.sv.index; break;
270    default:
271       assert(!"invalid system value");
272       id = 0;
273       break;
274    }
275 
276    emitField(pos, 8, id);
277 }
278 
279 void
emitPRED(int pos,const Value * val)280 CodeEmitterGM107::emitPRED(int pos, const Value *val)
281 {
282    emitField(pos, 3, val ? val->reg.data.id : 7);
283 }
284 
285 void
emitADDR(int gpr,int off,int len,int shr,const ValueRef & ref)286 CodeEmitterGM107::emitADDR(int gpr, int off, int len, int shr,
287                            const ValueRef &ref)
288 {
289    const Value *v = ref.get();
290    assert(!(v->reg.data.offset & ((1 << shr) - 1)));
291    if (gpr >= 0)
292       emitGPR(gpr, ref.getIndirect(0));
293    emitField(off, len, v->reg.data.offset >> shr);
294 }
295 
296 void
emitCBUF(int buf,int gpr,int off,int len,int shr,const ValueRef & ref)297 CodeEmitterGM107::emitCBUF(int buf, int gpr, int off, int len, int shr,
298                            const ValueRef &ref)
299 {
300    const Value *v = ref.get();
301    const Symbol *s = v->asSym();
302 
303    assert(!(s->reg.data.offset & ((1 << shr) - 1)));
304 
305    emitField(buf,  5, v->reg.fileIndex);
306    if (gpr >= 0)
307       emitGPR(gpr, ref.getIndirect(0));
308    emitField(off, 16, s->reg.data.offset >> shr);
309 }
310 
311 bool
longIMMD(const ValueRef & ref)312 CodeEmitterGM107::longIMMD(const ValueRef &ref)
313 {
314    if (ref.getFile() == FILE_IMMEDIATE) {
315       const ImmediateValue *imm = ref.get()->asImm();
316       if (isFloatType(insn->sType)) {
317          if ((imm->reg.data.u32 & 0x00000fff) != 0x00000000)
318             return true;
319       } else {
320          if ((imm->reg.data.u32 & 0xfff00000) != 0x00000000 &&
321              (imm->reg.data.u32 & 0xfff00000) != 0xfff00000)
322             return true;
323       }
324    }
325    return false;
326 }
327 
328 void
emitIMMD(int pos,int len,const ValueRef & ref)329 CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef &ref)
330 {
331    const ImmediateValue *imm = ref.get()->asImm();
332    uint32_t val = imm->reg.data.u32;
333 
334    if (len == 19) {
335       if (insn->sType == TYPE_F32 || insn->sType == TYPE_F16) {
336          assert(!(val & 0x00000fff));
337          val >>= 12;
338       } else if (insn->sType == TYPE_F64) {
339          assert(!(imm->reg.data.u64 & 0x00000fffffffffffULL));
340          val = imm->reg.data.u64 >> 44;
341       }
342       assert(!(val & 0xfff00000) || (val & 0xfff00000) == 0xfff00000);
343       emitField( 56,   1, (val & 0x80000) >> 19);
344       emitField(pos, len, (val & 0x7ffff));
345    } else {
346       emitField(pos, len, val);
347    }
348 }
349 
350 /*******************************************************************************
351  * modifiers
352  ******************************************************************************/
353 
354 void
emitCond3(int pos,CondCode code)355 CodeEmitterGM107::emitCond3(int pos, CondCode code)
356 {
357    int data = 0;
358 
359    switch (code) {
360    case CC_FL : data = 0x00; break;
361    case CC_LTU:
362    case CC_LT : data = 0x01; break;
363    case CC_EQU:
364    case CC_EQ : data = 0x02; break;
365    case CC_LEU:
366    case CC_LE : data = 0x03; break;
367    case CC_GTU:
368    case CC_GT : data = 0x04; break;
369    case CC_NEU:
370    case CC_NE : data = 0x05; break;
371    case CC_GEU:
372    case CC_GE : data = 0x06; break;
373    case CC_TR : data = 0x07; break;
374    default:
375       assert(!"invalid cond3");
376       break;
377    }
378 
379    emitField(pos, 3, data);
380 }
381 
382 void
emitCond4(int pos,CondCode code)383 CodeEmitterGM107::emitCond4(int pos, CondCode code)
384 {
385    int data = 0;
386 
387    switch (code) {
388    case CC_FL: data = 0x00; break;
389    case CC_LT: data = 0x01; break;
390    case CC_EQ: data = 0x02; break;
391    case CC_LE: data = 0x03; break;
392    case CC_GT: data = 0x04; break;
393    case CC_NE: data = 0x05; break;
394    case CC_GE: data = 0x06; break;
395 //   case CC_NUM: data = 0x07; break;
396 //   case CC_NAN: data = 0x08; break;
397    case CC_LTU: data = 0x09; break;
398    case CC_EQU: data = 0x0a; break;
399    case CC_LEU: data = 0x0b; break;
400    case CC_GTU: data = 0x0c; break;
401    case CC_NEU: data = 0x0d; break;
402    case CC_GEU: data = 0x0e; break;
403    case CC_TR:  data = 0x0f; break;
404    default:
405       assert(!"invalid cond4");
406       break;
407    }
408 
409    emitField(pos, 4, data);
410 }
411 
412 void
emitO(int pos)413 CodeEmitterGM107::emitO(int pos)
414 {
415    emitField(pos, 1, insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT);
416 }
417 
418 void
emitP(int pos)419 CodeEmitterGM107::emitP(int pos)
420 {
421    emitField(pos, 1, insn->perPatch);
422 }
423 
424 void
emitSAT(int pos)425 CodeEmitterGM107::emitSAT(int pos)
426 {
427    emitField(pos, 1, insn->saturate);
428 }
429 
430 void
emitCC(int pos)431 CodeEmitterGM107::emitCC(int pos)
432 {
433    emitField(pos, 1, insn->flagsDef >= 0);
434 }
435 
436 void
emitX(int pos)437 CodeEmitterGM107::emitX(int pos)
438 {
439    emitField(pos, 1, insn->flagsSrc >= 0);
440 }
441 
442 void
emitABS(int pos,const ValueRef & ref)443 CodeEmitterGM107::emitABS(int pos, const ValueRef &ref)
444 {
445    emitField(pos, 1, ref.mod.abs());
446 }
447 
448 void
emitNEG(int pos,const ValueRef & ref)449 CodeEmitterGM107::emitNEG(int pos, const ValueRef &ref)
450 {
451    emitField(pos, 1, ref.mod.neg());
452 }
453 
454 void
emitNEG2(int pos,const ValueRef & a,const ValueRef & b)455 CodeEmitterGM107::emitNEG2(int pos, const ValueRef &a, const ValueRef &b)
456 {
457    emitField(pos, 1, a.mod.neg() ^ b.mod.neg());
458 }
459 
460 void
emitFMZ(int pos,int len)461 CodeEmitterGM107::emitFMZ(int pos, int len)
462 {
463    emitField(pos, len, insn->dnz << 1 | insn->ftz);
464 }
465 
466 void
emitRND(int rmp,RoundMode rnd,int rip)467 CodeEmitterGM107::emitRND(int rmp, RoundMode rnd, int rip)
468 {
469    int rm = 0, ri = 0;
470    switch (rnd) {
471    case ROUND_NI: ri = 1;
472    case ROUND_N : rm = 0; break;
473    case ROUND_MI: ri = 1;
474    case ROUND_M : rm = 1; break;
475    case ROUND_PI: ri = 1;
476    case ROUND_P : rm = 2; break;
477    case ROUND_ZI: ri = 1;
478    case ROUND_Z : rm = 3; break;
479    default:
480       assert(!"invalid round mode");
481       break;
482    }
483    emitField(rip, 1, ri);
484    emitField(rmp, 2, rm);
485 }
486 
487 void
emitPDIV(int pos)488 CodeEmitterGM107::emitPDIV(int pos)
489 {
490    assert(insn->postFactor >= -3 && insn->postFactor <= 3);
491    if (insn->postFactor > 0)
492       emitField(pos, 3, 7 - insn->postFactor);
493    else
494       emitField(pos, 3, 0 - insn->postFactor);
495 }
496 
497 void
emitINV(int pos,const ValueRef & ref)498 CodeEmitterGM107::emitINV(int pos, const ValueRef &ref)
499 {
500    emitField(pos, 1, !!(ref.mod & Modifier(NV50_IR_MOD_NOT)));
501 }
502 
503 /*******************************************************************************
504  * control flow
505  ******************************************************************************/
506 
507 void
emitEXIT()508 CodeEmitterGM107::emitEXIT()
509 {
510    emitInsn (0xe3000000);
511    emitCond5(0x00, CC_TR);
512 }
513 
514 void
emitBRA()515 CodeEmitterGM107::emitBRA()
516 {
517    const FlowInstruction *insn = this->insn->asFlow();
518    int gpr = -1;
519 
520    if (insn->indirect) {
521       if (insn->absolute)
522          emitInsn(0xe2000000); // JMX
523       else
524          emitInsn(0xe2500000); // BRX
525       gpr = 0x08;
526    } else {
527       if (insn->absolute)
528          emitInsn(0xe2100000); // JMP
529       else
530          emitInsn(0xe2400000); // BRA
531       emitField(0x07, 1, insn->allWarp);
532    }
533 
534    emitField(0x06, 1, insn->limit);
535    emitCond5(0x00, CC_TR);
536 
537    if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
538       int32_t pos = insn->target.bb->binPos;
539       if (writeIssueDelays && !(pos & 0x1f))
540          pos += 8;
541       if (!insn->absolute)
542          emitField(0x14, 24, pos - (codeSize + 8));
543       else
544          emitField(0x14, 32, pos);
545    } else {
546       emitCBUF (0x24, gpr, 20, 16, 0, insn->src(0));
547       emitField(0x05, 1, 1);
548    }
549 }
550 
551 void
emitCAL()552 CodeEmitterGM107::emitCAL()
553 {
554    const FlowInstruction *insn = this->insn->asFlow();
555 
556    if (insn->absolute) {
557       emitInsn(0xe2200000, 0); // JCAL
558    } else {
559       emitInsn(0xe2600000, 0); // CAL
560    }
561 
562    if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
563       if (!insn->absolute)
564          emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
565       else {
566          if (insn->builtin) {
567             int pcAbs = targGM107->getBuiltinOffset(insn->target.builtin);
568             addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfff00000,  20);
569             addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x000fffff, -12);
570          } else {
571             emitField(0x14, 32, insn->target.bb->binPos);
572          }
573       }
574    } else {
575       emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
576       emitField(0x05, 1, 1);
577    }
578 }
579 
580 void
emitPCNT()581 CodeEmitterGM107::emitPCNT()
582 {
583    const FlowInstruction *insn = this->insn->asFlow();
584 
585    emitInsn(0xe2b00000, 0);
586 
587    if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
588       emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
589    } else {
590       emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
591       emitField(0x05, 1, 1);
592    }
593 }
594 
595 void
emitCONT()596 CodeEmitterGM107::emitCONT()
597 {
598    emitInsn (0xe3500000);
599    emitCond5(0x00, CC_TR);
600 }
601 
602 void
emitPBK()603 CodeEmitterGM107::emitPBK()
604 {
605    const FlowInstruction *insn = this->insn->asFlow();
606 
607    emitInsn(0xe2a00000, 0);
608 
609    if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
610       emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
611    } else {
612       emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
613       emitField(0x05, 1, 1);
614    }
615 }
616 
617 void
emitBRK()618 CodeEmitterGM107::emitBRK()
619 {
620    emitInsn (0xe3400000);
621    emitCond5(0x00, CC_TR);
622 }
623 
624 void
emitPRET()625 CodeEmitterGM107::emitPRET()
626 {
627    const FlowInstruction *insn = this->insn->asFlow();
628 
629    emitInsn(0xe2700000, 0);
630 
631    if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
632       emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
633    } else {
634       emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
635       emitField(0x05, 1, 1);
636    }
637 }
638 
639 void
emitRET()640 CodeEmitterGM107::emitRET()
641 {
642    emitInsn (0xe3200000);
643    emitCond5(0x00, CC_TR);
644 }
645 
646 void
emitSSY()647 CodeEmitterGM107::emitSSY()
648 {
649    const FlowInstruction *insn = this->insn->asFlow();
650 
651    emitInsn(0xe2900000, 0);
652 
653    if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
654       emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
655    } else {
656       emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
657       emitField(0x05, 1, 1);
658    }
659 }
660 
661 void
emitSYNC()662 CodeEmitterGM107::emitSYNC()
663 {
664    emitInsn (0xf0f80000);
665    emitCond5(0x00, CC_TR);
666 }
667 
668 void
emitSAM()669 CodeEmitterGM107::emitSAM()
670 {
671    emitInsn(0xe3700000, 0);
672 }
673 
674 void
emitRAM()675 CodeEmitterGM107::emitRAM()
676 {
677    emitInsn(0xe3800000, 0);
678 }
679 
680 /*******************************************************************************
681  * predicate/cc
682  ******************************************************************************/
683 
684 /*******************************************************************************
685  * movement / conversion
686  ******************************************************************************/
687 
688 void
emitMOV()689 CodeEmitterGM107::emitMOV()
690 {
691    if (insn->src(0).getFile() != FILE_IMMEDIATE) {
692       switch (insn->src(0).getFile()) {
693       case FILE_GPR:
694          if (insn->def(0).getFile() == FILE_PREDICATE) {
695             emitInsn(0x5b6a0000);
696             emitGPR (0x08);
697          } else {
698             emitInsn(0x5c980000);
699          }
700          emitGPR (0x14, insn->src(0));
701          break;
702       case FILE_MEMORY_CONST:
703          emitInsn(0x4c980000);
704          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
705          break;
706       case FILE_IMMEDIATE:
707          emitInsn(0x38980000);
708          emitIMMD(0x14, 19, insn->src(0));
709          break;
710       case FILE_PREDICATE:
711          emitInsn(0x50880000);
712          emitPRED(0x0c, insn->src(0));
713          emitPRED(0x1d);
714          emitPRED(0x27);
715          break;
716       default:
717          assert(!"bad src file");
718          break;
719       }
720       if (insn->def(0).getFile() != FILE_PREDICATE &&
721           insn->src(0).getFile() != FILE_PREDICATE)
722          emitField(0x27, 4, insn->lanes);
723    } else {
724       emitInsn (0x01000000);
725       emitIMMD (0x14, 32, insn->src(0));
726       emitField(0x0c, 4, insn->lanes);
727    }
728 
729    if (insn->def(0).getFile() == FILE_PREDICATE) {
730       emitPRED(0x27);
731       emitPRED(0x03, insn->def(0));
732       emitPRED(0x00);
733    } else {
734       emitGPR(0x00, insn->def(0));
735    }
736 }
737 
738 void
emitS2R()739 CodeEmitterGM107::emitS2R()
740 {
741    emitInsn(0xf0c80000);
742    emitSYS (0x14, insn->src(0));
743    emitGPR (0x00, insn->def(0));
744 }
745 
746 void
emitF2F()747 CodeEmitterGM107::emitF2F()
748 {
749    RoundMode rnd = insn->rnd;
750 
751    switch (insn->op) {
752    case OP_FLOOR: rnd = ROUND_MI; break;
753    case OP_CEIL : rnd = ROUND_PI; break;
754    case OP_TRUNC: rnd = ROUND_ZI; break;
755    default:
756       break;
757    }
758 
759    switch (insn->src(0).getFile()) {
760    case FILE_GPR:
761       emitInsn(0x5ca80000);
762       emitGPR (0x14, insn->src(0));
763       break;
764    case FILE_MEMORY_CONST:
765       emitInsn(0x4ca80000);
766       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
767       break;
768    case FILE_IMMEDIATE:
769       emitInsn(0x38a80000);
770       emitIMMD(0x14, 19, insn->src(0));
771       break;
772    default:
773       assert(!"bad src0 file");
774       break;
775    }
776 
777    emitField(0x32, 1, (insn->op == OP_SAT) || insn->saturate);
778    emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
779    emitCC   (0x2f);
780    emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
781    emitFMZ  (0x2c, 1);
782    emitField(0x29, 1, insn->subOp);
783    emitRND  (0x27, rnd, 0x2a);
784    emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
785    emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
786    emitGPR  (0x00, insn->def(0));
787 }
788 
789 void
emitF2I()790 CodeEmitterGM107::emitF2I()
791 {
792    RoundMode rnd = insn->rnd;
793 
794    switch (insn->op) {
795    case OP_FLOOR: rnd = ROUND_M; break;
796    case OP_CEIL : rnd = ROUND_P; break;
797    case OP_TRUNC: rnd = ROUND_Z; break;
798    default:
799       break;
800    }
801 
802    switch (insn->src(0).getFile()) {
803    case FILE_GPR:
804       emitInsn(0x5cb00000);
805       emitGPR (0x14, insn->src(0));
806       break;
807    case FILE_MEMORY_CONST:
808       emitInsn(0x4cb00000);
809       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
810       break;
811    case FILE_IMMEDIATE:
812       emitInsn(0x38b00000);
813       emitIMMD(0x14, 19, insn->src(0));
814       break;
815    default:
816       assert(!"bad src0 file");
817       break;
818    }
819 
820    emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
821    emitCC   (0x2f);
822    emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
823    emitFMZ  (0x2c, 1);
824    emitRND  (0x27, rnd, 0x2a);
825    emitField(0x0c, 1, isSignedType(insn->dType));
826    emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
827    emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
828    emitGPR  (0x00, insn->def(0));
829 }
830 
831 void
emitI2F()832 CodeEmitterGM107::emitI2F()
833 {
834    RoundMode rnd = insn->rnd;
835 
836    switch (insn->op) {
837    case OP_FLOOR: rnd = ROUND_M; break;
838    case OP_CEIL : rnd = ROUND_P; break;
839    case OP_TRUNC: rnd = ROUND_Z; break;
840    default:
841       break;
842    }
843 
844    switch (insn->src(0).getFile()) {
845    case FILE_GPR:
846       emitInsn(0x5cb80000);
847       emitGPR (0x14, insn->src(0));
848       break;
849    case FILE_MEMORY_CONST:
850       emitInsn(0x4cb80000);
851       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
852       break;
853    case FILE_IMMEDIATE:
854       emitInsn(0x38b80000);
855       emitIMMD(0x14, 19, insn->src(0));
856       break;
857    default:
858       assert(!"bad src0 file");
859       break;
860    }
861 
862    emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
863    emitCC   (0x2f);
864    emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
865    emitField(0x29, 2, insn->subOp);
866    emitRND  (0x27, rnd, -1);
867    emitField(0x0d, 1, isSignedType(insn->sType));
868    emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
869    emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
870    emitGPR  (0x00, insn->def(0));
871 }
872 
873 void
emitI2I()874 CodeEmitterGM107::emitI2I()
875 {
876    switch (insn->src(0).getFile()) {
877    case FILE_GPR:
878       emitInsn(0x5ce00000);
879       emitGPR (0x14, insn->src(0));
880       break;
881    case FILE_MEMORY_CONST:
882       emitInsn(0x4ce00000);
883       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
884       break;
885    case FILE_IMMEDIATE:
886       emitInsn(0x38e00000);
887       emitIMMD(0x14, 19, insn->src(0));
888       break;
889    default:
890       assert(!"bad src0 file");
891       break;
892    }
893 
894    emitSAT  (0x32);
895    emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
896    emitCC   (0x2f);
897    emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
898    emitField(0x29, 2, insn->subOp);
899    emitField(0x0d, 1, isSignedType(insn->sType));
900    emitField(0x0c, 1, isSignedType(insn->dType));
901    emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
902    emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
903    emitGPR  (0x00, insn->def(0));
904 }
905 
906 static void
selpFlip(const FixupEntry * entry,uint32_t * code,const FixupData & data)907 selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
908 {
909    int loc = entry->loc;
910    if (data.force_persample_interp)
911       code[loc + 1] |= 1 << 10;
912    else
913       code[loc + 1] &= ~(1 << 10);
914 }
915 
916 void
emitSEL()917 CodeEmitterGM107::emitSEL()
918 {
919    switch (insn->src(1).getFile()) {
920    case FILE_GPR:
921       emitInsn(0x5ca00000);
922       emitGPR (0x14, insn->src(1));
923       break;
924    case FILE_MEMORY_CONST:
925       emitInsn(0x4ca00000);
926       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
927       break;
928    case FILE_IMMEDIATE:
929       emitInsn(0x38a00000);
930       emitIMMD(0x14, 19, insn->src(1));
931       break;
932    default:
933       assert(!"bad src1 file");
934       break;
935    }
936 
937    emitINV (0x2a, insn->src(2));
938    emitPRED(0x27, insn->src(2));
939    emitGPR (0x08, insn->src(0));
940    emitGPR (0x00, insn->def(0));
941 
942    if (insn->subOp == 1) {
943       addInterp(0, 0, selpFlip);
944    }
945 }
946 
947 void
emitSHFL()948 CodeEmitterGM107::emitSHFL()
949 {
950    int type = 0;
951 
952    emitInsn (0xef100000);
953 
954    switch (insn->src(1).getFile()) {
955    case FILE_GPR:
956       emitGPR(0x14, insn->src(1));
957       break;
958    case FILE_IMMEDIATE:
959       emitIMMD(0x14, 5, insn->src(1));
960       type |= 1;
961       break;
962    default:
963       assert(!"invalid src1 file");
964       break;
965    }
966 
967    /*XXX: what is this arg? hardcode immediate for now */
968    emitField(0x22, 13, 0x1c03);
969    type |= 2;
970 
971    emitPRED (0x30);
972    emitField(0x1e, 2, insn->subOp);
973    emitField(0x1c, 2, type);
974    emitGPR  (0x08, insn->src(0));
975    emitGPR  (0x00, insn->def(0));
976 }
977 
978 /*******************************************************************************
979  * double
980  ******************************************************************************/
981 
982 void
emitDADD()983 CodeEmitterGM107::emitDADD()
984 {
985    switch (insn->src(1).getFile()) {
986    case FILE_GPR:
987       emitInsn(0x5c700000);
988       emitGPR (0x14, insn->src(1));
989       break;
990    case FILE_MEMORY_CONST:
991       emitInsn(0x4c700000);
992       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
993       break;
994    case FILE_IMMEDIATE:
995       emitInsn(0x38700000);
996       emitIMMD(0x14, 19, insn->src(1));
997       break;
998    default:
999       assert(!"bad src1 file");
1000       break;
1001    }
1002    emitABS(0x31, insn->src(1));
1003    emitNEG(0x30, insn->src(0));
1004    emitCC (0x2f);
1005    emitABS(0x2e, insn->src(0));
1006    emitNEG(0x2d, insn->src(1));
1007 
1008    if (insn->op == OP_SUB)
1009       code[1] ^= 0x00002000;
1010 
1011    emitGPR(0x08, insn->src(0));
1012    emitGPR(0x00, insn->def(0));
1013 }
1014 
1015 void
emitDMUL()1016 CodeEmitterGM107::emitDMUL()
1017 {
1018    switch (insn->src(1).getFile()) {
1019    case FILE_GPR:
1020       emitInsn(0x5c800000);
1021       emitGPR (0x14, insn->src(1));
1022       break;
1023    case FILE_MEMORY_CONST:
1024       emitInsn(0x4c800000);
1025       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1026       break;
1027    case FILE_IMMEDIATE:
1028       emitInsn(0x38800000);
1029       emitIMMD(0x14, 19, insn->src(1));
1030       break;
1031    default:
1032       assert(!"bad src1 file");
1033       break;
1034    }
1035 
1036    emitNEG2(0x30, insn->src(0), insn->src(1));
1037    emitCC  (0x2f);
1038    emitRND (0x27);
1039    emitGPR (0x08, insn->src(0));
1040    emitGPR (0x00, insn->def(0));
1041 }
1042 
1043 void
emitDFMA()1044 CodeEmitterGM107::emitDFMA()
1045 {
1046    switch(insn->src(2).getFile()) {
1047    case FILE_GPR:
1048       switch (insn->src(1).getFile()) {
1049       case FILE_GPR:
1050          emitInsn(0x5b700000);
1051          emitGPR (0x14, insn->src(1));
1052          break;
1053       case FILE_MEMORY_CONST:
1054          emitInsn(0x4b700000);
1055          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1056          break;
1057       case FILE_IMMEDIATE:
1058          emitInsn(0x36700000);
1059          emitIMMD(0x14, 19, insn->src(1));
1060          break;
1061       default:
1062          assert(!"bad src1 file");
1063          break;
1064       }
1065       emitGPR (0x27, insn->src(2));
1066       break;
1067    case FILE_MEMORY_CONST:
1068       emitInsn(0x53700000);
1069       emitGPR (0x27, insn->src(1));
1070       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1071       break;
1072    default:
1073       assert(!"bad src2 file");
1074       break;
1075    }
1076 
1077    emitRND (0x32);
1078    emitNEG (0x31, insn->src(2));
1079    emitNEG2(0x30, insn->src(0), insn->src(1));
1080    emitCC  (0x2f);
1081    emitGPR (0x08, insn->src(0));
1082    emitGPR (0x00, insn->def(0));
1083 }
1084 
1085 void
emitDMNMX()1086 CodeEmitterGM107::emitDMNMX()
1087 {
1088    switch (insn->src(1).getFile()) {
1089    case FILE_GPR:
1090       emitInsn(0x5c500000);
1091       emitGPR (0x14, insn->src(1));
1092       break;
1093    case FILE_MEMORY_CONST:
1094       emitInsn(0x4c500000);
1095       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1096       break;
1097    case FILE_IMMEDIATE:
1098       emitInsn(0x38500000);
1099       emitIMMD(0x14, 19, insn->src(1));
1100       break;
1101    default:
1102       assert(!"bad src1 file");
1103       break;
1104    }
1105 
1106    emitABS  (0x31, insn->src(1));
1107    emitNEG  (0x30, insn->src(0));
1108    emitCC   (0x2f);
1109    emitABS  (0x2e, insn->src(0));
1110    emitNEG  (0x2d, insn->src(1));
1111    emitField(0x2a, 1, insn->op == OP_MAX);
1112    emitPRED (0x27);
1113    emitGPR  (0x08, insn->src(0));
1114    emitGPR  (0x00, insn->def(0));
1115 }
1116 
1117 void
emitDSET()1118 CodeEmitterGM107::emitDSET()
1119 {
1120    const CmpInstruction *insn = this->insn->asCmp();
1121 
1122    switch (insn->src(1).getFile()) {
1123    case FILE_GPR:
1124       emitInsn(0x59000000);
1125       emitGPR (0x14, insn->src(1));
1126       break;
1127    case FILE_MEMORY_CONST:
1128       emitInsn(0x49000000);
1129       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1130       break;
1131    case FILE_IMMEDIATE:
1132       emitInsn(0x32000000);
1133       emitIMMD(0x14, 19, insn->src(1));
1134       break;
1135    default:
1136       assert(!"bad src1 file");
1137       break;
1138    }
1139 
1140    if (insn->op != OP_SET) {
1141       switch (insn->op) {
1142       case OP_SET_AND: emitField(0x2d, 2, 0); break;
1143       case OP_SET_OR : emitField(0x2d, 2, 1); break;
1144       case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1145       default:
1146          assert(!"invalid set op");
1147          break;
1148       }
1149       emitPRED(0x27, insn->src(2));
1150    } else {
1151       emitPRED(0x27);
1152    }
1153 
1154    emitABS  (0x36, insn->src(0));
1155    emitNEG  (0x35, insn->src(1));
1156    emitField(0x34, 1, insn->dType == TYPE_F32);
1157    emitCond4(0x30, insn->setCond);
1158    emitCC   (0x2f);
1159    emitABS  (0x2c, insn->src(1));
1160    emitNEG  (0x2b, insn->src(0));
1161    emitGPR  (0x08, insn->src(0));
1162    emitGPR  (0x00, insn->def(0));
1163 }
1164 
1165 void
emitDSETP()1166 CodeEmitterGM107::emitDSETP()
1167 {
1168    const CmpInstruction *insn = this->insn->asCmp();
1169 
1170    switch (insn->src(1).getFile()) {
1171    case FILE_GPR:
1172       emitInsn(0x5b800000);
1173       emitGPR (0x14, insn->src(1));
1174       break;
1175    case FILE_MEMORY_CONST:
1176       emitInsn(0x4b800000);
1177       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1178       break;
1179    case FILE_IMMEDIATE:
1180       emitInsn(0x36800000);
1181       emitIMMD(0x14, 19, insn->src(1));
1182       break;
1183    default:
1184       assert(!"bad src1 file");
1185       break;
1186    }
1187 
1188    if (insn->op != OP_SET) {
1189       switch (insn->op) {
1190       case OP_SET_AND: emitField(0x2d, 2, 0); break;
1191       case OP_SET_OR : emitField(0x2d, 2, 1); break;
1192       case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1193       default:
1194          assert(!"invalid set op");
1195          break;
1196       }
1197       emitPRED(0x27, insn->src(2));
1198    } else {
1199       emitPRED(0x27);
1200    }
1201 
1202    emitCond4(0x30, insn->setCond);
1203    emitABS  (0x2c, insn->src(1));
1204    emitNEG  (0x2b, insn->src(0));
1205    emitGPR  (0x08, insn->src(0));
1206    emitABS  (0x07, insn->src(0));
1207    emitNEG  (0x06, insn->src(1));
1208    emitPRED (0x03, insn->def(0));
1209    if (insn->defExists(1))
1210       emitPRED(0x00, insn->def(1));
1211    else
1212       emitPRED(0x00);
1213 }
1214 
1215 /*******************************************************************************
1216  * float
1217  ******************************************************************************/
1218 
1219 void
emitFADD()1220 CodeEmitterGM107::emitFADD()
1221 {
1222    if (!longIMMD(insn->src(1))) {
1223       switch (insn->src(1).getFile()) {
1224       case FILE_GPR:
1225          emitInsn(0x5c580000);
1226          emitGPR (0x14, insn->src(1));
1227          break;
1228       case FILE_MEMORY_CONST:
1229          emitInsn(0x4c580000);
1230          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1231          break;
1232       case FILE_IMMEDIATE:
1233          emitInsn(0x38580000);
1234          emitIMMD(0x14, 19, insn->src(1));
1235          break;
1236       default:
1237          assert(!"bad src1 file");
1238          break;
1239       }
1240       emitSAT(0x32);
1241       emitABS(0x31, insn->src(1));
1242       emitNEG(0x30, insn->src(0));
1243       emitCC (0x2f);
1244       emitABS(0x2e, insn->src(0));
1245       emitNEG(0x2d, insn->src(1));
1246       emitFMZ(0x2c, 1);
1247 
1248       if (insn->op == OP_SUB)
1249          code[1] ^= 0x00002000;
1250    } else {
1251       emitInsn(0x08000000);
1252       emitABS(0x39, insn->src(1));
1253       emitNEG(0x38, insn->src(0));
1254       emitFMZ(0x37, 1);
1255       emitABS(0x36, insn->src(0));
1256       emitNEG(0x35, insn->src(1));
1257       emitCC  (0x34);
1258       emitIMMD(0x14, 32, insn->src(1));
1259 
1260       if (insn->op == OP_SUB)
1261          code[1] ^= 0x00080000;
1262    }
1263 
1264    emitGPR(0x08, insn->src(0));
1265    emitGPR(0x00, insn->def(0));
1266 }
1267 
1268 void
emitFMUL()1269 CodeEmitterGM107::emitFMUL()
1270 {
1271    if (!longIMMD(insn->src(1))) {
1272       switch (insn->src(1).getFile()) {
1273       case FILE_GPR:
1274          emitInsn(0x5c680000);
1275          emitGPR (0x14, insn->src(1));
1276          break;
1277       case FILE_MEMORY_CONST:
1278          emitInsn(0x4c680000);
1279          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1280          break;
1281       case FILE_IMMEDIATE:
1282          emitInsn(0x38680000);
1283          emitIMMD(0x14, 19, insn->src(1));
1284          break;
1285       default:
1286          assert(!"bad src1 file");
1287          break;
1288       }
1289       emitSAT (0x32);
1290       emitNEG2(0x30, insn->src(0), insn->src(1));
1291       emitCC  (0x2f);
1292       emitFMZ (0x2c, 2);
1293       emitPDIV(0x29);
1294       emitRND (0x27);
1295    } else {
1296       emitInsn(0x1e000000);
1297       emitSAT (0x37);
1298       emitFMZ (0x35, 2);
1299       emitCC  (0x34);
1300       emitIMMD(0x14, 32, insn->src(1));
1301       if (insn->src(0).mod.neg() ^ insn->src(1).mod.neg())
1302          code[1] ^= 0x00080000; /* flip immd sign bit */
1303    }
1304 
1305    emitGPR(0x08, insn->src(0));
1306    emitGPR(0x00, insn->def(0));
1307 }
1308 
1309 void
emitFFMA()1310 CodeEmitterGM107::emitFFMA()
1311 {
1312    /*XXX: ffma32i exists, but not using it as third src overlaps dst */
1313    switch(insn->src(2).getFile()) {
1314    case FILE_GPR:
1315       switch (insn->src(1).getFile()) {
1316       case FILE_GPR:
1317          emitInsn(0x59800000);
1318          emitGPR (0x14, insn->src(1));
1319          break;
1320       case FILE_MEMORY_CONST:
1321          emitInsn(0x49800000);
1322          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1323          break;
1324       case FILE_IMMEDIATE:
1325          emitInsn(0x32800000);
1326          emitIMMD(0x14, 19, insn->src(1));
1327          break;
1328       default:
1329          assert(!"bad src1 file");
1330          break;
1331       }
1332       emitGPR (0x27, insn->src(2));
1333       break;
1334    case FILE_MEMORY_CONST:
1335       emitInsn(0x51800000);
1336       emitGPR (0x27, insn->src(1));
1337       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1338       break;
1339    default:
1340       assert(!"bad src2 file");
1341       break;
1342    }
1343    emitRND (0x33);
1344    emitSAT (0x32);
1345    emitNEG (0x31, insn->src(2));
1346    emitNEG2(0x30, insn->src(0), insn->src(1));
1347    emitCC  (0x2f);
1348 
1349    emitFMZ(0x35, 2);
1350    emitGPR(0x08, insn->src(0));
1351    emitGPR(0x00, insn->def(0));
1352 }
1353 
1354 void
emitMUFU()1355 CodeEmitterGM107::emitMUFU()
1356 {
1357    int mufu = 0;
1358 
1359    switch (insn->op) {
1360    case OP_COS: mufu = 0; break;
1361    case OP_SIN: mufu = 1; break;
1362    case OP_EX2: mufu = 2; break;
1363    case OP_LG2: mufu = 3; break;
1364    case OP_RCP: mufu = 4 + 2 * insn->subOp; break;
1365    case OP_RSQ: mufu = 5 + 2 * insn->subOp; break;
1366    default:
1367       assert(!"invalid mufu");
1368       break;
1369    }
1370 
1371    emitInsn (0x50800000);
1372    emitSAT  (0x32);
1373    emitNEG  (0x30, insn->src(0));
1374    emitABS  (0x2e, insn->src(0));
1375    emitField(0x14, 3, mufu);
1376    emitGPR  (0x08, insn->src(0));
1377    emitGPR  (0x00, insn->def(0));
1378 }
1379 
1380 void
emitFMNMX()1381 CodeEmitterGM107::emitFMNMX()
1382 {
1383    switch (insn->src(1).getFile()) {
1384    case FILE_GPR:
1385       emitInsn(0x5c600000);
1386       emitGPR (0x14, insn->src(1));
1387       break;
1388    case FILE_MEMORY_CONST:
1389       emitInsn(0x4c600000);
1390       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1391       break;
1392    case FILE_IMMEDIATE:
1393       emitInsn(0x38600000);
1394       emitIMMD(0x14, 19, insn->src(1));
1395       break;
1396    default:
1397       assert(!"bad src1 file");
1398       break;
1399    }
1400 
1401    emitField(0x2a, 1, insn->op == OP_MAX);
1402    emitPRED (0x27);
1403 
1404    emitABS(0x31, insn->src(1));
1405    emitNEG(0x30, insn->src(0));
1406    emitCC (0x2f);
1407    emitABS(0x2e, insn->src(0));
1408    emitNEG(0x2d, insn->src(1));
1409    emitFMZ(0x2c, 1);
1410    emitGPR(0x08, insn->src(0));
1411    emitGPR(0x00, insn->def(0));
1412 }
1413 
1414 void
emitRRO()1415 CodeEmitterGM107::emitRRO()
1416 {
1417    switch (insn->src(0).getFile()) {
1418    case FILE_GPR:
1419       emitInsn(0x5c900000);
1420       emitGPR (0x14, insn->src(0));
1421       break;
1422    case FILE_MEMORY_CONST:
1423       emitInsn(0x4c900000);
1424       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1425       break;
1426    case FILE_IMMEDIATE:
1427       emitInsn(0x38900000);
1428       emitIMMD(0x14, 19, insn->src(0));
1429       break;
1430    default:
1431       assert(!"bad src file");
1432       break;
1433    }
1434 
1435    emitABS  (0x31, insn->src(0));
1436    emitNEG  (0x2d, insn->src(0));
1437    emitField(0x27, 1, insn->op == OP_PREEX2);
1438    emitGPR  (0x00, insn->def(0));
1439 }
1440 
1441 void
emitFCMP()1442 CodeEmitterGM107::emitFCMP()
1443 {
1444    const CmpInstruction *insn = this->insn->asCmp();
1445    CondCode cc = insn->setCond;
1446 
1447    if (insn->src(2).mod.neg())
1448       cc = reverseCondCode(cc);
1449 
1450    switch(insn->src(2).getFile()) {
1451    case FILE_GPR:
1452       switch (insn->src(1).getFile()) {
1453       case FILE_GPR:
1454          emitInsn(0x5ba00000);
1455          emitGPR (0x14, insn->src(1));
1456          break;
1457       case FILE_MEMORY_CONST:
1458          emitInsn(0x4ba00000);
1459          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1460          break;
1461       case FILE_IMMEDIATE:
1462          emitInsn(0x36a00000);
1463          emitIMMD(0x14, 19, insn->src(1));
1464          break;
1465       default:
1466          assert(!"bad src1 file");
1467          break;
1468       }
1469       emitGPR (0x27, insn->src(2));
1470       break;
1471    case FILE_MEMORY_CONST:
1472       emitInsn(0x53a00000);
1473       emitGPR (0x27, insn->src(1));
1474       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1475       break;
1476    default:
1477       assert(!"bad src2 file");
1478       break;
1479    }
1480 
1481    emitCond4(0x30, cc);
1482    emitFMZ  (0x2f, 1);
1483    emitGPR  (0x08, insn->src(0));
1484    emitGPR  (0x00, insn->def(0));
1485 }
1486 
1487 void
emitFSET()1488 CodeEmitterGM107::emitFSET()
1489 {
1490    const CmpInstruction *insn = this->insn->asCmp();
1491 
1492    switch (insn->src(1).getFile()) {
1493    case FILE_GPR:
1494       emitInsn(0x58000000);
1495       emitGPR (0x14, insn->src(1));
1496       break;
1497    case FILE_MEMORY_CONST:
1498       emitInsn(0x48000000);
1499       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1500       break;
1501    case FILE_IMMEDIATE:
1502       emitInsn(0x30000000);
1503       emitIMMD(0x14, 19, insn->src(1));
1504       break;
1505    default:
1506       assert(!"bad src1 file");
1507       break;
1508    }
1509 
1510    if (insn->op != OP_SET) {
1511       switch (insn->op) {
1512       case OP_SET_AND: emitField(0x2d, 2, 0); break;
1513       case OP_SET_OR : emitField(0x2d, 2, 1); break;
1514       case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1515       default:
1516          assert(!"invalid set op");
1517          break;
1518       }
1519       emitPRED(0x27, insn->src(2));
1520    } else {
1521       emitPRED(0x27);
1522    }
1523 
1524    emitFMZ  (0x37, 1);
1525    emitABS  (0x36, insn->src(0));
1526    emitNEG  (0x35, insn->src(1));
1527    emitField(0x34, 1, insn->dType == TYPE_F32);
1528    emitCond4(0x30, insn->setCond);
1529    emitCC   (0x2f);
1530    emitABS  (0x2c, insn->src(1));
1531    emitNEG  (0x2b, insn->src(0));
1532    emitGPR  (0x08, insn->src(0));
1533    emitGPR  (0x00, insn->def(0));
1534 }
1535 
1536 void
emitFSETP()1537 CodeEmitterGM107::emitFSETP()
1538 {
1539    const CmpInstruction *insn = this->insn->asCmp();
1540 
1541    switch (insn->src(1).getFile()) {
1542    case FILE_GPR:
1543       emitInsn(0x5bb00000);
1544       emitGPR (0x14, insn->src(1));
1545       break;
1546    case FILE_MEMORY_CONST:
1547       emitInsn(0x4bb00000);
1548       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1549       break;
1550    case FILE_IMMEDIATE:
1551       emitInsn(0x36b00000);
1552       emitIMMD(0x14, 19, insn->src(1));
1553       break;
1554    default:
1555       assert(!"bad src1 file");
1556       break;
1557    }
1558 
1559    if (insn->op != OP_SET) {
1560       switch (insn->op) {
1561       case OP_SET_AND: emitField(0x2d, 2, 0); break;
1562       case OP_SET_OR : emitField(0x2d, 2, 1); break;
1563       case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1564       default:
1565          assert(!"invalid set op");
1566          break;
1567       }
1568       emitPRED(0x27, insn->src(2));
1569    } else {
1570       emitPRED(0x27);
1571    }
1572 
1573    emitCond4(0x30, insn->setCond);
1574    emitFMZ  (0x2f, 1);
1575    emitABS  (0x2c, insn->src(1));
1576    emitNEG  (0x2b, insn->src(0));
1577    emitGPR  (0x08, insn->src(0));
1578    emitABS  (0x07, insn->src(0));
1579    emitNEG  (0x06, insn->src(1));
1580    emitPRED (0x03, insn->def(0));
1581    if (insn->defExists(1))
1582       emitPRED(0x00, insn->def(1));
1583    else
1584       emitPRED(0x00);
1585 }
1586 
1587 void
emitFSWZADD()1588 CodeEmitterGM107::emitFSWZADD()
1589 {
1590    emitInsn (0x50f80000);
1591    emitCC   (0x2f);
1592    emitFMZ  (0x2c, 1);
1593    emitRND  (0x27);
1594    emitField(0x26, 1, insn->lanes); /* abused for .ndv */
1595    emitField(0x1c, 8, insn->subOp);
1596    if (insn->predSrc != 1)
1597       emitGPR  (0x14, insn->src(1));
1598    else
1599       emitGPR  (0x14);
1600    emitGPR  (0x08, insn->src(0));
1601    emitGPR  (0x00, insn->def(0));
1602 }
1603 
1604 /*******************************************************************************
1605  * integer
1606  ******************************************************************************/
1607 
1608 void
emitLOP()1609 CodeEmitterGM107::emitLOP()
1610 {
1611    int lop = 0;
1612 
1613    switch (insn->op) {
1614    case OP_AND: lop = 0; break;
1615    case OP_OR : lop = 1; break;
1616    case OP_XOR: lop = 2; break;
1617    default:
1618       assert(!"invalid lop");
1619       break;
1620    }
1621 
1622    if (insn->src(1).getFile() != FILE_IMMEDIATE) {
1623       switch (insn->src(1).getFile()) {
1624       case FILE_GPR:
1625          emitInsn(0x5c400000);
1626          emitGPR (0x14, insn->src(1));
1627          break;
1628       case FILE_MEMORY_CONST:
1629          emitInsn(0x4c400000);
1630          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1631          break;
1632       case FILE_IMMEDIATE:
1633          emitInsn(0x38400000);
1634          emitIMMD(0x14, 19, insn->src(1));
1635          break;
1636       default:
1637          assert(!"bad src1 file");
1638          break;
1639       }
1640       emitPRED (0x30);
1641       emitCC   (0x2f);
1642       emitX    (0x2b);
1643       emitField(0x29, 2, lop);
1644       emitINV  (0x28, insn->src(1));
1645       emitINV  (0x27, insn->src(0));
1646    } else {
1647       emitInsn (0x04000000);
1648       emitX    (0x39);
1649       emitINV  (0x38, insn->src(1));
1650       emitINV  (0x37, insn->src(0));
1651       emitField(0x35, 2, lop);
1652       emitCC   (0x34);
1653       emitIMMD (0x14, 32, insn->src(1));
1654    }
1655 
1656    emitGPR  (0x08, insn->src(0));
1657    emitGPR  (0x00, insn->def(0));
1658 }
1659 
1660 /* special-case of emitLOP(): lop pass_b dst 0 ~src */
1661 void
emitNOT()1662 CodeEmitterGM107::emitNOT()
1663 {
1664    if (!longIMMD(insn->src(0))) {
1665       switch (insn->src(0).getFile()) {
1666       case FILE_GPR:
1667          emitInsn(0x5c400700);
1668          emitGPR (0x14, insn->src(0));
1669          break;
1670       case FILE_MEMORY_CONST:
1671          emitInsn(0x4c400700);
1672          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1673          break;
1674       case FILE_IMMEDIATE:
1675          emitInsn(0x38400700);
1676          emitIMMD(0x14, 19, insn->src(0));
1677          break;
1678       default:
1679          assert(!"bad src1 file");
1680          break;
1681       }
1682       emitPRED (0x30);
1683    } else {
1684       emitInsn (0x05600000);
1685       emitIMMD (0x14, 32, insn->src(1));
1686    }
1687 
1688    emitGPR(0x08);
1689    emitGPR(0x00, insn->def(0));
1690 }
1691 
1692 void
emitIADD()1693 CodeEmitterGM107::emitIADD()
1694 {
1695    if (insn->src(1).getFile() != FILE_IMMEDIATE) {
1696       switch (insn->src(1).getFile()) {
1697       case FILE_GPR:
1698          emitInsn(0x5c100000);
1699          emitGPR (0x14, insn->src(1));
1700          break;
1701       case FILE_MEMORY_CONST:
1702          emitInsn(0x4c100000);
1703          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1704          break;
1705       case FILE_IMMEDIATE:
1706          emitInsn(0x38100000);
1707          emitIMMD(0x14, 19, insn->src(1));
1708          break;
1709       default:
1710          assert(!"bad src1 file");
1711          break;
1712       }
1713       emitSAT(0x32);
1714       emitNEG(0x31, insn->src(0));
1715       emitNEG(0x30, insn->src(1));
1716       emitCC (0x2f);
1717       emitX  (0x2b);
1718    } else {
1719       emitInsn(0x1c000000);
1720       emitNEG (0x38, insn->src(0));
1721       emitSAT (0x36);
1722       emitX   (0x35);
1723       emitCC  (0x34);
1724       emitIMMD(0x14, 32, insn->src(1));
1725    }
1726 
1727    if (insn->op == OP_SUB)
1728       code[1] ^= 0x00010000;
1729 
1730    emitGPR(0x08, insn->src(0));
1731    emitGPR(0x00, insn->def(0));
1732 }
1733 
1734 void
emitIMUL()1735 CodeEmitterGM107::emitIMUL()
1736 {
1737    if (insn->src(1).getFile() != FILE_IMMEDIATE) {
1738       switch (insn->src(1).getFile()) {
1739       case FILE_GPR:
1740          emitInsn(0x5c380000);
1741          emitGPR (0x14, insn->src(1));
1742          break;
1743       case FILE_MEMORY_CONST:
1744          emitInsn(0x4c380000);
1745          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1746          break;
1747       case FILE_IMMEDIATE:
1748          emitInsn(0x38380000);
1749          emitIMMD(0x14, 19, insn->src(1));
1750          break;
1751       default:
1752          assert(!"bad src1 file");
1753          break;
1754       }
1755       emitCC   (0x2f);
1756       emitField(0x29, 1, isSignedType(insn->sType));
1757       emitField(0x28, 1, isSignedType(insn->dType));
1758       emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1759    } else {
1760       emitInsn (0x1f000000);
1761       emitField(0x37, 1, isSignedType(insn->sType));
1762       emitField(0x36, 1, isSignedType(insn->dType));
1763       emitField(0x35, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1764       emitCC   (0x34);
1765       emitIMMD (0x14, 32, insn->src(1));
1766    }
1767 
1768    emitGPR(0x08, insn->src(0));
1769    emitGPR(0x00, insn->def(0));
1770 }
1771 
1772 void
emitIMAD()1773 CodeEmitterGM107::emitIMAD()
1774 {
1775    /*XXX: imad32i exists, but not using it as third src overlaps dst */
1776    switch(insn->src(2).getFile()) {
1777    case FILE_GPR:
1778       switch (insn->src(1).getFile()) {
1779       case FILE_GPR:
1780          emitInsn(0x5a000000);
1781          emitGPR (0x14, insn->src(1));
1782          break;
1783       case FILE_MEMORY_CONST:
1784          emitInsn(0x4a000000);
1785          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1786          break;
1787       case FILE_IMMEDIATE:
1788          emitInsn(0x34000000);
1789          emitIMMD(0x14, 19, insn->src(1));
1790          break;
1791       default:
1792          assert(!"bad src1 file");
1793          break;
1794       }
1795       emitGPR (0x27, insn->src(2));
1796       break;
1797    case FILE_MEMORY_CONST:
1798       emitInsn(0x52000000);
1799       emitGPR (0x27, insn->src(1));
1800       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1801       break;
1802    default:
1803       assert(!"bad src2 file");
1804       break;
1805    }
1806 
1807    emitField(0x36, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1808    emitField(0x35, 1, isSignedType(insn->sType));
1809    emitNEG  (0x34, insn->src(2));
1810    emitNEG2 (0x33, insn->src(0), insn->src(1));
1811    emitSAT  (0x32);
1812    emitX    (0x31);
1813    emitField(0x30, 1, isSignedType(insn->dType));
1814    emitCC   (0x2f);
1815    emitGPR  (0x08, insn->src(0));
1816    emitGPR  (0x00, insn->def(0));
1817 }
1818 
1819 void
emitISCADD()1820 CodeEmitterGM107::emitISCADD()
1821 {
1822    switch (insn->src(2).getFile()) {
1823    case FILE_GPR:
1824       emitInsn(0x5c180000);
1825       emitGPR (0x14, insn->src(2));
1826       break;
1827    case FILE_MEMORY_CONST:
1828       emitInsn(0x4c180000);
1829       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1830       break;
1831    case FILE_IMMEDIATE:
1832       emitInsn(0x38180000);
1833       emitIMMD(0x14, 19, insn->src(2));
1834       break;
1835    default:
1836       assert(!"bad src1 file");
1837       break;
1838    }
1839    emitNEG (0x31, insn->src(0));
1840    emitNEG (0x30, insn->src(2));
1841    emitCC  (0x2f);
1842    emitIMMD(0x27, 5, insn->src(1));
1843    emitGPR (0x08, insn->src(0));
1844    emitGPR (0x00, insn->def(0));
1845 }
1846 
1847 void
emitIMNMX()1848 CodeEmitterGM107::emitIMNMX()
1849 {
1850    switch (insn->src(1).getFile()) {
1851    case FILE_GPR:
1852       emitInsn(0x5c200000);
1853       emitGPR (0x14, insn->src(1));
1854       break;
1855    case FILE_MEMORY_CONST:
1856       emitInsn(0x4c200000);
1857       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1858       break;
1859    case FILE_IMMEDIATE:
1860       emitInsn(0x38200000);
1861       emitIMMD(0x14, 19, insn->src(1));
1862       break;
1863    default:
1864       assert(!"bad src1 file");
1865       break;
1866    }
1867 
1868    emitField(0x30, 1, isSignedType(insn->dType));
1869    emitCC   (0x2f);
1870    emitField(0x2a, 1, insn->op == OP_MAX);
1871    emitPRED (0x27);
1872    emitGPR  (0x08, insn->src(0));
1873    emitGPR  (0x00, insn->def(0));
1874 }
1875 
1876 void
emitICMP()1877 CodeEmitterGM107::emitICMP()
1878 {
1879    const CmpInstruction *insn = this->insn->asCmp();
1880    CondCode cc = insn->setCond;
1881 
1882    if (insn->src(2).mod.neg())
1883       cc = reverseCondCode(cc);
1884 
1885    switch(insn->src(2).getFile()) {
1886    case FILE_GPR:
1887       switch (insn->src(1).getFile()) {
1888       case FILE_GPR:
1889          emitInsn(0x5b400000);
1890          emitGPR (0x14, insn->src(1));
1891          break;
1892       case FILE_MEMORY_CONST:
1893          emitInsn(0x4b400000);
1894          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1895          break;
1896       case FILE_IMMEDIATE:
1897          emitInsn(0x36400000);
1898          emitIMMD(0x14, 19, insn->src(1));
1899          break;
1900       default:
1901          assert(!"bad src1 file");
1902          break;
1903       }
1904       emitGPR (0x27, insn->src(2));
1905       break;
1906    case FILE_MEMORY_CONST:
1907       emitInsn(0x53400000);
1908       emitGPR (0x27, insn->src(1));
1909       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1910       break;
1911    default:
1912       assert(!"bad src2 file");
1913       break;
1914    }
1915 
1916    emitCond3(0x31, cc);
1917    emitField(0x30, 1, isSignedType(insn->sType));
1918    emitGPR  (0x08, insn->src(0));
1919    emitGPR  (0x00, insn->def(0));
1920 }
1921 
1922 void
emitISET()1923 CodeEmitterGM107::emitISET()
1924 {
1925    const CmpInstruction *insn = this->insn->asCmp();
1926 
1927    switch (insn->src(1).getFile()) {
1928    case FILE_GPR:
1929       emitInsn(0x5b500000);
1930       emitGPR (0x14, insn->src(1));
1931       break;
1932    case FILE_MEMORY_CONST:
1933       emitInsn(0x4b500000);
1934       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1935       break;
1936    case FILE_IMMEDIATE:
1937       emitInsn(0x36500000);
1938       emitIMMD(0x14, 19, insn->src(1));
1939       break;
1940    default:
1941       assert(!"bad src1 file");
1942       break;
1943    }
1944 
1945    if (insn->op != OP_SET) {
1946       switch (insn->op) {
1947       case OP_SET_AND: emitField(0x2d, 2, 0); break;
1948       case OP_SET_OR : emitField(0x2d, 2, 1); break;
1949       case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1950       default:
1951          assert(!"invalid set op");
1952          break;
1953       }
1954       emitPRED(0x27, insn->src(2));
1955    } else {
1956       emitPRED(0x27);
1957    }
1958 
1959    emitCond3(0x31, insn->setCond);
1960    emitField(0x30, 1, isSignedType(insn->sType));
1961    emitCC   (0x2f);
1962    emitField(0x2c, 1, insn->dType == TYPE_F32);
1963    emitX    (0x2b);
1964    emitGPR  (0x08, insn->src(0));
1965    emitGPR  (0x00, insn->def(0));
1966 }
1967 
1968 void
emitISETP()1969 CodeEmitterGM107::emitISETP()
1970 {
1971    const CmpInstruction *insn = this->insn->asCmp();
1972 
1973    switch (insn->src(1).getFile()) {
1974    case FILE_GPR:
1975       emitInsn(0x5b600000);
1976       emitGPR (0x14, insn->src(1));
1977       break;
1978    case FILE_MEMORY_CONST:
1979       emitInsn(0x4b600000);
1980       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1981       break;
1982    case FILE_IMMEDIATE:
1983       emitInsn(0x36600000);
1984       emitIMMD(0x14, 19, insn->src(1));
1985       break;
1986    default:
1987       assert(!"bad src1 file");
1988       break;
1989    }
1990 
1991    if (insn->op != OP_SET) {
1992       switch (insn->op) {
1993       case OP_SET_AND: emitField(0x2d, 2, 0); break;
1994       case OP_SET_OR : emitField(0x2d, 2, 1); break;
1995       case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1996       default:
1997          assert(!"invalid set op");
1998          break;
1999       }
2000       emitPRED(0x27, insn->src(2));
2001    } else {
2002       emitPRED(0x27);
2003    }
2004 
2005    emitCond3(0x31, insn->setCond);
2006    emitField(0x30, 1, isSignedType(insn->sType));
2007    emitX    (0x2b);
2008    emitGPR  (0x08, insn->src(0));
2009    emitPRED (0x03, insn->def(0));
2010    if (insn->defExists(1))
2011       emitPRED(0x00, insn->def(1));
2012    else
2013       emitPRED(0x00);
2014 }
2015 
2016 void
emitSHL()2017 CodeEmitterGM107::emitSHL()
2018 {
2019    switch (insn->src(1).getFile()) {
2020    case FILE_GPR:
2021       emitInsn(0x5c480000);
2022       emitGPR (0x14, insn->src(1));
2023       break;
2024    case FILE_MEMORY_CONST:
2025       emitInsn(0x4c480000);
2026       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2027       break;
2028    case FILE_IMMEDIATE:
2029       emitInsn(0x38480000);
2030       emitIMMD(0x14, 19, insn->src(1));
2031       break;
2032    default:
2033       assert(!"bad src1 file");
2034       break;
2035    }
2036 
2037    emitCC   (0x2f);
2038    emitX    (0x2b);
2039    emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2040    emitGPR  (0x08, insn->src(0));
2041    emitGPR  (0x00, insn->def(0));
2042 }
2043 
2044 void
emitSHR()2045 CodeEmitterGM107::emitSHR()
2046 {
2047    switch (insn->src(1).getFile()) {
2048    case FILE_GPR:
2049       emitInsn(0x5c280000);
2050       emitGPR (0x14, insn->src(1));
2051       break;
2052    case FILE_MEMORY_CONST:
2053       emitInsn(0x4c280000);
2054       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2055       break;
2056    case FILE_IMMEDIATE:
2057       emitInsn(0x38280000);
2058       emitIMMD(0x14, 19, insn->src(1));
2059       break;
2060    default:
2061       assert(!"bad src1 file");
2062       break;
2063    }
2064 
2065    emitField(0x30, 1, isSignedType(insn->dType));
2066    emitCC   (0x2f);
2067    emitX    (0x2c);
2068    emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2069    emitGPR  (0x08, insn->src(0));
2070    emitGPR  (0x00, insn->def(0));
2071 }
2072 
2073 void
emitPOPC()2074 CodeEmitterGM107::emitPOPC()
2075 {
2076    switch (insn->src(0).getFile()) {
2077    case FILE_GPR:
2078       emitInsn(0x5c080000);
2079       emitGPR (0x14, insn->src(0));
2080       break;
2081    case FILE_MEMORY_CONST:
2082       emitInsn(0x4c080000);
2083       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2084       break;
2085    case FILE_IMMEDIATE:
2086       emitInsn(0x38080000);
2087       emitIMMD(0x14, 19, insn->src(0));
2088       break;
2089    default:
2090       assert(!"bad src1 file");
2091       break;
2092    }
2093 
2094    emitINV(0x28, insn->src(0));
2095    emitGPR(0x00, insn->def(0));
2096 }
2097 
2098 void
emitBFI()2099 CodeEmitterGM107::emitBFI()
2100 {
2101    switch(insn->src(2).getFile()) {
2102    case FILE_GPR:
2103       switch (insn->src(1).getFile()) {
2104       case FILE_GPR:
2105          emitInsn(0x5bf00000);
2106          emitGPR (0x14, insn->src(1));
2107          break;
2108       case FILE_MEMORY_CONST:
2109          emitInsn(0x4bf00000);
2110          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2111          break;
2112       case FILE_IMMEDIATE:
2113          emitInsn(0x36f00000);
2114          emitIMMD(0x14, 19, insn->src(1));
2115          break;
2116       default:
2117          assert(!"bad src1 file");
2118          break;
2119       }
2120       emitGPR (0x27, insn->src(2));
2121       break;
2122    case FILE_MEMORY_CONST:
2123       emitInsn(0x53f00000);
2124       emitGPR (0x27, insn->src(1));
2125       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
2126       break;
2127    default:
2128       assert(!"bad src2 file");
2129       break;
2130    }
2131 
2132    emitCC   (0x2f);
2133    emitGPR  (0x08, insn->src(0));
2134    emitGPR  (0x00, insn->def(0));
2135 }
2136 
2137 void
emitBFE()2138 CodeEmitterGM107::emitBFE()
2139 {
2140    switch (insn->src(1).getFile()) {
2141    case FILE_GPR:
2142       emitInsn(0x5c000000);
2143       emitGPR (0x14, insn->src(1));
2144       break;
2145    case FILE_MEMORY_CONST:
2146       emitInsn(0x4c000000);
2147       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2148       break;
2149    case FILE_IMMEDIATE:
2150       emitInsn(0x38000000);
2151       emitIMMD(0x14, 19, insn->src(1));
2152       break;
2153    default:
2154       assert(!"bad src1 file");
2155       break;
2156    }
2157 
2158    emitField(0x30, 1, isSignedType(insn->dType));
2159    emitCC   (0x2f);
2160    emitField(0x28, 1, insn->subOp == NV50_IR_SUBOP_EXTBF_REV);
2161    emitGPR  (0x08, insn->src(0));
2162    emitGPR  (0x00, insn->def(0));
2163 }
2164 
2165 void
emitFLO()2166 CodeEmitterGM107::emitFLO()
2167 {
2168    switch (insn->src(0).getFile()) {
2169    case FILE_GPR:
2170       emitInsn(0x5c300000);
2171       emitGPR (0x14, insn->src(0));
2172       break;
2173    case FILE_MEMORY_CONST:
2174       emitInsn(0x4c300000);
2175       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2176       break;
2177    case FILE_IMMEDIATE:
2178       emitInsn(0x38300000);
2179       emitIMMD(0x14, 19, insn->src(0));
2180       break;
2181    default:
2182       assert(!"bad src1 file");
2183       break;
2184    }
2185 
2186    emitField(0x30, 1, isSignedType(insn->dType));
2187    emitCC   (0x2f);
2188    emitField(0x29, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT);
2189    emitINV  (0x28, insn->src(0));
2190    emitGPR  (0x00, insn->def(0));
2191 }
2192 
2193 /*******************************************************************************
2194  * memory
2195  ******************************************************************************/
2196 
2197 void
emitLDSTs(int pos,DataType type)2198 CodeEmitterGM107::emitLDSTs(int pos, DataType type)
2199 {
2200    int data = 0;
2201 
2202    switch (typeSizeof(type)) {
2203    case  1: data = isSignedType(type) ? 1 : 0; break;
2204    case  2: data = isSignedType(type) ? 3 : 2; break;
2205    case  4: data = 4; break;
2206    case  8: data = 5; break;
2207    case 16: data = 6; break;
2208    default:
2209       assert(!"bad type");
2210       break;
2211    }
2212 
2213    emitField(pos, 3, data);
2214 }
2215 
2216 void
emitLDSTc(int pos)2217 CodeEmitterGM107::emitLDSTc(int pos)
2218 {
2219    int mode = 0;
2220 
2221    switch (insn->cache) {
2222    case CACHE_CA: mode = 0; break;
2223    case CACHE_CG: mode = 1; break;
2224    case CACHE_CS: mode = 2; break;
2225    case CACHE_CV: mode = 3; break;
2226    default:
2227       assert(!"invalid caching mode");
2228       break;
2229    }
2230 
2231    emitField(pos, 2, mode);
2232 }
2233 
2234 void
emitLDC()2235 CodeEmitterGM107::emitLDC()
2236 {
2237    emitInsn (0xef900000);
2238    emitLDSTs(0x30, insn->dType);
2239    emitField(0x2c, 2, insn->subOp);
2240    emitCBUF (0x24, 0x08, 0x14, 16, 0, insn->src(0));
2241    emitGPR  (0x00, insn->def(0));
2242 }
2243 
2244 void
emitLDL()2245 CodeEmitterGM107::emitLDL()
2246 {
2247    emitInsn (0xef400000);
2248    emitLDSTs(0x30, insn->dType);
2249    emitLDSTc(0x2c);
2250    emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2251    emitGPR  (0x00, insn->def(0));
2252 }
2253 
2254 void
emitLDS()2255 CodeEmitterGM107::emitLDS()
2256 {
2257    emitInsn (0xef480000);
2258    emitLDSTs(0x30, insn->dType);
2259    emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2260    emitGPR  (0x00, insn->def(0));
2261 }
2262 
2263 void
emitLD()2264 CodeEmitterGM107::emitLD()
2265 {
2266    emitInsn (0x80000000);
2267    emitPRED (0x3a);
2268    emitLDSTc(0x38);
2269    emitLDSTs(0x35, insn->dType);
2270    emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2271    emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2272    emitGPR  (0x00, insn->def(0));
2273 }
2274 
2275 void
emitSTL()2276 CodeEmitterGM107::emitSTL()
2277 {
2278    emitInsn (0xef500000);
2279    emitLDSTs(0x30, insn->dType);
2280    emitLDSTc(0x2c);
2281    emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2282    emitGPR  (0x00, insn->src(1));
2283 }
2284 
2285 void
emitSTS()2286 CodeEmitterGM107::emitSTS()
2287 {
2288    emitInsn (0xef580000);
2289    emitLDSTs(0x30, insn->dType);
2290    emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2291    emitGPR  (0x00, insn->src(1));
2292 }
2293 
2294 void
emitST()2295 CodeEmitterGM107::emitST()
2296 {
2297    emitInsn (0xa0000000);
2298    emitPRED (0x3a);
2299    emitLDSTc(0x38);
2300    emitLDSTs(0x35, insn->dType);
2301    emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2302    emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2303    emitGPR  (0x00, insn->src(1));
2304 }
2305 
2306 void
emitALD()2307 CodeEmitterGM107::emitALD()
2308 {
2309    emitInsn (0xefd80000);
2310    emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2311    emitGPR  (0x27, insn->src(0).getIndirect(1));
2312    emitO    (0x20);
2313    emitP    (0x1f);
2314    emitADDR (0x08, 20, 10, 0, insn->src(0));
2315    emitGPR  (0x00, insn->def(0));
2316 }
2317 
2318 void
emitAST()2319 CodeEmitterGM107::emitAST()
2320 {
2321    emitInsn (0xeff00000);
2322    emitField(0x2f, 2, (typeSizeof(insn->dType) / 4) - 1);
2323    emitGPR  (0x27, insn->src(0).getIndirect(1));
2324    emitP    (0x1f);
2325    emitADDR (0x08, 20, 10, 0, insn->src(0));
2326    emitGPR  (0x00, insn->src(1));
2327 }
2328 
2329 void
emitISBERD()2330 CodeEmitterGM107::emitISBERD()
2331 {
2332    emitInsn(0xefd00000);
2333    emitGPR (0x08, insn->src(0));
2334    emitGPR (0x00, insn->def(0));
2335 }
2336 
2337 void
emitAL2P()2338 CodeEmitterGM107::emitAL2P()
2339 {
2340    emitInsn (0xefa00000);
2341    emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2342    emitPRED (0x2c);
2343    emitO    (0x20);
2344    emitField(0x14, 11, insn->src(0).get()->reg.data.offset);
2345    emitGPR  (0x08, insn->src(0).getIndirect(0));
2346    emitGPR  (0x00, insn->def(0));
2347 }
2348 
2349 static void
interpApply(const FixupEntry * entry,uint32_t * code,const FixupData & data)2350 interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
2351 {
2352    int ipa = entry->ipa;
2353    int reg = entry->reg;
2354    int loc = entry->loc;
2355 
2356    if (data.flatshade &&
2357        (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
2358       ipa = NV50_IR_INTERP_FLAT;
2359       reg = 0xff;
2360    } else if (data.force_persample_interp &&
2361               (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
2362               (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
2363       ipa |= NV50_IR_INTERP_CENTROID;
2364    }
2365    code[loc + 1] &= ~(0xf << 0x14);
2366    code[loc + 1] |= (ipa & 0x3) << 0x16;
2367    code[loc + 1] |= (ipa & 0xc) << (0x14 - 2);
2368    code[loc + 0] &= ~(0xff << 0x14);
2369    code[loc + 0] |= reg << 0x14;
2370 }
2371 
2372 void
emitIPA()2373 CodeEmitterGM107::emitIPA()
2374 {
2375    int ipam = 0, ipas = 0;
2376 
2377    switch (insn->getInterpMode()) {
2378    case NV50_IR_INTERP_LINEAR     : ipam = 0; break;
2379    case NV50_IR_INTERP_PERSPECTIVE: ipam = 1; break;
2380    case NV50_IR_INTERP_FLAT       : ipam = 2; break;
2381    case NV50_IR_INTERP_SC         : ipam = 3; break;
2382    default:
2383       assert(!"invalid ipa mode");
2384       break;
2385    }
2386 
2387    switch (insn->getSampleMode()) {
2388    case NV50_IR_INTERP_DEFAULT : ipas = 0; break;
2389    case NV50_IR_INTERP_CENTROID: ipas = 1; break;
2390    case NV50_IR_INTERP_OFFSET  : ipas = 2; break;
2391    default:
2392       assert(!"invalid ipa sample mode");
2393       break;
2394    }
2395 
2396    emitInsn (0xe0000000);
2397    emitField(0x36, 2, ipam);
2398    emitField(0x34, 2, ipas);
2399    emitSAT  (0x33);
2400    emitField(0x2f, 3, 7);
2401    emitADDR (0x08, 0x1c, 10, 0, insn->src(0));
2402    if ((code[0] & 0x0000ff00) != 0x0000ff00)
2403       code[1] |= 0x00000040; /* .idx */
2404    emitGPR(0x00, insn->def(0));
2405 
2406    if (insn->op == OP_PINTERP) {
2407       emitGPR(0x14, insn->src(1));
2408       if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2409          emitGPR(0x27, insn->src(2));
2410       addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, interpApply);
2411    } else {
2412       if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2413          emitGPR(0x27, insn->src(1));
2414       emitGPR(0x14);
2415       addInterp(insn->ipa, 0xff, interpApply);
2416    }
2417 
2418    if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET)
2419       emitGPR(0x27);
2420 }
2421 
2422 void
emitATOM()2423 CodeEmitterGM107::emitATOM()
2424 {
2425    unsigned dType, subOp;
2426 
2427    if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2428       switch (insn->dType) {
2429       case TYPE_U32: dType = 0; break;
2430       case TYPE_U64: dType = 1; break;
2431       default: assert(!"unexpected dType"); dType = 0; break;
2432       }
2433       subOp = 15;
2434 
2435       emitInsn (0xee000000);
2436    } else {
2437       switch (insn->dType) {
2438       case TYPE_U32: dType = 0; break;
2439       case TYPE_S32: dType = 1; break;
2440       case TYPE_U64: dType = 2; break;
2441       case TYPE_F32: dType = 3; break;
2442       case TYPE_B128: dType = 4; break;
2443       case TYPE_S64: dType = 5; break;
2444       default: assert(!"unexpected dType"); dType = 0; break;
2445       }
2446       if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2447          subOp = 8;
2448       else
2449          subOp = insn->subOp;
2450 
2451       emitInsn (0xed000000);
2452    }
2453 
2454    emitField(0x34, 4, subOp);
2455    emitField(0x31, 3, dType);
2456    emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2457    emitGPR  (0x14, insn->src(1));
2458    emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2459    emitGPR  (0x00, insn->def(0));
2460 }
2461 
2462 void
emitATOMS()2463 CodeEmitterGM107::emitATOMS()
2464 {
2465    unsigned dType, subOp;
2466 
2467    if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2468       switch (insn->dType) {
2469       case TYPE_U32: dType = 0; break;
2470       case TYPE_U64: dType = 1; break;
2471       default: assert(!"unexpected dType"); dType = 0; break;
2472       }
2473       subOp = 4;
2474 
2475       emitInsn (0xee000000);
2476       emitField(0x34, 1, dType);
2477    } else {
2478       switch (insn->dType) {
2479       case TYPE_U32: dType = 0; break;
2480       case TYPE_S32: dType = 1; break;
2481       case TYPE_U64: dType = 2; break;
2482       case TYPE_S64: dType = 3; break;
2483       default: assert(!"unexpected dType"); dType = 0; break;
2484       }
2485 
2486       if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2487          subOp = 8;
2488       else
2489          subOp = insn->subOp;
2490 
2491       emitInsn (0xec000000);
2492       emitField(0x1c, 3, dType);
2493    }
2494 
2495    emitField(0x34, 4, subOp);
2496    emitGPR  (0x14, insn->src(1));
2497    emitADDR (0x08, 0x1e, 22, 2, insn->src(0));
2498    emitGPR  (0x00, insn->def(0));
2499 }
2500 
2501 void
emitRED()2502 CodeEmitterGM107::emitRED()
2503 {
2504    unsigned dType;
2505 
2506    switch (insn->dType) {
2507    case TYPE_U32: dType = 0; break;
2508    case TYPE_S32: dType = 1; break;
2509    case TYPE_U64: dType = 2; break;
2510    case TYPE_F32: dType = 3; break;
2511    case TYPE_B128: dType = 4; break;
2512    case TYPE_S64: dType = 5; break;
2513    default: assert(!"unexpected dType"); dType = 0; break;
2514    }
2515 
2516    emitInsn (0xebf80000);
2517    emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2518    emitField(0x17, 3, insn->subOp);
2519    emitField(0x14, 3, dType);
2520    emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2521    emitGPR  (0x00, insn->src(1));
2522 }
2523 
2524 void
emitCCTL()2525 CodeEmitterGM107::emitCCTL()
2526 {
2527    unsigned width;
2528    if (insn->src(0).getFile() == FILE_MEMORY_GLOBAL) {
2529       emitInsn(0xef600000);
2530       width = 30;
2531    } else {
2532       emitInsn(0xef800000);
2533       width = 22;
2534    }
2535    emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2536    emitADDR (0x08, 0x16, width, 2, insn->src(0));
2537    emitField(0x00, 4, insn->subOp);
2538 }
2539 
2540 /*******************************************************************************
2541  * surface
2542  ******************************************************************************/
2543 
2544 void
emitPIXLD()2545 CodeEmitterGM107::emitPIXLD()
2546 {
2547    emitInsn (0xefe80000);
2548    emitPRED (0x2d);
2549    emitField(0x1f, 3, insn->subOp);
2550    emitGPR  (0x08, insn->src(0));
2551    emitGPR  (0x00, insn->def(0));
2552 }
2553 
2554 /*******************************************************************************
2555  * texture
2556  ******************************************************************************/
2557 
2558 void
emitTEXs(int pos)2559 CodeEmitterGM107::emitTEXs(int pos)
2560 {
2561    int src1 = insn->predSrc == 1 ? 2 : 1;
2562    if (insn->srcExists(src1))
2563       emitGPR(pos, insn->src(src1));
2564    else
2565       emitGPR(pos);
2566 }
2567 
2568 void
emitTEX()2569 CodeEmitterGM107::emitTEX()
2570 {
2571    const TexInstruction *insn = this->insn->asTex();
2572    int lodm = 0;
2573 
2574    if (!insn->tex.levelZero) {
2575       switch (insn->op) {
2576       case OP_TEX: lodm = 0; break;
2577       case OP_TXB: lodm = 2; break;
2578       case OP_TXL: lodm = 3; break;
2579       default:
2580          assert(!"invalid tex op");
2581          break;
2582       }
2583    } else {
2584       lodm = 1;
2585    }
2586 
2587    if (insn->tex.rIndirectSrc >= 0) {
2588       emitInsn (0xdeb80000);
2589       emitField(0x25, 2, lodm);
2590       emitField(0x24, 1, insn->tex.useOffsets == 1);
2591    } else {
2592       emitInsn (0xc0380000);
2593       emitField(0x37, 2, lodm);
2594       emitField(0x36, 1, insn->tex.useOffsets == 1);
2595       emitField(0x24, 13, insn->tex.r);
2596    }
2597 
2598    emitField(0x32, 1, insn->tex.target.isShadow());
2599    emitField(0x31, 1, insn->tex.liveOnly);
2600    emitField(0x23, 1, insn->tex.derivAll);
2601    emitField(0x1f, 4, insn->tex.mask);
2602    emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2603                       insn->tex.target.getDim() - 1);
2604    emitField(0x1c, 1, insn->tex.target.isArray());
2605    emitTEXs (0x14);
2606    emitGPR  (0x08, insn->src(0));
2607    emitGPR  (0x00, insn->def(0));
2608 }
2609 
2610 void
emitTLD()2611 CodeEmitterGM107::emitTLD()
2612 {
2613    const TexInstruction *insn = this->insn->asTex();
2614 
2615    if (insn->tex.rIndirectSrc >= 0) {
2616       emitInsn (0xdd380000);
2617    } else {
2618       emitInsn (0xdc380000);
2619       emitField(0x24, 13, insn->tex.r);
2620    }
2621 
2622    emitField(0x37, 1, insn->tex.levelZero == 0);
2623    emitField(0x32, 1, insn->tex.target.isMS());
2624    emitField(0x31, 1, insn->tex.liveOnly);
2625    emitField(0x23, 1, insn->tex.useOffsets == 1);
2626    emitField(0x1f, 4, insn->tex.mask);
2627    emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2628                       insn->tex.target.getDim() - 1);
2629    emitField(0x1c, 1, insn->tex.target.isArray());
2630    emitTEXs (0x14);
2631    emitGPR  (0x08, insn->src(0));
2632    emitGPR  (0x00, insn->def(0));
2633 }
2634 
2635 void
emitTLD4()2636 CodeEmitterGM107::emitTLD4()
2637 {
2638    const TexInstruction *insn = this->insn->asTex();
2639 
2640    if (insn->tex.rIndirectSrc >= 0) {
2641       emitInsn (0xdef80000);
2642       emitField(0x26, 2, insn->tex.gatherComp);
2643       emitField(0x25, 2, insn->tex.useOffsets == 4);
2644       emitField(0x24, 2, insn->tex.useOffsets == 1);
2645    } else {
2646       emitInsn (0xc8380000);
2647       emitField(0x38, 2, insn->tex.gatherComp);
2648       emitField(0x37, 2, insn->tex.useOffsets == 4);
2649       emitField(0x36, 2, insn->tex.useOffsets == 1);
2650       emitField(0x24, 13, insn->tex.r);
2651    }
2652 
2653    emitField(0x32, 1, insn->tex.target.isShadow());
2654    emitField(0x31, 1, insn->tex.liveOnly);
2655    emitField(0x23, 1, insn->tex.derivAll);
2656    emitField(0x1f, 4, insn->tex.mask);
2657    emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2658                       insn->tex.target.getDim() - 1);
2659    emitField(0x1c, 1, insn->tex.target.isArray());
2660    emitTEXs (0x14);
2661    emitGPR  (0x08, insn->src(0));
2662    emitGPR  (0x00, insn->def(0));
2663 }
2664 
2665 void
emitTXD()2666 CodeEmitterGM107::emitTXD()
2667 {
2668    const TexInstruction *insn = this->insn->asTex();
2669 
2670    if (insn->tex.rIndirectSrc >= 0) {
2671       emitInsn (0xde780000);
2672    } else {
2673       emitInsn (0xde380000);
2674       emitField(0x24, 13, insn->tex.r);
2675    }
2676 
2677    emitField(0x31, 1, insn->tex.liveOnly);
2678    emitField(0x23, 1, insn->tex.useOffsets == 1);
2679    emitField(0x1f, 4, insn->tex.mask);
2680    emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2681                       insn->tex.target.getDim() - 1);
2682    emitField(0x1c, 1, insn->tex.target.isArray());
2683    emitTEXs (0x14);
2684    emitGPR  (0x08, insn->src(0));
2685    emitGPR  (0x00, insn->def(0));
2686 }
2687 
2688 void
emitTMML()2689 CodeEmitterGM107::emitTMML()
2690 {
2691    const TexInstruction *insn = this->insn->asTex();
2692 
2693    if (insn->tex.rIndirectSrc >= 0) {
2694       emitInsn (0xdf600000);
2695    } else {
2696       emitInsn (0xdf580000);
2697       emitField(0x24, 13, insn->tex.r);
2698    }
2699 
2700    emitField(0x31, 1, insn->tex.liveOnly);
2701    emitField(0x23, 1, insn->tex.derivAll);
2702    emitField(0x1f, 4, insn->tex.mask);
2703    emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2704                       insn->tex.target.getDim() - 1);
2705    emitField(0x1c, 1, insn->tex.target.isArray());
2706    emitTEXs (0x14);
2707    emitGPR  (0x08, insn->src(0));
2708    emitGPR  (0x00, insn->def(0));
2709 }
2710 
2711 void
emitTXQ()2712 CodeEmitterGM107::emitTXQ()
2713 {
2714    const TexInstruction *insn = this->insn->asTex();
2715    int type = 0;
2716 
2717    switch (insn->tex.query) {
2718    case TXQ_DIMS           : type = 0x01; break;
2719    case TXQ_TYPE           : type = 0x02; break;
2720    case TXQ_SAMPLE_POSITION: type = 0x05; break;
2721    case TXQ_FILTER         : type = 0x10; break;
2722    case TXQ_LOD            : type = 0x12; break;
2723    case TXQ_WRAP           : type = 0x14; break;
2724    case TXQ_BORDER_COLOUR  : type = 0x16; break;
2725    default:
2726       assert(!"invalid txq query");
2727       break;
2728    }
2729 
2730    if (insn->tex.rIndirectSrc >= 0) {
2731       emitInsn (0xdf500000);
2732    } else {
2733       emitInsn (0xdf480000);
2734       emitField(0x24, 13, insn->tex.r);
2735    }
2736 
2737    emitField(0x31, 1, insn->tex.liveOnly);
2738    emitField(0x1f, 4, insn->tex.mask);
2739    emitField(0x16, 6, type);
2740    emitGPR  (0x08, insn->src(0));
2741    emitGPR  (0x00, insn->def(0));
2742 }
2743 
2744 void
emitDEPBAR()2745 CodeEmitterGM107::emitDEPBAR()
2746 {
2747    emitInsn (0xf0f00000);
2748    emitField(0x1d, 1, 1); /* le */
2749    emitField(0x1a, 3, 5);
2750    emitField(0x14, 6, insn->subOp);
2751    emitField(0x00, 6, insn->subOp);
2752 }
2753 
2754 /*******************************************************************************
2755  * misc
2756  ******************************************************************************/
2757 
2758 void
emitNOP()2759 CodeEmitterGM107::emitNOP()
2760 {
2761    emitInsn(0x50b00000);
2762 }
2763 
2764 void
emitKIL()2765 CodeEmitterGM107::emitKIL()
2766 {
2767    emitInsn (0xe3300000);
2768    emitCond5(0x00, CC_TR);
2769 }
2770 
2771 void
emitOUT()2772 CodeEmitterGM107::emitOUT()
2773 {
2774    const int cut  = insn->op == OP_RESTART || insn->subOp;
2775    const int emit = insn->op == OP_EMIT;
2776 
2777    switch (insn->src(1).getFile()) {
2778    case FILE_GPR:
2779       emitInsn(0xfbe00000);
2780       emitGPR (0x14, insn->src(1));
2781       break;
2782    case FILE_IMMEDIATE:
2783       emitInsn(0xf6e00000);
2784       emitIMMD(0x14, 19, insn->src(1));
2785       break;
2786    case FILE_MEMORY_CONST:
2787       emitInsn(0xebe00000);
2788       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2789       break;
2790    default:
2791       assert(!"bad src1 file");
2792       break;
2793    }
2794 
2795    emitField(0x27, 2, (cut << 1) | emit);
2796    emitGPR  (0x08, insn->src(0));
2797    emitGPR  (0x00, insn->def(0));
2798 }
2799 
2800 void
emitBAR()2801 CodeEmitterGM107::emitBAR()
2802 {
2803    uint8_t subop;
2804 
2805    emitInsn (0xf0a80000);
2806 
2807    switch (insn->subOp) {
2808    case NV50_IR_SUBOP_BAR_RED_POPC: subop = 0x02; break;
2809    case NV50_IR_SUBOP_BAR_RED_AND:  subop = 0x0a; break;
2810    case NV50_IR_SUBOP_BAR_RED_OR:   subop = 0x12; break;
2811    case NV50_IR_SUBOP_BAR_ARRIVE:   subop = 0x81; break;
2812    default:
2813       subop = 0x80;
2814       assert(insn->subOp == NV50_IR_SUBOP_BAR_SYNC);
2815       break;
2816    }
2817 
2818    emitField(0x20, 8, subop);
2819 
2820    // barrier id
2821    if (insn->src(0).getFile() == FILE_GPR) {
2822       emitGPR(0x08, insn->src(0));
2823    } else {
2824       ImmediateValue *imm = insn->getSrc(0)->asImm();
2825       assert(imm);
2826       emitField(0x08, 8, imm->reg.data.u32);
2827       emitField(0x2b, 1, 1);
2828    }
2829 
2830    // thread count
2831    if (insn->src(1).getFile() == FILE_GPR) {
2832       emitGPR(0x14, insn->src(1));
2833    } else {
2834       ImmediateValue *imm = insn->getSrc(0)->asImm();
2835       assert(imm);
2836       emitField(0x14, 12, imm->reg.data.u32);
2837       emitField(0x2c, 1, 1);
2838    }
2839 
2840    if (insn->srcExists(2) && (insn->predSrc != 2)) {
2841       emitPRED (0x27, insn->src(2));
2842       emitField(0x2a, 1, insn->src(2).mod == Modifier(NV50_IR_MOD_NOT));
2843    } else {
2844       emitField(0x27, 3, 7);
2845    }
2846 }
2847 
2848 void
emitMEMBAR()2849 CodeEmitterGM107::emitMEMBAR()
2850 {
2851    emitInsn (0xef980000);
2852    emitField(0x08, 2, insn->subOp >> 2);
2853 }
2854 
2855 void
emitVOTE()2856 CodeEmitterGM107::emitVOTE()
2857 {
2858    assert(insn->src(0).getFile() == FILE_PREDICATE);
2859 
2860    int r = -1, p = -1;
2861    for (int i = 0; insn->defExists(i); i++) {
2862       if (insn->def(i).getFile() == FILE_GPR)
2863          r = i;
2864       else if (insn->def(i).getFile() == FILE_PREDICATE)
2865          p = i;
2866    }
2867 
2868    emitInsn (0x50d80000);
2869    emitField(0x30, 2, insn->subOp);
2870    if (r >= 0)
2871       emitGPR  (0x00, insn->def(r));
2872    else
2873       emitGPR  (0x00);
2874    if (p >= 0)
2875       emitPRED (0x2d, insn->def(p));
2876    else
2877       emitPRED (0x2d);
2878    emitField(0x2a, 1, insn->src(0).mod == Modifier(NV50_IR_MOD_NOT));
2879    emitPRED (0x27, insn->src(0));
2880 }
2881 
2882 void
emitSUTarget()2883 CodeEmitterGM107::emitSUTarget()
2884 {
2885    const TexInstruction *insn = this->insn->asTex();
2886    int target = 0;
2887 
2888    assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
2889 
2890    if (insn->tex.target == TEX_TARGET_BUFFER) {
2891       target = 2;
2892    } else if (insn->tex.target == TEX_TARGET_1D_ARRAY) {
2893       target = 4;
2894    } else if (insn->tex.target == TEX_TARGET_2D ||
2895               insn->tex.target == TEX_TARGET_RECT) {
2896       target = 6;
2897    } else if (insn->tex.target == TEX_TARGET_2D_ARRAY ||
2898               insn->tex.target == TEX_TARGET_CUBE ||
2899               insn->tex.target == TEX_TARGET_CUBE_ARRAY) {
2900       target = 8;
2901    } else if (insn->tex.target == TEX_TARGET_3D) {
2902       target = 10;
2903    } else {
2904       assert(insn->tex.target == TEX_TARGET_1D);
2905    }
2906    emitField(0x20, 4, target);
2907 }
2908 
2909 void
emitSUHandle(const int s)2910 CodeEmitterGM107::emitSUHandle(const int s)
2911 {
2912    const TexInstruction *insn = this->insn->asTex();
2913 
2914    assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
2915 
2916    if (insn->src(s).getFile() == FILE_GPR) {
2917       emitGPR(0x27, insn->src(s));
2918    } else {
2919       ImmediateValue *imm = insn->getSrc(s)->asImm();
2920       assert(imm);
2921       emitField(0x33, 1, 1);
2922       emitField(0x24, 13, imm->reg.data.u32);
2923    }
2924 }
2925 
2926 void
emitSUSTx()2927 CodeEmitterGM107::emitSUSTx()
2928 {
2929    const TexInstruction *insn = this->insn->asTex();
2930 
2931    emitInsn(0xeb200000);
2932    if (insn->op == OP_SUSTB)
2933       emitField(0x34, 1, 1);
2934    emitSUTarget();
2935 
2936    emitLDSTc(0x18);
2937    emitField(0x14, 4, 0xf); // rgba
2938    emitGPR  (0x08, insn->src(0));
2939    emitGPR  (0x00, insn->src(1));
2940 
2941    emitSUHandle(2);
2942 }
2943 
2944 void
emitSULDx()2945 CodeEmitterGM107::emitSULDx()
2946 {
2947    const TexInstruction *insn = this->insn->asTex();
2948    int type = 0;
2949 
2950    emitInsn(0xeb000000);
2951    if (insn->op == OP_SULDB)
2952       emitField(0x34, 1, 1);
2953    emitSUTarget();
2954 
2955    switch (insn->dType) {
2956    case TYPE_S8:   type = 1; break;
2957    case TYPE_U16:  type = 2; break;
2958    case TYPE_S16:  type = 3; break;
2959    case TYPE_U32:  type = 4; break;
2960    case TYPE_U64:  type = 5; break;
2961    case TYPE_B128: type = 6; break;
2962    default:
2963       assert(insn->dType == TYPE_U8);
2964       break;
2965    }
2966    emitLDSTc(0x18);
2967    emitField(0x14, 3, type);
2968    emitGPR  (0x00, insn->def(0));
2969    emitGPR  (0x08, insn->src(0));
2970 
2971    emitSUHandle(1);
2972 }
2973 
2974 void
emitSUREDx()2975 CodeEmitterGM107::emitSUREDx()
2976 {
2977    const TexInstruction *insn = this->insn->asTex();
2978    uint8_t type = 0, subOp;
2979 
2980    if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS)
2981       emitInsn(0xeac00000);
2982    else
2983       emitInsn(0xea600000);
2984 
2985    if (insn->op == OP_SUREDB)
2986       emitField(0x34, 1, 1);
2987    emitSUTarget();
2988 
2989    // destination type
2990    switch (insn->dType) {
2991    case TYPE_S32: type = 1; break;
2992    case TYPE_U64: type = 2; break;
2993    case TYPE_F32: type = 3; break;
2994    case TYPE_S64: type = 5; break;
2995    default:
2996       assert(insn->dType == TYPE_U32);
2997       break;
2998    }
2999 
3000    // atomic operation
3001    if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
3002       subOp = 0;
3003    } else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
3004       subOp = 8;
3005    } else {
3006       subOp = insn->subOp;
3007    }
3008 
3009    emitField(0x24, 3, type);
3010    emitField(0x1d, 4, subOp);
3011    emitGPR  (0x14, insn->src(1));
3012    emitGPR  (0x08, insn->src(0));
3013    emitGPR  (0x00, insn->def(0));
3014 
3015    emitSUHandle(2);
3016 }
3017 
3018 /*******************************************************************************
3019  * assembler front-end
3020  ******************************************************************************/
3021 
3022 bool
emitInstruction(Instruction * i)3023 CodeEmitterGM107::emitInstruction(Instruction *i)
3024 {
3025    const unsigned int size = (writeIssueDelays && !(codeSize & 0x1f)) ? 16 : 8;
3026    bool ret = true;
3027 
3028    insn = i;
3029 
3030    if (insn->encSize != 8) {
3031       ERROR("skipping undecodable instruction: "); insn->print();
3032       return false;
3033    } else
3034    if (codeSize + size > codeSizeLimit) {
3035       ERROR("code emitter output buffer too small\n");
3036       return false;
3037    }
3038 
3039    if (writeIssueDelays) {
3040       int n = ((codeSize & 0x1f) / 8) - 1;
3041       if (n < 0) {
3042          data = code;
3043          data[0] = 0x00000000;
3044          data[1] = 0x00000000;
3045          code += 2;
3046          codeSize += 8;
3047          n++;
3048       }
3049 
3050       emitField(data, n * 21, 21, insn->sched);
3051    }
3052 
3053    switch (insn->op) {
3054    case OP_EXIT:
3055       emitEXIT();
3056       break;
3057    case OP_BRA:
3058       emitBRA();
3059       break;
3060    case OP_CALL:
3061       emitCAL();
3062       break;
3063    case OP_PRECONT:
3064       emitPCNT();
3065       break;
3066    case OP_CONT:
3067       emitCONT();
3068       break;
3069    case OP_PREBREAK:
3070       emitPBK();
3071       break;
3072    case OP_BREAK:
3073       emitBRK();
3074       break;
3075    case OP_PRERET:
3076       emitPRET();
3077       break;
3078    case OP_RET:
3079       emitRET();
3080       break;
3081    case OP_JOINAT:
3082       emitSSY();
3083       break;
3084    case OP_JOIN:
3085       emitSYNC();
3086       break;
3087    case OP_QUADON:
3088       emitSAM();
3089       break;
3090    case OP_QUADPOP:
3091       emitRAM();
3092       break;
3093    case OP_MOV:
3094       emitMOV();
3095       break;
3096    case OP_RDSV:
3097       emitS2R();
3098       break;
3099    case OP_ABS:
3100    case OP_NEG:
3101    case OP_SAT:
3102    case OP_FLOOR:
3103    case OP_CEIL:
3104    case OP_TRUNC:
3105    case OP_CVT:
3106       if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
3107                                  insn->src(0).getFile() == FILE_PREDICATE)) {
3108          emitMOV();
3109       } else if (isFloatType(insn->dType)) {
3110          if (isFloatType(insn->sType))
3111             emitF2F();
3112          else
3113             emitI2F();
3114       } else {
3115          if (isFloatType(insn->sType))
3116             emitF2I();
3117          else
3118             emitI2I();
3119       }
3120       break;
3121    case OP_SHFL:
3122       emitSHFL();
3123       break;
3124    case OP_ADD:
3125    case OP_SUB:
3126       if (isFloatType(insn->dType)) {
3127          if (insn->dType == TYPE_F64)
3128             emitDADD();
3129          else
3130             emitFADD();
3131       } else {
3132          emitIADD();
3133       }
3134       break;
3135    case OP_MUL:
3136       if (isFloatType(insn->dType)) {
3137          if (insn->dType == TYPE_F64)
3138             emitDMUL();
3139          else
3140             emitFMUL();
3141       } else {
3142          emitIMUL();
3143       }
3144       break;
3145    case OP_MAD:
3146    case OP_FMA:
3147       if (isFloatType(insn->dType)) {
3148          if (insn->dType == TYPE_F64)
3149             emitDFMA();
3150          else
3151             emitFFMA();
3152       } else {
3153          emitIMAD();
3154       }
3155       break;
3156    case OP_SHLADD:
3157       emitISCADD();
3158       break;
3159    case OP_MIN:
3160    case OP_MAX:
3161       if (isFloatType(insn->dType)) {
3162          if (insn->dType == TYPE_F64)
3163             emitDMNMX();
3164          else
3165             emitFMNMX();
3166       } else {
3167          emitIMNMX();
3168       }
3169       break;
3170    case OP_SHL:
3171       emitSHL();
3172       break;
3173    case OP_SHR:
3174       emitSHR();
3175       break;
3176    case OP_POPCNT:
3177       emitPOPC();
3178       break;
3179    case OP_INSBF:
3180       emitBFI();
3181       break;
3182    case OP_EXTBF:
3183       emitBFE();
3184       break;
3185    case OP_BFIND:
3186       emitFLO();
3187       break;
3188    case OP_SLCT:
3189       if (isFloatType(insn->dType))
3190          emitFCMP();
3191       else
3192          emitICMP();
3193       break;
3194    case OP_SET:
3195    case OP_SET_AND:
3196    case OP_SET_OR:
3197    case OP_SET_XOR:
3198       if (insn->def(0).getFile() != FILE_PREDICATE) {
3199          if (isFloatType(insn->sType))
3200             if (insn->sType == TYPE_F64)
3201                emitDSET();
3202             else
3203                emitFSET();
3204          else
3205             emitISET();
3206       } else {
3207          if (isFloatType(insn->sType))
3208             if (insn->sType == TYPE_F64)
3209                emitDSETP();
3210             else
3211                emitFSETP();
3212          else
3213             emitISETP();
3214       }
3215       break;
3216    case OP_SELP:
3217       emitSEL();
3218       break;
3219    case OP_PRESIN:
3220    case OP_PREEX2:
3221       emitRRO();
3222       break;
3223    case OP_COS:
3224    case OP_SIN:
3225    case OP_EX2:
3226    case OP_LG2:
3227    case OP_RCP:
3228    case OP_RSQ:
3229       emitMUFU();
3230       break;
3231    case OP_AND:
3232    case OP_OR:
3233    case OP_XOR:
3234       emitLOP();
3235       break;
3236    case OP_NOT:
3237       emitNOT();
3238       break;
3239    case OP_LOAD:
3240       switch (insn->src(0).getFile()) {
3241       case FILE_MEMORY_CONST : emitLDC(); break;
3242       case FILE_MEMORY_LOCAL : emitLDL(); break;
3243       case FILE_MEMORY_SHARED: emitLDS(); break;
3244       case FILE_MEMORY_GLOBAL: emitLD(); break;
3245       default:
3246          assert(!"invalid load");
3247          emitNOP();
3248          break;
3249       }
3250       break;
3251    case OP_STORE:
3252       switch (insn->src(0).getFile()) {
3253       case FILE_MEMORY_LOCAL : emitSTL(); break;
3254       case FILE_MEMORY_SHARED: emitSTS(); break;
3255       case FILE_MEMORY_GLOBAL: emitST(); break;
3256       default:
3257          assert(!"invalid store");
3258          emitNOP();
3259          break;
3260       }
3261       break;
3262    case OP_ATOM:
3263       if (insn->src(0).getFile() == FILE_MEMORY_SHARED)
3264          emitATOMS();
3265       else
3266          if (!insn->defExists(0) && insn->subOp < NV50_IR_SUBOP_ATOM_CAS)
3267             emitRED();
3268          else
3269             emitATOM();
3270       break;
3271    case OP_CCTL:
3272       emitCCTL();
3273       break;
3274    case OP_VFETCH:
3275       emitALD();
3276       break;
3277    case OP_EXPORT:
3278       emitAST();
3279       break;
3280    case OP_PFETCH:
3281       emitISBERD();
3282       break;
3283    case OP_AFETCH:
3284       emitAL2P();
3285       break;
3286    case OP_LINTERP:
3287    case OP_PINTERP:
3288       emitIPA();
3289       break;
3290    case OP_PIXLD:
3291       emitPIXLD();
3292       break;
3293    case OP_TEX:
3294    case OP_TXB:
3295    case OP_TXL:
3296       emitTEX();
3297       break;
3298    case OP_TXF:
3299       emitTLD();
3300       break;
3301    case OP_TXG:
3302       emitTLD4();
3303       break;
3304    case OP_TXD:
3305       emitTXD();
3306       break;
3307    case OP_TXQ:
3308       emitTXQ();
3309       break;
3310    case OP_TXLQ:
3311       emitTMML();
3312       break;
3313    case OP_TEXBAR:
3314       emitDEPBAR();
3315       break;
3316    case OP_QUADOP:
3317       emitFSWZADD();
3318       break;
3319    case OP_NOP:
3320       emitNOP();
3321       break;
3322    case OP_DISCARD:
3323       emitKIL();
3324       break;
3325    case OP_EMIT:
3326    case OP_RESTART:
3327       emitOUT();
3328       break;
3329    case OP_BAR:
3330       emitBAR();
3331       break;
3332    case OP_MEMBAR:
3333       emitMEMBAR();
3334       break;
3335    case OP_VOTE:
3336       emitVOTE();
3337       break;
3338    case OP_SUSTB:
3339    case OP_SUSTP:
3340       emitSUSTx();
3341       break;
3342    case OP_SULDB:
3343    case OP_SULDP:
3344       emitSULDx();
3345       break;
3346    case OP_SUREDB:
3347    case OP_SUREDP:
3348       emitSUREDx();
3349       break;
3350    default:
3351       assert(!"invalid opcode");
3352       emitNOP();
3353       ret = false;
3354       break;
3355    }
3356 
3357    if (insn->join) {
3358       /*XXX*/
3359    }
3360 
3361    code += 2;
3362    codeSize += 8;
3363    return ret;
3364 }
3365 
3366 uint32_t
getMinEncodingSize(const Instruction * i) const3367 CodeEmitterGM107::getMinEncodingSize(const Instruction *i) const
3368 {
3369    return 8;
3370 }
3371 
3372 /*******************************************************************************
3373  * sched data calculator
3374  ******************************************************************************/
3375 
3376 class SchedDataCalculatorGM107 : public Pass
3377 {
3378 public:
SchedDataCalculatorGM107(const TargetGM107 * targ)3379    SchedDataCalculatorGM107(const TargetGM107 *targ) : targ(targ) {}
3380 
3381 private:
3382    struct RegScores
3383    {
3384       struct ScoreData {
3385          int r[256];
3386          int p[8];
3387          int c;
3388       } rd, wr;
3389       int base;
3390 
rebasenv50_ir::SchedDataCalculatorGM107::RegScores3391       void rebase(const int base)
3392       {
3393          const int delta = this->base - base;
3394          if (!delta)
3395             return;
3396          this->base = 0;
3397 
3398          for (int i = 0; i < 256; ++i) {
3399             rd.r[i] += delta;
3400             wr.r[i] += delta;
3401          }
3402          for (int i = 0; i < 8; ++i) {
3403             rd.p[i] += delta;
3404             wr.p[i] += delta;
3405          }
3406          rd.c += delta;
3407          wr.c += delta;
3408       }
wipenv50_ir::SchedDataCalculatorGM107::RegScores3409       void wipe()
3410       {
3411          memset(&rd, 0, sizeof(rd));
3412          memset(&wr, 0, sizeof(wr));
3413       }
getLatestnv50_ir::SchedDataCalculatorGM107::RegScores3414       int getLatest(const ScoreData& d) const
3415       {
3416          int max = 0;
3417          for (int i = 0; i < 256; ++i)
3418             if (d.r[i] > max)
3419                max = d.r[i];
3420          for (int i = 0; i < 8; ++i)
3421             if (d.p[i] > max)
3422                max = d.p[i];
3423          if (d.c > max)
3424             max = d.c;
3425          return max;
3426       }
getLatestRdnv50_ir::SchedDataCalculatorGM107::RegScores3427       inline int getLatestRd() const
3428       {
3429          return getLatest(rd);
3430       }
getLatestWrnv50_ir::SchedDataCalculatorGM107::RegScores3431       inline int getLatestWr() const
3432       {
3433          return getLatest(wr);
3434       }
getLatestnv50_ir::SchedDataCalculatorGM107::RegScores3435       inline int getLatest() const
3436       {
3437          return MAX2(getLatestRd(), getLatestWr());
3438       }
setMaxnv50_ir::SchedDataCalculatorGM107::RegScores3439       void setMax(const RegScores *that)
3440       {
3441          for (int i = 0; i < 256; ++i) {
3442             rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
3443             wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
3444          }
3445          for (int i = 0; i < 8; ++i) {
3446             rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
3447             wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
3448          }
3449          rd.c = MAX2(rd.c, that->rd.c);
3450          wr.c = MAX2(wr.c, that->wr.c);
3451       }
printnv50_ir::SchedDataCalculatorGM107::RegScores3452       void print(int cycle)
3453       {
3454          for (int i = 0; i < 256; ++i) {
3455             if (rd.r[i] > cycle)
3456                INFO("rd $r%i @ %i\n", i, rd.r[i]);
3457             if (wr.r[i] > cycle)
3458                INFO("wr $r%i @ %i\n", i, wr.r[i]);
3459          }
3460          for (int i = 0; i < 8; ++i) {
3461             if (rd.p[i] > cycle)
3462                INFO("rd $p%i @ %i\n", i, rd.p[i]);
3463             if (wr.p[i] > cycle)
3464                INFO("wr $p%i @ %i\n", i, wr.p[i]);
3465          }
3466          if (rd.c > cycle)
3467             INFO("rd $c @ %i\n", rd.c);
3468          if (wr.c > cycle)
3469             INFO("wr $c @ %i\n", wr.c);
3470       }
3471    };
3472 
3473    RegScores *score; // for current BB
3474    std::vector<RegScores> scoreBoards;
3475 
3476    const TargetGM107 *targ;
3477    bool visit(Function *);
3478    bool visit(BasicBlock *);
3479 
3480    void commitInsn(const Instruction *, int);
3481    int calcDelay(const Instruction *, int) const;
3482    void setDelay(Instruction *, int, const Instruction *);
3483    void recordWr(const Value *, int, int);
3484    void checkRd(const Value *, int, int&) const;
3485 
3486    inline void emitYield(Instruction *);
3487    inline void emitStall(Instruction *, uint8_t);
3488    inline void emitReuse(Instruction *, uint8_t);
3489    inline void emitWrDepBar(Instruction *, uint8_t);
3490    inline void emitRdDepBar(Instruction *, uint8_t);
3491    inline void emitWtDepBar(Instruction *, uint8_t);
3492 
3493    inline int getStall(const Instruction *) const;
3494    inline int getWrDepBar(const Instruction *) const;
3495    inline int getRdDepBar(const Instruction *) const;
3496    inline int getWtDepBar(const Instruction *) const;
3497 
3498    void setReuseFlag(Instruction *);
3499 
3500    inline void printSchedInfo(int, const Instruction *) const;
3501 
3502    struct LiveBarUse {
LiveBarUsenv50_ir::SchedDataCalculatorGM107::LiveBarUse3503       LiveBarUse(Instruction *insn, Instruction *usei)
3504          : insn(insn), usei(usei) { }
3505       Instruction *insn;
3506       Instruction *usei;
3507    };
3508 
3509    struct LiveBarDef {
LiveBarDefnv50_ir::SchedDataCalculatorGM107::LiveBarDef3510       LiveBarDef(Instruction *insn, Instruction *defi)
3511          : insn(insn), defi(defi) { }
3512       Instruction *insn;
3513       Instruction *defi;
3514    };
3515 
3516    bool insertBarriers(BasicBlock *);
3517 
3518    Instruction *findFirstUse(const Instruction *) const;
3519    Instruction *findFirstDef(const Instruction *) const;
3520 
3521    bool needRdDepBar(const Instruction *) const;
3522    bool needWrDepBar(const Instruction *) const;
3523 };
3524 
3525 inline void
emitStall(Instruction * insn,uint8_t cnt)3526 SchedDataCalculatorGM107::emitStall(Instruction *insn, uint8_t cnt)
3527 {
3528    assert(cnt < 16);
3529    insn->sched |= cnt;
3530 }
3531 
3532 inline void
emitYield(Instruction * insn)3533 SchedDataCalculatorGM107::emitYield(Instruction *insn)
3534 {
3535    insn->sched |= 1 << 4;
3536 }
3537 
3538 inline void
emitWrDepBar(Instruction * insn,uint8_t id)3539 SchedDataCalculatorGM107::emitWrDepBar(Instruction *insn, uint8_t id)
3540 {
3541    assert(id < 6);
3542    if ((insn->sched & 0xe0) == 0xe0)
3543       insn->sched ^= 0xe0;
3544    insn->sched |= id << 5;
3545 }
3546 
3547 inline void
emitRdDepBar(Instruction * insn,uint8_t id)3548 SchedDataCalculatorGM107::emitRdDepBar(Instruction *insn, uint8_t id)
3549 {
3550    assert(id < 6);
3551    if ((insn->sched & 0x700) == 0x700)
3552       insn->sched ^= 0x700;
3553    insn->sched |= id << 8;
3554 }
3555 
3556 inline void
emitWtDepBar(Instruction * insn,uint8_t id)3557 SchedDataCalculatorGM107::emitWtDepBar(Instruction *insn, uint8_t id)
3558 {
3559    assert(id < 6);
3560    insn->sched |= 1 << (11 + id);
3561 }
3562 
3563 inline void
emitReuse(Instruction * insn,uint8_t id)3564 SchedDataCalculatorGM107::emitReuse(Instruction *insn, uint8_t id)
3565 {
3566    assert(id < 4);
3567    insn->sched |= 1 << (17 + id);
3568 }
3569 
3570 inline void
printSchedInfo(int cycle,const Instruction * insn) const3571 SchedDataCalculatorGM107::printSchedInfo(int cycle,
3572                                          const Instruction *insn) const
3573 {
3574    uint8_t st, yl, wr, rd, wt, ru;
3575 
3576    st = (insn->sched & 0x00000f) >> 0;
3577    yl = (insn->sched & 0x000010) >> 4;
3578    wr = (insn->sched & 0x0000e0) >> 5;
3579    rd = (insn->sched & 0x000700) >> 8;
3580    wt = (insn->sched & 0x01f800) >> 11;
3581    ru = (insn->sched & 0x1e0000) >> 17;
3582 
3583    INFO("cycle %i, (st 0x%x, yl 0x%x, wr 0x%x, rd 0x%x, wt 0x%x, ru 0x%x)\n",
3584         cycle, st, yl, wr, rd, wt, ru);
3585 }
3586 
3587 inline int
getStall(const Instruction * insn) const3588 SchedDataCalculatorGM107::getStall(const Instruction *insn) const
3589 {
3590    return insn->sched & 0xf;
3591 }
3592 
3593 inline int
getWrDepBar(const Instruction * insn) const3594 SchedDataCalculatorGM107::getWrDepBar(const Instruction *insn) const
3595 {
3596    return (insn->sched & 0x0000e0) >> 5;
3597 }
3598 
3599 inline int
getRdDepBar(const Instruction * insn) const3600 SchedDataCalculatorGM107::getRdDepBar(const Instruction *insn) const
3601 {
3602    return (insn->sched & 0x000700) >> 8;
3603 }
3604 
3605 inline int
getWtDepBar(const Instruction * insn) const3606 SchedDataCalculatorGM107::getWtDepBar(const Instruction *insn) const
3607 {
3608    return (insn->sched & 0x01f800) >> 11;
3609 }
3610 
3611 // Emit the reuse flag which allows to make use of the new memory hierarchy
3612 // introduced since Maxwell, the operand reuse cache.
3613 //
3614 // It allows to reduce bank conflicts by caching operands. Each time you issue
3615 // an instruction, that flag can tell the hw which operands are going to be
3616 // re-used by the next instruction. Note that the next instruction has to use
3617 // the same GPR id in the same operand slot.
3618 void
setReuseFlag(Instruction * insn)3619 SchedDataCalculatorGM107::setReuseFlag(Instruction *insn)
3620 {
3621    Instruction *next = insn->next;
3622    BitSet defs(255, 1);
3623 
3624    if (!targ->isReuseSupported(insn))
3625       return;
3626 
3627    for (int d = 0; insn->defExists(d); ++d) {
3628       const Value *def = insn->def(d).rep();
3629       if (insn->def(d).getFile() != FILE_GPR)
3630          continue;
3631       if (typeSizeof(insn->dType) != 4 || def->reg.data.id == 255)
3632          continue;
3633       defs.set(def->reg.data.id);
3634    }
3635 
3636    for (int s = 0; insn->srcExists(s); s++) {
3637       const Value *src = insn->src(s).rep();
3638       if (insn->src(s).getFile() != FILE_GPR)
3639          continue;
3640       if (typeSizeof(insn->sType) != 4 || src->reg.data.id == 255)
3641          continue;
3642       if (defs.test(src->reg.data.id))
3643          continue;
3644       if (!next->srcExists(s) || next->src(s).getFile() != FILE_GPR)
3645          continue;
3646       if (src->reg.data.id != next->getSrc(s)->reg.data.id)
3647          continue;
3648       assert(s < 4);
3649       emitReuse(insn, s);
3650    }
3651 }
3652 
3653 void
recordWr(const Value * v,int cycle,int ready)3654 SchedDataCalculatorGM107::recordWr(const Value *v, int cycle, int ready)
3655 {
3656    int a = v->reg.data.id, b;
3657 
3658    switch (v->reg.file) {
3659    case FILE_GPR:
3660       b = a + v->reg.size / 4;
3661       for (int r = a; r < b; ++r)
3662          score->rd.r[r] = ready;
3663       break;
3664    case FILE_PREDICATE:
3665       // To immediately use a predicate set by any instructions, the minimum
3666       // number of stall counts is 13.
3667       score->rd.p[a] = cycle + 13;
3668       break;
3669    case FILE_FLAGS:
3670       score->rd.c = ready;
3671       break;
3672    default:
3673       break;
3674    }
3675 }
3676 
3677 void
checkRd(const Value * v,int cycle,int & delay) const3678 SchedDataCalculatorGM107::checkRd(const Value *v, int cycle, int &delay) const
3679 {
3680    int a = v->reg.data.id, b;
3681    int ready = cycle;
3682 
3683    switch (v->reg.file) {
3684    case FILE_GPR:
3685       b = a + v->reg.size / 4;
3686       for (int r = a; r < b; ++r)
3687          ready = MAX2(ready, score->rd.r[r]);
3688       break;
3689    case FILE_PREDICATE:
3690       ready = MAX2(ready, score->rd.p[a]);
3691       break;
3692    case FILE_FLAGS:
3693       ready = MAX2(ready, score->rd.c);
3694       break;
3695    default:
3696       break;
3697    }
3698    if (cycle < ready)
3699       delay = MAX2(delay, ready - cycle);
3700 }
3701 
3702 void
commitInsn(const Instruction * insn,int cycle)3703 SchedDataCalculatorGM107::commitInsn(const Instruction *insn, int cycle)
3704 {
3705    const int ready = cycle + targ->getLatency(insn);
3706 
3707    for (int d = 0; insn->defExists(d); ++d)
3708       recordWr(insn->getDef(d), cycle, ready);
3709 
3710 #ifdef GM107_DEBUG_SCHED_DATA
3711    score->print(cycle);
3712 #endif
3713 }
3714 
3715 #define GM107_MIN_ISSUE_DELAY 0x1
3716 #define GM107_MAX_ISSUE_DELAY 0xf
3717 
3718 int
calcDelay(const Instruction * insn,int cycle) const3719 SchedDataCalculatorGM107::calcDelay(const Instruction *insn, int cycle) const
3720 {
3721    int delay = 0, ready = cycle;
3722 
3723    for (int s = 0; insn->srcExists(s); ++s)
3724       checkRd(insn->getSrc(s), cycle, delay);
3725 
3726    // TODO: make use of getReadLatency()!
3727 
3728    return MAX2(delay, ready - cycle);
3729 }
3730 
3731 void
setDelay(Instruction * insn,int delay,const Instruction * next)3732 SchedDataCalculatorGM107::setDelay(Instruction *insn, int delay,
3733                                    const Instruction *next)
3734 {
3735    const OpClass cl = targ->getOpClass(insn->op);
3736    int wr, rd;
3737 
3738    if (insn->op == OP_EXIT ||
3739        insn->op == OP_BAR ||
3740        insn->op == OP_MEMBAR) {
3741       delay = GM107_MAX_ISSUE_DELAY;
3742    } else
3743    if (insn->op == OP_QUADON ||
3744        insn->op == OP_QUADPOP) {
3745       delay = 0xd;
3746    } else
3747    if (cl == OPCLASS_FLOW || insn->join) {
3748       delay = 0xd;
3749    }
3750 
3751    if (!next || !targ->canDualIssue(insn, next)) {
3752       delay = CLAMP(delay, GM107_MIN_ISSUE_DELAY, GM107_MAX_ISSUE_DELAY);
3753    } else {
3754       delay = 0x0; // dual-issue
3755    }
3756 
3757    wr = getWrDepBar(insn);
3758    rd = getRdDepBar(insn);
3759 
3760    if (delay == GM107_MIN_ISSUE_DELAY && (wr & rd) != 7) {
3761       // Barriers take one additional clock cycle to become active on top of
3762       // the clock consumed by the instruction producing it.
3763       if (!next || insn->bb != next->bb) {
3764          delay = 0x2;
3765       } else {
3766          int wt = getWtDepBar(next);
3767          if ((wt & (1 << wr)) | (wt & (1 << rd)))
3768             delay = 0x2;
3769       }
3770    }
3771 
3772    emitStall(insn, delay);
3773 }
3774 
3775 
3776 // Return true when the given instruction needs to emit a read dependency
3777 // barrier (for WaR hazards) because it doesn't operate at a fixed latency, and
3778 // setting the maximum number of stall counts is not enough.
3779 bool
needRdDepBar(const Instruction * insn) const3780 SchedDataCalculatorGM107::needRdDepBar(const Instruction *insn) const
3781 {
3782    BitSet srcs(255, 1), defs(255, 1);
3783    int a, b;
3784 
3785    if (!targ->isBarrierRequired(insn))
3786       return false;
3787 
3788    // Do not emit a read dependency barrier when the instruction doesn't use
3789    // any GPR (like st s[0x4] 0x0) as input because it's unnecessary.
3790    for (int s = 0; insn->srcExists(s); ++s) {
3791       const Value *src = insn->src(s).rep();
3792       if (insn->src(s).getFile() != FILE_GPR)
3793          continue;
3794       if (src->reg.data.id == 255)
3795          continue;
3796 
3797       a = src->reg.data.id;
3798       b = a + src->reg.size / 4;
3799       for (int r = a; r < b; ++r)
3800          srcs.set(r);
3801    }
3802 
3803    if (!srcs.popCount())
3804       return false;
3805 
3806    // Do not emit a read dependency barrier when the output GPRs are equal to
3807    // the input GPRs (like rcp $r0 $r0) because a write dependency barrier will
3808    // be produced and WaR hazards are prevented.
3809    for (int d = 0; insn->defExists(d); ++d) {
3810       const Value *def = insn->def(d).rep();
3811       if (insn->def(d).getFile() != FILE_GPR)
3812          continue;
3813       if (def->reg.data.id == 255)
3814          continue;
3815 
3816       a = def->reg.data.id;
3817       b = a + def->reg.size / 4;
3818       for (int r = a; r < b; ++r)
3819          defs.set(r);
3820    }
3821 
3822    srcs.andNot(defs);
3823    if (!srcs.popCount())
3824       return false;
3825 
3826    return true;
3827 }
3828 
3829 // Return true when the given instruction needs to emit a write dependency
3830 // barrier (for RaW hazards) because it doesn't operate at a fixed latency, and
3831 // setting the maximum number of stall counts is not enough. This is only legal
3832 // if the instruction output something.
3833 bool
needWrDepBar(const Instruction * insn) const3834 SchedDataCalculatorGM107::needWrDepBar(const Instruction *insn) const
3835 {
3836    if (!targ->isBarrierRequired(insn))
3837       return false;
3838 
3839    for (int d = 0; insn->defExists(d); ++d) {
3840       if (insn->def(d).getFile() == FILE_GPR ||
3841           insn->def(d).getFile() == FILE_PREDICATE)
3842          return true;
3843    }
3844    return false;
3845 }
3846 
3847 // Find the next instruction inside the same basic block which uses the output
3848 // of the given instruction in order to avoid RaW hazards.
3849 Instruction *
findFirstUse(const Instruction * bari) const3850 SchedDataCalculatorGM107::findFirstUse(const Instruction *bari) const
3851 {
3852    Instruction *insn, *next;
3853    int minGPR, maxGPR;
3854 
3855    if (!bari->defExists(0))
3856       return NULL;
3857 
3858    minGPR = bari->def(0).rep()->reg.data.id;
3859    maxGPR = minGPR + bari->def(0).rep()->reg.size / 4 - 1;
3860 
3861    for (insn = bari->next; insn != NULL; insn = next) {
3862       next = insn->next;
3863 
3864       for (int s = 0; insn->srcExists(s); ++s) {
3865          const Value *src = insn->src(s).rep();
3866          if (bari->def(0).getFile() == FILE_GPR) {
3867             if (insn->src(s).getFile() != FILE_GPR ||
3868                 src->reg.data.id + src->reg.size / 4 - 1 < minGPR ||
3869                 src->reg.data.id > maxGPR)
3870                continue;
3871             return insn;
3872          } else
3873          if (bari->def(0).getFile() == FILE_PREDICATE) {
3874             if (insn->src(s).getFile() != FILE_PREDICATE ||
3875                 src->reg.data.id != minGPR)
3876                continue;
3877             return insn;
3878          }
3879       }
3880    }
3881    return NULL;
3882 }
3883 
3884 // Find the next instruction inside the same basic block which overwrites, at
3885 // least, one source of the given instruction in order to avoid WaR hazards.
3886 Instruction *
findFirstDef(const Instruction * bari) const3887 SchedDataCalculatorGM107::findFirstDef(const Instruction *bari) const
3888 {
3889    Instruction *insn, *next;
3890    int minGPR, maxGPR;
3891 
3892    for (insn = bari->next; insn != NULL; insn = next) {
3893       next = insn->next;
3894 
3895       for (int d = 0; insn->defExists(d); ++d) {
3896          const Value *def = insn->def(d).rep();
3897          if (insn->def(d).getFile() != FILE_GPR)
3898             continue;
3899 
3900          minGPR = def->reg.data.id;
3901          maxGPR = minGPR + def->reg.size / 4 - 1;
3902 
3903          for (int s = 0; bari->srcExists(s); ++s) {
3904             const Value *src = bari->src(s).rep();
3905             if (bari->src(s).getFile() != FILE_GPR ||
3906                 src->reg.data.id + src->reg.size / 4 - 1 < minGPR ||
3907                 src->reg.data.id > maxGPR)
3908                continue;
3909             return insn;
3910          }
3911       }
3912    }
3913    return NULL;
3914 }
3915 
3916 // Dependency barriers:
3917 // This pass is a bit ugly and could probably be improved by performing a
3918 // better allocation.
3919 //
3920 // The main idea is to avoid WaR and RaW hazards by emitting read/write
3921 // dependency barriers using the control codes.
3922 bool
insertBarriers(BasicBlock * bb)3923 SchedDataCalculatorGM107::insertBarriers(BasicBlock *bb)
3924 {
3925    std::list<LiveBarUse> live_uses;
3926    std::list<LiveBarDef> live_defs;
3927    Instruction *insn, *next;
3928    BitSet bars(6, 1);
3929    int bar_id;
3930 
3931    for (insn = bb->getEntry(); insn != NULL; insn = next) {
3932       Instruction *usei = NULL, *defi = NULL;
3933       bool need_wr_bar, need_rd_bar;
3934 
3935       next = insn->next;
3936 
3937       // Expire old barrier uses.
3938       for (std::list<LiveBarUse>::iterator it = live_uses.begin();
3939            it != live_uses.end();) {
3940          if (insn->serial >= it->usei->serial) {
3941             int wr = getWrDepBar(it->insn);
3942             emitWtDepBar(insn, wr);
3943             bars.clr(wr); // free barrier
3944             it = live_uses.erase(it);
3945             continue;
3946          }
3947          ++it;
3948       }
3949 
3950       // Expire old barrier defs.
3951       for (std::list<LiveBarDef>::iterator it = live_defs.begin();
3952            it != live_defs.end();) {
3953          if (insn->serial >= it->defi->serial) {
3954             int rd = getRdDepBar(it->insn);
3955             emitWtDepBar(insn, rd);
3956             bars.clr(rd); // free barrier
3957             it = live_defs.erase(it);
3958             continue;
3959          }
3960          ++it;
3961       }
3962 
3963       need_wr_bar = needWrDepBar(insn);
3964       need_rd_bar = needRdDepBar(insn);
3965 
3966       if (need_wr_bar) {
3967          // When the instruction requires to emit a write dependency barrier
3968          // (all which write something at a variable latency), find the next
3969          // instruction which reads the outputs.
3970          usei = findFirstUse(insn);
3971 
3972          // Allocate and emit a new barrier.
3973          bar_id = bars.findFreeRange(1);
3974          if (bar_id == -1)
3975             bar_id = 5;
3976          bars.set(bar_id);
3977          emitWrDepBar(insn, bar_id);
3978          if (usei)
3979             live_uses.push_back(LiveBarUse(insn, usei));
3980       }
3981 
3982       if (need_rd_bar) {
3983          // When the instruction requires to emit a read dependency barrier
3984          // (all which read something at a variable latency), find the next
3985          // instruction which will write the inputs.
3986          defi = findFirstDef(insn);
3987 
3988          if (usei && defi && usei->serial <= defi->serial)
3989             continue;
3990 
3991          // Allocate and emit a new barrier.
3992          bar_id = bars.findFreeRange(1);
3993          if (bar_id == -1)
3994             bar_id = 5;
3995          bars.set(bar_id);
3996          emitRdDepBar(insn, bar_id);
3997          if (defi)
3998             live_defs.push_back(LiveBarDef(insn, defi));
3999       }
4000    }
4001 
4002    // Remove unnecessary barrier waits.
4003    BitSet alive_bars(6, 1);
4004    for (insn = bb->getEntry(); insn != NULL; insn = next) {
4005       int wr, rd, wt;
4006 
4007       next = insn->next;
4008 
4009       wr = getWrDepBar(insn);
4010       rd = getRdDepBar(insn);
4011       wt = getWtDepBar(insn);
4012 
4013       for (int idx = 0; idx < 6; ++idx) {
4014          if (!(wt & (1 << idx)))
4015             continue;
4016          if (!alive_bars.test(idx)) {
4017             insn->sched &= ~(1 << (11  + idx));
4018          } else {
4019             alive_bars.clr(idx);
4020          }
4021       }
4022 
4023       if (wr < 6)
4024          alive_bars.set(wr);
4025       if (rd < 6)
4026          alive_bars.set(rd);
4027    }
4028 
4029    return true;
4030 }
4031 
4032 bool
visit(Function * func)4033 SchedDataCalculatorGM107::visit(Function *func)
4034 {
4035    ArrayList insns;
4036 
4037    func->orderInstructions(insns);
4038 
4039    scoreBoards.resize(func->cfg.getSize());
4040    for (size_t i = 0; i < scoreBoards.size(); ++i)
4041       scoreBoards[i].wipe();
4042    return true;
4043 }
4044 
4045 bool
visit(BasicBlock * bb)4046 SchedDataCalculatorGM107::visit(BasicBlock *bb)
4047 {
4048    Instruction *insn, *next = NULL;
4049    int cycle = 0;
4050 
4051    for (Instruction *insn = bb->getEntry(); insn; insn = insn->next) {
4052       /*XXX*/
4053       insn->sched = 0x7e0;
4054    }
4055 
4056    if (!debug_get_bool_option("NV50_PROG_SCHED", true))
4057       return true;
4058 
4059    // Insert read/write dependency barriers for instructions which don't
4060    // operate at a fixed latency.
4061    insertBarriers(bb);
4062 
4063    score = &scoreBoards.at(bb->getId());
4064 
4065    for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
4066       // back branches will wait until all target dependencies are satisfied
4067       if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
4068          continue;
4069       BasicBlock *in = BasicBlock::get(ei.getNode());
4070       score->setMax(&scoreBoards.at(in->getId()));
4071    }
4072 
4073 #ifdef GM107_DEBUG_SCHED_DATA
4074    INFO("=== BB:%i initial scores\n", bb->getId());
4075    score->print(cycle);
4076 #endif
4077 
4078    // Because barriers are allocated locally (intra-BB), we have to make sure
4079    // that all produced barriers have been consumed before entering inside a
4080    // new basic block. The best way is to do a global allocation pre RA but
4081    // it's really more difficult, especially because of the phi nodes. Anyways,
4082    // it seems like that waiting on a barrier which has already been consumed
4083    // doesn't add any additional cost, it's just not elegant!
4084    Instruction *start = bb->getEntry();
4085    if (start && bb->cfg.incidentCount() > 0) {
4086       for (int b = 0; b < 6; b++)
4087          emitWtDepBar(start, b);
4088    }
4089 
4090    for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
4091       next = insn->next;
4092 
4093       commitInsn(insn, cycle);
4094       int delay = calcDelay(next, cycle);
4095       setDelay(insn, delay, next);
4096       cycle += getStall(insn);
4097 
4098       setReuseFlag(insn);
4099 
4100       // XXX: The yield flag seems to destroy a bunch of things when it is
4101       // set on every instruction, need investigation.
4102       //emitYield(insn);
4103 
4104 #ifdef GM107_DEBUG_SCHED_DATA
4105       printSchedInfo(cycle, insn);
4106       insn->print();
4107       next->print();
4108 #endif
4109    }
4110 
4111    if (!insn)
4112       return true;
4113    commitInsn(insn, cycle);
4114 
4115    int bbDelay = -1;
4116 
4117 #ifdef GM107_DEBUG_SCHED_DATA
4118    fprintf(stderr, "last instruction is : ");
4119    insn->print();
4120    fprintf(stderr, "cycle=%d\n", cycle);
4121 #endif
4122 
4123    for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
4124       BasicBlock *out = BasicBlock::get(ei.getNode());
4125 
4126       if (ei.getType() != Graph::Edge::BACK) {
4127          // Only test the first instruction of the outgoing block.
4128          next = out->getEntry();
4129          if (next) {
4130             bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
4131          } else {
4132             // When the outgoing BB is empty, make sure to set the number of
4133             // stall counts needed by the instruction because we don't know the
4134             // next instruction.
4135             bbDelay = MAX2(bbDelay, targ->getLatency(insn));
4136          }
4137       } else {
4138          // Wait until all dependencies are satisfied.
4139          const int regsFree = score->getLatest();
4140          next = out->getFirst();
4141          for (int c = cycle; next && c < regsFree; next = next->next) {
4142             bbDelay = MAX2(bbDelay, calcDelay(next, c));
4143             c += getStall(next);
4144          }
4145          next = NULL;
4146       }
4147    }
4148    if (bb->cfg.outgoingCount() != 1)
4149       next = NULL;
4150    setDelay(insn, bbDelay, next);
4151    cycle += getStall(insn);
4152 
4153    score->rebase(cycle); // common base for initializing out blocks' scores
4154    return true;
4155 }
4156 
4157 /*******************************************************************************
4158  * main
4159  ******************************************************************************/
4160 
4161 void
prepareEmission(Function * func)4162 CodeEmitterGM107::prepareEmission(Function *func)
4163 {
4164    SchedDataCalculatorGM107 sched(targGM107);
4165    CodeEmitter::prepareEmission(func);
4166    sched.run(func, true, true);
4167 }
4168 
// Number of 24-byte groups needed to hold `size` bytes, rounded up.
//
// prepareEmission(Program *) adds 8 bytes per group returned here to the
// size of each basic block to make room for the scheduling info.
//
// Written as quotient plus remainder test rather than (size + 23) / 24 so
// that sizes within 23 of UINT32_MAX do not wrap around to a tiny result.
static inline uint32_t sizeToBundlesGM107(uint32_t size)
{
   return size / 24 + (size % 24 ? 1 : 0);
}
4173 
4174 void
prepareEmission(Program * prog)4175 CodeEmitterGM107::prepareEmission(Program *prog)
4176 {
4177    for (ArrayList::Iterator fi = prog->allFuncs.iterator();
4178         !fi.end(); fi.next()) {
4179       Function *func = reinterpret_cast<Function *>(fi.get());
4180       func->binPos = prog->binSize;
4181       prepareEmission(func);
4182 
4183       // adjust sizes & positions for schedulding info:
4184       if (prog->getTarget()->hasSWSched) {
4185          uint32_t adjPos = func->binPos;
4186          BasicBlock *bb = NULL;
4187          for (int i = 0; i < func->bbCount; ++i) {
4188             bb = func->bbArray[i];
4189             int32_t adjSize = bb->binSize;
4190             if (adjPos % 32) {
4191                adjSize -= 32 - adjPos % 32;
4192                if (adjSize < 0)
4193                   adjSize = 0;
4194             }
4195             adjSize = bb->binSize + sizeToBundlesGM107(adjSize) * 8;
4196             bb->binPos = adjPos;
4197             bb->binSize = adjSize;
4198             adjPos += adjSize;
4199          }
4200          if (bb)
4201             func->binSize = adjPos - func->binPos;
4202       }
4203 
4204       prog->binSize += func->binSize;
4205    }
4206 }
4207 
CodeEmitterGM107(const TargetGM107 * target)4208 CodeEmitterGM107::CodeEmitterGM107(const TargetGM107 *target)
4209    : CodeEmitter(target),
4210      targGM107(target),
4211      writeIssueDelays(target->hasSWSched)
4212 {
4213    code = NULL;
4214    codeSize = codeSizeLimit = 0;
4215    relocInfo = NULL;
4216 }
4217 
4218 CodeEmitter *
createCodeEmitterGM107(Program::Type type)4219 TargetGM107::createCodeEmitterGM107(Program::Type type)
4220 {
4221    CodeEmitterGM107 *emit = new CodeEmitterGM107(this);
4222    emit->setProgramType(type);
4223    return emit;
4224 }
4225 
4226 } // namespace nv50_ir
4227