• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2011 Christoph Bumiller
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "codegen/nv50_ir.h"
24 #include "codegen/nv50_ir_target_nv50.h"
25 
26 namespace nv50_ir {
27 
// Encoding form selectors, passed as the 'enc' argument of setSrcFileBits().
#define NV50_OP_ENC_LONG     0 // passed by emitForm_MAD
#define NV50_OP_ENC_SHORT    1 // passed by emitForm_MUL
#define NV50_OP_ENC_IMM      2 // passed by emitForm_IMM
#define NV50_OP_ENC_LONG_ALT 3 // passed by emitForm_ADD
32 
33 class CodeEmitterNV50 : public CodeEmitter
34 {
35 public:
36    CodeEmitterNV50(const TargetNV50 *);
37 
38    virtual bool emitInstruction(Instruction *);
39 
40    virtual uint32_t getMinEncodingSize(const Instruction *) const;
41 
setProgramType(Program::Type pType)42    inline void setProgramType(Program::Type pType) { progType = pType; }
43 
44    virtual void prepareEmission(Function *);
45 
46 private:
47    Program::Type progType;
48 
49    const TargetNV50 *targNV50;
50 
51 private:
52    inline void defId(const ValueDef&, const int pos);
53    inline void srcId(const ValueRef&, const int pos);
54    inline void srcId(const ValueRef *, const int pos);
55 
56    inline void srcAddr16(const ValueRef&, bool adj, const int pos);
57    inline void srcAddr8(const ValueRef&, const int pos);
58 
59    void emitFlagsRd(const Instruction *);
60    void emitFlagsWr(const Instruction *);
61 
62    void emitCondCode(CondCode cc, DataType ty, int pos);
63 
64    inline void setARegBits(unsigned int);
65 
66    void setAReg16(const Instruction *, int s);
67    void setImmediate(const Instruction *, int s);
68 
69    void setDst(const Value *);
70    void setDst(const Instruction *, int d);
71    void setSrcFileBits(const Instruction *, int enc);
72    void setSrc(const Instruction *, unsigned int s, int slot);
73 
74    void emitForm_MAD(const Instruction *);
75    void emitForm_ADD(const Instruction *);
76    void emitForm_MUL(const Instruction *);
77    void emitForm_IMM(const Instruction *);
78 
79    void emitLoadStoreSizeLG(DataType ty, int pos);
80    void emitLoadStoreSizeCS(DataType ty);
81 
82    void roundMode_MAD(const Instruction *);
83    void roundMode_CVT(RoundMode);
84 
85    void emitMNeg12(const Instruction *);
86 
87    void emitLOAD(const Instruction *);
88    void emitSTORE(const Instruction *);
89    void emitMOV(const Instruction *);
90    void emitRDSV(const Instruction *);
91    void emitNOP();
92    void emitINTERP(const Instruction *);
93    void emitPFETCH(const Instruction *);
94    void emitOUT(const Instruction *);
95 
96    void emitUADD(const Instruction *);
97    void emitAADD(const Instruction *);
98    void emitFADD(const Instruction *);
99    void emitDADD(const Instruction *);
100    void emitIMUL(const Instruction *);
101    void emitFMUL(const Instruction *);
102    void emitDMUL(const Instruction *);
103    void emitFMAD(const Instruction *);
104    void emitDMAD(const Instruction *);
105    void emitIMAD(const Instruction *);
106    void emitISAD(const Instruction *);
107 
108    void emitMINMAX(const Instruction *);
109 
110    void emitPreOp(const Instruction *);
111    void emitSFnOp(const Instruction *, uint8_t subOp);
112 
113    void emitShift(const Instruction *);
114    void emitARL(const Instruction *, unsigned int shl);
115    void emitLogicOp(const Instruction *);
116    void emitNOT(const Instruction *);
117 
118    void emitCVT(const Instruction *);
119    void emitSET(const Instruction *);
120 
121    void emitTEX(const TexInstruction *);
122    void emitTXQ(const TexInstruction *);
123    void emitTEXPREP(const TexInstruction *);
124 
125    void emitQUADOP(const Instruction *, uint8_t lane, uint8_t quOp);
126 
127    void emitFlow(const Instruction *, uint8_t flowOp);
128    void emitPRERETEmu(const FlowInstruction *);
129    void emitBAR(const Instruction *);
130 
131    void emitATOM(const Instruction *);
132 };
133 
// Register data (reg.data) of the representative value of a reference.
// SDATA is used on sources, DDATA on definitions; both expand identically.
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) SDATA(a)
136 
srcId(const ValueRef & src,const int pos)137 void CodeEmitterNV50::srcId(const ValueRef& src, const int pos)
138 {
139    assert(src.get());
140    code[pos / 32] |= SDATA(src).id << (pos % 32);
141 }
142 
srcId(const ValueRef * src,const int pos)143 void CodeEmitterNV50::srcId(const ValueRef *src, const int pos)
144 {
145    assert(src->get());
146    code[pos / 32] |= SDATA(*src).id << (pos % 32);
147 }
148 
srcAddr16(const ValueRef & src,bool adj,const int pos)149 void CodeEmitterNV50::srcAddr16(const ValueRef& src, bool adj, const int pos)
150 {
151    assert(src.get());
152 
153    int32_t offset = SDATA(src).offset;
154 
155    assert(!adj || src.get()->reg.size <= 4);
156    if (adj)
157       offset /= src.get()->reg.size;
158 
159    assert(offset <= 0x7fff && offset >= (int32_t)-0x8000 && (pos % 32) <= 16);
160 
161    if (offset < 0)
162       offset &= adj ? (0xffff >> (src.get()->reg.size >> 1)) : 0xffff;
163 
164    code[pos / 32] |= offset << (pos % 32);
165 }
166 
srcAddr8(const ValueRef & src,const int pos)167 void CodeEmitterNV50::srcAddr8(const ValueRef& src, const int pos)
168 {
169    assert(src.get());
170 
171    uint32_t offset = SDATA(src).offset;
172 
173    assert((offset <= 0x1fc || offset == 0x3fc) && !(offset & 0x3));
174 
175    code[pos / 32] |= (offset >> 2) << (pos % 32);
176 }
177 
defId(const ValueDef & def,const int pos)178 void CodeEmitterNV50::defId(const ValueDef& def, const int pos)
179 {
180    assert(def.get() && def.getFile() != FILE_SHADER_OUTPUT);
181 
182    code[pos / 32] |= DDATA(def).id << (pos % 32);
183 }
184 
185 void
roundMode_MAD(const Instruction * insn)186 CodeEmitterNV50::roundMode_MAD(const Instruction *insn)
187 {
188    switch (insn->rnd) {
189    case ROUND_M: code[1] |= 1 << 22; break;
190    case ROUND_P: code[1] |= 2 << 22; break;
191    case ROUND_Z: code[1] |= 3 << 22; break;
192    default:
193       assert(insn->rnd == ROUND_N);
194       break;
195    }
196 }
197 
198 void
emitMNeg12(const Instruction * i)199 CodeEmitterNV50::emitMNeg12(const Instruction *i)
200 {
201    code[1] |= i->src(0).mod.neg() << 26;
202    code[1] |= i->src(1).mod.neg() << 27;
203 }
204 
emitCondCode(CondCode cc,DataType ty,int pos)205 void CodeEmitterNV50::emitCondCode(CondCode cc, DataType ty, int pos)
206 {
207    uint8_t enc;
208 
209    assert(pos >= 32 || pos <= 27);
210 
211    switch (cc) {
212    case CC_LT:  enc = 0x1; break;
213    case CC_LTU: enc = 0x9; break;
214    case CC_EQ:  enc = 0x2; break;
215    case CC_EQU: enc = 0xa; break;
216    case CC_LE:  enc = 0x3; break;
217    case CC_LEU: enc = 0xb; break;
218    case CC_GT:  enc = 0x4; break;
219    case CC_GTU: enc = 0xc; break;
220    case CC_NE:  enc = 0x5; break;
221    case CC_NEU: enc = 0xd; break;
222    case CC_GE:  enc = 0x6; break;
223    case CC_GEU: enc = 0xe; break;
224    case CC_TR:  enc = 0xf; break;
225    case CC_FL:  enc = 0x0; break;
226 
227    case CC_O:  enc = 0x10; break;
228    case CC_C:  enc = 0x11; break;
229    case CC_A:  enc = 0x12; break;
230    case CC_S:  enc = 0x13; break;
231    case CC_NS: enc = 0x1c; break;
232    case CC_NA: enc = 0x1d; break;
233    case CC_NC: enc = 0x1e; break;
234    case CC_NO: enc = 0x1f; break;
235 
236    default:
237       enc = 0;
238       assert(!"invalid condition code");
239       break;
240    }
241    if (ty != TYPE_NONE && !isFloatType(ty))
242       enc &= ~0x8; // unordered only exists for float types
243 
244    code[pos / 32] |= enc << (pos % 32);
245 }
246 
247 void
emitFlagsRd(const Instruction * i)248 CodeEmitterNV50::emitFlagsRd(const Instruction *i)
249 {
250    int s = (i->flagsSrc >= 0) ? i->flagsSrc : i->predSrc;
251 
252    assert(!(code[1] & 0x00003f80));
253 
254    if (s >= 0) {
255       assert(i->getSrc(s)->reg.file == FILE_FLAGS);
256       emitCondCode(i->cc, TYPE_NONE, 32 + 7);
257       srcId(i->src(s), 32 + 12);
258    } else {
259       code[1] |= 0x0780;
260    }
261 }
262 
263 void
emitFlagsWr(const Instruction * i)264 CodeEmitterNV50::emitFlagsWr(const Instruction *i)
265 {
266    assert(!(code[1] & 0x70));
267 
268    int flagsDef = i->flagsDef;
269 
270    // find flags definition and check that it is the last def
271    if (flagsDef < 0) {
272       for (int d = 0; i->defExists(d); ++d)
273          if (i->def(d).getFile() == FILE_FLAGS)
274             flagsDef = d;
275       if (flagsDef >= 0 && 0) // TODO: enforce use of flagsDef at some point
276          WARN("Instruction::flagsDef was not set properly\n");
277    }
278    if (flagsDef == 0 && i->defExists(1))
279       WARN("flags def should not be the primary definition\n");
280 
281    if (flagsDef >= 0)
282       code[1] |= (DDATA(i->def(flagsDef)).id << 4) | 0x40;
283 
284 }
285 
286 void
setARegBits(unsigned int u)287 CodeEmitterNV50::setARegBits(unsigned int u)
288 {
289    code[0] |= (u & 3) << 26;
290    code[1] |= (u & 4);
291 }
292 
293 void
setAReg16(const Instruction * i,int s)294 CodeEmitterNV50::setAReg16(const Instruction *i, int s)
295 {
296    if (i->srcExists(s)) {
297       s = i->src(s).indirect[0];
298       if (s >= 0)
299          setARegBits(SDATA(i->src(s)).id + 1);
300    }
301 }
302 
303 void
setImmediate(const Instruction * i,int s)304 CodeEmitterNV50::setImmediate(const Instruction *i, int s)
305 {
306    const ImmediateValue *imm = i->src(s).get()->asImm();
307    assert(imm);
308 
309    uint32_t u = imm->reg.data.u32;
310 
311    if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT))
312       u = ~u;
313 
314    code[1] |= 3;
315    code[0] |= (u & 0x3f) << 16;
316    code[1] |= (u >> 6) << 2;
317 }
318 
319 void
setDst(const Value * dst)320 CodeEmitterNV50::setDst(const Value *dst)
321 {
322    const Storage *reg = &dst->join->reg;
323 
324    assert(reg->file != FILE_ADDRESS);
325 
326    if (reg->data.id < 0 || reg->file == FILE_FLAGS) {
327       code[0] |= (127 << 2) | 1;
328       code[1] |= 8;
329    } else {
330       int id;
331       if (reg->file == FILE_SHADER_OUTPUT) {
332          code[1] |= 8;
333          id = reg->data.offset / 4;
334       } else {
335          id = reg->data.id;
336       }
337       code[0] |= id << 2;
338    }
339 }
340 
341 void
setDst(const Instruction * i,int d)342 CodeEmitterNV50::setDst(const Instruction *i, int d)
343 {
344    if (i->defExists(d)) {
345       setDst(i->getDef(d));
346    } else
347    if (!d) {
348       code[0] |= 0x01fc; // bit bucket
349       code[1] |= 0x0008;
350    }
351 }
352 
353 // 3 * 2 bits:
354 // 0: r
355 // 1: a/s
356 // 2: c
357 // 3: i
358 void
setSrcFileBits(const Instruction * i,int enc)359 CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc)
360 {
361    uint8_t mode = 0;
362 
363    for (unsigned int s = 0; s < Target::operationSrcNr[i->op]; ++s) {
364       switch (i->src(s).getFile()) {
365       case FILE_GPR:
366          break;
367       case FILE_MEMORY_SHARED:
368       case FILE_SHADER_INPUT:
369          mode |= 1 << (s * 2);
370          break;
371       case FILE_MEMORY_CONST:
372          mode |= 2 << (s * 2);
373          break;
374       case FILE_IMMEDIATE:
375          mode |= 3 << (s * 2);
376          break;
377       default:
378          ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile());
379          assert(0);
380          break;
381       }
382    }
383    switch (mode) {
384    case 0x00: // rrr
385       break;
386    case 0x01: // arr/grr
387       if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
388          code[0] |= 0x01800000;
389          if (enc == NV50_OP_ENC_LONG || enc == NV50_OP_ENC_LONG_ALT)
390             code[1] |= 0x00200000;
391       } else {
392          if (enc == NV50_OP_ENC_SHORT)
393             code[0] |= 0x01000000;
394          else
395             code[1] |= 0x00200000;
396       }
397       break;
398    case 0x03: // irr
399       assert(i->op == OP_MOV);
400       return;
401    case 0x0c: // rir
402       break;
403    case 0x0d: // gir
404       assert(progType == Program::TYPE_GEOMETRY ||
405              progType == Program::TYPE_COMPUTE);
406       code[0] |= 0x01000000;
407       if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
408          int reg = i->src(0).getIndirect(0)->rep()->reg.data.id;
409          assert(reg < 3);
410          code[0] |= (reg + 1) << 26;
411       }
412       break;
413    case 0x08: // rcr
414       code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
415       code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
416       break;
417    case 0x09: // acr/gcr
418       if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
419          code[0] |= 0x01800000;
420       } else {
421          code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
422          code[1] |= 0x00200000;
423       }
424       code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
425       break;
426    case 0x20: // rrc
427       code[0] |= 0x01000000;
428       code[1] |= (i->getSrc(2)->reg.fileIndex << 22);
429       break;
430    case 0x21: // arc
431       code[0] |= 0x01000000;
432       code[1] |= 0x00200000 | (i->getSrc(2)->reg.fileIndex << 22);
433       assert(progType != Program::TYPE_GEOMETRY);
434       break;
435    default:
436       ERROR("not encodable: %x\n", mode);
437       assert(0);
438       break;
439    }
440    if (progType != Program::TYPE_COMPUTE)
441       return;
442 
443    if ((mode & 3) == 1) {
444       const int pos = ((mode >> 2) & 3) == 3 ? 13 : 14;
445 
446       switch (i->sType) {
447       case TYPE_U8:
448          break;
449       case TYPE_U16:
450          code[0] |= 1 << pos;
451          break;
452       case TYPE_S16:
453          code[0] |= 2 << pos;
454          break;
455       default:
456          code[0] |= 3 << pos;
457          assert(i->getSrc(0)->reg.size == 4);
458          break;
459       }
460    }
461 }
462 
463 void
setSrc(const Instruction * i,unsigned int s,int slot)464 CodeEmitterNV50::setSrc(const Instruction *i, unsigned int s, int slot)
465 {
466    if (Target::operationSrcNr[i->op] <= s)
467       return;
468    const Storage *reg = &i->src(s).rep()->reg;
469 
470    unsigned int id = (reg->file == FILE_GPR) ?
471       reg->data.id :
472       reg->data.offset >> (reg->size >> 1); // no > 4 byte sources here
473 
474    switch (slot) {
475    case 0: code[0] |= id << 9; break;
476    case 1: code[0] |= id << 16; break;
477    case 2: code[1] |= id << 14; break;
478    default:
479       assert(0);
480       break;
481    }
482 }
483 
484 // the default form:
485 //  - long instruction
486 //  - 1 to 3 sources in slots 0, 1, 2 (rrr, arr, rcr, acr, rrc, arc, gcr, grr)
487 //  - address & flags
488 void
emitForm_MAD(const Instruction * i)489 CodeEmitterNV50::emitForm_MAD(const Instruction *i)
490 {
491    assert(i->encSize == 8);
492    code[0] |= 1;
493 
494    emitFlagsRd(i);
495    emitFlagsWr(i);
496 
497    setDst(i, 0);
498 
499    setSrcFileBits(i, NV50_OP_ENC_LONG);
500    setSrc(i, 0, 0);
501    setSrc(i, 1, 1);
502    setSrc(i, 2, 2);
503 
504    if (i->getIndirect(0, 0)) {
505       assert(!i->srcExists(1) || !i->getIndirect(1, 0));
506       assert(!i->srcExists(2) || !i->getIndirect(2, 0));
507       setAReg16(i, 0);
508    } else if (i->srcExists(1) && i->getIndirect(1, 0)) {
509       assert(!i->srcExists(2) || !i->getIndirect(2, 0));
510       setAReg16(i, 1);
511    } else {
512       setAReg16(i, 2);
513    }
514 }
515 
516 // like default form, but 2nd source in slot 2, and no 3rd source
517 void
emitForm_ADD(const Instruction * i)518 CodeEmitterNV50::emitForm_ADD(const Instruction *i)
519 {
520    assert(i->encSize == 8);
521    code[0] |= 1;
522 
523    emitFlagsRd(i);
524    emitFlagsWr(i);
525 
526    setDst(i, 0);
527 
528    setSrcFileBits(i, NV50_OP_ENC_LONG_ALT);
529    setSrc(i, 0, 0);
530    if (i->predSrc != 1)
531       setSrc(i, 1, 2);
532 
533    if (i->getIndirect(0, 0)) {
534       assert(!i->getIndirect(1, 0));
535       setAReg16(i, 0);
536    } else {
537       setAReg16(i, 1);
538    }
539 }
540 
541 // default short form (rr, ar, rc, gr)
542 void
emitForm_MUL(const Instruction * i)543 CodeEmitterNV50::emitForm_MUL(const Instruction *i)
544 {
545    assert(i->encSize == 4 && !(code[0] & 1));
546    assert(i->defExists(0));
547    assert(!i->getPredicate());
548 
549    setDst(i, 0);
550 
551    setSrcFileBits(i, NV50_OP_ENC_SHORT);
552    setSrc(i, 0, 0);
553    setSrc(i, 1, 1);
554 }
555 
556 // usual immediate form
557 // - 1 to 3 sources where second is immediate (rir, gir)
558 // - no address or predicate possible
559 void
emitForm_IMM(const Instruction * i)560 CodeEmitterNV50::emitForm_IMM(const Instruction *i)
561 {
562    assert(i->encSize == 8);
563    code[0] |= 1;
564 
565    assert(i->defExists(0) && i->srcExists(0));
566 
567    setDst(i, 0);
568 
569    setSrcFileBits(i, NV50_OP_ENC_IMM);
570    if (Target::operationSrcNr[i->op] > 1) {
571       setSrc(i, 0, 0);
572       setImmediate(i, 1);
573       // If there is another source, it has to be the same as the dest reg.
574    } else {
575       setImmediate(i, 0);
576    }
577 }
578 
579 void
emitLoadStoreSizeLG(DataType ty,int pos)580 CodeEmitterNV50::emitLoadStoreSizeLG(DataType ty, int pos)
581 {
582    uint8_t enc;
583 
584    switch (ty) {
585    case TYPE_F32: // fall through
586    case TYPE_S32: // fall through
587    case TYPE_U32:  enc = 0x6; break;
588    case TYPE_B128: enc = 0x5; break;
589    case TYPE_F64: // fall through
590    case TYPE_S64: // fall through
591    case TYPE_U64:  enc = 0x4; break;
592    case TYPE_S16:  enc = 0x3; break;
593    case TYPE_U16:  enc = 0x2; break;
594    case TYPE_S8:   enc = 0x1; break;
595    case TYPE_U8:   enc = 0x0; break;
596    default:
597       enc = 0;
598       assert(!"invalid load/store type");
599       break;
600    }
601    code[pos / 32] |= enc << (pos % 32);
602 }
603 
604 void
emitLoadStoreSizeCS(DataType ty)605 CodeEmitterNV50::emitLoadStoreSizeCS(DataType ty)
606 {
607    switch (ty) {
608    case TYPE_U8: break;
609    case TYPE_U16: code[1] |= 0x4000; break;
610    case TYPE_S16: code[1] |= 0x8000; break;
611    case TYPE_F32:
612    case TYPE_S32:
613    case TYPE_U32: code[1] |= 0xc000; break;
614    default:
615       assert(0);
616       break;
617    }
618 }
619 
620 void
emitLOAD(const Instruction * i)621 CodeEmitterNV50::emitLOAD(const Instruction *i)
622 {
623    DataFile sf = i->src(0).getFile();
624    MAYBE_UNUSED int32_t offset = i->getSrc(0)->reg.data.offset;
625 
626    switch (sf) {
627    case FILE_SHADER_INPUT:
628       if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0))
629          code[0] = 0x11800001;
630       else
631          // use 'mov' where we can
632          code[0] = i->src(0).isIndirect(0) ? 0x00000001 : 0x10000001;
633       code[1] = 0x00200000 | (i->lanes << 14);
634       if (typeSizeof(i->dType) == 4)
635          code[1] |= 0x04000000;
636       break;
637    case FILE_MEMORY_SHARED:
638       if (targ->getChipset() >= 0x84) {
639          assert(offset <= (int32_t)(0x3fff * typeSizeof(i->sType)));
640          code[0] = 0x10000001;
641          code[1] = 0x40000000;
642 
643          if (typeSizeof(i->dType) == 4)
644             code[1] |= 0x04000000;
645 
646          emitLoadStoreSizeCS(i->sType);
647       } else {
648          assert(offset <= (int32_t)(0x1f * typeSizeof(i->sType)));
649          code[0] = 0x10000001;
650          code[1] = 0x00200000 | (i->lanes << 14);
651          emitLoadStoreSizeCS(i->sType);
652       }
653       break;
654    case FILE_MEMORY_CONST:
655       code[0] = 0x10000001;
656       code[1] = 0x20000000 | (i->getSrc(0)->reg.fileIndex << 22);
657       if (typeSizeof(i->dType) == 4)
658          code[1] |= 0x04000000;
659       emitLoadStoreSizeCS(i->sType);
660       break;
661    case FILE_MEMORY_LOCAL:
662       code[0] = 0xd0000001;
663       code[1] = 0x40000000;
664       break;
665    case FILE_MEMORY_GLOBAL:
666       code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
667       code[1] = 0x80000000;
668       break;
669    default:
670       assert(!"invalid load source file");
671       break;
672    }
673    if (sf == FILE_MEMORY_LOCAL ||
674        sf == FILE_MEMORY_GLOBAL)
675       emitLoadStoreSizeLG(i->sType, 21 + 32);
676 
677    setDst(i, 0);
678 
679    emitFlagsRd(i);
680    emitFlagsWr(i);
681 
682    if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
683       srcId(*i->src(0).getIndirect(0), 9);
684    } else {
685       setAReg16(i, 0);
686       srcAddr16(i->src(0), i->src(0).getFile() != FILE_MEMORY_LOCAL, 9);
687    }
688 }
689 
690 void
emitSTORE(const Instruction * i)691 CodeEmitterNV50::emitSTORE(const Instruction *i)
692 {
693    DataFile f = i->getSrc(0)->reg.file;
694    int32_t offset = i->getSrc(0)->reg.data.offset;
695 
696    switch (f) {
697    case FILE_SHADER_OUTPUT:
698       code[0] = 0x00000001 | ((offset >> 2) << 9);
699       code[1] = 0x80c00000;
700       srcId(i->src(1), 32 + 14);
701       break;
702    case FILE_MEMORY_GLOBAL:
703       code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
704       code[1] = 0xa0000000;
705       emitLoadStoreSizeLG(i->dType, 21 + 32);
706       srcId(i->src(1), 2);
707       break;
708    case FILE_MEMORY_LOCAL:
709       code[0] = 0xd0000001;
710       code[1] = 0x60000000;
711       emitLoadStoreSizeLG(i->dType, 21 + 32);
712       srcId(i->src(1), 2);
713       break;
714    case FILE_MEMORY_SHARED:
715       code[0] = 0x00000001;
716       code[1] = 0xe0000000;
717       switch (typeSizeof(i->dType)) {
718       case 1:
719          code[0] |= offset << 9;
720          code[1] |= 0x00400000;
721          break;
722       case 2:
723          code[0] |= (offset >> 1) << 9;
724          break;
725       case 4:
726          code[0] |= (offset >> 2) << 9;
727          code[1] |= 0x04200000;
728          break;
729       default:
730          assert(0);
731          break;
732       }
733       srcId(i->src(1), 32 + 14);
734       break;
735    default:
736       assert(!"invalid store destination file");
737       break;
738    }
739 
740    if (f == FILE_MEMORY_GLOBAL)
741       srcId(*i->src(0).getIndirect(0), 9);
742    else
743       setAReg16(i, 0);
744 
745    if (f == FILE_MEMORY_LOCAL)
746       srcAddr16(i->src(0), false, 9);
747 
748    emitFlagsRd(i);
749 }
750 
751 void
emitMOV(const Instruction * i)752 CodeEmitterNV50::emitMOV(const Instruction *i)
753 {
754    DataFile sf = i->getSrc(0)->reg.file;
755    DataFile df = i->getDef(0)->reg.file;
756 
757    assert(sf == FILE_GPR || df == FILE_GPR);
758 
759    if (sf == FILE_FLAGS) {
760       assert(i->flagsSrc >= 0);
761       code[0] = 0x00000001;
762       code[1] = 0x20000000;
763       defId(i->def(0), 2);
764       emitFlagsRd(i);
765    } else
766    if (sf == FILE_ADDRESS) {
767       code[0] = 0x00000001;
768       code[1] = 0x40000000;
769       defId(i->def(0), 2);
770       setARegBits(SDATA(i->src(0)).id + 1);
771       emitFlagsRd(i);
772    } else
773    if (df == FILE_FLAGS) {
774       assert(i->flagsDef >= 0);
775       code[0] = 0x00000001;
776       code[1] = 0xa0000000;
777       srcId(i->src(0), 9);
778       emitFlagsRd(i);
779       emitFlagsWr(i);
780    } else
781    if (sf == FILE_IMMEDIATE) {
782       code[0] = 0x10008001;
783       code[1] = 0x00000003;
784       emitForm_IMM(i);
785    } else {
786       if (i->encSize == 4) {
787          code[0] = 0x10008000;
788       } else {
789          code[0] = 0x10000001;
790          code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
791          code[1] |= (i->lanes << 14);
792          emitFlagsRd(i);
793       }
794       defId(i->def(0), 2);
795       srcId(i->src(0), 9);
796    }
797    if (df == FILE_SHADER_OUTPUT) {
798       assert(i->encSize == 8);
799       code[1] |= 0x8;
800    }
801 }
802 
getSRegEncoding(const ValueRef & ref)803 static inline uint8_t getSRegEncoding(const ValueRef &ref)
804 {
805    switch (SDATA(ref).sv.sv) {
806    case SV_PHYSID:        return 0;
807    case SV_CLOCK:         return 1;
808    case SV_VERTEX_STRIDE: return 3;
809 // case SV_PM_COUNTER:    return 4 + SDATA(ref).sv.index;
810    case SV_SAMPLE_INDEX:  return 8;
811    default:
812       assert(!"no sreg for system value");
813       return 0;
814    }
815 }
816 
817 void
emitRDSV(const Instruction * i)818 CodeEmitterNV50::emitRDSV(const Instruction *i)
819 {
820    code[0] = 0x00000001;
821    code[1] = 0x60000000 | (getSRegEncoding(i->src(0)) << 14);
822    defId(i->def(0), 2);
823    emitFlagsRd(i);
824 }
825 
826 void
emitNOP()827 CodeEmitterNV50::emitNOP()
828 {
829    code[0] = 0xf0000001;
830    code[1] = 0xe0000000;
831 }
832 
833 void
emitQUADOP(const Instruction * i,uint8_t lane,uint8_t quOp)834 CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp)
835 {
836    code[0] = 0xc0000000 | (lane << 16);
837    code[1] = 0x80000000;
838 
839    code[0] |= (quOp & 0x03) << 20;
840    code[1] |= (quOp & 0xfc) << 20;
841 
842    emitForm_ADD(i);
843 
844    if (!i->srcExists(1) || i->predSrc == 1)
845       srcId(i->src(0), 32 + 14);
846 }
847 
848 /* NOTE: This returns the base address of a vertex inside the primitive.
849  * src0 is an immediate, the index (not offset) of the vertex
850  * inside the primitive. XXX: signed or unsigned ?
851  * src1 (may be NULL) should use whatever units the hardware requires
852  * (on nv50 this is bytes, so, relative index * 4; signed 16 bit value).
853  */
854 void
emitPFETCH(const Instruction * i)855 CodeEmitterNV50::emitPFETCH(const Instruction *i)
856 {
857    const uint32_t prim = i->src(0).get()->reg.data.u32;
858    assert(prim <= 127);
859 
860    if (i->def(0).getFile() == FILE_ADDRESS) {
861       // shl $aX a[] 0
862       code[0] = 0x00000001 | ((DDATA(i->def(0)).id + 1) << 2);
863       code[1] = 0xc0200000;
864       code[0] |= prim << 9;
865       assert(!i->srcExists(1));
866    } else
867    if (i->srcExists(1)) {
868       // ld b32 $rX a[$aX+base]
869       code[0] = 0x00000001;
870       code[1] = 0x04200000 | (0xf << 14);
871       defId(i->def(0), 2);
872       code[0] |= prim << 9;
873       setARegBits(SDATA(i->src(1)).id + 1);
874    } else {
875       // mov b32 $rX a[]
876       code[0] = 0x10000001;
877       code[1] = 0x04200000 | (0xf << 14);
878       defId(i->def(0), 2);
879       code[0] |= prim << 9;
880    }
881    emitFlagsRd(i);
882 }
883 
884 static void
interpApply(const FixupEntry * entry,uint32_t * code,const FixupData & data)885 interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
886 {
887    int ipa = entry->ipa;
888    int encSize = entry->reg;
889    int loc = entry->loc;
890 
891    if ((ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
892        (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
893       if (data.force_persample_interp) {
894          if (encSize == 8)
895             code[loc + 1] |= 1 << 16;
896          else
897             code[loc + 0] |= 1 << 24;
898       } else {
899          if (encSize == 8)
900             code[loc + 1] &= ~(1 << 16);
901          else
902             code[loc + 0] &= ~(1 << 24);
903       }
904    }
905 }
906 
907 void
emitINTERP(const Instruction * i)908 CodeEmitterNV50::emitINTERP(const Instruction *i)
909 {
910    code[0] = 0x80000000;
911 
912    defId(i->def(0), 2);
913    srcAddr8(i->src(0), 16);
914    setAReg16(i, 0);
915 
916    if (i->encSize != 8 && i->getInterpMode() == NV50_IR_INTERP_FLAT) {
917       code[0] |= 1 << 8;
918    } else {
919       if (i->op == OP_PINTERP) {
920          code[0] |= 1 << 25;
921          srcId(i->src(1), 9);
922       }
923       if (i->getSampleMode() == NV50_IR_INTERP_CENTROID)
924          code[0] |= 1 << 24;
925    }
926 
927    if (i->encSize == 8) {
928       if (i->getInterpMode() == NV50_IR_INTERP_FLAT)
929          code[1] = 4 << 16;
930       else
931          code[1] = (code[0] & (3 << 24)) >> (24 - 16);
932       code[0] &= ~0x03000000;
933       code[0] |= 1;
934       emitFlagsRd(i);
935    }
936 
937    addInterp(i->ipa, i->encSize, interpApply);
938 }
939 
940 void
emitMINMAX(const Instruction * i)941 CodeEmitterNV50::emitMINMAX(const Instruction *i)
942 {
943    if (i->dType == TYPE_F64) {
944       code[0] = 0xe0000000;
945       code[1] = (i->op == OP_MIN) ? 0xa0000000 : 0xc0000000;
946    } else {
947       code[0] = 0x30000000;
948       code[1] = 0x80000000;
949       if (i->op == OP_MIN)
950          code[1] |= 0x20000000;
951 
952       switch (i->dType) {
953       case TYPE_F32: code[0] |= 0x80000000; break;
954       case TYPE_S32: code[1] |= 0x8c000000; break;
955       case TYPE_U32: code[1] |= 0x84000000; break;
956       case TYPE_S16: code[1] |= 0x80000000; break;
957       case TYPE_U16: break;
958       default:
959          assert(0);
960          break;
961       }
962    }
963 
964    code[1] |= i->src(0).mod.abs() << 20;
965    code[1] |= i->src(0).mod.neg() << 26;
966    code[1] |= i->src(1).mod.abs() << 19;
967    code[1] |= i->src(1).mod.neg() << 27;
968 
969    emitForm_MAD(i);
970 }
971 
972 void
emitFMAD(const Instruction * i)973 CodeEmitterNV50::emitFMAD(const Instruction *i)
974 {
975    const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
976    const int neg_add = i->src(2).mod.neg();
977 
978    code[0] = 0xe0000000;
979 
980    if (i->src(1).getFile() == FILE_IMMEDIATE) {
981       code[1] = 0;
982       emitForm_IMM(i);
983       code[0] |= neg_mul << 15;
984       code[0] |= neg_add << 22;
985       if (i->saturate)
986          code[0] |= 1 << 8;
987    } else
988    if (i->encSize == 4) {
989       emitForm_MUL(i);
990       code[0] |= neg_mul << 15;
991       code[0] |= neg_add << 22;
992       if (i->saturate)
993          code[0] |= 1 << 8;
994    } else {
995       code[1]  = neg_mul << 26;
996       code[1] |= neg_add << 27;
997       if (i->saturate)
998          code[1] |= 1 << 29;
999       emitForm_MAD(i);
1000    }
1001 }
1002 
1003 void
emitDMAD(const Instruction * i)1004 CodeEmitterNV50::emitDMAD(const Instruction *i)
1005 {
1006    const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
1007    const int neg_add = i->src(2).mod.neg();
1008 
1009    assert(i->encSize == 8);
1010    assert(!i->saturate);
1011 
1012    code[1] = 0x40000000;
1013    code[0] = 0xe0000000;
1014 
1015    code[1] |= neg_mul << 26;
1016    code[1] |= neg_add << 27;
1017 
1018    roundMode_MAD(i);
1019 
1020    emitForm_MAD(i);
1021 }
1022 
1023 void
emitFADD(const Instruction * i)1024 CodeEmitterNV50::emitFADD(const Instruction *i)
1025 {
1026    const int neg0 = i->src(0).mod.neg();
1027    const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
1028 
1029    code[0] = 0xb0000000;
1030 
1031    assert(!(i->src(0).mod | i->src(1).mod).abs());
1032 
1033    if (i->src(1).getFile() == FILE_IMMEDIATE) {
1034       code[1] = 0;
1035       emitForm_IMM(i);
1036       code[0] |= neg0 << 15;
1037       code[0] |= neg1 << 22;
1038       if (i->saturate)
1039          code[0] |= 1 << 8;
1040    } else
1041    if (i->encSize == 8) {
1042       code[1] = 0;
1043       emitForm_ADD(i);
1044       code[1] |= neg0 << 26;
1045       code[1] |= neg1 << 27;
1046       if (i->saturate)
1047          code[1] |= 1 << 29;
1048    } else {
1049       emitForm_MUL(i);
1050       code[0] |= neg0 << 15;
1051       code[0] |= neg1 << 22;
1052       if (i->saturate)
1053          code[0] |= 1 << 8;
1054    }
1055 }
1056 
1057 void
emitDADD(const Instruction * i)1058 CodeEmitterNV50::emitDADD(const Instruction *i)
1059 {
1060    const int neg0 = i->src(0).mod.neg();
1061    const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
1062 
1063    assert(!(i->src(0).mod | i->src(1).mod).abs());
1064    assert(!i->saturate);
1065    assert(i->encSize == 8);
1066 
1067    code[1] = 0x60000000;
1068    code[0] = 0xe0000000;
1069 
1070    emitForm_ADD(i);
1071 
1072    code[1] |= neg0 << 26;
1073    code[1] |= neg1 << 27;
1074 }
1075 
1076 void
emitUADD(const Instruction * i)1077 CodeEmitterNV50::emitUADD(const Instruction *i)
1078 {
1079    const int neg0 = i->src(0).mod.neg();
1080    const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
1081 
1082    code[0] = 0x20008000;
1083 
1084    if (i->src(1).getFile() == FILE_IMMEDIATE) {
1085       code[1] = 0;
1086       emitForm_IMM(i);
1087    } else
1088    if (i->encSize == 8) {
1089       code[0] = 0x20000000;
1090       code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
1091       emitForm_ADD(i);
1092    } else {
1093       emitForm_MUL(i);
1094    }
1095    assert(!(neg0 && neg1));
1096    code[0] |= neg0 << 28;
1097    code[0] |= neg1 << 22;
1098 
1099    if (i->flagsSrc >= 0) {
1100       // addc == sub | subr
1101       assert(!(code[0] & 0x10400000) && !i->getPredicate());
1102       code[0] |= 0x10400000;
1103       srcId(i->src(i->flagsSrc), 32 + 12);
1104    }
1105 }
1106 
1107 void
emitAADD(const Instruction * i)1108 CodeEmitterNV50::emitAADD(const Instruction *i)
1109 {
1110    const int s = (i->op == OP_MOV) ? 0 : 1;
1111 
1112    code[0] = 0xd0000001 | (i->getSrc(s)->reg.data.u16 << 9);
1113    code[1] = 0x20000000;
1114 
1115    code[0] |= (DDATA(i->def(0)).id + 1) << 2;
1116 
1117    emitFlagsRd(i);
1118 
1119    if (s && i->srcExists(0))
1120       setARegBits(SDATA(i->src(0)).id + 1);
1121 }
1122 
1123 void
emitIMUL(const Instruction * i)1124 CodeEmitterNV50::emitIMUL(const Instruction *i)
1125 {
1126    code[0] = 0x40000000;
1127 
1128    if (i->src(1).getFile() == FILE_IMMEDIATE) {
1129       if (i->sType == TYPE_S16)
1130          code[0] |= 0x8100;
1131       code[1] = 0;
1132       emitForm_IMM(i);
1133    } else
1134    if (i->encSize == 8) {
1135       code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000;
1136       emitForm_MAD(i);
1137    } else {
1138       if (i->sType == TYPE_S16)
1139          code[0] |= 0x8100;
1140       emitForm_MUL(i);
1141    }
1142 }
1143 
1144 void
emitFMUL(const Instruction * i)1145 CodeEmitterNV50::emitFMUL(const Instruction *i)
1146 {
1147    const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
1148 
1149    code[0] = 0xc0000000;
1150 
1151    if (i->src(1).getFile() == FILE_IMMEDIATE) {
1152       code[1] = 0;
1153       emitForm_IMM(i);
1154       if (neg)
1155          code[0] |= 0x8000;
1156       if (i->saturate)
1157          code[0] |= 1 << 8;
1158    } else
1159    if (i->encSize == 8) {
1160       code[1] = i->rnd == ROUND_Z ? 0x0000c000 : 0;
1161       if (neg)
1162          code[1] |= 0x08000000;
1163       if (i->saturate)
1164          code[1] |= 1 << 20;
1165       emitForm_MAD(i);
1166    } else {
1167       emitForm_MUL(i);
1168       if (neg)
1169          code[0] |= 0x8000;
1170       if (i->saturate)
1171          code[0] |= 1 << 8;
1172    }
1173 }
1174 
1175 void
emitDMUL(const Instruction * i)1176 CodeEmitterNV50::emitDMUL(const Instruction *i)
1177 {
1178    const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
1179 
1180    assert(!i->saturate);
1181    assert(i->encSize == 8);
1182 
1183    code[1] = 0x80000000;
1184    code[0] = 0xe0000000;
1185 
1186    if (neg)
1187       code[1] |= 0x08000000;
1188 
1189    roundMode_CVT(i->rnd);
1190 
1191    emitForm_MAD(i);
1192 }
1193 
1194 void
emitIMAD(const Instruction * i)1195 CodeEmitterNV50::emitIMAD(const Instruction *i)
1196 {
1197    int mode;
1198    code[0] = 0x60000000;
1199 
1200    assert(!i->src(0).mod && !i->src(1).mod && !i->src(2).mod);
1201    if (!isSignedType(i->sType))
1202       mode = 0;
1203    else if (i->saturate)
1204       mode = 2;
1205    else
1206       mode = 1;
1207 
1208    if (i->src(1).getFile() == FILE_IMMEDIATE) {
1209       code[1] = 0;
1210       emitForm_IMM(i);
1211       code[0] |= (mode & 1) << 8 | (mode & 2) << 14;
1212       if (i->flagsSrc >= 0) {
1213          assert(!(code[0] & 0x10400000));
1214          assert(SDATA(i->src(i->flagsSrc)).id == 0);
1215          code[0] |= 0x10400000;
1216       }
1217    } else
1218    if (i->encSize == 4) {
1219       emitForm_MUL(i);
1220       code[0] |= (mode & 1) << 8 | (mode & 2) << 14;
1221       if (i->flagsSrc >= 0) {
1222          assert(!(code[0] & 0x10400000));
1223          assert(SDATA(i->src(i->flagsSrc)).id == 0);
1224          code[0] |= 0x10400000;
1225       }
1226    } else {
1227       code[1] = mode << 29;
1228       emitForm_MAD(i);
1229 
1230       if (i->flagsSrc >= 0) {
1231          // add with carry from $cX
1232          assert(!(code[1] & 0x0c000000) && !i->getPredicate());
1233          code[1] |= 0xc << 24;
1234          srcId(i->src(i->flagsSrc), 32 + 12);
1235       }
1236    }
1237 }
1238 
1239 void
emitISAD(const Instruction * i)1240 CodeEmitterNV50::emitISAD(const Instruction *i)
1241 {
1242    if (i->encSize == 8) {
1243       code[0] = 0x50000000;
1244       switch (i->sType) {
1245       case TYPE_U32: code[1] = 0x04000000; break;
1246       case TYPE_S32: code[1] = 0x0c000000; break;
1247       case TYPE_U16: code[1] = 0x00000000; break;
1248       case TYPE_S16: code[1] = 0x08000000; break;
1249       default:
1250          assert(0);
1251          break;
1252       }
1253       emitForm_MAD(i);
1254    } else {
1255       switch (i->sType) {
1256       case TYPE_U32: code[0] = 0x50008000; break;
1257       case TYPE_S32: code[0] = 0x50008100; break;
1258       case TYPE_U16: code[0] = 0x50000000; break;
1259       case TYPE_S16: code[0] = 0x50000100; break;
1260       default:
1261          assert(0);
1262          break;
1263       }
1264       emitForm_MUL(i);
1265    }
1266 }
1267 
1268 static void
alphatestSet(const FixupEntry * entry,uint32_t * code,const FixupData & data)1269 alphatestSet(const FixupEntry *entry, uint32_t *code, const FixupData& data)
1270 {
1271    int loc = entry->loc;
1272    int enc;
1273 
1274    switch (data.alphatest) {
1275    case PIPE_FUNC_NEVER: enc = 0x0; break;
1276    case PIPE_FUNC_LESS: enc = 0x1; break;
1277    case PIPE_FUNC_EQUAL: enc = 0x2; break;
1278    case PIPE_FUNC_LEQUAL: enc = 0x3; break;
1279    case PIPE_FUNC_GREATER: enc = 0x4; break;
1280    case PIPE_FUNC_NOTEQUAL: enc = 0x5; break;
1281    case PIPE_FUNC_GEQUAL: enc = 0x6; break;
1282    default:
1283    case PIPE_FUNC_ALWAYS: enc = 0xf; break;
1284    }
1285 
1286    code[loc + 1] &= ~(0x1f << 14);
1287    code[loc + 1] |= enc << 14;
1288 }
1289 
1290 void
emitSET(const Instruction * i)1291 CodeEmitterNV50::emitSET(const Instruction *i)
1292 {
1293    code[0] = 0x30000000;
1294    code[1] = 0x60000000;
1295 
1296    switch (i->sType) {
1297    case TYPE_F64:
1298       code[0] = 0xe0000000;
1299       code[1] = 0xe0000000;
1300       break;
1301    case TYPE_F32: code[0] |= 0x80000000; break;
1302    case TYPE_S32: code[1] |= 0x0c000000; break;
1303    case TYPE_U32: code[1] |= 0x04000000; break;
1304    case TYPE_S16: code[1] |= 0x08000000; break;
1305    case TYPE_U16: break;
1306    default:
1307       assert(0);
1308       break;
1309    }
1310 
1311    emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14);
1312 
1313    if (i->src(0).mod.neg()) code[1] |= 0x04000000;
1314    if (i->src(1).mod.neg()) code[1] |= 0x08000000;
1315    if (i->src(0).mod.abs()) code[1] |= 0x00100000;
1316    if (i->src(1).mod.abs()) code[1] |= 0x00080000;
1317 
1318    emitForm_MAD(i);
1319 
1320    if (i->subOp == 1) {
1321       addInterp(0, 0, alphatestSet);
1322    }
1323 }
1324 
1325 void
roundMode_CVT(RoundMode rnd)1326 CodeEmitterNV50::roundMode_CVT(RoundMode rnd)
1327 {
1328    switch (rnd) {
1329    case ROUND_NI: code[1] |= 0x08000000; break;
1330    case ROUND_M:  code[1] |= 0x00020000; break;
1331    case ROUND_MI: code[1] |= 0x08020000; break;
1332    case ROUND_P:  code[1] |= 0x00040000; break;
1333    case ROUND_PI: code[1] |= 0x08040000; break;
1334    case ROUND_Z:  code[1] |= 0x00060000; break;
1335    case ROUND_ZI: code[1] |= 0x08060000; break;
1336    default:
1337       assert(rnd == ROUND_N);
1338       break;
1339    }
1340 }
1341 
1342 void
emitCVT(const Instruction * i)1343 CodeEmitterNV50::emitCVT(const Instruction *i)
1344 {
1345    const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
1346    RoundMode rnd;
1347    DataType dType;
1348 
1349    switch (i->op) {
1350    case OP_CEIL:  rnd = f2f ? ROUND_PI : ROUND_P; break;
1351    case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;
1352    case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;
1353    default:
1354       rnd = i->rnd;
1355       break;
1356    }
1357 
1358    if (i->op == OP_NEG && i->dType == TYPE_U32)
1359       dType = TYPE_S32;
1360    else
1361       dType = i->dType;
1362 
1363    code[0] = 0xa0000000;
1364 
1365    switch (dType) {
1366    case TYPE_F64:
1367       switch (i->sType) {
1368       case TYPE_F64: code[1] = 0xc4404000; break;
1369       case TYPE_S64: code[1] = 0x44414000; break;
1370       case TYPE_U64: code[1] = 0x44404000; break;
1371       case TYPE_F32: code[1] = 0xc4400000; break;
1372       case TYPE_S32: code[1] = 0x44410000; break;
1373       case TYPE_U32: code[1] = 0x44400000; break;
1374       default:
1375          assert(0);
1376          break;
1377       }
1378       break;
1379    case TYPE_S64:
1380       switch (i->sType) {
1381       case TYPE_F64: code[1] = 0x8c404000; break;
1382       case TYPE_F32: code[1] = 0x8c400000; break;
1383       default:
1384          assert(0);
1385          break;
1386       }
1387       break;
1388    case TYPE_U64:
1389       switch (i->sType) {
1390       case TYPE_F64: code[1] = 0x84404000; break;
1391       case TYPE_F32: code[1] = 0x84400000; break;
1392       default:
1393          assert(0);
1394          break;
1395       }
1396       break;
1397    case TYPE_F32:
1398       switch (i->sType) {
1399       case TYPE_F64: code[1] = 0xc0404000; break;
1400       case TYPE_S64: code[1] = 0x40414000; break;
1401       case TYPE_U64: code[1] = 0x40404000; break;
1402       case TYPE_F32: code[1] = 0xc4004000; break;
1403       case TYPE_S32: code[1] = 0x44014000; break;
1404       case TYPE_U32: code[1] = 0x44004000; break;
1405       case TYPE_F16: code[1] = 0xc4000000; break;
1406       case TYPE_U16: code[1] = 0x44000000; break;
1407       default:
1408          assert(0);
1409          break;
1410       }
1411       break;
1412    case TYPE_S32:
1413       switch (i->sType) {
1414       case TYPE_F64: code[1] = 0x88404000; break;
1415       case TYPE_F32: code[1] = 0x8c004000; break;
1416       case TYPE_S32: code[1] = 0x0c014000; break;
1417       case TYPE_U32: code[1] = 0x0c004000; break;
1418       case TYPE_F16: code[1] = 0x8c000000; break;
1419       case TYPE_S16: code[1] = 0x0c010000; break;
1420       case TYPE_U16: code[1] = 0x0c000000; break;
1421       case TYPE_S8:  code[1] = 0x0c018000; break;
1422       case TYPE_U8:  code[1] = 0x0c008000; break;
1423       default:
1424          assert(0);
1425          break;
1426       }
1427       break;
1428    case TYPE_U32:
1429       switch (i->sType) {
1430       case TYPE_F64: code[1] = 0x80404000; break;
1431       case TYPE_F32: code[1] = 0x84004000; break;
1432       case TYPE_S32: code[1] = 0x04014000; break;
1433       case TYPE_U32: code[1] = 0x04004000; break;
1434       case TYPE_F16: code[1] = 0x84000000; break;
1435       case TYPE_S16: code[1] = 0x04010000; break;
1436       case TYPE_U16: code[1] = 0x04000000; break;
1437       case TYPE_S8:  code[1] = 0x04018000; break;
1438       case TYPE_U8:  code[1] = 0x04008000; break;
1439       default:
1440          assert(0);
1441          break;
1442       }
1443       break;
1444    case TYPE_S16:
1445    case TYPE_U16:
1446    case TYPE_S8:
1447    case TYPE_U8:
1448    default:
1449       assert(0);
1450       break;
1451    }
1452    if (typeSizeof(i->sType) == 1 && i->getSrc(0)->reg.size == 4)
1453       code[1] |= 0x00004000;
1454 
1455    roundMode_CVT(rnd);
1456 
1457    switch (i->op) {
1458    case OP_ABS: code[1] |= 1 << 20; break;
1459    case OP_SAT: code[1] |= 1 << 19; break;
1460    case OP_NEG: code[1] |= 1 << 29; break;
1461    default:
1462       break;
1463    }
1464    code[1] ^= i->src(0).mod.neg() << 29;
1465    code[1] |= i->src(0).mod.abs() << 20;
1466    if (i->saturate)
1467       code[1] |= 1 << 19;
1468 
1469    assert(i->op != OP_ABS || !i->src(0).mod.neg());
1470 
1471    emitForm_MAD(i);
1472 }
1473 
1474 void
emitPreOp(const Instruction * i)1475 CodeEmitterNV50::emitPreOp(const Instruction *i)
1476 {
1477    code[0] = 0xb0000000;
1478    code[1] = (i->op == OP_PREEX2) ? 0xc0004000 : 0xc0000000;
1479 
1480    code[1] |= i->src(0).mod.abs() << 20;
1481    code[1] |= i->src(0).mod.neg() << 26;
1482 
1483    emitForm_MAD(i);
1484 }
1485 
1486 void
emitSFnOp(const Instruction * i,uint8_t subOp)1487 CodeEmitterNV50::emitSFnOp(const Instruction *i, uint8_t subOp)
1488 {
1489    code[0] = 0x90000000;
1490 
1491    if (i->encSize == 4) {
1492       assert(i->op == OP_RCP);
1493       assert(!i->saturate);
1494       code[0] |= i->src(0).mod.abs() << 15;
1495       code[0] |= i->src(0).mod.neg() << 22;
1496       emitForm_MUL(i);
1497    } else {
1498       code[1] = subOp << 29;
1499       code[1] |= i->src(0).mod.abs() << 20;
1500       code[1] |= i->src(0).mod.neg() << 26;
1501       if (i->saturate) {
1502          assert(subOp == 6 && i->op == OP_EX2);
1503          code[1] |= 1 << 27;
1504       }
1505       emitForm_MAD(i);
1506    }
1507 }
1508 
1509 void
emitNOT(const Instruction * i)1510 CodeEmitterNV50::emitNOT(const Instruction *i)
1511 {
1512    code[0] = 0xd0000000;
1513    code[1] = 0x0002c000;
1514 
1515    switch (i->sType) {
1516    case TYPE_U32:
1517    case TYPE_S32:
1518       code[1] |= 0x04000000;
1519       break;
1520    default:
1521       break;
1522    }
1523    emitForm_MAD(i);
1524    setSrc(i, 0, 1);
1525 }
1526 
1527 void
emitLogicOp(const Instruction * i)1528 CodeEmitterNV50::emitLogicOp(const Instruction *i)
1529 {
1530    code[0] = 0xd0000000;
1531    code[1] = 0;
1532 
1533    if (i->src(1).getFile() == FILE_IMMEDIATE) {
1534       switch (i->op) {
1535       case OP_OR:  code[0] |= 0x0100; break;
1536       case OP_XOR: code[0] |= 0x8000; break;
1537       default:
1538          assert(i->op == OP_AND);
1539          break;
1540       }
1541       if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
1542          code[0] |= 1 << 22;
1543 
1544       emitForm_IMM(i);
1545    } else {
1546       switch (i->op) {
1547       case OP_AND: code[1] = 0x04000000; break;
1548       case OP_OR:  code[1] = 0x04004000; break;
1549       case OP_XOR: code[1] = 0x04008000; break;
1550       default:
1551          assert(0);
1552          break;
1553       }
1554       if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
1555          code[1] |= 1 << 16;
1556       if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT))
1557          code[1] |= 1 << 17;
1558 
1559       emitForm_MAD(i);
1560    }
1561 }
1562 
1563 void
emitARL(const Instruction * i,unsigned int shl)1564 CodeEmitterNV50::emitARL(const Instruction *i, unsigned int shl)
1565 {
1566    code[0] = 0x00000001 | (shl << 16);
1567    code[1] = 0xc0000000;
1568 
1569    code[0] |= (DDATA(i->def(0)).id + 1) << 2;
1570 
1571    setSrcFileBits(i, NV50_OP_ENC_IMM);
1572    setSrc(i, 0, 0);
1573    emitFlagsRd(i);
1574 }
1575 
1576 void
emitShift(const Instruction * i)1577 CodeEmitterNV50::emitShift(const Instruction *i)
1578 {
1579    if (i->def(0).getFile() == FILE_ADDRESS) {
1580       assert(i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE);
1581       emitARL(i, i->getSrc(1)->reg.data.u32 & 0x3f);
1582    } else {
1583       code[0] = 0x30000001;
1584       code[1] = (i->op == OP_SHR) ? 0xe4000000 : 0xc4000000;
1585       if (i->op == OP_SHR && isSignedType(i->sType))
1586           code[1] |= 1 << 27;
1587 
1588       if (i->src(1).getFile() == FILE_IMMEDIATE) {
1589          code[1] |= 1 << 20;
1590          code[0] |= (i->getSrc(1)->reg.data.u32 & 0x7f) << 16;
1591          defId(i->def(0), 2);
1592          srcId(i->src(0), 9);
1593          emitFlagsRd(i);
1594       } else {
1595          emitForm_MAD(i);
1596       }
1597    }
1598 }
1599 
1600 void
emitOUT(const Instruction * i)1601 CodeEmitterNV50::emitOUT(const Instruction *i)
1602 {
1603    code[0] = (i->op == OP_EMIT) ? 0xf0000201 : 0xf0000401;
1604    code[1] = 0xc0000000;
1605 
1606    emitFlagsRd(i);
1607 }
1608 
1609 void
emitTEX(const TexInstruction * i)1610 CodeEmitterNV50::emitTEX(const TexInstruction *i)
1611 {
1612    code[0] = 0xf0000001;
1613    code[1] = 0x00000000;
1614 
1615    switch (i->op) {
1616    case OP_TXB:
1617       code[1] = 0x20000000;
1618       break;
1619    case OP_TXL:
1620       code[1] = 0x40000000;
1621       break;
1622    case OP_TXF:
1623       code[0] |= 0x01000000;
1624       break;
1625    case OP_TXG:
1626       code[0] |= 0x01000000;
1627       code[1] = 0x80000000;
1628       break;
1629    case OP_TXLQ:
1630       code[1] = 0x60020000;
1631       break;
1632    default:
1633       assert(i->op == OP_TEX);
1634       break;
1635    }
1636 
1637    code[0] |= i->tex.r << 9;
1638    code[0] |= i->tex.s << 17;
1639 
1640    int argc = i->tex.target.getArgCount();
1641 
1642    if (i->op == OP_TXB || i->op == OP_TXL || i->op == OP_TXF)
1643       argc += 1;
1644    if (i->tex.target.isShadow())
1645       argc += 1;
1646    assert(argc <= 4);
1647 
1648    code[0] |= (argc - 1) << 22;
1649 
1650    if (i->tex.target.isCube()) {
1651       code[0] |= 0x08000000;
1652    } else
1653    if (i->tex.useOffsets) {
1654       code[1] |= (i->tex.offset[0] & 0xf) << 24;
1655       code[1] |= (i->tex.offset[1] & 0xf) << 20;
1656       code[1] |= (i->tex.offset[2] & 0xf) << 16;
1657    }
1658 
1659    code[0] |= (i->tex.mask & 0x3) << 25;
1660    code[1] |= (i->tex.mask & 0xc) << 12;
1661 
1662    if (i->tex.liveOnly)
1663       code[1] |= 1 << 2;
1664    if (i->tex.derivAll)
1665       code[1] |= 1 << 3;
1666 
1667    defId(i->def(0), 2);
1668 
1669    emitFlagsRd(i);
1670 }
1671 
1672 void
emitTXQ(const TexInstruction * i)1673 CodeEmitterNV50::emitTXQ(const TexInstruction *i)
1674 {
1675    assert(i->tex.query == TXQ_DIMS);
1676 
1677    code[0] = 0xf0000001;
1678    code[1] = 0x60000000;
1679 
1680    code[0] |= i->tex.r << 9;
1681    code[0] |= i->tex.s << 17;
1682 
1683    code[0] |= (i->tex.mask & 0x3) << 25;
1684    code[1] |= (i->tex.mask & 0xc) << 12;
1685 
1686    defId(i->def(0), 2);
1687 
1688    emitFlagsRd(i);
1689 }
1690 
1691 void
emitTEXPREP(const TexInstruction * i)1692 CodeEmitterNV50::emitTEXPREP(const TexInstruction *i)
1693 {
1694    code[0] = 0xf8000001 | (3 << 22) | (i->tex.s << 17) | (i->tex.r << 9);
1695    code[1] = 0x60010000;
1696 
1697    code[0] |= (i->tex.mask & 0x3) << 25;
1698    code[1] |= (i->tex.mask & 0xc) << 12;
1699    defId(i->def(0), 2);
1700 
1701    emitFlagsRd(i);
1702 }
1703 
1704 void
emitPRERETEmu(const FlowInstruction * i)1705 CodeEmitterNV50::emitPRERETEmu(const FlowInstruction *i)
1706 {
1707    uint32_t pos = i->target.bb->binPos + 8; // +8 to skip an op */
1708 
1709    code[0] = 0x10000003; // bra
1710    code[1] = 0x00000780; // always
1711 
1712    switch (i->subOp) {
1713    case NV50_IR_SUBOP_EMU_PRERET + 0: // bra to the call
1714       break;
1715    case NV50_IR_SUBOP_EMU_PRERET + 1: // bra to skip the call
1716       pos += 8;
1717       break;
1718    default:
1719       assert(i->subOp == (NV50_IR_SUBOP_EMU_PRERET + 2));
1720       code[0] = 0x20000003; // call
1721       code[1] = 0x00000000; // no predicate
1722       break;
1723    }
1724    addReloc(RelocEntry::TYPE_CODE, 0, pos, 0x07fff800, 9);
1725    addReloc(RelocEntry::TYPE_CODE, 1, pos, 0x000fc000, -4);
1726 }
1727 
1728 void
emitFlow(const Instruction * i,uint8_t flowOp)1729 CodeEmitterNV50::emitFlow(const Instruction *i, uint8_t flowOp)
1730 {
1731    const FlowInstruction *f = i->asFlow();
1732    bool hasPred = false;
1733    bool hasTarg = false;
1734 
1735    code[0] = 0x00000003 | (flowOp << 28);
1736    code[1] = 0x00000000;
1737 
1738    switch (i->op) {
1739    case OP_BRA:
1740       hasPred = true;
1741       hasTarg = true;
1742       break;
1743    case OP_BREAK:
1744    case OP_BRKPT:
1745    case OP_DISCARD:
1746    case OP_RET:
1747       hasPred = true;
1748       break;
1749    case OP_CALL:
1750    case OP_PREBREAK:
1751    case OP_JOINAT:
1752       hasTarg = true;
1753       break;
1754    case OP_PRERET:
1755       hasTarg = true;
1756       if (i->subOp >= NV50_IR_SUBOP_EMU_PRERET) {
1757          emitPRERETEmu(f);
1758          return;
1759       }
1760       break;
1761    default:
1762       break;
1763    }
1764 
1765    if (hasPred)
1766       emitFlagsRd(i);
1767 
1768    if (hasTarg && f) {
1769       uint32_t pos;
1770 
1771       if (f->op == OP_CALL) {
1772          if (f->builtin) {
1773             pos = targNV50->getBuiltinOffset(f->target.builtin);
1774          } else {
1775             pos = f->target.fn->binPos;
1776          }
1777       } else {
1778          pos = f->target.bb->binPos;
1779       }
1780 
1781       code[0] |= ((pos >>  2) & 0xffff) << 11;
1782       code[1] |= ((pos >> 18) & 0x003f) << 14;
1783 
1784       RelocEntry::Type relocTy;
1785 
1786       relocTy = f->builtin ? RelocEntry::TYPE_BUILTIN : RelocEntry::TYPE_CODE;
1787 
1788       addReloc(relocTy, 0, pos, 0x07fff800, 9);
1789       addReloc(relocTy, 1, pos, 0x000fc000, -4);
1790    }
1791 }
1792 
1793 void
emitBAR(const Instruction * i)1794 CodeEmitterNV50::emitBAR(const Instruction *i)
1795 {
1796    ImmediateValue *barId = i->getSrc(0)->asImm();
1797    assert(barId);
1798 
1799    code[0] = 0x82000003 | (barId->reg.data.u32 << 21);
1800    code[1] = 0x00004000;
1801 
1802    if (i->subOp == NV50_IR_SUBOP_BAR_SYNC)
1803       code[0] |= 1 << 26;
1804 }
1805 
1806 void
emitATOM(const Instruction * i)1807 CodeEmitterNV50::emitATOM(const Instruction *i)
1808 {
1809    uint8_t subOp;
1810    switch (i->subOp) {
1811    case NV50_IR_SUBOP_ATOM_ADD:  subOp = 0x0; break;
1812    case NV50_IR_SUBOP_ATOM_MIN:  subOp = 0x7; break;
1813    case NV50_IR_SUBOP_ATOM_MAX:  subOp = 0x6; break;
1814    case NV50_IR_SUBOP_ATOM_INC:  subOp = 0x4; break;
1815    case NV50_IR_SUBOP_ATOM_DEC:  subOp = 0x5; break;
1816    case NV50_IR_SUBOP_ATOM_AND:  subOp = 0xa; break;
1817    case NV50_IR_SUBOP_ATOM_OR:   subOp = 0xb; break;
1818    case NV50_IR_SUBOP_ATOM_XOR:  subOp = 0xc; break;
1819    case NV50_IR_SUBOP_ATOM_CAS:  subOp = 0x2; break;
1820    case NV50_IR_SUBOP_ATOM_EXCH: subOp = 0x1; break;
1821    default:
1822       assert(!"invalid subop");
1823       return;
1824    }
1825    code[0] = 0xd0000001;
1826    code[1] = 0xe0c00000 | (subOp << 2);
1827    if (isSignedType(i->dType))
1828       code[1] |= 1 << 21;
1829 
1830    // args
1831    emitFlagsRd(i);
1832    setDst(i, 0);
1833    setSrc(i, 1, 1);
1834    if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
1835       setSrc(i, 2, 2);
1836 
1837    // g[] pointer
1838    code[0] |= i->getSrc(0)->reg.fileIndex << 23;
1839    srcId(i->getIndirect(0, 0), 9);
1840 }
1841 
1842 bool
emitInstruction(Instruction * insn)1843 CodeEmitterNV50::emitInstruction(Instruction *insn)
1844 {
1845    if (!insn->encSize) {
1846       ERROR("skipping unencodable instruction: "); insn->print();
1847       return false;
1848    } else
1849    if (codeSize + insn->encSize > codeSizeLimit) {
1850       ERROR("code emitter output buffer too small\n");
1851       return false;
1852    }
1853 
1854    if (insn->bb->getProgram()->dbgFlags & NV50_IR_DEBUG_BASIC) {
1855       INFO("EMIT: "); insn->print();
1856    }
1857 
1858    switch (insn->op) {
1859    case OP_MOV:
1860       emitMOV(insn);
1861       break;
1862    case OP_EXIT:
1863    case OP_NOP:
1864    case OP_JOIN:
1865       emitNOP();
1866       break;
1867    case OP_VFETCH:
1868    case OP_LOAD:
1869       emitLOAD(insn);
1870       break;
1871    case OP_EXPORT:
1872    case OP_STORE:
1873       emitSTORE(insn);
1874       break;
1875    case OP_PFETCH:
1876       emitPFETCH(insn);
1877       break;
1878    case OP_RDSV:
1879       emitRDSV(insn);
1880       break;
1881    case OP_LINTERP:
1882    case OP_PINTERP:
1883       emitINTERP(insn);
1884       break;
1885    case OP_ADD:
1886    case OP_SUB:
1887       if (insn->dType == TYPE_F64)
1888          emitDADD(insn);
1889       else if (isFloatType(insn->dType))
1890          emitFADD(insn);
1891       else if (insn->getDef(0)->reg.file == FILE_ADDRESS)
1892          emitAADD(insn);
1893       else
1894          emitUADD(insn);
1895       break;
1896    case OP_MUL:
1897       if (insn->dType == TYPE_F64)
1898          emitDMUL(insn);
1899       else if (isFloatType(insn->dType))
1900          emitFMUL(insn);
1901       else
1902          emitIMUL(insn);
1903       break;
1904    case OP_MAD:
1905    case OP_FMA:
1906       if (insn->dType == TYPE_F64)
1907          emitDMAD(insn);
1908       else if (isFloatType(insn->dType))
1909          emitFMAD(insn);
1910       else
1911          emitIMAD(insn);
1912       break;
1913    case OP_SAD:
1914       emitISAD(insn);
1915       break;
1916    case OP_NOT:
1917       emitNOT(insn);
1918       break;
1919    case OP_AND:
1920    case OP_OR:
1921    case OP_XOR:
1922       emitLogicOp(insn);
1923       break;
1924    case OP_SHL:
1925    case OP_SHR:
1926       emitShift(insn);
1927       break;
1928    case OP_SET:
1929       emitSET(insn);
1930       break;
1931    case OP_MIN:
1932    case OP_MAX:
1933       emitMINMAX(insn);
1934       break;
1935    case OP_CEIL:
1936    case OP_FLOOR:
1937    case OP_TRUNC:
1938    case OP_ABS:
1939    case OP_NEG:
1940    case OP_SAT:
1941       emitCVT(insn);
1942       break;
1943    case OP_CVT:
1944       if (insn->def(0).getFile() == FILE_ADDRESS)
1945          emitARL(insn, 0);
1946       else
1947       if (insn->def(0).getFile() == FILE_FLAGS ||
1948           insn->src(0).getFile() == FILE_FLAGS ||
1949           insn->src(0).getFile() == FILE_ADDRESS)
1950          emitMOV(insn);
1951       else
1952          emitCVT(insn);
1953       break;
1954    case OP_RCP:
1955       emitSFnOp(insn, 0);
1956       break;
1957    case OP_RSQ:
1958       emitSFnOp(insn, 2);
1959       break;
1960    case OP_LG2:
1961       emitSFnOp(insn, 3);
1962       break;
1963    case OP_SIN:
1964       emitSFnOp(insn, 4);
1965       break;
1966    case OP_COS:
1967       emitSFnOp(insn, 5);
1968       break;
1969    case OP_EX2:
1970       emitSFnOp(insn, 6);
1971       break;
1972    case OP_PRESIN:
1973    case OP_PREEX2:
1974       emitPreOp(insn);
1975       break;
1976    case OP_TEX:
1977    case OP_TXB:
1978    case OP_TXL:
1979    case OP_TXF:
1980    case OP_TXG:
1981    case OP_TXLQ:
1982       emitTEX(insn->asTex());
1983       break;
1984    case OP_TXQ:
1985       emitTXQ(insn->asTex());
1986       break;
1987    case OP_TEXPREP:
1988       emitTEXPREP(insn->asTex());
1989       break;
1990    case OP_EMIT:
1991    case OP_RESTART:
1992       emitOUT(insn);
1993       break;
1994    case OP_DISCARD:
1995       emitFlow(insn, 0x0);
1996       break;
1997    case OP_BRA:
1998       emitFlow(insn, 0x1);
1999       break;
2000    case OP_CALL:
2001       emitFlow(insn, 0x2);
2002       break;
2003    case OP_RET:
2004       emitFlow(insn, 0x3);
2005       break;
2006    case OP_PREBREAK:
2007       emitFlow(insn, 0x4);
2008       break;
2009    case OP_BREAK:
2010       emitFlow(insn, 0x5);
2011       break;
2012    case OP_QUADON:
2013       emitFlow(insn, 0x6);
2014       break;
2015    case OP_QUADPOP:
2016       emitFlow(insn, 0x7);
2017       break;
2018    case OP_JOINAT:
2019       emitFlow(insn, 0xa);
2020       break;
2021    case OP_PRERET:
2022       emitFlow(insn, 0xd);
2023       break;
2024    case OP_QUADOP:
2025       emitQUADOP(insn, insn->lanes, insn->subOp);
2026       break;
2027    case OP_DFDX:
2028       emitQUADOP(insn, 4, insn->src(0).mod.neg() ? 0x66 : 0x99);
2029       break;
2030    case OP_DFDY:
2031       emitQUADOP(insn, 5, insn->src(0).mod.neg() ? 0x5a : 0xa5);
2032       break;
2033    case OP_ATOM:
2034       emitATOM(insn);
2035       break;
2036    case OP_BAR:
2037       emitBAR(insn);
2038       break;
2039    case OP_PHI:
2040    case OP_UNION:
2041    case OP_CONSTRAINT:
2042       ERROR("operation should have been eliminated\n");
2043       return false;
2044    case OP_EXP:
2045    case OP_LOG:
2046    case OP_SQRT:
2047    case OP_POW:
2048    case OP_SELP:
2049    case OP_SLCT:
2050    case OP_TXD:
2051    case OP_PRECONT:
2052    case OP_CONT:
2053    case OP_POPCNT:
2054    case OP_INSBF:
2055    case OP_EXTBF:
2056       ERROR("operation should have been lowered\n");
2057       return false;
2058    default:
2059       ERROR("unknown op: %u\n", insn->op);
2060       return false;
2061    }
2062    if (insn->join || insn->op == OP_JOIN)
2063       code[1] |= 0x2;
2064    else
2065    if (insn->exit || insn->op == OP_EXIT)
2066       code[1] |= 0x1;
2067 
2068    assert((insn->encSize == 8) == (code[0] & 1));
2069 
2070    code += insn->encSize / 4;
2071    codeSize += insn->encSize;
2072    return true;
2073 }
2074 
2075 uint32_t
getMinEncodingSize(const Instruction * i) const2076 CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const
2077 {
2078    const Target::OpInfo &info = targ->getOpInfo(i);
2079 
2080    if (info.minEncSize > 4 || i->dType == TYPE_F64)
2081       return 8;
2082 
2083    // check constraints on dst and src operands
2084    for (int d = 0; i->defExists(d); ++d) {
2085       if (i->def(d).rep()->reg.data.id > 63 ||
2086           i->def(d).rep()->reg.file != FILE_GPR)
2087          return 8;
2088    }
2089 
2090    for (int s = 0; i->srcExists(s); ++s) {
2091       DataFile sf = i->src(s).getFile();
2092       if (sf != FILE_GPR)
2093          if (sf != FILE_SHADER_INPUT || progType != Program::TYPE_FRAGMENT)
2094             return 8;
2095       if (i->src(s).rep()->reg.data.id > 63)
2096          return 8;
2097    }
2098 
2099    // check modifiers & rounding
2100    if (i->join || i->lanes != 0xf || i->exit)
2101       return 8;
2102    if (i->op == OP_MUL && i->rnd != ROUND_N)
2103       return 8;
2104 
2105    if (i->asTex())
2106       return 8; // TODO: short tex encoding
2107 
2108    // check constraints on short MAD
2109    if (info.srcNr >= 2 && i->srcExists(2)) {
2110       if (!i->defExists(0) ||
2111           (i->flagsSrc >= 0 && SDATA(i->src(i->flagsSrc)).id > 0) ||
2112           DDATA(i->def(0)).id != SDATA(i->src(2)).id)
2113          return 8;
2114    }
2115 
2116    return info.minEncSize;
2117 }
2118 
2119 // Change the encoding size of an instruction after BBs have been scheduled.
2120 static void
makeInstructionLong(Instruction * insn)2121 makeInstructionLong(Instruction *insn)
2122 {
2123    if (insn->encSize == 8)
2124       return;
2125    Function *fn = insn->bb->getFunction();
2126    int n = 0;
2127    int adj = 4;
2128 
2129    for (Instruction *i = insn->next; i && i->encSize == 4; ++n, i = i->next);
2130 
2131    if (n & 1) {
2132       adj = 8;
2133       insn->next->encSize = 8;
2134    } else
2135    if (insn->prev && insn->prev->encSize == 4) {
2136       adj = 8;
2137       insn->prev->encSize = 8;
2138    }
2139    insn->encSize = 8;
2140 
2141    for (int i = fn->bbCount - 1; i >= 0 && fn->bbArray[i] != insn->bb; --i) {
2142       fn->bbArray[i]->binPos += adj;
2143    }
2144    fn->binSize += adj;
2145    insn->bb->binSize += adj;
2146 }
2147 
2148 static bool
trySetExitModifier(Instruction * insn)2149 trySetExitModifier(Instruction *insn)
2150 {
2151    if (insn->op == OP_DISCARD ||
2152        insn->op == OP_QUADON ||
2153        insn->op == OP_QUADPOP)
2154       return false;
2155    for (int s = 0; insn->srcExists(s); ++s)
2156       if (insn->src(s).getFile() == FILE_IMMEDIATE)
2157          return false;
2158    if (insn->asFlow()) {
2159       if (insn->op == OP_CALL) // side effects !
2160          return false;
2161       if (insn->getPredicate()) // cannot do conditional exit (or can we ?)
2162          return false;
2163       insn->op = OP_EXIT;
2164    }
2165    insn->exit = 1;
2166    makeInstructionLong(insn);
2167    return true;
2168 }
2169 
2170 static void
replaceExitWithModifier(Function * func)2171 replaceExitWithModifier(Function *func)
2172 {
2173    BasicBlock *epilogue = BasicBlock::get(func->cfgExit);
2174 
2175    if (!epilogue->getExit() ||
2176        epilogue->getExit()->op != OP_EXIT) // only main will use OP_EXIT
2177       return;
2178 
2179    if (epilogue->getEntry()->op != OP_EXIT) {
2180       Instruction *insn = epilogue->getExit()->prev;
2181       if (!insn || !trySetExitModifier(insn))
2182          return;
2183       insn->exit = 1;
2184    } else {
2185       for (Graph::EdgeIterator ei = func->cfgExit->incident();
2186            !ei.end(); ei.next()) {
2187          BasicBlock *bb = BasicBlock::get(ei.getNode());
2188          Instruction *i = bb->getExit();
2189 
2190          if (!i || !trySetExitModifier(i))
2191             return;
2192       }
2193    }
2194 
2195    int adj = epilogue->getExit()->encSize;
2196    epilogue->binSize -= adj;
2197    func->binSize -= adj;
2198    delete_Instruction(func->getProgram(), epilogue->getExit());
2199 
2200    // There may be BB's that are laid out after the exit block
2201    for (int i = func->bbCount - 1; i >= 0 && func->bbArray[i] != epilogue; --i) {
2202       func->bbArray[i]->binPos -= adj;
2203    }
2204 }
2205 
2206 void
prepareEmission(Function * func)2207 CodeEmitterNV50::prepareEmission(Function *func)
2208 {
2209    CodeEmitter::prepareEmission(func);
2210 
2211    replaceExitWithModifier(func);
2212 }
2213 
CodeEmitterNV50(const TargetNV50 * target)2214 CodeEmitterNV50::CodeEmitterNV50(const TargetNV50 *target) :
2215    CodeEmitter(target), targNV50(target)
2216 {
2217    targ = target; // specialized
2218    code = NULL;
2219    codeSize = codeSizeLimit = 0;
2220    relocInfo = NULL;
2221 }
2222 
2223 CodeEmitter *
getCodeEmitter(Program::Type type)2224 TargetNV50::getCodeEmitter(Program::Type type)
2225 {
2226    CodeEmitterNV50 *emit = new CodeEmitterNV50(this);
2227    emit->setProgramType(type);
2228    return emit;
2229 }
2230 
2231 } // namespace nv50_ir
2232