1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "codegen/nv50_ir.h"
24 #include "codegen/nv50_ir_target_nv50.h"
25
26 namespace nv50_ir {
27
// Encoding-form selectors; passed as the 'enc' argument of
// CodeEmitterNV50::setSrcFileBits() by the emitForm_*() helpers.
#define NV50_OP_ENC_LONG     0
#define NV50_OP_ENC_SHORT    1
#define NV50_OP_ENC_IMM      2
#define NV50_OP_ENC_LONG_ALT 3
32
33 class CodeEmitterNV50 : public CodeEmitter
34 {
35 public:
36 CodeEmitterNV50(const TargetNV50 *);
37
38 virtual bool emitInstruction(Instruction *);
39
40 virtual uint32_t getMinEncodingSize(const Instruction *) const;
41
setProgramType(Program::Type pType)42 inline void setProgramType(Program::Type pType) { progType = pType; }
43
44 virtual void prepareEmission(Function *);
45
46 private:
47 Program::Type progType;
48
49 const TargetNV50 *targNV50;
50
51 private:
52 inline void defId(const ValueDef&, const int pos);
53 inline void srcId(const ValueRef&, const int pos);
54 inline void srcId(const ValueRef *, const int pos);
55
56 inline void srcAddr16(const ValueRef&, bool adj, const int pos);
57 inline void srcAddr8(const ValueRef&, const int pos);
58
59 void emitFlagsRd(const Instruction *);
60 void emitFlagsWr(const Instruction *);
61
62 void emitCondCode(CondCode cc, DataType ty, int pos);
63
64 inline void setARegBits(unsigned int);
65
66 void setAReg16(const Instruction *, int s);
67 void setImmediate(const Instruction *, int s);
68
69 void setDst(const Value *);
70 void setDst(const Instruction *, int d);
71 void setSrcFileBits(const Instruction *, int enc);
72 void setSrc(const Instruction *, unsigned int s, int slot);
73
74 void emitForm_MAD(const Instruction *);
75 void emitForm_ADD(const Instruction *);
76 void emitForm_MUL(const Instruction *);
77 void emitForm_IMM(const Instruction *);
78
79 void emitLoadStoreSizeLG(DataType ty, int pos);
80 void emitLoadStoreSizeCS(DataType ty);
81
82 void roundMode_MAD(const Instruction *);
83 void roundMode_CVT(RoundMode);
84
85 void emitMNeg12(const Instruction *);
86
87 void emitLOAD(const Instruction *);
88 void emitSTORE(const Instruction *);
89 void emitMOV(const Instruction *);
90 void emitRDSV(const Instruction *);
91 void emitNOP();
92 void emitINTERP(const Instruction *);
93 void emitPFETCH(const Instruction *);
94 void emitOUT(const Instruction *);
95
96 void emitUADD(const Instruction *);
97 void emitAADD(const Instruction *);
98 void emitFADD(const Instruction *);
99 void emitDADD(const Instruction *);
100 void emitIMUL(const Instruction *);
101 void emitFMUL(const Instruction *);
102 void emitDMUL(const Instruction *);
103 void emitFMAD(const Instruction *);
104 void emitDMAD(const Instruction *);
105 void emitIMAD(const Instruction *);
106 void emitISAD(const Instruction *);
107
108 void emitMINMAX(const Instruction *);
109
110 void emitPreOp(const Instruction *);
111 void emitSFnOp(const Instruction *, uint8_t subOp);
112
113 void emitShift(const Instruction *);
114 void emitARL(const Instruction *, unsigned int shl);
115 void emitLogicOp(const Instruction *);
116 void emitNOT(const Instruction *);
117
118 void emitCVT(const Instruction *);
119 void emitSET(const Instruction *);
120
121 void emitTEX(const TexInstruction *);
122 void emitTXQ(const TexInstruction *);
123 void emitTEXPREP(const TexInstruction *);
124
125 void emitQUADOP(const Instruction *, uint8_t lane, uint8_t quOp);
126
127 void emitFlow(const Instruction *, uint8_t flowOp);
128 void emitPRERETEmu(const FlowInstruction *);
129 void emitBAR(const Instruction *);
130
131 void emitATOM(const Instruction *);
132 };
133
// Shorthands for the register data of the representative value behind a
// source reference (SDATA) or a definition (DDATA); both expand identically.
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) ((a).rep()->reg.data)
136
srcId(const ValueRef & src,const int pos)137 void CodeEmitterNV50::srcId(const ValueRef& src, const int pos)
138 {
139 assert(src.get());
140 code[pos / 32] |= SDATA(src).id << (pos % 32);
141 }
142
srcId(const ValueRef * src,const int pos)143 void CodeEmitterNV50::srcId(const ValueRef *src, const int pos)
144 {
145 assert(src->get());
146 code[pos / 32] |= SDATA(*src).id << (pos % 32);
147 }
148
srcAddr16(const ValueRef & src,bool adj,const int pos)149 void CodeEmitterNV50::srcAddr16(const ValueRef& src, bool adj, const int pos)
150 {
151 assert(src.get());
152
153 int32_t offset = SDATA(src).offset;
154
155 assert(!adj || src.get()->reg.size <= 4);
156 if (adj)
157 offset /= src.get()->reg.size;
158
159 assert(offset <= 0x7fff && offset >= (int32_t)-0x8000 && (pos % 32) <= 16);
160
161 if (offset < 0)
162 offset &= adj ? (0xffff >> (src.get()->reg.size >> 1)) : 0xffff;
163
164 code[pos / 32] |= offset << (pos % 32);
165 }
166
srcAddr8(const ValueRef & src,const int pos)167 void CodeEmitterNV50::srcAddr8(const ValueRef& src, const int pos)
168 {
169 assert(src.get());
170
171 uint32_t offset = SDATA(src).offset;
172
173 assert((offset <= 0x1fc || offset == 0x3fc) && !(offset & 0x3));
174
175 code[pos / 32] |= (offset >> 2) << (pos % 32);
176 }
177
defId(const ValueDef & def,const int pos)178 void CodeEmitterNV50::defId(const ValueDef& def, const int pos)
179 {
180 assert(def.get() && def.getFile() != FILE_SHADER_OUTPUT);
181
182 code[pos / 32] |= DDATA(def).id << (pos % 32);
183 }
184
185 void
roundMode_MAD(const Instruction * insn)186 CodeEmitterNV50::roundMode_MAD(const Instruction *insn)
187 {
188 switch (insn->rnd) {
189 case ROUND_M: code[1] |= 1 << 22; break;
190 case ROUND_P: code[1] |= 2 << 22; break;
191 case ROUND_Z: code[1] |= 3 << 22; break;
192 default:
193 assert(insn->rnd == ROUND_N);
194 break;
195 }
196 }
197
198 void
emitMNeg12(const Instruction * i)199 CodeEmitterNV50::emitMNeg12(const Instruction *i)
200 {
201 code[1] |= i->src(0).mod.neg() << 26;
202 code[1] |= i->src(1).mod.neg() << 27;
203 }
204
emitCondCode(CondCode cc,DataType ty,int pos)205 void CodeEmitterNV50::emitCondCode(CondCode cc, DataType ty, int pos)
206 {
207 uint8_t enc;
208
209 assert(pos >= 32 || pos <= 27);
210
211 switch (cc) {
212 case CC_LT: enc = 0x1; break;
213 case CC_LTU: enc = 0x9; break;
214 case CC_EQ: enc = 0x2; break;
215 case CC_EQU: enc = 0xa; break;
216 case CC_LE: enc = 0x3; break;
217 case CC_LEU: enc = 0xb; break;
218 case CC_GT: enc = 0x4; break;
219 case CC_GTU: enc = 0xc; break;
220 case CC_NE: enc = 0x5; break;
221 case CC_NEU: enc = 0xd; break;
222 case CC_GE: enc = 0x6; break;
223 case CC_GEU: enc = 0xe; break;
224 case CC_TR: enc = 0xf; break;
225 case CC_FL: enc = 0x0; break;
226
227 case CC_O: enc = 0x10; break;
228 case CC_C: enc = 0x11; break;
229 case CC_A: enc = 0x12; break;
230 case CC_S: enc = 0x13; break;
231 case CC_NS: enc = 0x1c; break;
232 case CC_NA: enc = 0x1d; break;
233 case CC_NC: enc = 0x1e; break;
234 case CC_NO: enc = 0x1f; break;
235
236 default:
237 enc = 0;
238 assert(!"invalid condition code");
239 break;
240 }
241 if (ty != TYPE_NONE && !isFloatType(ty))
242 enc &= ~0x8; // unordered only exists for float types
243
244 code[pos / 32] |= enc << (pos % 32);
245 }
246
247 void
emitFlagsRd(const Instruction * i)248 CodeEmitterNV50::emitFlagsRd(const Instruction *i)
249 {
250 int s = (i->flagsSrc >= 0) ? i->flagsSrc : i->predSrc;
251
252 assert(!(code[1] & 0x00003f80));
253
254 if (s >= 0) {
255 assert(i->getSrc(s)->reg.file == FILE_FLAGS);
256 emitCondCode(i->cc, TYPE_NONE, 32 + 7);
257 srcId(i->src(s), 32 + 12);
258 } else {
259 code[1] |= 0x0780;
260 }
261 }
262
263 void
emitFlagsWr(const Instruction * i)264 CodeEmitterNV50::emitFlagsWr(const Instruction *i)
265 {
266 assert(!(code[1] & 0x70));
267
268 int flagsDef = i->flagsDef;
269
270 // find flags definition and check that it is the last def
271 if (flagsDef < 0) {
272 for (int d = 0; i->defExists(d); ++d)
273 if (i->def(d).getFile() == FILE_FLAGS)
274 flagsDef = d;
275 if (flagsDef >= 0 && 0) // TODO: enforce use of flagsDef at some point
276 WARN("Instruction::flagsDef was not set properly\n");
277 }
278 if (flagsDef == 0 && i->defExists(1))
279 WARN("flags def should not be the primary definition\n");
280
281 if (flagsDef >= 0)
282 code[1] |= (DDATA(i->def(flagsDef)).id << 4) | 0x40;
283
284 }
285
286 void
setARegBits(unsigned int u)287 CodeEmitterNV50::setARegBits(unsigned int u)
288 {
289 code[0] |= (u & 3) << 26;
290 code[1] |= (u & 4);
291 }
292
293 void
setAReg16(const Instruction * i,int s)294 CodeEmitterNV50::setAReg16(const Instruction *i, int s)
295 {
296 if (i->srcExists(s)) {
297 s = i->src(s).indirect[0];
298 if (s >= 0)
299 setARegBits(SDATA(i->src(s)).id + 1);
300 }
301 }
302
303 void
setImmediate(const Instruction * i,int s)304 CodeEmitterNV50::setImmediate(const Instruction *i, int s)
305 {
306 const ImmediateValue *imm = i->src(s).get()->asImm();
307 assert(imm);
308
309 uint32_t u = imm->reg.data.u32;
310
311 if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT))
312 u = ~u;
313
314 code[1] |= 3;
315 code[0] |= (u & 0x3f) << 16;
316 code[1] |= (u >> 6) << 2;
317 }
318
319 void
setDst(const Value * dst)320 CodeEmitterNV50::setDst(const Value *dst)
321 {
322 const Storage *reg = &dst->join->reg;
323
324 assert(reg->file != FILE_ADDRESS);
325
326 if (reg->data.id < 0 || reg->file == FILE_FLAGS) {
327 code[0] |= (127 << 2) | 1;
328 code[1] |= 8;
329 } else {
330 int id;
331 if (reg->file == FILE_SHADER_OUTPUT) {
332 code[1] |= 8;
333 id = reg->data.offset / 4;
334 } else {
335 id = reg->data.id;
336 }
337 code[0] |= id << 2;
338 }
339 }
340
341 void
setDst(const Instruction * i,int d)342 CodeEmitterNV50::setDst(const Instruction *i, int d)
343 {
344 if (i->defExists(d)) {
345 setDst(i->getDef(d));
346 } else
347 if (!d) {
348 code[0] |= 0x01fc; // bit bucket
349 code[1] |= 0x0008;
350 }
351 }
352
353 // 3 * 2 bits:
354 // 0: r
355 // 1: a/s
356 // 2: c
357 // 3: i
358 void
setSrcFileBits(const Instruction * i,int enc)359 CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc)
360 {
361 uint8_t mode = 0;
362
363 for (unsigned int s = 0; s < Target::operationSrcNr[i->op]; ++s) {
364 switch (i->src(s).getFile()) {
365 case FILE_GPR:
366 break;
367 case FILE_MEMORY_SHARED:
368 case FILE_SHADER_INPUT:
369 mode |= 1 << (s * 2);
370 break;
371 case FILE_MEMORY_CONST:
372 mode |= 2 << (s * 2);
373 break;
374 case FILE_IMMEDIATE:
375 mode |= 3 << (s * 2);
376 break;
377 default:
378 ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile());
379 assert(0);
380 break;
381 }
382 }
383 switch (mode) {
384 case 0x00: // rrr
385 break;
386 case 0x01: // arr/grr
387 if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
388 code[0] |= 0x01800000;
389 if (enc == NV50_OP_ENC_LONG || enc == NV50_OP_ENC_LONG_ALT)
390 code[1] |= 0x00200000;
391 } else {
392 if (enc == NV50_OP_ENC_SHORT)
393 code[0] |= 0x01000000;
394 else
395 code[1] |= 0x00200000;
396 }
397 break;
398 case 0x03: // irr
399 assert(i->op == OP_MOV);
400 return;
401 case 0x0c: // rir
402 break;
403 case 0x0d: // gir
404 assert(progType == Program::TYPE_GEOMETRY ||
405 progType == Program::TYPE_COMPUTE);
406 code[0] |= 0x01000000;
407 if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
408 int reg = i->src(0).getIndirect(0)->rep()->reg.data.id;
409 assert(reg < 3);
410 code[0] |= (reg + 1) << 26;
411 }
412 break;
413 case 0x08: // rcr
414 code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
415 code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
416 break;
417 case 0x09: // acr/gcr
418 if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
419 code[0] |= 0x01800000;
420 } else {
421 code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
422 code[1] |= 0x00200000;
423 }
424 code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
425 break;
426 case 0x20: // rrc
427 code[0] |= 0x01000000;
428 code[1] |= (i->getSrc(2)->reg.fileIndex << 22);
429 break;
430 case 0x21: // arc
431 code[0] |= 0x01000000;
432 code[1] |= 0x00200000 | (i->getSrc(2)->reg.fileIndex << 22);
433 assert(progType != Program::TYPE_GEOMETRY);
434 break;
435 default:
436 ERROR("not encodable: %x\n", mode);
437 assert(0);
438 break;
439 }
440 if (progType != Program::TYPE_COMPUTE)
441 return;
442
443 if ((mode & 3) == 1) {
444 const int pos = ((mode >> 2) & 3) == 3 ? 13 : 14;
445
446 switch (i->sType) {
447 case TYPE_U8:
448 break;
449 case TYPE_U16:
450 code[0] |= 1 << pos;
451 break;
452 case TYPE_S16:
453 code[0] |= 2 << pos;
454 break;
455 default:
456 code[0] |= 3 << pos;
457 assert(i->getSrc(0)->reg.size == 4);
458 break;
459 }
460 }
461 }
462
463 void
setSrc(const Instruction * i,unsigned int s,int slot)464 CodeEmitterNV50::setSrc(const Instruction *i, unsigned int s, int slot)
465 {
466 if (Target::operationSrcNr[i->op] <= s)
467 return;
468 const Storage *reg = &i->src(s).rep()->reg;
469
470 unsigned int id = (reg->file == FILE_GPR) ?
471 reg->data.id :
472 reg->data.offset >> (reg->size >> 1); // no > 4 byte sources here
473
474 switch (slot) {
475 case 0: code[0] |= id << 9; break;
476 case 1: code[0] |= id << 16; break;
477 case 2: code[1] |= id << 14; break;
478 default:
479 assert(0);
480 break;
481 }
482 }
483
484 // the default form:
485 // - long instruction
486 // - 1 to 3 sources in slots 0, 1, 2 (rrr, arr, rcr, acr, rrc, arc, gcr, grr)
487 // - address & flags
488 void
emitForm_MAD(const Instruction * i)489 CodeEmitterNV50::emitForm_MAD(const Instruction *i)
490 {
491 assert(i->encSize == 8);
492 code[0] |= 1;
493
494 emitFlagsRd(i);
495 emitFlagsWr(i);
496
497 setDst(i, 0);
498
499 setSrcFileBits(i, NV50_OP_ENC_LONG);
500 setSrc(i, 0, 0);
501 setSrc(i, 1, 1);
502 setSrc(i, 2, 2);
503
504 if (i->getIndirect(0, 0)) {
505 assert(!i->srcExists(1) || !i->getIndirect(1, 0));
506 assert(!i->srcExists(2) || !i->getIndirect(2, 0));
507 setAReg16(i, 0);
508 } else if (i->srcExists(1) && i->getIndirect(1, 0)) {
509 assert(!i->srcExists(2) || !i->getIndirect(2, 0));
510 setAReg16(i, 1);
511 } else {
512 setAReg16(i, 2);
513 }
514 }
515
516 // like default form, but 2nd source in slot 2, and no 3rd source
517 void
emitForm_ADD(const Instruction * i)518 CodeEmitterNV50::emitForm_ADD(const Instruction *i)
519 {
520 assert(i->encSize == 8);
521 code[0] |= 1;
522
523 emitFlagsRd(i);
524 emitFlagsWr(i);
525
526 setDst(i, 0);
527
528 setSrcFileBits(i, NV50_OP_ENC_LONG_ALT);
529 setSrc(i, 0, 0);
530 if (i->predSrc != 1)
531 setSrc(i, 1, 2);
532
533 if (i->getIndirect(0, 0)) {
534 assert(!i->getIndirect(1, 0));
535 setAReg16(i, 0);
536 } else {
537 setAReg16(i, 1);
538 }
539 }
540
541 // default short form (rr, ar, rc, gr)
542 void
emitForm_MUL(const Instruction * i)543 CodeEmitterNV50::emitForm_MUL(const Instruction *i)
544 {
545 assert(i->encSize == 4 && !(code[0] & 1));
546 assert(i->defExists(0));
547 assert(!i->getPredicate());
548
549 setDst(i, 0);
550
551 setSrcFileBits(i, NV50_OP_ENC_SHORT);
552 setSrc(i, 0, 0);
553 setSrc(i, 1, 1);
554 }
555
556 // usual immediate form
557 // - 1 to 3 sources where second is immediate (rir, gir)
558 // - no address or predicate possible
559 void
emitForm_IMM(const Instruction * i)560 CodeEmitterNV50::emitForm_IMM(const Instruction *i)
561 {
562 assert(i->encSize == 8);
563 code[0] |= 1;
564
565 assert(i->defExists(0) && i->srcExists(0));
566
567 setDst(i, 0);
568
569 setSrcFileBits(i, NV50_OP_ENC_IMM);
570 if (Target::operationSrcNr[i->op] > 1) {
571 setSrc(i, 0, 0);
572 setImmediate(i, 1);
573 // If there is another source, it has to be the same as the dest reg.
574 } else {
575 setImmediate(i, 0);
576 }
577 }
578
579 void
emitLoadStoreSizeLG(DataType ty,int pos)580 CodeEmitterNV50::emitLoadStoreSizeLG(DataType ty, int pos)
581 {
582 uint8_t enc;
583
584 switch (ty) {
585 case TYPE_F32: // fall through
586 case TYPE_S32: // fall through
587 case TYPE_U32: enc = 0x6; break;
588 case TYPE_B128: enc = 0x5; break;
589 case TYPE_F64: // fall through
590 case TYPE_S64: // fall through
591 case TYPE_U64: enc = 0x4; break;
592 case TYPE_S16: enc = 0x3; break;
593 case TYPE_U16: enc = 0x2; break;
594 case TYPE_S8: enc = 0x1; break;
595 case TYPE_U8: enc = 0x0; break;
596 default:
597 enc = 0;
598 assert(!"invalid load/store type");
599 break;
600 }
601 code[pos / 32] |= enc << (pos % 32);
602 }
603
604 void
emitLoadStoreSizeCS(DataType ty)605 CodeEmitterNV50::emitLoadStoreSizeCS(DataType ty)
606 {
607 switch (ty) {
608 case TYPE_U8: break;
609 case TYPE_U16: code[1] |= 0x4000; break;
610 case TYPE_S16: code[1] |= 0x8000; break;
611 case TYPE_F32:
612 case TYPE_S32:
613 case TYPE_U32: code[1] |= 0xc000; break;
614 default:
615 assert(0);
616 break;
617 }
618 }
619
620 void
emitLOAD(const Instruction * i)621 CodeEmitterNV50::emitLOAD(const Instruction *i)
622 {
623 DataFile sf = i->src(0).getFile();
624 MAYBE_UNUSED int32_t offset = i->getSrc(0)->reg.data.offset;
625
626 switch (sf) {
627 case FILE_SHADER_INPUT:
628 if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0))
629 code[0] = 0x11800001;
630 else
631 // use 'mov' where we can
632 code[0] = i->src(0).isIndirect(0) ? 0x00000001 : 0x10000001;
633 code[1] = 0x00200000 | (i->lanes << 14);
634 if (typeSizeof(i->dType) == 4)
635 code[1] |= 0x04000000;
636 break;
637 case FILE_MEMORY_SHARED:
638 if (targ->getChipset() >= 0x84) {
639 assert(offset <= (int32_t)(0x3fff * typeSizeof(i->sType)));
640 code[0] = 0x10000001;
641 code[1] = 0x40000000;
642
643 if (typeSizeof(i->dType) == 4)
644 code[1] |= 0x04000000;
645
646 emitLoadStoreSizeCS(i->sType);
647 } else {
648 assert(offset <= (int32_t)(0x1f * typeSizeof(i->sType)));
649 code[0] = 0x10000001;
650 code[1] = 0x00200000 | (i->lanes << 14);
651 emitLoadStoreSizeCS(i->sType);
652 }
653 break;
654 case FILE_MEMORY_CONST:
655 code[0] = 0x10000001;
656 code[1] = 0x20000000 | (i->getSrc(0)->reg.fileIndex << 22);
657 if (typeSizeof(i->dType) == 4)
658 code[1] |= 0x04000000;
659 emitLoadStoreSizeCS(i->sType);
660 break;
661 case FILE_MEMORY_LOCAL:
662 code[0] = 0xd0000001;
663 code[1] = 0x40000000;
664 break;
665 case FILE_MEMORY_GLOBAL:
666 code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
667 code[1] = 0x80000000;
668 break;
669 default:
670 assert(!"invalid load source file");
671 break;
672 }
673 if (sf == FILE_MEMORY_LOCAL ||
674 sf == FILE_MEMORY_GLOBAL)
675 emitLoadStoreSizeLG(i->sType, 21 + 32);
676
677 setDst(i, 0);
678
679 emitFlagsRd(i);
680 emitFlagsWr(i);
681
682 if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
683 srcId(*i->src(0).getIndirect(0), 9);
684 } else {
685 setAReg16(i, 0);
686 srcAddr16(i->src(0), i->src(0).getFile() != FILE_MEMORY_LOCAL, 9);
687 }
688 }
689
690 void
emitSTORE(const Instruction * i)691 CodeEmitterNV50::emitSTORE(const Instruction *i)
692 {
693 DataFile f = i->getSrc(0)->reg.file;
694 int32_t offset = i->getSrc(0)->reg.data.offset;
695
696 switch (f) {
697 case FILE_SHADER_OUTPUT:
698 code[0] = 0x00000001 | ((offset >> 2) << 9);
699 code[1] = 0x80c00000;
700 srcId(i->src(1), 32 + 14);
701 break;
702 case FILE_MEMORY_GLOBAL:
703 code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
704 code[1] = 0xa0000000;
705 emitLoadStoreSizeLG(i->dType, 21 + 32);
706 srcId(i->src(1), 2);
707 break;
708 case FILE_MEMORY_LOCAL:
709 code[0] = 0xd0000001;
710 code[1] = 0x60000000;
711 emitLoadStoreSizeLG(i->dType, 21 + 32);
712 srcId(i->src(1), 2);
713 break;
714 case FILE_MEMORY_SHARED:
715 code[0] = 0x00000001;
716 code[1] = 0xe0000000;
717 switch (typeSizeof(i->dType)) {
718 case 1:
719 code[0] |= offset << 9;
720 code[1] |= 0x00400000;
721 break;
722 case 2:
723 code[0] |= (offset >> 1) << 9;
724 break;
725 case 4:
726 code[0] |= (offset >> 2) << 9;
727 code[1] |= 0x04200000;
728 break;
729 default:
730 assert(0);
731 break;
732 }
733 srcId(i->src(1), 32 + 14);
734 break;
735 default:
736 assert(!"invalid store destination file");
737 break;
738 }
739
740 if (f == FILE_MEMORY_GLOBAL)
741 srcId(*i->src(0).getIndirect(0), 9);
742 else
743 setAReg16(i, 0);
744
745 if (f == FILE_MEMORY_LOCAL)
746 srcAddr16(i->src(0), false, 9);
747
748 emitFlagsRd(i);
749 }
750
751 void
emitMOV(const Instruction * i)752 CodeEmitterNV50::emitMOV(const Instruction *i)
753 {
754 DataFile sf = i->getSrc(0)->reg.file;
755 DataFile df = i->getDef(0)->reg.file;
756
757 assert(sf == FILE_GPR || df == FILE_GPR);
758
759 if (sf == FILE_FLAGS) {
760 assert(i->flagsSrc >= 0);
761 code[0] = 0x00000001;
762 code[1] = 0x20000000;
763 defId(i->def(0), 2);
764 emitFlagsRd(i);
765 } else
766 if (sf == FILE_ADDRESS) {
767 code[0] = 0x00000001;
768 code[1] = 0x40000000;
769 defId(i->def(0), 2);
770 setARegBits(SDATA(i->src(0)).id + 1);
771 emitFlagsRd(i);
772 } else
773 if (df == FILE_FLAGS) {
774 assert(i->flagsDef >= 0);
775 code[0] = 0x00000001;
776 code[1] = 0xa0000000;
777 srcId(i->src(0), 9);
778 emitFlagsRd(i);
779 emitFlagsWr(i);
780 } else
781 if (sf == FILE_IMMEDIATE) {
782 code[0] = 0x10008001;
783 code[1] = 0x00000003;
784 emitForm_IMM(i);
785 } else {
786 if (i->encSize == 4) {
787 code[0] = 0x10008000;
788 } else {
789 code[0] = 0x10000001;
790 code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
791 code[1] |= (i->lanes << 14);
792 emitFlagsRd(i);
793 }
794 defId(i->def(0), 2);
795 srcId(i->src(0), 9);
796 }
797 if (df == FILE_SHADER_OUTPUT) {
798 assert(i->encSize == 8);
799 code[1] |= 0x8;
800 }
801 }
802
getSRegEncoding(const ValueRef & ref)803 static inline uint8_t getSRegEncoding(const ValueRef &ref)
804 {
805 switch (SDATA(ref).sv.sv) {
806 case SV_PHYSID: return 0;
807 case SV_CLOCK: return 1;
808 case SV_VERTEX_STRIDE: return 3;
809 // case SV_PM_COUNTER: return 4 + SDATA(ref).sv.index;
810 case SV_SAMPLE_INDEX: return 8;
811 default:
812 assert(!"no sreg for system value");
813 return 0;
814 }
815 }
816
817 void
emitRDSV(const Instruction * i)818 CodeEmitterNV50::emitRDSV(const Instruction *i)
819 {
820 code[0] = 0x00000001;
821 code[1] = 0x60000000 | (getSRegEncoding(i->src(0)) << 14);
822 defId(i->def(0), 2);
823 emitFlagsRd(i);
824 }
825
826 void
emitNOP()827 CodeEmitterNV50::emitNOP()
828 {
829 code[0] = 0xf0000001;
830 code[1] = 0xe0000000;
831 }
832
833 void
emitQUADOP(const Instruction * i,uint8_t lane,uint8_t quOp)834 CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp)
835 {
836 code[0] = 0xc0000000 | (lane << 16);
837 code[1] = 0x80000000;
838
839 code[0] |= (quOp & 0x03) << 20;
840 code[1] |= (quOp & 0xfc) << 20;
841
842 emitForm_ADD(i);
843
844 if (!i->srcExists(1) || i->predSrc == 1)
845 srcId(i->src(0), 32 + 14);
846 }
847
848 /* NOTE: This returns the base address of a vertex inside the primitive.
849 * src0 is an immediate, the index (not offset) of the vertex
850 * inside the primitive. XXX: signed or unsigned ?
851 * src1 (may be NULL) should use whatever units the hardware requires
852 * (on nv50 this is bytes, so, relative index * 4; signed 16 bit value).
853 */
854 void
emitPFETCH(const Instruction * i)855 CodeEmitterNV50::emitPFETCH(const Instruction *i)
856 {
857 const uint32_t prim = i->src(0).get()->reg.data.u32;
858 assert(prim <= 127);
859
860 if (i->def(0).getFile() == FILE_ADDRESS) {
861 // shl $aX a[] 0
862 code[0] = 0x00000001 | ((DDATA(i->def(0)).id + 1) << 2);
863 code[1] = 0xc0200000;
864 code[0] |= prim << 9;
865 assert(!i->srcExists(1));
866 } else
867 if (i->srcExists(1)) {
868 // ld b32 $rX a[$aX+base]
869 code[0] = 0x00000001;
870 code[1] = 0x04200000 | (0xf << 14);
871 defId(i->def(0), 2);
872 code[0] |= prim << 9;
873 setARegBits(SDATA(i->src(1)).id + 1);
874 } else {
875 // mov b32 $rX a[]
876 code[0] = 0x10000001;
877 code[1] = 0x04200000 | (0xf << 14);
878 defId(i->def(0), 2);
879 code[0] |= prim << 9;
880 }
881 emitFlagsRd(i);
882 }
883
884 static void
interpApply(const FixupEntry * entry,uint32_t * code,const FixupData & data)885 interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
886 {
887 int ipa = entry->ipa;
888 int encSize = entry->reg;
889 int loc = entry->loc;
890
891 if ((ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
892 (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
893 if (data.force_persample_interp) {
894 if (encSize == 8)
895 code[loc + 1] |= 1 << 16;
896 else
897 code[loc + 0] |= 1 << 24;
898 } else {
899 if (encSize == 8)
900 code[loc + 1] &= ~(1 << 16);
901 else
902 code[loc + 0] &= ~(1 << 24);
903 }
904 }
905 }
906
907 void
emitINTERP(const Instruction * i)908 CodeEmitterNV50::emitINTERP(const Instruction *i)
909 {
910 code[0] = 0x80000000;
911
912 defId(i->def(0), 2);
913 srcAddr8(i->src(0), 16);
914 setAReg16(i, 0);
915
916 if (i->encSize != 8 && i->getInterpMode() == NV50_IR_INTERP_FLAT) {
917 code[0] |= 1 << 8;
918 } else {
919 if (i->op == OP_PINTERP) {
920 code[0] |= 1 << 25;
921 srcId(i->src(1), 9);
922 }
923 if (i->getSampleMode() == NV50_IR_INTERP_CENTROID)
924 code[0] |= 1 << 24;
925 }
926
927 if (i->encSize == 8) {
928 if (i->getInterpMode() == NV50_IR_INTERP_FLAT)
929 code[1] = 4 << 16;
930 else
931 code[1] = (code[0] & (3 << 24)) >> (24 - 16);
932 code[0] &= ~0x03000000;
933 code[0] |= 1;
934 emitFlagsRd(i);
935 }
936
937 addInterp(i->ipa, i->encSize, interpApply);
938 }
939
940 void
emitMINMAX(const Instruction * i)941 CodeEmitterNV50::emitMINMAX(const Instruction *i)
942 {
943 if (i->dType == TYPE_F64) {
944 code[0] = 0xe0000000;
945 code[1] = (i->op == OP_MIN) ? 0xa0000000 : 0xc0000000;
946 } else {
947 code[0] = 0x30000000;
948 code[1] = 0x80000000;
949 if (i->op == OP_MIN)
950 code[1] |= 0x20000000;
951
952 switch (i->dType) {
953 case TYPE_F32: code[0] |= 0x80000000; break;
954 case TYPE_S32: code[1] |= 0x8c000000; break;
955 case TYPE_U32: code[1] |= 0x84000000; break;
956 case TYPE_S16: code[1] |= 0x80000000; break;
957 case TYPE_U16: break;
958 default:
959 assert(0);
960 break;
961 }
962 }
963
964 code[1] |= i->src(0).mod.abs() << 20;
965 code[1] |= i->src(0).mod.neg() << 26;
966 code[1] |= i->src(1).mod.abs() << 19;
967 code[1] |= i->src(1).mod.neg() << 27;
968
969 emitForm_MAD(i);
970 }
971
972 void
emitFMAD(const Instruction * i)973 CodeEmitterNV50::emitFMAD(const Instruction *i)
974 {
975 const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
976 const int neg_add = i->src(2).mod.neg();
977
978 code[0] = 0xe0000000;
979
980 if (i->src(1).getFile() == FILE_IMMEDIATE) {
981 code[1] = 0;
982 emitForm_IMM(i);
983 code[0] |= neg_mul << 15;
984 code[0] |= neg_add << 22;
985 if (i->saturate)
986 code[0] |= 1 << 8;
987 } else
988 if (i->encSize == 4) {
989 emitForm_MUL(i);
990 code[0] |= neg_mul << 15;
991 code[0] |= neg_add << 22;
992 if (i->saturate)
993 code[0] |= 1 << 8;
994 } else {
995 code[1] = neg_mul << 26;
996 code[1] |= neg_add << 27;
997 if (i->saturate)
998 code[1] |= 1 << 29;
999 emitForm_MAD(i);
1000 }
1001 }
1002
1003 void
emitDMAD(const Instruction * i)1004 CodeEmitterNV50::emitDMAD(const Instruction *i)
1005 {
1006 const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
1007 const int neg_add = i->src(2).mod.neg();
1008
1009 assert(i->encSize == 8);
1010 assert(!i->saturate);
1011
1012 code[1] = 0x40000000;
1013 code[0] = 0xe0000000;
1014
1015 code[1] |= neg_mul << 26;
1016 code[1] |= neg_add << 27;
1017
1018 roundMode_MAD(i);
1019
1020 emitForm_MAD(i);
1021 }
1022
1023 void
emitFADD(const Instruction * i)1024 CodeEmitterNV50::emitFADD(const Instruction *i)
1025 {
1026 const int neg0 = i->src(0).mod.neg();
1027 const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
1028
1029 code[0] = 0xb0000000;
1030
1031 assert(!(i->src(0).mod | i->src(1).mod).abs());
1032
1033 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1034 code[1] = 0;
1035 emitForm_IMM(i);
1036 code[0] |= neg0 << 15;
1037 code[0] |= neg1 << 22;
1038 if (i->saturate)
1039 code[0] |= 1 << 8;
1040 } else
1041 if (i->encSize == 8) {
1042 code[1] = 0;
1043 emitForm_ADD(i);
1044 code[1] |= neg0 << 26;
1045 code[1] |= neg1 << 27;
1046 if (i->saturate)
1047 code[1] |= 1 << 29;
1048 } else {
1049 emitForm_MUL(i);
1050 code[0] |= neg0 << 15;
1051 code[0] |= neg1 << 22;
1052 if (i->saturate)
1053 code[0] |= 1 << 8;
1054 }
1055 }
1056
1057 void
emitDADD(const Instruction * i)1058 CodeEmitterNV50::emitDADD(const Instruction *i)
1059 {
1060 const int neg0 = i->src(0).mod.neg();
1061 const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
1062
1063 assert(!(i->src(0).mod | i->src(1).mod).abs());
1064 assert(!i->saturate);
1065 assert(i->encSize == 8);
1066
1067 code[1] = 0x60000000;
1068 code[0] = 0xe0000000;
1069
1070 emitForm_ADD(i);
1071
1072 code[1] |= neg0 << 26;
1073 code[1] |= neg1 << 27;
1074 }
1075
1076 void
emitUADD(const Instruction * i)1077 CodeEmitterNV50::emitUADD(const Instruction *i)
1078 {
1079 const int neg0 = i->src(0).mod.neg();
1080 const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
1081
1082 code[0] = 0x20008000;
1083
1084 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1085 code[1] = 0;
1086 emitForm_IMM(i);
1087 } else
1088 if (i->encSize == 8) {
1089 code[0] = 0x20000000;
1090 code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
1091 emitForm_ADD(i);
1092 } else {
1093 emitForm_MUL(i);
1094 }
1095 assert(!(neg0 && neg1));
1096 code[0] |= neg0 << 28;
1097 code[0] |= neg1 << 22;
1098
1099 if (i->flagsSrc >= 0) {
1100 // addc == sub | subr
1101 assert(!(code[0] & 0x10400000) && !i->getPredicate());
1102 code[0] |= 0x10400000;
1103 srcId(i->src(i->flagsSrc), 32 + 12);
1104 }
1105 }
1106
1107 void
emitAADD(const Instruction * i)1108 CodeEmitterNV50::emitAADD(const Instruction *i)
1109 {
1110 const int s = (i->op == OP_MOV) ? 0 : 1;
1111
1112 code[0] = 0xd0000001 | (i->getSrc(s)->reg.data.u16 << 9);
1113 code[1] = 0x20000000;
1114
1115 code[0] |= (DDATA(i->def(0)).id + 1) << 2;
1116
1117 emitFlagsRd(i);
1118
1119 if (s && i->srcExists(0))
1120 setARegBits(SDATA(i->src(0)).id + 1);
1121 }
1122
1123 void
emitIMUL(const Instruction * i)1124 CodeEmitterNV50::emitIMUL(const Instruction *i)
1125 {
1126 code[0] = 0x40000000;
1127
1128 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1129 if (i->sType == TYPE_S16)
1130 code[0] |= 0x8100;
1131 code[1] = 0;
1132 emitForm_IMM(i);
1133 } else
1134 if (i->encSize == 8) {
1135 code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000;
1136 emitForm_MAD(i);
1137 } else {
1138 if (i->sType == TYPE_S16)
1139 code[0] |= 0x8100;
1140 emitForm_MUL(i);
1141 }
1142 }
1143
1144 void
emitFMUL(const Instruction * i)1145 CodeEmitterNV50::emitFMUL(const Instruction *i)
1146 {
1147 const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
1148
1149 code[0] = 0xc0000000;
1150
1151 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1152 code[1] = 0;
1153 emitForm_IMM(i);
1154 if (neg)
1155 code[0] |= 0x8000;
1156 if (i->saturate)
1157 code[0] |= 1 << 8;
1158 } else
1159 if (i->encSize == 8) {
1160 code[1] = i->rnd == ROUND_Z ? 0x0000c000 : 0;
1161 if (neg)
1162 code[1] |= 0x08000000;
1163 if (i->saturate)
1164 code[1] |= 1 << 20;
1165 emitForm_MAD(i);
1166 } else {
1167 emitForm_MUL(i);
1168 if (neg)
1169 code[0] |= 0x8000;
1170 if (i->saturate)
1171 code[0] |= 1 << 8;
1172 }
1173 }
1174
1175 void
emitDMUL(const Instruction * i)1176 CodeEmitterNV50::emitDMUL(const Instruction *i)
1177 {
1178 const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
1179
1180 assert(!i->saturate);
1181 assert(i->encSize == 8);
1182
1183 code[1] = 0x80000000;
1184 code[0] = 0xe0000000;
1185
1186 if (neg)
1187 code[1] |= 0x08000000;
1188
1189 roundMode_CVT(i->rnd);
1190
1191 emitForm_MAD(i);
1192 }
1193
1194 void
emitIMAD(const Instruction * i)1195 CodeEmitterNV50::emitIMAD(const Instruction *i)
1196 {
1197 int mode;
1198 code[0] = 0x60000000;
1199
1200 assert(!i->src(0).mod && !i->src(1).mod && !i->src(2).mod);
1201 if (!isSignedType(i->sType))
1202 mode = 0;
1203 else if (i->saturate)
1204 mode = 2;
1205 else
1206 mode = 1;
1207
1208 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1209 code[1] = 0;
1210 emitForm_IMM(i);
1211 code[0] |= (mode & 1) << 8 | (mode & 2) << 14;
1212 if (i->flagsSrc >= 0) {
1213 assert(!(code[0] & 0x10400000));
1214 assert(SDATA(i->src(i->flagsSrc)).id == 0);
1215 code[0] |= 0x10400000;
1216 }
1217 } else
1218 if (i->encSize == 4) {
1219 emitForm_MUL(i);
1220 code[0] |= (mode & 1) << 8 | (mode & 2) << 14;
1221 if (i->flagsSrc >= 0) {
1222 assert(!(code[0] & 0x10400000));
1223 assert(SDATA(i->src(i->flagsSrc)).id == 0);
1224 code[0] |= 0x10400000;
1225 }
1226 } else {
1227 code[1] = mode << 29;
1228 emitForm_MAD(i);
1229
1230 if (i->flagsSrc >= 0) {
1231 // add with carry from $cX
1232 assert(!(code[1] & 0x0c000000) && !i->getPredicate());
1233 code[1] |= 0xc << 24;
1234 srcId(i->src(i->flagsSrc), 32 + 12);
1235 }
1236 }
1237 }
1238
1239 void
emitISAD(const Instruction * i)1240 CodeEmitterNV50::emitISAD(const Instruction *i)
1241 {
1242 if (i->encSize == 8) {
1243 code[0] = 0x50000000;
1244 switch (i->sType) {
1245 case TYPE_U32: code[1] = 0x04000000; break;
1246 case TYPE_S32: code[1] = 0x0c000000; break;
1247 case TYPE_U16: code[1] = 0x00000000; break;
1248 case TYPE_S16: code[1] = 0x08000000; break;
1249 default:
1250 assert(0);
1251 break;
1252 }
1253 emitForm_MAD(i);
1254 } else {
1255 switch (i->sType) {
1256 case TYPE_U32: code[0] = 0x50008000; break;
1257 case TYPE_S32: code[0] = 0x50008100; break;
1258 case TYPE_U16: code[0] = 0x50000000; break;
1259 case TYPE_S16: code[0] = 0x50000100; break;
1260 default:
1261 assert(0);
1262 break;
1263 }
1264 emitForm_MUL(i);
1265 }
1266 }
1267
1268 static void
alphatestSet(const FixupEntry * entry,uint32_t * code,const FixupData & data)1269 alphatestSet(const FixupEntry *entry, uint32_t *code, const FixupData& data)
1270 {
1271 int loc = entry->loc;
1272 int enc;
1273
1274 switch (data.alphatest) {
1275 case PIPE_FUNC_NEVER: enc = 0x0; break;
1276 case PIPE_FUNC_LESS: enc = 0x1; break;
1277 case PIPE_FUNC_EQUAL: enc = 0x2; break;
1278 case PIPE_FUNC_LEQUAL: enc = 0x3; break;
1279 case PIPE_FUNC_GREATER: enc = 0x4; break;
1280 case PIPE_FUNC_NOTEQUAL: enc = 0x5; break;
1281 case PIPE_FUNC_GEQUAL: enc = 0x6; break;
1282 default:
1283 case PIPE_FUNC_ALWAYS: enc = 0xf; break;
1284 }
1285
1286 code[loc + 1] &= ~(0x1f << 14);
1287 code[loc + 1] |= enc << 14;
1288 }
1289
1290 void
emitSET(const Instruction * i)1291 CodeEmitterNV50::emitSET(const Instruction *i)
1292 {
1293 code[0] = 0x30000000;
1294 code[1] = 0x60000000;
1295
1296 switch (i->sType) {
1297 case TYPE_F64:
1298 code[0] = 0xe0000000;
1299 code[1] = 0xe0000000;
1300 break;
1301 case TYPE_F32: code[0] |= 0x80000000; break;
1302 case TYPE_S32: code[1] |= 0x0c000000; break;
1303 case TYPE_U32: code[1] |= 0x04000000; break;
1304 case TYPE_S16: code[1] |= 0x08000000; break;
1305 case TYPE_U16: break;
1306 default:
1307 assert(0);
1308 break;
1309 }
1310
1311 emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14);
1312
1313 if (i->src(0).mod.neg()) code[1] |= 0x04000000;
1314 if (i->src(1).mod.neg()) code[1] |= 0x08000000;
1315 if (i->src(0).mod.abs()) code[1] |= 0x00100000;
1316 if (i->src(1).mod.abs()) code[1] |= 0x00080000;
1317
1318 emitForm_MAD(i);
1319
1320 if (i->subOp == 1) {
1321 addInterp(0, 0, alphatestSet);
1322 }
1323 }
1324
1325 void
roundMode_CVT(RoundMode rnd)1326 CodeEmitterNV50::roundMode_CVT(RoundMode rnd)
1327 {
1328 switch (rnd) {
1329 case ROUND_NI: code[1] |= 0x08000000; break;
1330 case ROUND_M: code[1] |= 0x00020000; break;
1331 case ROUND_MI: code[1] |= 0x08020000; break;
1332 case ROUND_P: code[1] |= 0x00040000; break;
1333 case ROUND_PI: code[1] |= 0x08040000; break;
1334 case ROUND_Z: code[1] |= 0x00060000; break;
1335 case ROUND_ZI: code[1] |= 0x08060000; break;
1336 default:
1337 assert(rnd == ROUND_N);
1338 break;
1339 }
1340 }
1341
// Encode a type conversion, also used for CEIL/FLOOR/TRUNC/ABS/NEG/SAT
// (see the dispatch in emitInstruction()).
//
// code[1] is looked up from a table keyed by (destination type, source
// type); unsupported combinations hit assert(0).  Rounding and modifier bits
// are then OR'ed/XOR'ed on top before the operands are emitted.
void
CodeEmitterNV50::emitCVT(const Instruction *i)
{
   // float -> float conversions use the ROUND_*I variants for
   // CEIL/FLOOR/TRUNC below.
   const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
   RoundMode rnd;
   DataType dType;

   // CEIL/FLOOR/TRUNC imply their own rounding mode; every other opcode
   // uses the mode stored on the instruction.
   switch (i->op) {
   case OP_CEIL:  rnd = f2f ? ROUND_PI : ROUND_P; break;
   case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;
   case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;
   default:
      rnd = i->rnd;
      break;
   }

   // A NEG with an unsigned 32-bit destination is encoded using the S32
   // destination table.
   if (i->op == OP_NEG && i->dType == TYPE_U32)
      dType = TYPE_S32;
   else
      dType = i->dType;

   code[0] = 0xa0000000;

   // Outer switch: destination type.  Inner switch: source type.
   switch (dType) {
   case TYPE_F64:
      switch (i->sType) {
      case TYPE_F64: code[1] = 0xc4404000; break;
      case TYPE_S64: code[1] = 0x44414000; break;
      case TYPE_U64: code[1] = 0x44404000; break;
      case TYPE_F32: code[1] = 0xc4400000; break;
      case TYPE_S32: code[1] = 0x44410000; break;
      case TYPE_U32: code[1] = 0x44400000; break;
      default:
         assert(0);
         break;
      }
      break;
   case TYPE_S64:
      switch (i->sType) {
      case TYPE_F64: code[1] = 0x8c404000; break;
      case TYPE_F32: code[1] = 0x8c400000; break;
      default:
         assert(0);
         break;
      }
      break;
   case TYPE_U64:
      switch (i->sType) {
      case TYPE_F64: code[1] = 0x84404000; break;
      case TYPE_F32: code[1] = 0x84400000; break;
      default:
         assert(0);
         break;
      }
      break;
   case TYPE_F32:
      switch (i->sType) {
      case TYPE_F64: code[1] = 0xc0404000; break;
      case TYPE_S64: code[1] = 0x40414000; break;
      case TYPE_U64: code[1] = 0x40404000; break;
      case TYPE_F32: code[1] = 0xc4004000; break;
      case TYPE_S32: code[1] = 0x44014000; break;
      case TYPE_U32: code[1] = 0x44004000; break;
      case TYPE_F16: code[1] = 0xc4000000; break;
      case TYPE_U16: code[1] = 0x44000000; break;
      default:
         assert(0);
         break;
      }
      break;
   case TYPE_S32:
      switch (i->sType) {
      case TYPE_F64: code[1] = 0x88404000; break;
      case TYPE_F32: code[1] = 0x8c004000; break;
      case TYPE_S32: code[1] = 0x0c014000; break;
      case TYPE_U32: code[1] = 0x0c004000; break;
      case TYPE_F16: code[1] = 0x8c000000; break;
      case TYPE_S16: code[1] = 0x0c010000; break;
      case TYPE_U16: code[1] = 0x0c000000; break;
      case TYPE_S8:  code[1] = 0x0c018000; break;
      case TYPE_U8:  code[1] = 0x0c008000; break;
      default:
         assert(0);
         break;
      }
      break;
   case TYPE_U32:
      switch (i->sType) {
      case TYPE_F64: code[1] = 0x80404000; break;
      case TYPE_F32: code[1] = 0x84004000; break;
      case TYPE_S32: code[1] = 0x04014000; break;
      case TYPE_U32: code[1] = 0x04004000; break;
      case TYPE_F16: code[1] = 0x84000000; break;
      case TYPE_S16: code[1] = 0x04010000; break;
      case TYPE_U16: code[1] = 0x04000000; break;
      case TYPE_S8:  code[1] = 0x04018000; break;
      case TYPE_U8:  code[1] = 0x04008000; break;
      default:
         assert(0);
         break;
      }
      break;
   // 8- and 16-bit destinations are not handled by this emitter.
   case TYPE_S16:
   case TYPE_U16:
   case TYPE_S8:
   case TYPE_U8:
   default:
      assert(0);
      break;
   }
   // A 1-byte source type held in a 4-byte register additionally sets
   // bit 14.
   if (typeSizeof(i->sType) == 1 && i->getSrc(0)->reg.size == 4)
      code[1] |= 0x00004000;

   roundMode_CVT(rnd);

   // Opcode-implied modifiers: ABS -> bit 20, SAT -> bit 19, NEG -> bit 29.
   switch (i->op) {
   case OP_ABS: code[1] |= 1 << 20; break;
   case OP_SAT: code[1] |= 1 << 19; break;
   case OP_NEG: code[1] |= 1 << 29; break;
   default:
      break;
   }
   // The source negate modifier is XOR'ed in, so OP_NEG applied to an
   // already negated source clears bit 29 again.
   code[1] ^= i->src(0).mod.neg() << 29;
   code[1] |= i->src(0).mod.abs() << 20;
   if (i->saturate)
      code[1] |= 1 << 19;

   // OP_ABS combined with a negated source is not expected here.
   assert(i->op != OP_ABS || !i->src(0).mod.neg());

   emitForm_MAD(i);
}
1473
1474 void
emitPreOp(const Instruction * i)1475 CodeEmitterNV50::emitPreOp(const Instruction *i)
1476 {
1477 code[0] = 0xb0000000;
1478 code[1] = (i->op == OP_PREEX2) ? 0xc0004000 : 0xc0000000;
1479
1480 code[1] |= i->src(0).mod.abs() << 20;
1481 code[1] |= i->src(0).mod.neg() << 26;
1482
1483 emitForm_MAD(i);
1484 }
1485
1486 void
emitSFnOp(const Instruction * i,uint8_t subOp)1487 CodeEmitterNV50::emitSFnOp(const Instruction *i, uint8_t subOp)
1488 {
1489 code[0] = 0x90000000;
1490
1491 if (i->encSize == 4) {
1492 assert(i->op == OP_RCP);
1493 assert(!i->saturate);
1494 code[0] |= i->src(0).mod.abs() << 15;
1495 code[0] |= i->src(0).mod.neg() << 22;
1496 emitForm_MUL(i);
1497 } else {
1498 code[1] = subOp << 29;
1499 code[1] |= i->src(0).mod.abs() << 20;
1500 code[1] |= i->src(0).mod.neg() << 26;
1501 if (i->saturate) {
1502 assert(subOp == 6 && i->op == OP_EX2);
1503 code[1] |= 1 << 27;
1504 }
1505 emitForm_MAD(i);
1506 }
1507 }
1508
1509 void
emitNOT(const Instruction * i)1510 CodeEmitterNV50::emitNOT(const Instruction *i)
1511 {
1512 code[0] = 0xd0000000;
1513 code[1] = 0x0002c000;
1514
1515 switch (i->sType) {
1516 case TYPE_U32:
1517 case TYPE_S32:
1518 code[1] |= 0x04000000;
1519 break;
1520 default:
1521 break;
1522 }
1523 emitForm_MAD(i);
1524 setSrc(i, 0, 1);
1525 }
1526
1527 void
emitLogicOp(const Instruction * i)1528 CodeEmitterNV50::emitLogicOp(const Instruction *i)
1529 {
1530 code[0] = 0xd0000000;
1531 code[1] = 0;
1532
1533 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1534 switch (i->op) {
1535 case OP_OR: code[0] |= 0x0100; break;
1536 case OP_XOR: code[0] |= 0x8000; break;
1537 default:
1538 assert(i->op == OP_AND);
1539 break;
1540 }
1541 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
1542 code[0] |= 1 << 22;
1543
1544 emitForm_IMM(i);
1545 } else {
1546 switch (i->op) {
1547 case OP_AND: code[1] = 0x04000000; break;
1548 case OP_OR: code[1] = 0x04004000; break;
1549 case OP_XOR: code[1] = 0x04008000; break;
1550 default:
1551 assert(0);
1552 break;
1553 }
1554 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
1555 code[1] |= 1 << 16;
1556 if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT))
1557 code[1] |= 1 << 17;
1558
1559 emitForm_MAD(i);
1560 }
1561 }
1562
1563 void
emitARL(const Instruction * i,unsigned int shl)1564 CodeEmitterNV50::emitARL(const Instruction *i, unsigned int shl)
1565 {
1566 code[0] = 0x00000001 | (shl << 16);
1567 code[1] = 0xc0000000;
1568
1569 code[0] |= (DDATA(i->def(0)).id + 1) << 2;
1570
1571 setSrcFileBits(i, NV50_OP_ENC_IMM);
1572 setSrc(i, 0, 0);
1573 emitFlagsRd(i);
1574 }
1575
1576 void
emitShift(const Instruction * i)1577 CodeEmitterNV50::emitShift(const Instruction *i)
1578 {
1579 if (i->def(0).getFile() == FILE_ADDRESS) {
1580 assert(i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE);
1581 emitARL(i, i->getSrc(1)->reg.data.u32 & 0x3f);
1582 } else {
1583 code[0] = 0x30000001;
1584 code[1] = (i->op == OP_SHR) ? 0xe4000000 : 0xc4000000;
1585 if (i->op == OP_SHR && isSignedType(i->sType))
1586 code[1] |= 1 << 27;
1587
1588 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1589 code[1] |= 1 << 20;
1590 code[0] |= (i->getSrc(1)->reg.data.u32 & 0x7f) << 16;
1591 defId(i->def(0), 2);
1592 srcId(i->src(0), 9);
1593 emitFlagsRd(i);
1594 } else {
1595 emitForm_MAD(i);
1596 }
1597 }
1598 }
1599
1600 void
emitOUT(const Instruction * i)1601 CodeEmitterNV50::emitOUT(const Instruction *i)
1602 {
1603 code[0] = (i->op == OP_EMIT) ? 0xf0000201 : 0xf0000401;
1604 code[1] = 0xc0000000;
1605
1606 emitFlagsRd(i);
1607 }
1608
1609 void
emitTEX(const TexInstruction * i)1610 CodeEmitterNV50::emitTEX(const TexInstruction *i)
1611 {
1612 code[0] = 0xf0000001;
1613 code[1] = 0x00000000;
1614
1615 switch (i->op) {
1616 case OP_TXB:
1617 code[1] = 0x20000000;
1618 break;
1619 case OP_TXL:
1620 code[1] = 0x40000000;
1621 break;
1622 case OP_TXF:
1623 code[0] |= 0x01000000;
1624 break;
1625 case OP_TXG:
1626 code[0] |= 0x01000000;
1627 code[1] = 0x80000000;
1628 break;
1629 case OP_TXLQ:
1630 code[1] = 0x60020000;
1631 break;
1632 default:
1633 assert(i->op == OP_TEX);
1634 break;
1635 }
1636
1637 code[0] |= i->tex.r << 9;
1638 code[0] |= i->tex.s << 17;
1639
1640 int argc = i->tex.target.getArgCount();
1641
1642 if (i->op == OP_TXB || i->op == OP_TXL || i->op == OP_TXF)
1643 argc += 1;
1644 if (i->tex.target.isShadow())
1645 argc += 1;
1646 assert(argc <= 4);
1647
1648 code[0] |= (argc - 1) << 22;
1649
1650 if (i->tex.target.isCube()) {
1651 code[0] |= 0x08000000;
1652 } else
1653 if (i->tex.useOffsets) {
1654 code[1] |= (i->tex.offset[0] & 0xf) << 24;
1655 code[1] |= (i->tex.offset[1] & 0xf) << 20;
1656 code[1] |= (i->tex.offset[2] & 0xf) << 16;
1657 }
1658
1659 code[0] |= (i->tex.mask & 0x3) << 25;
1660 code[1] |= (i->tex.mask & 0xc) << 12;
1661
1662 if (i->tex.liveOnly)
1663 code[1] |= 1 << 2;
1664 if (i->tex.derivAll)
1665 code[1] |= 1 << 3;
1666
1667 defId(i->def(0), 2);
1668
1669 emitFlagsRd(i);
1670 }
1671
1672 void
emitTXQ(const TexInstruction * i)1673 CodeEmitterNV50::emitTXQ(const TexInstruction *i)
1674 {
1675 assert(i->tex.query == TXQ_DIMS);
1676
1677 code[0] = 0xf0000001;
1678 code[1] = 0x60000000;
1679
1680 code[0] |= i->tex.r << 9;
1681 code[0] |= i->tex.s << 17;
1682
1683 code[0] |= (i->tex.mask & 0x3) << 25;
1684 code[1] |= (i->tex.mask & 0xc) << 12;
1685
1686 defId(i->def(0), 2);
1687
1688 emitFlagsRd(i);
1689 }
1690
1691 void
emitTEXPREP(const TexInstruction * i)1692 CodeEmitterNV50::emitTEXPREP(const TexInstruction *i)
1693 {
1694 code[0] = 0xf8000001 | (3 << 22) | (i->tex.s << 17) | (i->tex.r << 9);
1695 code[1] = 0x60010000;
1696
1697 code[0] |= (i->tex.mask & 0x3) << 25;
1698 code[1] |= (i->tex.mask & 0xc) << 12;
1699 defId(i->def(0), 2);
1700
1701 emitFlagsRd(i);
1702 }
1703
1704 void
emitPRERETEmu(const FlowInstruction * i)1705 CodeEmitterNV50::emitPRERETEmu(const FlowInstruction *i)
1706 {
1707 uint32_t pos = i->target.bb->binPos + 8; // +8 to skip an op */
1708
1709 code[0] = 0x10000003; // bra
1710 code[1] = 0x00000780; // always
1711
1712 switch (i->subOp) {
1713 case NV50_IR_SUBOP_EMU_PRERET + 0: // bra to the call
1714 break;
1715 case NV50_IR_SUBOP_EMU_PRERET + 1: // bra to skip the call
1716 pos += 8;
1717 break;
1718 default:
1719 assert(i->subOp == (NV50_IR_SUBOP_EMU_PRERET + 2));
1720 code[0] = 0x20000003; // call
1721 code[1] = 0x00000000; // no predicate
1722 break;
1723 }
1724 addReloc(RelocEntry::TYPE_CODE, 0, pos, 0x07fff800, 9);
1725 addReloc(RelocEntry::TYPE_CODE, 1, pos, 0x000fc000, -4);
1726 }
1727
1728 void
emitFlow(const Instruction * i,uint8_t flowOp)1729 CodeEmitterNV50::emitFlow(const Instruction *i, uint8_t flowOp)
1730 {
1731 const FlowInstruction *f = i->asFlow();
1732 bool hasPred = false;
1733 bool hasTarg = false;
1734
1735 code[0] = 0x00000003 | (flowOp << 28);
1736 code[1] = 0x00000000;
1737
1738 switch (i->op) {
1739 case OP_BRA:
1740 hasPred = true;
1741 hasTarg = true;
1742 break;
1743 case OP_BREAK:
1744 case OP_BRKPT:
1745 case OP_DISCARD:
1746 case OP_RET:
1747 hasPred = true;
1748 break;
1749 case OP_CALL:
1750 case OP_PREBREAK:
1751 case OP_JOINAT:
1752 hasTarg = true;
1753 break;
1754 case OP_PRERET:
1755 hasTarg = true;
1756 if (i->subOp >= NV50_IR_SUBOP_EMU_PRERET) {
1757 emitPRERETEmu(f);
1758 return;
1759 }
1760 break;
1761 default:
1762 break;
1763 }
1764
1765 if (hasPred)
1766 emitFlagsRd(i);
1767
1768 if (hasTarg && f) {
1769 uint32_t pos;
1770
1771 if (f->op == OP_CALL) {
1772 if (f->builtin) {
1773 pos = targNV50->getBuiltinOffset(f->target.builtin);
1774 } else {
1775 pos = f->target.fn->binPos;
1776 }
1777 } else {
1778 pos = f->target.bb->binPos;
1779 }
1780
1781 code[0] |= ((pos >> 2) & 0xffff) << 11;
1782 code[1] |= ((pos >> 18) & 0x003f) << 14;
1783
1784 RelocEntry::Type relocTy;
1785
1786 relocTy = f->builtin ? RelocEntry::TYPE_BUILTIN : RelocEntry::TYPE_CODE;
1787
1788 addReloc(relocTy, 0, pos, 0x07fff800, 9);
1789 addReloc(relocTy, 1, pos, 0x000fc000, -4);
1790 }
1791 }
1792
1793 void
emitBAR(const Instruction * i)1794 CodeEmitterNV50::emitBAR(const Instruction *i)
1795 {
1796 ImmediateValue *barId = i->getSrc(0)->asImm();
1797 assert(barId);
1798
1799 code[0] = 0x82000003 | (barId->reg.data.u32 << 21);
1800 code[1] = 0x00004000;
1801
1802 if (i->subOp == NV50_IR_SUBOP_BAR_SYNC)
1803 code[0] |= 1 << 26;
1804 }
1805
1806 void
emitATOM(const Instruction * i)1807 CodeEmitterNV50::emitATOM(const Instruction *i)
1808 {
1809 uint8_t subOp;
1810 switch (i->subOp) {
1811 case NV50_IR_SUBOP_ATOM_ADD: subOp = 0x0; break;
1812 case NV50_IR_SUBOP_ATOM_MIN: subOp = 0x7; break;
1813 case NV50_IR_SUBOP_ATOM_MAX: subOp = 0x6; break;
1814 case NV50_IR_SUBOP_ATOM_INC: subOp = 0x4; break;
1815 case NV50_IR_SUBOP_ATOM_DEC: subOp = 0x5; break;
1816 case NV50_IR_SUBOP_ATOM_AND: subOp = 0xa; break;
1817 case NV50_IR_SUBOP_ATOM_OR: subOp = 0xb; break;
1818 case NV50_IR_SUBOP_ATOM_XOR: subOp = 0xc; break;
1819 case NV50_IR_SUBOP_ATOM_CAS: subOp = 0x2; break;
1820 case NV50_IR_SUBOP_ATOM_EXCH: subOp = 0x1; break;
1821 default:
1822 assert(!"invalid subop");
1823 return;
1824 }
1825 code[0] = 0xd0000001;
1826 code[1] = 0xe0c00000 | (subOp << 2);
1827 if (isSignedType(i->dType))
1828 code[1] |= 1 << 21;
1829
1830 // args
1831 emitFlagsRd(i);
1832 setDst(i, 0);
1833 setSrc(i, 1, 1);
1834 if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
1835 setSrc(i, 2, 2);
1836
1837 // g[] pointer
1838 code[0] |= i->getSrc(0)->reg.fileIndex << 23;
1839 srcId(i->getIndirect(0, 0), 9);
1840 }
1841
// Emit the machine encoding of a single instruction at the current output
// position.
//
// Returns false (emitting nothing) when the instruction has no encoding
// size, when the output buffer would overflow, or when the opcode is one
// that should have been eliminated/lowered earlier or is unknown.  On
// success the output pointer and codeSize advance by insn->encSize bytes.
bool
CodeEmitterNV50::emitInstruction(Instruction *insn)
{
   if (!insn->encSize) {
      ERROR("skipping unencodable instruction: "); insn->print();
      return false;
   } else
   if (codeSize + insn->encSize > codeSizeLimit) {
      ERROR("code emitter output buffer too small\n");
      return false;
   }

   if (insn->bb->getProgram()->dbgFlags & NV50_IR_DEBUG_BASIC) {
      INFO("EMIT: "); insn->print();
   }

   // Dispatch on the opcode; several opcodes are further split by data type
   // or register file.
   switch (insn->op) {
   case OP_MOV:
      emitMOV(insn);
      break;
   case OP_EXIT:
   case OP_NOP:
   case OP_JOIN:
      // EXIT and JOIN are emitted as a NOP; their flag bit is added after
      // the switch.
      emitNOP();
      break;
   case OP_VFETCH:
   case OP_LOAD:
      emitLOAD(insn);
      break;
   case OP_EXPORT:
   case OP_STORE:
      emitSTORE(insn);
      break;
   case OP_PFETCH:
      emitPFETCH(insn);
      break;
   case OP_RDSV:
      emitRDSV(insn);
      break;
   case OP_LINTERP:
   case OP_PINTERP:
      emitINTERP(insn);
      break;
   case OP_ADD:
   case OP_SUB:
      if (insn->dType == TYPE_F64)
         emitDADD(insn);
      else if (isFloatType(insn->dType))
         emitFADD(insn);
      else if (insn->getDef(0)->reg.file == FILE_ADDRESS)
         emitAADD(insn);
      else
         emitUADD(insn);
      break;
   case OP_MUL:
      if (insn->dType == TYPE_F64)
         emitDMUL(insn);
      else if (isFloatType(insn->dType))
         emitFMUL(insn);
      else
         emitIMUL(insn);
      break;
   case OP_MAD:
   case OP_FMA:
      if (insn->dType == TYPE_F64)
         emitDMAD(insn);
      else if (isFloatType(insn->dType))
         emitFMAD(insn);
      else
         emitIMAD(insn);
      break;
   case OP_SAD:
      emitISAD(insn);
      break;
   case OP_NOT:
      emitNOT(insn);
      break;
   case OP_AND:
   case OP_OR:
   case OP_XOR:
      emitLogicOp(insn);
      break;
   case OP_SHL:
   case OP_SHR:
      emitShift(insn);
      break;
   case OP_SET:
      emitSET(insn);
      break;
   case OP_MIN:
   case OP_MAX:
      emitMINMAX(insn);
      break;
   case OP_CEIL:
   case OP_FLOOR:
   case OP_TRUNC:
   case OP_ABS:
   case OP_NEG:
   case OP_SAT:
      emitCVT(insn);
      break;
   case OP_CVT:
      // Conversions touching the address or flags files are not real
      // conversions and use the ARL / MOV encodings instead.
      if (insn->def(0).getFile() == FILE_ADDRESS)
         emitARL(insn, 0);
      else
      if (insn->def(0).getFile() == FILE_FLAGS ||
          insn->src(0).getFile() == FILE_FLAGS ||
          insn->src(0).getFile() == FILE_ADDRESS)
         emitMOV(insn);
      else
         emitCVT(insn);
      break;
   // Special-function ops: the second argument is the sub-operation code.
   case OP_RCP:
      emitSFnOp(insn, 0);
      break;
   case OP_RSQ:
      emitSFnOp(insn, 2);
      break;
   case OP_LG2:
      emitSFnOp(insn, 3);
      break;
   case OP_SIN:
      emitSFnOp(insn, 4);
      break;
   case OP_COS:
      emitSFnOp(insn, 5);
      break;
   case OP_EX2:
      emitSFnOp(insn, 6);
      break;
   case OP_PRESIN:
   case OP_PREEX2:
      emitPreOp(insn);
      break;
   case OP_TEX:
   case OP_TXB:
   case OP_TXL:
   case OP_TXF:
   case OP_TXG:
   case OP_TXLQ:
      emitTEX(insn->asTex());
      break;
   case OP_TXQ:
      emitTXQ(insn->asTex());
      break;
   case OP_TEXPREP:
      emitTEXPREP(insn->asTex());
      break;
   case OP_EMIT:
   case OP_RESTART:
      emitOUT(insn);
      break;
   // Control flow: the second argument is the flow opcode that emitFlow()
   // places in the top bits of code[0].
   case OP_DISCARD:
      emitFlow(insn, 0x0);
      break;
   case OP_BRA:
      emitFlow(insn, 0x1);
      break;
   case OP_CALL:
      emitFlow(insn, 0x2);
      break;
   case OP_RET:
      emitFlow(insn, 0x3);
      break;
   case OP_PREBREAK:
      emitFlow(insn, 0x4);
      break;
   case OP_BREAK:
      emitFlow(insn, 0x5);
      break;
   case OP_QUADON:
      emitFlow(insn, 0x6);
      break;
   case OP_QUADPOP:
      emitFlow(insn, 0x7);
      break;
   case OP_JOINAT:
      emitFlow(insn, 0xa);
      break;
   case OP_PRERET:
      emitFlow(insn, 0xd);
      break;
   case OP_QUADOP:
      emitQUADOP(insn, insn->lanes, insn->subOp);
      break;
   // Derivatives are emitted as quad ops; a negated source selects a
   // different quad-op code.
   case OP_DFDX:
      emitQUADOP(insn, 4, insn->src(0).mod.neg() ? 0x66 : 0x99);
      break;
   case OP_DFDY:
      emitQUADOP(insn, 5, insn->src(0).mod.neg() ? 0x5a : 0xa5);
      break;
   case OP_ATOM:
      emitATOM(insn);
      break;
   case OP_BAR:
      emitBAR(insn);
      break;
   case OP_PHI:
   case OP_UNION:
   case OP_CONSTRAINT:
      ERROR("operation should have been eliminated\n");
      return false;
   case OP_EXP:
   case OP_LOG:
   case OP_SQRT:
   case OP_POW:
   case OP_SELP:
   case OP_SLCT:
   case OP_TXD:
   case OP_PRECONT:
   case OP_CONT:
   case OP_POPCNT:
   case OP_INSBF:
   case OP_EXTBF:
      ERROR("operation should have been lowered\n");
      return false;
   default:
      ERROR("unknown op: %u\n", insn->op);
      return false;
   }
   // Join takes precedence over exit; both are flag bits in code[1].
   if (insn->join || insn->op == OP_JOIN)
      code[1] |= 0x2;
   else
   if (insn->exit || insn->op == OP_EXIT)
      code[1] |= 0x1;

   // Bit 0 of code[0] must be set exactly for 8-byte encodings.
   assert((insn->encSize == 8) == (code[0] & 1));

   // code is indexed in 32-bit words, encSize is in bytes.
   code += insn->encSize / 4;
   codeSize += insn->encSize;
   return true;
}
2074
2075 uint32_t
getMinEncodingSize(const Instruction * i) const2076 CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const
2077 {
2078 const Target::OpInfo &info = targ->getOpInfo(i);
2079
2080 if (info.minEncSize > 4 || i->dType == TYPE_F64)
2081 return 8;
2082
2083 // check constraints on dst and src operands
2084 for (int d = 0; i->defExists(d); ++d) {
2085 if (i->def(d).rep()->reg.data.id > 63 ||
2086 i->def(d).rep()->reg.file != FILE_GPR)
2087 return 8;
2088 }
2089
2090 for (int s = 0; i->srcExists(s); ++s) {
2091 DataFile sf = i->src(s).getFile();
2092 if (sf != FILE_GPR)
2093 if (sf != FILE_SHADER_INPUT || progType != Program::TYPE_FRAGMENT)
2094 return 8;
2095 if (i->src(s).rep()->reg.data.id > 63)
2096 return 8;
2097 }
2098
2099 // check modifiers & rounding
2100 if (i->join || i->lanes != 0xf || i->exit)
2101 return 8;
2102 if (i->op == OP_MUL && i->rnd != ROUND_N)
2103 return 8;
2104
2105 if (i->asTex())
2106 return 8; // TODO: short tex encoding
2107
2108 // check constraints on short MAD
2109 if (info.srcNr >= 2 && i->srcExists(2)) {
2110 if (!i->defExists(0) ||
2111 (i->flagsSrc >= 0 && SDATA(i->src(i->flagsSrc)).id > 0) ||
2112 DDATA(i->def(0)).id != SDATA(i->src(2)).id)
2113 return 8;
2114 }
2115
2116 return info.minEncSize;
2117 }
2118
2119 // Change the encoding size of an instruction after BBs have been scheduled.
2120 static void
makeInstructionLong(Instruction * insn)2121 makeInstructionLong(Instruction *insn)
2122 {
2123 if (insn->encSize == 8)
2124 return;
2125 Function *fn = insn->bb->getFunction();
2126 int n = 0;
2127 int adj = 4;
2128
2129 for (Instruction *i = insn->next; i && i->encSize == 4; ++n, i = i->next);
2130
2131 if (n & 1) {
2132 adj = 8;
2133 insn->next->encSize = 8;
2134 } else
2135 if (insn->prev && insn->prev->encSize == 4) {
2136 adj = 8;
2137 insn->prev->encSize = 8;
2138 }
2139 insn->encSize = 8;
2140
2141 for (int i = fn->bbCount - 1; i >= 0 && fn->bbArray[i] != insn->bb; --i) {
2142 fn->bbArray[i]->binPos += adj;
2143 }
2144 fn->binSize += adj;
2145 insn->bb->binSize += adj;
2146 }
2147
2148 static bool
trySetExitModifier(Instruction * insn)2149 trySetExitModifier(Instruction *insn)
2150 {
2151 if (insn->op == OP_DISCARD ||
2152 insn->op == OP_QUADON ||
2153 insn->op == OP_QUADPOP)
2154 return false;
2155 for (int s = 0; insn->srcExists(s); ++s)
2156 if (insn->src(s).getFile() == FILE_IMMEDIATE)
2157 return false;
2158 if (insn->asFlow()) {
2159 if (insn->op == OP_CALL) // side effects !
2160 return false;
2161 if (insn->getPredicate()) // cannot do conditional exit (or can we ?)
2162 return false;
2163 insn->op = OP_EXIT;
2164 }
2165 insn->exit = 1;
2166 makeInstructionLong(insn);
2167 return true;
2168 }
2169
2170 static void
replaceExitWithModifier(Function * func)2171 replaceExitWithModifier(Function *func)
2172 {
2173 BasicBlock *epilogue = BasicBlock::get(func->cfgExit);
2174
2175 if (!epilogue->getExit() ||
2176 epilogue->getExit()->op != OP_EXIT) // only main will use OP_EXIT
2177 return;
2178
2179 if (epilogue->getEntry()->op != OP_EXIT) {
2180 Instruction *insn = epilogue->getExit()->prev;
2181 if (!insn || !trySetExitModifier(insn))
2182 return;
2183 insn->exit = 1;
2184 } else {
2185 for (Graph::EdgeIterator ei = func->cfgExit->incident();
2186 !ei.end(); ei.next()) {
2187 BasicBlock *bb = BasicBlock::get(ei.getNode());
2188 Instruction *i = bb->getExit();
2189
2190 if (!i || !trySetExitModifier(i))
2191 return;
2192 }
2193 }
2194
2195 int adj = epilogue->getExit()->encSize;
2196 epilogue->binSize -= adj;
2197 func->binSize -= adj;
2198 delete_Instruction(func->getProgram(), epilogue->getExit());
2199
2200 // There may be BB's that are laid out after the exit block
2201 for (int i = func->bbCount - 1; i >= 0 && func->bbArray[i] != epilogue; --i) {
2202 func->bbArray[i]->binPos -= adj;
2203 }
2204 }
2205
2206 void
prepareEmission(Function * func)2207 CodeEmitterNV50::prepareEmission(Function *func)
2208 {
2209 CodeEmitter::prepareEmission(func);
2210
2211 replaceExitWithModifier(func);
2212 }
2213
CodeEmitterNV50(const TargetNV50 * target)2214 CodeEmitterNV50::CodeEmitterNV50(const TargetNV50 *target) :
2215 CodeEmitter(target), targNV50(target)
2216 {
2217 targ = target; // specialized
2218 code = NULL;
2219 codeSize = codeSizeLimit = 0;
2220 relocInfo = NULL;
2221 }
2222
2223 CodeEmitter *
getCodeEmitter(Program::Type type)2224 TargetNV50::getCodeEmitter(Program::Type type)
2225 {
2226 CodeEmitterNV50 *emit = new CodeEmitterNV50(this);
2227 emit->setProgramType(type);
2228 return emit;
2229 }
2230
2231 } // namespace nv50_ir
2232