1 /*
2 * Copyright 2014 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Ben Skeggs <bskeggs@redhat.com>
23 */
24
25 #include "nv50_ir_target_gm107.h"
26 #include "nv50_ir_sched_gm107.h"
27
28 //#define GM107_DEBUG_SCHED_DATA
29
30 namespace nv50_ir {
31
32 class CodeEmitterGM107 : public CodeEmitter
33 {
34 public:
35 CodeEmitterGM107(const TargetGM107 *);
36
37 virtual bool emitInstruction(Instruction *);
38 virtual uint32_t getMinEncodingSize(const Instruction *) const;
39
40 virtual void prepareEmission(Program *);
41 virtual void prepareEmission(Function *);
42
setProgramType(Program::Type pType)43 inline void setProgramType(Program::Type pType) { progType = pType; }
44
45 private:
46 const TargetGM107 *targGM107;
47
48 Program::Type progType;
49
50 const Instruction *insn;
51 const bool writeIssueDelays;
52 uint32_t *data;
53
54 private:
55 inline void emitField(uint32_t *, int, int, uint32_t);
emitField(int b,int s,uint32_t v)56 inline void emitField(int b, int s, uint32_t v) { emitField(code, b, s, v); }
57
58 inline void emitInsn(uint32_t, bool);
emitInsn(uint32_t o)59 inline void emitInsn(uint32_t o) { emitInsn(o, true); }
60 inline void emitPred();
61 inline void emitGPR(int, const Value *);
emitGPR(int pos)62 inline void emitGPR(int pos) {
63 emitGPR(pos, (const Value *)NULL);
64 }
emitGPR(int pos,const ValueRef & ref)65 inline void emitGPR(int pos, const ValueRef &ref) {
66 emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL);
67 }
emitGPR(int pos,const ValueRef * ref)68 inline void emitGPR(int pos, const ValueRef *ref) {
69 emitGPR(pos, ref ? ref->rep() : (const Value *)NULL);
70 }
emitGPR(int pos,const ValueDef & def)71 inline void emitGPR(int pos, const ValueDef &def) {
72 emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL);
73 }
74 inline void emitSYS(int, const Value *);
emitSYS(int pos,const ValueRef & ref)75 inline void emitSYS(int pos, const ValueRef &ref) {
76 emitSYS(pos, ref.get() ? ref.rep() : (const Value *)NULL);
77 }
78 inline void emitPRED(int, const Value *);
emitPRED(int pos)79 inline void emitPRED(int pos) {
80 emitPRED(pos, (const Value *)NULL);
81 }
emitPRED(int pos,const ValueRef & ref)82 inline void emitPRED(int pos, const ValueRef &ref) {
83 emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL);
84 }
emitPRED(int pos,const ValueDef & def)85 inline void emitPRED(int pos, const ValueDef &def) {
86 emitPRED(pos, def.get() ? def.rep() : (const Value *)NULL);
87 }
88 inline void emitADDR(int, int, int, int, const ValueRef &);
89 inline void emitCBUF(int, int, int, int, int, const ValueRef &);
90 inline bool longIMMD(const ValueRef &);
91 inline void emitIMMD(int, int, const ValueRef &);
92
93 void emitCond3(int, CondCode);
94 void emitCond4(int, CondCode);
emitCond5(int pos,CondCode cc)95 void emitCond5(int pos, CondCode cc) { emitCond4(pos, cc); }
96 inline void emitO(int);
97 inline void emitP(int);
98 inline void emitSAT(int);
99 inline void emitCC(int);
100 inline void emitX(int);
101 inline void emitABS(int, const ValueRef &);
102 inline void emitNEG(int, const ValueRef &);
103 inline void emitNEG2(int, const ValueRef &, const ValueRef &);
104 inline void emitFMZ(int, int);
105 inline void emitRND(int, RoundMode, int);
emitRND(int pos)106 inline void emitRND(int pos) {
107 emitRND(pos, insn->rnd, -1);
108 }
109 inline void emitPDIV(int);
110 inline void emitINV(int, const ValueRef &);
111
112 void emitEXIT();
113 void emitBRA();
114 void emitCAL();
115 void emitPCNT();
116 void emitCONT();
117 void emitPBK();
118 void emitBRK();
119 void emitPRET();
120 void emitRET();
121 void emitSSY();
122 void emitSYNC();
123 void emitSAM();
124 void emitRAM();
125
126 void emitPSETP();
127
128 void emitMOV();
129 void emitS2R();
130 void emitCS2R();
131 void emitF2F();
132 void emitF2I();
133 void emitI2F();
134 void emitI2I();
135 void emitSEL();
136 void emitSHFL();
137
138 void emitDADD();
139 void emitDMUL();
140 void emitDFMA();
141 void emitDMNMX();
142 void emitDSET();
143 void emitDSETP();
144
145 void emitFADD();
146 void emitFMUL();
147 void emitFFMA();
148 void emitMUFU();
149 void emitFMNMX();
150 void emitRRO();
151 void emitFCMP();
152 void emitFSET();
153 void emitFSETP();
154 void emitFSWZADD();
155
156 void emitLOP();
157 void emitNOT();
158 void emitIADD();
159 void emitIMUL();
160 void emitIMAD();
161 void emitISCADD();
162 void emitXMAD();
163 void emitIMNMX();
164 void emitICMP();
165 void emitISET();
166 void emitISETP();
167 void emitSHL();
168 void emitSHR();
169 void emitSHF();
170 void emitPOPC();
171 void emitBFI();
172 void emitBFE();
173 void emitFLO();
174 void emitPRMT();
175
176 void emitLDSTs(int, DataType);
177 void emitLDSTc(int);
178 void emitLDC();
179 void emitLDL();
180 void emitLDS();
181 void emitLD();
182 void emitSTL();
183 void emitSTS();
184 void emitST();
185 void emitALD();
186 void emitAST();
187 void emitISBERD();
188 void emitAL2P();
189 void emitIPA();
190 void emitATOM();
191 void emitATOMS();
192 void emitRED();
193 void emitCCTL();
194
195 void emitPIXLD();
196
197 void emitTEXs(int);
198 void emitTEX();
199 void emitTEXS();
200 void emitTLD();
201 void emitTLD4();
202 void emitTXD();
203 void emitTXQ();
204 void emitTMML();
205 void emitDEPBAR();
206
207 void emitNOP();
208 void emitKIL();
209 void emitOUT();
210
211 void emitBAR();
212 void emitMEMBAR();
213
214 void emitVOTE();
215
216 void emitSUTarget();
217 void emitSUHandle(const int s);
218 void emitSUSTx();
219 void emitSULDx();
220 void emitSUREDx();
221 };
222
223 /*******************************************************************************
224 * general instruction layout/fields
225 ******************************************************************************/
226
227 void
emitField(uint32_t * data,int b,int s,uint32_t v)228 CodeEmitterGM107::emitField(uint32_t *data, int b, int s, uint32_t v)
229 {
230 if (b >= 0) {
231 uint32_t m = ((1ULL << s) - 1);
232 uint64_t d = (uint64_t)(v & m) << b;
233 assert(!(v & ~m) || (v & ~m) == ~m);
234 data[1] |= d >> 32;
235 data[0] |= d;
236 }
237 }
238
239 void
emitPred()240 CodeEmitterGM107::emitPred()
241 {
242 if (insn->predSrc >= 0) {
243 emitField(16, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id);
244 emitField(19, 1, insn->cc == CC_NOT_P);
245 } else {
246 emitField(16, 3, 7);
247 }
248 }
249
250 void
emitInsn(uint32_t hi,bool pred)251 CodeEmitterGM107::emitInsn(uint32_t hi, bool pred)
252 {
253 code[0] = 0x00000000;
254 code[1] = hi;
255 if (pred)
256 emitPred();
257 }
258
259 void
emitGPR(int pos,const Value * val)260 CodeEmitterGM107::emitGPR(int pos, const Value *val)
261 {
262 emitField(pos, 8, val && !val->inFile(FILE_FLAGS) ?
263 val->reg.data.id : 255);
264 }
265
266 void
emitSYS(int pos,const Value * val)267 CodeEmitterGM107::emitSYS(int pos, const Value *val)
268 {
269 int id = val ? val->reg.data.id : -1;
270
271 switch (id) {
272 case SV_LANEID : id = 0x00; break;
273 case SV_VERTEX_COUNT : id = 0x10; break;
274 case SV_INVOCATION_ID : id = 0x11; break;
275 case SV_THREAD_KILL : id = 0x13; break;
276 case SV_INVOCATION_INFO: id = 0x1d; break;
277 case SV_COMBINED_TID : id = 0x20; break;
278 case SV_TID : id = 0x21 + val->reg.data.sv.index; break;
279 case SV_CTAID : id = 0x25 + val->reg.data.sv.index; break;
280 case SV_LANEMASK_EQ : id = 0x38; break;
281 case SV_LANEMASK_LT : id = 0x39; break;
282 case SV_LANEMASK_LE : id = 0x3a; break;
283 case SV_LANEMASK_GT : id = 0x3b; break;
284 case SV_LANEMASK_GE : id = 0x3c; break;
285 case SV_CLOCK : id = 0x50 + val->reg.data.sv.index; break;
286 default:
287 assert(!"invalid system value");
288 id = 0;
289 break;
290 }
291
292 emitField(pos, 8, id);
293 }
294
295 void
emitPRED(int pos,const Value * val)296 CodeEmitterGM107::emitPRED(int pos, const Value *val)
297 {
298 emitField(pos, 3, val ? val->reg.data.id : 7);
299 }
300
301 void
emitADDR(int gpr,int off,int len,int shr,const ValueRef & ref)302 CodeEmitterGM107::emitADDR(int gpr, int off, int len, int shr,
303 const ValueRef &ref)
304 {
305 const Value *v = ref.get();
306 assert(!(v->reg.data.offset & ((1 << shr) - 1)));
307 if (gpr >= 0)
308 emitGPR(gpr, ref.getIndirect(0));
309 emitField(off, len, v->reg.data.offset >> shr);
310 }
311
312 void
emitCBUF(int buf,int gpr,int off,int len,int shr,const ValueRef & ref)313 CodeEmitterGM107::emitCBUF(int buf, int gpr, int off, int len, int shr,
314 const ValueRef &ref)
315 {
316 const Value *v = ref.get();
317 const Symbol *s = v->asSym();
318
319 assert(!(s->reg.data.offset & ((1 << shr) - 1)));
320
321 emitField(buf, 5, v->reg.fileIndex);
322 if (gpr >= 0)
323 emitGPR(gpr, ref.getIndirect(0));
324 emitField(off, 16, s->reg.data.offset >> shr);
325 }
326
327 bool
longIMMD(const ValueRef & ref)328 CodeEmitterGM107::longIMMD(const ValueRef &ref)
329 {
330 if (ref.getFile() == FILE_IMMEDIATE) {
331 const ImmediateValue *imm = ref.get()->asImm();
332 if (isFloatType(insn->sType))
333 return imm->reg.data.u32 & 0xfff;
334 else
335 return imm->reg.data.s32 > 0x7ffff || imm->reg.data.s32 < -0x80000;
336 }
337 return false;
338 }
339
340 void
emitIMMD(int pos,int len,const ValueRef & ref)341 CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef &ref)
342 {
343 const ImmediateValue *imm = ref.get()->asImm();
344 uint32_t val = imm->reg.data.u32;
345
346 if (len == 19) {
347 if (insn->sType == TYPE_F32 || insn->sType == TYPE_F16) {
348 assert(!(val & 0x00000fff));
349 val >>= 12;
350 } else if (insn->sType == TYPE_F64) {
351 assert(!(imm->reg.data.u64 & 0x00000fffffffffffULL));
352 val = imm->reg.data.u64 >> 44;
353 } else {
354 assert(!(val & 0xfff80000) || (val & 0xfff80000) == 0xfff80000);
355 }
356 emitField( 56, 1, (val & 0x80000) >> 19);
357 emitField(pos, len, (val & 0x7ffff));
358 } else {
359 emitField(pos, len, val);
360 }
361 }
362
363 /*******************************************************************************
364 * modifiers
365 ******************************************************************************/
366
367 void
emitCond3(int pos,CondCode code)368 CodeEmitterGM107::emitCond3(int pos, CondCode code)
369 {
370 int data = 0;
371
372 switch (code) {
373 case CC_FL : data = 0x00; break;
374 case CC_LTU:
375 case CC_LT : data = 0x01; break;
376 case CC_EQU:
377 case CC_EQ : data = 0x02; break;
378 case CC_LEU:
379 case CC_LE : data = 0x03; break;
380 case CC_GTU:
381 case CC_GT : data = 0x04; break;
382 case CC_NEU:
383 case CC_NE : data = 0x05; break;
384 case CC_GEU:
385 case CC_GE : data = 0x06; break;
386 case CC_TR : data = 0x07; break;
387 default:
388 assert(!"invalid cond3");
389 break;
390 }
391
392 emitField(pos, 3, data);
393 }
394
395 void
emitCond4(int pos,CondCode code)396 CodeEmitterGM107::emitCond4(int pos, CondCode code)
397 {
398 int data = 0;
399
400 switch (code) {
401 case CC_FL: data = 0x00; break;
402 case CC_LT: data = 0x01; break;
403 case CC_EQ: data = 0x02; break;
404 case CC_LE: data = 0x03; break;
405 case CC_GT: data = 0x04; break;
406 case CC_NE: data = 0x05; break;
407 case CC_GE: data = 0x06; break;
408 // case CC_NUM: data = 0x07; break;
409 // case CC_NAN: data = 0x08; break;
410 case CC_LTU: data = 0x09; break;
411 case CC_EQU: data = 0x0a; break;
412 case CC_LEU: data = 0x0b; break;
413 case CC_GTU: data = 0x0c; break;
414 case CC_NEU: data = 0x0d; break;
415 case CC_GEU: data = 0x0e; break;
416 case CC_TR: data = 0x0f; break;
417 default:
418 assert(!"invalid cond4");
419 break;
420 }
421
422 emitField(pos, 4, data);
423 }
424
425 void
emitO(int pos)426 CodeEmitterGM107::emitO(int pos)
427 {
428 emitField(pos, 1, insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT);
429 }
430
431 void
emitP(int pos)432 CodeEmitterGM107::emitP(int pos)
433 {
434 emitField(pos, 1, insn->perPatch);
435 }
436
437 void
emitSAT(int pos)438 CodeEmitterGM107::emitSAT(int pos)
439 {
440 emitField(pos, 1, insn->saturate);
441 }
442
443 void
emitCC(int pos)444 CodeEmitterGM107::emitCC(int pos)
445 {
446 emitField(pos, 1, insn->flagsDef >= 0);
447 }
448
449 void
emitX(int pos)450 CodeEmitterGM107::emitX(int pos)
451 {
452 emitField(pos, 1, insn->flagsSrc >= 0);
453 }
454
455 void
emitABS(int pos,const ValueRef & ref)456 CodeEmitterGM107::emitABS(int pos, const ValueRef &ref)
457 {
458 emitField(pos, 1, ref.mod.abs());
459 }
460
461 void
emitNEG(int pos,const ValueRef & ref)462 CodeEmitterGM107::emitNEG(int pos, const ValueRef &ref)
463 {
464 emitField(pos, 1, ref.mod.neg());
465 }
466
467 void
emitNEG2(int pos,const ValueRef & a,const ValueRef & b)468 CodeEmitterGM107::emitNEG2(int pos, const ValueRef &a, const ValueRef &b)
469 {
470 emitField(pos, 1, a.mod.neg() ^ b.mod.neg());
471 }
472
473 void
emitFMZ(int pos,int len)474 CodeEmitterGM107::emitFMZ(int pos, int len)
475 {
476 emitField(pos, len, insn->dnz << 1 | insn->ftz);
477 }
478
479 void
emitRND(int rmp,RoundMode rnd,int rip)480 CodeEmitterGM107::emitRND(int rmp, RoundMode rnd, int rip)
481 {
482 int rm = 0, ri = 0;
483 switch (rnd) {
484 case ROUND_NI: ri = 1;
485 case ROUND_N : rm = 0; break;
486 case ROUND_MI: ri = 1;
487 case ROUND_M : rm = 1; break;
488 case ROUND_PI: ri = 1;
489 case ROUND_P : rm = 2; break;
490 case ROUND_ZI: ri = 1;
491 case ROUND_Z : rm = 3; break;
492 default:
493 assert(!"invalid round mode");
494 break;
495 }
496 emitField(rip, 1, ri);
497 emitField(rmp, 2, rm);
498 }
499
500 void
emitPDIV(int pos)501 CodeEmitterGM107::emitPDIV(int pos)
502 {
503 assert(insn->postFactor >= -3 && insn->postFactor <= 3);
504 if (insn->postFactor > 0)
505 emitField(pos, 3, 7 - insn->postFactor);
506 else
507 emitField(pos, 3, 0 - insn->postFactor);
508 }
509
510 void
emitINV(int pos,const ValueRef & ref)511 CodeEmitterGM107::emitINV(int pos, const ValueRef &ref)
512 {
513 emitField(pos, 1, !!(ref.mod & Modifier(NV50_IR_MOD_NOT)));
514 }
515
516 /*******************************************************************************
517 * control flow
518 ******************************************************************************/
519
520 void
emitEXIT()521 CodeEmitterGM107::emitEXIT()
522 {
523 emitInsn (0xe3000000);
524 emitCond5(0x00, CC_TR);
525 }
526
527 void
emitBRA()528 CodeEmitterGM107::emitBRA()
529 {
530 const FlowInstruction *insn = this->insn->asFlow();
531 int gpr = -1;
532
533 if (insn->indirect) {
534 if (insn->absolute)
535 emitInsn(0xe2000000); // JMX
536 else
537 emitInsn(0xe2500000); // BRX
538 gpr = 0x08;
539 } else {
540 if (insn->absolute)
541 emitInsn(0xe2100000); // JMP
542 else
543 emitInsn(0xe2400000); // BRA
544 emitField(0x07, 1, insn->allWarp);
545 }
546
547 emitField(0x06, 1, insn->limit);
548 emitCond5(0x00, CC_TR);
549
550 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
551 int32_t pos = insn->target.bb->binPos;
552 if (writeIssueDelays && !(pos & 0x1f))
553 pos += 8;
554 if (!insn->absolute)
555 emitField(0x14, 24, pos - (codeSize + 8));
556 else
557 emitField(0x14, 32, pos);
558 } else {
559 emitCBUF (0x24, gpr, 20, 16, 0, insn->src(0));
560 emitField(0x05, 1, 1);
561 }
562 }
563
564 void
emitCAL()565 CodeEmitterGM107::emitCAL()
566 {
567 const FlowInstruction *insn = this->insn->asFlow();
568
569 if (insn->absolute) {
570 emitInsn(0xe2200000, false); // JCAL
571 } else {
572 emitInsn(0xe2600000, false); // CAL
573 }
574
575 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
576 if (!insn->absolute)
577 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
578 else {
579 if (insn->builtin) {
580 int pcAbs = targGM107->getBuiltinOffset(insn->target.builtin);
581 addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfff00000, 20);
582 addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x000fffff, -12);
583 } else {
584 emitField(0x14, 32, insn->target.bb->binPos);
585 }
586 }
587 } else {
588 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
589 emitField(0x05, 1, 1);
590 }
591 }
592
593 void
emitPCNT()594 CodeEmitterGM107::emitPCNT()
595 {
596 const FlowInstruction *insn = this->insn->asFlow();
597
598 emitInsn(0xe2b00000, false);
599
600 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
601 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
602 } else {
603 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
604 emitField(0x05, 1, 1);
605 }
606 }
607
608 void
emitCONT()609 CodeEmitterGM107::emitCONT()
610 {
611 emitInsn (0xe3500000);
612 emitCond5(0x00, CC_TR);
613 }
614
615 void
emitPBK()616 CodeEmitterGM107::emitPBK()
617 {
618 const FlowInstruction *insn = this->insn->asFlow();
619
620 emitInsn(0xe2a00000, false);
621
622 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
623 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
624 } else {
625 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
626 emitField(0x05, 1, 1);
627 }
628 }
629
630 void
emitBRK()631 CodeEmitterGM107::emitBRK()
632 {
633 emitInsn (0xe3400000);
634 emitCond5(0x00, CC_TR);
635 }
636
637 void
emitPRET()638 CodeEmitterGM107::emitPRET()
639 {
640 const FlowInstruction *insn = this->insn->asFlow();
641
642 emitInsn(0xe2700000, false);
643
644 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
645 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
646 } else {
647 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
648 emitField(0x05, 1, 1);
649 }
650 }
651
652 void
emitRET()653 CodeEmitterGM107::emitRET()
654 {
655 emitInsn (0xe3200000);
656 emitCond5(0x00, CC_TR);
657 }
658
659 void
emitSSY()660 CodeEmitterGM107::emitSSY()
661 {
662 const FlowInstruction *insn = this->insn->asFlow();
663
664 emitInsn(0xe2900000, false);
665
666 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
667 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
668 } else {
669 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
670 emitField(0x05, 1, 1);
671 }
672 }
673
674 void
emitSYNC()675 CodeEmitterGM107::emitSYNC()
676 {
677 emitInsn (0xf0f80000);
678 emitCond5(0x00, CC_TR);
679 }
680
681 void
emitSAM()682 CodeEmitterGM107::emitSAM()
683 {
684 emitInsn(0xe3700000, false);
685 }
686
687 void
emitRAM()688 CodeEmitterGM107::emitRAM()
689 {
690 emitInsn(0xe3800000, false);
691 }
692
693 /*******************************************************************************
694 * predicate/cc
695 ******************************************************************************/
696
697 void
emitPSETP()698 CodeEmitterGM107::emitPSETP()
699 {
700
701 emitInsn(0x50900000);
702
703 switch (insn->op) {
704 case OP_AND: emitField(0x18, 3, 0); break;
705 case OP_OR: emitField(0x18, 3, 1); break;
706 case OP_XOR: emitField(0x18, 3, 2); break;
707 default:
708 assert(!"unexpected operation");
709 break;
710 }
711
712 // emitINV (0x2a);
713 emitPRED(0x27); // TODO: support 3-arg
714 emitINV (0x20, insn->src(1));
715 emitPRED(0x1d, insn->src(1));
716 emitINV (0x0f, insn->src(0));
717 emitPRED(0x0c, insn->src(0));
718 emitPRED(0x03, insn->def(0));
719 emitPRED(0x00);
720 }
721
722 /*******************************************************************************
723 * movement / conversion
724 ******************************************************************************/
725
726 void
emitMOV()727 CodeEmitterGM107::emitMOV()
728 {
729 if (insn->src(0).getFile() != FILE_IMMEDIATE) {
730 switch (insn->src(0).getFile()) {
731 case FILE_GPR:
732 if (insn->def(0).getFile() == FILE_PREDICATE) {
733 emitInsn(0x5b6a0000);
734 emitGPR (0x08);
735 } else {
736 emitInsn(0x5c980000);
737 }
738 emitGPR (0x14, insn->src(0));
739 break;
740 case FILE_MEMORY_CONST:
741 emitInsn(0x4c980000);
742 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
743 break;
744 case FILE_IMMEDIATE:
745 emitInsn(0x38980000);
746 emitIMMD(0x14, 19, insn->src(0));
747 break;
748 case FILE_PREDICATE:
749 emitInsn(0x50880000);
750 emitPRED(0x0c, insn->src(0));
751 emitPRED(0x1d);
752 emitPRED(0x27);
753 break;
754 default:
755 assert(!"bad src file");
756 break;
757 }
758 if (insn->def(0).getFile() != FILE_PREDICATE &&
759 insn->src(0).getFile() != FILE_PREDICATE)
760 emitField(0x27, 4, insn->lanes);
761 } else {
762 emitInsn (0x01000000);
763 emitIMMD (0x14, 32, insn->src(0));
764 emitField(0x0c, 4, insn->lanes);
765 }
766
767 if (insn->def(0).getFile() == FILE_PREDICATE) {
768 emitPRED(0x27);
769 emitPRED(0x03, insn->def(0));
770 emitPRED(0x00);
771 } else {
772 emitGPR(0x00, insn->def(0));
773 }
774 }
775
776 void
emitS2R()777 CodeEmitterGM107::emitS2R()
778 {
779 emitInsn(0xf0c80000);
780 emitSYS (0x14, insn->src(0));
781 emitGPR (0x00, insn->def(0));
782 }
783
784 void
emitCS2R()785 CodeEmitterGM107::emitCS2R()
786 {
787 emitInsn(0x50c80000);
788 emitSYS (0x14, insn->src(0));
789 emitGPR (0x00, insn->def(0));
790 }
791
792 void
emitF2F()793 CodeEmitterGM107::emitF2F()
794 {
795 RoundMode rnd = insn->rnd;
796
797 switch (insn->op) {
798 case OP_FLOOR: rnd = ROUND_MI; break;
799 case OP_CEIL : rnd = ROUND_PI; break;
800 case OP_TRUNC: rnd = ROUND_ZI; break;
801 default:
802 break;
803 }
804
805 switch (insn->src(0).getFile()) {
806 case FILE_GPR:
807 emitInsn(0x5ca80000);
808 emitGPR (0x14, insn->src(0));
809 break;
810 case FILE_MEMORY_CONST:
811 emitInsn(0x4ca80000);
812 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
813 break;
814 case FILE_IMMEDIATE:
815 emitInsn(0x38a80000);
816 emitIMMD(0x14, 19, insn->src(0));
817 break;
818 default:
819 assert(!"bad src0 file");
820 break;
821 }
822
823 emitField(0x32, 1, (insn->op == OP_SAT) || insn->saturate);
824 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
825 emitCC (0x2f);
826 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
827 emitFMZ (0x2c, 1);
828 emitField(0x29, 1, insn->subOp);
829 emitRND (0x27, rnd, 0x2a);
830 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
831 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
832 emitGPR (0x00, insn->def(0));
833 }
834
835 void
emitF2I()836 CodeEmitterGM107::emitF2I()
837 {
838 RoundMode rnd = insn->rnd;
839
840 switch (insn->op) {
841 case OP_FLOOR: rnd = ROUND_M; break;
842 case OP_CEIL : rnd = ROUND_P; break;
843 case OP_TRUNC: rnd = ROUND_Z; break;
844 default:
845 break;
846 }
847
848 switch (insn->src(0).getFile()) {
849 case FILE_GPR:
850 emitInsn(0x5cb00000);
851 emitGPR (0x14, insn->src(0));
852 break;
853 case FILE_MEMORY_CONST:
854 emitInsn(0x4cb00000);
855 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
856 break;
857 case FILE_IMMEDIATE:
858 emitInsn(0x38b00000);
859 emitIMMD(0x14, 19, insn->src(0));
860 break;
861 default:
862 assert(!"bad src0 file");
863 break;
864 }
865
866 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
867 emitCC (0x2f);
868 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
869 emitFMZ (0x2c, 1);
870 emitRND (0x27, rnd, 0x2a);
871 emitField(0x0c, 1, isSignedType(insn->dType));
872 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
873 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
874 emitGPR (0x00, insn->def(0));
875 }
876
877 void
emitI2F()878 CodeEmitterGM107::emitI2F()
879 {
880 RoundMode rnd = insn->rnd;
881
882 switch (insn->op) {
883 case OP_FLOOR: rnd = ROUND_M; break;
884 case OP_CEIL : rnd = ROUND_P; break;
885 case OP_TRUNC: rnd = ROUND_Z; break;
886 default:
887 break;
888 }
889
890 switch (insn->src(0).getFile()) {
891 case FILE_GPR:
892 emitInsn(0x5cb80000);
893 emitGPR (0x14, insn->src(0));
894 break;
895 case FILE_MEMORY_CONST:
896 emitInsn(0x4cb80000);
897 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
898 break;
899 case FILE_IMMEDIATE:
900 emitInsn(0x38b80000);
901 emitIMMD(0x14, 19, insn->src(0));
902 break;
903 default:
904 assert(!"bad src0 file");
905 break;
906 }
907
908 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
909 emitCC (0x2f);
910 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
911 emitField(0x29, 2, insn->subOp);
912 emitRND (0x27, rnd, -1);
913 emitField(0x0d, 1, isSignedType(insn->sType));
914 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
915 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
916 emitGPR (0x00, insn->def(0));
917 }
918
919 void
emitI2I()920 CodeEmitterGM107::emitI2I()
921 {
922 switch (insn->src(0).getFile()) {
923 case FILE_GPR:
924 emitInsn(0x5ce00000);
925 emitGPR (0x14, insn->src(0));
926 break;
927 case FILE_MEMORY_CONST:
928 emitInsn(0x4ce00000);
929 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
930 break;
931 case FILE_IMMEDIATE:
932 emitInsn(0x38e00000);
933 emitIMMD(0x14, 19, insn->src(0));
934 break;
935 default:
936 assert(!"bad src0 file");
937 break;
938 }
939
940 emitSAT (0x32);
941 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
942 emitCC (0x2f);
943 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
944 emitField(0x29, 2, insn->subOp);
945 emitField(0x0d, 1, isSignedType(insn->sType));
946 emitField(0x0c, 1, isSignedType(insn->dType));
947 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
948 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
949 emitGPR (0x00, insn->def(0));
950 }
951
952 void
gm107_selpFlip(const FixupEntry * entry,uint32_t * code,const FixupData & data)953 gm107_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
954 {
955 int loc = entry->loc;
956 bool val = false;
957 switch (entry->ipa) {
958 case 0:
959 val = data.force_persample_interp;
960 break;
961 case 1:
962 val = data.msaa;
963 break;
964 }
965 if (val)
966 code[loc + 1] |= 1 << 10;
967 else
968 code[loc + 1] &= ~(1 << 10);
969 }
970
971 void
emitSEL()972 CodeEmitterGM107::emitSEL()
973 {
974 switch (insn->src(1).getFile()) {
975 case FILE_GPR:
976 emitInsn(0x5ca00000);
977 emitGPR (0x14, insn->src(1));
978 break;
979 case FILE_MEMORY_CONST:
980 emitInsn(0x4ca00000);
981 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
982 break;
983 case FILE_IMMEDIATE:
984 emitInsn(0x38a00000);
985 emitIMMD(0x14, 19, insn->src(1));
986 break;
987 default:
988 assert(!"bad src1 file");
989 break;
990 }
991
992 emitINV (0x2a, insn->src(2));
993 emitPRED(0x27, insn->src(2));
994 emitGPR (0x08, insn->src(0));
995 emitGPR (0x00, insn->def(0));
996
997 if (insn->subOp >= 1) {
998 addInterp(insn->subOp - 1, 0, gm107_selpFlip);
999 }
1000 }
1001
1002 void
emitSHFL()1003 CodeEmitterGM107::emitSHFL()
1004 {
1005 int type = 0;
1006
1007 emitInsn (0xef100000);
1008
1009 switch (insn->src(1).getFile()) {
1010 case FILE_GPR:
1011 emitGPR(0x14, insn->src(1));
1012 break;
1013 case FILE_IMMEDIATE:
1014 emitIMMD(0x14, 5, insn->src(1));
1015 type |= 1;
1016 break;
1017 default:
1018 assert(!"invalid src1 file");
1019 break;
1020 }
1021
1022 switch (insn->src(2).getFile()) {
1023 case FILE_GPR:
1024 emitGPR(0x27, insn->src(2));
1025 break;
1026 case FILE_IMMEDIATE:
1027 emitIMMD(0x22, 13, insn->src(2));
1028 type |= 2;
1029 break;
1030 default:
1031 assert(!"invalid src2 file");
1032 break;
1033 }
1034
1035 if (!insn->defExists(1))
1036 emitPRED(0x30);
1037 else {
1038 assert(insn->def(1).getFile() == FILE_PREDICATE);
1039 emitPRED(0x30, insn->def(1));
1040 }
1041
1042 emitField(0x1e, 2, insn->subOp);
1043 emitField(0x1c, 2, type);
1044 emitGPR (0x08, insn->src(0));
1045 emitGPR (0x00, insn->def(0));
1046 }
1047
1048 /*******************************************************************************
1049 * double
1050 ******************************************************************************/
1051
1052 void
emitDADD()1053 CodeEmitterGM107::emitDADD()
1054 {
1055 switch (insn->src(1).getFile()) {
1056 case FILE_GPR:
1057 emitInsn(0x5c700000);
1058 emitGPR (0x14, insn->src(1));
1059 break;
1060 case FILE_MEMORY_CONST:
1061 emitInsn(0x4c700000);
1062 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1063 break;
1064 case FILE_IMMEDIATE:
1065 emitInsn(0x38700000);
1066 emitIMMD(0x14, 19, insn->src(1));
1067 break;
1068 default:
1069 assert(!"bad src1 file");
1070 break;
1071 }
1072 emitABS(0x31, insn->src(1));
1073 emitNEG(0x30, insn->src(0));
1074 emitCC (0x2f);
1075 emitABS(0x2e, insn->src(0));
1076 emitNEG(0x2d, insn->src(1));
1077
1078 if (insn->op == OP_SUB)
1079 code[1] ^= 0x00002000;
1080
1081 emitGPR(0x08, insn->src(0));
1082 emitGPR(0x00, insn->def(0));
1083 }
1084
1085 void
emitDMUL()1086 CodeEmitterGM107::emitDMUL()
1087 {
1088 switch (insn->src(1).getFile()) {
1089 case FILE_GPR:
1090 emitInsn(0x5c800000);
1091 emitGPR (0x14, insn->src(1));
1092 break;
1093 case FILE_MEMORY_CONST:
1094 emitInsn(0x4c800000);
1095 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1096 break;
1097 case FILE_IMMEDIATE:
1098 emitInsn(0x38800000);
1099 emitIMMD(0x14, 19, insn->src(1));
1100 break;
1101 default:
1102 assert(!"bad src1 file");
1103 break;
1104 }
1105
1106 emitNEG2(0x30, insn->src(0), insn->src(1));
1107 emitCC (0x2f);
1108 emitRND (0x27);
1109 emitGPR (0x08, insn->src(0));
1110 emitGPR (0x00, insn->def(0));
1111 }
1112
1113 void
emitDFMA()1114 CodeEmitterGM107::emitDFMA()
1115 {
1116 switch(insn->src(2).getFile()) {
1117 case FILE_GPR:
1118 switch (insn->src(1).getFile()) {
1119 case FILE_GPR:
1120 emitInsn(0x5b700000);
1121 emitGPR (0x14, insn->src(1));
1122 break;
1123 case FILE_MEMORY_CONST:
1124 emitInsn(0x4b700000);
1125 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1126 break;
1127 case FILE_IMMEDIATE:
1128 emitInsn(0x36700000);
1129 emitIMMD(0x14, 19, insn->src(1));
1130 break;
1131 default:
1132 assert(!"bad src1 file");
1133 break;
1134 }
1135 emitGPR (0x27, insn->src(2));
1136 break;
1137 case FILE_MEMORY_CONST:
1138 emitInsn(0x53700000);
1139 emitGPR (0x27, insn->src(1));
1140 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1141 break;
1142 default:
1143 assert(!"bad src2 file");
1144 break;
1145 }
1146
1147 emitRND (0x32);
1148 emitNEG (0x31, insn->src(2));
1149 emitNEG2(0x30, insn->src(0), insn->src(1));
1150 emitCC (0x2f);
1151 emitGPR (0x08, insn->src(0));
1152 emitGPR (0x00, insn->def(0));
1153 }
1154
1155 void
emitDMNMX()1156 CodeEmitterGM107::emitDMNMX()
1157 {
1158 switch (insn->src(1).getFile()) {
1159 case FILE_GPR:
1160 emitInsn(0x5c500000);
1161 emitGPR (0x14, insn->src(1));
1162 break;
1163 case FILE_MEMORY_CONST:
1164 emitInsn(0x4c500000);
1165 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1166 break;
1167 case FILE_IMMEDIATE:
1168 emitInsn(0x38500000);
1169 emitIMMD(0x14, 19, insn->src(1));
1170 break;
1171 default:
1172 assert(!"bad src1 file");
1173 break;
1174 }
1175
1176 emitABS (0x31, insn->src(1));
1177 emitNEG (0x30, insn->src(0));
1178 emitCC (0x2f);
1179 emitABS (0x2e, insn->src(0));
1180 emitNEG (0x2d, insn->src(1));
1181 emitField(0x2a, 1, insn->op == OP_MAX);
1182 emitPRED (0x27);
1183 emitGPR (0x08, insn->src(0));
1184 emitGPR (0x00, insn->def(0));
1185 }
1186
1187 void
emitDSET()1188 CodeEmitterGM107::emitDSET()
1189 {
1190 const CmpInstruction *insn = this->insn->asCmp();
1191
1192 switch (insn->src(1).getFile()) {
1193 case FILE_GPR:
1194 emitInsn(0x59000000);
1195 emitGPR (0x14, insn->src(1));
1196 break;
1197 case FILE_MEMORY_CONST:
1198 emitInsn(0x49000000);
1199 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1200 break;
1201 case FILE_IMMEDIATE:
1202 emitInsn(0x32000000);
1203 emitIMMD(0x14, 19, insn->src(1));
1204 break;
1205 default:
1206 assert(!"bad src1 file");
1207 break;
1208 }
1209
1210 if (insn->op != OP_SET) {
1211 switch (insn->op) {
1212 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1213 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1214 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1215 default:
1216 assert(!"invalid set op");
1217 break;
1218 }
1219 emitPRED(0x27, insn->src(2));
1220 } else {
1221 emitPRED(0x27);
1222 }
1223
1224 emitABS (0x36, insn->src(0));
1225 emitNEG (0x35, insn->src(1));
1226 emitField(0x34, 1, insn->dType == TYPE_F32);
1227 emitCond4(0x30, insn->setCond);
1228 emitCC (0x2f);
1229 emitABS (0x2c, insn->src(1));
1230 emitNEG (0x2b, insn->src(0));
1231 emitGPR (0x08, insn->src(0));
1232 emitGPR (0x00, insn->def(0));
1233 }
1234
1235 void
emitDSETP()1236 CodeEmitterGM107::emitDSETP()
1237 {
1238 const CmpInstruction *insn = this->insn->asCmp();
1239
1240 switch (insn->src(1).getFile()) {
1241 case FILE_GPR:
1242 emitInsn(0x5b800000);
1243 emitGPR (0x14, insn->src(1));
1244 break;
1245 case FILE_MEMORY_CONST:
1246 emitInsn(0x4b800000);
1247 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1248 break;
1249 case FILE_IMMEDIATE:
1250 emitInsn(0x36800000);
1251 emitIMMD(0x14, 19, insn->src(1));
1252 break;
1253 default:
1254 assert(!"bad src1 file");
1255 break;
1256 }
1257
1258 if (insn->op != OP_SET) {
1259 switch (insn->op) {
1260 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1261 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1262 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1263 default:
1264 assert(!"invalid set op");
1265 break;
1266 }
1267 emitPRED(0x27, insn->src(2));
1268 } else {
1269 emitPRED(0x27);
1270 }
1271
1272 emitCond4(0x30, insn->setCond);
1273 emitABS (0x2c, insn->src(1));
1274 emitNEG (0x2b, insn->src(0));
1275 emitGPR (0x08, insn->src(0));
1276 emitABS (0x07, insn->src(0));
1277 emitNEG (0x06, insn->src(1));
1278 emitPRED (0x03, insn->def(0));
1279 if (insn->defExists(1))
1280 emitPRED(0x00, insn->def(1));
1281 else
1282 emitPRED(0x00);
1283 }
1284
/*******************************************************************************
 * float
 ******************************************************************************/
1288
1289 void
emitFADD()1290 CodeEmitterGM107::emitFADD()
1291 {
1292 if (!longIMMD(insn->src(1))) {
1293 switch (insn->src(1).getFile()) {
1294 case FILE_GPR:
1295 emitInsn(0x5c580000);
1296 emitGPR (0x14, insn->src(1));
1297 break;
1298 case FILE_MEMORY_CONST:
1299 emitInsn(0x4c580000);
1300 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1301 break;
1302 case FILE_IMMEDIATE:
1303 emitInsn(0x38580000);
1304 emitIMMD(0x14, 19, insn->src(1));
1305 break;
1306 default:
1307 assert(!"bad src1 file");
1308 break;
1309 }
1310 emitSAT(0x32);
1311 emitABS(0x31, insn->src(1));
1312 emitNEG(0x30, insn->src(0));
1313 emitCC (0x2f);
1314 emitABS(0x2e, insn->src(0));
1315 emitNEG(0x2d, insn->src(1));
1316 emitFMZ(0x2c, 1);
1317
1318 if (insn->op == OP_SUB)
1319 code[1] ^= 0x00002000;
1320 } else {
1321 emitInsn(0x08000000);
1322 emitABS(0x39, insn->src(1));
1323 emitNEG(0x38, insn->src(0));
1324 emitFMZ(0x37, 1);
1325 emitABS(0x36, insn->src(0));
1326 emitNEG(0x35, insn->src(1));
1327 emitCC (0x34);
1328 emitIMMD(0x14, 32, insn->src(1));
1329
1330 if (insn->op == OP_SUB)
1331 code[1] ^= 0x00080000;
1332 }
1333
1334 emitGPR(0x08, insn->src(0));
1335 emitGPR(0x00, insn->def(0));
1336 }
1337
1338 void
emitFMUL()1339 CodeEmitterGM107::emitFMUL()
1340 {
1341 if (!longIMMD(insn->src(1))) {
1342 switch (insn->src(1).getFile()) {
1343 case FILE_GPR:
1344 emitInsn(0x5c680000);
1345 emitGPR (0x14, insn->src(1));
1346 break;
1347 case FILE_MEMORY_CONST:
1348 emitInsn(0x4c680000);
1349 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1350 break;
1351 case FILE_IMMEDIATE:
1352 emitInsn(0x38680000);
1353 emitIMMD(0x14, 19, insn->src(1));
1354 break;
1355 default:
1356 assert(!"bad src1 file");
1357 break;
1358 }
1359 emitSAT (0x32);
1360 emitNEG2(0x30, insn->src(0), insn->src(1));
1361 emitCC (0x2f);
1362 emitFMZ (0x2c, 2);
1363 emitPDIV(0x29);
1364 emitRND (0x27);
1365 } else {
1366 emitInsn(0x1e000000);
1367 emitSAT (0x37);
1368 emitFMZ (0x35, 2);
1369 emitCC (0x34);
1370 emitIMMD(0x14, 32, insn->src(1));
1371 if (insn->src(0).mod.neg() ^ insn->src(1).mod.neg())
1372 code[1] ^= 0x00080000; /* flip immd sign bit */
1373 }
1374
1375 emitGPR(0x08, insn->src(0));
1376 emitGPR(0x00, insn->def(0));
1377 }
1378
1379 void
emitFFMA()1380 CodeEmitterGM107::emitFFMA()
1381 {
1382 bool isLongIMMD = false;
1383 switch(insn->src(2).getFile()) {
1384 case FILE_GPR:
1385 switch (insn->src(1).getFile()) {
1386 case FILE_GPR:
1387 emitInsn(0x59800000);
1388 emitGPR (0x14, insn->src(1));
1389 break;
1390 case FILE_MEMORY_CONST:
1391 emitInsn(0x49800000);
1392 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1393 break;
1394 case FILE_IMMEDIATE:
1395 if (longIMMD(insn->getSrc(1))) {
1396 assert(insn->getDef(0)->reg.data.id == insn->getSrc(2)->reg.data.id);
1397 isLongIMMD = true;
1398 emitInsn(0x0c000000);
1399 emitIMMD(0x14, 32, insn->src(1));
1400 } else {
1401 emitInsn(0x32800000);
1402 emitIMMD(0x14, 19, insn->src(1));
1403 }
1404 break;
1405 default:
1406 assert(!"bad src1 file");
1407 break;
1408 }
1409 if (!isLongIMMD)
1410 emitGPR (0x27, insn->src(2));
1411 break;
1412 case FILE_MEMORY_CONST:
1413 emitInsn(0x51800000);
1414 emitGPR (0x27, insn->src(1));
1415 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1416 break;
1417 default:
1418 assert(!"bad src2 file");
1419 break;
1420 }
1421
1422 if (isLongIMMD) {
1423 emitNEG (0x39, insn->src(2));
1424 emitNEG2(0x38, insn->src(0), insn->src(1));
1425 emitSAT (0x37);
1426 emitCC (0x34);
1427 } else {
1428 emitRND (0x33);
1429 emitSAT (0x32);
1430 emitNEG (0x31, insn->src(2));
1431 emitNEG2(0x30, insn->src(0), insn->src(1));
1432 emitCC (0x2f);
1433 }
1434
1435 emitFMZ(0x35, 2);
1436 emitGPR(0x08, insn->src(0));
1437 emitGPR(0x00, insn->def(0));
1438 }
1439
1440 void
emitMUFU()1441 CodeEmitterGM107::emitMUFU()
1442 {
1443 int mufu = 0;
1444
1445 switch (insn->op) {
1446 case OP_COS: mufu = 0; break;
1447 case OP_SIN: mufu = 1; break;
1448 case OP_EX2: mufu = 2; break;
1449 case OP_LG2: mufu = 3; break;
1450 case OP_RCP: mufu = 4 + 2 * insn->subOp; break;
1451 case OP_RSQ: mufu = 5 + 2 * insn->subOp; break;
1452 case OP_SQRT: mufu = 8; break;
1453 default:
1454 assert(!"invalid mufu");
1455 break;
1456 }
1457
1458 emitInsn (0x50800000);
1459 emitSAT (0x32);
1460 emitNEG (0x30, insn->src(0));
1461 emitABS (0x2e, insn->src(0));
1462 emitField(0x14, 4, mufu);
1463 emitGPR (0x08, insn->src(0));
1464 emitGPR (0x00, insn->def(0));
1465 }
1466
1467 void
emitFMNMX()1468 CodeEmitterGM107::emitFMNMX()
1469 {
1470 switch (insn->src(1).getFile()) {
1471 case FILE_GPR:
1472 emitInsn(0x5c600000);
1473 emitGPR (0x14, insn->src(1));
1474 break;
1475 case FILE_MEMORY_CONST:
1476 emitInsn(0x4c600000);
1477 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1478 break;
1479 case FILE_IMMEDIATE:
1480 emitInsn(0x38600000);
1481 emitIMMD(0x14, 19, insn->src(1));
1482 break;
1483 default:
1484 assert(!"bad src1 file");
1485 break;
1486 }
1487
1488 emitField(0x2a, 1, insn->op == OP_MAX);
1489 emitPRED (0x27);
1490
1491 emitABS(0x31, insn->src(1));
1492 emitNEG(0x30, insn->src(0));
1493 emitCC (0x2f);
1494 emitABS(0x2e, insn->src(0));
1495 emitNEG(0x2d, insn->src(1));
1496 emitFMZ(0x2c, 1);
1497 emitGPR(0x08, insn->src(0));
1498 emitGPR(0x00, insn->def(0));
1499 }
1500
1501 void
emitRRO()1502 CodeEmitterGM107::emitRRO()
1503 {
1504 switch (insn->src(0).getFile()) {
1505 case FILE_GPR:
1506 emitInsn(0x5c900000);
1507 emitGPR (0x14, insn->src(0));
1508 break;
1509 case FILE_MEMORY_CONST:
1510 emitInsn(0x4c900000);
1511 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1512 break;
1513 case FILE_IMMEDIATE:
1514 emitInsn(0x38900000);
1515 emitIMMD(0x14, 19, insn->src(0));
1516 break;
1517 default:
1518 assert(!"bad src file");
1519 break;
1520 }
1521
1522 emitABS (0x31, insn->src(0));
1523 emitNEG (0x2d, insn->src(0));
1524 emitField(0x27, 1, insn->op == OP_PREEX2);
1525 emitGPR (0x00, insn->def(0));
1526 }
1527
1528 void
emitFCMP()1529 CodeEmitterGM107::emitFCMP()
1530 {
1531 const CmpInstruction *insn = this->insn->asCmp();
1532 CondCode cc = insn->setCond;
1533
1534 if (insn->src(2).mod.neg())
1535 cc = reverseCondCode(cc);
1536
1537 switch(insn->src(2).getFile()) {
1538 case FILE_GPR:
1539 switch (insn->src(1).getFile()) {
1540 case FILE_GPR:
1541 emitInsn(0x5ba00000);
1542 emitGPR (0x14, insn->src(1));
1543 break;
1544 case FILE_MEMORY_CONST:
1545 emitInsn(0x4ba00000);
1546 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1547 break;
1548 case FILE_IMMEDIATE:
1549 emitInsn(0x36a00000);
1550 emitIMMD(0x14, 19, insn->src(1));
1551 break;
1552 default:
1553 assert(!"bad src1 file");
1554 break;
1555 }
1556 emitGPR (0x27, insn->src(2));
1557 break;
1558 case FILE_MEMORY_CONST:
1559 emitInsn(0x53a00000);
1560 emitGPR (0x27, insn->src(1));
1561 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1562 break;
1563 default:
1564 assert(!"bad src2 file");
1565 break;
1566 }
1567
1568 emitCond4(0x30, cc);
1569 emitFMZ (0x2f, 1);
1570 emitGPR (0x08, insn->src(0));
1571 emitGPR (0x00, insn->def(0));
1572 }
1573
1574 void
emitFSET()1575 CodeEmitterGM107::emitFSET()
1576 {
1577 const CmpInstruction *insn = this->insn->asCmp();
1578
1579 switch (insn->src(1).getFile()) {
1580 case FILE_GPR:
1581 emitInsn(0x58000000);
1582 emitGPR (0x14, insn->src(1));
1583 break;
1584 case FILE_MEMORY_CONST:
1585 emitInsn(0x48000000);
1586 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1587 break;
1588 case FILE_IMMEDIATE:
1589 emitInsn(0x30000000);
1590 emitIMMD(0x14, 19, insn->src(1));
1591 break;
1592 default:
1593 assert(!"bad src1 file");
1594 break;
1595 }
1596
1597 if (insn->op != OP_SET) {
1598 switch (insn->op) {
1599 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1600 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1601 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1602 default:
1603 assert(!"invalid set op");
1604 break;
1605 }
1606 emitPRED(0x27, insn->src(2));
1607 } else {
1608 emitPRED(0x27);
1609 }
1610
1611 emitFMZ (0x37, 1);
1612 emitABS (0x36, insn->src(0));
1613 emitNEG (0x35, insn->src(1));
1614 emitField(0x34, 1, insn->dType == TYPE_F32);
1615 emitCond4(0x30, insn->setCond);
1616 emitCC (0x2f);
1617 emitABS (0x2c, insn->src(1));
1618 emitNEG (0x2b, insn->src(0));
1619 emitGPR (0x08, insn->src(0));
1620 emitGPR (0x00, insn->def(0));
1621 }
1622
1623 void
emitFSETP()1624 CodeEmitterGM107::emitFSETP()
1625 {
1626 const CmpInstruction *insn = this->insn->asCmp();
1627
1628 switch (insn->src(1).getFile()) {
1629 case FILE_GPR:
1630 emitInsn(0x5bb00000);
1631 emitGPR (0x14, insn->src(1));
1632 break;
1633 case FILE_MEMORY_CONST:
1634 emitInsn(0x4bb00000);
1635 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1636 break;
1637 case FILE_IMMEDIATE:
1638 emitInsn(0x36b00000);
1639 emitIMMD(0x14, 19, insn->src(1));
1640 break;
1641 default:
1642 assert(!"bad src1 file");
1643 break;
1644 }
1645
1646 if (insn->op != OP_SET) {
1647 switch (insn->op) {
1648 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1649 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1650 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1651 default:
1652 assert(!"invalid set op");
1653 break;
1654 }
1655 emitPRED(0x27, insn->src(2));
1656 } else {
1657 emitPRED(0x27);
1658 }
1659
1660 emitCond4(0x30, insn->setCond);
1661 emitFMZ (0x2f, 1);
1662 emitABS (0x2c, insn->src(1));
1663 emitNEG (0x2b, insn->src(0));
1664 emitGPR (0x08, insn->src(0));
1665 emitABS (0x07, insn->src(0));
1666 emitNEG (0x06, insn->src(1));
1667 emitPRED (0x03, insn->def(0));
1668 if (insn->defExists(1))
1669 emitPRED(0x00, insn->def(1));
1670 else
1671 emitPRED(0x00);
1672 }
1673
1674 void
emitFSWZADD()1675 CodeEmitterGM107::emitFSWZADD()
1676 {
1677 emitInsn (0x50f80000);
1678 emitCC (0x2f);
1679 emitFMZ (0x2c, 1);
1680 emitRND (0x27);
1681 emitField(0x26, 1, insn->lanes); /* abused for .ndv */
1682 emitField(0x1c, 8, insn->subOp);
1683 if (insn->predSrc != 1)
1684 emitGPR (0x14, insn->src(1));
1685 else
1686 emitGPR (0x14);
1687 emitGPR (0x08, insn->src(0));
1688 emitGPR (0x00, insn->def(0));
1689 }
1690
/*******************************************************************************
 * integer
 ******************************************************************************/
1694
1695 void
emitLOP()1696 CodeEmitterGM107::emitLOP()
1697 {
1698 int lop = 0;
1699
1700 switch (insn->op) {
1701 case OP_AND: lop = 0; break;
1702 case OP_OR : lop = 1; break;
1703 case OP_XOR: lop = 2; break;
1704 default:
1705 assert(!"invalid lop");
1706 break;
1707 }
1708
1709 if (!longIMMD(insn->src(1))) {
1710 switch (insn->src(1).getFile()) {
1711 case FILE_GPR:
1712 emitInsn(0x5c400000);
1713 emitGPR (0x14, insn->src(1));
1714 break;
1715 case FILE_MEMORY_CONST:
1716 emitInsn(0x4c400000);
1717 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1718 break;
1719 case FILE_IMMEDIATE:
1720 emitInsn(0x38400000);
1721 emitIMMD(0x14, 19, insn->src(1));
1722 break;
1723 default:
1724 assert(!"bad src1 file");
1725 break;
1726 }
1727 emitPRED (0x30);
1728 emitCC (0x2f);
1729 emitX (0x2b);
1730 emitField(0x29, 2, lop);
1731 emitINV (0x28, insn->src(1));
1732 emitINV (0x27, insn->src(0));
1733 } else {
1734 emitInsn (0x04000000);
1735 emitX (0x39);
1736 emitINV (0x38, insn->src(1));
1737 emitINV (0x37, insn->src(0));
1738 emitField(0x35, 2, lop);
1739 emitCC (0x34);
1740 emitIMMD (0x14, 32, insn->src(1));
1741 }
1742
1743 emitGPR (0x08, insn->src(0));
1744 emitGPR (0x00, insn->def(0));
1745 }
1746
1747 /* special-case of emitLOP(): lop pass_b dst 0 ~src */
1748 void
emitNOT()1749 CodeEmitterGM107::emitNOT()
1750 {
1751 if (!longIMMD(insn->src(0))) {
1752 switch (insn->src(0).getFile()) {
1753 case FILE_GPR:
1754 emitInsn(0x5c400700);
1755 emitGPR (0x14, insn->src(0));
1756 break;
1757 case FILE_MEMORY_CONST:
1758 emitInsn(0x4c400700);
1759 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1760 break;
1761 case FILE_IMMEDIATE:
1762 emitInsn(0x38400700);
1763 emitIMMD(0x14, 19, insn->src(0));
1764 break;
1765 default:
1766 assert(!"bad src1 file");
1767 break;
1768 }
1769 emitPRED (0x30);
1770 } else {
1771 emitInsn (0x05600000);
1772 emitIMMD (0x14, 32, insn->src(1));
1773 }
1774
1775 emitGPR(0x08);
1776 emitGPR(0x00, insn->def(0));
1777 }
1778
1779 void
emitIADD()1780 CodeEmitterGM107::emitIADD()
1781 {
1782 if (!longIMMD(insn->src(1))) {
1783 switch (insn->src(1).getFile()) {
1784 case FILE_GPR:
1785 emitInsn(0x5c100000);
1786 emitGPR (0x14, insn->src(1));
1787 break;
1788 case FILE_MEMORY_CONST:
1789 emitInsn(0x4c100000);
1790 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1791 break;
1792 case FILE_IMMEDIATE:
1793 emitInsn(0x38100000);
1794 emitIMMD(0x14, 19, insn->src(1));
1795 break;
1796 default:
1797 assert(!"bad src1 file");
1798 break;
1799 }
1800 emitSAT(0x32);
1801 emitNEG(0x31, insn->src(0));
1802 emitNEG(0x30, insn->src(1));
1803 emitCC (0x2f);
1804 emitX (0x2b);
1805 } else {
1806 emitInsn(0x1c000000);
1807 emitNEG (0x38, insn->src(0));
1808 emitSAT (0x36);
1809 emitX (0x35);
1810 emitCC (0x34);
1811 emitIMMD(0x14, 32, insn->src(1));
1812 }
1813
1814 if (insn->op == OP_SUB)
1815 code[1] ^= 0x00010000;
1816
1817 emitGPR(0x08, insn->src(0));
1818 emitGPR(0x00, insn->def(0));
1819 }
1820
1821 void
emitIMUL()1822 CodeEmitterGM107::emitIMUL()
1823 {
1824 if (!longIMMD(insn->src(1))) {
1825 switch (insn->src(1).getFile()) {
1826 case FILE_GPR:
1827 emitInsn(0x5c380000);
1828 emitGPR (0x14, insn->src(1));
1829 break;
1830 case FILE_MEMORY_CONST:
1831 emitInsn(0x4c380000);
1832 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1833 break;
1834 case FILE_IMMEDIATE:
1835 emitInsn(0x38380000);
1836 emitIMMD(0x14, 19, insn->src(1));
1837 break;
1838 default:
1839 assert(!"bad src1 file");
1840 break;
1841 }
1842 emitCC (0x2f);
1843 emitField(0x29, 1, isSignedType(insn->sType));
1844 emitField(0x28, 1, isSignedType(insn->dType));
1845 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1846 } else {
1847 emitInsn (0x1f000000);
1848 emitField(0x37, 1, isSignedType(insn->sType));
1849 emitField(0x36, 1, isSignedType(insn->dType));
1850 emitField(0x35, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1851 emitCC (0x34);
1852 emitIMMD (0x14, 32, insn->src(1));
1853 }
1854
1855 emitGPR(0x08, insn->src(0));
1856 emitGPR(0x00, insn->def(0));
1857 }
1858
1859 void
emitIMAD()1860 CodeEmitterGM107::emitIMAD()
1861 {
1862 /*XXX: imad32i exists, but not using it as third src overlaps dst */
1863 switch(insn->src(2).getFile()) {
1864 case FILE_GPR:
1865 switch (insn->src(1).getFile()) {
1866 case FILE_GPR:
1867 emitInsn(0x5a000000);
1868 emitGPR (0x14, insn->src(1));
1869 break;
1870 case FILE_MEMORY_CONST:
1871 emitInsn(0x4a000000);
1872 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1873 break;
1874 case FILE_IMMEDIATE:
1875 emitInsn(0x34000000);
1876 emitIMMD(0x14, 19, insn->src(1));
1877 break;
1878 default:
1879 assert(!"bad src1 file");
1880 break;
1881 }
1882 emitGPR (0x27, insn->src(2));
1883 break;
1884 case FILE_MEMORY_CONST:
1885 emitInsn(0x52000000);
1886 emitGPR (0x27, insn->src(1));
1887 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1888 break;
1889 default:
1890 assert(!"bad src2 file");
1891 break;
1892 }
1893
1894 emitField(0x36, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1895 emitField(0x35, 1, isSignedType(insn->sType));
1896 emitNEG (0x34, insn->src(2));
1897 emitNEG2 (0x33, insn->src(0), insn->src(1));
1898 emitSAT (0x32);
1899 emitX (0x31);
1900 emitField(0x30, 1, isSignedType(insn->dType));
1901 emitCC (0x2f);
1902 emitGPR (0x08, insn->src(0));
1903 emitGPR (0x00, insn->def(0));
1904 }
1905
1906 void
emitISCADD()1907 CodeEmitterGM107::emitISCADD()
1908 {
1909 assert(insn->src(1).get()->asImm());
1910
1911 switch (insn->src(2).getFile()) {
1912 case FILE_GPR:
1913 emitInsn(0x5c180000);
1914 emitGPR (0x14, insn->src(2));
1915 break;
1916 case FILE_MEMORY_CONST:
1917 emitInsn(0x4c180000);
1918 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1919 break;
1920 case FILE_IMMEDIATE:
1921 emitInsn(0x38180000);
1922 emitIMMD(0x14, 19, insn->src(2));
1923 break;
1924 default:
1925 assert(!"bad src1 file");
1926 break;
1927 }
1928 emitNEG (0x31, insn->src(0));
1929 emitNEG (0x30, insn->src(2));
1930 emitCC (0x2f);
1931 emitIMMD(0x27, 5, insn->src(1));
1932 emitGPR (0x08, insn->src(0));
1933 emitGPR (0x00, insn->def(0));
1934 }
1935
1936 void
emitXMAD()1937 CodeEmitterGM107::emitXMAD()
1938 {
1939 assert(insn->src(0).getFile() == FILE_GPR);
1940
1941 bool constbuf = false;
1942 bool psl_mrg = true;
1943 bool immediate = false;
1944 if (insn->src(2).getFile() == FILE_MEMORY_CONST) {
1945 assert(insn->src(1).getFile() == FILE_GPR);
1946 constbuf = true;
1947 psl_mrg = false;
1948 emitInsn(0x51000000);
1949 emitGPR(0x27, insn->src(1));
1950 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1951 } else if (insn->src(1).getFile() == FILE_MEMORY_CONST) {
1952 assert(insn->src(2).getFile() == FILE_GPR);
1953 constbuf = true;
1954 emitInsn(0x4e000000);
1955 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1956 emitGPR(0x27, insn->src(2));
1957 } else if (insn->src(1).getFile() == FILE_IMMEDIATE) {
1958 assert(insn->src(2).getFile() == FILE_GPR);
1959 assert(!(insn->subOp & NV50_IR_SUBOP_XMAD_H1(1)));
1960 immediate = true;
1961 emitInsn(0x36000000);
1962 emitIMMD(0x14, 16, insn->src(1));
1963 emitGPR(0x27, insn->src(2));
1964 } else {
1965 assert(insn->src(1).getFile() == FILE_GPR);
1966 assert(insn->src(2).getFile() == FILE_GPR);
1967 emitInsn(0x5b000000);
1968 emitGPR(0x14, insn->src(1));
1969 emitGPR(0x27, insn->src(2));
1970 }
1971
1972 if (psl_mrg)
1973 emitField(constbuf ? 0x37 : 0x24, 2, insn->subOp & 0x3);
1974
1975 unsigned cmode = (insn->subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK);
1976 cmode >>= NV50_IR_SUBOP_XMAD_CMODE_SHIFT;
1977 emitField(0x32, constbuf ? 2 : 3, cmode);
1978
1979 emitX(constbuf ? 0x36 : 0x26);
1980 emitCC(0x2f);
1981
1982 emitGPR(0x0, insn->def(0));
1983 emitGPR(0x8, insn->src(0));
1984
1985 // source flags
1986 if (isSignedType(insn->sType)) {
1987 uint16_t h1s = insn->subOp & NV50_IR_SUBOP_XMAD_H1_MASK;
1988 emitField(0x30, 2, h1s >> NV50_IR_SUBOP_XMAD_H1_SHIFT);
1989 }
1990 emitField(0x35, 1, insn->subOp & NV50_IR_SUBOP_XMAD_H1(0) ? 1 : 0);
1991 if (!immediate) {
1992 bool h1 = insn->subOp & NV50_IR_SUBOP_XMAD_H1(1);
1993 emitField(constbuf ? 0x34 : 0x23, 1, h1);
1994 }
1995 }
1996
1997 void
emitIMNMX()1998 CodeEmitterGM107::emitIMNMX()
1999 {
2000 switch (insn->src(1).getFile()) {
2001 case FILE_GPR:
2002 emitInsn(0x5c200000);
2003 emitGPR (0x14, insn->src(1));
2004 break;
2005 case FILE_MEMORY_CONST:
2006 emitInsn(0x4c200000);
2007 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2008 break;
2009 case FILE_IMMEDIATE:
2010 emitInsn(0x38200000);
2011 emitIMMD(0x14, 19, insn->src(1));
2012 break;
2013 default:
2014 assert(!"bad src1 file");
2015 break;
2016 }
2017
2018 emitField(0x30, 1, isSignedType(insn->dType));
2019 emitCC (0x2f);
2020 emitField(0x2b, 2, insn->subOp);
2021 emitField(0x2a, 1, insn->op == OP_MAX);
2022 emitPRED (0x27);
2023 emitGPR (0x08, insn->src(0));
2024 emitGPR (0x00, insn->def(0));
2025 }
2026
2027 void
emitICMP()2028 CodeEmitterGM107::emitICMP()
2029 {
2030 const CmpInstruction *insn = this->insn->asCmp();
2031 CondCode cc = insn->setCond;
2032
2033 if (insn->src(2).mod.neg())
2034 cc = reverseCondCode(cc);
2035
2036 switch(insn->src(2).getFile()) {
2037 case FILE_GPR:
2038 switch (insn->src(1).getFile()) {
2039 case FILE_GPR:
2040 emitInsn(0x5b400000);
2041 emitGPR (0x14, insn->src(1));
2042 break;
2043 case FILE_MEMORY_CONST:
2044 emitInsn(0x4b400000);
2045 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2046 break;
2047 case FILE_IMMEDIATE:
2048 emitInsn(0x36400000);
2049 emitIMMD(0x14, 19, insn->src(1));
2050 break;
2051 default:
2052 assert(!"bad src1 file");
2053 break;
2054 }
2055 emitGPR (0x27, insn->src(2));
2056 break;
2057 case FILE_MEMORY_CONST:
2058 emitInsn(0x53400000);
2059 emitGPR (0x27, insn->src(1));
2060 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
2061 break;
2062 default:
2063 assert(!"bad src2 file");
2064 break;
2065 }
2066
2067 emitCond3(0x31, cc);
2068 emitField(0x30, 1, isSignedType(insn->sType));
2069 emitGPR (0x08, insn->src(0));
2070 emitGPR (0x00, insn->def(0));
2071 }
2072
2073 void
emitISET()2074 CodeEmitterGM107::emitISET()
2075 {
2076 const CmpInstruction *insn = this->insn->asCmp();
2077
2078 switch (insn->src(1).getFile()) {
2079 case FILE_GPR:
2080 emitInsn(0x5b500000);
2081 emitGPR (0x14, insn->src(1));
2082 break;
2083 case FILE_MEMORY_CONST:
2084 emitInsn(0x4b500000);
2085 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2086 break;
2087 case FILE_IMMEDIATE:
2088 emitInsn(0x36500000);
2089 emitIMMD(0x14, 19, insn->src(1));
2090 break;
2091 default:
2092 assert(!"bad src1 file");
2093 break;
2094 }
2095
2096 if (insn->op != OP_SET) {
2097 switch (insn->op) {
2098 case OP_SET_AND: emitField(0x2d, 2, 0); break;
2099 case OP_SET_OR : emitField(0x2d, 2, 1); break;
2100 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
2101 default:
2102 assert(!"invalid set op");
2103 break;
2104 }
2105 emitPRED(0x27, insn->src(2));
2106 } else {
2107 emitPRED(0x27);
2108 }
2109
2110 emitCond3(0x31, insn->setCond);
2111 emitField(0x30, 1, isSignedType(insn->sType));
2112 emitCC (0x2f);
2113 emitField(0x2c, 1, insn->dType == TYPE_F32);
2114 emitX (0x2b);
2115 emitGPR (0x08, insn->src(0));
2116 emitGPR (0x00, insn->def(0));
2117 }
2118
2119 void
emitISETP()2120 CodeEmitterGM107::emitISETP()
2121 {
2122 const CmpInstruction *insn = this->insn->asCmp();
2123
2124 switch (insn->src(1).getFile()) {
2125 case FILE_GPR:
2126 emitInsn(0x5b600000);
2127 emitGPR (0x14, insn->src(1));
2128 break;
2129 case FILE_MEMORY_CONST:
2130 emitInsn(0x4b600000);
2131 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2132 break;
2133 case FILE_IMMEDIATE:
2134 emitInsn(0x36600000);
2135 emitIMMD(0x14, 19, insn->src(1));
2136 break;
2137 default:
2138 assert(!"bad src1 file");
2139 break;
2140 }
2141
2142 if (insn->op != OP_SET) {
2143 switch (insn->op) {
2144 case OP_SET_AND: emitField(0x2d, 2, 0); break;
2145 case OP_SET_OR : emitField(0x2d, 2, 1); break;
2146 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
2147 default:
2148 assert(!"invalid set op");
2149 break;
2150 }
2151 emitPRED(0x27, insn->src(2));
2152 } else {
2153 emitPRED(0x27);
2154 }
2155
2156 emitCond3(0x31, insn->setCond);
2157 emitField(0x30, 1, isSignedType(insn->sType));
2158 emitX (0x2b);
2159 emitGPR (0x08, insn->src(0));
2160 emitPRED (0x03, insn->def(0));
2161 if (insn->defExists(1))
2162 emitPRED(0x00, insn->def(1));
2163 else
2164 emitPRED(0x00);
2165 }
2166
2167 void
emitSHL()2168 CodeEmitterGM107::emitSHL()
2169 {
2170 switch (insn->src(1).getFile()) {
2171 case FILE_GPR:
2172 emitInsn(0x5c480000);
2173 emitGPR (0x14, insn->src(1));
2174 break;
2175 case FILE_MEMORY_CONST:
2176 emitInsn(0x4c480000);
2177 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2178 break;
2179 case FILE_IMMEDIATE:
2180 emitInsn(0x38480000);
2181 emitIMMD(0x14, 19, insn->src(1));
2182 break;
2183 default:
2184 assert(!"bad src1 file");
2185 break;
2186 }
2187
2188 emitCC (0x2f);
2189 emitX (0x2b);
2190 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2191 emitGPR (0x08, insn->src(0));
2192 emitGPR (0x00, insn->def(0));
2193 }
2194
2195 void
emitSHR()2196 CodeEmitterGM107::emitSHR()
2197 {
2198 switch (insn->src(1).getFile()) {
2199 case FILE_GPR:
2200 emitInsn(0x5c280000);
2201 emitGPR (0x14, insn->src(1));
2202 break;
2203 case FILE_MEMORY_CONST:
2204 emitInsn(0x4c280000);
2205 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2206 break;
2207 case FILE_IMMEDIATE:
2208 emitInsn(0x38280000);
2209 emitIMMD(0x14, 19, insn->src(1));
2210 break;
2211 default:
2212 assert(!"bad src1 file");
2213 break;
2214 }
2215
2216 emitField(0x30, 1, isSignedType(insn->dType));
2217 emitCC (0x2f);
2218 emitX (0x2c);
2219 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2220 emitGPR (0x08, insn->src(0));
2221 emitGPR (0x00, insn->def(0));
2222 }
2223
2224 void
emitSHF()2225 CodeEmitterGM107::emitSHF()
2226 {
2227 unsigned type;
2228
2229 switch (insn->src(1).getFile()) {
2230 case FILE_GPR:
2231 emitInsn(insn->op == OP_SHL ? 0x5bf80000 : 0x5cf80000);
2232 emitGPR(0x14, insn->src(1));
2233 break;
2234 case FILE_IMMEDIATE:
2235 emitInsn(insn->op == OP_SHL ? 0x36f80000 : 0x38f80000);
2236 emitIMMD(0x14, 19, insn->src(1));
2237 break;
2238 default:
2239 assert(!"bad src1 file");
2240 break;
2241 }
2242
2243 switch (insn->sType) {
2244 case TYPE_U64:
2245 type = 2;
2246 break;
2247 case TYPE_S64:
2248 type = 3;
2249 break;
2250 default:
2251 type = 0;
2252 break;
2253 }
2254
2255 emitField(0x32, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_WRAP));
2256 emitX (0x31);
2257 emitField(0x30, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_HIGH));
2258 emitCC (0x2f);
2259 emitGPR (0x27, insn->src(2));
2260 emitField(0x25, 2, type);
2261 emitGPR (0x08, insn->src(0));
2262 emitGPR (0x00, insn->def(0));
2263 }
2264
2265 void
emitPOPC()2266 CodeEmitterGM107::emitPOPC()
2267 {
2268 switch (insn->src(0).getFile()) {
2269 case FILE_GPR:
2270 emitInsn(0x5c080000);
2271 emitGPR (0x14, insn->src(0));
2272 break;
2273 case FILE_MEMORY_CONST:
2274 emitInsn(0x4c080000);
2275 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2276 break;
2277 case FILE_IMMEDIATE:
2278 emitInsn(0x38080000);
2279 emitIMMD(0x14, 19, insn->src(0));
2280 break;
2281 default:
2282 assert(!"bad src1 file");
2283 break;
2284 }
2285
2286 emitINV(0x28, insn->src(0));
2287 emitGPR(0x00, insn->def(0));
2288 }
2289
2290 void
emitBFI()2291 CodeEmitterGM107::emitBFI()
2292 {
2293 switch(insn->src(2).getFile()) {
2294 case FILE_GPR:
2295 switch (insn->src(1).getFile()) {
2296 case FILE_GPR:
2297 emitInsn(0x5bf00000);
2298 emitGPR (0x14, insn->src(1));
2299 break;
2300 case FILE_MEMORY_CONST:
2301 emitInsn(0x4bf00000);
2302 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2303 break;
2304 case FILE_IMMEDIATE:
2305 emitInsn(0x36f00000);
2306 emitIMMD(0x14, 19, insn->src(1));
2307 break;
2308 default:
2309 assert(!"bad src1 file");
2310 break;
2311 }
2312 emitGPR (0x27, insn->src(2));
2313 break;
2314 case FILE_MEMORY_CONST:
2315 emitInsn(0x53f00000);
2316 emitGPR (0x27, insn->src(1));
2317 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
2318 break;
2319 default:
2320 assert(!"bad src2 file");
2321 break;
2322 }
2323
2324 emitCC (0x2f);
2325 emitGPR (0x08, insn->src(0));
2326 emitGPR (0x00, insn->def(0));
2327 }
2328
2329 void
emitBFE()2330 CodeEmitterGM107::emitBFE()
2331 {
2332 switch (insn->src(1).getFile()) {
2333 case FILE_GPR:
2334 emitInsn(0x5c000000);
2335 emitGPR (0x14, insn->src(1));
2336 break;
2337 case FILE_MEMORY_CONST:
2338 emitInsn(0x4c000000);
2339 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2340 break;
2341 case FILE_IMMEDIATE:
2342 emitInsn(0x38000000);
2343 emitIMMD(0x14, 19, insn->src(1));
2344 break;
2345 default:
2346 assert(!"bad src1 file");
2347 break;
2348 }
2349
2350 emitField(0x30, 1, isSignedType(insn->dType));
2351 emitCC (0x2f);
2352 emitField(0x28, 1, insn->subOp == NV50_IR_SUBOP_EXTBF_REV);
2353 emitGPR (0x08, insn->src(0));
2354 emitGPR (0x00, insn->def(0));
2355 }
2356
2357 void
emitFLO()2358 CodeEmitterGM107::emitFLO()
2359 {
2360 switch (insn->src(0).getFile()) {
2361 case FILE_GPR:
2362 emitInsn(0x5c300000);
2363 emitGPR (0x14, insn->src(0));
2364 break;
2365 case FILE_MEMORY_CONST:
2366 emitInsn(0x4c300000);
2367 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2368 break;
2369 case FILE_IMMEDIATE:
2370 emitInsn(0x38300000);
2371 emitIMMD(0x14, 19, insn->src(0));
2372 break;
2373 default:
2374 assert(!"bad src1 file");
2375 break;
2376 }
2377
2378 emitField(0x30, 1, isSignedType(insn->dType));
2379 emitCC (0x2f);
2380 emitField(0x29, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT);
2381 emitINV (0x28, insn->src(0));
2382 emitGPR (0x00, insn->def(0));
2383 }
2384
2385 void
emitPRMT()2386 CodeEmitterGM107::emitPRMT()
2387 {
2388 switch (insn->src(1).getFile()) {
2389 case FILE_GPR:
2390 emitInsn(0x5bc00000);
2391 emitGPR (0x14, insn->src(1));
2392 break;
2393 case FILE_MEMORY_CONST:
2394 emitInsn(0x4bc00000);
2395 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2396 break;
2397 case FILE_IMMEDIATE:
2398 emitInsn(0x36c00000);
2399 emitIMMD(0x14, 19, insn->src(1));
2400 break;
2401 default:
2402 assert(!"bad src1 file");
2403 break;
2404 }
2405
2406 emitField(0x30, 3, insn->subOp);
2407 emitGPR (0x27, insn->src(2));
2408 emitGPR (0x08, insn->src(0));
2409 emitGPR (0x00, insn->def(0));
2410 }
2411
/*******************************************************************************
 * memory
 ******************************************************************************/
2415
2416 void
emitLDSTs(int pos,DataType type)2417 CodeEmitterGM107::emitLDSTs(int pos, DataType type)
2418 {
2419 int data = 0;
2420
2421 switch (typeSizeof(type)) {
2422 case 1: data = isSignedType(type) ? 1 : 0; break;
2423 case 2: data = isSignedType(type) ? 3 : 2; break;
2424 case 4: data = 4; break;
2425 case 8: data = 5; break;
2426 case 16: data = 6; break;
2427 default:
2428 assert(!"bad type");
2429 break;
2430 }
2431
2432 emitField(pos, 3, data);
2433 }
2434
2435 void
emitLDSTc(int pos)2436 CodeEmitterGM107::emitLDSTc(int pos)
2437 {
2438 int mode = 0;
2439
2440 switch (insn->cache) {
2441 case CACHE_CA: mode = 0; break;
2442 case CACHE_CG: mode = 1; break;
2443 case CACHE_CS: mode = 2; break;
2444 case CACHE_CV: mode = 3; break;
2445 default:
2446 assert(!"invalid caching mode");
2447 break;
2448 }
2449
2450 emitField(pos, 2, mode);
2451 }
2452
2453 void
emitLDC()2454 CodeEmitterGM107::emitLDC()
2455 {
2456 emitInsn (0xef900000);
2457 emitLDSTs(0x30, insn->dType);
2458 emitField(0x2c, 2, insn->subOp);
2459 emitCBUF (0x24, 0x08, 0x14, 16, 0, insn->src(0));
2460 emitGPR (0x00, insn->def(0));
2461 }
2462
2463 void
emitLDL()2464 CodeEmitterGM107::emitLDL()
2465 {
2466 emitInsn (0xef400000);
2467 emitLDSTs(0x30, insn->dType);
2468 emitLDSTc(0x2c);
2469 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2470 emitGPR (0x00, insn->def(0));
2471 }
2472
2473 void
emitLDS()2474 CodeEmitterGM107::emitLDS()
2475 {
2476 emitInsn (0xef480000);
2477 emitLDSTs(0x30, insn->dType);
2478 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2479 emitGPR (0x00, insn->def(0));
2480 }
2481
2482 void
emitLD()2483 CodeEmitterGM107::emitLD()
2484 {
2485 emitInsn (0x80000000);
2486 emitPRED (0x3a);
2487 emitLDSTc(0x38);
2488 emitLDSTs(0x35, insn->dType);
2489 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2490 emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2491 emitGPR (0x00, insn->def(0));
2492 }
2493
2494 void
emitSTL()2495 CodeEmitterGM107::emitSTL()
2496 {
2497 emitInsn (0xef500000);
2498 emitLDSTs(0x30, insn->dType);
2499 emitLDSTc(0x2c);
2500 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2501 emitGPR (0x00, insn->src(1));
2502 }
2503
2504 void
emitSTS()2505 CodeEmitterGM107::emitSTS()
2506 {
2507 emitInsn (0xef580000);
2508 emitLDSTs(0x30, insn->dType);
2509 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2510 emitGPR (0x00, insn->src(1));
2511 }
2512
2513 void
emitST()2514 CodeEmitterGM107::emitST()
2515 {
2516 emitInsn (0xa0000000);
2517 emitPRED (0x3a);
2518 emitLDSTc(0x38);
2519 emitLDSTs(0x35, insn->dType);
2520 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2521 emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2522 emitGPR (0x00, insn->src(1));
2523 }
2524
2525 void
emitALD()2526 CodeEmitterGM107::emitALD()
2527 {
2528 emitInsn (0xefd80000);
2529 emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2530 emitGPR (0x27, insn->src(0).getIndirect(1));
2531 emitO (0x20);
2532 emitP (0x1f);
2533 emitADDR (0x08, 20, 10, 0, insn->src(0));
2534 emitGPR (0x00, insn->def(0));
2535 }
2536
2537 void
emitAST()2538 CodeEmitterGM107::emitAST()
2539 {
2540 emitInsn (0xeff00000);
2541 emitField(0x2f, 2, (typeSizeof(insn->dType) / 4) - 1);
2542 emitGPR (0x27, insn->src(0).getIndirect(1));
2543 emitP (0x1f);
2544 emitADDR (0x08, 20, 10, 0, insn->src(0));
2545 emitGPR (0x00, insn->src(1));
2546 }
2547
2548 void
emitISBERD()2549 CodeEmitterGM107::emitISBERD()
2550 {
2551 emitInsn(0xefd00000);
2552 emitGPR (0x08, insn->src(0));
2553 emitGPR (0x00, insn->def(0));
2554 }
2555
2556 void
emitAL2P()2557 CodeEmitterGM107::emitAL2P()
2558 {
2559 emitInsn (0xefa00000);
2560 emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2561 emitPRED (0x2c);
2562 emitO (0x20);
2563 emitField(0x14, 11, insn->src(0).get()->reg.data.offset);
2564 emitGPR (0x08, insn->src(0).getIndirect(0));
2565 emitGPR (0x00, insn->def(0));
2566 }
2567
2568 void
gm107_interpApply(const FixupEntry * entry,uint32_t * code,const FixupData & data)2569 gm107_interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
2570 {
2571 int ipa = entry->ipa;
2572 int reg = entry->reg;
2573 int loc = entry->loc;
2574
2575 if (data.flatshade &&
2576 (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
2577 ipa = NV50_IR_INTERP_FLAT;
2578 reg = 0xff;
2579 } else if (data.force_persample_interp &&
2580 (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
2581 (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
2582 ipa |= NV50_IR_INTERP_CENTROID;
2583 }
2584 code[loc + 1] &= ~(0xf << 0x14);
2585 code[loc + 1] |= (ipa & 0x3) << 0x16;
2586 code[loc + 1] |= (ipa & 0xc) << (0x14 - 2);
2587 code[loc + 0] &= ~(0xff << 0x14);
2588 code[loc + 0] |= reg << 0x14;
2589 }
2590
2591 void
emitIPA()2592 CodeEmitterGM107::emitIPA()
2593 {
2594 int ipam = 0, ipas = 0;
2595
2596 switch (insn->getInterpMode()) {
2597 case NV50_IR_INTERP_LINEAR : ipam = 0; break;
2598 case NV50_IR_INTERP_PERSPECTIVE: ipam = 1; break;
2599 case NV50_IR_INTERP_FLAT : ipam = 2; break;
2600 case NV50_IR_INTERP_SC : ipam = 3; break;
2601 default:
2602 assert(!"invalid ipa mode");
2603 break;
2604 }
2605
2606 switch (insn->getSampleMode()) {
2607 case NV50_IR_INTERP_DEFAULT : ipas = 0; break;
2608 case NV50_IR_INTERP_CENTROID: ipas = 1; break;
2609 case NV50_IR_INTERP_OFFSET : ipas = 2; break;
2610 default:
2611 assert(!"invalid ipa sample mode");
2612 break;
2613 }
2614
2615 emitInsn (0xe0000000);
2616 emitField(0x36, 2, ipam);
2617 emitField(0x34, 2, ipas);
2618 emitSAT (0x33);
2619 emitField(0x2f, 3, 7);
2620 emitADDR (0x08, 0x1c, 10, 0, insn->src(0));
2621 if ((code[0] & 0x0000ff00) != 0x0000ff00)
2622 code[1] |= 0x00000040; /* .idx */
2623 emitGPR(0x00, insn->def(0));
2624
2625 if (insn->op == OP_PINTERP) {
2626 emitGPR(0x14, insn->src(1));
2627 if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2628 emitGPR(0x27, insn->src(2));
2629 addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, gm107_interpApply);
2630 } else {
2631 if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2632 emitGPR(0x27, insn->src(1));
2633 emitGPR(0x14);
2634 addInterp(insn->ipa, 0xff, gm107_interpApply);
2635 }
2636
2637 if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET)
2638 emitGPR(0x27);
2639 }
2640
2641 void
emitATOM()2642 CodeEmitterGM107::emitATOM()
2643 {
2644 unsigned dType, subOp;
2645
2646 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2647 switch (insn->dType) {
2648 case TYPE_U32: dType = 0; break;
2649 case TYPE_U64: dType = 1; break;
2650 default: assert(!"unexpected dType"); dType = 0; break;
2651 }
2652 subOp = 15;
2653
2654 emitInsn (0xee000000);
2655 } else {
2656 switch (insn->dType) {
2657 case TYPE_U32: dType = 0; break;
2658 case TYPE_S32: dType = 1; break;
2659 case TYPE_U64: dType = 2; break;
2660 case TYPE_F32: dType = 3; break;
2661 case TYPE_B128: dType = 4; break;
2662 case TYPE_S64: dType = 5; break;
2663 default: assert(!"unexpected dType"); dType = 0; break;
2664 }
2665 if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2666 subOp = 8;
2667 else
2668 subOp = insn->subOp;
2669
2670 emitInsn (0xed000000);
2671 }
2672
2673 emitField(0x34, 4, subOp);
2674 emitField(0x31, 3, dType);
2675 emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2676 emitGPR (0x14, insn->src(1));
2677 emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2678 emitGPR (0x00, insn->def(0));
2679 }
2680
2681 void
emitATOMS()2682 CodeEmitterGM107::emitATOMS()
2683 {
2684 unsigned dType, subOp;
2685
2686 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2687 switch (insn->dType) {
2688 case TYPE_U32: dType = 0; break;
2689 case TYPE_U64: dType = 1; break;
2690 default: assert(!"unexpected dType"); dType = 0; break;
2691 }
2692 subOp = 4;
2693
2694 emitInsn (0xee000000);
2695 emitField(0x34, 1, dType);
2696 } else {
2697 switch (insn->dType) {
2698 case TYPE_U32: dType = 0; break;
2699 case TYPE_S32: dType = 1; break;
2700 case TYPE_U64: dType = 2; break;
2701 case TYPE_S64: dType = 3; break;
2702 default: assert(!"unexpected dType"); dType = 0; break;
2703 }
2704
2705 if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2706 subOp = 8;
2707 else
2708 subOp = insn->subOp;
2709
2710 emitInsn (0xec000000);
2711 emitField(0x1c, 3, dType);
2712 }
2713
2714 emitField(0x34, 4, subOp);
2715 emitGPR (0x14, insn->src(1));
2716 emitADDR (0x08, 0x1e, 22, 2, insn->src(0));
2717 emitGPR (0x00, insn->def(0));
2718 }
2719
2720 void
emitRED()2721 CodeEmitterGM107::emitRED()
2722 {
2723 unsigned dType;
2724
2725 switch (insn->dType) {
2726 case TYPE_U32: dType = 0; break;
2727 case TYPE_S32: dType = 1; break;
2728 case TYPE_U64: dType = 2; break;
2729 case TYPE_F32: dType = 3; break;
2730 case TYPE_B128: dType = 4; break;
2731 case TYPE_S64: dType = 5; break;
2732 default: assert(!"unexpected dType"); dType = 0; break;
2733 }
2734
2735 emitInsn (0xebf80000);
2736 emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2737 emitField(0x17, 3, insn->subOp);
2738 emitField(0x14, 3, dType);
2739 emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2740 emitGPR (0x00, insn->src(1));
2741 }
2742
2743 void
emitCCTL()2744 CodeEmitterGM107::emitCCTL()
2745 {
2746 unsigned width;
2747 if (insn->src(0).getFile() == FILE_MEMORY_GLOBAL) {
2748 emitInsn(0xef600000);
2749 width = 30;
2750 } else {
2751 emitInsn(0xef800000);
2752 width = 22;
2753 }
2754 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2755 emitADDR (0x08, 0x16, width, 2, insn->src(0));
2756 emitField(0x00, 4, insn->subOp);
2757 }
2758
2759 /*******************************************************************************
2760 * surface
2761 ******************************************************************************/
2762
2763 void
emitPIXLD()2764 CodeEmitterGM107::emitPIXLD()
2765 {
2766 emitInsn (0xefe80000);
2767 emitPRED (0x2d);
2768 emitField(0x1f, 3, insn->subOp);
2769 emitGPR (0x08, insn->src(0));
2770 emitGPR (0x00, insn->def(0));
2771 }
2772
2773 /*******************************************************************************
2774 * texture
2775 ******************************************************************************/
2776
2777 void
emitTEXs(int pos)2778 CodeEmitterGM107::emitTEXs(int pos)
2779 {
2780 int src1 = insn->predSrc == 1 ? 2 : 1;
2781 if (insn->srcExists(src1))
2782 emitGPR(pos, insn->src(src1));
2783 else
2784 emitGPR(pos);
2785 }
2786
/*
 * Translate a 4-bit component write mask into the 3-bit mask field value
 * used by emitTEXS().
 *
 * Masks 0x0, 0x5 and 0x6, and anything above 0xf, have no encoding: they
 * trip an assertion and yield 0.
 */
static uint8_t
getTEXSMask(uint8_t mask)
{
   /* Indexed by mask; -1 marks masks without an encoding. */
   static const signed char encoding[16] = {
      -1,  0x0, 0x1, 0x4,
      0x2, -1,  -1,  0x0,
      0x3, 0x5, 0x6, 0x1,
      0x7, 0x2, 0x3, 0x4,
   };

   const int enc = (mask < 16) ? encoding[mask] : -1;

   if (enc < 0) {
      assert(!"invalid mask");
      return 0;
   }
   return enc;
}
2809
2810 static uint8_t
getTEXSTarget(const TexInstruction * tex)2811 getTEXSTarget(const TexInstruction *tex)
2812 {
2813 assert(tex->op == OP_TEX || tex->op == OP_TXL);
2814
2815 switch (tex->tex.target.getEnum()) {
2816 case TEX_TARGET_1D:
2817 assert(tex->tex.levelZero);
2818 return 0x0;
2819 case TEX_TARGET_2D:
2820 case TEX_TARGET_RECT:
2821 if (tex->tex.levelZero)
2822 return 0x2;
2823 if (tex->op == OP_TXL)
2824 return 0x3;
2825 return 0x1;
2826 case TEX_TARGET_2D_SHADOW:
2827 case TEX_TARGET_RECT_SHADOW:
2828 if (tex->tex.levelZero)
2829 return 0x6;
2830 if (tex->op == OP_TXL)
2831 return 0x5;
2832 return 0x4;
2833 case TEX_TARGET_2D_ARRAY:
2834 if (tex->tex.levelZero)
2835 return 0x8;
2836 return 0x7;
2837 case TEX_TARGET_2D_ARRAY_SHADOW:
2838 assert(tex->tex.levelZero);
2839 return 0x9;
2840 case TEX_TARGET_3D:
2841 if (tex->tex.levelZero)
2842 return 0xb;
2843 assert(tex->op != OP_TXL);
2844 return 0xa;
2845 case TEX_TARGET_CUBE:
2846 assert(!tex->tex.levelZero);
2847 if (tex->op == OP_TXL)
2848 return 0xd;
2849 return 0xc;
2850 default:
2851 assert(false);
2852 return 0x0;
2853 }
2854 }
2855
2856 static uint8_t
getTLDSTarget(const TexInstruction * tex)2857 getTLDSTarget(const TexInstruction *tex)
2858 {
2859 switch (tex->tex.target.getEnum()) {
2860 case TEX_TARGET_1D:
2861 if (tex->tex.levelZero)
2862 return 0x0;
2863 return 0x1;
2864 case TEX_TARGET_2D:
2865 case TEX_TARGET_RECT:
2866 if (tex->tex.levelZero)
2867 return tex->tex.useOffsets ? 0x4 : 0x2;
2868 return tex->tex.useOffsets ? 0xc : 0x5;
2869 case TEX_TARGET_2D_MS:
2870 assert(tex->tex.levelZero);
2871 return 0x6;
2872 case TEX_TARGET_3D:
2873 assert(tex->tex.levelZero);
2874 return 0x7;
2875 case TEX_TARGET_2D_ARRAY:
2876 assert(tex->tex.levelZero);
2877 return 0x8;
2878
2879 default:
2880 assert(false);
2881 return 0x0;
2882 }
2883 }
2884
2885 void
emitTEX()2886 CodeEmitterGM107::emitTEX()
2887 {
2888 const TexInstruction *insn = this->insn->asTex();
2889 int lodm = 0;
2890
2891 if (!insn->tex.levelZero) {
2892 switch (insn->op) {
2893 case OP_TEX: lodm = 0; break;
2894 case OP_TXB: lodm = 2; break;
2895 case OP_TXL: lodm = 3; break;
2896 default:
2897 assert(!"invalid tex op");
2898 break;
2899 }
2900 } else {
2901 lodm = 1;
2902 }
2903
2904 if (insn->tex.rIndirectSrc >= 0) {
2905 emitInsn (0xdeb80000);
2906 emitField(0x25, 2, lodm);
2907 emitField(0x24, 1, insn->tex.useOffsets == 1);
2908 } else {
2909 emitInsn (0xc0380000);
2910 emitField(0x37, 2, lodm);
2911 emitField(0x36, 1, insn->tex.useOffsets == 1);
2912 emitField(0x24, 13, insn->tex.r);
2913 }
2914
2915 emitField(0x32, 1, insn->tex.target.isShadow());
2916 emitField(0x31, 1, insn->tex.liveOnly);
2917 emitField(0x23, 1, insn->tex.derivAll);
2918 emitField(0x1f, 4, insn->tex.mask);
2919 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2920 insn->tex.target.getDim() - 1);
2921 emitField(0x1c, 1, insn->tex.target.isArray());
2922 emitTEXs (0x14);
2923 emitGPR (0x08, insn->src(0));
2924 emitGPR (0x00, insn->def(0));
2925 }
2926
2927 void
emitTEXS()2928 CodeEmitterGM107::emitTEXS()
2929 {
2930 const TexInstruction *insn = this->insn->asTex();
2931 assert(!insn->tex.derivAll);
2932
2933 switch (insn->op) {
2934 case OP_TEX:
2935 case OP_TXL:
2936 emitInsn (0xd8000000);
2937 emitField(0x35, 4, getTEXSTarget(insn));
2938 emitField(0x32, 3, getTEXSMask(insn->tex.mask));
2939 break;
2940 case OP_TXF:
2941 emitInsn (0xda000000);
2942 emitField(0x35, 4, getTLDSTarget(insn));
2943 emitField(0x32, 3, getTEXSMask(insn->tex.mask));
2944 break;
2945 case OP_TXG:
2946 assert(insn->tex.useOffsets != 4);
2947 emitInsn (0xdf000000);
2948 emitField(0x34, 2, insn->tex.gatherComp);
2949 emitField(0x33, 1, insn->tex.useOffsets == 1);
2950 emitField(0x32, 1, insn->tex.target.isShadow());
2951 break;
2952 default:
2953 unreachable("unknown op in emitTEXS()");
2954 break;
2955 }
2956
2957 emitField(0x31, 1, insn->tex.liveOnly);
2958 emitField(0x24, 13, insn->tex.r);
2959 if (insn->defExists(1))
2960 emitGPR(0x1c, insn->def(1));
2961 else
2962 emitGPR(0x1c);
2963 if (insn->srcExists(1))
2964 emitGPR(0x14, insn->getSrc(1));
2965 else
2966 emitGPR(0x14);
2967 emitGPR (0x08, insn->src(0));
2968 emitGPR (0x00, insn->def(0));
2969 }
2970
2971 void
emitTLD()2972 CodeEmitterGM107::emitTLD()
2973 {
2974 const TexInstruction *insn = this->insn->asTex();
2975
2976 if (insn->tex.rIndirectSrc >= 0) {
2977 emitInsn (0xdd380000);
2978 } else {
2979 emitInsn (0xdc380000);
2980 emitField(0x24, 13, insn->tex.r);
2981 }
2982
2983 emitField(0x37, 1, insn->tex.levelZero == 0);
2984 emitField(0x32, 1, insn->tex.target.isMS());
2985 emitField(0x31, 1, insn->tex.liveOnly);
2986 emitField(0x23, 1, insn->tex.useOffsets == 1);
2987 emitField(0x1f, 4, insn->tex.mask);
2988 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2989 insn->tex.target.getDim() - 1);
2990 emitField(0x1c, 1, insn->tex.target.isArray());
2991 emitTEXs (0x14);
2992 emitGPR (0x08, insn->src(0));
2993 emitGPR (0x00, insn->def(0));
2994 }
2995
2996 void
emitTLD4()2997 CodeEmitterGM107::emitTLD4()
2998 {
2999 const TexInstruction *insn = this->insn->asTex();
3000
3001 if (insn->tex.rIndirectSrc >= 0) {
3002 emitInsn (0xdef80000);
3003 emitField(0x26, 2, insn->tex.gatherComp);
3004 emitField(0x25, 2, insn->tex.useOffsets == 4);
3005 emitField(0x24, 2, insn->tex.useOffsets == 1);
3006 } else {
3007 emitInsn (0xc8380000);
3008 emitField(0x38, 2, insn->tex.gatherComp);
3009 emitField(0x37, 2, insn->tex.useOffsets == 4);
3010 emitField(0x36, 2, insn->tex.useOffsets == 1);
3011 emitField(0x24, 13, insn->tex.r);
3012 }
3013
3014 emitField(0x32, 1, insn->tex.target.isShadow());
3015 emitField(0x31, 1, insn->tex.liveOnly);
3016 emitField(0x23, 1, insn->tex.derivAll);
3017 emitField(0x1f, 4, insn->tex.mask);
3018 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
3019 insn->tex.target.getDim() - 1);
3020 emitField(0x1c, 1, insn->tex.target.isArray());
3021 emitTEXs (0x14);
3022 emitGPR (0x08, insn->src(0));
3023 emitGPR (0x00, insn->def(0));
3024 }
3025
3026 void
emitTXD()3027 CodeEmitterGM107::emitTXD()
3028 {
3029 const TexInstruction *insn = this->insn->asTex();
3030
3031 if (insn->tex.rIndirectSrc >= 0) {
3032 emitInsn (0xde780000);
3033 } else {
3034 emitInsn (0xde380000);
3035 emitField(0x24, 13, insn->tex.r);
3036 }
3037
3038 emitField(0x31, 1, insn->tex.liveOnly);
3039 emitField(0x23, 1, insn->tex.useOffsets == 1);
3040 emitField(0x1f, 4, insn->tex.mask);
3041 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
3042 insn->tex.target.getDim() - 1);
3043 emitField(0x1c, 1, insn->tex.target.isArray());
3044 emitTEXs (0x14);
3045 emitGPR (0x08, insn->src(0));
3046 emitGPR (0x00, insn->def(0));
3047 }
3048
3049 void
emitTMML()3050 CodeEmitterGM107::emitTMML()
3051 {
3052 const TexInstruction *insn = this->insn->asTex();
3053
3054 if (insn->tex.rIndirectSrc >= 0) {
3055 emitInsn (0xdf600000);
3056 } else {
3057 emitInsn (0xdf580000);
3058 emitField(0x24, 13, insn->tex.r);
3059 }
3060
3061 emitField(0x31, 1, insn->tex.liveOnly);
3062 emitField(0x23, 1, insn->tex.derivAll);
3063 emitField(0x1f, 4, insn->tex.mask);
3064 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
3065 insn->tex.target.getDim() - 1);
3066 emitField(0x1c, 1, insn->tex.target.isArray());
3067 emitTEXs (0x14);
3068 emitGPR (0x08, insn->src(0));
3069 emitGPR (0x00, insn->def(0));
3070 }
3071
3072 void
emitTXQ()3073 CodeEmitterGM107::emitTXQ()
3074 {
3075 const TexInstruction *insn = this->insn->asTex();
3076 int type = 0;
3077
3078 switch (insn->tex.query) {
3079 case TXQ_DIMS : type = 0x01; break;
3080 case TXQ_TYPE : type = 0x02; break;
3081 case TXQ_SAMPLE_POSITION: type = 0x05; break;
3082 case TXQ_FILTER : type = 0x10; break;
3083 case TXQ_LOD : type = 0x12; break;
3084 case TXQ_WRAP : type = 0x14; break;
3085 case TXQ_BORDER_COLOUR : type = 0x16; break;
3086 default:
3087 assert(!"invalid txq query");
3088 break;
3089 }
3090
3091 if (insn->tex.rIndirectSrc >= 0) {
3092 emitInsn (0xdf500000);
3093 } else {
3094 emitInsn (0xdf480000);
3095 emitField(0x24, 13, insn->tex.r);
3096 }
3097
3098 emitField(0x31, 1, insn->tex.liveOnly);
3099 emitField(0x1f, 4, insn->tex.mask);
3100 emitField(0x16, 6, type);
3101 emitGPR (0x08, insn->src(0));
3102 emitGPR (0x00, insn->def(0));
3103 }
3104
3105 void
emitDEPBAR()3106 CodeEmitterGM107::emitDEPBAR()
3107 {
3108 emitInsn (0xf0f00000);
3109 emitField(0x1d, 1, 1); /* le */
3110 emitField(0x1a, 3, 5);
3111 emitField(0x14, 6, insn->subOp);
3112 emitField(0x00, 6, insn->subOp);
3113 }
3114
3115 /*******************************************************************************
3116 * misc
3117 ******************************************************************************/
3118
3119 void
emitNOP()3120 CodeEmitterGM107::emitNOP()
3121 {
3122 emitInsn(0x50b00000);
3123 }
3124
3125 void
emitKIL()3126 CodeEmitterGM107::emitKIL()
3127 {
3128 emitInsn (0xe3300000);
3129 emitCond5(0x00, CC_TR);
3130 }
3131
3132 void
emitOUT()3133 CodeEmitterGM107::emitOUT()
3134 {
3135 const int cut = insn->op == OP_RESTART || insn->subOp;
3136 const int emit = insn->op == OP_EMIT;
3137
3138 switch (insn->src(1).getFile()) {
3139 case FILE_GPR:
3140 emitInsn(0xfbe00000);
3141 emitGPR (0x14, insn->src(1));
3142 break;
3143 case FILE_IMMEDIATE:
3144 emitInsn(0xf6e00000);
3145 emitIMMD(0x14, 19, insn->src(1));
3146 break;
3147 case FILE_MEMORY_CONST:
3148 emitInsn(0xebe00000);
3149 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
3150 break;
3151 default:
3152 assert(!"bad src1 file");
3153 break;
3154 }
3155
3156 emitField(0x27, 2, (cut << 1) | emit);
3157 emitGPR (0x08, insn->src(0));
3158 emitGPR (0x00, insn->def(0));
3159 }
3160
3161 void
emitBAR()3162 CodeEmitterGM107::emitBAR()
3163 {
3164 uint8_t subop;
3165
3166 emitInsn (0xf0a80000);
3167
3168 switch (insn->subOp) {
3169 case NV50_IR_SUBOP_BAR_RED_POPC: subop = 0x02; break;
3170 case NV50_IR_SUBOP_BAR_RED_AND: subop = 0x0a; break;
3171 case NV50_IR_SUBOP_BAR_RED_OR: subop = 0x12; break;
3172 case NV50_IR_SUBOP_BAR_ARRIVE: subop = 0x81; break;
3173 default:
3174 subop = 0x80;
3175 assert(insn->subOp == NV50_IR_SUBOP_BAR_SYNC);
3176 break;
3177 }
3178
3179 emitField(0x20, 8, subop);
3180
3181 // barrier id
3182 if (insn->src(0).getFile() == FILE_GPR) {
3183 emitGPR(0x08, insn->src(0));
3184 } else {
3185 ImmediateValue *imm = insn->getSrc(0)->asImm();
3186 assert(imm);
3187 emitField(0x08, 8, imm->reg.data.u32);
3188 emitField(0x2b, 1, 1);
3189 }
3190
3191 // thread count
3192 if (insn->src(1).getFile() == FILE_GPR) {
3193 emitGPR(0x14, insn->src(1));
3194 } else {
3195 ImmediateValue *imm = insn->getSrc(0)->asImm();
3196 assert(imm);
3197 emitField(0x14, 12, imm->reg.data.u32);
3198 emitField(0x2c, 1, 1);
3199 }
3200
3201 if (insn->srcExists(2) && (insn->predSrc != 2)) {
3202 emitPRED (0x27, insn->src(2));
3203 emitField(0x2a, 1, insn->src(2).mod == Modifier(NV50_IR_MOD_NOT));
3204 } else {
3205 emitField(0x27, 3, 7);
3206 }
3207 }
3208
3209 void
emitMEMBAR()3210 CodeEmitterGM107::emitMEMBAR()
3211 {
3212 emitInsn (0xef980000);
3213 emitField(0x08, 2, insn->subOp >> 2);
3214 }
3215
3216 void
emitVOTE()3217 CodeEmitterGM107::emitVOTE()
3218 {
3219 const ImmediateValue *imm;
3220 uint32_t u32;
3221
3222 int r = -1, p = -1;
3223 for (int i = 0; insn->defExists(i); i++) {
3224 if (insn->def(i).getFile() == FILE_GPR)
3225 r = i;
3226 else if (insn->def(i).getFile() == FILE_PREDICATE)
3227 p = i;
3228 }
3229
3230 emitInsn (0x50d80000);
3231 emitField(0x30, 2, insn->subOp);
3232 if (r >= 0)
3233 emitGPR (0x00, insn->def(r));
3234 else
3235 emitGPR (0x00);
3236 if (p >= 0)
3237 emitPRED (0x2d, insn->def(p));
3238 else
3239 emitPRED (0x2d);
3240
3241 switch (insn->src(0).getFile()) {
3242 case FILE_PREDICATE:
3243 emitField(0x2a, 1, insn->src(0).mod == Modifier(NV50_IR_MOD_NOT));
3244 emitPRED (0x27, insn->src(0));
3245 break;
3246 case FILE_IMMEDIATE:
3247 imm = insn->getSrc(0)->asImm();
3248 assert(imm);
3249 u32 = imm->reg.data.u32;
3250 assert(u32 == 0 || u32 == 1);
3251 emitPRED(0x27);
3252 emitField(0x2a, 1, u32 == 0);
3253 break;
3254 default:
3255 assert(!"Unhandled src");
3256 break;
3257 }
3258 }
3259
3260 void
emitSUTarget()3261 CodeEmitterGM107::emitSUTarget()
3262 {
3263 const TexInstruction *insn = this->insn->asTex();
3264 int target = 0;
3265
3266 assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
3267
3268 if (insn->tex.target == TEX_TARGET_BUFFER) {
3269 target = 2;
3270 } else if (insn->tex.target == TEX_TARGET_1D_ARRAY) {
3271 target = 4;
3272 } else if (insn->tex.target == TEX_TARGET_2D ||
3273 insn->tex.target == TEX_TARGET_RECT) {
3274 target = 6;
3275 } else if (insn->tex.target == TEX_TARGET_2D_ARRAY ||
3276 insn->tex.target == TEX_TARGET_CUBE ||
3277 insn->tex.target == TEX_TARGET_CUBE_ARRAY) {
3278 target = 8;
3279 } else if (insn->tex.target == TEX_TARGET_3D) {
3280 target = 10;
3281 } else {
3282 assert(insn->tex.target == TEX_TARGET_1D);
3283 }
3284 emitField(0x20, 4, target);
3285 }
3286
3287 void
emitSUHandle(const int s)3288 CodeEmitterGM107::emitSUHandle(const int s)
3289 {
3290 const TexInstruction *insn = this->insn->asTex();
3291
3292 assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
3293
3294 if (insn->src(s).getFile() == FILE_GPR) {
3295 emitGPR(0x27, insn->src(s));
3296 } else {
3297 ImmediateValue *imm = insn->getSrc(s)->asImm();
3298 assert(imm);
3299 emitField(0x33, 1, 1);
3300 emitField(0x24, 13, imm->reg.data.u32);
3301 }
3302 }
3303
3304 void
emitSUSTx()3305 CodeEmitterGM107::emitSUSTx()
3306 {
3307 const TexInstruction *insn = this->insn->asTex();
3308
3309 emitInsn(0xeb200000);
3310 if (insn->op == OP_SUSTB)
3311 emitField(0x34, 1, 1);
3312 emitSUTarget();
3313
3314 emitLDSTc(0x18);
3315 emitField(0x14, 4, 0xf); // rgba
3316 emitGPR (0x08, insn->src(0));
3317 emitGPR (0x00, insn->src(1));
3318
3319 emitSUHandle(2);
3320 }
3321
3322 void
emitSULDx()3323 CodeEmitterGM107::emitSULDx()
3324 {
3325 const TexInstruction *insn = this->insn->asTex();
3326 int type = 0;
3327
3328 emitInsn(0xeb000000);
3329 if (insn->op == OP_SULDB)
3330 emitField(0x34, 1, 1);
3331 emitSUTarget();
3332
3333 switch (insn->dType) {
3334 case TYPE_S8: type = 1; break;
3335 case TYPE_U16: type = 2; break;
3336 case TYPE_S16: type = 3; break;
3337 case TYPE_U32: type = 4; break;
3338 case TYPE_U64: type = 5; break;
3339 case TYPE_B128: type = 6; break;
3340 default:
3341 assert(insn->dType == TYPE_U8);
3342 break;
3343 }
3344 emitLDSTc(0x18);
3345 emitField(0x14, 3, type);
3346 emitGPR (0x00, insn->def(0));
3347 emitGPR (0x08, insn->src(0));
3348
3349 emitSUHandle(1);
3350 }
3351
3352 void
emitSUREDx()3353 CodeEmitterGM107::emitSUREDx()
3354 {
3355 const TexInstruction *insn = this->insn->asTex();
3356 uint8_t type = 0, subOp;
3357
3358 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS)
3359 emitInsn(0xeac00000);
3360 else
3361 emitInsn(0xea600000);
3362
3363 if (insn->op == OP_SUREDB)
3364 emitField(0x34, 1, 1);
3365 emitSUTarget();
3366
3367 // destination type
3368 switch (insn->dType) {
3369 case TYPE_S32: type = 1; break;
3370 case TYPE_U64: type = 2; break;
3371 case TYPE_F32: type = 3; break;
3372 case TYPE_S64: type = 5; break;
3373 default:
3374 assert(insn->dType == TYPE_U32);
3375 break;
3376 }
3377
3378 // atomic operation
3379 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
3380 subOp = 0;
3381 } else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
3382 subOp = 8;
3383 } else {
3384 subOp = insn->subOp;
3385 }
3386
3387 emitField(0x24, 3, type);
3388 emitField(0x1d, 4, subOp);
3389 emitGPR (0x14, insn->src(1));
3390 emitGPR (0x08, insn->src(0));
3391 emitGPR (0x00, insn->def(0));
3392
3393 emitSUHandle(2);
3394 }
3395
3396 /*******************************************************************************
3397 * assembler front-end
3398 ******************************************************************************/
3399
3400 bool
emitInstruction(Instruction * i)3401 CodeEmitterGM107::emitInstruction(Instruction *i)
3402 {
3403 const unsigned int size = (writeIssueDelays && !(codeSize & 0x1f)) ? 16 : 8;
3404 bool ret = true;
3405
3406 insn = i;
3407
3408 if (insn->encSize != 8) {
3409 ERROR("skipping undecodable instruction: "); insn->print();
3410 return false;
3411 } else
3412 if (codeSize + size > codeSizeLimit) {
3413 ERROR("code emitter output buffer too small\n");
3414 return false;
3415 }
3416
3417 if (writeIssueDelays) {
3418 int n = ((codeSize & 0x1f) / 8) - 1;
3419 if (n < 0) {
3420 data = code;
3421 data[0] = 0x00000000;
3422 data[1] = 0x00000000;
3423 code += 2;
3424 codeSize += 8;
3425 n++;
3426 }
3427
3428 emitField(data, n * 21, 21, insn->sched);
3429 }
3430
3431 switch (insn->op) {
3432 case OP_EXIT:
3433 emitEXIT();
3434 break;
3435 case OP_BRA:
3436 emitBRA();
3437 break;
3438 case OP_CALL:
3439 emitCAL();
3440 break;
3441 case OP_PRECONT:
3442 emitPCNT();
3443 break;
3444 case OP_CONT:
3445 emitCONT();
3446 break;
3447 case OP_PREBREAK:
3448 emitPBK();
3449 break;
3450 case OP_BREAK:
3451 emitBRK();
3452 break;
3453 case OP_PRERET:
3454 emitPRET();
3455 break;
3456 case OP_RET:
3457 emitRET();
3458 break;
3459 case OP_JOINAT:
3460 emitSSY();
3461 break;
3462 case OP_JOIN:
3463 emitSYNC();
3464 break;
3465 case OP_QUADON:
3466 emitSAM();
3467 break;
3468 case OP_QUADPOP:
3469 emitRAM();
3470 break;
3471 case OP_MOV:
3472 emitMOV();
3473 break;
3474 case OP_RDSV:
3475 if (targGM107->isCS2RSV(insn->getSrc(0)->reg.data.sv.sv))
3476 emitCS2R();
3477 else
3478 emitS2R();
3479 break;
3480 case OP_ABS:
3481 case OP_NEG:
3482 case OP_SAT:
3483 case OP_FLOOR:
3484 case OP_CEIL:
3485 case OP_TRUNC:
3486 case OP_CVT:
3487 if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
3488 insn->src(0).getFile() == FILE_PREDICATE)) {
3489 emitMOV();
3490 } else if (isFloatType(insn->dType)) {
3491 if (isFloatType(insn->sType))
3492 emitF2F();
3493 else
3494 emitI2F();
3495 } else {
3496 if (isFloatType(insn->sType))
3497 emitF2I();
3498 else
3499 emitI2I();
3500 }
3501 break;
3502 case OP_SHFL:
3503 emitSHFL();
3504 break;
3505 case OP_ADD:
3506 case OP_SUB:
3507 if (isFloatType(insn->dType)) {
3508 if (insn->dType == TYPE_F64)
3509 emitDADD();
3510 else
3511 emitFADD();
3512 } else {
3513 emitIADD();
3514 }
3515 break;
3516 case OP_MUL:
3517 if (isFloatType(insn->dType)) {
3518 if (insn->dType == TYPE_F64)
3519 emitDMUL();
3520 else
3521 emitFMUL();
3522 } else {
3523 emitIMUL();
3524 }
3525 break;
3526 case OP_MAD:
3527 case OP_FMA:
3528 if (isFloatType(insn->dType)) {
3529 if (insn->dType == TYPE_F64)
3530 emitDFMA();
3531 else
3532 emitFFMA();
3533 } else {
3534 emitIMAD();
3535 }
3536 break;
3537 case OP_SHLADD:
3538 emitISCADD();
3539 break;
3540 case OP_XMAD:
3541 emitXMAD();
3542 break;
3543 case OP_MIN:
3544 case OP_MAX:
3545 if (isFloatType(insn->dType)) {
3546 if (insn->dType == TYPE_F64)
3547 emitDMNMX();
3548 else
3549 emitFMNMX();
3550 } else {
3551 emitIMNMX();
3552 }
3553 break;
3554 case OP_SHL:
3555 if (typeSizeof(insn->sType) == 8)
3556 emitSHF();
3557 else
3558 emitSHL();
3559 break;
3560 case OP_SHR:
3561 if (typeSizeof(insn->sType) == 8)
3562 emitSHF();
3563 else
3564 emitSHR();
3565 break;
3566 case OP_POPCNT:
3567 emitPOPC();
3568 break;
3569 case OP_INSBF:
3570 emitBFI();
3571 break;
3572 case OP_EXTBF:
3573 emitBFE();
3574 break;
3575 case OP_BFIND:
3576 emitFLO();
3577 break;
3578 case OP_PERMT:
3579 emitPRMT();
3580 break;
3581 case OP_SLCT:
3582 if (isFloatType(insn->dType))
3583 emitFCMP();
3584 else
3585 emitICMP();
3586 break;
3587 case OP_SET:
3588 case OP_SET_AND:
3589 case OP_SET_OR:
3590 case OP_SET_XOR:
3591 if (insn->def(0).getFile() != FILE_PREDICATE) {
3592 if (isFloatType(insn->sType))
3593 if (insn->sType == TYPE_F64)
3594 emitDSET();
3595 else
3596 emitFSET();
3597 else
3598 emitISET();
3599 } else {
3600 if (isFloatType(insn->sType))
3601 if (insn->sType == TYPE_F64)
3602 emitDSETP();
3603 else
3604 emitFSETP();
3605 else
3606 emitISETP();
3607 }
3608 break;
3609 case OP_SELP:
3610 emitSEL();
3611 break;
3612 case OP_PRESIN:
3613 case OP_PREEX2:
3614 emitRRO();
3615 break;
3616 case OP_COS:
3617 case OP_SIN:
3618 case OP_EX2:
3619 case OP_LG2:
3620 case OP_RCP:
3621 case OP_RSQ:
3622 case OP_SQRT:
3623 emitMUFU();
3624 break;
3625 case OP_AND:
3626 case OP_OR:
3627 case OP_XOR:
3628 switch (insn->def(0).getFile()) {
3629 case FILE_GPR: emitLOP(); break;
3630 case FILE_PREDICATE: emitPSETP(); break;
3631 default:
3632 assert(!"invalid bool op");
3633 }
3634 break;
3635 case OP_NOT:
3636 emitNOT();
3637 break;
3638 case OP_LOAD:
3639 switch (insn->src(0).getFile()) {
3640 case FILE_MEMORY_CONST : emitLDC(); break;
3641 case FILE_MEMORY_LOCAL : emitLDL(); break;
3642 case FILE_MEMORY_SHARED: emitLDS(); break;
3643 case FILE_MEMORY_GLOBAL: emitLD(); break;
3644 default:
3645 assert(!"invalid load");
3646 emitNOP();
3647 break;
3648 }
3649 break;
3650 case OP_STORE:
3651 switch (insn->src(0).getFile()) {
3652 case FILE_MEMORY_LOCAL : emitSTL(); break;
3653 case FILE_MEMORY_SHARED: emitSTS(); break;
3654 case FILE_MEMORY_GLOBAL: emitST(); break;
3655 default:
3656 assert(!"invalid store");
3657 emitNOP();
3658 break;
3659 }
3660 break;
3661 case OP_ATOM:
3662 if (insn->src(0).getFile() == FILE_MEMORY_SHARED)
3663 emitATOMS();
3664 else
3665 if (!insn->defExists(0) && insn->subOp < NV50_IR_SUBOP_ATOM_CAS)
3666 emitRED();
3667 else
3668 emitATOM();
3669 break;
3670 case OP_CCTL:
3671 emitCCTL();
3672 break;
3673 case OP_VFETCH:
3674 emitALD();
3675 break;
3676 case OP_EXPORT:
3677 emitAST();
3678 break;
3679 case OP_PFETCH:
3680 emitISBERD();
3681 break;
3682 case OP_AFETCH:
3683 emitAL2P();
3684 break;
3685 case OP_LINTERP:
3686 case OP_PINTERP:
3687 emitIPA();
3688 break;
3689 case OP_PIXLD:
3690 emitPIXLD();
3691 break;
3692 case OP_TEX:
3693 case OP_TXL:
3694 if (insn->asTex()->tex.scalar)
3695 emitTEXS();
3696 else
3697 emitTEX();
3698 break;
3699 case OP_TXB:
3700 emitTEX();
3701 break;
3702 case OP_TXF:
3703 if (insn->asTex()->tex.scalar)
3704 emitTEXS();
3705 else
3706 emitTLD();
3707 break;
3708 case OP_TXG:
3709 if (insn->asTex()->tex.scalar)
3710 emitTEXS();
3711 else
3712 emitTLD4();
3713 break;
3714 case OP_TXD:
3715 emitTXD();
3716 break;
3717 case OP_TXQ:
3718 emitTXQ();
3719 break;
3720 case OP_TXLQ:
3721 emitTMML();
3722 break;
3723 case OP_TEXBAR:
3724 emitDEPBAR();
3725 break;
3726 case OP_QUADOP:
3727 emitFSWZADD();
3728 break;
3729 case OP_NOP:
3730 emitNOP();
3731 break;
3732 case OP_DISCARD:
3733 emitKIL();
3734 break;
3735 case OP_EMIT:
3736 case OP_RESTART:
3737 emitOUT();
3738 break;
3739 case OP_BAR:
3740 emitBAR();
3741 break;
3742 case OP_MEMBAR:
3743 emitMEMBAR();
3744 break;
3745 case OP_VOTE:
3746 emitVOTE();
3747 break;
3748 case OP_SUSTB:
3749 case OP_SUSTP:
3750 emitSUSTx();
3751 break;
3752 case OP_SULDB:
3753 case OP_SULDP:
3754 emitSULDx();
3755 break;
3756 case OP_SUREDB:
3757 case OP_SUREDP:
3758 emitSUREDx();
3759 break;
3760 default:
3761 assert(!"invalid opcode");
3762 emitNOP();
3763 ret = false;
3764 break;
3765 }
3766
3767 if (insn->join) {
3768 /*XXX*/
3769 }
3770
3771 code += 2;
3772 codeSize += 8;
3773 return ret;
3774 }
3775
3776 uint32_t
getMinEncodingSize(const Instruction * i) const3777 CodeEmitterGM107::getMinEncodingSize(const Instruction *i) const
3778 {
3779 return 8;
3780 }
3781
3782 /*******************************************************************************
3783 * sched data calculator
3784 ******************************************************************************/
3785
3786 inline void
emitStall(Instruction * insn,uint8_t cnt)3787 SchedDataCalculatorGM107::emitStall(Instruction *insn, uint8_t cnt)
3788 {
3789 assert(cnt < 16);
3790 insn->sched |= cnt;
3791 }
3792
3793 inline void
emitYield(Instruction * insn)3794 SchedDataCalculatorGM107::emitYield(Instruction *insn)
3795 {
3796 insn->sched |= 1 << 4;
3797 }
3798
3799 inline void
emitWrDepBar(Instruction * insn,uint8_t id)3800 SchedDataCalculatorGM107::emitWrDepBar(Instruction *insn, uint8_t id)
3801 {
3802 assert(id < 6);
3803 if ((insn->sched & 0xe0) == 0xe0)
3804 insn->sched ^= 0xe0;
3805 insn->sched |= id << 5;
3806 }
3807
3808 inline void
emitRdDepBar(Instruction * insn,uint8_t id)3809 SchedDataCalculatorGM107::emitRdDepBar(Instruction *insn, uint8_t id)
3810 {
3811 assert(id < 6);
3812 if ((insn->sched & 0x700) == 0x700)
3813 insn->sched ^= 0x700;
3814 insn->sched |= id << 8;
3815 }
3816
3817 inline void
emitWtDepBar(Instruction * insn,uint8_t id)3818 SchedDataCalculatorGM107::emitWtDepBar(Instruction *insn, uint8_t id)
3819 {
3820 assert(id < 6);
3821 insn->sched |= 1 << (11 + id);
3822 }
3823
3824 inline void
emitReuse(Instruction * insn,uint8_t id)3825 SchedDataCalculatorGM107::emitReuse(Instruction *insn, uint8_t id)
3826 {
3827 assert(id < 4);
3828 insn->sched |= 1 << (17 + id);
3829 }
3830
3831 inline void
printSchedInfo(int cycle,const Instruction * insn) const3832 SchedDataCalculatorGM107::printSchedInfo(int cycle,
3833 const Instruction *insn) const
3834 {
3835 uint8_t st, yl, wr, rd, wt, ru;
3836
3837 st = (insn->sched & 0x00000f) >> 0;
3838 yl = (insn->sched & 0x000010) >> 4;
3839 wr = (insn->sched & 0x0000e0) >> 5;
3840 rd = (insn->sched & 0x000700) >> 8;
3841 wt = (insn->sched & 0x01f800) >> 11;
3842 ru = (insn->sched & 0x1e0000) >> 17;
3843
3844 INFO("cycle %i, (st 0x%x, yl 0x%x, wr 0x%x, rd 0x%x, wt 0x%x, ru 0x%x)\n",
3845 cycle, st, yl, wr, rd, wt, ru);
3846 }
3847
3848 inline int
getStall(const Instruction * insn) const3849 SchedDataCalculatorGM107::getStall(const Instruction *insn) const
3850 {
3851 return insn->sched & 0xf;
3852 }
3853
3854 inline int
getWrDepBar(const Instruction * insn) const3855 SchedDataCalculatorGM107::getWrDepBar(const Instruction *insn) const
3856 {
3857 return (insn->sched & 0x0000e0) >> 5;
3858 }
3859
3860 inline int
getRdDepBar(const Instruction * insn) const3861 SchedDataCalculatorGM107::getRdDepBar(const Instruction *insn) const
3862 {
3863 return (insn->sched & 0x000700) >> 8;
3864 }
3865
3866 inline int
getWtDepBar(const Instruction * insn) const3867 SchedDataCalculatorGM107::getWtDepBar(const Instruction *insn) const
3868 {
3869 return (insn->sched & 0x01f800) >> 11;
3870 }
3871
3872 // Emit the reuse flag which allows to make use of the new memory hierarchy
3873 // introduced since Maxwell, the operand reuse cache.
3874 //
3875 // It allows to reduce bank conflicts by caching operands. Each time you issue
3876 // an instruction, that flag can tell the hw which operands are going to be
3877 // re-used by the next instruction. Note that the next instruction has to use
3878 // the same GPR id in the same operand slot.
3879 void
setReuseFlag(Instruction * insn)3880 SchedDataCalculatorGM107::setReuseFlag(Instruction *insn)
3881 {
3882 Instruction *next = insn->next;
3883 BitSet defs(255, true);
3884
3885 if (!targ->isReuseSupported(insn))
3886 return;
3887
3888 for (int d = 0; insn->defExists(d); ++d) {
3889 const Value *def = insn->def(d).rep();
3890 if (insn->def(d).getFile() != FILE_GPR)
3891 continue;
3892 if (typeSizeof(insn->dType) != 4 || def->reg.data.id == 255)
3893 continue;
3894 defs.set(def->reg.data.id);
3895 }
3896
3897 for (int s = 0; insn->srcExists(s); s++) {
3898 const Value *src = insn->src(s).rep();
3899 if (insn->src(s).getFile() != FILE_GPR)
3900 continue;
3901 if (typeSizeof(insn->sType) != 4 || src->reg.data.id == 255)
3902 continue;
3903 if (defs.test(src->reg.data.id))
3904 continue;
3905 if (!next->srcExists(s) || next->src(s).getFile() != FILE_GPR)
3906 continue;
3907 if (src->reg.data.id != next->getSrc(s)->reg.data.id)
3908 continue;
3909 assert(s < 4);
3910 emitReuse(insn, s);
3911 }
3912 }
3913
3914 void
recordWr(const Value * v,int cycle,int ready)3915 SchedDataCalculatorGM107::recordWr(const Value *v, int cycle, int ready)
3916 {
3917 int a = v->reg.data.id, b;
3918
3919 switch (v->reg.file) {
3920 case FILE_GPR:
3921 b = a + v->reg.size / 4;
3922 for (int r = a; r < b; ++r)
3923 score->rd.r[r] = ready;
3924 break;
3925 case FILE_PREDICATE:
3926 // To immediately use a predicate set by any instructions, the minimum
3927 // number of stall counts is 13.
3928 score->rd.p[a] = cycle + 13;
3929 break;
3930 case FILE_FLAGS:
3931 score->rd.c = ready;
3932 break;
3933 default:
3934 break;
3935 }
3936 }
3937
3938 void
checkRd(const Value * v,int cycle,int & delay) const3939 SchedDataCalculatorGM107::checkRd(const Value *v, int cycle, int &delay) const
3940 {
3941 int a = v->reg.data.id, b;
3942 int ready = cycle;
3943
3944 switch (v->reg.file) {
3945 case FILE_GPR:
3946 b = a + v->reg.size / 4;
3947 for (int r = a; r < b; ++r)
3948 ready = MAX2(ready, score->rd.r[r]);
3949 break;
3950 case FILE_PREDICATE:
3951 ready = MAX2(ready, score->rd.p[a]);
3952 break;
3953 case FILE_FLAGS:
3954 ready = MAX2(ready, score->rd.c);
3955 break;
3956 default:
3957 break;
3958 }
3959 if (cycle < ready)
3960 delay = MAX2(delay, ready - cycle);
3961 }
3962
3963 void
commitInsn(const Instruction * insn,int cycle)3964 SchedDataCalculatorGM107::commitInsn(const Instruction *insn, int cycle)
3965 {
3966 const int ready = cycle + targ->getLatency(insn);
3967
3968 for (int d = 0; insn->defExists(d); ++d)
3969 recordWr(insn->getDef(d), cycle, ready);
3970
3971 #ifdef GM107_DEBUG_SCHED_DATA
3972 score->print(cycle);
3973 #endif
3974 }
3975
// Bounds applied by setDelay() to the stall count of an instruction that is
// not dual-issued. The maximum is the largest value accepted by emitStall(),
// which asserts cnt < 16.
#define GM107_MIN_ISSUE_DELAY 0x1
#define GM107_MAX_ISSUE_DELAY 0xf
3978
3979 int
calcDelay(const Instruction * insn,int cycle) const3980 SchedDataCalculatorGM107::calcDelay(const Instruction *insn, int cycle) const
3981 {
3982 int delay = 0, ready = cycle;
3983
3984 for (int s = 0; insn->srcExists(s); ++s)
3985 checkRd(insn->getSrc(s), cycle, delay);
3986
3987 // TODO: make use of getReadLatency()!
3988
3989 return MAX2(delay, ready - cycle);
3990 }
3991
3992 void
setDelay(Instruction * insn,int delay,const Instruction * next)3993 SchedDataCalculatorGM107::setDelay(Instruction *insn, int delay,
3994 const Instruction *next)
3995 {
3996 const OpClass cl = targ->getOpClass(insn->op);
3997 int wr, rd;
3998
3999 if (insn->op == OP_EXIT ||
4000 insn->op == OP_BAR ||
4001 insn->op == OP_MEMBAR) {
4002 delay = GM107_MAX_ISSUE_DELAY;
4003 } else
4004 if (insn->op == OP_QUADON ||
4005 insn->op == OP_QUADPOP) {
4006 delay = 0xd;
4007 } else
4008 if (cl == OPCLASS_FLOW || insn->join) {
4009 delay = 0xd;
4010 }
4011
4012 if (!next || !targ->canDualIssue(insn, next)) {
4013 delay = CLAMP(delay, GM107_MIN_ISSUE_DELAY, GM107_MAX_ISSUE_DELAY);
4014 } else {
4015 delay = 0x0; // dual-issue
4016 }
4017
4018 wr = getWrDepBar(insn);
4019 rd = getRdDepBar(insn);
4020
4021 if (delay == GM107_MIN_ISSUE_DELAY && (wr & rd) != 7) {
4022 // Barriers take one additional clock cycle to become active on top of
4023 // the clock consumed by the instruction producing it.
4024 if (!next || insn->bb != next->bb) {
4025 delay = 0x2;
4026 } else {
4027 int wt = getWtDepBar(next);
4028 if ((wt & (1 << wr)) | (wt & (1 << rd)))
4029 delay = 0x2;
4030 }
4031 }
4032
4033 emitStall(insn, delay);
4034 }
4035
4036
4037 // Return true when the given instruction needs to emit a read dependency
4038 // barrier (for WaR hazards) because it doesn't operate at a fixed latency, and
4039 // setting the maximum number of stall counts is not enough.
4040 bool
needRdDepBar(const Instruction * insn) const4041 SchedDataCalculatorGM107::needRdDepBar(const Instruction *insn) const
4042 {
4043 BitSet srcs(255, true), defs(255, true);
4044 int a, b;
4045
4046 if (!targ->isBarrierRequired(insn))
4047 return false;
4048
4049 // Do not emit a read dependency barrier when the instruction doesn't use
4050 // any GPR (like st s[0x4] 0x0) as input because it's unnecessary.
4051 for (int s = 0; insn->srcExists(s); ++s) {
4052 const Value *src = insn->src(s).rep();
4053 if (insn->src(s).getFile() != FILE_GPR)
4054 continue;
4055 if (src->reg.data.id == 255)
4056 continue;
4057
4058 a = src->reg.data.id;
4059 b = a + src->reg.size / 4;
4060 for (int r = a; r < b; ++r)
4061 srcs.set(r);
4062 }
4063
4064 if (!srcs.popCount())
4065 return false;
4066
4067 // Do not emit a read dependency barrier when the output GPRs are equal to
4068 // the input GPRs (like rcp $r0 $r0) because a write dependency barrier will
4069 // be produced and WaR hazards are prevented.
4070 for (int d = 0; insn->defExists(d); ++d) {
4071 const Value *def = insn->def(d).rep();
4072 if (insn->def(d).getFile() != FILE_GPR)
4073 continue;
4074 if (def->reg.data.id == 255)
4075 continue;
4076
4077 a = def->reg.data.id;
4078 b = a + def->reg.size / 4;
4079 for (int r = a; r < b; ++r)
4080 defs.set(r);
4081 }
4082
4083 srcs.andNot(defs);
4084 if (!srcs.popCount())
4085 return false;
4086
4087 return true;
4088 }
4089
4090 // Return true when the given instruction needs to emit a write dependency
4091 // barrier (for RaW hazards) because it doesn't operate at a fixed latency, and
4092 // setting the maximum number of stall counts is not enough. This is only legal
4093 // if the instruction output something.
4094 bool
needWrDepBar(const Instruction * insn) const4095 SchedDataCalculatorGM107::needWrDepBar(const Instruction *insn) const
4096 {
4097 if (!targ->isBarrierRequired(insn))
4098 return false;
4099
4100 for (int d = 0; insn->defExists(d); ++d) {
4101 if (insn->def(d).getFile() == FILE_GPR ||
4102 insn->def(d).getFile() == FILE_FLAGS ||
4103 insn->def(d).getFile() == FILE_PREDICATE)
4104 return true;
4105 }
4106 return false;
4107 }
4108
4109 // Helper function for findFirstUse() and findFirstDef()
4110 bool
doesInsnWriteTo(const Instruction * insn,const Value * val) const4111 SchedDataCalculatorGM107::doesInsnWriteTo(const Instruction *insn,
4112 const Value *val) const
4113 {
4114 if (val->reg.file != FILE_GPR &&
4115 val->reg.file != FILE_PREDICATE &&
4116 val->reg.file != FILE_FLAGS)
4117 return false;
4118
4119 for (int d = 0; insn->defExists(d); ++d) {
4120 const Value* def = insn->getDef(d);
4121 int minGPR = def->reg.data.id;
4122 int maxGPR = minGPR + def->reg.size / 4 - 1;
4123
4124 if (def->reg.file != val->reg.file)
4125 continue;
4126
4127 if (def->reg.file == FILE_GPR) {
4128 if (val->reg.data.id + val->reg.size / 4 - 1 < minGPR ||
4129 val->reg.data.id > maxGPR)
4130 continue;
4131 return true;
4132 } else
4133 if (def->reg.file == FILE_PREDICATE) {
4134 if (val->reg.data.id != minGPR)
4135 continue;
4136 return true;
4137 } else
4138 if (def->reg.file == FILE_FLAGS) {
4139 if (val->reg.data.id != minGPR)
4140 continue;
4141 return true;
4142 }
4143 }
4144
4145 return false;
4146 }
4147
4148 // Find the next instruction inside the same basic block which uses (reads or
4149 // writes from) the output of the given instruction in order to avoid RaW and
4150 // WaW hazards.
4151 Instruction *
findFirstUse(const Instruction * bari) const4152 SchedDataCalculatorGM107::findFirstUse(const Instruction *bari) const
4153 {
4154 Instruction *insn, *next;
4155
4156 if (!bari->defExists(0))
4157 return NULL;
4158
4159 for (insn = bari->next; insn != NULL; insn = next) {
4160 next = insn->next;
4161
4162 for (int s = 0; insn->srcExists(s); ++s)
4163 if (doesInsnWriteTo(bari, insn->getSrc(s)))
4164 return insn;
4165
4166 for (int d = 0; insn->defExists(d); ++d)
4167 if (doesInsnWriteTo(bari, insn->getDef(d)))
4168 return insn;
4169 }
4170 return NULL;
4171 }
4172
4173 // Find the next instruction inside the same basic block which overwrites, at
4174 // least, one source of the given instruction in order to avoid WaR hazards.
4175 Instruction *
findFirstDef(const Instruction * bari) const4176 SchedDataCalculatorGM107::findFirstDef(const Instruction *bari) const
4177 {
4178 Instruction *insn, *next;
4179
4180 if (!bari->srcExists(0))
4181 return NULL;
4182
4183 for (insn = bari->next; insn != NULL; insn = next) {
4184 next = insn->next;
4185
4186 for (int s = 0; bari->srcExists(s); ++s)
4187 if (doesInsnWriteTo(insn, bari->getSrc(s)))
4188 return insn;
4189 }
4190 return NULL;
4191 }
4192
4193 // Dependency barriers:
4194 // This pass is a bit ugly and could probably be improved by performing a
4195 // better allocation.
4196 //
4197 // The main idea is to avoid WaR and RaW hazards by emitting read/write
4198 // dependency barriers using the control codes.
4199 bool
insertBarriers(BasicBlock * bb)4200 SchedDataCalculatorGM107::insertBarriers(BasicBlock *bb)
4201 {
4202 std::list<LiveBarUse> live_uses;
4203 std::list<LiveBarDef> live_defs;
4204 Instruction *insn, *next;
4205 BitSet bars(6, true);
4206 int bar_id;
4207
4208 for (insn = bb->getEntry(); insn != NULL; insn = next) {
4209 Instruction *usei = NULL, *defi = NULL;
4210 bool need_wr_bar, need_rd_bar;
4211
4212 next = insn->next;
4213
4214 // Expire old barrier uses.
4215 for (std::list<LiveBarUse>::iterator it = live_uses.begin();
4216 it != live_uses.end();) {
4217 if (insn->serial >= it->usei->serial) {
4218 int wr = getWrDepBar(it->insn);
4219 emitWtDepBar(insn, wr);
4220 bars.clr(wr); // free barrier
4221 it = live_uses.erase(it);
4222 continue;
4223 }
4224 ++it;
4225 }
4226
4227 // Expire old barrier defs.
4228 for (std::list<LiveBarDef>::iterator it = live_defs.begin();
4229 it != live_defs.end();) {
4230 if (insn->serial >= it->defi->serial) {
4231 int rd = getRdDepBar(it->insn);
4232 emitWtDepBar(insn, rd);
4233 bars.clr(rd); // free barrier
4234 it = live_defs.erase(it);
4235 continue;
4236 }
4237 ++it;
4238 }
4239
4240 need_wr_bar = needWrDepBar(insn);
4241 need_rd_bar = needRdDepBar(insn);
4242
4243 if (need_wr_bar) {
4244 // When the instruction requires to emit a write dependency barrier
4245 // (all which write something at a variable latency), find the next
4246 // instruction which reads the outputs (or writes to them, potentially
4247 // completing before this insn.
4248 usei = findFirstUse(insn);
4249
4250 // Allocate and emit a new barrier.
4251 bar_id = bars.findFreeRange(1);
4252 if (bar_id == -1)
4253 bar_id = 5;
4254 bars.set(bar_id);
4255 emitWrDepBar(insn, bar_id);
4256 if (usei)
4257 live_uses.push_back(LiveBarUse(insn, usei));
4258 }
4259
4260 if (need_rd_bar) {
4261 // When the instruction requires to emit a read dependency barrier
4262 // (all which read something at a variable latency), find the next
4263 // instruction which will write the inputs.
4264 defi = findFirstDef(insn);
4265
4266 if (usei && defi && usei->serial <= defi->serial)
4267 continue;
4268
4269 // Allocate and emit a new barrier.
4270 bar_id = bars.findFreeRange(1);
4271 if (bar_id == -1)
4272 bar_id = 5;
4273 bars.set(bar_id);
4274 emitRdDepBar(insn, bar_id);
4275 if (defi)
4276 live_defs.push_back(LiveBarDef(insn, defi));
4277 }
4278 }
4279
4280 // Remove unnecessary barrier waits.
4281 BitSet alive_bars(6, true);
4282 for (insn = bb->getEntry(); insn != NULL; insn = next) {
4283 int wr, rd, wt;
4284
4285 next = insn->next;
4286
4287 wr = getWrDepBar(insn);
4288 rd = getRdDepBar(insn);
4289 wt = getWtDepBar(insn);
4290
4291 for (int idx = 0; idx < 6; ++idx) {
4292 if (!(wt & (1 << idx)))
4293 continue;
4294 if (!alive_bars.test(idx)) {
4295 insn->sched &= ~(1 << (11 + idx));
4296 } else {
4297 alive_bars.clr(idx);
4298 }
4299 }
4300
4301 if (wr < 6)
4302 alive_bars.set(wr);
4303 if (rd < 6)
4304 alive_bars.set(rd);
4305 }
4306
4307 return true;
4308 }
4309
4310 bool
visit(Function * func)4311 SchedDataCalculatorGM107::visit(Function *func)
4312 {
4313 ArrayList insns;
4314
4315 func->orderInstructions(insns);
4316
4317 scoreBoards.resize(func->cfg.getSize());
4318 for (size_t i = 0; i < scoreBoards.size(); ++i)
4319 scoreBoards[i].wipe();
4320 return true;
4321 }
4322
4323 bool
visit(BasicBlock * bb)4324 SchedDataCalculatorGM107::visit(BasicBlock *bb)
4325 {
4326 Instruction *insn, *next = NULL;
4327 int cycle = 0;
4328
4329 for (Instruction *insn = bb->getEntry(); insn; insn = insn->next) {
4330 /*XXX*/
4331 insn->sched = 0x7e0;
4332 }
4333
4334 if (!debug_get_bool_option("NV50_PROG_SCHED", true))
4335 return true;
4336
4337 // Insert read/write dependency barriers for instructions which don't
4338 // operate at a fixed latency.
4339 insertBarriers(bb);
4340
4341 score = &scoreBoards.at(bb->getId());
4342
4343 for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
4344 // back branches will wait until all target dependencies are satisfied
4345 if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
4346 continue;
4347 BasicBlock *in = BasicBlock::get(ei.getNode());
4348 score->setMax(&scoreBoards.at(in->getId()));
4349 }
4350
4351 #ifdef GM107_DEBUG_SCHED_DATA
4352 INFO("=== BB:%i initial scores\n", bb->getId());
4353 score->print(cycle);
4354 #endif
4355
4356 // Because barriers are allocated locally (intra-BB), we have to make sure
4357 // that all produced barriers have been consumed before entering inside a
4358 // new basic block. The best way is to do a global allocation pre RA but
4359 // it's really more difficult, especially because of the phi nodes. Anyways,
4360 // it seems like that waiting on a barrier which has already been consumed
4361 // doesn't add any additional cost, it's just not elegant!
4362 Instruction *start = bb->getEntry();
4363 if (start && bb->cfg.incidentCount() > 0) {
4364 for (int b = 0; b < 6; b++)
4365 emitWtDepBar(start, b);
4366 }
4367
4368 for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
4369 next = insn->next;
4370
4371 commitInsn(insn, cycle);
4372 int delay = calcDelay(next, cycle);
4373 setDelay(insn, delay, next);
4374 cycle += getStall(insn);
4375
4376 setReuseFlag(insn);
4377
4378 // XXX: The yield flag seems to destroy a bunch of things when it is
4379 // set on every instruction, need investigation.
4380 //emitYield(insn);
4381
4382 #ifdef GM107_DEBUG_SCHED_DATA
4383 printSchedInfo(cycle, insn);
4384 insn->print();
4385 next->print();
4386 #endif
4387 }
4388
4389 if (!insn)
4390 return true;
4391 commitInsn(insn, cycle);
4392
4393 int bbDelay = -1;
4394
4395 #ifdef GM107_DEBUG_SCHED_DATA
4396 fprintf(stderr, "last instruction is : ");
4397 insn->print();
4398 fprintf(stderr, "cycle=%d\n", cycle);
4399 #endif
4400
4401 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
4402 BasicBlock *out = BasicBlock::get(ei.getNode());
4403
4404 if (ei.getType() != Graph::Edge::BACK) {
4405 // Only test the first instruction of the outgoing block.
4406 next = out->getEntry();
4407 if (next) {
4408 bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
4409 } else {
4410 // When the outgoing BB is empty, make sure to set the number of
4411 // stall counts needed by the instruction because we don't know the
4412 // next instruction.
4413 bbDelay = MAX2(bbDelay, targ->getLatency(insn));
4414 }
4415 } else {
4416 // Wait until all dependencies are satisfied.
4417 const int regsFree = score->getLatest();
4418 next = out->getFirst();
4419 for (int c = cycle; next && c < regsFree; next = next->next) {
4420 bbDelay = MAX2(bbDelay, calcDelay(next, c));
4421 c += getStall(next);
4422 }
4423 next = NULL;
4424 }
4425 }
4426 if (bb->cfg.outgoingCount() != 1)
4427 next = NULL;
4428 setDelay(insn, bbDelay, next);
4429 cycle += getStall(insn);
4430
4431 score->rebase(cycle); // common base for initializing out blocks' scores
4432 return true;
4433 }
4434
4435 /*******************************************************************************
4436 * main
4437 ******************************************************************************/
4438
4439 void
prepareEmission(Function * func)4440 CodeEmitterGM107::prepareEmission(Function *func)
4441 {
4442 SchedDataCalculatorGM107 sched(targGM107);
4443 CodeEmitter::prepareEmission(func);
4444 sched.run(func, true, true);
4445 }
4446
// Return the number of 24-unit groups needed to hold `size`, rounding up
// (0 -> 0, 1..24 -> 1, 25..48 -> 2, ...).
//
// prepareEmission(Program *) adds 8 to a block's size for every group counted
// here, to make room for the scheduling info.
//
// The division is done before the rounding so that the result is correct for
// every uint32_t input; computing (size + 23) / 24 would wrap around for sizes
// within 23 of UINT32_MAX.
static inline uint32_t sizeToBundlesGM107(uint32_t size)
{
   const uint32_t groupSize = 24;

   return size / groupSize + (size % groupSize != 0 ? 1 : 0);
}
4451
4452 void
prepareEmission(Program * prog)4453 CodeEmitterGM107::prepareEmission(Program *prog)
4454 {
4455 for (ArrayList::Iterator fi = prog->allFuncs.iterator();
4456 !fi.end(); fi.next()) {
4457 Function *func = reinterpret_cast<Function *>(fi.get());
4458 func->binPos = prog->binSize;
4459 prepareEmission(func);
4460
4461 // adjust sizes & positions for schedulding info:
4462 if (prog->getTarget()->hasSWSched) {
4463 uint32_t adjPos = func->binPos;
4464 BasicBlock *bb = NULL;
4465 for (int i = 0; i < func->bbCount; ++i) {
4466 bb = func->bbArray[i];
4467 int32_t adjSize = bb->binSize;
4468 if (adjPos % 32) {
4469 adjSize -= 32 - adjPos % 32;
4470 if (adjSize < 0)
4471 adjSize = 0;
4472 }
4473 adjSize = bb->binSize + sizeToBundlesGM107(adjSize) * 8;
4474 bb->binPos = adjPos;
4475 bb->binSize = adjSize;
4476 adjPos += adjSize;
4477 }
4478 if (bb)
4479 func->binSize = adjPos - func->binPos;
4480 }
4481
4482 prog->binSize += func->binSize;
4483 }
4484 }
4485
CodeEmitterGM107(const TargetGM107 * target)4486 CodeEmitterGM107::CodeEmitterGM107(const TargetGM107 *target)
4487 : CodeEmitter(target),
4488 targGM107(target),
4489 progType(Program::TYPE_VERTEX),
4490 insn(NULL),
4491 writeIssueDelays(target->hasSWSched),
4492 data(NULL)
4493 {
4494 code = NULL;
4495 codeSize = codeSizeLimit = 0;
4496 relocInfo = NULL;
4497 }
4498
4499 CodeEmitter *
createCodeEmitterGM107(Program::Type type)4500 TargetGM107::createCodeEmitterGM107(Program::Type type)
4501 {
4502 CodeEmitterGM107 *emit = new CodeEmitterGM107(this);
4503 emit->setProgramType(type);
4504 return emit;
4505 }
4506
4507 } // namespace nv50_ir
4508