1 /*
2 * Copyright 2014 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Ben Skeggs <bskeggs@redhat.com>
23 */
24
25 #include "codegen/nv50_ir_target_gm107.h"
26 #include "codegen/nv50_ir_sched_gm107.h"
27
28 //#define GM107_DEBUG_SCHED_DATA
29
30 namespace nv50_ir {
31
32 class CodeEmitterGM107 : public CodeEmitter
33 {
34 public:
35 CodeEmitterGM107(const TargetGM107 *);
36
37 virtual bool emitInstruction(Instruction *);
38 virtual uint32_t getMinEncodingSize(const Instruction *) const;
39
40 virtual void prepareEmission(Program *);
41 virtual void prepareEmission(Function *);
42
setProgramType(Program::Type pType)43 inline void setProgramType(Program::Type pType) { progType = pType; }
44
45 private:
46 const TargetGM107 *targGM107;
47
48 Program::Type progType;
49
50 const Instruction *insn;
51 const bool writeIssueDelays;
52 uint32_t *data;
53
54 private:
55 inline void emitField(uint32_t *, int, int, uint32_t);
emitField(int b,int s,uint32_t v)56 inline void emitField(int b, int s, uint32_t v) { emitField(code, b, s, v); }
57
58 inline void emitInsn(uint32_t, bool);
emitInsn(uint32_t o)59 inline void emitInsn(uint32_t o) { emitInsn(o, true); }
60 inline void emitPred();
61 inline void emitGPR(int, const Value *);
emitGPR(int pos)62 inline void emitGPR(int pos) {
63 emitGPR(pos, (const Value *)NULL);
64 }
emitGPR(int pos,const ValueRef & ref)65 inline void emitGPR(int pos, const ValueRef &ref) {
66 emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL);
67 }
emitGPR(int pos,const ValueRef * ref)68 inline void emitGPR(int pos, const ValueRef *ref) {
69 emitGPR(pos, ref ? ref->rep() : (const Value *)NULL);
70 }
emitGPR(int pos,const ValueDef & def)71 inline void emitGPR(int pos, const ValueDef &def) {
72 emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL);
73 }
74 inline void emitSYS(int, const Value *);
emitSYS(int pos,const ValueRef & ref)75 inline void emitSYS(int pos, const ValueRef &ref) {
76 emitSYS(pos, ref.get() ? ref.rep() : (const Value *)NULL);
77 }
78 inline void emitPRED(int, const Value *);
emitPRED(int pos)79 inline void emitPRED(int pos) {
80 emitPRED(pos, (const Value *)NULL);
81 }
emitPRED(int pos,const ValueRef & ref)82 inline void emitPRED(int pos, const ValueRef &ref) {
83 emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL);
84 }
emitPRED(int pos,const ValueDef & def)85 inline void emitPRED(int pos, const ValueDef &def) {
86 emitPRED(pos, def.get() ? def.rep() : (const Value *)NULL);
87 }
88 inline void emitADDR(int, int, int, int, const ValueRef &);
89 inline void emitCBUF(int, int, int, int, int, const ValueRef &);
90 inline bool longIMMD(const ValueRef &);
91 inline void emitIMMD(int, int, const ValueRef &);
92
93 void emitCond3(int, CondCode);
94 void emitCond4(int, CondCode);
emitCond5(int pos,CondCode cc)95 void emitCond5(int pos, CondCode cc) { emitCond4(pos, cc); }
96 inline void emitO(int);
97 inline void emitP(int);
98 inline void emitSAT(int);
99 inline void emitCC(int);
100 inline void emitX(int);
101 inline void emitABS(int, const ValueRef &);
102 inline void emitNEG(int, const ValueRef &);
103 inline void emitNEG2(int, const ValueRef &, const ValueRef &);
104 inline void emitFMZ(int, int);
105 inline void emitRND(int, RoundMode, int);
emitRND(int pos)106 inline void emitRND(int pos) {
107 emitRND(pos, insn->rnd, -1);
108 }
109 inline void emitPDIV(int);
110 inline void emitINV(int, const ValueRef &);
111
112 void emitEXIT();
113 void emitBRA();
114 void emitCAL();
115 void emitPCNT();
116 void emitCONT();
117 void emitPBK();
118 void emitBRK();
119 void emitPRET();
120 void emitRET();
121 void emitSSY();
122 void emitSYNC();
123 void emitSAM();
124 void emitRAM();
125
126 void emitPSETP();
127
128 void emitMOV();
129 void emitS2R();
130 void emitCS2R();
131 void emitF2F();
132 void emitF2I();
133 void emitI2F();
134 void emitI2I();
135 void emitSEL();
136 void emitSHFL();
137
138 void emitDADD();
139 void emitDMUL();
140 void emitDFMA();
141 void emitDMNMX();
142 void emitDSET();
143 void emitDSETP();
144
145 void emitFADD();
146 void emitFMUL();
147 void emitFFMA();
148 void emitMUFU();
149 void emitFMNMX();
150 void emitRRO();
151 void emitFCMP();
152 void emitFSET();
153 void emitFSETP();
154 void emitFSWZADD();
155
156 void emitLOP();
157 void emitNOT();
158 void emitIADD();
159 void emitIMUL();
160 void emitIMAD();
161 void emitISCADD();
162 void emitXMAD();
163 void emitIMNMX();
164 void emitICMP();
165 void emitISET();
166 void emitISETP();
167 void emitSHL();
168 void emitSHR();
169 void emitSHF();
170 void emitPOPC();
171 void emitBFI();
172 void emitBFE();
173 void emitFLO();
174 void emitPRMT();
175
176 void emitLDSTs(int, DataType);
177 void emitLDSTc(int);
178 void emitLDC();
179 void emitLDL();
180 void emitLDS();
181 void emitLD();
182 void emitSTL();
183 void emitSTS();
184 void emitST();
185 void emitALD();
186 void emitAST();
187 void emitISBERD();
188 void emitAL2P();
189 void emitIPA();
190 void emitATOM();
191 void emitATOMS();
192 void emitRED();
193 void emitCCTL();
194
195 void emitPIXLD();
196
197 void emitTEXs(int);
198 void emitTEX();
199 void emitTEXS();
200 void emitTLD();
201 void emitTLD4();
202 void emitTXD();
203 void emitTXQ();
204 void emitTMML();
205 void emitDEPBAR();
206
207 void emitNOP();
208 void emitKIL();
209 void emitOUT();
210
211 void emitBAR();
212 void emitMEMBAR();
213
214 void emitVOTE();
215
216 void emitSUTarget();
217 void emitSUHandle(const int s);
218 void emitSUSTx();
219 void emitSULDx();
220 void emitSUREDx();
221 };
222
223 /*******************************************************************************
224 * general instruction layout/fields
225 ******************************************************************************/
226
227 void
emitField(uint32_t * data,int b,int s,uint32_t v)228 CodeEmitterGM107::emitField(uint32_t *data, int b, int s, uint32_t v)
229 {
230 if (b >= 0) {
231 uint32_t m = ((1ULL << s) - 1);
232 uint64_t d = (uint64_t)(v & m) << b;
233 assert(!(v & ~m) || (v & ~m) == ~m);
234 data[1] |= d >> 32;
235 data[0] |= d;
236 }
237 }
238
239 void
emitPred()240 CodeEmitterGM107::emitPred()
241 {
242 if (insn->predSrc >= 0) {
243 emitField(16, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id);
244 emitField(19, 1, insn->cc == CC_NOT_P);
245 } else {
246 emitField(16, 3, 7);
247 }
248 }
249
250 void
emitInsn(uint32_t hi,bool pred)251 CodeEmitterGM107::emitInsn(uint32_t hi, bool pred)
252 {
253 code[0] = 0x00000000;
254 code[1] = hi;
255 if (pred)
256 emitPred();
257 }
258
259 void
emitGPR(int pos,const Value * val)260 CodeEmitterGM107::emitGPR(int pos, const Value *val)
261 {
262 emitField(pos, 8, val && !val->inFile(FILE_FLAGS) ?
263 val->reg.data.id : 255);
264 }
265
266 void
emitSYS(int pos,const Value * val)267 CodeEmitterGM107::emitSYS(int pos, const Value *val)
268 {
269 int id = val ? val->reg.data.id : -1;
270
271 switch (id) {
272 case SV_LANEID : id = 0x00; break;
273 case SV_VERTEX_COUNT : id = 0x10; break;
274 case SV_INVOCATION_ID : id = 0x11; break;
275 case SV_THREAD_KILL : id = 0x13; break;
276 case SV_INVOCATION_INFO: id = 0x1d; break;
277 case SV_COMBINED_TID : id = 0x20; break;
278 case SV_TID : id = 0x21 + val->reg.data.sv.index; break;
279 case SV_CTAID : id = 0x25 + val->reg.data.sv.index; break;
280 case SV_LANEMASK_EQ : id = 0x38; break;
281 case SV_LANEMASK_LT : id = 0x39; break;
282 case SV_LANEMASK_LE : id = 0x3a; break;
283 case SV_LANEMASK_GT : id = 0x3b; break;
284 case SV_LANEMASK_GE : id = 0x3c; break;
285 case SV_CLOCK : id = 0x50 + val->reg.data.sv.index; break;
286 default:
287 assert(!"invalid system value");
288 id = 0;
289 break;
290 }
291
292 emitField(pos, 8, id);
293 }
294
295 void
emitPRED(int pos,const Value * val)296 CodeEmitterGM107::emitPRED(int pos, const Value *val)
297 {
298 emitField(pos, 3, val ? val->reg.data.id : 7);
299 }
300
301 void
emitADDR(int gpr,int off,int len,int shr,const ValueRef & ref)302 CodeEmitterGM107::emitADDR(int gpr, int off, int len, int shr,
303 const ValueRef &ref)
304 {
305 const Value *v = ref.get();
306 assert(!(v->reg.data.offset & ((1 << shr) - 1)));
307 if (gpr >= 0)
308 emitGPR(gpr, ref.getIndirect(0));
309 emitField(off, len, v->reg.data.offset >> shr);
310 }
311
312 void
emitCBUF(int buf,int gpr,int off,int len,int shr,const ValueRef & ref)313 CodeEmitterGM107::emitCBUF(int buf, int gpr, int off, int len, int shr,
314 const ValueRef &ref)
315 {
316 const Value *v = ref.get();
317 const Symbol *s = v->asSym();
318
319 assert(!(s->reg.data.offset & ((1 << shr) - 1)));
320
321 emitField(buf, 5, v->reg.fileIndex);
322 if (gpr >= 0)
323 emitGPR(gpr, ref.getIndirect(0));
324 emitField(off, 16, s->reg.data.offset >> shr);
325 }
326
327 bool
longIMMD(const ValueRef & ref)328 CodeEmitterGM107::longIMMD(const ValueRef &ref)
329 {
330 if (ref.getFile() == FILE_IMMEDIATE) {
331 const ImmediateValue *imm = ref.get()->asImm();
332 if (isFloatType(insn->sType))
333 return imm->reg.data.u32 & 0xfff;
334 else
335 return imm->reg.data.s32 > 0x7ffff || imm->reg.data.s32 < -0x80000;
336 }
337 return false;
338 }
339
340 void
emitIMMD(int pos,int len,const ValueRef & ref)341 CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef &ref)
342 {
343 const ImmediateValue *imm = ref.get()->asImm();
344 uint32_t val = imm->reg.data.u32;
345
346 if (len == 19) {
347 if (insn->sType == TYPE_F32 || insn->sType == TYPE_F16) {
348 assert(!(val & 0x00000fff));
349 val >>= 12;
350 } else if (insn->sType == TYPE_F64) {
351 assert(!(imm->reg.data.u64 & 0x00000fffffffffffULL));
352 val = imm->reg.data.u64 >> 44;
353 } else {
354 assert(!(val & 0xfff80000) || (val & 0xfff80000) == 0xfff80000);
355 }
356 emitField( 56, 1, (val & 0x80000) >> 19);
357 emitField(pos, len, (val & 0x7ffff));
358 } else {
359 emitField(pos, len, val);
360 }
361 }
362
363 /*******************************************************************************
364 * modifiers
365 ******************************************************************************/
366
367 void
emitCond3(int pos,CondCode code)368 CodeEmitterGM107::emitCond3(int pos, CondCode code)
369 {
370 int data = 0;
371
372 switch (code) {
373 case CC_FL : data = 0x00; break;
374 case CC_LTU:
375 case CC_LT : data = 0x01; break;
376 case CC_EQU:
377 case CC_EQ : data = 0x02; break;
378 case CC_LEU:
379 case CC_LE : data = 0x03; break;
380 case CC_GTU:
381 case CC_GT : data = 0x04; break;
382 case CC_NEU:
383 case CC_NE : data = 0x05; break;
384 case CC_GEU:
385 case CC_GE : data = 0x06; break;
386 case CC_TR : data = 0x07; break;
387 default:
388 assert(!"invalid cond3");
389 break;
390 }
391
392 emitField(pos, 3, data);
393 }
394
395 void
emitCond4(int pos,CondCode code)396 CodeEmitterGM107::emitCond4(int pos, CondCode code)
397 {
398 int data = 0;
399
400 switch (code) {
401 case CC_FL: data = 0x00; break;
402 case CC_LT: data = 0x01; break;
403 case CC_EQ: data = 0x02; break;
404 case CC_LE: data = 0x03; break;
405 case CC_GT: data = 0x04; break;
406 case CC_NE: data = 0x05; break;
407 case CC_GE: data = 0x06; break;
408 // case CC_NUM: data = 0x07; break;
409 // case CC_NAN: data = 0x08; break;
410 case CC_LTU: data = 0x09; break;
411 case CC_EQU: data = 0x0a; break;
412 case CC_LEU: data = 0x0b; break;
413 case CC_GTU: data = 0x0c; break;
414 case CC_NEU: data = 0x0d; break;
415 case CC_GEU: data = 0x0e; break;
416 case CC_TR: data = 0x0f; break;
417 default:
418 assert(!"invalid cond4");
419 break;
420 }
421
422 emitField(pos, 4, data);
423 }
424
425 void
emitO(int pos)426 CodeEmitterGM107::emitO(int pos)
427 {
428 emitField(pos, 1, insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT);
429 }
430
431 void
emitP(int pos)432 CodeEmitterGM107::emitP(int pos)
433 {
434 emitField(pos, 1, insn->perPatch);
435 }
436
437 void
emitSAT(int pos)438 CodeEmitterGM107::emitSAT(int pos)
439 {
440 emitField(pos, 1, insn->saturate);
441 }
442
443 void
emitCC(int pos)444 CodeEmitterGM107::emitCC(int pos)
445 {
446 emitField(pos, 1, insn->flagsDef >= 0);
447 }
448
449 void
emitX(int pos)450 CodeEmitterGM107::emitX(int pos)
451 {
452 emitField(pos, 1, insn->flagsSrc >= 0);
453 }
454
455 void
emitABS(int pos,const ValueRef & ref)456 CodeEmitterGM107::emitABS(int pos, const ValueRef &ref)
457 {
458 emitField(pos, 1, ref.mod.abs());
459 }
460
461 void
emitNEG(int pos,const ValueRef & ref)462 CodeEmitterGM107::emitNEG(int pos, const ValueRef &ref)
463 {
464 emitField(pos, 1, ref.mod.neg());
465 }
466
467 void
emitNEG2(int pos,const ValueRef & a,const ValueRef & b)468 CodeEmitterGM107::emitNEG2(int pos, const ValueRef &a, const ValueRef &b)
469 {
470 emitField(pos, 1, a.mod.neg() ^ b.mod.neg());
471 }
472
473 void
emitFMZ(int pos,int len)474 CodeEmitterGM107::emitFMZ(int pos, int len)
475 {
476 emitField(pos, len, insn->dnz << 1 | insn->ftz);
477 }
478
479 void
emitRND(int rmp,RoundMode rnd,int rip)480 CodeEmitterGM107::emitRND(int rmp, RoundMode rnd, int rip)
481 {
482 int rm = 0, ri = 0;
483 switch (rnd) {
484 case ROUND_NI: ri = 1;
485 case ROUND_N : rm = 0; break;
486 case ROUND_MI: ri = 1;
487 case ROUND_M : rm = 1; break;
488 case ROUND_PI: ri = 1;
489 case ROUND_P : rm = 2; break;
490 case ROUND_ZI: ri = 1;
491 case ROUND_Z : rm = 3; break;
492 default:
493 assert(!"invalid round mode");
494 break;
495 }
496 emitField(rip, 1, ri);
497 emitField(rmp, 2, rm);
498 }
499
500 void
emitPDIV(int pos)501 CodeEmitterGM107::emitPDIV(int pos)
502 {
503 assert(insn->postFactor >= -3 && insn->postFactor <= 3);
504 if (insn->postFactor > 0)
505 emitField(pos, 3, 7 - insn->postFactor);
506 else
507 emitField(pos, 3, 0 - insn->postFactor);
508 }
509
510 void
emitINV(int pos,const ValueRef & ref)511 CodeEmitterGM107::emitINV(int pos, const ValueRef &ref)
512 {
513 emitField(pos, 1, !!(ref.mod & Modifier(NV50_IR_MOD_NOT)));
514 }
515
516 /*******************************************************************************
517 * control flow
518 ******************************************************************************/
519
520 void
emitEXIT()521 CodeEmitterGM107::emitEXIT()
522 {
523 emitInsn (0xe3000000);
524 emitCond5(0x00, CC_TR);
525 }
526
527 void
emitBRA()528 CodeEmitterGM107::emitBRA()
529 {
530 const FlowInstruction *insn = this->insn->asFlow();
531 int gpr = -1;
532
533 if (insn->indirect) {
534 if (insn->absolute)
535 emitInsn(0xe2000000); // JMX
536 else
537 emitInsn(0xe2500000); // BRX
538 gpr = 0x08;
539 } else {
540 if (insn->absolute)
541 emitInsn(0xe2100000); // JMP
542 else
543 emitInsn(0xe2400000); // BRA
544 emitField(0x07, 1, insn->allWarp);
545 }
546
547 emitField(0x06, 1, insn->limit);
548 emitCond5(0x00, CC_TR);
549
550 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
551 int32_t pos = insn->target.bb->binPos;
552 if (writeIssueDelays && !(pos & 0x1f))
553 pos += 8;
554 if (!insn->absolute)
555 emitField(0x14, 24, pos - (codeSize + 8));
556 else
557 emitField(0x14, 32, pos);
558 } else {
559 emitCBUF (0x24, gpr, 20, 16, 0, insn->src(0));
560 emitField(0x05, 1, 1);
561 }
562 }
563
564 void
emitCAL()565 CodeEmitterGM107::emitCAL()
566 {
567 const FlowInstruction *insn = this->insn->asFlow();
568
569 if (insn->absolute) {
570 emitInsn(0xe2200000, 0); // JCAL
571 } else {
572 emitInsn(0xe2600000, 0); // CAL
573 }
574
575 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
576 if (!insn->absolute)
577 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
578 else {
579 if (insn->builtin) {
580 int pcAbs = targGM107->getBuiltinOffset(insn->target.builtin);
581 addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfff00000, 20);
582 addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x000fffff, -12);
583 } else {
584 emitField(0x14, 32, insn->target.bb->binPos);
585 }
586 }
587 } else {
588 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
589 emitField(0x05, 1, 1);
590 }
591 }
592
593 void
emitPCNT()594 CodeEmitterGM107::emitPCNT()
595 {
596 const FlowInstruction *insn = this->insn->asFlow();
597
598 emitInsn(0xe2b00000, 0);
599
600 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
601 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
602 } else {
603 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
604 emitField(0x05, 1, 1);
605 }
606 }
607
608 void
emitCONT()609 CodeEmitterGM107::emitCONT()
610 {
611 emitInsn (0xe3500000);
612 emitCond5(0x00, CC_TR);
613 }
614
615 void
emitPBK()616 CodeEmitterGM107::emitPBK()
617 {
618 const FlowInstruction *insn = this->insn->asFlow();
619
620 emitInsn(0xe2a00000, 0);
621
622 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
623 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
624 } else {
625 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
626 emitField(0x05, 1, 1);
627 }
628 }
629
630 void
emitBRK()631 CodeEmitterGM107::emitBRK()
632 {
633 emitInsn (0xe3400000);
634 emitCond5(0x00, CC_TR);
635 }
636
637 void
emitPRET()638 CodeEmitterGM107::emitPRET()
639 {
640 const FlowInstruction *insn = this->insn->asFlow();
641
642 emitInsn(0xe2700000, 0);
643
644 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
645 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
646 } else {
647 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
648 emitField(0x05, 1, 1);
649 }
650 }
651
652 void
emitRET()653 CodeEmitterGM107::emitRET()
654 {
655 emitInsn (0xe3200000);
656 emitCond5(0x00, CC_TR);
657 }
658
659 void
emitSSY()660 CodeEmitterGM107::emitSSY()
661 {
662 const FlowInstruction *insn = this->insn->asFlow();
663
664 emitInsn(0xe2900000, 0);
665
666 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
667 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
668 } else {
669 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
670 emitField(0x05, 1, 1);
671 }
672 }
673
674 void
emitSYNC()675 CodeEmitterGM107::emitSYNC()
676 {
677 emitInsn (0xf0f80000);
678 emitCond5(0x00, CC_TR);
679 }
680
681 void
emitSAM()682 CodeEmitterGM107::emitSAM()
683 {
684 emitInsn(0xe3700000, 0);
685 }
686
687 void
emitRAM()688 CodeEmitterGM107::emitRAM()
689 {
690 emitInsn(0xe3800000, 0);
691 }
692
693 /*******************************************************************************
694 * predicate/cc
695 ******************************************************************************/
696
697 void
emitPSETP()698 CodeEmitterGM107::emitPSETP()
699 {
700
701 emitInsn(0x50900000);
702
703 switch (insn->op) {
704 case OP_AND: emitField(0x18, 3, 0); break;
705 case OP_OR: emitField(0x18, 3, 1); break;
706 case OP_XOR: emitField(0x18, 3, 2); break;
707 default:
708 assert(!"unexpected operation");
709 break;
710 }
711
712 // emitINV (0x2a);
713 emitPRED(0x27); // TODO: support 3-arg
714 emitINV (0x20, insn->src(1));
715 emitPRED(0x1d, insn->src(1));
716 emitINV (0x0f, insn->src(0));
717 emitPRED(0x0c, insn->src(0));
718 emitPRED(0x03, insn->def(0));
719 emitPRED(0x00);
720 }
721
722 /*******************************************************************************
723 * movement / conversion
724 ******************************************************************************/
725
726 void
emitMOV()727 CodeEmitterGM107::emitMOV()
728 {
729 if (insn->src(0).getFile() != FILE_IMMEDIATE) {
730 switch (insn->src(0).getFile()) {
731 case FILE_GPR:
732 if (insn->def(0).getFile() == FILE_PREDICATE) {
733 emitInsn(0x5b6a0000);
734 emitGPR (0x08);
735 } else {
736 emitInsn(0x5c980000);
737 }
738 emitGPR (0x14, insn->src(0));
739 break;
740 case FILE_MEMORY_CONST:
741 emitInsn(0x4c980000);
742 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
743 break;
744 case FILE_IMMEDIATE:
745 emitInsn(0x38980000);
746 emitIMMD(0x14, 19, insn->src(0));
747 break;
748 case FILE_PREDICATE:
749 emitInsn(0x50880000);
750 emitPRED(0x0c, insn->src(0));
751 emitPRED(0x1d);
752 emitPRED(0x27);
753 break;
754 default:
755 assert(!"bad src file");
756 break;
757 }
758 if (insn->def(0).getFile() != FILE_PREDICATE &&
759 insn->src(0).getFile() != FILE_PREDICATE)
760 emitField(0x27, 4, insn->lanes);
761 } else {
762 emitInsn (0x01000000);
763 emitIMMD (0x14, 32, insn->src(0));
764 emitField(0x0c, 4, insn->lanes);
765 }
766
767 if (insn->def(0).getFile() == FILE_PREDICATE) {
768 emitPRED(0x27);
769 emitPRED(0x03, insn->def(0));
770 emitPRED(0x00);
771 } else {
772 emitGPR(0x00, insn->def(0));
773 }
774 }
775
776 void
emitS2R()777 CodeEmitterGM107::emitS2R()
778 {
779 emitInsn(0xf0c80000);
780 emitSYS (0x14, insn->src(0));
781 emitGPR (0x00, insn->def(0));
782 }
783
784 void
emitCS2R()785 CodeEmitterGM107::emitCS2R()
786 {
787 emitInsn(0x50c80000);
788 emitSYS (0x14, insn->src(0));
789 emitGPR (0x00, insn->def(0));
790 }
791
792 void
emitF2F()793 CodeEmitterGM107::emitF2F()
794 {
795 RoundMode rnd = insn->rnd;
796
797 switch (insn->op) {
798 case OP_FLOOR: rnd = ROUND_MI; break;
799 case OP_CEIL : rnd = ROUND_PI; break;
800 case OP_TRUNC: rnd = ROUND_ZI; break;
801 default:
802 break;
803 }
804
805 switch (insn->src(0).getFile()) {
806 case FILE_GPR:
807 emitInsn(0x5ca80000);
808 emitGPR (0x14, insn->src(0));
809 break;
810 case FILE_MEMORY_CONST:
811 emitInsn(0x4ca80000);
812 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
813 break;
814 case FILE_IMMEDIATE:
815 emitInsn(0x38a80000);
816 emitIMMD(0x14, 19, insn->src(0));
817 break;
818 default:
819 assert(!"bad src0 file");
820 break;
821 }
822
823 emitField(0x32, 1, (insn->op == OP_SAT) || insn->saturate);
824 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
825 emitCC (0x2f);
826 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
827 emitFMZ (0x2c, 1);
828 emitField(0x29, 1, insn->subOp);
829 emitRND (0x27, rnd, 0x2a);
830 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
831 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
832 emitGPR (0x00, insn->def(0));
833 }
834
835 void
emitF2I()836 CodeEmitterGM107::emitF2I()
837 {
838 RoundMode rnd = insn->rnd;
839
840 switch (insn->op) {
841 case OP_FLOOR: rnd = ROUND_M; break;
842 case OP_CEIL : rnd = ROUND_P; break;
843 case OP_TRUNC: rnd = ROUND_Z; break;
844 default:
845 break;
846 }
847
848 switch (insn->src(0).getFile()) {
849 case FILE_GPR:
850 emitInsn(0x5cb00000);
851 emitGPR (0x14, insn->src(0));
852 break;
853 case FILE_MEMORY_CONST:
854 emitInsn(0x4cb00000);
855 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
856 break;
857 case FILE_IMMEDIATE:
858 emitInsn(0x38b00000);
859 emitIMMD(0x14, 19, insn->src(0));
860 break;
861 default:
862 assert(!"bad src0 file");
863 break;
864 }
865
866 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
867 emitCC (0x2f);
868 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
869 emitFMZ (0x2c, 1);
870 emitRND (0x27, rnd, 0x2a);
871 emitField(0x0c, 1, isSignedType(insn->dType));
872 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
873 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
874 emitGPR (0x00, insn->def(0));
875 }
876
877 void
emitI2F()878 CodeEmitterGM107::emitI2F()
879 {
880 RoundMode rnd = insn->rnd;
881
882 switch (insn->op) {
883 case OP_FLOOR: rnd = ROUND_M; break;
884 case OP_CEIL : rnd = ROUND_P; break;
885 case OP_TRUNC: rnd = ROUND_Z; break;
886 default:
887 break;
888 }
889
890 switch (insn->src(0).getFile()) {
891 case FILE_GPR:
892 emitInsn(0x5cb80000);
893 emitGPR (0x14, insn->src(0));
894 break;
895 case FILE_MEMORY_CONST:
896 emitInsn(0x4cb80000);
897 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
898 break;
899 case FILE_IMMEDIATE:
900 emitInsn(0x38b80000);
901 emitIMMD(0x14, 19, insn->src(0));
902 break;
903 default:
904 assert(!"bad src0 file");
905 break;
906 }
907
908 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
909 emitCC (0x2f);
910 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
911 emitField(0x29, 2, insn->subOp);
912 emitRND (0x27, rnd, -1);
913 emitField(0x0d, 1, isSignedType(insn->sType));
914 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
915 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
916 emitGPR (0x00, insn->def(0));
917 }
918
919 void
emitI2I()920 CodeEmitterGM107::emitI2I()
921 {
922 switch (insn->src(0).getFile()) {
923 case FILE_GPR:
924 emitInsn(0x5ce00000);
925 emitGPR (0x14, insn->src(0));
926 break;
927 case FILE_MEMORY_CONST:
928 emitInsn(0x4ce00000);
929 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
930 break;
931 case FILE_IMMEDIATE:
932 emitInsn(0x38e00000);
933 emitIMMD(0x14, 19, insn->src(0));
934 break;
935 default:
936 assert(!"bad src0 file");
937 break;
938 }
939
940 emitSAT (0x32);
941 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
942 emitCC (0x2f);
943 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
944 emitField(0x29, 2, insn->subOp);
945 emitField(0x0d, 1, isSignedType(insn->sType));
946 emitField(0x0c, 1, isSignedType(insn->dType));
947 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
948 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
949 emitGPR (0x00, insn->def(0));
950 }
951
952 void
gm107_selpFlip(const FixupEntry * entry,uint32_t * code,const FixupData & data)953 gm107_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
954 {
955 int loc = entry->loc;
956 if (data.force_persample_interp)
957 code[loc + 1] |= 1 << 10;
958 else
959 code[loc + 1] &= ~(1 << 10);
960 }
961
962 void
emitSEL()963 CodeEmitterGM107::emitSEL()
964 {
965 switch (insn->src(1).getFile()) {
966 case FILE_GPR:
967 emitInsn(0x5ca00000);
968 emitGPR (0x14, insn->src(1));
969 break;
970 case FILE_MEMORY_CONST:
971 emitInsn(0x4ca00000);
972 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
973 break;
974 case FILE_IMMEDIATE:
975 emitInsn(0x38a00000);
976 emitIMMD(0x14, 19, insn->src(1));
977 break;
978 default:
979 assert(!"bad src1 file");
980 break;
981 }
982
983 emitINV (0x2a, insn->src(2));
984 emitPRED(0x27, insn->src(2));
985 emitGPR (0x08, insn->src(0));
986 emitGPR (0x00, insn->def(0));
987
988 if (insn->subOp == 1) {
989 addInterp(0, 0, gm107_selpFlip);
990 }
991 }
992
993 void
emitSHFL()994 CodeEmitterGM107::emitSHFL()
995 {
996 int type = 0;
997
998 emitInsn (0xef100000);
999
1000 switch (insn->src(1).getFile()) {
1001 case FILE_GPR:
1002 emitGPR(0x14, insn->src(1));
1003 break;
1004 case FILE_IMMEDIATE:
1005 emitIMMD(0x14, 5, insn->src(1));
1006 type |= 1;
1007 break;
1008 default:
1009 assert(!"invalid src1 file");
1010 break;
1011 }
1012
1013 switch (insn->src(2).getFile()) {
1014 case FILE_GPR:
1015 emitGPR(0x27, insn->src(2));
1016 break;
1017 case FILE_IMMEDIATE:
1018 emitIMMD(0x22, 13, insn->src(2));
1019 type |= 2;
1020 break;
1021 default:
1022 assert(!"invalid src2 file");
1023 break;
1024 }
1025
1026 if (!insn->defExists(1))
1027 emitPRED(0x30);
1028 else {
1029 assert(insn->def(1).getFile() == FILE_PREDICATE);
1030 emitPRED(0x30, insn->def(1));
1031 }
1032
1033 emitField(0x1e, 2, insn->subOp);
1034 emitField(0x1c, 2, type);
1035 emitGPR (0x08, insn->src(0));
1036 emitGPR (0x00, insn->def(0));
1037 }
1038
1039 /*******************************************************************************
1040 * double
1041 ******************************************************************************/
1042
1043 void
emitDADD()1044 CodeEmitterGM107::emitDADD()
1045 {
1046 switch (insn->src(1).getFile()) {
1047 case FILE_GPR:
1048 emitInsn(0x5c700000);
1049 emitGPR (0x14, insn->src(1));
1050 break;
1051 case FILE_MEMORY_CONST:
1052 emitInsn(0x4c700000);
1053 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1054 break;
1055 case FILE_IMMEDIATE:
1056 emitInsn(0x38700000);
1057 emitIMMD(0x14, 19, insn->src(1));
1058 break;
1059 default:
1060 assert(!"bad src1 file");
1061 break;
1062 }
1063 emitABS(0x31, insn->src(1));
1064 emitNEG(0x30, insn->src(0));
1065 emitCC (0x2f);
1066 emitABS(0x2e, insn->src(0));
1067 emitNEG(0x2d, insn->src(1));
1068
1069 if (insn->op == OP_SUB)
1070 code[1] ^= 0x00002000;
1071
1072 emitGPR(0x08, insn->src(0));
1073 emitGPR(0x00, insn->def(0));
1074 }
1075
1076 void
emitDMUL()1077 CodeEmitterGM107::emitDMUL()
1078 {
1079 switch (insn->src(1).getFile()) {
1080 case FILE_GPR:
1081 emitInsn(0x5c800000);
1082 emitGPR (0x14, insn->src(1));
1083 break;
1084 case FILE_MEMORY_CONST:
1085 emitInsn(0x4c800000);
1086 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1087 break;
1088 case FILE_IMMEDIATE:
1089 emitInsn(0x38800000);
1090 emitIMMD(0x14, 19, insn->src(1));
1091 break;
1092 default:
1093 assert(!"bad src1 file");
1094 break;
1095 }
1096
1097 emitNEG2(0x30, insn->src(0), insn->src(1));
1098 emitCC (0x2f);
1099 emitRND (0x27);
1100 emitGPR (0x08, insn->src(0));
1101 emitGPR (0x00, insn->def(0));
1102 }
1103
1104 void
emitDFMA()1105 CodeEmitterGM107::emitDFMA()
1106 {
1107 switch(insn->src(2).getFile()) {
1108 case FILE_GPR:
1109 switch (insn->src(1).getFile()) {
1110 case FILE_GPR:
1111 emitInsn(0x5b700000);
1112 emitGPR (0x14, insn->src(1));
1113 break;
1114 case FILE_MEMORY_CONST:
1115 emitInsn(0x4b700000);
1116 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1117 break;
1118 case FILE_IMMEDIATE:
1119 emitInsn(0x36700000);
1120 emitIMMD(0x14, 19, insn->src(1));
1121 break;
1122 default:
1123 assert(!"bad src1 file");
1124 break;
1125 }
1126 emitGPR (0x27, insn->src(2));
1127 break;
1128 case FILE_MEMORY_CONST:
1129 emitInsn(0x53700000);
1130 emitGPR (0x27, insn->src(1));
1131 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1132 break;
1133 default:
1134 assert(!"bad src2 file");
1135 break;
1136 }
1137
1138 emitRND (0x32);
1139 emitNEG (0x31, insn->src(2));
1140 emitNEG2(0x30, insn->src(0), insn->src(1));
1141 emitCC (0x2f);
1142 emitGPR (0x08, insn->src(0));
1143 emitGPR (0x00, insn->def(0));
1144 }
1145
1146 void
emitDMNMX()1147 CodeEmitterGM107::emitDMNMX()
1148 {
1149 switch (insn->src(1).getFile()) {
1150 case FILE_GPR:
1151 emitInsn(0x5c500000);
1152 emitGPR (0x14, insn->src(1));
1153 break;
1154 case FILE_MEMORY_CONST:
1155 emitInsn(0x4c500000);
1156 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1157 break;
1158 case FILE_IMMEDIATE:
1159 emitInsn(0x38500000);
1160 emitIMMD(0x14, 19, insn->src(1));
1161 break;
1162 default:
1163 assert(!"bad src1 file");
1164 break;
1165 }
1166
1167 emitABS (0x31, insn->src(1));
1168 emitNEG (0x30, insn->src(0));
1169 emitCC (0x2f);
1170 emitABS (0x2e, insn->src(0));
1171 emitNEG (0x2d, insn->src(1));
1172 emitField(0x2a, 1, insn->op == OP_MAX);
1173 emitPRED (0x27);
1174 emitGPR (0x08, insn->src(0));
1175 emitGPR (0x00, insn->def(0));
1176 }
1177
1178 void
emitDSET()1179 CodeEmitterGM107::emitDSET()
1180 {
1181 const CmpInstruction *insn = this->insn->asCmp();
1182
1183 switch (insn->src(1).getFile()) {
1184 case FILE_GPR:
1185 emitInsn(0x59000000);
1186 emitGPR (0x14, insn->src(1));
1187 break;
1188 case FILE_MEMORY_CONST:
1189 emitInsn(0x49000000);
1190 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1191 break;
1192 case FILE_IMMEDIATE:
1193 emitInsn(0x32000000);
1194 emitIMMD(0x14, 19, insn->src(1));
1195 break;
1196 default:
1197 assert(!"bad src1 file");
1198 break;
1199 }
1200
1201 if (insn->op != OP_SET) {
1202 switch (insn->op) {
1203 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1204 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1205 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1206 default:
1207 assert(!"invalid set op");
1208 break;
1209 }
1210 emitPRED(0x27, insn->src(2));
1211 } else {
1212 emitPRED(0x27);
1213 }
1214
1215 emitABS (0x36, insn->src(0));
1216 emitNEG (0x35, insn->src(1));
1217 emitField(0x34, 1, insn->dType == TYPE_F32);
1218 emitCond4(0x30, insn->setCond);
1219 emitCC (0x2f);
1220 emitABS (0x2c, insn->src(1));
1221 emitNEG (0x2b, insn->src(0));
1222 emitGPR (0x08, insn->src(0));
1223 emitGPR (0x00, insn->def(0));
1224 }
1225
1226 void
emitDSETP()1227 CodeEmitterGM107::emitDSETP()
1228 {
1229 const CmpInstruction *insn = this->insn->asCmp();
1230
1231 switch (insn->src(1).getFile()) {
1232 case FILE_GPR:
1233 emitInsn(0x5b800000);
1234 emitGPR (0x14, insn->src(1));
1235 break;
1236 case FILE_MEMORY_CONST:
1237 emitInsn(0x4b800000);
1238 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1239 break;
1240 case FILE_IMMEDIATE:
1241 emitInsn(0x36800000);
1242 emitIMMD(0x14, 19, insn->src(1));
1243 break;
1244 default:
1245 assert(!"bad src1 file");
1246 break;
1247 }
1248
1249 if (insn->op != OP_SET) {
1250 switch (insn->op) {
1251 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1252 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1253 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1254 default:
1255 assert(!"invalid set op");
1256 break;
1257 }
1258 emitPRED(0x27, insn->src(2));
1259 } else {
1260 emitPRED(0x27);
1261 }
1262
1263 emitCond4(0x30, insn->setCond);
1264 emitABS (0x2c, insn->src(1));
1265 emitNEG (0x2b, insn->src(0));
1266 emitGPR (0x08, insn->src(0));
1267 emitABS (0x07, insn->src(0));
1268 emitNEG (0x06, insn->src(1));
1269 emitPRED (0x03, insn->def(0));
1270 if (insn->defExists(1))
1271 emitPRED(0x00, insn->def(1));
1272 else
1273 emitPRED(0x00);
1274 }
1275
1276 /*******************************************************************************
1277 * float
1278 ******************************************************************************/
1279
1280 void
emitFADD()1281 CodeEmitterGM107::emitFADD()
1282 {
1283 if (!longIMMD(insn->src(1))) {
1284 switch (insn->src(1).getFile()) {
1285 case FILE_GPR:
1286 emitInsn(0x5c580000);
1287 emitGPR (0x14, insn->src(1));
1288 break;
1289 case FILE_MEMORY_CONST:
1290 emitInsn(0x4c580000);
1291 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1292 break;
1293 case FILE_IMMEDIATE:
1294 emitInsn(0x38580000);
1295 emitIMMD(0x14, 19, insn->src(1));
1296 break;
1297 default:
1298 assert(!"bad src1 file");
1299 break;
1300 }
1301 emitSAT(0x32);
1302 emitABS(0x31, insn->src(1));
1303 emitNEG(0x30, insn->src(0));
1304 emitCC (0x2f);
1305 emitABS(0x2e, insn->src(0));
1306 emitNEG(0x2d, insn->src(1));
1307 emitFMZ(0x2c, 1);
1308
1309 if (insn->op == OP_SUB)
1310 code[1] ^= 0x00002000;
1311 } else {
1312 emitInsn(0x08000000);
1313 emitABS(0x39, insn->src(1));
1314 emitNEG(0x38, insn->src(0));
1315 emitFMZ(0x37, 1);
1316 emitABS(0x36, insn->src(0));
1317 emitNEG(0x35, insn->src(1));
1318 emitCC (0x34);
1319 emitIMMD(0x14, 32, insn->src(1));
1320
1321 if (insn->op == OP_SUB)
1322 code[1] ^= 0x00080000;
1323 }
1324
1325 emitGPR(0x08, insn->src(0));
1326 emitGPR(0x00, insn->def(0));
1327 }
1328
1329 void
emitFMUL()1330 CodeEmitterGM107::emitFMUL()
1331 {
1332 if (!longIMMD(insn->src(1))) {
1333 switch (insn->src(1).getFile()) {
1334 case FILE_GPR:
1335 emitInsn(0x5c680000);
1336 emitGPR (0x14, insn->src(1));
1337 break;
1338 case FILE_MEMORY_CONST:
1339 emitInsn(0x4c680000);
1340 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1341 break;
1342 case FILE_IMMEDIATE:
1343 emitInsn(0x38680000);
1344 emitIMMD(0x14, 19, insn->src(1));
1345 break;
1346 default:
1347 assert(!"bad src1 file");
1348 break;
1349 }
1350 emitSAT (0x32);
1351 emitNEG2(0x30, insn->src(0), insn->src(1));
1352 emitCC (0x2f);
1353 emitFMZ (0x2c, 2);
1354 emitPDIV(0x29);
1355 emitRND (0x27);
1356 } else {
1357 emitInsn(0x1e000000);
1358 emitSAT (0x37);
1359 emitFMZ (0x35, 2);
1360 emitCC (0x34);
1361 emitIMMD(0x14, 32, insn->src(1));
1362 if (insn->src(0).mod.neg() ^ insn->src(1).mod.neg())
1363 code[1] ^= 0x00080000; /* flip immd sign bit */
1364 }
1365
1366 emitGPR(0x08, insn->src(0));
1367 emitGPR(0x00, insn->def(0));
1368 }
1369
1370 void
emitFFMA()1371 CodeEmitterGM107::emitFFMA()
1372 {
1373 bool isLongIMMD = false;
1374 switch(insn->src(2).getFile()) {
1375 case FILE_GPR:
1376 switch (insn->src(1).getFile()) {
1377 case FILE_GPR:
1378 emitInsn(0x59800000);
1379 emitGPR (0x14, insn->src(1));
1380 break;
1381 case FILE_MEMORY_CONST:
1382 emitInsn(0x49800000);
1383 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1384 break;
1385 case FILE_IMMEDIATE:
1386 if (longIMMD(insn->getSrc(1))) {
1387 assert(insn->getDef(0)->reg.data.id == insn->getSrc(2)->reg.data.id);
1388 isLongIMMD = true;
1389 emitInsn(0x0c000000);
1390 emitIMMD(0x14, 32, insn->src(1));
1391 } else {
1392 emitInsn(0x32800000);
1393 emitIMMD(0x14, 19, insn->src(1));
1394 }
1395 break;
1396 default:
1397 assert(!"bad src1 file");
1398 break;
1399 }
1400 if (!isLongIMMD)
1401 emitGPR (0x27, insn->src(2));
1402 break;
1403 case FILE_MEMORY_CONST:
1404 emitInsn(0x51800000);
1405 emitGPR (0x27, insn->src(1));
1406 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1407 break;
1408 default:
1409 assert(!"bad src2 file");
1410 break;
1411 }
1412
1413 if (isLongIMMD) {
1414 emitNEG (0x39, insn->src(2));
1415 emitNEG2(0x38, insn->src(0), insn->src(1));
1416 emitSAT (0x37);
1417 emitCC (0x34);
1418 } else {
1419 emitRND (0x33);
1420 emitSAT (0x32);
1421 emitNEG (0x31, insn->src(2));
1422 emitNEG2(0x30, insn->src(0), insn->src(1));
1423 emitCC (0x2f);
1424 }
1425
1426 emitFMZ(0x35, 2);
1427 emitGPR(0x08, insn->src(0));
1428 emitGPR(0x00, insn->def(0));
1429 }
1430
1431 void
emitMUFU()1432 CodeEmitterGM107::emitMUFU()
1433 {
1434 int mufu = 0;
1435
1436 switch (insn->op) {
1437 case OP_COS: mufu = 0; break;
1438 case OP_SIN: mufu = 1; break;
1439 case OP_EX2: mufu = 2; break;
1440 case OP_LG2: mufu = 3; break;
1441 case OP_RCP: mufu = 4 + 2 * insn->subOp; break;
1442 case OP_RSQ: mufu = 5 + 2 * insn->subOp; break;
1443 case OP_SQRT: mufu = 8; break;
1444 default:
1445 assert(!"invalid mufu");
1446 break;
1447 }
1448
1449 emitInsn (0x50800000);
1450 emitSAT (0x32);
1451 emitNEG (0x30, insn->src(0));
1452 emitABS (0x2e, insn->src(0));
1453 emitField(0x14, 4, mufu);
1454 emitGPR (0x08, insn->src(0));
1455 emitGPR (0x00, insn->def(0));
1456 }
1457
1458 void
emitFMNMX()1459 CodeEmitterGM107::emitFMNMX()
1460 {
1461 switch (insn->src(1).getFile()) {
1462 case FILE_GPR:
1463 emitInsn(0x5c600000);
1464 emitGPR (0x14, insn->src(1));
1465 break;
1466 case FILE_MEMORY_CONST:
1467 emitInsn(0x4c600000);
1468 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1469 break;
1470 case FILE_IMMEDIATE:
1471 emitInsn(0x38600000);
1472 emitIMMD(0x14, 19, insn->src(1));
1473 break;
1474 default:
1475 assert(!"bad src1 file");
1476 break;
1477 }
1478
1479 emitField(0x2a, 1, insn->op == OP_MAX);
1480 emitPRED (0x27);
1481
1482 emitABS(0x31, insn->src(1));
1483 emitNEG(0x30, insn->src(0));
1484 emitCC (0x2f);
1485 emitABS(0x2e, insn->src(0));
1486 emitNEG(0x2d, insn->src(1));
1487 emitFMZ(0x2c, 1);
1488 emitGPR(0x08, insn->src(0));
1489 emitGPR(0x00, insn->def(0));
1490 }
1491
1492 void
emitRRO()1493 CodeEmitterGM107::emitRRO()
1494 {
1495 switch (insn->src(0).getFile()) {
1496 case FILE_GPR:
1497 emitInsn(0x5c900000);
1498 emitGPR (0x14, insn->src(0));
1499 break;
1500 case FILE_MEMORY_CONST:
1501 emitInsn(0x4c900000);
1502 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1503 break;
1504 case FILE_IMMEDIATE:
1505 emitInsn(0x38900000);
1506 emitIMMD(0x14, 19, insn->src(0));
1507 break;
1508 default:
1509 assert(!"bad src file");
1510 break;
1511 }
1512
1513 emitABS (0x31, insn->src(0));
1514 emitNEG (0x2d, insn->src(0));
1515 emitField(0x27, 1, insn->op == OP_PREEX2);
1516 emitGPR (0x00, insn->def(0));
1517 }
1518
1519 void
emitFCMP()1520 CodeEmitterGM107::emitFCMP()
1521 {
1522 const CmpInstruction *insn = this->insn->asCmp();
1523 CondCode cc = insn->setCond;
1524
1525 if (insn->src(2).mod.neg())
1526 cc = reverseCondCode(cc);
1527
1528 switch(insn->src(2).getFile()) {
1529 case FILE_GPR:
1530 switch (insn->src(1).getFile()) {
1531 case FILE_GPR:
1532 emitInsn(0x5ba00000);
1533 emitGPR (0x14, insn->src(1));
1534 break;
1535 case FILE_MEMORY_CONST:
1536 emitInsn(0x4ba00000);
1537 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1538 break;
1539 case FILE_IMMEDIATE:
1540 emitInsn(0x36a00000);
1541 emitIMMD(0x14, 19, insn->src(1));
1542 break;
1543 default:
1544 assert(!"bad src1 file");
1545 break;
1546 }
1547 emitGPR (0x27, insn->src(2));
1548 break;
1549 case FILE_MEMORY_CONST:
1550 emitInsn(0x53a00000);
1551 emitGPR (0x27, insn->src(1));
1552 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1553 break;
1554 default:
1555 assert(!"bad src2 file");
1556 break;
1557 }
1558
1559 emitCond4(0x30, cc);
1560 emitFMZ (0x2f, 1);
1561 emitGPR (0x08, insn->src(0));
1562 emitGPR (0x00, insn->def(0));
1563 }
1564
1565 void
emitFSET()1566 CodeEmitterGM107::emitFSET()
1567 {
1568 const CmpInstruction *insn = this->insn->asCmp();
1569
1570 switch (insn->src(1).getFile()) {
1571 case FILE_GPR:
1572 emitInsn(0x58000000);
1573 emitGPR (0x14, insn->src(1));
1574 break;
1575 case FILE_MEMORY_CONST:
1576 emitInsn(0x48000000);
1577 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1578 break;
1579 case FILE_IMMEDIATE:
1580 emitInsn(0x30000000);
1581 emitIMMD(0x14, 19, insn->src(1));
1582 break;
1583 default:
1584 assert(!"bad src1 file");
1585 break;
1586 }
1587
1588 if (insn->op != OP_SET) {
1589 switch (insn->op) {
1590 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1591 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1592 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1593 default:
1594 assert(!"invalid set op");
1595 break;
1596 }
1597 emitPRED(0x27, insn->src(2));
1598 } else {
1599 emitPRED(0x27);
1600 }
1601
1602 emitFMZ (0x37, 1);
1603 emitABS (0x36, insn->src(0));
1604 emitNEG (0x35, insn->src(1));
1605 emitField(0x34, 1, insn->dType == TYPE_F32);
1606 emitCond4(0x30, insn->setCond);
1607 emitCC (0x2f);
1608 emitABS (0x2c, insn->src(1));
1609 emitNEG (0x2b, insn->src(0));
1610 emitGPR (0x08, insn->src(0));
1611 emitGPR (0x00, insn->def(0));
1612 }
1613
1614 void
emitFSETP()1615 CodeEmitterGM107::emitFSETP()
1616 {
1617 const CmpInstruction *insn = this->insn->asCmp();
1618
1619 switch (insn->src(1).getFile()) {
1620 case FILE_GPR:
1621 emitInsn(0x5bb00000);
1622 emitGPR (0x14, insn->src(1));
1623 break;
1624 case FILE_MEMORY_CONST:
1625 emitInsn(0x4bb00000);
1626 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1627 break;
1628 case FILE_IMMEDIATE:
1629 emitInsn(0x36b00000);
1630 emitIMMD(0x14, 19, insn->src(1));
1631 break;
1632 default:
1633 assert(!"bad src1 file");
1634 break;
1635 }
1636
1637 if (insn->op != OP_SET) {
1638 switch (insn->op) {
1639 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1640 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1641 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1642 default:
1643 assert(!"invalid set op");
1644 break;
1645 }
1646 emitPRED(0x27, insn->src(2));
1647 } else {
1648 emitPRED(0x27);
1649 }
1650
1651 emitCond4(0x30, insn->setCond);
1652 emitFMZ (0x2f, 1);
1653 emitABS (0x2c, insn->src(1));
1654 emitNEG (0x2b, insn->src(0));
1655 emitGPR (0x08, insn->src(0));
1656 emitABS (0x07, insn->src(0));
1657 emitNEG (0x06, insn->src(1));
1658 emitPRED (0x03, insn->def(0));
1659 if (insn->defExists(1))
1660 emitPRED(0x00, insn->def(1));
1661 else
1662 emitPRED(0x00);
1663 }
1664
1665 void
emitFSWZADD()1666 CodeEmitterGM107::emitFSWZADD()
1667 {
1668 emitInsn (0x50f80000);
1669 emitCC (0x2f);
1670 emitFMZ (0x2c, 1);
1671 emitRND (0x27);
1672 emitField(0x26, 1, insn->lanes); /* abused for .ndv */
1673 emitField(0x1c, 8, insn->subOp);
1674 if (insn->predSrc != 1)
1675 emitGPR (0x14, insn->src(1));
1676 else
1677 emitGPR (0x14);
1678 emitGPR (0x08, insn->src(0));
1679 emitGPR (0x00, insn->def(0));
1680 }
1681
1682 /*******************************************************************************
1683 * integer
1684 ******************************************************************************/
1685
1686 void
emitLOP()1687 CodeEmitterGM107::emitLOP()
1688 {
1689 int lop = 0;
1690
1691 switch (insn->op) {
1692 case OP_AND: lop = 0; break;
1693 case OP_OR : lop = 1; break;
1694 case OP_XOR: lop = 2; break;
1695 default:
1696 assert(!"invalid lop");
1697 break;
1698 }
1699
1700 if (!longIMMD(insn->src(1))) {
1701 switch (insn->src(1).getFile()) {
1702 case FILE_GPR:
1703 emitInsn(0x5c400000);
1704 emitGPR (0x14, insn->src(1));
1705 break;
1706 case FILE_MEMORY_CONST:
1707 emitInsn(0x4c400000);
1708 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1709 break;
1710 case FILE_IMMEDIATE:
1711 emitInsn(0x38400000);
1712 emitIMMD(0x14, 19, insn->src(1));
1713 break;
1714 default:
1715 assert(!"bad src1 file");
1716 break;
1717 }
1718 emitPRED (0x30);
1719 emitCC (0x2f);
1720 emitX (0x2b);
1721 emitField(0x29, 2, lop);
1722 emitINV (0x28, insn->src(1));
1723 emitINV (0x27, insn->src(0));
1724 } else {
1725 emitInsn (0x04000000);
1726 emitX (0x39);
1727 emitINV (0x38, insn->src(1));
1728 emitINV (0x37, insn->src(0));
1729 emitField(0x35, 2, lop);
1730 emitCC (0x34);
1731 emitIMMD (0x14, 32, insn->src(1));
1732 }
1733
1734 emitGPR (0x08, insn->src(0));
1735 emitGPR (0x00, insn->def(0));
1736 }
1737
1738 /* special-case of emitLOP(): lop pass_b dst 0 ~src */
1739 void
emitNOT()1740 CodeEmitterGM107::emitNOT()
1741 {
1742 if (!longIMMD(insn->src(0))) {
1743 switch (insn->src(0).getFile()) {
1744 case FILE_GPR:
1745 emitInsn(0x5c400700);
1746 emitGPR (0x14, insn->src(0));
1747 break;
1748 case FILE_MEMORY_CONST:
1749 emitInsn(0x4c400700);
1750 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1751 break;
1752 case FILE_IMMEDIATE:
1753 emitInsn(0x38400700);
1754 emitIMMD(0x14, 19, insn->src(0));
1755 break;
1756 default:
1757 assert(!"bad src1 file");
1758 break;
1759 }
1760 emitPRED (0x30);
1761 } else {
1762 emitInsn (0x05600000);
1763 emitIMMD (0x14, 32, insn->src(1));
1764 }
1765
1766 emitGPR(0x08);
1767 emitGPR(0x00, insn->def(0));
1768 }
1769
1770 void
emitIADD()1771 CodeEmitterGM107::emitIADD()
1772 {
1773 if (!longIMMD(insn->src(1))) {
1774 switch (insn->src(1).getFile()) {
1775 case FILE_GPR:
1776 emitInsn(0x5c100000);
1777 emitGPR (0x14, insn->src(1));
1778 break;
1779 case FILE_MEMORY_CONST:
1780 emitInsn(0x4c100000);
1781 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1782 break;
1783 case FILE_IMMEDIATE:
1784 emitInsn(0x38100000);
1785 emitIMMD(0x14, 19, insn->src(1));
1786 break;
1787 default:
1788 assert(!"bad src1 file");
1789 break;
1790 }
1791 emitSAT(0x32);
1792 emitNEG(0x31, insn->src(0));
1793 emitNEG(0x30, insn->src(1));
1794 emitCC (0x2f);
1795 emitX (0x2b);
1796 } else {
1797 emitInsn(0x1c000000);
1798 emitNEG (0x38, insn->src(0));
1799 emitSAT (0x36);
1800 emitX (0x35);
1801 emitCC (0x34);
1802 emitIMMD(0x14, 32, insn->src(1));
1803 }
1804
1805 if (insn->op == OP_SUB)
1806 code[1] ^= 0x00010000;
1807
1808 emitGPR(0x08, insn->src(0));
1809 emitGPR(0x00, insn->def(0));
1810 }
1811
1812 void
emitIMUL()1813 CodeEmitterGM107::emitIMUL()
1814 {
1815 if (!longIMMD(insn->src(1))) {
1816 switch (insn->src(1).getFile()) {
1817 case FILE_GPR:
1818 emitInsn(0x5c380000);
1819 emitGPR (0x14, insn->src(1));
1820 break;
1821 case FILE_MEMORY_CONST:
1822 emitInsn(0x4c380000);
1823 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1824 break;
1825 case FILE_IMMEDIATE:
1826 emitInsn(0x38380000);
1827 emitIMMD(0x14, 19, insn->src(1));
1828 break;
1829 default:
1830 assert(!"bad src1 file");
1831 break;
1832 }
1833 emitCC (0x2f);
1834 emitField(0x29, 1, isSignedType(insn->sType));
1835 emitField(0x28, 1, isSignedType(insn->dType));
1836 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1837 } else {
1838 emitInsn (0x1f000000);
1839 emitField(0x37, 1, isSignedType(insn->sType));
1840 emitField(0x36, 1, isSignedType(insn->dType));
1841 emitField(0x35, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1842 emitCC (0x34);
1843 emitIMMD (0x14, 32, insn->src(1));
1844 }
1845
1846 emitGPR(0x08, insn->src(0));
1847 emitGPR(0x00, insn->def(0));
1848 }
1849
1850 void
emitIMAD()1851 CodeEmitterGM107::emitIMAD()
1852 {
1853 /*XXX: imad32i exists, but not using it as third src overlaps dst */
1854 switch(insn->src(2).getFile()) {
1855 case FILE_GPR:
1856 switch (insn->src(1).getFile()) {
1857 case FILE_GPR:
1858 emitInsn(0x5a000000);
1859 emitGPR (0x14, insn->src(1));
1860 break;
1861 case FILE_MEMORY_CONST:
1862 emitInsn(0x4a000000);
1863 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1864 break;
1865 case FILE_IMMEDIATE:
1866 emitInsn(0x34000000);
1867 emitIMMD(0x14, 19, insn->src(1));
1868 break;
1869 default:
1870 assert(!"bad src1 file");
1871 break;
1872 }
1873 emitGPR (0x27, insn->src(2));
1874 break;
1875 case FILE_MEMORY_CONST:
1876 emitInsn(0x52000000);
1877 emitGPR (0x27, insn->src(1));
1878 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1879 break;
1880 default:
1881 assert(!"bad src2 file");
1882 break;
1883 }
1884
1885 emitField(0x36, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1886 emitField(0x35, 1, isSignedType(insn->sType));
1887 emitNEG (0x34, insn->src(2));
1888 emitNEG2 (0x33, insn->src(0), insn->src(1));
1889 emitSAT (0x32);
1890 emitX (0x31);
1891 emitField(0x30, 1, isSignedType(insn->dType));
1892 emitCC (0x2f);
1893 emitGPR (0x08, insn->src(0));
1894 emitGPR (0x00, insn->def(0));
1895 }
1896
1897 void
emitISCADD()1898 CodeEmitterGM107::emitISCADD()
1899 {
1900 assert(insn->src(1).get()->asImm());
1901
1902 switch (insn->src(2).getFile()) {
1903 case FILE_GPR:
1904 emitInsn(0x5c180000);
1905 emitGPR (0x14, insn->src(2));
1906 break;
1907 case FILE_MEMORY_CONST:
1908 emitInsn(0x4c180000);
1909 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1910 break;
1911 case FILE_IMMEDIATE:
1912 emitInsn(0x38180000);
1913 emitIMMD(0x14, 19, insn->src(2));
1914 break;
1915 default:
1916 assert(!"bad src1 file");
1917 break;
1918 }
1919 emitNEG (0x31, insn->src(0));
1920 emitNEG (0x30, insn->src(2));
1921 emitCC (0x2f);
1922 emitIMMD(0x27, 5, insn->src(1));
1923 emitGPR (0x08, insn->src(0));
1924 emitGPR (0x00, insn->def(0));
1925 }
1926
1927 void
emitXMAD()1928 CodeEmitterGM107::emitXMAD()
1929 {
1930 assert(insn->src(0).getFile() == FILE_GPR);
1931
1932 bool constbuf = false;
1933 bool psl_mrg = true;
1934 bool immediate = false;
1935 if (insn->src(2).getFile() == FILE_MEMORY_CONST) {
1936 assert(insn->src(1).getFile() == FILE_GPR);
1937 constbuf = true;
1938 psl_mrg = false;
1939 emitInsn(0x51000000);
1940 emitGPR(0x27, insn->src(1));
1941 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1942 } else if (insn->src(1).getFile() == FILE_MEMORY_CONST) {
1943 assert(insn->src(2).getFile() == FILE_GPR);
1944 constbuf = true;
1945 emitInsn(0x4e000000);
1946 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1947 emitGPR(0x27, insn->src(2));
1948 } else if (insn->src(1).getFile() == FILE_IMMEDIATE) {
1949 assert(insn->src(2).getFile() == FILE_GPR);
1950 assert(!(insn->subOp & NV50_IR_SUBOP_XMAD_H1(1)));
1951 immediate = true;
1952 emitInsn(0x36000000);
1953 emitIMMD(0x14, 16, insn->src(1));
1954 emitGPR(0x27, insn->src(2));
1955 } else {
1956 assert(insn->src(1).getFile() == FILE_GPR);
1957 assert(insn->src(2).getFile() == FILE_GPR);
1958 emitInsn(0x5b000000);
1959 emitGPR(0x14, insn->src(1));
1960 emitGPR(0x27, insn->src(2));
1961 }
1962
1963 if (psl_mrg)
1964 emitField(constbuf ? 0x37 : 0x24, 2, insn->subOp & 0x3);
1965
1966 unsigned cmode = (insn->subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK);
1967 cmode >>= NV50_IR_SUBOP_XMAD_CMODE_SHIFT;
1968 emitField(0x32, constbuf ? 2 : 3, cmode);
1969
1970 emitX(constbuf ? 0x36 : 0x26);
1971 emitCC(0x2f);
1972
1973 emitGPR(0x0, insn->def(0));
1974 emitGPR(0x8, insn->src(0));
1975
1976 // source flags
1977 if (isSignedType(insn->sType)) {
1978 uint16_t h1s = insn->subOp & NV50_IR_SUBOP_XMAD_H1_MASK;
1979 emitField(0x30, 2, h1s >> NV50_IR_SUBOP_XMAD_H1_SHIFT);
1980 }
1981 emitField(0x35, 1, insn->subOp & NV50_IR_SUBOP_XMAD_H1(0) ? 1 : 0);
1982 if (!immediate) {
1983 bool h1 = insn->subOp & NV50_IR_SUBOP_XMAD_H1(1);
1984 emitField(constbuf ? 0x34 : 0x23, 1, h1);
1985 }
1986 }
1987
1988 void
emitIMNMX()1989 CodeEmitterGM107::emitIMNMX()
1990 {
1991 switch (insn->src(1).getFile()) {
1992 case FILE_GPR:
1993 emitInsn(0x5c200000);
1994 emitGPR (0x14, insn->src(1));
1995 break;
1996 case FILE_MEMORY_CONST:
1997 emitInsn(0x4c200000);
1998 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1999 break;
2000 case FILE_IMMEDIATE:
2001 emitInsn(0x38200000);
2002 emitIMMD(0x14, 19, insn->src(1));
2003 break;
2004 default:
2005 assert(!"bad src1 file");
2006 break;
2007 }
2008
2009 emitField(0x30, 1, isSignedType(insn->dType));
2010 emitCC (0x2f);
2011 emitField(0x2b, 2, insn->subOp);
2012 emitField(0x2a, 1, insn->op == OP_MAX);
2013 emitPRED (0x27);
2014 emitGPR (0x08, insn->src(0));
2015 emitGPR (0x00, insn->def(0));
2016 }
2017
2018 void
emitICMP()2019 CodeEmitterGM107::emitICMP()
2020 {
2021 const CmpInstruction *insn = this->insn->asCmp();
2022 CondCode cc = insn->setCond;
2023
2024 if (insn->src(2).mod.neg())
2025 cc = reverseCondCode(cc);
2026
2027 switch(insn->src(2).getFile()) {
2028 case FILE_GPR:
2029 switch (insn->src(1).getFile()) {
2030 case FILE_GPR:
2031 emitInsn(0x5b400000);
2032 emitGPR (0x14, insn->src(1));
2033 break;
2034 case FILE_MEMORY_CONST:
2035 emitInsn(0x4b400000);
2036 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2037 break;
2038 case FILE_IMMEDIATE:
2039 emitInsn(0x36400000);
2040 emitIMMD(0x14, 19, insn->src(1));
2041 break;
2042 default:
2043 assert(!"bad src1 file");
2044 break;
2045 }
2046 emitGPR (0x27, insn->src(2));
2047 break;
2048 case FILE_MEMORY_CONST:
2049 emitInsn(0x53400000);
2050 emitGPR (0x27, insn->src(1));
2051 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
2052 break;
2053 default:
2054 assert(!"bad src2 file");
2055 break;
2056 }
2057
2058 emitCond3(0x31, cc);
2059 emitField(0x30, 1, isSignedType(insn->sType));
2060 emitGPR (0x08, insn->src(0));
2061 emitGPR (0x00, insn->def(0));
2062 }
2063
2064 void
emitISET()2065 CodeEmitterGM107::emitISET()
2066 {
2067 const CmpInstruction *insn = this->insn->asCmp();
2068
2069 switch (insn->src(1).getFile()) {
2070 case FILE_GPR:
2071 emitInsn(0x5b500000);
2072 emitGPR (0x14, insn->src(1));
2073 break;
2074 case FILE_MEMORY_CONST:
2075 emitInsn(0x4b500000);
2076 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2077 break;
2078 case FILE_IMMEDIATE:
2079 emitInsn(0x36500000);
2080 emitIMMD(0x14, 19, insn->src(1));
2081 break;
2082 default:
2083 assert(!"bad src1 file");
2084 break;
2085 }
2086
2087 if (insn->op != OP_SET) {
2088 switch (insn->op) {
2089 case OP_SET_AND: emitField(0x2d, 2, 0); break;
2090 case OP_SET_OR : emitField(0x2d, 2, 1); break;
2091 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
2092 default:
2093 assert(!"invalid set op");
2094 break;
2095 }
2096 emitPRED(0x27, insn->src(2));
2097 } else {
2098 emitPRED(0x27);
2099 }
2100
2101 emitCond3(0x31, insn->setCond);
2102 emitField(0x30, 1, isSignedType(insn->sType));
2103 emitCC (0x2f);
2104 emitField(0x2c, 1, insn->dType == TYPE_F32);
2105 emitX (0x2b);
2106 emitGPR (0x08, insn->src(0));
2107 emitGPR (0x00, insn->def(0));
2108 }
2109
2110 void
emitISETP()2111 CodeEmitterGM107::emitISETP()
2112 {
2113 const CmpInstruction *insn = this->insn->asCmp();
2114
2115 switch (insn->src(1).getFile()) {
2116 case FILE_GPR:
2117 emitInsn(0x5b600000);
2118 emitGPR (0x14, insn->src(1));
2119 break;
2120 case FILE_MEMORY_CONST:
2121 emitInsn(0x4b600000);
2122 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2123 break;
2124 case FILE_IMMEDIATE:
2125 emitInsn(0x36600000);
2126 emitIMMD(0x14, 19, insn->src(1));
2127 break;
2128 default:
2129 assert(!"bad src1 file");
2130 break;
2131 }
2132
2133 if (insn->op != OP_SET) {
2134 switch (insn->op) {
2135 case OP_SET_AND: emitField(0x2d, 2, 0); break;
2136 case OP_SET_OR : emitField(0x2d, 2, 1); break;
2137 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
2138 default:
2139 assert(!"invalid set op");
2140 break;
2141 }
2142 emitPRED(0x27, insn->src(2));
2143 } else {
2144 emitPRED(0x27);
2145 }
2146
2147 emitCond3(0x31, insn->setCond);
2148 emitField(0x30, 1, isSignedType(insn->sType));
2149 emitX (0x2b);
2150 emitGPR (0x08, insn->src(0));
2151 emitPRED (0x03, insn->def(0));
2152 if (insn->defExists(1))
2153 emitPRED(0x00, insn->def(1));
2154 else
2155 emitPRED(0x00);
2156 }
2157
2158 void
emitSHL()2159 CodeEmitterGM107::emitSHL()
2160 {
2161 switch (insn->src(1).getFile()) {
2162 case FILE_GPR:
2163 emitInsn(0x5c480000);
2164 emitGPR (0x14, insn->src(1));
2165 break;
2166 case FILE_MEMORY_CONST:
2167 emitInsn(0x4c480000);
2168 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2169 break;
2170 case FILE_IMMEDIATE:
2171 emitInsn(0x38480000);
2172 emitIMMD(0x14, 19, insn->src(1));
2173 break;
2174 default:
2175 assert(!"bad src1 file");
2176 break;
2177 }
2178
2179 emitCC (0x2f);
2180 emitX (0x2b);
2181 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2182 emitGPR (0x08, insn->src(0));
2183 emitGPR (0x00, insn->def(0));
2184 }
2185
2186 void
emitSHR()2187 CodeEmitterGM107::emitSHR()
2188 {
2189 switch (insn->src(1).getFile()) {
2190 case FILE_GPR:
2191 emitInsn(0x5c280000);
2192 emitGPR (0x14, insn->src(1));
2193 break;
2194 case FILE_MEMORY_CONST:
2195 emitInsn(0x4c280000);
2196 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2197 break;
2198 case FILE_IMMEDIATE:
2199 emitInsn(0x38280000);
2200 emitIMMD(0x14, 19, insn->src(1));
2201 break;
2202 default:
2203 assert(!"bad src1 file");
2204 break;
2205 }
2206
2207 emitField(0x30, 1, isSignedType(insn->dType));
2208 emitCC (0x2f);
2209 emitX (0x2c);
2210 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2211 emitGPR (0x08, insn->src(0));
2212 emitGPR (0x00, insn->def(0));
2213 }
2214
2215 void
emitSHF()2216 CodeEmitterGM107::emitSHF()
2217 {
2218 unsigned type;
2219
2220 switch (insn->src(1).getFile()) {
2221 case FILE_GPR:
2222 emitInsn(insn->op == OP_SHL ? 0x5bf80000 : 0x5cf80000);
2223 emitGPR(0x14, insn->src(1));
2224 break;
2225 case FILE_IMMEDIATE:
2226 emitInsn(insn->op == OP_SHL ? 0x36f80000 : 0x38f80000);
2227 emitIMMD(0x14, 19, insn->src(1));
2228 break;
2229 default:
2230 assert(!"bad src1 file");
2231 break;
2232 }
2233
2234 switch (insn->sType) {
2235 case TYPE_U64:
2236 type = 2;
2237 break;
2238 case TYPE_S64:
2239 type = 3;
2240 break;
2241 default:
2242 type = 0;
2243 break;
2244 }
2245
2246 emitField(0x32, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_WRAP));
2247 emitX (0x31);
2248 emitField(0x30, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_HIGH));
2249 emitCC (0x2f);
2250 emitGPR (0x27, insn->src(2));
2251 emitField(0x25, 2, type);
2252 emitGPR (0x08, insn->src(0));
2253 emitGPR (0x00, insn->def(0));
2254 }
2255
2256 void
emitPOPC()2257 CodeEmitterGM107::emitPOPC()
2258 {
2259 switch (insn->src(0).getFile()) {
2260 case FILE_GPR:
2261 emitInsn(0x5c080000);
2262 emitGPR (0x14, insn->src(0));
2263 break;
2264 case FILE_MEMORY_CONST:
2265 emitInsn(0x4c080000);
2266 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2267 break;
2268 case FILE_IMMEDIATE:
2269 emitInsn(0x38080000);
2270 emitIMMD(0x14, 19, insn->src(0));
2271 break;
2272 default:
2273 assert(!"bad src1 file");
2274 break;
2275 }
2276
2277 emitINV(0x28, insn->src(0));
2278 emitGPR(0x00, insn->def(0));
2279 }
2280
2281 void
emitBFI()2282 CodeEmitterGM107::emitBFI()
2283 {
2284 switch(insn->src(2).getFile()) {
2285 case FILE_GPR:
2286 switch (insn->src(1).getFile()) {
2287 case FILE_GPR:
2288 emitInsn(0x5bf00000);
2289 emitGPR (0x14, insn->src(1));
2290 break;
2291 case FILE_MEMORY_CONST:
2292 emitInsn(0x4bf00000);
2293 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2294 break;
2295 case FILE_IMMEDIATE:
2296 emitInsn(0x36f00000);
2297 emitIMMD(0x14, 19, insn->src(1));
2298 break;
2299 default:
2300 assert(!"bad src1 file");
2301 break;
2302 }
2303 emitGPR (0x27, insn->src(2));
2304 break;
2305 case FILE_MEMORY_CONST:
2306 emitInsn(0x53f00000);
2307 emitGPR (0x27, insn->src(1));
2308 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
2309 break;
2310 default:
2311 assert(!"bad src2 file");
2312 break;
2313 }
2314
2315 emitCC (0x2f);
2316 emitGPR (0x08, insn->src(0));
2317 emitGPR (0x00, insn->def(0));
2318 }
2319
2320 void
emitBFE()2321 CodeEmitterGM107::emitBFE()
2322 {
2323 switch (insn->src(1).getFile()) {
2324 case FILE_GPR:
2325 emitInsn(0x5c000000);
2326 emitGPR (0x14, insn->src(1));
2327 break;
2328 case FILE_MEMORY_CONST:
2329 emitInsn(0x4c000000);
2330 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2331 break;
2332 case FILE_IMMEDIATE:
2333 emitInsn(0x38000000);
2334 emitIMMD(0x14, 19, insn->src(1));
2335 break;
2336 default:
2337 assert(!"bad src1 file");
2338 break;
2339 }
2340
2341 emitField(0x30, 1, isSignedType(insn->dType));
2342 emitCC (0x2f);
2343 emitField(0x28, 1, insn->subOp == NV50_IR_SUBOP_EXTBF_REV);
2344 emitGPR (0x08, insn->src(0));
2345 emitGPR (0x00, insn->def(0));
2346 }
2347
2348 void
emitFLO()2349 CodeEmitterGM107::emitFLO()
2350 {
2351 switch (insn->src(0).getFile()) {
2352 case FILE_GPR:
2353 emitInsn(0x5c300000);
2354 emitGPR (0x14, insn->src(0));
2355 break;
2356 case FILE_MEMORY_CONST:
2357 emitInsn(0x4c300000);
2358 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2359 break;
2360 case FILE_IMMEDIATE:
2361 emitInsn(0x38300000);
2362 emitIMMD(0x14, 19, insn->src(0));
2363 break;
2364 default:
2365 assert(!"bad src1 file");
2366 break;
2367 }
2368
2369 emitField(0x30, 1, isSignedType(insn->dType));
2370 emitCC (0x2f);
2371 emitField(0x29, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT);
2372 emitINV (0x28, insn->src(0));
2373 emitGPR (0x00, insn->def(0));
2374 }
2375
2376 void
emitPRMT()2377 CodeEmitterGM107::emitPRMT()
2378 {
2379 switch (insn->src(1).getFile()) {
2380 case FILE_GPR:
2381 emitInsn(0x5bc00000);
2382 emitGPR (0x14, insn->src(1));
2383 break;
2384 case FILE_MEMORY_CONST:
2385 emitInsn(0x4bc00000);
2386 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2387 break;
2388 case FILE_IMMEDIATE:
2389 emitInsn(0x36c00000);
2390 emitIMMD(0x14, 19, insn->src(1));
2391 break;
2392 default:
2393 assert(!"bad src1 file");
2394 break;
2395 }
2396
2397 emitField(0x30, 3, insn->subOp);
2398 emitGPR (0x27, insn->src(2));
2399 emitGPR (0x08, insn->src(0));
2400 emitGPR (0x00, insn->def(0));
2401 }
2402
2403 /*******************************************************************************
2404 * memory
2405 ******************************************************************************/
2406
2407 void
emitLDSTs(int pos,DataType type)2408 CodeEmitterGM107::emitLDSTs(int pos, DataType type)
2409 {
2410 int data = 0;
2411
2412 switch (typeSizeof(type)) {
2413 case 1: data = isSignedType(type) ? 1 : 0; break;
2414 case 2: data = isSignedType(type) ? 3 : 2; break;
2415 case 4: data = 4; break;
2416 case 8: data = 5; break;
2417 case 16: data = 6; break;
2418 default:
2419 assert(!"bad type");
2420 break;
2421 }
2422
2423 emitField(pos, 3, data);
2424 }
2425
2426 void
emitLDSTc(int pos)2427 CodeEmitterGM107::emitLDSTc(int pos)
2428 {
2429 int mode = 0;
2430
2431 switch (insn->cache) {
2432 case CACHE_CA: mode = 0; break;
2433 case CACHE_CG: mode = 1; break;
2434 case CACHE_CS: mode = 2; break;
2435 case CACHE_CV: mode = 3; break;
2436 default:
2437 assert(!"invalid caching mode");
2438 break;
2439 }
2440
2441 emitField(pos, 2, mode);
2442 }
2443
2444 void
emitLDC()2445 CodeEmitterGM107::emitLDC()
2446 {
2447 emitInsn (0xef900000);
2448 emitLDSTs(0x30, insn->dType);
2449 emitField(0x2c, 2, insn->subOp);
2450 emitCBUF (0x24, 0x08, 0x14, 16, 0, insn->src(0));
2451 emitGPR (0x00, insn->def(0));
2452 }
2453
2454 void
emitLDL()2455 CodeEmitterGM107::emitLDL()
2456 {
2457 emitInsn (0xef400000);
2458 emitLDSTs(0x30, insn->dType);
2459 emitLDSTc(0x2c);
2460 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2461 emitGPR (0x00, insn->def(0));
2462 }
2463
2464 void
emitLDS()2465 CodeEmitterGM107::emitLDS()
2466 {
2467 emitInsn (0xef480000);
2468 emitLDSTs(0x30, insn->dType);
2469 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2470 emitGPR (0x00, insn->def(0));
2471 }
2472
2473 void
emitLD()2474 CodeEmitterGM107::emitLD()
2475 {
2476 emitInsn (0x80000000);
2477 emitPRED (0x3a);
2478 emitLDSTc(0x38);
2479 emitLDSTs(0x35, insn->dType);
2480 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2481 emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2482 emitGPR (0x00, insn->def(0));
2483 }
2484
2485 void
emitSTL()2486 CodeEmitterGM107::emitSTL()
2487 {
2488 emitInsn (0xef500000);
2489 emitLDSTs(0x30, insn->dType);
2490 emitLDSTc(0x2c);
2491 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2492 emitGPR (0x00, insn->src(1));
2493 }
2494
2495 void
emitSTS()2496 CodeEmitterGM107::emitSTS()
2497 {
2498 emitInsn (0xef580000);
2499 emitLDSTs(0x30, insn->dType);
2500 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2501 emitGPR (0x00, insn->src(1));
2502 }
2503
2504 void
emitST()2505 CodeEmitterGM107::emitST()
2506 {
2507 emitInsn (0xa0000000);
2508 emitPRED (0x3a);
2509 emitLDSTc(0x38);
2510 emitLDSTs(0x35, insn->dType);
2511 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2512 emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2513 emitGPR (0x00, insn->src(1));
2514 }
2515
2516 void
emitALD()2517 CodeEmitterGM107::emitALD()
2518 {
2519 emitInsn (0xefd80000);
2520 emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2521 emitGPR (0x27, insn->src(0).getIndirect(1));
2522 emitO (0x20);
2523 emitP (0x1f);
2524 emitADDR (0x08, 20, 10, 0, insn->src(0));
2525 emitGPR (0x00, insn->def(0));
2526 }
2527
2528 void
emitAST()2529 CodeEmitterGM107::emitAST()
2530 {
2531 emitInsn (0xeff00000);
2532 emitField(0x2f, 2, (typeSizeof(insn->dType) / 4) - 1);
2533 emitGPR (0x27, insn->src(0).getIndirect(1));
2534 emitP (0x1f);
2535 emitADDR (0x08, 20, 10, 0, insn->src(0));
2536 emitGPR (0x00, insn->src(1));
2537 }
2538
2539 void
emitISBERD()2540 CodeEmitterGM107::emitISBERD()
2541 {
2542 emitInsn(0xefd00000);
2543 emitGPR (0x08, insn->src(0));
2544 emitGPR (0x00, insn->def(0));
2545 }
2546
2547 void
emitAL2P()2548 CodeEmitterGM107::emitAL2P()
2549 {
2550 emitInsn (0xefa00000);
2551 emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2552 emitPRED (0x2c);
2553 emitO (0x20);
2554 emitField(0x14, 11, insn->src(0).get()->reg.data.offset);
2555 emitGPR (0x08, insn->src(0).getIndirect(0));
2556 emitGPR (0x00, insn->def(0));
2557 }
2558
2559 void
gm107_interpApply(const FixupEntry * entry,uint32_t * code,const FixupData & data)2560 gm107_interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
2561 {
2562 int ipa = entry->ipa;
2563 int reg = entry->reg;
2564 int loc = entry->loc;
2565
2566 if (data.flatshade &&
2567 (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
2568 ipa = NV50_IR_INTERP_FLAT;
2569 reg = 0xff;
2570 } else if (data.force_persample_interp &&
2571 (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
2572 (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
2573 ipa |= NV50_IR_INTERP_CENTROID;
2574 }
2575 code[loc + 1] &= ~(0xf << 0x14);
2576 code[loc + 1] |= (ipa & 0x3) << 0x16;
2577 code[loc + 1] |= (ipa & 0xc) << (0x14 - 2);
2578 code[loc + 0] &= ~(0xff << 0x14);
2579 code[loc + 0] |= reg << 0x14;
2580 }
2581
2582 void
emitIPA()2583 CodeEmitterGM107::emitIPA()
2584 {
2585 int ipam = 0, ipas = 0;
2586
2587 switch (insn->getInterpMode()) {
2588 case NV50_IR_INTERP_LINEAR : ipam = 0; break;
2589 case NV50_IR_INTERP_PERSPECTIVE: ipam = 1; break;
2590 case NV50_IR_INTERP_FLAT : ipam = 2; break;
2591 case NV50_IR_INTERP_SC : ipam = 3; break;
2592 default:
2593 assert(!"invalid ipa mode");
2594 break;
2595 }
2596
2597 switch (insn->getSampleMode()) {
2598 case NV50_IR_INTERP_DEFAULT : ipas = 0; break;
2599 case NV50_IR_INTERP_CENTROID: ipas = 1; break;
2600 case NV50_IR_INTERP_OFFSET : ipas = 2; break;
2601 default:
2602 assert(!"invalid ipa sample mode");
2603 break;
2604 }
2605
2606 emitInsn (0xe0000000);
2607 emitField(0x36, 2, ipam);
2608 emitField(0x34, 2, ipas);
2609 emitSAT (0x33);
2610 emitField(0x2f, 3, 7);
2611 emitADDR (0x08, 0x1c, 10, 0, insn->src(0));
2612 if ((code[0] & 0x0000ff00) != 0x0000ff00)
2613 code[1] |= 0x00000040; /* .idx */
2614 emitGPR(0x00, insn->def(0));
2615
2616 if (insn->op == OP_PINTERP) {
2617 emitGPR(0x14, insn->src(1));
2618 if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2619 emitGPR(0x27, insn->src(2));
2620 addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, gm107_interpApply);
2621 } else {
2622 if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2623 emitGPR(0x27, insn->src(1));
2624 emitGPR(0x14);
2625 addInterp(insn->ipa, 0xff, gm107_interpApply);
2626 }
2627
2628 if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET)
2629 emitGPR(0x27);
2630 }
2631
2632 void
emitATOM()2633 CodeEmitterGM107::emitATOM()
2634 {
2635 unsigned dType, subOp;
2636
2637 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2638 switch (insn->dType) {
2639 case TYPE_U32: dType = 0; break;
2640 case TYPE_U64: dType = 1; break;
2641 default: assert(!"unexpected dType"); dType = 0; break;
2642 }
2643 subOp = 15;
2644
2645 emitInsn (0xee000000);
2646 } else {
2647 switch (insn->dType) {
2648 case TYPE_U32: dType = 0; break;
2649 case TYPE_S32: dType = 1; break;
2650 case TYPE_U64: dType = 2; break;
2651 case TYPE_F32: dType = 3; break;
2652 case TYPE_B128: dType = 4; break;
2653 case TYPE_S64: dType = 5; break;
2654 default: assert(!"unexpected dType"); dType = 0; break;
2655 }
2656 if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2657 subOp = 8;
2658 else
2659 subOp = insn->subOp;
2660
2661 emitInsn (0xed000000);
2662 }
2663
2664 emitField(0x34, 4, subOp);
2665 emitField(0x31, 3, dType);
2666 emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2667 emitGPR (0x14, insn->src(1));
2668 emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2669 emitGPR (0x00, insn->def(0));
2670 }
2671
2672 void
emitATOMS()2673 CodeEmitterGM107::emitATOMS()
2674 {
2675 unsigned dType, subOp;
2676
2677 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2678 switch (insn->dType) {
2679 case TYPE_U32: dType = 0; break;
2680 case TYPE_U64: dType = 1; break;
2681 default: assert(!"unexpected dType"); dType = 0; break;
2682 }
2683 subOp = 4;
2684
2685 emitInsn (0xee000000);
2686 emitField(0x34, 1, dType);
2687 } else {
2688 switch (insn->dType) {
2689 case TYPE_U32: dType = 0; break;
2690 case TYPE_S32: dType = 1; break;
2691 case TYPE_U64: dType = 2; break;
2692 case TYPE_S64: dType = 3; break;
2693 default: assert(!"unexpected dType"); dType = 0; break;
2694 }
2695
2696 if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2697 subOp = 8;
2698 else
2699 subOp = insn->subOp;
2700
2701 emitInsn (0xec000000);
2702 emitField(0x1c, 3, dType);
2703 }
2704
2705 emitField(0x34, 4, subOp);
2706 emitGPR (0x14, insn->src(1));
2707 emitADDR (0x08, 0x1e, 22, 2, insn->src(0));
2708 emitGPR (0x00, insn->def(0));
2709 }
2710
2711 void
emitRED()2712 CodeEmitterGM107::emitRED()
2713 {
2714 unsigned dType;
2715
2716 switch (insn->dType) {
2717 case TYPE_U32: dType = 0; break;
2718 case TYPE_S32: dType = 1; break;
2719 case TYPE_U64: dType = 2; break;
2720 case TYPE_F32: dType = 3; break;
2721 case TYPE_B128: dType = 4; break;
2722 case TYPE_S64: dType = 5; break;
2723 default: assert(!"unexpected dType"); dType = 0; break;
2724 }
2725
2726 emitInsn (0xebf80000);
2727 emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2728 emitField(0x17, 3, insn->subOp);
2729 emitField(0x14, 3, dType);
2730 emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2731 emitGPR (0x00, insn->src(1));
2732 }
2733
2734 void
emitCCTL()2735 CodeEmitterGM107::emitCCTL()
2736 {
2737 unsigned width;
2738 if (insn->src(0).getFile() == FILE_MEMORY_GLOBAL) {
2739 emitInsn(0xef600000);
2740 width = 30;
2741 } else {
2742 emitInsn(0xef800000);
2743 width = 22;
2744 }
2745 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2746 emitADDR (0x08, 0x16, width, 2, insn->src(0));
2747 emitField(0x00, 4, insn->subOp);
2748 }
2749
2750 /*******************************************************************************
2751 * surface
2752 ******************************************************************************/
2753
2754 void
emitPIXLD()2755 CodeEmitterGM107::emitPIXLD()
2756 {
2757 emitInsn (0xefe80000);
2758 emitPRED (0x2d);
2759 emitField(0x1f, 3, insn->subOp);
2760 emitGPR (0x08, insn->src(0));
2761 emitGPR (0x00, insn->def(0));
2762 }
2763
2764 /*******************************************************************************
2765 * texture
2766 ******************************************************************************/
2767
2768 void
emitTEXs(int pos)2769 CodeEmitterGM107::emitTEXs(int pos)
2770 {
2771 int src1 = insn->predSrc == 1 ? 2 : 1;
2772 if (insn->srcExists(src1))
2773 emitGPR(pos, insn->src(src1));
2774 else
2775 emitGPR(pos);
2776 }
2777
/*
 * Translate a 4-bit component write mask into the 3-bit mask encoding used
 * by emitTEXS().
 *
 * Masks 0x0, 0x5, 0x6 and anything above 0xf have no encoding: they trip the
 * assertion and yield 0.
 */
static uint8_t
getTEXSMask(uint8_t mask)
{
   /* indexed by component mask; -1 marks a mask with no encoding */
   static const int8_t encodings[16] = {
      /* 0x0 */ -1,  /* 0x1 */ 0x0, /* 0x2 */ 0x1, /* 0x3 */ 0x4,
      /* 0x4 */ 0x2, /* 0x5 */ -1,  /* 0x6 */ -1,  /* 0x7 */ 0x0,
      /* 0x8 */ 0x3, /* 0x9 */ 0x5, /* 0xa */ 0x6, /* 0xb */ 0x1,
      /* 0xc */ 0x7, /* 0xd */ 0x2, /* 0xe */ 0x3, /* 0xf */ 0x4,
   };

   if (mask < 16 && encodings[mask] >= 0)
      return static_cast<uint8_t>(encodings[mask]);

   assert(!"invalid mask");
   return 0;
}
2800
2801 static uint8_t
getTEXSTarget(const TexInstruction * tex)2802 getTEXSTarget(const TexInstruction *tex)
2803 {
2804 assert(tex->op == OP_TEX || tex->op == OP_TXL);
2805
2806 switch (tex->tex.target.getEnum()) {
2807 case TEX_TARGET_1D:
2808 assert(tex->tex.levelZero);
2809 return 0x0;
2810 case TEX_TARGET_2D:
2811 case TEX_TARGET_RECT:
2812 if (tex->tex.levelZero)
2813 return 0x2;
2814 if (tex->op == OP_TXL)
2815 return 0x3;
2816 return 0x1;
2817 case TEX_TARGET_2D_SHADOW:
2818 case TEX_TARGET_RECT_SHADOW:
2819 if (tex->tex.levelZero)
2820 return 0x6;
2821 if (tex->op == OP_TXL)
2822 return 0x5;
2823 return 0x4;
2824 case TEX_TARGET_2D_ARRAY:
2825 if (tex->tex.levelZero)
2826 return 0x8;
2827 return 0x7;
2828 case TEX_TARGET_2D_ARRAY_SHADOW:
2829 assert(tex->tex.levelZero);
2830 return 0x9;
2831 case TEX_TARGET_3D:
2832 if (tex->tex.levelZero)
2833 return 0xb;
2834 assert(tex->op != OP_TXL);
2835 return 0xa;
2836 case TEX_TARGET_CUBE:
2837 assert(!tex->tex.levelZero);
2838 if (tex->op == OP_TXL)
2839 return 0xd;
2840 return 0xc;
2841 default:
2842 assert(false);
2843 return 0x0;
2844 }
2845 }
2846
2847 static uint8_t
getTLDSTarget(const TexInstruction * tex)2848 getTLDSTarget(const TexInstruction *tex)
2849 {
2850 switch (tex->tex.target.getEnum()) {
2851 case TEX_TARGET_1D:
2852 if (tex->tex.levelZero)
2853 return 0x0;
2854 return 0x1;
2855 case TEX_TARGET_2D:
2856 case TEX_TARGET_RECT:
2857 if (tex->tex.levelZero)
2858 return tex->tex.useOffsets ? 0x4 : 0x2;
2859 return tex->tex.useOffsets ? 0xc : 0x5;
2860 case TEX_TARGET_2D_MS:
2861 assert(tex->tex.levelZero);
2862 return 0x6;
2863 case TEX_TARGET_3D:
2864 assert(tex->tex.levelZero);
2865 return 0x7;
2866 case TEX_TARGET_2D_ARRAY:
2867 assert(tex->tex.levelZero);
2868 return 0x8;
2869
2870 default:
2871 assert(false);
2872 return 0x0;
2873 }
2874 }
2875
2876 void
emitTEX()2877 CodeEmitterGM107::emitTEX()
2878 {
2879 const TexInstruction *insn = this->insn->asTex();
2880 int lodm = 0;
2881
2882 if (!insn->tex.levelZero) {
2883 switch (insn->op) {
2884 case OP_TEX: lodm = 0; break;
2885 case OP_TXB: lodm = 2; break;
2886 case OP_TXL: lodm = 3; break;
2887 default:
2888 assert(!"invalid tex op");
2889 break;
2890 }
2891 } else {
2892 lodm = 1;
2893 }
2894
2895 if (insn->tex.rIndirectSrc >= 0) {
2896 emitInsn (0xdeb80000);
2897 emitField(0x25, 2, lodm);
2898 emitField(0x24, 1, insn->tex.useOffsets == 1);
2899 } else {
2900 emitInsn (0xc0380000);
2901 emitField(0x37, 2, lodm);
2902 emitField(0x36, 1, insn->tex.useOffsets == 1);
2903 emitField(0x24, 13, insn->tex.r);
2904 }
2905
2906 emitField(0x32, 1, insn->tex.target.isShadow());
2907 emitField(0x31, 1, insn->tex.liveOnly);
2908 emitField(0x23, 1, insn->tex.derivAll);
2909 emitField(0x1f, 4, insn->tex.mask);
2910 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2911 insn->tex.target.getDim() - 1);
2912 emitField(0x1c, 1, insn->tex.target.isArray());
2913 emitTEXs (0x14);
2914 emitGPR (0x08, insn->src(0));
2915 emitGPR (0x00, insn->def(0));
2916 }
2917
2918 void
emitTEXS()2919 CodeEmitterGM107::emitTEXS()
2920 {
2921 const TexInstruction *insn = this->insn->asTex();
2922 assert(!insn->tex.derivAll);
2923
2924 switch (insn->op) {
2925 case OP_TEX:
2926 case OP_TXL:
2927 emitInsn (0xd8000000);
2928 emitField(0x35, 4, getTEXSTarget(insn));
2929 emitField(0x32, 3, getTEXSMask(insn->tex.mask));
2930 break;
2931 case OP_TXF:
2932 emitInsn (0xda000000);
2933 emitField(0x35, 4, getTLDSTarget(insn));
2934 emitField(0x32, 3, getTEXSMask(insn->tex.mask));
2935 break;
2936 case OP_TXG:
2937 assert(insn->tex.useOffsets != 4);
2938 emitInsn (0xdf000000);
2939 emitField(0x34, 2, insn->tex.gatherComp);
2940 emitField(0x33, 1, insn->tex.useOffsets == 1);
2941 emitField(0x32, 1, insn->tex.target.isShadow());
2942 break;
2943 default:
2944 unreachable("unknown op in emitTEXS()");
2945 break;
2946 }
2947
2948 emitField(0x31, 1, insn->tex.liveOnly);
2949 emitField(0x24, 13, insn->tex.r);
2950 if (insn->defExists(1))
2951 emitGPR(0x1c, insn->def(1));
2952 else
2953 emitGPR(0x1c);
2954 if (insn->srcExists(1))
2955 emitGPR(0x14, insn->getSrc(1));
2956 else
2957 emitGPR(0x14);
2958 emitGPR (0x08, insn->src(0));
2959 emitGPR (0x00, insn->def(0));
2960 }
2961
2962 void
emitTLD()2963 CodeEmitterGM107::emitTLD()
2964 {
2965 const TexInstruction *insn = this->insn->asTex();
2966
2967 if (insn->tex.rIndirectSrc >= 0) {
2968 emitInsn (0xdd380000);
2969 } else {
2970 emitInsn (0xdc380000);
2971 emitField(0x24, 13, insn->tex.r);
2972 }
2973
2974 emitField(0x37, 1, insn->tex.levelZero == 0);
2975 emitField(0x32, 1, insn->tex.target.isMS());
2976 emitField(0x31, 1, insn->tex.liveOnly);
2977 emitField(0x23, 1, insn->tex.useOffsets == 1);
2978 emitField(0x1f, 4, insn->tex.mask);
2979 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2980 insn->tex.target.getDim() - 1);
2981 emitField(0x1c, 1, insn->tex.target.isArray());
2982 emitTEXs (0x14);
2983 emitGPR (0x08, insn->src(0));
2984 emitGPR (0x00, insn->def(0));
2985 }
2986
2987 void
emitTLD4()2988 CodeEmitterGM107::emitTLD4()
2989 {
2990 const TexInstruction *insn = this->insn->asTex();
2991
2992 if (insn->tex.rIndirectSrc >= 0) {
2993 emitInsn (0xdef80000);
2994 emitField(0x26, 2, insn->tex.gatherComp);
2995 emitField(0x25, 2, insn->tex.useOffsets == 4);
2996 emitField(0x24, 2, insn->tex.useOffsets == 1);
2997 } else {
2998 emitInsn (0xc8380000);
2999 emitField(0x38, 2, insn->tex.gatherComp);
3000 emitField(0x37, 2, insn->tex.useOffsets == 4);
3001 emitField(0x36, 2, insn->tex.useOffsets == 1);
3002 emitField(0x24, 13, insn->tex.r);
3003 }
3004
3005 emitField(0x32, 1, insn->tex.target.isShadow());
3006 emitField(0x31, 1, insn->tex.liveOnly);
3007 emitField(0x23, 1, insn->tex.derivAll);
3008 emitField(0x1f, 4, insn->tex.mask);
3009 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
3010 insn->tex.target.getDim() - 1);
3011 emitField(0x1c, 1, insn->tex.target.isArray());
3012 emitTEXs (0x14);
3013 emitGPR (0x08, insn->src(0));
3014 emitGPR (0x00, insn->def(0));
3015 }
3016
3017 void
emitTXD()3018 CodeEmitterGM107::emitTXD()
3019 {
3020 const TexInstruction *insn = this->insn->asTex();
3021
3022 if (insn->tex.rIndirectSrc >= 0) {
3023 emitInsn (0xde780000);
3024 } else {
3025 emitInsn (0xde380000);
3026 emitField(0x24, 13, insn->tex.r);
3027 }
3028
3029 emitField(0x31, 1, insn->tex.liveOnly);
3030 emitField(0x23, 1, insn->tex.useOffsets == 1);
3031 emitField(0x1f, 4, insn->tex.mask);
3032 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
3033 insn->tex.target.getDim() - 1);
3034 emitField(0x1c, 1, insn->tex.target.isArray());
3035 emitTEXs (0x14);
3036 emitGPR (0x08, insn->src(0));
3037 emitGPR (0x00, insn->def(0));
3038 }
3039
3040 void
emitTMML()3041 CodeEmitterGM107::emitTMML()
3042 {
3043 const TexInstruction *insn = this->insn->asTex();
3044
3045 if (insn->tex.rIndirectSrc >= 0) {
3046 emitInsn (0xdf600000);
3047 } else {
3048 emitInsn (0xdf580000);
3049 emitField(0x24, 13, insn->tex.r);
3050 }
3051
3052 emitField(0x31, 1, insn->tex.liveOnly);
3053 emitField(0x23, 1, insn->tex.derivAll);
3054 emitField(0x1f, 4, insn->tex.mask);
3055 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
3056 insn->tex.target.getDim() - 1);
3057 emitField(0x1c, 1, insn->tex.target.isArray());
3058 emitTEXs (0x14);
3059 emitGPR (0x08, insn->src(0));
3060 emitGPR (0x00, insn->def(0));
3061 }
3062
3063 void
emitTXQ()3064 CodeEmitterGM107::emitTXQ()
3065 {
3066 const TexInstruction *insn = this->insn->asTex();
3067 int type = 0;
3068
3069 switch (insn->tex.query) {
3070 case TXQ_DIMS : type = 0x01; break;
3071 case TXQ_TYPE : type = 0x02; break;
3072 case TXQ_SAMPLE_POSITION: type = 0x05; break;
3073 case TXQ_FILTER : type = 0x10; break;
3074 case TXQ_LOD : type = 0x12; break;
3075 case TXQ_WRAP : type = 0x14; break;
3076 case TXQ_BORDER_COLOUR : type = 0x16; break;
3077 default:
3078 assert(!"invalid txq query");
3079 break;
3080 }
3081
3082 if (insn->tex.rIndirectSrc >= 0) {
3083 emitInsn (0xdf500000);
3084 } else {
3085 emitInsn (0xdf480000);
3086 emitField(0x24, 13, insn->tex.r);
3087 }
3088
3089 emitField(0x31, 1, insn->tex.liveOnly);
3090 emitField(0x1f, 4, insn->tex.mask);
3091 emitField(0x16, 6, type);
3092 emitGPR (0x08, insn->src(0));
3093 emitGPR (0x00, insn->def(0));
3094 }
3095
3096 void
emitDEPBAR()3097 CodeEmitterGM107::emitDEPBAR()
3098 {
3099 emitInsn (0xf0f00000);
3100 emitField(0x1d, 1, 1); /* le */
3101 emitField(0x1a, 3, 5);
3102 emitField(0x14, 6, insn->subOp);
3103 emitField(0x00, 6, insn->subOp);
3104 }
3105
3106 /*******************************************************************************
3107 * misc
3108 ******************************************************************************/
3109
3110 void
emitNOP()3111 CodeEmitterGM107::emitNOP()
3112 {
3113 emitInsn(0x50b00000);
3114 }
3115
3116 void
emitKIL()3117 CodeEmitterGM107::emitKIL()
3118 {
3119 emitInsn (0xe3300000);
3120 emitCond5(0x00, CC_TR);
3121 }
3122
3123 void
emitOUT()3124 CodeEmitterGM107::emitOUT()
3125 {
3126 const int cut = insn->op == OP_RESTART || insn->subOp;
3127 const int emit = insn->op == OP_EMIT;
3128
3129 switch (insn->src(1).getFile()) {
3130 case FILE_GPR:
3131 emitInsn(0xfbe00000);
3132 emitGPR (0x14, insn->src(1));
3133 break;
3134 case FILE_IMMEDIATE:
3135 emitInsn(0xf6e00000);
3136 emitIMMD(0x14, 19, insn->src(1));
3137 break;
3138 case FILE_MEMORY_CONST:
3139 emitInsn(0xebe00000);
3140 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
3141 break;
3142 default:
3143 assert(!"bad src1 file");
3144 break;
3145 }
3146
3147 emitField(0x27, 2, (cut << 1) | emit);
3148 emitGPR (0x08, insn->src(0));
3149 emitGPR (0x00, insn->def(0));
3150 }
3151
3152 void
emitBAR()3153 CodeEmitterGM107::emitBAR()
3154 {
3155 uint8_t subop;
3156
3157 emitInsn (0xf0a80000);
3158
3159 switch (insn->subOp) {
3160 case NV50_IR_SUBOP_BAR_RED_POPC: subop = 0x02; break;
3161 case NV50_IR_SUBOP_BAR_RED_AND: subop = 0x0a; break;
3162 case NV50_IR_SUBOP_BAR_RED_OR: subop = 0x12; break;
3163 case NV50_IR_SUBOP_BAR_ARRIVE: subop = 0x81; break;
3164 default:
3165 subop = 0x80;
3166 assert(insn->subOp == NV50_IR_SUBOP_BAR_SYNC);
3167 break;
3168 }
3169
3170 emitField(0x20, 8, subop);
3171
3172 // barrier id
3173 if (insn->src(0).getFile() == FILE_GPR) {
3174 emitGPR(0x08, insn->src(0));
3175 } else {
3176 ImmediateValue *imm = insn->getSrc(0)->asImm();
3177 assert(imm);
3178 emitField(0x08, 8, imm->reg.data.u32);
3179 emitField(0x2b, 1, 1);
3180 }
3181
3182 // thread count
3183 if (insn->src(1).getFile() == FILE_GPR) {
3184 emitGPR(0x14, insn->src(1));
3185 } else {
3186 ImmediateValue *imm = insn->getSrc(0)->asImm();
3187 assert(imm);
3188 emitField(0x14, 12, imm->reg.data.u32);
3189 emitField(0x2c, 1, 1);
3190 }
3191
3192 if (insn->srcExists(2) && (insn->predSrc != 2)) {
3193 emitPRED (0x27, insn->src(2));
3194 emitField(0x2a, 1, insn->src(2).mod == Modifier(NV50_IR_MOD_NOT));
3195 } else {
3196 emitField(0x27, 3, 7);
3197 }
3198 }
3199
3200 void
emitMEMBAR()3201 CodeEmitterGM107::emitMEMBAR()
3202 {
3203 emitInsn (0xef980000);
3204 emitField(0x08, 2, insn->subOp >> 2);
3205 }
3206
3207 void
emitVOTE()3208 CodeEmitterGM107::emitVOTE()
3209 {
3210 const ImmediateValue *imm;
3211 uint32_t u32;
3212
3213 int r = -1, p = -1;
3214 for (int i = 0; insn->defExists(i); i++) {
3215 if (insn->def(i).getFile() == FILE_GPR)
3216 r = i;
3217 else if (insn->def(i).getFile() == FILE_PREDICATE)
3218 p = i;
3219 }
3220
3221 emitInsn (0x50d80000);
3222 emitField(0x30, 2, insn->subOp);
3223 if (r >= 0)
3224 emitGPR (0x00, insn->def(r));
3225 else
3226 emitGPR (0x00);
3227 if (p >= 0)
3228 emitPRED (0x2d, insn->def(p));
3229 else
3230 emitPRED (0x2d);
3231
3232 switch (insn->src(0).getFile()) {
3233 case FILE_PREDICATE:
3234 emitField(0x2a, 1, insn->src(0).mod == Modifier(NV50_IR_MOD_NOT));
3235 emitPRED (0x27, insn->src(0));
3236 break;
3237 case FILE_IMMEDIATE:
3238 imm = insn->getSrc(0)->asImm();
3239 assert(imm);
3240 u32 = imm->reg.data.u32;
3241 assert(u32 == 0 || u32 == 1);
3242 emitPRED(0x27);
3243 emitField(0x2a, 1, u32 == 0);
3244 break;
3245 default:
3246 assert(!"Unhandled src");
3247 break;
3248 }
3249 }
3250
3251 void
emitSUTarget()3252 CodeEmitterGM107::emitSUTarget()
3253 {
3254 const TexInstruction *insn = this->insn->asTex();
3255 int target = 0;
3256
3257 assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
3258
3259 if (insn->tex.target == TEX_TARGET_BUFFER) {
3260 target = 2;
3261 } else if (insn->tex.target == TEX_TARGET_1D_ARRAY) {
3262 target = 4;
3263 } else if (insn->tex.target == TEX_TARGET_2D ||
3264 insn->tex.target == TEX_TARGET_RECT) {
3265 target = 6;
3266 } else if (insn->tex.target == TEX_TARGET_2D_ARRAY ||
3267 insn->tex.target == TEX_TARGET_CUBE ||
3268 insn->tex.target == TEX_TARGET_CUBE_ARRAY) {
3269 target = 8;
3270 } else if (insn->tex.target == TEX_TARGET_3D) {
3271 target = 10;
3272 } else {
3273 assert(insn->tex.target == TEX_TARGET_1D);
3274 }
3275 emitField(0x20, 4, target);
3276 }
3277
3278 void
emitSUHandle(const int s)3279 CodeEmitterGM107::emitSUHandle(const int s)
3280 {
3281 const TexInstruction *insn = this->insn->asTex();
3282
3283 assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
3284
3285 if (insn->src(s).getFile() == FILE_GPR) {
3286 emitGPR(0x27, insn->src(s));
3287 } else {
3288 ImmediateValue *imm = insn->getSrc(s)->asImm();
3289 assert(imm);
3290 emitField(0x33, 1, 1);
3291 emitField(0x24, 13, imm->reg.data.u32);
3292 }
3293 }
3294
3295 void
emitSUSTx()3296 CodeEmitterGM107::emitSUSTx()
3297 {
3298 const TexInstruction *insn = this->insn->asTex();
3299
3300 emitInsn(0xeb200000);
3301 if (insn->op == OP_SUSTB)
3302 emitField(0x34, 1, 1);
3303 emitSUTarget();
3304
3305 emitLDSTc(0x18);
3306 emitField(0x14, 4, 0xf); // rgba
3307 emitGPR (0x08, insn->src(0));
3308 emitGPR (0x00, insn->src(1));
3309
3310 emitSUHandle(2);
3311 }
3312
3313 void
emitSULDx()3314 CodeEmitterGM107::emitSULDx()
3315 {
3316 const TexInstruction *insn = this->insn->asTex();
3317 int type = 0;
3318
3319 emitInsn(0xeb000000);
3320 if (insn->op == OP_SULDB)
3321 emitField(0x34, 1, 1);
3322 emitSUTarget();
3323
3324 switch (insn->dType) {
3325 case TYPE_S8: type = 1; break;
3326 case TYPE_U16: type = 2; break;
3327 case TYPE_S16: type = 3; break;
3328 case TYPE_U32: type = 4; break;
3329 case TYPE_U64: type = 5; break;
3330 case TYPE_B128: type = 6; break;
3331 default:
3332 assert(insn->dType == TYPE_U8);
3333 break;
3334 }
3335 emitLDSTc(0x18);
3336 emitField(0x14, 3, type);
3337 emitGPR (0x00, insn->def(0));
3338 emitGPR (0x08, insn->src(0));
3339
3340 emitSUHandle(1);
3341 }
3342
3343 void
emitSUREDx()3344 CodeEmitterGM107::emitSUREDx()
3345 {
3346 const TexInstruction *insn = this->insn->asTex();
3347 uint8_t type = 0, subOp;
3348
3349 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS)
3350 emitInsn(0xeac00000);
3351 else
3352 emitInsn(0xea600000);
3353
3354 if (insn->op == OP_SUREDB)
3355 emitField(0x34, 1, 1);
3356 emitSUTarget();
3357
3358 // destination type
3359 switch (insn->dType) {
3360 case TYPE_S32: type = 1; break;
3361 case TYPE_U64: type = 2; break;
3362 case TYPE_F32: type = 3; break;
3363 case TYPE_S64: type = 5; break;
3364 default:
3365 assert(insn->dType == TYPE_U32);
3366 break;
3367 }
3368
3369 // atomic operation
3370 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
3371 subOp = 0;
3372 } else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
3373 subOp = 8;
3374 } else {
3375 subOp = insn->subOp;
3376 }
3377
3378 emitField(0x24, 3, type);
3379 emitField(0x1d, 4, subOp);
3380 emitGPR (0x14, insn->src(1));
3381 emitGPR (0x08, insn->src(0));
3382 emitGPR (0x00, insn->def(0));
3383
3384 emitSUHandle(2);
3385 }
3386
3387 /*******************************************************************************
3388 * assembler front-end
3389 ******************************************************************************/
3390
3391 bool
emitInstruction(Instruction * i)3392 CodeEmitterGM107::emitInstruction(Instruction *i)
3393 {
3394 const unsigned int size = (writeIssueDelays && !(codeSize & 0x1f)) ? 16 : 8;
3395 bool ret = true;
3396
3397 insn = i;
3398
3399 if (insn->encSize != 8) {
3400 ERROR("skipping undecodable instruction: "); insn->print();
3401 return false;
3402 } else
3403 if (codeSize + size > codeSizeLimit) {
3404 ERROR("code emitter output buffer too small\n");
3405 return false;
3406 }
3407
3408 if (writeIssueDelays) {
3409 int n = ((codeSize & 0x1f) / 8) - 1;
3410 if (n < 0) {
3411 data = code;
3412 data[0] = 0x00000000;
3413 data[1] = 0x00000000;
3414 code += 2;
3415 codeSize += 8;
3416 n++;
3417 }
3418
3419 emitField(data, n * 21, 21, insn->sched);
3420 }
3421
3422 switch (insn->op) {
3423 case OP_EXIT:
3424 emitEXIT();
3425 break;
3426 case OP_BRA:
3427 emitBRA();
3428 break;
3429 case OP_CALL:
3430 emitCAL();
3431 break;
3432 case OP_PRECONT:
3433 emitPCNT();
3434 break;
3435 case OP_CONT:
3436 emitCONT();
3437 break;
3438 case OP_PREBREAK:
3439 emitPBK();
3440 break;
3441 case OP_BREAK:
3442 emitBRK();
3443 break;
3444 case OP_PRERET:
3445 emitPRET();
3446 break;
3447 case OP_RET:
3448 emitRET();
3449 break;
3450 case OP_JOINAT:
3451 emitSSY();
3452 break;
3453 case OP_JOIN:
3454 emitSYNC();
3455 break;
3456 case OP_QUADON:
3457 emitSAM();
3458 break;
3459 case OP_QUADPOP:
3460 emitRAM();
3461 break;
3462 case OP_MOV:
3463 emitMOV();
3464 break;
3465 case OP_RDSV:
3466 if (targGM107->isCS2RSV(insn->getSrc(0)->reg.data.sv.sv))
3467 emitCS2R();
3468 else
3469 emitS2R();
3470 break;
3471 case OP_ABS:
3472 case OP_NEG:
3473 case OP_SAT:
3474 case OP_FLOOR:
3475 case OP_CEIL:
3476 case OP_TRUNC:
3477 case OP_CVT:
3478 if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
3479 insn->src(0).getFile() == FILE_PREDICATE)) {
3480 emitMOV();
3481 } else if (isFloatType(insn->dType)) {
3482 if (isFloatType(insn->sType))
3483 emitF2F();
3484 else
3485 emitI2F();
3486 } else {
3487 if (isFloatType(insn->sType))
3488 emitF2I();
3489 else
3490 emitI2I();
3491 }
3492 break;
3493 case OP_SHFL:
3494 emitSHFL();
3495 break;
3496 case OP_ADD:
3497 case OP_SUB:
3498 if (isFloatType(insn->dType)) {
3499 if (insn->dType == TYPE_F64)
3500 emitDADD();
3501 else
3502 emitFADD();
3503 } else {
3504 emitIADD();
3505 }
3506 break;
3507 case OP_MUL:
3508 if (isFloatType(insn->dType)) {
3509 if (insn->dType == TYPE_F64)
3510 emitDMUL();
3511 else
3512 emitFMUL();
3513 } else {
3514 emitIMUL();
3515 }
3516 break;
3517 case OP_MAD:
3518 case OP_FMA:
3519 if (isFloatType(insn->dType)) {
3520 if (insn->dType == TYPE_F64)
3521 emitDFMA();
3522 else
3523 emitFFMA();
3524 } else {
3525 emitIMAD();
3526 }
3527 break;
3528 case OP_SHLADD:
3529 emitISCADD();
3530 break;
3531 case OP_XMAD:
3532 emitXMAD();
3533 break;
3534 case OP_MIN:
3535 case OP_MAX:
3536 if (isFloatType(insn->dType)) {
3537 if (insn->dType == TYPE_F64)
3538 emitDMNMX();
3539 else
3540 emitFMNMX();
3541 } else {
3542 emitIMNMX();
3543 }
3544 break;
3545 case OP_SHL:
3546 if (typeSizeof(insn->sType) == 8)
3547 emitSHF();
3548 else
3549 emitSHL();
3550 break;
3551 case OP_SHR:
3552 if (typeSizeof(insn->sType) == 8)
3553 emitSHF();
3554 else
3555 emitSHR();
3556 break;
3557 case OP_POPCNT:
3558 emitPOPC();
3559 break;
3560 case OP_INSBF:
3561 emitBFI();
3562 break;
3563 case OP_EXTBF:
3564 emitBFE();
3565 break;
3566 case OP_BFIND:
3567 emitFLO();
3568 break;
3569 case OP_PERMT:
3570 emitPRMT();
3571 break;
3572 case OP_SLCT:
3573 if (isFloatType(insn->dType))
3574 emitFCMP();
3575 else
3576 emitICMP();
3577 break;
3578 case OP_SET:
3579 case OP_SET_AND:
3580 case OP_SET_OR:
3581 case OP_SET_XOR:
3582 if (insn->def(0).getFile() != FILE_PREDICATE) {
3583 if (isFloatType(insn->sType))
3584 if (insn->sType == TYPE_F64)
3585 emitDSET();
3586 else
3587 emitFSET();
3588 else
3589 emitISET();
3590 } else {
3591 if (isFloatType(insn->sType))
3592 if (insn->sType == TYPE_F64)
3593 emitDSETP();
3594 else
3595 emitFSETP();
3596 else
3597 emitISETP();
3598 }
3599 break;
3600 case OP_SELP:
3601 emitSEL();
3602 break;
3603 case OP_PRESIN:
3604 case OP_PREEX2:
3605 emitRRO();
3606 break;
3607 case OP_COS:
3608 case OP_SIN:
3609 case OP_EX2:
3610 case OP_LG2:
3611 case OP_RCP:
3612 case OP_RSQ:
3613 case OP_SQRT:
3614 emitMUFU();
3615 break;
3616 case OP_AND:
3617 case OP_OR:
3618 case OP_XOR:
3619 switch (insn->def(0).getFile()) {
3620 case FILE_GPR: emitLOP(); break;
3621 case FILE_PREDICATE: emitPSETP(); break;
3622 default:
3623 assert(!"invalid bool op");
3624 }
3625 break;
3626 case OP_NOT:
3627 emitNOT();
3628 break;
3629 case OP_LOAD:
3630 switch (insn->src(0).getFile()) {
3631 case FILE_MEMORY_CONST : emitLDC(); break;
3632 case FILE_MEMORY_LOCAL : emitLDL(); break;
3633 case FILE_MEMORY_SHARED: emitLDS(); break;
3634 case FILE_MEMORY_GLOBAL: emitLD(); break;
3635 default:
3636 assert(!"invalid load");
3637 emitNOP();
3638 break;
3639 }
3640 break;
3641 case OP_STORE:
3642 switch (insn->src(0).getFile()) {
3643 case FILE_MEMORY_LOCAL : emitSTL(); break;
3644 case FILE_MEMORY_SHARED: emitSTS(); break;
3645 case FILE_MEMORY_GLOBAL: emitST(); break;
3646 default:
3647 assert(!"invalid store");
3648 emitNOP();
3649 break;
3650 }
3651 break;
3652 case OP_ATOM:
3653 if (insn->src(0).getFile() == FILE_MEMORY_SHARED)
3654 emitATOMS();
3655 else
3656 if (!insn->defExists(0) && insn->subOp < NV50_IR_SUBOP_ATOM_CAS)
3657 emitRED();
3658 else
3659 emitATOM();
3660 break;
3661 case OP_CCTL:
3662 emitCCTL();
3663 break;
3664 case OP_VFETCH:
3665 emitALD();
3666 break;
3667 case OP_EXPORT:
3668 emitAST();
3669 break;
3670 case OP_PFETCH:
3671 emitISBERD();
3672 break;
3673 case OP_AFETCH:
3674 emitAL2P();
3675 break;
3676 case OP_LINTERP:
3677 case OP_PINTERP:
3678 emitIPA();
3679 break;
3680 case OP_PIXLD:
3681 emitPIXLD();
3682 break;
3683 case OP_TEX:
3684 case OP_TXL:
3685 if (insn->asTex()->tex.scalar)
3686 emitTEXS();
3687 else
3688 emitTEX();
3689 break;
3690 case OP_TXB:
3691 emitTEX();
3692 break;
3693 case OP_TXF:
3694 if (insn->asTex()->tex.scalar)
3695 emitTEXS();
3696 else
3697 emitTLD();
3698 break;
3699 case OP_TXG:
3700 if (insn->asTex()->tex.scalar)
3701 emitTEXS();
3702 else
3703 emitTLD4();
3704 break;
3705 case OP_TXD:
3706 emitTXD();
3707 break;
3708 case OP_TXQ:
3709 emitTXQ();
3710 break;
3711 case OP_TXLQ:
3712 emitTMML();
3713 break;
3714 case OP_TEXBAR:
3715 emitDEPBAR();
3716 break;
3717 case OP_QUADOP:
3718 emitFSWZADD();
3719 break;
3720 case OP_NOP:
3721 emitNOP();
3722 break;
3723 case OP_DISCARD:
3724 emitKIL();
3725 break;
3726 case OP_EMIT:
3727 case OP_RESTART:
3728 emitOUT();
3729 break;
3730 case OP_BAR:
3731 emitBAR();
3732 break;
3733 case OP_MEMBAR:
3734 emitMEMBAR();
3735 break;
3736 case OP_VOTE:
3737 emitVOTE();
3738 break;
3739 case OP_SUSTB:
3740 case OP_SUSTP:
3741 emitSUSTx();
3742 break;
3743 case OP_SULDB:
3744 case OP_SULDP:
3745 emitSULDx();
3746 break;
3747 case OP_SUREDB:
3748 case OP_SUREDP:
3749 emitSUREDx();
3750 break;
3751 default:
3752 assert(!"invalid opcode");
3753 emitNOP();
3754 ret = false;
3755 break;
3756 }
3757
3758 if (insn->join) {
3759 /*XXX*/
3760 }
3761
3762 code += 2;
3763 codeSize += 8;
3764 return ret;
3765 }
3766
3767 uint32_t
getMinEncodingSize(const Instruction * i) const3768 CodeEmitterGM107::getMinEncodingSize(const Instruction *i) const
3769 {
3770 return 8;
3771 }
3772
3773 /*******************************************************************************
3774 * sched data calculator
3775 ******************************************************************************/
3776
3777 inline void
emitStall(Instruction * insn,uint8_t cnt)3778 SchedDataCalculatorGM107::emitStall(Instruction *insn, uint8_t cnt)
3779 {
3780 assert(cnt < 16);
3781 insn->sched |= cnt;
3782 }
3783
3784 inline void
emitYield(Instruction * insn)3785 SchedDataCalculatorGM107::emitYield(Instruction *insn)
3786 {
3787 insn->sched |= 1 << 4;
3788 }
3789
3790 inline void
emitWrDepBar(Instruction * insn,uint8_t id)3791 SchedDataCalculatorGM107::emitWrDepBar(Instruction *insn, uint8_t id)
3792 {
3793 assert(id < 6);
3794 if ((insn->sched & 0xe0) == 0xe0)
3795 insn->sched ^= 0xe0;
3796 insn->sched |= id << 5;
3797 }
3798
3799 inline void
emitRdDepBar(Instruction * insn,uint8_t id)3800 SchedDataCalculatorGM107::emitRdDepBar(Instruction *insn, uint8_t id)
3801 {
3802 assert(id < 6);
3803 if ((insn->sched & 0x700) == 0x700)
3804 insn->sched ^= 0x700;
3805 insn->sched |= id << 8;
3806 }
3807
3808 inline void
emitWtDepBar(Instruction * insn,uint8_t id)3809 SchedDataCalculatorGM107::emitWtDepBar(Instruction *insn, uint8_t id)
3810 {
3811 assert(id < 6);
3812 insn->sched |= 1 << (11 + id);
3813 }
3814
3815 inline void
emitReuse(Instruction * insn,uint8_t id)3816 SchedDataCalculatorGM107::emitReuse(Instruction *insn, uint8_t id)
3817 {
3818 assert(id < 4);
3819 insn->sched |= 1 << (17 + id);
3820 }
3821
3822 inline void
printSchedInfo(int cycle,const Instruction * insn) const3823 SchedDataCalculatorGM107::printSchedInfo(int cycle,
3824 const Instruction *insn) const
3825 {
3826 uint8_t st, yl, wr, rd, wt, ru;
3827
3828 st = (insn->sched & 0x00000f) >> 0;
3829 yl = (insn->sched & 0x000010) >> 4;
3830 wr = (insn->sched & 0x0000e0) >> 5;
3831 rd = (insn->sched & 0x000700) >> 8;
3832 wt = (insn->sched & 0x01f800) >> 11;
3833 ru = (insn->sched & 0x1e0000) >> 17;
3834
3835 INFO("cycle %i, (st 0x%x, yl 0x%x, wr 0x%x, rd 0x%x, wt 0x%x, ru 0x%x)\n",
3836 cycle, st, yl, wr, rd, wt, ru);
3837 }
3838
3839 inline int
getStall(const Instruction * insn) const3840 SchedDataCalculatorGM107::getStall(const Instruction *insn) const
3841 {
3842 return insn->sched & 0xf;
3843 }
3844
3845 inline int
getWrDepBar(const Instruction * insn) const3846 SchedDataCalculatorGM107::getWrDepBar(const Instruction *insn) const
3847 {
3848 return (insn->sched & 0x0000e0) >> 5;
3849 }
3850
3851 inline int
getRdDepBar(const Instruction * insn) const3852 SchedDataCalculatorGM107::getRdDepBar(const Instruction *insn) const
3853 {
3854 return (insn->sched & 0x000700) >> 8;
3855 }
3856
3857 inline int
getWtDepBar(const Instruction * insn) const3858 SchedDataCalculatorGM107::getWtDepBar(const Instruction *insn) const
3859 {
3860 return (insn->sched & 0x01f800) >> 11;
3861 }
3862
3863 // Emit the reuse flag which allows to make use of the new memory hierarchy
3864 // introduced since Maxwell, the operand reuse cache.
3865 //
3866 // It allows to reduce bank conflicts by caching operands. Each time you issue
3867 // an instruction, that flag can tell the hw which operands are going to be
3868 // re-used by the next instruction. Note that the next instruction has to use
3869 // the same GPR id in the same operand slot.
3870 void
setReuseFlag(Instruction * insn)3871 SchedDataCalculatorGM107::setReuseFlag(Instruction *insn)
3872 {
3873 Instruction *next = insn->next;
3874 BitSet defs(255, 1);
3875
3876 if (!targ->isReuseSupported(insn))
3877 return;
3878
3879 for (int d = 0; insn->defExists(d); ++d) {
3880 const Value *def = insn->def(d).rep();
3881 if (insn->def(d).getFile() != FILE_GPR)
3882 continue;
3883 if (typeSizeof(insn->dType) != 4 || def->reg.data.id == 255)
3884 continue;
3885 defs.set(def->reg.data.id);
3886 }
3887
3888 for (int s = 0; insn->srcExists(s); s++) {
3889 const Value *src = insn->src(s).rep();
3890 if (insn->src(s).getFile() != FILE_GPR)
3891 continue;
3892 if (typeSizeof(insn->sType) != 4 || src->reg.data.id == 255)
3893 continue;
3894 if (defs.test(src->reg.data.id))
3895 continue;
3896 if (!next->srcExists(s) || next->src(s).getFile() != FILE_GPR)
3897 continue;
3898 if (src->reg.data.id != next->getSrc(s)->reg.data.id)
3899 continue;
3900 assert(s < 4);
3901 emitReuse(insn, s);
3902 }
3903 }
3904
3905 void
recordWr(const Value * v,int cycle,int ready)3906 SchedDataCalculatorGM107::recordWr(const Value *v, int cycle, int ready)
3907 {
3908 int a = v->reg.data.id, b;
3909
3910 switch (v->reg.file) {
3911 case FILE_GPR:
3912 b = a + v->reg.size / 4;
3913 for (int r = a; r < b; ++r)
3914 score->rd.r[r] = ready;
3915 break;
3916 case FILE_PREDICATE:
3917 // To immediately use a predicate set by any instructions, the minimum
3918 // number of stall counts is 13.
3919 score->rd.p[a] = cycle + 13;
3920 break;
3921 case FILE_FLAGS:
3922 score->rd.c = ready;
3923 break;
3924 default:
3925 break;
3926 }
3927 }
3928
3929 void
checkRd(const Value * v,int cycle,int & delay) const3930 SchedDataCalculatorGM107::checkRd(const Value *v, int cycle, int &delay) const
3931 {
3932 int a = v->reg.data.id, b;
3933 int ready = cycle;
3934
3935 switch (v->reg.file) {
3936 case FILE_GPR:
3937 b = a + v->reg.size / 4;
3938 for (int r = a; r < b; ++r)
3939 ready = MAX2(ready, score->rd.r[r]);
3940 break;
3941 case FILE_PREDICATE:
3942 ready = MAX2(ready, score->rd.p[a]);
3943 break;
3944 case FILE_FLAGS:
3945 ready = MAX2(ready, score->rd.c);
3946 break;
3947 default:
3948 break;
3949 }
3950 if (cycle < ready)
3951 delay = MAX2(delay, ready - cycle);
3952 }
3953
3954 void
commitInsn(const Instruction * insn,int cycle)3955 SchedDataCalculatorGM107::commitInsn(const Instruction *insn, int cycle)
3956 {
3957 const int ready = cycle + targ->getLatency(insn);
3958
3959 for (int d = 0; insn->defExists(d); ++d)
3960 recordWr(insn->getDef(d), cycle, ready);
3961
3962 #ifdef GM107_DEBUG_SCHED_DATA
3963 score->print(cycle);
3964 #endif
3965 }
3966
3967 #define GM107_MIN_ISSUE_DELAY 0x1
3968 #define GM107_MAX_ISSUE_DELAY 0xf
3969
3970 int
calcDelay(const Instruction * insn,int cycle) const3971 SchedDataCalculatorGM107::calcDelay(const Instruction *insn, int cycle) const
3972 {
3973 int delay = 0, ready = cycle;
3974
3975 for (int s = 0; insn->srcExists(s); ++s)
3976 checkRd(insn->getSrc(s), cycle, delay);
3977
3978 // TODO: make use of getReadLatency()!
3979
3980 return MAX2(delay, ready - cycle);
3981 }
3982
3983 void
setDelay(Instruction * insn,int delay,const Instruction * next)3984 SchedDataCalculatorGM107::setDelay(Instruction *insn, int delay,
3985 const Instruction *next)
3986 {
3987 const OpClass cl = targ->getOpClass(insn->op);
3988 int wr, rd;
3989
3990 if (insn->op == OP_EXIT ||
3991 insn->op == OP_BAR ||
3992 insn->op == OP_MEMBAR) {
3993 delay = GM107_MAX_ISSUE_DELAY;
3994 } else
3995 if (insn->op == OP_QUADON ||
3996 insn->op == OP_QUADPOP) {
3997 delay = 0xd;
3998 } else
3999 if (cl == OPCLASS_FLOW || insn->join) {
4000 delay = 0xd;
4001 }
4002
4003 if (!next || !targ->canDualIssue(insn, next)) {
4004 delay = CLAMP(delay, GM107_MIN_ISSUE_DELAY, GM107_MAX_ISSUE_DELAY);
4005 } else {
4006 delay = 0x0; // dual-issue
4007 }
4008
4009 wr = getWrDepBar(insn);
4010 rd = getRdDepBar(insn);
4011
4012 if (delay == GM107_MIN_ISSUE_DELAY && (wr & rd) != 7) {
4013 // Barriers take one additional clock cycle to become active on top of
4014 // the clock consumed by the instruction producing it.
4015 if (!next || insn->bb != next->bb) {
4016 delay = 0x2;
4017 } else {
4018 int wt = getWtDepBar(next);
4019 if ((wt & (1 << wr)) | (wt & (1 << rd)))
4020 delay = 0x2;
4021 }
4022 }
4023
4024 emitStall(insn, delay);
4025 }
4026
4027
4028 // Return true when the given instruction needs to emit a read dependency
4029 // barrier (for WaR hazards) because it doesn't operate at a fixed latency, and
4030 // setting the maximum number of stall counts is not enough.
4031 bool
needRdDepBar(const Instruction * insn) const4032 SchedDataCalculatorGM107::needRdDepBar(const Instruction *insn) const
4033 {
4034 BitSet srcs(255, 1), defs(255, 1);
4035 int a, b;
4036
4037 if (!targ->isBarrierRequired(insn))
4038 return false;
4039
4040 // Do not emit a read dependency barrier when the instruction doesn't use
4041 // any GPR (like st s[0x4] 0x0) as input because it's unnecessary.
4042 for (int s = 0; insn->srcExists(s); ++s) {
4043 const Value *src = insn->src(s).rep();
4044 if (insn->src(s).getFile() != FILE_GPR)
4045 continue;
4046 if (src->reg.data.id == 255)
4047 continue;
4048
4049 a = src->reg.data.id;
4050 b = a + src->reg.size / 4;
4051 for (int r = a; r < b; ++r)
4052 srcs.set(r);
4053 }
4054
4055 if (!srcs.popCount())
4056 return false;
4057
4058 // Do not emit a read dependency barrier when the output GPRs are equal to
4059 // the input GPRs (like rcp $r0 $r0) because a write dependency barrier will
4060 // be produced and WaR hazards are prevented.
4061 for (int d = 0; insn->defExists(d); ++d) {
4062 const Value *def = insn->def(d).rep();
4063 if (insn->def(d).getFile() != FILE_GPR)
4064 continue;
4065 if (def->reg.data.id == 255)
4066 continue;
4067
4068 a = def->reg.data.id;
4069 b = a + def->reg.size / 4;
4070 for (int r = a; r < b; ++r)
4071 defs.set(r);
4072 }
4073
4074 srcs.andNot(defs);
4075 if (!srcs.popCount())
4076 return false;
4077
4078 return true;
4079 }
4080
4081 // Return true when the given instruction needs to emit a write dependency
4082 // barrier (for RaW hazards) because it doesn't operate at a fixed latency, and
4083 // setting the maximum number of stall counts is not enough. This is only legal
4084 // if the instruction output something.
4085 bool
needWrDepBar(const Instruction * insn) const4086 SchedDataCalculatorGM107::needWrDepBar(const Instruction *insn) const
4087 {
4088 if (!targ->isBarrierRequired(insn))
4089 return false;
4090
4091 for (int d = 0; insn->defExists(d); ++d) {
4092 if (insn->def(d).getFile() == FILE_GPR ||
4093 insn->def(d).getFile() == FILE_FLAGS ||
4094 insn->def(d).getFile() == FILE_PREDICATE)
4095 return true;
4096 }
4097 return false;
4098 }
4099
4100 // Helper function for findFirstUse() and findFirstDef()
4101 bool
doesInsnWriteTo(const Instruction * insn,const Value * val) const4102 SchedDataCalculatorGM107::doesInsnWriteTo(const Instruction *insn,
4103 const Value *val) const
4104 {
4105 if (val->reg.file != FILE_GPR &&
4106 val->reg.file != FILE_PREDICATE &&
4107 val->reg.file != FILE_FLAGS)
4108 return false;
4109
4110 for (int d = 0; insn->defExists(d); ++d) {
4111 const Value* def = insn->getDef(d);
4112 int minGPR = def->reg.data.id;
4113 int maxGPR = minGPR + def->reg.size / 4 - 1;
4114
4115 if (def->reg.file != val->reg.file)
4116 continue;
4117
4118 if (def->reg.file == FILE_GPR) {
4119 if (val->reg.data.id + val->reg.size / 4 - 1 < minGPR ||
4120 val->reg.data.id > maxGPR)
4121 continue;
4122 return true;
4123 } else
4124 if (def->reg.file == FILE_PREDICATE) {
4125 if (val->reg.data.id != minGPR)
4126 continue;
4127 return true;
4128 } else
4129 if (def->reg.file == FILE_FLAGS) {
4130 if (val->reg.data.id != minGPR)
4131 continue;
4132 return true;
4133 }
4134 }
4135
4136 return false;
4137 }
4138
4139 // Find the next instruction inside the same basic block which uses (reads or
4140 // writes from) the output of the given instruction in order to avoid RaW and
4141 // WaW hazards.
4142 Instruction *
findFirstUse(const Instruction * bari) const4143 SchedDataCalculatorGM107::findFirstUse(const Instruction *bari) const
4144 {
4145 Instruction *insn, *next;
4146
4147 if (!bari->defExists(0))
4148 return NULL;
4149
4150 for (insn = bari->next; insn != NULL; insn = next) {
4151 next = insn->next;
4152
4153 for (int s = 0; insn->srcExists(s); ++s)
4154 if (doesInsnWriteTo(bari, insn->getSrc(s)))
4155 return insn;
4156
4157 for (int d = 0; insn->defExists(d); ++d)
4158 if (doesInsnWriteTo(bari, insn->getDef(d)))
4159 return insn;
4160 }
4161 return NULL;
4162 }
4163
4164 // Find the next instruction inside the same basic block which overwrites, at
4165 // least, one source of the given instruction in order to avoid WaR hazards.
4166 Instruction *
findFirstDef(const Instruction * bari) const4167 SchedDataCalculatorGM107::findFirstDef(const Instruction *bari) const
4168 {
4169 Instruction *insn, *next;
4170
4171 if (!bari->srcExists(0))
4172 return NULL;
4173
4174 for (insn = bari->next; insn != NULL; insn = next) {
4175 next = insn->next;
4176
4177 for (int s = 0; bari->srcExists(s); ++s)
4178 if (doesInsnWriteTo(insn, bari->getSrc(s)))
4179 return insn;
4180 }
4181 return NULL;
4182 }
4183
4184 // Dependency barriers:
4185 // This pass is a bit ugly and could probably be improved by performing a
4186 // better allocation.
4187 //
4188 // The main idea is to avoid WaR and RaW hazards by emitting read/write
4189 // dependency barriers using the control codes.
4190 bool
insertBarriers(BasicBlock * bb)4191 SchedDataCalculatorGM107::insertBarriers(BasicBlock *bb)
4192 {
4193 std::list<LiveBarUse> live_uses;
4194 std::list<LiveBarDef> live_defs;
4195 Instruction *insn, *next;
4196 BitSet bars(6, 1);
4197 int bar_id;
4198
4199 for (insn = bb->getEntry(); insn != NULL; insn = next) {
4200 Instruction *usei = NULL, *defi = NULL;
4201 bool need_wr_bar, need_rd_bar;
4202
4203 next = insn->next;
4204
4205 // Expire old barrier uses.
4206 for (std::list<LiveBarUse>::iterator it = live_uses.begin();
4207 it != live_uses.end();) {
4208 if (insn->serial >= it->usei->serial) {
4209 int wr = getWrDepBar(it->insn);
4210 emitWtDepBar(insn, wr);
4211 bars.clr(wr); // free barrier
4212 it = live_uses.erase(it);
4213 continue;
4214 }
4215 ++it;
4216 }
4217
4218 // Expire old barrier defs.
4219 for (std::list<LiveBarDef>::iterator it = live_defs.begin();
4220 it != live_defs.end();) {
4221 if (insn->serial >= it->defi->serial) {
4222 int rd = getRdDepBar(it->insn);
4223 emitWtDepBar(insn, rd);
4224 bars.clr(rd); // free barrier
4225 it = live_defs.erase(it);
4226 continue;
4227 }
4228 ++it;
4229 }
4230
4231 need_wr_bar = needWrDepBar(insn);
4232 need_rd_bar = needRdDepBar(insn);
4233
4234 if (need_wr_bar) {
4235 // When the instruction requires to emit a write dependency barrier
4236 // (all which write something at a variable latency), find the next
4237 // instruction which reads the outputs (or writes to them, potentially
4238 // completing before this insn.
4239 usei = findFirstUse(insn);
4240
4241 // Allocate and emit a new barrier.
4242 bar_id = bars.findFreeRange(1);
4243 if (bar_id == -1)
4244 bar_id = 5;
4245 bars.set(bar_id);
4246 emitWrDepBar(insn, bar_id);
4247 if (usei)
4248 live_uses.push_back(LiveBarUse(insn, usei));
4249 }
4250
4251 if (need_rd_bar) {
4252 // When the instruction requires to emit a read dependency barrier
4253 // (all which read something at a variable latency), find the next
4254 // instruction which will write the inputs.
4255 defi = findFirstDef(insn);
4256
4257 if (usei && defi && usei->serial <= defi->serial)
4258 continue;
4259
4260 // Allocate and emit a new barrier.
4261 bar_id = bars.findFreeRange(1);
4262 if (bar_id == -1)
4263 bar_id = 5;
4264 bars.set(bar_id);
4265 emitRdDepBar(insn, bar_id);
4266 if (defi)
4267 live_defs.push_back(LiveBarDef(insn, defi));
4268 }
4269 }
4270
4271 // Remove unnecessary barrier waits.
4272 BitSet alive_bars(6, 1);
4273 for (insn = bb->getEntry(); insn != NULL; insn = next) {
4274 int wr, rd, wt;
4275
4276 next = insn->next;
4277
4278 wr = getWrDepBar(insn);
4279 rd = getRdDepBar(insn);
4280 wt = getWtDepBar(insn);
4281
4282 for (int idx = 0; idx < 6; ++idx) {
4283 if (!(wt & (1 << idx)))
4284 continue;
4285 if (!alive_bars.test(idx)) {
4286 insn->sched &= ~(1 << (11 + idx));
4287 } else {
4288 alive_bars.clr(idx);
4289 }
4290 }
4291
4292 if (wr < 6)
4293 alive_bars.set(wr);
4294 if (rd < 6)
4295 alive_bars.set(rd);
4296 }
4297
4298 return true;
4299 }
4300
4301 bool
visit(Function * func)4302 SchedDataCalculatorGM107::visit(Function *func)
4303 {
4304 ArrayList insns;
4305
4306 func->orderInstructions(insns);
4307
4308 scoreBoards.resize(func->cfg.getSize());
4309 for (size_t i = 0; i < scoreBoards.size(); ++i)
4310 scoreBoards[i].wipe();
4311 return true;
4312 }
4313
4314 bool
visit(BasicBlock * bb)4315 SchedDataCalculatorGM107::visit(BasicBlock *bb)
4316 {
4317 Instruction *insn, *next = NULL;
4318 int cycle = 0;
4319
4320 for (Instruction *insn = bb->getEntry(); insn; insn = insn->next) {
4321 /*XXX*/
4322 insn->sched = 0x7e0;
4323 }
4324
4325 if (!debug_get_bool_option("NV50_PROG_SCHED", true))
4326 return true;
4327
4328 // Insert read/write dependency barriers for instructions which don't
4329 // operate at a fixed latency.
4330 insertBarriers(bb);
4331
4332 score = &scoreBoards.at(bb->getId());
4333
4334 for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
4335 // back branches will wait until all target dependencies are satisfied
4336 if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
4337 continue;
4338 BasicBlock *in = BasicBlock::get(ei.getNode());
4339 score->setMax(&scoreBoards.at(in->getId()));
4340 }
4341
4342 #ifdef GM107_DEBUG_SCHED_DATA
4343 INFO("=== BB:%i initial scores\n", bb->getId());
4344 score->print(cycle);
4345 #endif
4346
4347 // Because barriers are allocated locally (intra-BB), we have to make sure
4348 // that all produced barriers have been consumed before entering inside a
4349 // new basic block. The best way is to do a global allocation pre RA but
4350 // it's really more difficult, especially because of the phi nodes. Anyways,
4351 // it seems like that waiting on a barrier which has already been consumed
4352 // doesn't add any additional cost, it's just not elegant!
4353 Instruction *start = bb->getEntry();
4354 if (start && bb->cfg.incidentCount() > 0) {
4355 for (int b = 0; b < 6; b++)
4356 emitWtDepBar(start, b);
4357 }
4358
4359 for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
4360 next = insn->next;
4361
4362 commitInsn(insn, cycle);
4363 int delay = calcDelay(next, cycle);
4364 setDelay(insn, delay, next);
4365 cycle += getStall(insn);
4366
4367 setReuseFlag(insn);
4368
4369 // XXX: The yield flag seems to destroy a bunch of things when it is
4370 // set on every instruction, need investigation.
4371 //emitYield(insn);
4372
4373 #ifdef GM107_DEBUG_SCHED_DATA
4374 printSchedInfo(cycle, insn);
4375 insn->print();
4376 next->print();
4377 #endif
4378 }
4379
4380 if (!insn)
4381 return true;
4382 commitInsn(insn, cycle);
4383
4384 int bbDelay = -1;
4385
4386 #ifdef GM107_DEBUG_SCHED_DATA
4387 fprintf(stderr, "last instruction is : ");
4388 insn->print();
4389 fprintf(stderr, "cycle=%d\n", cycle);
4390 #endif
4391
4392 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
4393 BasicBlock *out = BasicBlock::get(ei.getNode());
4394
4395 if (ei.getType() != Graph::Edge::BACK) {
4396 // Only test the first instruction of the outgoing block.
4397 next = out->getEntry();
4398 if (next) {
4399 bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
4400 } else {
4401 // When the outgoing BB is empty, make sure to set the number of
4402 // stall counts needed by the instruction because we don't know the
4403 // next instruction.
4404 bbDelay = MAX2(bbDelay, targ->getLatency(insn));
4405 }
4406 } else {
4407 // Wait until all dependencies are satisfied.
4408 const int regsFree = score->getLatest();
4409 next = out->getFirst();
4410 for (int c = cycle; next && c < regsFree; next = next->next) {
4411 bbDelay = MAX2(bbDelay, calcDelay(next, c));
4412 c += getStall(next);
4413 }
4414 next = NULL;
4415 }
4416 }
4417 if (bb->cfg.outgoingCount() != 1)
4418 next = NULL;
4419 setDelay(insn, bbDelay, next);
4420 cycle += getStall(insn);
4421
4422 score->rebase(cycle); // common base for initializing out blocks' scores
4423 return true;
4424 }
4425
4426 /*******************************************************************************
4427 * main
4428 ******************************************************************************/
4429
4430 void
prepareEmission(Function * func)4431 CodeEmitterGM107::prepareEmission(Function *func)
4432 {
4433 SchedDataCalculatorGM107 sched(targGM107);
4434 CodeEmitter::prepareEmission(func);
4435 sched.run(func, true, true);
4436 }
4437
// Return the number of 24-byte bundles needed to hold 'size' bytes of code,
// rounding up (0 for an empty size). The caller adds 8 bytes of scheduling
// data per bundle.
//
// The rounding is written without adding to 'size', so the result is correct
// over the whole uint32_t range (size + 23 would wrap around near UINT32_MAX).
static inline uint32_t sizeToBundlesGM107(uint32_t size)
{
   const uint32_t bundleBytes = 24;

   return size / bundleBytes + (size % bundleBytes != 0 ? 1 : 0);
}
4442
4443 void
prepareEmission(Program * prog)4444 CodeEmitterGM107::prepareEmission(Program *prog)
4445 {
4446 for (ArrayList::Iterator fi = prog->allFuncs.iterator();
4447 !fi.end(); fi.next()) {
4448 Function *func = reinterpret_cast<Function *>(fi.get());
4449 func->binPos = prog->binSize;
4450 prepareEmission(func);
4451
4452 // adjust sizes & positions for schedulding info:
4453 if (prog->getTarget()->hasSWSched) {
4454 uint32_t adjPos = func->binPos;
4455 BasicBlock *bb = NULL;
4456 for (int i = 0; i < func->bbCount; ++i) {
4457 bb = func->bbArray[i];
4458 int32_t adjSize = bb->binSize;
4459 if (adjPos % 32) {
4460 adjSize -= 32 - adjPos % 32;
4461 if (adjSize < 0)
4462 adjSize = 0;
4463 }
4464 adjSize = bb->binSize + sizeToBundlesGM107(adjSize) * 8;
4465 bb->binPos = adjPos;
4466 bb->binSize = adjSize;
4467 adjPos += adjSize;
4468 }
4469 if (bb)
4470 func->binSize = adjPos - func->binPos;
4471 }
4472
4473 prog->binSize += func->binSize;
4474 }
4475 }
4476
CodeEmitterGM107(const TargetGM107 * target)4477 CodeEmitterGM107::CodeEmitterGM107(const TargetGM107 *target)
4478 : CodeEmitter(target),
4479 targGM107(target),
4480 writeIssueDelays(target->hasSWSched)
4481 {
4482 code = NULL;
4483 codeSize = codeSizeLimit = 0;
4484 relocInfo = NULL;
4485 }
4486
4487 CodeEmitter *
createCodeEmitterGM107(Program::Type type)4488 TargetGM107::createCodeEmitterGM107(Program::Type type)
4489 {
4490 CodeEmitterGM107 *emit = new CodeEmitterGM107(this);
4491 emit->setProgramType(type);
4492 return emit;
4493 }
4494
4495 } // namespace nv50_ir
4496