1 /*
2 * Copyright 2014 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Ben Skeggs <bskeggs@redhat.com>
23 */
24
25 #include "codegen/nv50_ir_target_gm107.h"
26
27 //#define GM107_DEBUG_SCHED_DATA
28
29 namespace nv50_ir {
30
31 class CodeEmitterGM107 : public CodeEmitter
32 {
33 public:
34 CodeEmitterGM107(const TargetGM107 *);
35
36 virtual bool emitInstruction(Instruction *);
37 virtual uint32_t getMinEncodingSize(const Instruction *) const;
38
39 virtual void prepareEmission(Program *);
40 virtual void prepareEmission(Function *);
41
setProgramType(Program::Type pType)42 inline void setProgramType(Program::Type pType) { progType = pType; }
43
44 private:
45 const TargetGM107 *targGM107;
46
47 Program::Type progType;
48
49 const Instruction *insn;
50 const bool writeIssueDelays;
51 uint32_t *data;
52
53 private:
54 inline void emitField(uint32_t *, int, int, uint32_t);
emitField(int b,int s,uint32_t v)55 inline void emitField(int b, int s, uint32_t v) { emitField(code, b, s, v); }
56
57 inline void emitInsn(uint32_t, bool);
emitInsn(uint32_t o)58 inline void emitInsn(uint32_t o) { emitInsn(o, true); }
59 inline void emitPred();
60 inline void emitGPR(int, const Value *);
emitGPR(int pos)61 inline void emitGPR(int pos) {
62 emitGPR(pos, (const Value *)NULL);
63 }
emitGPR(int pos,const ValueRef & ref)64 inline void emitGPR(int pos, const ValueRef &ref) {
65 emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL);
66 }
emitGPR(int pos,const ValueRef * ref)67 inline void emitGPR(int pos, const ValueRef *ref) {
68 emitGPR(pos, ref ? ref->rep() : (const Value *)NULL);
69 }
emitGPR(int pos,const ValueDef & def)70 inline void emitGPR(int pos, const ValueDef &def) {
71 emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL);
72 }
73 inline void emitSYS(int, const Value *);
emitSYS(int pos,const ValueRef & ref)74 inline void emitSYS(int pos, const ValueRef &ref) {
75 emitSYS(pos, ref.get() ? ref.rep() : (const Value *)NULL);
76 }
77 inline void emitPRED(int, const Value *);
emitPRED(int pos)78 inline void emitPRED(int pos) {
79 emitPRED(pos, (const Value *)NULL);
80 }
emitPRED(int pos,const ValueRef & ref)81 inline void emitPRED(int pos, const ValueRef &ref) {
82 emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL);
83 }
emitPRED(int pos,const ValueDef & def)84 inline void emitPRED(int pos, const ValueDef &def) {
85 emitPRED(pos, def.get() ? def.rep() : (const Value *)NULL);
86 }
87 inline void emitADDR(int, int, int, int, const ValueRef &);
88 inline void emitCBUF(int, int, int, int, int, const ValueRef &);
89 inline bool longIMMD(const ValueRef &);
90 inline void emitIMMD(int, int, const ValueRef &);
91
92 void emitCond3(int, CondCode);
93 void emitCond4(int, CondCode);
emitCond5(int pos,CondCode cc)94 void emitCond5(int pos, CondCode cc) { emitCond4(pos, cc); }
95 inline void emitO(int);
96 inline void emitP(int);
97 inline void emitSAT(int);
98 inline void emitCC(int);
99 inline void emitX(int);
100 inline void emitABS(int, const ValueRef &);
101 inline void emitNEG(int, const ValueRef &);
102 inline void emitNEG2(int, const ValueRef &, const ValueRef &);
103 inline void emitFMZ(int, int);
104 inline void emitRND(int, RoundMode, int);
emitRND(int pos)105 inline void emitRND(int pos) {
106 emitRND(pos, insn->rnd, -1);
107 }
108 inline void emitPDIV(int);
109 inline void emitINV(int, const ValueRef &);
110
111 void emitEXIT();
112 void emitBRA();
113 void emitCAL();
114 void emitPCNT();
115 void emitCONT();
116 void emitPBK();
117 void emitBRK();
118 void emitPRET();
119 void emitRET();
120 void emitSSY();
121 void emitSYNC();
122 void emitSAM();
123 void emitRAM();
124
125 void emitMOV();
126 void emitS2R();
127 void emitF2F();
128 void emitF2I();
129 void emitI2F();
130 void emitI2I();
131 void emitSEL();
132 void emitSHFL();
133
134 void emitDADD();
135 void emitDMUL();
136 void emitDFMA();
137 void emitDMNMX();
138 void emitDSET();
139 void emitDSETP();
140
141 void emitFADD();
142 void emitFMUL();
143 void emitFFMA();
144 void emitMUFU();
145 void emitFMNMX();
146 void emitRRO();
147 void emitFCMP();
148 void emitFSET();
149 void emitFSETP();
150 void emitFSWZADD();
151
152 void emitLOP();
153 void emitNOT();
154 void emitIADD();
155 void emitIMUL();
156 void emitIMAD();
157 void emitISCADD();
158 void emitIMNMX();
159 void emitICMP();
160 void emitISET();
161 void emitISETP();
162 void emitSHL();
163 void emitSHR();
164 void emitSHF();
165 void emitPOPC();
166 void emitBFI();
167 void emitBFE();
168 void emitFLO();
169
170 void emitLDSTs(int, DataType);
171 void emitLDSTc(int);
172 void emitLDC();
173 void emitLDL();
174 void emitLDS();
175 void emitLD();
176 void emitSTL();
177 void emitSTS();
178 void emitST();
179 void emitALD();
180 void emitAST();
181 void emitISBERD();
182 void emitAL2P();
183 void emitIPA();
184 void emitATOM();
185 void emitATOMS();
186 void emitRED();
187 void emitCCTL();
188
189 void emitPIXLD();
190
191 void emitTEXs(int);
192 void emitTEX();
193 void emitTLD();
194 void emitTLD4();
195 void emitTXD();
196 void emitTXQ();
197 void emitTMML();
198 void emitDEPBAR();
199
200 void emitNOP();
201 void emitKIL();
202 void emitOUT();
203
204 void emitBAR();
205 void emitMEMBAR();
206
207 void emitVOTE();
208
209 void emitSUTarget();
210 void emitSUHandle(const int s);
211 void emitSUSTx();
212 void emitSULDx();
213 void emitSUREDx();
214 };
215
216 /*******************************************************************************
217 * general instruction layout/fields
218 ******************************************************************************/
219
220 void
emitField(uint32_t * data,int b,int s,uint32_t v)221 CodeEmitterGM107::emitField(uint32_t *data, int b, int s, uint32_t v)
222 {
223 if (b >= 0) {
224 uint32_t m = ((1ULL << s) - 1);
225 uint64_t d = (uint64_t)(v & m) << b;
226 assert(!(v & ~m) || (v & ~m) == ~m);
227 data[1] |= d >> 32;
228 data[0] |= d;
229 }
230 }
231
232 void
emitPred()233 CodeEmitterGM107::emitPred()
234 {
235 if (insn->predSrc >= 0) {
236 emitField(16, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id);
237 emitField(19, 1, insn->cc == CC_NOT_P);
238 } else {
239 emitField(16, 3, 7);
240 }
241 }
242
243 void
emitInsn(uint32_t hi,bool pred)244 CodeEmitterGM107::emitInsn(uint32_t hi, bool pred)
245 {
246 code[0] = 0x00000000;
247 code[1] = hi;
248 if (pred)
249 emitPred();
250 }
251
252 void
emitGPR(int pos,const Value * val)253 CodeEmitterGM107::emitGPR(int pos, const Value *val)
254 {
255 emitField(pos, 8, val && !val->inFile(FILE_FLAGS) ?
256 val->reg.data.id : 255);
257 }
258
259 void
emitSYS(int pos,const Value * val)260 CodeEmitterGM107::emitSYS(int pos, const Value *val)
261 {
262 int id = val ? val->reg.data.id : -1;
263
264 switch (id) {
265 case SV_LANEID : id = 0x00; break;
266 case SV_VERTEX_COUNT : id = 0x10; break;
267 case SV_INVOCATION_ID : id = 0x11; break;
268 case SV_THREAD_KILL : id = 0x13; break;
269 case SV_INVOCATION_INFO: id = 0x1d; break;
270 case SV_TID : id = 0x21 + val->reg.data.sv.index; break;
271 case SV_CTAID : id = 0x25 + val->reg.data.sv.index; break;
272 case SV_LANEMASK_EQ : id = 0x38; break;
273 case SV_LANEMASK_LT : id = 0x39; break;
274 case SV_LANEMASK_LE : id = 0x3a; break;
275 case SV_LANEMASK_GT : id = 0x3b; break;
276 case SV_LANEMASK_GE : id = 0x3c; break;
277 case SV_CLOCK : id = 0x50 + val->reg.data.sv.index; break;
278 default:
279 assert(!"invalid system value");
280 id = 0;
281 break;
282 }
283
284 emitField(pos, 8, id);
285 }
286
287 void
emitPRED(int pos,const Value * val)288 CodeEmitterGM107::emitPRED(int pos, const Value *val)
289 {
290 emitField(pos, 3, val ? val->reg.data.id : 7);
291 }
292
293 void
emitADDR(int gpr,int off,int len,int shr,const ValueRef & ref)294 CodeEmitterGM107::emitADDR(int gpr, int off, int len, int shr,
295 const ValueRef &ref)
296 {
297 const Value *v = ref.get();
298 assert(!(v->reg.data.offset & ((1 << shr) - 1)));
299 if (gpr >= 0)
300 emitGPR(gpr, ref.getIndirect(0));
301 emitField(off, len, v->reg.data.offset >> shr);
302 }
303
304 void
emitCBUF(int buf,int gpr,int off,int len,int shr,const ValueRef & ref)305 CodeEmitterGM107::emitCBUF(int buf, int gpr, int off, int len, int shr,
306 const ValueRef &ref)
307 {
308 const Value *v = ref.get();
309 const Symbol *s = v->asSym();
310
311 assert(!(s->reg.data.offset & ((1 << shr) - 1)));
312
313 emitField(buf, 5, v->reg.fileIndex);
314 if (gpr >= 0)
315 emitGPR(gpr, ref.getIndirect(0));
316 emitField(off, 16, s->reg.data.offset >> shr);
317 }
318
319 bool
longIMMD(const ValueRef & ref)320 CodeEmitterGM107::longIMMD(const ValueRef &ref)
321 {
322 if (ref.getFile() == FILE_IMMEDIATE) {
323 const ImmediateValue *imm = ref.get()->asImm();
324 if (isFloatType(insn->sType)) {
325 if ((imm->reg.data.u32 & 0x00000fff) != 0x00000000)
326 return true;
327 } else {
328 if ((imm->reg.data.u32 & 0xfff00000) != 0x00000000 &&
329 (imm->reg.data.u32 & 0xfff00000) != 0xfff00000)
330 return true;
331 }
332 }
333 return false;
334 }
335
336 void
emitIMMD(int pos,int len,const ValueRef & ref)337 CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef &ref)
338 {
339 const ImmediateValue *imm = ref.get()->asImm();
340 uint32_t val = imm->reg.data.u32;
341
342 if (len == 19) {
343 if (insn->sType == TYPE_F32 || insn->sType == TYPE_F16) {
344 assert(!(val & 0x00000fff));
345 val >>= 12;
346 } else if (insn->sType == TYPE_F64) {
347 assert(!(imm->reg.data.u64 & 0x00000fffffffffffULL));
348 val = imm->reg.data.u64 >> 44;
349 }
350 assert(!(val & 0xfff00000) || (val & 0xfff00000) == 0xfff00000);
351 emitField( 56, 1, (val & 0x80000) >> 19);
352 emitField(pos, len, (val & 0x7ffff));
353 } else {
354 emitField(pos, len, val);
355 }
356 }
357
358 /*******************************************************************************
359 * modifiers
360 ******************************************************************************/
361
362 void
emitCond3(int pos,CondCode code)363 CodeEmitterGM107::emitCond3(int pos, CondCode code)
364 {
365 int data = 0;
366
367 switch (code) {
368 case CC_FL : data = 0x00; break;
369 case CC_LTU:
370 case CC_LT : data = 0x01; break;
371 case CC_EQU:
372 case CC_EQ : data = 0x02; break;
373 case CC_LEU:
374 case CC_LE : data = 0x03; break;
375 case CC_GTU:
376 case CC_GT : data = 0x04; break;
377 case CC_NEU:
378 case CC_NE : data = 0x05; break;
379 case CC_GEU:
380 case CC_GE : data = 0x06; break;
381 case CC_TR : data = 0x07; break;
382 default:
383 assert(!"invalid cond3");
384 break;
385 }
386
387 emitField(pos, 3, data);
388 }
389
390 void
emitCond4(int pos,CondCode code)391 CodeEmitterGM107::emitCond4(int pos, CondCode code)
392 {
393 int data = 0;
394
395 switch (code) {
396 case CC_FL: data = 0x00; break;
397 case CC_LT: data = 0x01; break;
398 case CC_EQ: data = 0x02; break;
399 case CC_LE: data = 0x03; break;
400 case CC_GT: data = 0x04; break;
401 case CC_NE: data = 0x05; break;
402 case CC_GE: data = 0x06; break;
403 // case CC_NUM: data = 0x07; break;
404 // case CC_NAN: data = 0x08; break;
405 case CC_LTU: data = 0x09; break;
406 case CC_EQU: data = 0x0a; break;
407 case CC_LEU: data = 0x0b; break;
408 case CC_GTU: data = 0x0c; break;
409 case CC_NEU: data = 0x0d; break;
410 case CC_GEU: data = 0x0e; break;
411 case CC_TR: data = 0x0f; break;
412 default:
413 assert(!"invalid cond4");
414 break;
415 }
416
417 emitField(pos, 4, data);
418 }
419
420 void
emitO(int pos)421 CodeEmitterGM107::emitO(int pos)
422 {
423 emitField(pos, 1, insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT);
424 }
425
426 void
emitP(int pos)427 CodeEmitterGM107::emitP(int pos)
428 {
429 emitField(pos, 1, insn->perPatch);
430 }
431
432 void
emitSAT(int pos)433 CodeEmitterGM107::emitSAT(int pos)
434 {
435 emitField(pos, 1, insn->saturate);
436 }
437
438 void
emitCC(int pos)439 CodeEmitterGM107::emitCC(int pos)
440 {
441 emitField(pos, 1, insn->flagsDef >= 0);
442 }
443
444 void
emitX(int pos)445 CodeEmitterGM107::emitX(int pos)
446 {
447 emitField(pos, 1, insn->flagsSrc >= 0);
448 }
449
450 void
emitABS(int pos,const ValueRef & ref)451 CodeEmitterGM107::emitABS(int pos, const ValueRef &ref)
452 {
453 emitField(pos, 1, ref.mod.abs());
454 }
455
456 void
emitNEG(int pos,const ValueRef & ref)457 CodeEmitterGM107::emitNEG(int pos, const ValueRef &ref)
458 {
459 emitField(pos, 1, ref.mod.neg());
460 }
461
462 void
emitNEG2(int pos,const ValueRef & a,const ValueRef & b)463 CodeEmitterGM107::emitNEG2(int pos, const ValueRef &a, const ValueRef &b)
464 {
465 emitField(pos, 1, a.mod.neg() ^ b.mod.neg());
466 }
467
468 void
emitFMZ(int pos,int len)469 CodeEmitterGM107::emitFMZ(int pos, int len)
470 {
471 emitField(pos, len, insn->dnz << 1 | insn->ftz);
472 }
473
474 void
emitRND(int rmp,RoundMode rnd,int rip)475 CodeEmitterGM107::emitRND(int rmp, RoundMode rnd, int rip)
476 {
477 int rm = 0, ri = 0;
478 switch (rnd) {
479 case ROUND_NI: ri = 1;
480 case ROUND_N : rm = 0; break;
481 case ROUND_MI: ri = 1;
482 case ROUND_M : rm = 1; break;
483 case ROUND_PI: ri = 1;
484 case ROUND_P : rm = 2; break;
485 case ROUND_ZI: ri = 1;
486 case ROUND_Z : rm = 3; break;
487 default:
488 assert(!"invalid round mode");
489 break;
490 }
491 emitField(rip, 1, ri);
492 emitField(rmp, 2, rm);
493 }
494
495 void
emitPDIV(int pos)496 CodeEmitterGM107::emitPDIV(int pos)
497 {
498 assert(insn->postFactor >= -3 && insn->postFactor <= 3);
499 if (insn->postFactor > 0)
500 emitField(pos, 3, 7 - insn->postFactor);
501 else
502 emitField(pos, 3, 0 - insn->postFactor);
503 }
504
505 void
emitINV(int pos,const ValueRef & ref)506 CodeEmitterGM107::emitINV(int pos, const ValueRef &ref)
507 {
508 emitField(pos, 1, !!(ref.mod & Modifier(NV50_IR_MOD_NOT)));
509 }
510
511 /*******************************************************************************
512 * control flow
513 ******************************************************************************/
514
515 void
emitEXIT()516 CodeEmitterGM107::emitEXIT()
517 {
518 emitInsn (0xe3000000);
519 emitCond5(0x00, CC_TR);
520 }
521
522 void
emitBRA()523 CodeEmitterGM107::emitBRA()
524 {
525 const FlowInstruction *insn = this->insn->asFlow();
526 int gpr = -1;
527
528 if (insn->indirect) {
529 if (insn->absolute)
530 emitInsn(0xe2000000); // JMX
531 else
532 emitInsn(0xe2500000); // BRX
533 gpr = 0x08;
534 } else {
535 if (insn->absolute)
536 emitInsn(0xe2100000); // JMP
537 else
538 emitInsn(0xe2400000); // BRA
539 emitField(0x07, 1, insn->allWarp);
540 }
541
542 emitField(0x06, 1, insn->limit);
543 emitCond5(0x00, CC_TR);
544
545 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
546 int32_t pos = insn->target.bb->binPos;
547 if (writeIssueDelays && !(pos & 0x1f))
548 pos += 8;
549 if (!insn->absolute)
550 emitField(0x14, 24, pos - (codeSize + 8));
551 else
552 emitField(0x14, 32, pos);
553 } else {
554 emitCBUF (0x24, gpr, 20, 16, 0, insn->src(0));
555 emitField(0x05, 1, 1);
556 }
557 }
558
559 void
emitCAL()560 CodeEmitterGM107::emitCAL()
561 {
562 const FlowInstruction *insn = this->insn->asFlow();
563
564 if (insn->absolute) {
565 emitInsn(0xe2200000, 0); // JCAL
566 } else {
567 emitInsn(0xe2600000, 0); // CAL
568 }
569
570 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
571 if (!insn->absolute)
572 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
573 else {
574 if (insn->builtin) {
575 int pcAbs = targGM107->getBuiltinOffset(insn->target.builtin);
576 addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfff00000, 20);
577 addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x000fffff, -12);
578 } else {
579 emitField(0x14, 32, insn->target.bb->binPos);
580 }
581 }
582 } else {
583 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
584 emitField(0x05, 1, 1);
585 }
586 }
587
588 void
emitPCNT()589 CodeEmitterGM107::emitPCNT()
590 {
591 const FlowInstruction *insn = this->insn->asFlow();
592
593 emitInsn(0xe2b00000, 0);
594
595 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
596 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
597 } else {
598 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
599 emitField(0x05, 1, 1);
600 }
601 }
602
603 void
emitCONT()604 CodeEmitterGM107::emitCONT()
605 {
606 emitInsn (0xe3500000);
607 emitCond5(0x00, CC_TR);
608 }
609
610 void
emitPBK()611 CodeEmitterGM107::emitPBK()
612 {
613 const FlowInstruction *insn = this->insn->asFlow();
614
615 emitInsn(0xe2a00000, 0);
616
617 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
618 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
619 } else {
620 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
621 emitField(0x05, 1, 1);
622 }
623 }
624
625 void
emitBRK()626 CodeEmitterGM107::emitBRK()
627 {
628 emitInsn (0xe3400000);
629 emitCond5(0x00, CC_TR);
630 }
631
632 void
emitPRET()633 CodeEmitterGM107::emitPRET()
634 {
635 const FlowInstruction *insn = this->insn->asFlow();
636
637 emitInsn(0xe2700000, 0);
638
639 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
640 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
641 } else {
642 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
643 emitField(0x05, 1, 1);
644 }
645 }
646
647 void
emitRET()648 CodeEmitterGM107::emitRET()
649 {
650 emitInsn (0xe3200000);
651 emitCond5(0x00, CC_TR);
652 }
653
654 void
emitSSY()655 CodeEmitterGM107::emitSSY()
656 {
657 const FlowInstruction *insn = this->insn->asFlow();
658
659 emitInsn(0xe2900000, 0);
660
661 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
662 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
663 } else {
664 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
665 emitField(0x05, 1, 1);
666 }
667 }
668
669 void
emitSYNC()670 CodeEmitterGM107::emitSYNC()
671 {
672 emitInsn (0xf0f80000);
673 emitCond5(0x00, CC_TR);
674 }
675
676 void
emitSAM()677 CodeEmitterGM107::emitSAM()
678 {
679 emitInsn(0xe3700000, 0);
680 }
681
682 void
emitRAM()683 CodeEmitterGM107::emitRAM()
684 {
685 emitInsn(0xe3800000, 0);
686 }
687
688 /*******************************************************************************
689 * predicate/cc
690 ******************************************************************************/
691
692 /*******************************************************************************
693 * movement / conversion
694 ******************************************************************************/
695
696 void
emitMOV()697 CodeEmitterGM107::emitMOV()
698 {
699 if (insn->src(0).getFile() != FILE_IMMEDIATE) {
700 switch (insn->src(0).getFile()) {
701 case FILE_GPR:
702 if (insn->def(0).getFile() == FILE_PREDICATE) {
703 emitInsn(0x5b6a0000);
704 emitGPR (0x08);
705 } else {
706 emitInsn(0x5c980000);
707 }
708 emitGPR (0x14, insn->src(0));
709 break;
710 case FILE_MEMORY_CONST:
711 emitInsn(0x4c980000);
712 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
713 break;
714 case FILE_IMMEDIATE:
715 emitInsn(0x38980000);
716 emitIMMD(0x14, 19, insn->src(0));
717 break;
718 case FILE_PREDICATE:
719 emitInsn(0x50880000);
720 emitPRED(0x0c, insn->src(0));
721 emitPRED(0x1d);
722 emitPRED(0x27);
723 break;
724 default:
725 assert(!"bad src file");
726 break;
727 }
728 if (insn->def(0).getFile() != FILE_PREDICATE &&
729 insn->src(0).getFile() != FILE_PREDICATE)
730 emitField(0x27, 4, insn->lanes);
731 } else {
732 emitInsn (0x01000000);
733 emitIMMD (0x14, 32, insn->src(0));
734 emitField(0x0c, 4, insn->lanes);
735 }
736
737 if (insn->def(0).getFile() == FILE_PREDICATE) {
738 emitPRED(0x27);
739 emitPRED(0x03, insn->def(0));
740 emitPRED(0x00);
741 } else {
742 emitGPR(0x00, insn->def(0));
743 }
744 }
745
746 void
emitS2R()747 CodeEmitterGM107::emitS2R()
748 {
749 emitInsn(0xf0c80000);
750 emitSYS (0x14, insn->src(0));
751 emitGPR (0x00, insn->def(0));
752 }
753
754 void
emitF2F()755 CodeEmitterGM107::emitF2F()
756 {
757 RoundMode rnd = insn->rnd;
758
759 switch (insn->op) {
760 case OP_FLOOR: rnd = ROUND_MI; break;
761 case OP_CEIL : rnd = ROUND_PI; break;
762 case OP_TRUNC: rnd = ROUND_ZI; break;
763 default:
764 break;
765 }
766
767 switch (insn->src(0).getFile()) {
768 case FILE_GPR:
769 emitInsn(0x5ca80000);
770 emitGPR (0x14, insn->src(0));
771 break;
772 case FILE_MEMORY_CONST:
773 emitInsn(0x4ca80000);
774 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
775 break;
776 case FILE_IMMEDIATE:
777 emitInsn(0x38a80000);
778 emitIMMD(0x14, 19, insn->src(0));
779 break;
780 default:
781 assert(!"bad src0 file");
782 break;
783 }
784
785 emitField(0x32, 1, (insn->op == OP_SAT) || insn->saturate);
786 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
787 emitCC (0x2f);
788 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
789 emitFMZ (0x2c, 1);
790 emitField(0x29, 1, insn->subOp);
791 emitRND (0x27, rnd, 0x2a);
792 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
793 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
794 emitGPR (0x00, insn->def(0));
795 }
796
797 void
emitF2I()798 CodeEmitterGM107::emitF2I()
799 {
800 RoundMode rnd = insn->rnd;
801
802 switch (insn->op) {
803 case OP_FLOOR: rnd = ROUND_M; break;
804 case OP_CEIL : rnd = ROUND_P; break;
805 case OP_TRUNC: rnd = ROUND_Z; break;
806 default:
807 break;
808 }
809
810 switch (insn->src(0).getFile()) {
811 case FILE_GPR:
812 emitInsn(0x5cb00000);
813 emitGPR (0x14, insn->src(0));
814 break;
815 case FILE_MEMORY_CONST:
816 emitInsn(0x4cb00000);
817 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
818 break;
819 case FILE_IMMEDIATE:
820 emitInsn(0x38b00000);
821 emitIMMD(0x14, 19, insn->src(0));
822 break;
823 default:
824 assert(!"bad src0 file");
825 break;
826 }
827
828 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
829 emitCC (0x2f);
830 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
831 emitFMZ (0x2c, 1);
832 emitRND (0x27, rnd, 0x2a);
833 emitField(0x0c, 1, isSignedType(insn->dType));
834 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
835 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
836 emitGPR (0x00, insn->def(0));
837 }
838
839 void
emitI2F()840 CodeEmitterGM107::emitI2F()
841 {
842 RoundMode rnd = insn->rnd;
843
844 switch (insn->op) {
845 case OP_FLOOR: rnd = ROUND_M; break;
846 case OP_CEIL : rnd = ROUND_P; break;
847 case OP_TRUNC: rnd = ROUND_Z; break;
848 default:
849 break;
850 }
851
852 switch (insn->src(0).getFile()) {
853 case FILE_GPR:
854 emitInsn(0x5cb80000);
855 emitGPR (0x14, insn->src(0));
856 break;
857 case FILE_MEMORY_CONST:
858 emitInsn(0x4cb80000);
859 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
860 break;
861 case FILE_IMMEDIATE:
862 emitInsn(0x38b80000);
863 emitIMMD(0x14, 19, insn->src(0));
864 break;
865 default:
866 assert(!"bad src0 file");
867 break;
868 }
869
870 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
871 emitCC (0x2f);
872 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
873 emitField(0x29, 2, insn->subOp);
874 emitRND (0x27, rnd, -1);
875 emitField(0x0d, 1, isSignedType(insn->sType));
876 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
877 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
878 emitGPR (0x00, insn->def(0));
879 }
880
881 void
emitI2I()882 CodeEmitterGM107::emitI2I()
883 {
884 switch (insn->src(0).getFile()) {
885 case FILE_GPR:
886 emitInsn(0x5ce00000);
887 emitGPR (0x14, insn->src(0));
888 break;
889 case FILE_MEMORY_CONST:
890 emitInsn(0x4ce00000);
891 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
892 break;
893 case FILE_IMMEDIATE:
894 emitInsn(0x38e00000);
895 emitIMMD(0x14, 19, insn->src(0));
896 break;
897 default:
898 assert(!"bad src0 file");
899 break;
900 }
901
902 emitSAT (0x32);
903 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
904 emitCC (0x2f);
905 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
906 emitField(0x29, 2, insn->subOp);
907 emitField(0x0d, 1, isSignedType(insn->sType));
908 emitField(0x0c, 1, isSignedType(insn->dType));
909 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
910 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
911 emitGPR (0x00, insn->def(0));
912 }
913
914 static void
selpFlip(const FixupEntry * entry,uint32_t * code,const FixupData & data)915 selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
916 {
917 int loc = entry->loc;
918 if (data.force_persample_interp)
919 code[loc + 1] |= 1 << 10;
920 else
921 code[loc + 1] &= ~(1 << 10);
922 }
923
924 void
emitSEL()925 CodeEmitterGM107::emitSEL()
926 {
927 switch (insn->src(1).getFile()) {
928 case FILE_GPR:
929 emitInsn(0x5ca00000);
930 emitGPR (0x14, insn->src(1));
931 break;
932 case FILE_MEMORY_CONST:
933 emitInsn(0x4ca00000);
934 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
935 break;
936 case FILE_IMMEDIATE:
937 emitInsn(0x38a00000);
938 emitIMMD(0x14, 19, insn->src(1));
939 break;
940 default:
941 assert(!"bad src1 file");
942 break;
943 }
944
945 emitINV (0x2a, insn->src(2));
946 emitPRED(0x27, insn->src(2));
947 emitGPR (0x08, insn->src(0));
948 emitGPR (0x00, insn->def(0));
949
950 if (insn->subOp == 1) {
951 addInterp(0, 0, selpFlip);
952 }
953 }
954
955 void
emitSHFL()956 CodeEmitterGM107::emitSHFL()
957 {
958 int type = 0;
959
960 emitInsn (0xef100000);
961
962 switch (insn->src(1).getFile()) {
963 case FILE_GPR:
964 emitGPR(0x14, insn->src(1));
965 break;
966 case FILE_IMMEDIATE:
967 emitIMMD(0x14, 5, insn->src(1));
968 type |= 1;
969 break;
970 default:
971 assert(!"invalid src1 file");
972 break;
973 }
974
975 switch (insn->src(2).getFile()) {
976 case FILE_GPR:
977 emitGPR(0x27, insn->src(2));
978 break;
979 case FILE_IMMEDIATE:
980 emitIMMD(0x22, 13, insn->src(2));
981 type |= 2;
982 break;
983 default:
984 assert(!"invalid src2 file");
985 break;
986 }
987
988 if (!insn->defExists(1))
989 emitPRED(0x30);
990 else {
991 assert(insn->def(1).getFile() == FILE_PREDICATE);
992 emitPRED(0x30, insn->def(1));
993 }
994
995 emitField(0x1e, 2, insn->subOp);
996 emitField(0x1c, 2, type);
997 emitGPR (0x08, insn->src(0));
998 emitGPR (0x00, insn->def(0));
999 }
1000
1001 /*******************************************************************************
1002 * double
1003 ******************************************************************************/
1004
1005 void
emitDADD()1006 CodeEmitterGM107::emitDADD()
1007 {
1008 switch (insn->src(1).getFile()) {
1009 case FILE_GPR:
1010 emitInsn(0x5c700000);
1011 emitGPR (0x14, insn->src(1));
1012 break;
1013 case FILE_MEMORY_CONST:
1014 emitInsn(0x4c700000);
1015 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1016 break;
1017 case FILE_IMMEDIATE:
1018 emitInsn(0x38700000);
1019 emitIMMD(0x14, 19, insn->src(1));
1020 break;
1021 default:
1022 assert(!"bad src1 file");
1023 break;
1024 }
1025 emitABS(0x31, insn->src(1));
1026 emitNEG(0x30, insn->src(0));
1027 emitCC (0x2f);
1028 emitABS(0x2e, insn->src(0));
1029 emitNEG(0x2d, insn->src(1));
1030
1031 if (insn->op == OP_SUB)
1032 code[1] ^= 0x00002000;
1033
1034 emitGPR(0x08, insn->src(0));
1035 emitGPR(0x00, insn->def(0));
1036 }
1037
1038 void
emitDMUL()1039 CodeEmitterGM107::emitDMUL()
1040 {
1041 switch (insn->src(1).getFile()) {
1042 case FILE_GPR:
1043 emitInsn(0x5c800000);
1044 emitGPR (0x14, insn->src(1));
1045 break;
1046 case FILE_MEMORY_CONST:
1047 emitInsn(0x4c800000);
1048 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1049 break;
1050 case FILE_IMMEDIATE:
1051 emitInsn(0x38800000);
1052 emitIMMD(0x14, 19, insn->src(1));
1053 break;
1054 default:
1055 assert(!"bad src1 file");
1056 break;
1057 }
1058
1059 emitNEG2(0x30, insn->src(0), insn->src(1));
1060 emitCC (0x2f);
1061 emitRND (0x27);
1062 emitGPR (0x08, insn->src(0));
1063 emitGPR (0x00, insn->def(0));
1064 }
1065
1066 void
emitDFMA()1067 CodeEmitterGM107::emitDFMA()
1068 {
1069 switch(insn->src(2).getFile()) {
1070 case FILE_GPR:
1071 switch (insn->src(1).getFile()) {
1072 case FILE_GPR:
1073 emitInsn(0x5b700000);
1074 emitGPR (0x14, insn->src(1));
1075 break;
1076 case FILE_MEMORY_CONST:
1077 emitInsn(0x4b700000);
1078 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1079 break;
1080 case FILE_IMMEDIATE:
1081 emitInsn(0x36700000);
1082 emitIMMD(0x14, 19, insn->src(1));
1083 break;
1084 default:
1085 assert(!"bad src1 file");
1086 break;
1087 }
1088 emitGPR (0x27, insn->src(2));
1089 break;
1090 case FILE_MEMORY_CONST:
1091 emitInsn(0x53700000);
1092 emitGPR (0x27, insn->src(1));
1093 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1094 break;
1095 default:
1096 assert(!"bad src2 file");
1097 break;
1098 }
1099
1100 emitRND (0x32);
1101 emitNEG (0x31, insn->src(2));
1102 emitNEG2(0x30, insn->src(0), insn->src(1));
1103 emitCC (0x2f);
1104 emitGPR (0x08, insn->src(0));
1105 emitGPR (0x00, insn->def(0));
1106 }
1107
1108 void
emitDMNMX()1109 CodeEmitterGM107::emitDMNMX()
1110 {
1111 switch (insn->src(1).getFile()) {
1112 case FILE_GPR:
1113 emitInsn(0x5c500000);
1114 emitGPR (0x14, insn->src(1));
1115 break;
1116 case FILE_MEMORY_CONST:
1117 emitInsn(0x4c500000);
1118 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1119 break;
1120 case FILE_IMMEDIATE:
1121 emitInsn(0x38500000);
1122 emitIMMD(0x14, 19, insn->src(1));
1123 break;
1124 default:
1125 assert(!"bad src1 file");
1126 break;
1127 }
1128
1129 emitABS (0x31, insn->src(1));
1130 emitNEG (0x30, insn->src(0));
1131 emitCC (0x2f);
1132 emitABS (0x2e, insn->src(0));
1133 emitNEG (0x2d, insn->src(1));
1134 emitField(0x2a, 1, insn->op == OP_MAX);
1135 emitPRED (0x27);
1136 emitGPR (0x08, insn->src(0));
1137 emitGPR (0x00, insn->def(0));
1138 }
1139
1140 void
emitDSET()1141 CodeEmitterGM107::emitDSET()
1142 {
1143 const CmpInstruction *insn = this->insn->asCmp();
1144
1145 switch (insn->src(1).getFile()) {
1146 case FILE_GPR:
1147 emitInsn(0x59000000);
1148 emitGPR (0x14, insn->src(1));
1149 break;
1150 case FILE_MEMORY_CONST:
1151 emitInsn(0x49000000);
1152 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1153 break;
1154 case FILE_IMMEDIATE:
1155 emitInsn(0x32000000);
1156 emitIMMD(0x14, 19, insn->src(1));
1157 break;
1158 default:
1159 assert(!"bad src1 file");
1160 break;
1161 }
1162
1163 if (insn->op != OP_SET) {
1164 switch (insn->op) {
1165 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1166 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1167 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1168 default:
1169 assert(!"invalid set op");
1170 break;
1171 }
1172 emitPRED(0x27, insn->src(2));
1173 } else {
1174 emitPRED(0x27);
1175 }
1176
1177 emitABS (0x36, insn->src(0));
1178 emitNEG (0x35, insn->src(1));
1179 emitField(0x34, 1, insn->dType == TYPE_F32);
1180 emitCond4(0x30, insn->setCond);
1181 emitCC (0x2f);
1182 emitABS (0x2c, insn->src(1));
1183 emitNEG (0x2b, insn->src(0));
1184 emitGPR (0x08, insn->src(0));
1185 emitGPR (0x00, insn->def(0));
1186 }
1187
1188 void
emitDSETP()1189 CodeEmitterGM107::emitDSETP()
1190 {
1191 const CmpInstruction *insn = this->insn->asCmp();
1192
1193 switch (insn->src(1).getFile()) {
1194 case FILE_GPR:
1195 emitInsn(0x5b800000);
1196 emitGPR (0x14, insn->src(1));
1197 break;
1198 case FILE_MEMORY_CONST:
1199 emitInsn(0x4b800000);
1200 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1201 break;
1202 case FILE_IMMEDIATE:
1203 emitInsn(0x36800000);
1204 emitIMMD(0x14, 19, insn->src(1));
1205 break;
1206 default:
1207 assert(!"bad src1 file");
1208 break;
1209 }
1210
1211 if (insn->op != OP_SET) {
1212 switch (insn->op) {
1213 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1214 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1215 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1216 default:
1217 assert(!"invalid set op");
1218 break;
1219 }
1220 emitPRED(0x27, insn->src(2));
1221 } else {
1222 emitPRED(0x27);
1223 }
1224
1225 emitCond4(0x30, insn->setCond);
1226 emitABS (0x2c, insn->src(1));
1227 emitNEG (0x2b, insn->src(0));
1228 emitGPR (0x08, insn->src(0));
1229 emitABS (0x07, insn->src(0));
1230 emitNEG (0x06, insn->src(1));
1231 emitPRED (0x03, insn->def(0));
1232 if (insn->defExists(1))
1233 emitPRED(0x00, insn->def(1));
1234 else
1235 emitPRED(0x00);
1236 }
1237
1238 /*******************************************************************************
1239 * float
1240 ******************************************************************************/
1241
1242 void
emitFADD()1243 CodeEmitterGM107::emitFADD()
1244 {
1245 if (!longIMMD(insn->src(1))) {
1246 switch (insn->src(1).getFile()) {
1247 case FILE_GPR:
1248 emitInsn(0x5c580000);
1249 emitGPR (0x14, insn->src(1));
1250 break;
1251 case FILE_MEMORY_CONST:
1252 emitInsn(0x4c580000);
1253 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1254 break;
1255 case FILE_IMMEDIATE:
1256 emitInsn(0x38580000);
1257 emitIMMD(0x14, 19, insn->src(1));
1258 break;
1259 default:
1260 assert(!"bad src1 file");
1261 break;
1262 }
1263 emitSAT(0x32);
1264 emitABS(0x31, insn->src(1));
1265 emitNEG(0x30, insn->src(0));
1266 emitCC (0x2f);
1267 emitABS(0x2e, insn->src(0));
1268 emitNEG(0x2d, insn->src(1));
1269 emitFMZ(0x2c, 1);
1270
1271 if (insn->op == OP_SUB)
1272 code[1] ^= 0x00002000;
1273 } else {
1274 emitInsn(0x08000000);
1275 emitABS(0x39, insn->src(1));
1276 emitNEG(0x38, insn->src(0));
1277 emitFMZ(0x37, 1);
1278 emitABS(0x36, insn->src(0));
1279 emitNEG(0x35, insn->src(1));
1280 emitCC (0x34);
1281 emitIMMD(0x14, 32, insn->src(1));
1282
1283 if (insn->op == OP_SUB)
1284 code[1] ^= 0x00080000;
1285 }
1286
1287 emitGPR(0x08, insn->src(0));
1288 emitGPR(0x00, insn->def(0));
1289 }
1290
1291 void
emitFMUL()1292 CodeEmitterGM107::emitFMUL()
1293 {
1294 if (!longIMMD(insn->src(1))) {
1295 switch (insn->src(1).getFile()) {
1296 case FILE_GPR:
1297 emitInsn(0x5c680000);
1298 emitGPR (0x14, insn->src(1));
1299 break;
1300 case FILE_MEMORY_CONST:
1301 emitInsn(0x4c680000);
1302 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1303 break;
1304 case FILE_IMMEDIATE:
1305 emitInsn(0x38680000);
1306 emitIMMD(0x14, 19, insn->src(1));
1307 break;
1308 default:
1309 assert(!"bad src1 file");
1310 break;
1311 }
1312 emitSAT (0x32);
1313 emitNEG2(0x30, insn->src(0), insn->src(1));
1314 emitCC (0x2f);
1315 emitFMZ (0x2c, 2);
1316 emitPDIV(0x29);
1317 emitRND (0x27);
1318 } else {
1319 emitInsn(0x1e000000);
1320 emitSAT (0x37);
1321 emitFMZ (0x35, 2);
1322 emitCC (0x34);
1323 emitIMMD(0x14, 32, insn->src(1));
1324 if (insn->src(0).mod.neg() ^ insn->src(1).mod.neg())
1325 code[1] ^= 0x00080000; /* flip immd sign bit */
1326 }
1327
1328 emitGPR(0x08, insn->src(0));
1329 emitGPR(0x00, insn->def(0));
1330 }
1331
1332 void
emitFFMA()1333 CodeEmitterGM107::emitFFMA()
1334 {
1335 bool isLongIMMD = false;
1336 switch(insn->src(2).getFile()) {
1337 case FILE_GPR:
1338 switch (insn->src(1).getFile()) {
1339 case FILE_GPR:
1340 emitInsn(0x59800000);
1341 emitGPR (0x14, insn->src(1));
1342 break;
1343 case FILE_MEMORY_CONST:
1344 emitInsn(0x49800000);
1345 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1346 break;
1347 case FILE_IMMEDIATE:
1348 if (longIMMD(insn->getSrc(1))) {
1349 assert(insn->getDef(0)->reg.data.id == insn->getSrc(2)->reg.data.id);
1350 isLongIMMD = true;
1351 emitInsn(0x0c000000);
1352 emitIMMD(0x14, 32, insn->src(1));
1353 } else {
1354 emitInsn(0x32800000);
1355 emitIMMD(0x14, 19, insn->src(1));
1356 }
1357 break;
1358 default:
1359 assert(!"bad src1 file");
1360 break;
1361 }
1362 if (!isLongIMMD)
1363 emitGPR (0x27, insn->src(2));
1364 break;
1365 case FILE_MEMORY_CONST:
1366 emitInsn(0x51800000);
1367 emitGPR (0x27, insn->src(1));
1368 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1369 break;
1370 default:
1371 assert(!"bad src2 file");
1372 break;
1373 }
1374
1375 if (isLongIMMD) {
1376 emitNEG (0x39, insn->src(2));
1377 emitNEG2(0x38, insn->src(0), insn->src(1));
1378 emitSAT (0x37);
1379 emitCC (0x34);
1380 } else {
1381 emitRND (0x33);
1382 emitSAT (0x32);
1383 emitNEG (0x31, insn->src(2));
1384 emitNEG2(0x30, insn->src(0), insn->src(1));
1385 emitCC (0x2f);
1386 }
1387
1388 emitFMZ(0x35, 2);
1389 emitGPR(0x08, insn->src(0));
1390 emitGPR(0x00, insn->def(0));
1391 }
1392
1393 void
emitMUFU()1394 CodeEmitterGM107::emitMUFU()
1395 {
1396 int mufu = 0;
1397
1398 switch (insn->op) {
1399 case OP_COS: mufu = 0; break;
1400 case OP_SIN: mufu = 1; break;
1401 case OP_EX2: mufu = 2; break;
1402 case OP_LG2: mufu = 3; break;
1403 case OP_RCP: mufu = 4 + 2 * insn->subOp; break;
1404 case OP_RSQ: mufu = 5 + 2 * insn->subOp; break;
1405 default:
1406 assert(!"invalid mufu");
1407 break;
1408 }
1409
1410 emitInsn (0x50800000);
1411 emitSAT (0x32);
1412 emitNEG (0x30, insn->src(0));
1413 emitABS (0x2e, insn->src(0));
1414 emitField(0x14, 3, mufu);
1415 emitGPR (0x08, insn->src(0));
1416 emitGPR (0x00, insn->def(0));
1417 }
1418
1419 void
emitFMNMX()1420 CodeEmitterGM107::emitFMNMX()
1421 {
1422 switch (insn->src(1).getFile()) {
1423 case FILE_GPR:
1424 emitInsn(0x5c600000);
1425 emitGPR (0x14, insn->src(1));
1426 break;
1427 case FILE_MEMORY_CONST:
1428 emitInsn(0x4c600000);
1429 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1430 break;
1431 case FILE_IMMEDIATE:
1432 emitInsn(0x38600000);
1433 emitIMMD(0x14, 19, insn->src(1));
1434 break;
1435 default:
1436 assert(!"bad src1 file");
1437 break;
1438 }
1439
1440 emitField(0x2a, 1, insn->op == OP_MAX);
1441 emitPRED (0x27);
1442
1443 emitABS(0x31, insn->src(1));
1444 emitNEG(0x30, insn->src(0));
1445 emitCC (0x2f);
1446 emitABS(0x2e, insn->src(0));
1447 emitNEG(0x2d, insn->src(1));
1448 emitFMZ(0x2c, 1);
1449 emitGPR(0x08, insn->src(0));
1450 emitGPR(0x00, insn->def(0));
1451 }
1452
1453 void
emitRRO()1454 CodeEmitterGM107::emitRRO()
1455 {
1456 switch (insn->src(0).getFile()) {
1457 case FILE_GPR:
1458 emitInsn(0x5c900000);
1459 emitGPR (0x14, insn->src(0));
1460 break;
1461 case FILE_MEMORY_CONST:
1462 emitInsn(0x4c900000);
1463 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1464 break;
1465 case FILE_IMMEDIATE:
1466 emitInsn(0x38900000);
1467 emitIMMD(0x14, 19, insn->src(0));
1468 break;
1469 default:
1470 assert(!"bad src file");
1471 break;
1472 }
1473
1474 emitABS (0x31, insn->src(0));
1475 emitNEG (0x2d, insn->src(0));
1476 emitField(0x27, 1, insn->op == OP_PREEX2);
1477 emitGPR (0x00, insn->def(0));
1478 }
1479
1480 void
emitFCMP()1481 CodeEmitterGM107::emitFCMP()
1482 {
1483 const CmpInstruction *insn = this->insn->asCmp();
1484 CondCode cc = insn->setCond;
1485
1486 if (insn->src(2).mod.neg())
1487 cc = reverseCondCode(cc);
1488
1489 switch(insn->src(2).getFile()) {
1490 case FILE_GPR:
1491 switch (insn->src(1).getFile()) {
1492 case FILE_GPR:
1493 emitInsn(0x5ba00000);
1494 emitGPR (0x14, insn->src(1));
1495 break;
1496 case FILE_MEMORY_CONST:
1497 emitInsn(0x4ba00000);
1498 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1499 break;
1500 case FILE_IMMEDIATE:
1501 emitInsn(0x36a00000);
1502 emitIMMD(0x14, 19, insn->src(1));
1503 break;
1504 default:
1505 assert(!"bad src1 file");
1506 break;
1507 }
1508 emitGPR (0x27, insn->src(2));
1509 break;
1510 case FILE_MEMORY_CONST:
1511 emitInsn(0x53a00000);
1512 emitGPR (0x27, insn->src(1));
1513 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1514 break;
1515 default:
1516 assert(!"bad src2 file");
1517 break;
1518 }
1519
1520 emitCond4(0x30, cc);
1521 emitFMZ (0x2f, 1);
1522 emitGPR (0x08, insn->src(0));
1523 emitGPR (0x00, insn->def(0));
1524 }
1525
1526 void
emitFSET()1527 CodeEmitterGM107::emitFSET()
1528 {
1529 const CmpInstruction *insn = this->insn->asCmp();
1530
1531 switch (insn->src(1).getFile()) {
1532 case FILE_GPR:
1533 emitInsn(0x58000000);
1534 emitGPR (0x14, insn->src(1));
1535 break;
1536 case FILE_MEMORY_CONST:
1537 emitInsn(0x48000000);
1538 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1539 break;
1540 case FILE_IMMEDIATE:
1541 emitInsn(0x30000000);
1542 emitIMMD(0x14, 19, insn->src(1));
1543 break;
1544 default:
1545 assert(!"bad src1 file");
1546 break;
1547 }
1548
1549 if (insn->op != OP_SET) {
1550 switch (insn->op) {
1551 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1552 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1553 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1554 default:
1555 assert(!"invalid set op");
1556 break;
1557 }
1558 emitPRED(0x27, insn->src(2));
1559 } else {
1560 emitPRED(0x27);
1561 }
1562
1563 emitFMZ (0x37, 1);
1564 emitABS (0x36, insn->src(0));
1565 emitNEG (0x35, insn->src(1));
1566 emitField(0x34, 1, insn->dType == TYPE_F32);
1567 emitCond4(0x30, insn->setCond);
1568 emitCC (0x2f);
1569 emitABS (0x2c, insn->src(1));
1570 emitNEG (0x2b, insn->src(0));
1571 emitGPR (0x08, insn->src(0));
1572 emitGPR (0x00, insn->def(0));
1573 }
1574
1575 void
emitFSETP()1576 CodeEmitterGM107::emitFSETP()
1577 {
1578 const CmpInstruction *insn = this->insn->asCmp();
1579
1580 switch (insn->src(1).getFile()) {
1581 case FILE_GPR:
1582 emitInsn(0x5bb00000);
1583 emitGPR (0x14, insn->src(1));
1584 break;
1585 case FILE_MEMORY_CONST:
1586 emitInsn(0x4bb00000);
1587 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1588 break;
1589 case FILE_IMMEDIATE:
1590 emitInsn(0x36b00000);
1591 emitIMMD(0x14, 19, insn->src(1));
1592 break;
1593 default:
1594 assert(!"bad src1 file");
1595 break;
1596 }
1597
1598 if (insn->op != OP_SET) {
1599 switch (insn->op) {
1600 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1601 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1602 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1603 default:
1604 assert(!"invalid set op");
1605 break;
1606 }
1607 emitPRED(0x27, insn->src(2));
1608 } else {
1609 emitPRED(0x27);
1610 }
1611
1612 emitCond4(0x30, insn->setCond);
1613 emitFMZ (0x2f, 1);
1614 emitABS (0x2c, insn->src(1));
1615 emitNEG (0x2b, insn->src(0));
1616 emitGPR (0x08, insn->src(0));
1617 emitABS (0x07, insn->src(0));
1618 emitNEG (0x06, insn->src(1));
1619 emitPRED (0x03, insn->def(0));
1620 if (insn->defExists(1))
1621 emitPRED(0x00, insn->def(1));
1622 else
1623 emitPRED(0x00);
1624 }
1625
1626 void
emitFSWZADD()1627 CodeEmitterGM107::emitFSWZADD()
1628 {
1629 emitInsn (0x50f80000);
1630 emitCC (0x2f);
1631 emitFMZ (0x2c, 1);
1632 emitRND (0x27);
1633 emitField(0x26, 1, insn->lanes); /* abused for .ndv */
1634 emitField(0x1c, 8, insn->subOp);
1635 if (insn->predSrc != 1)
1636 emitGPR (0x14, insn->src(1));
1637 else
1638 emitGPR (0x14);
1639 emitGPR (0x08, insn->src(0));
1640 emitGPR (0x00, insn->def(0));
1641 }
1642
1643 /*******************************************************************************
1644 * integer
1645 ******************************************************************************/
1646
1647 void
emitLOP()1648 CodeEmitterGM107::emitLOP()
1649 {
1650 int lop = 0;
1651
1652 switch (insn->op) {
1653 case OP_AND: lop = 0; break;
1654 case OP_OR : lop = 1; break;
1655 case OP_XOR: lop = 2; break;
1656 default:
1657 assert(!"invalid lop");
1658 break;
1659 }
1660
1661 if (insn->src(1).getFile() != FILE_IMMEDIATE) {
1662 switch (insn->src(1).getFile()) {
1663 case FILE_GPR:
1664 emitInsn(0x5c400000);
1665 emitGPR (0x14, insn->src(1));
1666 break;
1667 case FILE_MEMORY_CONST:
1668 emitInsn(0x4c400000);
1669 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1670 break;
1671 case FILE_IMMEDIATE:
1672 emitInsn(0x38400000);
1673 emitIMMD(0x14, 19, insn->src(1));
1674 break;
1675 default:
1676 assert(!"bad src1 file");
1677 break;
1678 }
1679 emitPRED (0x30);
1680 emitCC (0x2f);
1681 emitX (0x2b);
1682 emitField(0x29, 2, lop);
1683 emitINV (0x28, insn->src(1));
1684 emitINV (0x27, insn->src(0));
1685 } else {
1686 emitInsn (0x04000000);
1687 emitX (0x39);
1688 emitINV (0x38, insn->src(1));
1689 emitINV (0x37, insn->src(0));
1690 emitField(0x35, 2, lop);
1691 emitCC (0x34);
1692 emitIMMD (0x14, 32, insn->src(1));
1693 }
1694
1695 emitGPR (0x08, insn->src(0));
1696 emitGPR (0x00, insn->def(0));
1697 }
1698
1699 /* special-case of emitLOP(): lop pass_b dst 0 ~src */
1700 void
emitNOT()1701 CodeEmitterGM107::emitNOT()
1702 {
1703 if (!longIMMD(insn->src(0))) {
1704 switch (insn->src(0).getFile()) {
1705 case FILE_GPR:
1706 emitInsn(0x5c400700);
1707 emitGPR (0x14, insn->src(0));
1708 break;
1709 case FILE_MEMORY_CONST:
1710 emitInsn(0x4c400700);
1711 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1712 break;
1713 case FILE_IMMEDIATE:
1714 emitInsn(0x38400700);
1715 emitIMMD(0x14, 19, insn->src(0));
1716 break;
1717 default:
1718 assert(!"bad src1 file");
1719 break;
1720 }
1721 emitPRED (0x30);
1722 } else {
1723 emitInsn (0x05600000);
1724 emitIMMD (0x14, 32, insn->src(1));
1725 }
1726
1727 emitGPR(0x08);
1728 emitGPR(0x00, insn->def(0));
1729 }
1730
1731 void
emitIADD()1732 CodeEmitterGM107::emitIADD()
1733 {
1734 if (insn->src(1).getFile() != FILE_IMMEDIATE) {
1735 switch (insn->src(1).getFile()) {
1736 case FILE_GPR:
1737 emitInsn(0x5c100000);
1738 emitGPR (0x14, insn->src(1));
1739 break;
1740 case FILE_MEMORY_CONST:
1741 emitInsn(0x4c100000);
1742 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1743 break;
1744 case FILE_IMMEDIATE:
1745 emitInsn(0x38100000);
1746 emitIMMD(0x14, 19, insn->src(1));
1747 break;
1748 default:
1749 assert(!"bad src1 file");
1750 break;
1751 }
1752 emitSAT(0x32);
1753 emitNEG(0x31, insn->src(0));
1754 emitNEG(0x30, insn->src(1));
1755 emitCC (0x2f);
1756 emitX (0x2b);
1757 } else {
1758 emitInsn(0x1c000000);
1759 emitNEG (0x38, insn->src(0));
1760 emitSAT (0x36);
1761 emitX (0x35);
1762 emitCC (0x34);
1763 emitIMMD(0x14, 32, insn->src(1));
1764 }
1765
1766 if (insn->op == OP_SUB)
1767 code[1] ^= 0x00010000;
1768
1769 emitGPR(0x08, insn->src(0));
1770 emitGPR(0x00, insn->def(0));
1771 }
1772
1773 void
emitIMUL()1774 CodeEmitterGM107::emitIMUL()
1775 {
1776 if (insn->src(1).getFile() != FILE_IMMEDIATE) {
1777 switch (insn->src(1).getFile()) {
1778 case FILE_GPR:
1779 emitInsn(0x5c380000);
1780 emitGPR (0x14, insn->src(1));
1781 break;
1782 case FILE_MEMORY_CONST:
1783 emitInsn(0x4c380000);
1784 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1785 break;
1786 case FILE_IMMEDIATE:
1787 emitInsn(0x38380000);
1788 emitIMMD(0x14, 19, insn->src(1));
1789 break;
1790 default:
1791 assert(!"bad src1 file");
1792 break;
1793 }
1794 emitCC (0x2f);
1795 emitField(0x29, 1, isSignedType(insn->sType));
1796 emitField(0x28, 1, isSignedType(insn->dType));
1797 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1798 } else {
1799 emitInsn (0x1f000000);
1800 emitField(0x37, 1, isSignedType(insn->sType));
1801 emitField(0x36, 1, isSignedType(insn->dType));
1802 emitField(0x35, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1803 emitCC (0x34);
1804 emitIMMD (0x14, 32, insn->src(1));
1805 }
1806
1807 emitGPR(0x08, insn->src(0));
1808 emitGPR(0x00, insn->def(0));
1809 }
1810
1811 void
emitIMAD()1812 CodeEmitterGM107::emitIMAD()
1813 {
1814 /*XXX: imad32i exists, but not using it as third src overlaps dst */
1815 switch(insn->src(2).getFile()) {
1816 case FILE_GPR:
1817 switch (insn->src(1).getFile()) {
1818 case FILE_GPR:
1819 emitInsn(0x5a000000);
1820 emitGPR (0x14, insn->src(1));
1821 break;
1822 case FILE_MEMORY_CONST:
1823 emitInsn(0x4a000000);
1824 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1825 break;
1826 case FILE_IMMEDIATE:
1827 emitInsn(0x34000000);
1828 emitIMMD(0x14, 19, insn->src(1));
1829 break;
1830 default:
1831 assert(!"bad src1 file");
1832 break;
1833 }
1834 emitGPR (0x27, insn->src(2));
1835 break;
1836 case FILE_MEMORY_CONST:
1837 emitInsn(0x52000000);
1838 emitGPR (0x27, insn->src(1));
1839 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1840 break;
1841 default:
1842 assert(!"bad src2 file");
1843 break;
1844 }
1845
1846 emitField(0x36, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1847 emitField(0x35, 1, isSignedType(insn->sType));
1848 emitNEG (0x34, insn->src(2));
1849 emitNEG2 (0x33, insn->src(0), insn->src(1));
1850 emitSAT (0x32);
1851 emitX (0x31);
1852 emitField(0x30, 1, isSignedType(insn->dType));
1853 emitCC (0x2f);
1854 emitGPR (0x08, insn->src(0));
1855 emitGPR (0x00, insn->def(0));
1856 }
1857
1858 void
emitISCADD()1859 CodeEmitterGM107::emitISCADD()
1860 {
1861 assert(insn->src(1).get()->asImm());
1862
1863 switch (insn->src(2).getFile()) {
1864 case FILE_GPR:
1865 emitInsn(0x5c180000);
1866 emitGPR (0x14, insn->src(2));
1867 break;
1868 case FILE_MEMORY_CONST:
1869 emitInsn(0x4c180000);
1870 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1871 break;
1872 case FILE_IMMEDIATE:
1873 emitInsn(0x38180000);
1874 emitIMMD(0x14, 19, insn->src(2));
1875 break;
1876 default:
1877 assert(!"bad src1 file");
1878 break;
1879 }
1880 emitNEG (0x31, insn->src(0));
1881 emitNEG (0x30, insn->src(2));
1882 emitCC (0x2f);
1883 emitIMMD(0x27, 5, insn->src(1));
1884 emitGPR (0x08, insn->src(0));
1885 emitGPR (0x00, insn->def(0));
1886 }
1887
1888 void
emitIMNMX()1889 CodeEmitterGM107::emitIMNMX()
1890 {
1891 switch (insn->src(1).getFile()) {
1892 case FILE_GPR:
1893 emitInsn(0x5c200000);
1894 emitGPR (0x14, insn->src(1));
1895 break;
1896 case FILE_MEMORY_CONST:
1897 emitInsn(0x4c200000);
1898 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1899 break;
1900 case FILE_IMMEDIATE:
1901 emitInsn(0x38200000);
1902 emitIMMD(0x14, 19, insn->src(1));
1903 break;
1904 default:
1905 assert(!"bad src1 file");
1906 break;
1907 }
1908
1909 emitField(0x30, 1, isSignedType(insn->dType));
1910 emitCC (0x2f);
1911 emitField(0x2b, 2, insn->subOp);
1912 emitField(0x2a, 1, insn->op == OP_MAX);
1913 emitPRED (0x27);
1914 emitGPR (0x08, insn->src(0));
1915 emitGPR (0x00, insn->def(0));
1916 }
1917
1918 void
emitICMP()1919 CodeEmitterGM107::emitICMP()
1920 {
1921 const CmpInstruction *insn = this->insn->asCmp();
1922 CondCode cc = insn->setCond;
1923
1924 if (insn->src(2).mod.neg())
1925 cc = reverseCondCode(cc);
1926
1927 switch(insn->src(2).getFile()) {
1928 case FILE_GPR:
1929 switch (insn->src(1).getFile()) {
1930 case FILE_GPR:
1931 emitInsn(0x5b400000);
1932 emitGPR (0x14, insn->src(1));
1933 break;
1934 case FILE_MEMORY_CONST:
1935 emitInsn(0x4b400000);
1936 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1937 break;
1938 case FILE_IMMEDIATE:
1939 emitInsn(0x36400000);
1940 emitIMMD(0x14, 19, insn->src(1));
1941 break;
1942 default:
1943 assert(!"bad src1 file");
1944 break;
1945 }
1946 emitGPR (0x27, insn->src(2));
1947 break;
1948 case FILE_MEMORY_CONST:
1949 emitInsn(0x53400000);
1950 emitGPR (0x27, insn->src(1));
1951 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1952 break;
1953 default:
1954 assert(!"bad src2 file");
1955 break;
1956 }
1957
1958 emitCond3(0x31, cc);
1959 emitField(0x30, 1, isSignedType(insn->sType));
1960 emitGPR (0x08, insn->src(0));
1961 emitGPR (0x00, insn->def(0));
1962 }
1963
1964 void
emitISET()1965 CodeEmitterGM107::emitISET()
1966 {
1967 const CmpInstruction *insn = this->insn->asCmp();
1968
1969 switch (insn->src(1).getFile()) {
1970 case FILE_GPR:
1971 emitInsn(0x5b500000);
1972 emitGPR (0x14, insn->src(1));
1973 break;
1974 case FILE_MEMORY_CONST:
1975 emitInsn(0x4b500000);
1976 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1977 break;
1978 case FILE_IMMEDIATE:
1979 emitInsn(0x36500000);
1980 emitIMMD(0x14, 19, insn->src(1));
1981 break;
1982 default:
1983 assert(!"bad src1 file");
1984 break;
1985 }
1986
1987 if (insn->op != OP_SET) {
1988 switch (insn->op) {
1989 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1990 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1991 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1992 default:
1993 assert(!"invalid set op");
1994 break;
1995 }
1996 emitPRED(0x27, insn->src(2));
1997 } else {
1998 emitPRED(0x27);
1999 }
2000
2001 emitCond3(0x31, insn->setCond);
2002 emitField(0x30, 1, isSignedType(insn->sType));
2003 emitCC (0x2f);
2004 emitField(0x2c, 1, insn->dType == TYPE_F32);
2005 emitX (0x2b);
2006 emitGPR (0x08, insn->src(0));
2007 emitGPR (0x00, insn->def(0));
2008 }
2009
2010 void
emitISETP()2011 CodeEmitterGM107::emitISETP()
2012 {
2013 const CmpInstruction *insn = this->insn->asCmp();
2014
2015 switch (insn->src(1).getFile()) {
2016 case FILE_GPR:
2017 emitInsn(0x5b600000);
2018 emitGPR (0x14, insn->src(1));
2019 break;
2020 case FILE_MEMORY_CONST:
2021 emitInsn(0x4b600000);
2022 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2023 break;
2024 case FILE_IMMEDIATE:
2025 emitInsn(0x36600000);
2026 emitIMMD(0x14, 19, insn->src(1));
2027 break;
2028 default:
2029 assert(!"bad src1 file");
2030 break;
2031 }
2032
2033 if (insn->op != OP_SET) {
2034 switch (insn->op) {
2035 case OP_SET_AND: emitField(0x2d, 2, 0); break;
2036 case OP_SET_OR : emitField(0x2d, 2, 1); break;
2037 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
2038 default:
2039 assert(!"invalid set op");
2040 break;
2041 }
2042 emitPRED(0x27, insn->src(2));
2043 } else {
2044 emitPRED(0x27);
2045 }
2046
2047 emitCond3(0x31, insn->setCond);
2048 emitField(0x30, 1, isSignedType(insn->sType));
2049 emitX (0x2b);
2050 emitGPR (0x08, insn->src(0));
2051 emitPRED (0x03, insn->def(0));
2052 if (insn->defExists(1))
2053 emitPRED(0x00, insn->def(1));
2054 else
2055 emitPRED(0x00);
2056 }
2057
2058 void
emitSHL()2059 CodeEmitterGM107::emitSHL()
2060 {
2061 switch (insn->src(1).getFile()) {
2062 case FILE_GPR:
2063 emitInsn(0x5c480000);
2064 emitGPR (0x14, insn->src(1));
2065 break;
2066 case FILE_MEMORY_CONST:
2067 emitInsn(0x4c480000);
2068 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2069 break;
2070 case FILE_IMMEDIATE:
2071 emitInsn(0x38480000);
2072 emitIMMD(0x14, 19, insn->src(1));
2073 break;
2074 default:
2075 assert(!"bad src1 file");
2076 break;
2077 }
2078
2079 emitCC (0x2f);
2080 emitX (0x2b);
2081 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2082 emitGPR (0x08, insn->src(0));
2083 emitGPR (0x00, insn->def(0));
2084 }
2085
2086 void
emitSHR()2087 CodeEmitterGM107::emitSHR()
2088 {
2089 switch (insn->src(1).getFile()) {
2090 case FILE_GPR:
2091 emitInsn(0x5c280000);
2092 emitGPR (0x14, insn->src(1));
2093 break;
2094 case FILE_MEMORY_CONST:
2095 emitInsn(0x4c280000);
2096 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2097 break;
2098 case FILE_IMMEDIATE:
2099 emitInsn(0x38280000);
2100 emitIMMD(0x14, 19, insn->src(1));
2101 break;
2102 default:
2103 assert(!"bad src1 file");
2104 break;
2105 }
2106
2107 emitField(0x30, 1, isSignedType(insn->dType));
2108 emitCC (0x2f);
2109 emitX (0x2c);
2110 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2111 emitGPR (0x08, insn->src(0));
2112 emitGPR (0x00, insn->def(0));
2113 }
2114
2115 void
emitSHF()2116 CodeEmitterGM107::emitSHF()
2117 {
2118 unsigned type;
2119
2120 switch (insn->src(1).getFile()) {
2121 case FILE_GPR:
2122 emitInsn(insn->op == OP_SHL ? 0x5bf80000 : 0x5cf80000);
2123 emitGPR(0x14, insn->src(1));
2124 break;
2125 case FILE_IMMEDIATE:
2126 emitInsn(insn->op == OP_SHL ? 0x36f80000 : 0x38f80000);
2127 emitIMMD(0x14, 19, insn->src(1));
2128 break;
2129 default:
2130 assert(!"bad src1 file");
2131 break;
2132 }
2133
2134 switch (insn->sType) {
2135 case TYPE_U64:
2136 type = 2;
2137 break;
2138 case TYPE_S64:
2139 type = 3;
2140 break;
2141 default:
2142 type = 0;
2143 break;
2144 }
2145
2146 emitField(0x32, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_WRAP));
2147 emitX (0x31);
2148 emitField(0x30, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_HIGH));
2149 emitCC (0x2f);
2150 emitGPR (0x27, insn->src(2));
2151 emitField(0x25, 2, type);
2152 emitGPR (0x08, insn->src(0));
2153 emitGPR (0x00, insn->def(0));
2154 }
2155
2156 void
emitPOPC()2157 CodeEmitterGM107::emitPOPC()
2158 {
2159 switch (insn->src(0).getFile()) {
2160 case FILE_GPR:
2161 emitInsn(0x5c080000);
2162 emitGPR (0x14, insn->src(0));
2163 break;
2164 case FILE_MEMORY_CONST:
2165 emitInsn(0x4c080000);
2166 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2167 break;
2168 case FILE_IMMEDIATE:
2169 emitInsn(0x38080000);
2170 emitIMMD(0x14, 19, insn->src(0));
2171 break;
2172 default:
2173 assert(!"bad src1 file");
2174 break;
2175 }
2176
2177 emitINV(0x28, insn->src(0));
2178 emitGPR(0x00, insn->def(0));
2179 }
2180
2181 void
emitBFI()2182 CodeEmitterGM107::emitBFI()
2183 {
2184 switch(insn->src(2).getFile()) {
2185 case FILE_GPR:
2186 switch (insn->src(1).getFile()) {
2187 case FILE_GPR:
2188 emitInsn(0x5bf00000);
2189 emitGPR (0x14, insn->src(1));
2190 break;
2191 case FILE_MEMORY_CONST:
2192 emitInsn(0x4bf00000);
2193 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2194 break;
2195 case FILE_IMMEDIATE:
2196 emitInsn(0x36f00000);
2197 emitIMMD(0x14, 19, insn->src(1));
2198 break;
2199 default:
2200 assert(!"bad src1 file");
2201 break;
2202 }
2203 emitGPR (0x27, insn->src(2));
2204 break;
2205 case FILE_MEMORY_CONST:
2206 emitInsn(0x53f00000);
2207 emitGPR (0x27, insn->src(1));
2208 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
2209 break;
2210 default:
2211 assert(!"bad src2 file");
2212 break;
2213 }
2214
2215 emitCC (0x2f);
2216 emitGPR (0x08, insn->src(0));
2217 emitGPR (0x00, insn->def(0));
2218 }
2219
2220 void
emitBFE()2221 CodeEmitterGM107::emitBFE()
2222 {
2223 switch (insn->src(1).getFile()) {
2224 case FILE_GPR:
2225 emitInsn(0x5c000000);
2226 emitGPR (0x14, insn->src(1));
2227 break;
2228 case FILE_MEMORY_CONST:
2229 emitInsn(0x4c000000);
2230 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2231 break;
2232 case FILE_IMMEDIATE:
2233 emitInsn(0x38000000);
2234 emitIMMD(0x14, 19, insn->src(1));
2235 break;
2236 default:
2237 assert(!"bad src1 file");
2238 break;
2239 }
2240
2241 emitField(0x30, 1, isSignedType(insn->dType));
2242 emitCC (0x2f);
2243 emitField(0x28, 1, insn->subOp == NV50_IR_SUBOP_EXTBF_REV);
2244 emitGPR (0x08, insn->src(0));
2245 emitGPR (0x00, insn->def(0));
2246 }
2247
2248 void
emitFLO()2249 CodeEmitterGM107::emitFLO()
2250 {
2251 switch (insn->src(0).getFile()) {
2252 case FILE_GPR:
2253 emitInsn(0x5c300000);
2254 emitGPR (0x14, insn->src(0));
2255 break;
2256 case FILE_MEMORY_CONST:
2257 emitInsn(0x4c300000);
2258 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2259 break;
2260 case FILE_IMMEDIATE:
2261 emitInsn(0x38300000);
2262 emitIMMD(0x14, 19, insn->src(0));
2263 break;
2264 default:
2265 assert(!"bad src1 file");
2266 break;
2267 }
2268
2269 emitField(0x30, 1, isSignedType(insn->dType));
2270 emitCC (0x2f);
2271 emitField(0x29, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT);
2272 emitINV (0x28, insn->src(0));
2273 emitGPR (0x00, insn->def(0));
2274 }
2275
2276 /*******************************************************************************
2277 * memory
2278 ******************************************************************************/
2279
2280 void
emitLDSTs(int pos,DataType type)2281 CodeEmitterGM107::emitLDSTs(int pos, DataType type)
2282 {
2283 int data = 0;
2284
2285 switch (typeSizeof(type)) {
2286 case 1: data = isSignedType(type) ? 1 : 0; break;
2287 case 2: data = isSignedType(type) ? 3 : 2; break;
2288 case 4: data = 4; break;
2289 case 8: data = 5; break;
2290 case 16: data = 6; break;
2291 default:
2292 assert(!"bad type");
2293 break;
2294 }
2295
2296 emitField(pos, 3, data);
2297 }
2298
2299 void
emitLDSTc(int pos)2300 CodeEmitterGM107::emitLDSTc(int pos)
2301 {
2302 int mode = 0;
2303
2304 switch (insn->cache) {
2305 case CACHE_CA: mode = 0; break;
2306 case CACHE_CG: mode = 1; break;
2307 case CACHE_CS: mode = 2; break;
2308 case CACHE_CV: mode = 3; break;
2309 default:
2310 assert(!"invalid caching mode");
2311 break;
2312 }
2313
2314 emitField(pos, 2, mode);
2315 }
2316
2317 void
emitLDC()2318 CodeEmitterGM107::emitLDC()
2319 {
2320 emitInsn (0xef900000);
2321 emitLDSTs(0x30, insn->dType);
2322 emitField(0x2c, 2, insn->subOp);
2323 emitCBUF (0x24, 0x08, 0x14, 16, 0, insn->src(0));
2324 emitGPR (0x00, insn->def(0));
2325 }
2326
2327 void
emitLDL()2328 CodeEmitterGM107::emitLDL()
2329 {
2330 emitInsn (0xef400000);
2331 emitLDSTs(0x30, insn->dType);
2332 emitLDSTc(0x2c);
2333 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2334 emitGPR (0x00, insn->def(0));
2335 }
2336
2337 void
emitLDS()2338 CodeEmitterGM107::emitLDS()
2339 {
2340 emitInsn (0xef480000);
2341 emitLDSTs(0x30, insn->dType);
2342 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2343 emitGPR (0x00, insn->def(0));
2344 }
2345
2346 void
emitLD()2347 CodeEmitterGM107::emitLD()
2348 {
2349 emitInsn (0x80000000);
2350 emitPRED (0x3a);
2351 emitLDSTc(0x38);
2352 emitLDSTs(0x35, insn->dType);
2353 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2354 emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2355 emitGPR (0x00, insn->def(0));
2356 }
2357
2358 void
emitSTL()2359 CodeEmitterGM107::emitSTL()
2360 {
2361 emitInsn (0xef500000);
2362 emitLDSTs(0x30, insn->dType);
2363 emitLDSTc(0x2c);
2364 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2365 emitGPR (0x00, insn->src(1));
2366 }
2367
2368 void
emitSTS()2369 CodeEmitterGM107::emitSTS()
2370 {
2371 emitInsn (0xef580000);
2372 emitLDSTs(0x30, insn->dType);
2373 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2374 emitGPR (0x00, insn->src(1));
2375 }
2376
2377 void
emitST()2378 CodeEmitterGM107::emitST()
2379 {
2380 emitInsn (0xa0000000);
2381 emitPRED (0x3a);
2382 emitLDSTc(0x38);
2383 emitLDSTs(0x35, insn->dType);
2384 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2385 emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2386 emitGPR (0x00, insn->src(1));
2387 }
2388
2389 void
emitALD()2390 CodeEmitterGM107::emitALD()
2391 {
2392 emitInsn (0xefd80000);
2393 emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2394 emitGPR (0x27, insn->src(0).getIndirect(1));
2395 emitO (0x20);
2396 emitP (0x1f);
2397 emitADDR (0x08, 20, 10, 0, insn->src(0));
2398 emitGPR (0x00, insn->def(0));
2399 }
2400
2401 void
emitAST()2402 CodeEmitterGM107::emitAST()
2403 {
2404 emitInsn (0xeff00000);
2405 emitField(0x2f, 2, (typeSizeof(insn->dType) / 4) - 1);
2406 emitGPR (0x27, insn->src(0).getIndirect(1));
2407 emitP (0x1f);
2408 emitADDR (0x08, 20, 10, 0, insn->src(0));
2409 emitGPR (0x00, insn->src(1));
2410 }
2411
2412 void
emitISBERD()2413 CodeEmitterGM107::emitISBERD()
2414 {
2415 emitInsn(0xefd00000);
2416 emitGPR (0x08, insn->src(0));
2417 emitGPR (0x00, insn->def(0));
2418 }
2419
2420 void
emitAL2P()2421 CodeEmitterGM107::emitAL2P()
2422 {
2423 emitInsn (0xefa00000);
2424 emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2425 emitPRED (0x2c);
2426 emitO (0x20);
2427 emitField(0x14, 11, insn->src(0).get()->reg.data.offset);
2428 emitGPR (0x08, insn->src(0).getIndirect(0));
2429 emitGPR (0x00, insn->def(0));
2430 }
2431
2432 static void
interpApply(const FixupEntry * entry,uint32_t * code,const FixupData & data)2433 interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
2434 {
2435 int ipa = entry->ipa;
2436 int reg = entry->reg;
2437 int loc = entry->loc;
2438
2439 if (data.flatshade &&
2440 (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
2441 ipa = NV50_IR_INTERP_FLAT;
2442 reg = 0xff;
2443 } else if (data.force_persample_interp &&
2444 (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
2445 (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
2446 ipa |= NV50_IR_INTERP_CENTROID;
2447 }
2448 code[loc + 1] &= ~(0xf << 0x14);
2449 code[loc + 1] |= (ipa & 0x3) << 0x16;
2450 code[loc + 1] |= (ipa & 0xc) << (0x14 - 2);
2451 code[loc + 0] &= ~(0xff << 0x14);
2452 code[loc + 0] |= reg << 0x14;
2453 }
2454
2455 void
emitIPA()2456 CodeEmitterGM107::emitIPA()
2457 {
2458 int ipam = 0, ipas = 0;
2459
2460 switch (insn->getInterpMode()) {
2461 case NV50_IR_INTERP_LINEAR : ipam = 0; break;
2462 case NV50_IR_INTERP_PERSPECTIVE: ipam = 1; break;
2463 case NV50_IR_INTERP_FLAT : ipam = 2; break;
2464 case NV50_IR_INTERP_SC : ipam = 3; break;
2465 default:
2466 assert(!"invalid ipa mode");
2467 break;
2468 }
2469
2470 switch (insn->getSampleMode()) {
2471 case NV50_IR_INTERP_DEFAULT : ipas = 0; break;
2472 case NV50_IR_INTERP_CENTROID: ipas = 1; break;
2473 case NV50_IR_INTERP_OFFSET : ipas = 2; break;
2474 default:
2475 assert(!"invalid ipa sample mode");
2476 break;
2477 }
2478
2479 emitInsn (0xe0000000);
2480 emitField(0x36, 2, ipam);
2481 emitField(0x34, 2, ipas);
2482 emitSAT (0x33);
2483 emitField(0x2f, 3, 7);
2484 emitADDR (0x08, 0x1c, 10, 0, insn->src(0));
2485 if ((code[0] & 0x0000ff00) != 0x0000ff00)
2486 code[1] |= 0x00000040; /* .idx */
2487 emitGPR(0x00, insn->def(0));
2488
2489 if (insn->op == OP_PINTERP) {
2490 emitGPR(0x14, insn->src(1));
2491 if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2492 emitGPR(0x27, insn->src(2));
2493 addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, interpApply);
2494 } else {
2495 if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2496 emitGPR(0x27, insn->src(1));
2497 emitGPR(0x14);
2498 addInterp(insn->ipa, 0xff, interpApply);
2499 }
2500
2501 if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET)
2502 emitGPR(0x27);
2503 }
2504
2505 void
emitATOM()2506 CodeEmitterGM107::emitATOM()
2507 {
2508 unsigned dType, subOp;
2509
2510 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2511 switch (insn->dType) {
2512 case TYPE_U32: dType = 0; break;
2513 case TYPE_U64: dType = 1; break;
2514 default: assert(!"unexpected dType"); dType = 0; break;
2515 }
2516 subOp = 15;
2517
2518 emitInsn (0xee000000);
2519 } else {
2520 switch (insn->dType) {
2521 case TYPE_U32: dType = 0; break;
2522 case TYPE_S32: dType = 1; break;
2523 case TYPE_U64: dType = 2; break;
2524 case TYPE_F32: dType = 3; break;
2525 case TYPE_B128: dType = 4; break;
2526 case TYPE_S64: dType = 5; break;
2527 default: assert(!"unexpected dType"); dType = 0; break;
2528 }
2529 if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2530 subOp = 8;
2531 else
2532 subOp = insn->subOp;
2533
2534 emitInsn (0xed000000);
2535 }
2536
2537 emitField(0x34, 4, subOp);
2538 emitField(0x31, 3, dType);
2539 emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2540 emitGPR (0x14, insn->src(1));
2541 emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2542 emitGPR (0x00, insn->def(0));
2543 }
2544
2545 void
emitATOMS()2546 CodeEmitterGM107::emitATOMS()
2547 {
2548 unsigned dType, subOp;
2549
2550 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2551 switch (insn->dType) {
2552 case TYPE_U32: dType = 0; break;
2553 case TYPE_U64: dType = 1; break;
2554 default: assert(!"unexpected dType"); dType = 0; break;
2555 }
2556 subOp = 4;
2557
2558 emitInsn (0xee000000);
2559 emitField(0x34, 1, dType);
2560 } else {
2561 switch (insn->dType) {
2562 case TYPE_U32: dType = 0; break;
2563 case TYPE_S32: dType = 1; break;
2564 case TYPE_U64: dType = 2; break;
2565 case TYPE_S64: dType = 3; break;
2566 default: assert(!"unexpected dType"); dType = 0; break;
2567 }
2568
2569 if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2570 subOp = 8;
2571 else
2572 subOp = insn->subOp;
2573
2574 emitInsn (0xec000000);
2575 emitField(0x1c, 3, dType);
2576 }
2577
2578 emitField(0x34, 4, subOp);
2579 emitGPR (0x14, insn->src(1));
2580 emitADDR (0x08, 0x1e, 22, 2, insn->src(0));
2581 emitGPR (0x00, insn->def(0));
2582 }
2583
2584 void
emitRED()2585 CodeEmitterGM107::emitRED()
2586 {
2587 unsigned dType;
2588
2589 switch (insn->dType) {
2590 case TYPE_U32: dType = 0; break;
2591 case TYPE_S32: dType = 1; break;
2592 case TYPE_U64: dType = 2; break;
2593 case TYPE_F32: dType = 3; break;
2594 case TYPE_B128: dType = 4; break;
2595 case TYPE_S64: dType = 5; break;
2596 default: assert(!"unexpected dType"); dType = 0; break;
2597 }
2598
2599 emitInsn (0xebf80000);
2600 emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2601 emitField(0x17, 3, insn->subOp);
2602 emitField(0x14, 3, dType);
2603 emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2604 emitGPR (0x00, insn->src(1));
2605 }
2606
2607 void
emitCCTL()2608 CodeEmitterGM107::emitCCTL()
2609 {
2610 unsigned width;
2611 if (insn->src(0).getFile() == FILE_MEMORY_GLOBAL) {
2612 emitInsn(0xef600000);
2613 width = 30;
2614 } else {
2615 emitInsn(0xef800000);
2616 width = 22;
2617 }
2618 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2619 emitADDR (0x08, 0x16, width, 2, insn->src(0));
2620 emitField(0x00, 4, insn->subOp);
2621 }
2622
2623 /*******************************************************************************
2624 * surface
2625 ******************************************************************************/
2626
2627 void
emitPIXLD()2628 CodeEmitterGM107::emitPIXLD()
2629 {
2630 emitInsn (0xefe80000);
2631 emitPRED (0x2d);
2632 emitField(0x1f, 3, insn->subOp);
2633 emitGPR (0x08, insn->src(0));
2634 emitGPR (0x00, insn->def(0));
2635 }
2636
2637 /*******************************************************************************
2638 * texture
2639 ******************************************************************************/
2640
2641 void
emitTEXs(int pos)2642 CodeEmitterGM107::emitTEXs(int pos)
2643 {
2644 int src1 = insn->predSrc == 1 ? 2 : 1;
2645 if (insn->srcExists(src1))
2646 emitGPR(pos, insn->src(src1));
2647 else
2648 emitGPR(pos);
2649 }
2650
2651 void
emitTEX()2652 CodeEmitterGM107::emitTEX()
2653 {
2654 const TexInstruction *insn = this->insn->asTex();
2655 int lodm = 0;
2656
2657 if (!insn->tex.levelZero) {
2658 switch (insn->op) {
2659 case OP_TEX: lodm = 0; break;
2660 case OP_TXB: lodm = 2; break;
2661 case OP_TXL: lodm = 3; break;
2662 default:
2663 assert(!"invalid tex op");
2664 break;
2665 }
2666 } else {
2667 lodm = 1;
2668 }
2669
2670 if (insn->tex.rIndirectSrc >= 0) {
2671 emitInsn (0xdeb80000);
2672 emitField(0x25, 2, lodm);
2673 emitField(0x24, 1, insn->tex.useOffsets == 1);
2674 } else {
2675 emitInsn (0xc0380000);
2676 emitField(0x37, 2, lodm);
2677 emitField(0x36, 1, insn->tex.useOffsets == 1);
2678 emitField(0x24, 13, insn->tex.r);
2679 }
2680
2681 emitField(0x32, 1, insn->tex.target.isShadow());
2682 emitField(0x31, 1, insn->tex.liveOnly);
2683 emitField(0x23, 1, insn->tex.derivAll);
2684 emitField(0x1f, 4, insn->tex.mask);
2685 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2686 insn->tex.target.getDim() - 1);
2687 emitField(0x1c, 1, insn->tex.target.isArray());
2688 emitTEXs (0x14);
2689 emitGPR (0x08, insn->src(0));
2690 emitGPR (0x00, insn->def(0));
2691 }
2692
2693 void
emitTLD()2694 CodeEmitterGM107::emitTLD()
2695 {
2696 const TexInstruction *insn = this->insn->asTex();
2697
2698 if (insn->tex.rIndirectSrc >= 0) {
2699 emitInsn (0xdd380000);
2700 } else {
2701 emitInsn (0xdc380000);
2702 emitField(0x24, 13, insn->tex.r);
2703 }
2704
2705 emitField(0x37, 1, insn->tex.levelZero == 0);
2706 emitField(0x32, 1, insn->tex.target.isMS());
2707 emitField(0x31, 1, insn->tex.liveOnly);
2708 emitField(0x23, 1, insn->tex.useOffsets == 1);
2709 emitField(0x1f, 4, insn->tex.mask);
2710 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2711 insn->tex.target.getDim() - 1);
2712 emitField(0x1c, 1, insn->tex.target.isArray());
2713 emitTEXs (0x14);
2714 emitGPR (0x08, insn->src(0));
2715 emitGPR (0x00, insn->def(0));
2716 }
2717
2718 void
emitTLD4()2719 CodeEmitterGM107::emitTLD4()
2720 {
2721 const TexInstruction *insn = this->insn->asTex();
2722
2723 if (insn->tex.rIndirectSrc >= 0) {
2724 emitInsn (0xdef80000);
2725 emitField(0x26, 2, insn->tex.gatherComp);
2726 emitField(0x25, 2, insn->tex.useOffsets == 4);
2727 emitField(0x24, 2, insn->tex.useOffsets == 1);
2728 } else {
2729 emitInsn (0xc8380000);
2730 emitField(0x38, 2, insn->tex.gatherComp);
2731 emitField(0x37, 2, insn->tex.useOffsets == 4);
2732 emitField(0x36, 2, insn->tex.useOffsets == 1);
2733 emitField(0x24, 13, insn->tex.r);
2734 }
2735
2736 emitField(0x32, 1, insn->tex.target.isShadow());
2737 emitField(0x31, 1, insn->tex.liveOnly);
2738 emitField(0x23, 1, insn->tex.derivAll);
2739 emitField(0x1f, 4, insn->tex.mask);
2740 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2741 insn->tex.target.getDim() - 1);
2742 emitField(0x1c, 1, insn->tex.target.isArray());
2743 emitTEXs (0x14);
2744 emitGPR (0x08, insn->src(0));
2745 emitGPR (0x00, insn->def(0));
2746 }
2747
2748 void
emitTXD()2749 CodeEmitterGM107::emitTXD()
2750 {
2751 const TexInstruction *insn = this->insn->asTex();
2752
2753 if (insn->tex.rIndirectSrc >= 0) {
2754 emitInsn (0xde780000);
2755 } else {
2756 emitInsn (0xde380000);
2757 emitField(0x24, 13, insn->tex.r);
2758 }
2759
2760 emitField(0x31, 1, insn->tex.liveOnly);
2761 emitField(0x23, 1, insn->tex.useOffsets == 1);
2762 emitField(0x1f, 4, insn->tex.mask);
2763 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2764 insn->tex.target.getDim() - 1);
2765 emitField(0x1c, 1, insn->tex.target.isArray());
2766 emitTEXs (0x14);
2767 emitGPR (0x08, insn->src(0));
2768 emitGPR (0x00, insn->def(0));
2769 }
2770
2771 void
emitTMML()2772 CodeEmitterGM107::emitTMML()
2773 {
2774 const TexInstruction *insn = this->insn->asTex();
2775
2776 if (insn->tex.rIndirectSrc >= 0) {
2777 emitInsn (0xdf600000);
2778 } else {
2779 emitInsn (0xdf580000);
2780 emitField(0x24, 13, insn->tex.r);
2781 }
2782
2783 emitField(0x31, 1, insn->tex.liveOnly);
2784 emitField(0x23, 1, insn->tex.derivAll);
2785 emitField(0x1f, 4, insn->tex.mask);
2786 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2787 insn->tex.target.getDim() - 1);
2788 emitField(0x1c, 1, insn->tex.target.isArray());
2789 emitTEXs (0x14);
2790 emitGPR (0x08, insn->src(0));
2791 emitGPR (0x00, insn->def(0));
2792 }
2793
2794 void
emitTXQ()2795 CodeEmitterGM107::emitTXQ()
2796 {
2797 const TexInstruction *insn = this->insn->asTex();
2798 int type = 0;
2799
2800 switch (insn->tex.query) {
2801 case TXQ_DIMS : type = 0x01; break;
2802 case TXQ_TYPE : type = 0x02; break;
2803 case TXQ_SAMPLE_POSITION: type = 0x05; break;
2804 case TXQ_FILTER : type = 0x10; break;
2805 case TXQ_LOD : type = 0x12; break;
2806 case TXQ_WRAP : type = 0x14; break;
2807 case TXQ_BORDER_COLOUR : type = 0x16; break;
2808 default:
2809 assert(!"invalid txq query");
2810 break;
2811 }
2812
2813 if (insn->tex.rIndirectSrc >= 0) {
2814 emitInsn (0xdf500000);
2815 } else {
2816 emitInsn (0xdf480000);
2817 emitField(0x24, 13, insn->tex.r);
2818 }
2819
2820 emitField(0x31, 1, insn->tex.liveOnly);
2821 emitField(0x1f, 4, insn->tex.mask);
2822 emitField(0x16, 6, type);
2823 emitGPR (0x08, insn->src(0));
2824 emitGPR (0x00, insn->def(0));
2825 }
2826
2827 void
emitDEPBAR()2828 CodeEmitterGM107::emitDEPBAR()
2829 {
2830 emitInsn (0xf0f00000);
2831 emitField(0x1d, 1, 1); /* le */
2832 emitField(0x1a, 3, 5);
2833 emitField(0x14, 6, insn->subOp);
2834 emitField(0x00, 6, insn->subOp);
2835 }
2836
2837 /*******************************************************************************
2838 * misc
2839 ******************************************************************************/
2840
2841 void
emitNOP()2842 CodeEmitterGM107::emitNOP()
2843 {
2844 emitInsn(0x50b00000);
2845 }
2846
2847 void
emitKIL()2848 CodeEmitterGM107::emitKIL()
2849 {
2850 emitInsn (0xe3300000);
2851 emitCond5(0x00, CC_TR);
2852 }
2853
2854 void
emitOUT()2855 CodeEmitterGM107::emitOUT()
2856 {
2857 const int cut = insn->op == OP_RESTART || insn->subOp;
2858 const int emit = insn->op == OP_EMIT;
2859
2860 switch (insn->src(1).getFile()) {
2861 case FILE_GPR:
2862 emitInsn(0xfbe00000);
2863 emitGPR (0x14, insn->src(1));
2864 break;
2865 case FILE_IMMEDIATE:
2866 emitInsn(0xf6e00000);
2867 emitIMMD(0x14, 19, insn->src(1));
2868 break;
2869 case FILE_MEMORY_CONST:
2870 emitInsn(0xebe00000);
2871 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2872 break;
2873 default:
2874 assert(!"bad src1 file");
2875 break;
2876 }
2877
2878 emitField(0x27, 2, (cut << 1) | emit);
2879 emitGPR (0x08, insn->src(0));
2880 emitGPR (0x00, insn->def(0));
2881 }
2882
2883 void
emitBAR()2884 CodeEmitterGM107::emitBAR()
2885 {
2886 uint8_t subop;
2887
2888 emitInsn (0xf0a80000);
2889
2890 switch (insn->subOp) {
2891 case NV50_IR_SUBOP_BAR_RED_POPC: subop = 0x02; break;
2892 case NV50_IR_SUBOP_BAR_RED_AND: subop = 0x0a; break;
2893 case NV50_IR_SUBOP_BAR_RED_OR: subop = 0x12; break;
2894 case NV50_IR_SUBOP_BAR_ARRIVE: subop = 0x81; break;
2895 default:
2896 subop = 0x80;
2897 assert(insn->subOp == NV50_IR_SUBOP_BAR_SYNC);
2898 break;
2899 }
2900
2901 emitField(0x20, 8, subop);
2902
2903 // barrier id
2904 if (insn->src(0).getFile() == FILE_GPR) {
2905 emitGPR(0x08, insn->src(0));
2906 } else {
2907 ImmediateValue *imm = insn->getSrc(0)->asImm();
2908 assert(imm);
2909 emitField(0x08, 8, imm->reg.data.u32);
2910 emitField(0x2b, 1, 1);
2911 }
2912
2913 // thread count
2914 if (insn->src(1).getFile() == FILE_GPR) {
2915 emitGPR(0x14, insn->src(1));
2916 } else {
2917 ImmediateValue *imm = insn->getSrc(0)->asImm();
2918 assert(imm);
2919 emitField(0x14, 12, imm->reg.data.u32);
2920 emitField(0x2c, 1, 1);
2921 }
2922
2923 if (insn->srcExists(2) && (insn->predSrc != 2)) {
2924 emitPRED (0x27, insn->src(2));
2925 emitField(0x2a, 1, insn->src(2).mod == Modifier(NV50_IR_MOD_NOT));
2926 } else {
2927 emitField(0x27, 3, 7);
2928 }
2929 }
2930
2931 void
emitMEMBAR()2932 CodeEmitterGM107::emitMEMBAR()
2933 {
2934 emitInsn (0xef980000);
2935 emitField(0x08, 2, insn->subOp >> 2);
2936 }
2937
2938 void
emitVOTE()2939 CodeEmitterGM107::emitVOTE()
2940 {
2941 const ImmediateValue *imm;
2942 uint32_t u32;
2943
2944 int r = -1, p = -1;
2945 for (int i = 0; insn->defExists(i); i++) {
2946 if (insn->def(i).getFile() == FILE_GPR)
2947 r = i;
2948 else if (insn->def(i).getFile() == FILE_PREDICATE)
2949 p = i;
2950 }
2951
2952 emitInsn (0x50d80000);
2953 emitField(0x30, 2, insn->subOp);
2954 if (r >= 0)
2955 emitGPR (0x00, insn->def(r));
2956 else
2957 emitGPR (0x00);
2958 if (p >= 0)
2959 emitPRED (0x2d, insn->def(p));
2960 else
2961 emitPRED (0x2d);
2962
2963 switch (insn->src(0).getFile()) {
2964 case FILE_PREDICATE:
2965 emitField(0x2a, 1, insn->src(0).mod == Modifier(NV50_IR_MOD_NOT));
2966 emitPRED (0x27, insn->src(0));
2967 break;
2968 case FILE_IMMEDIATE:
2969 imm = insn->getSrc(0)->asImm();
2970 assert(imm);
2971 u32 = imm->reg.data.u32;
2972 assert(u32 == 0 || u32 == 1);
2973 emitPRED(0x27);
2974 emitField(0x2a, 1, u32 == 0);
2975 break;
2976 default:
2977 assert(!"Unhandled src");
2978 break;
2979 }
2980 }
2981
2982 void
emitSUTarget()2983 CodeEmitterGM107::emitSUTarget()
2984 {
2985 const TexInstruction *insn = this->insn->asTex();
2986 int target = 0;
2987
2988 assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
2989
2990 if (insn->tex.target == TEX_TARGET_BUFFER) {
2991 target = 2;
2992 } else if (insn->tex.target == TEX_TARGET_1D_ARRAY) {
2993 target = 4;
2994 } else if (insn->tex.target == TEX_TARGET_2D ||
2995 insn->tex.target == TEX_TARGET_RECT) {
2996 target = 6;
2997 } else if (insn->tex.target == TEX_TARGET_2D_ARRAY ||
2998 insn->tex.target == TEX_TARGET_CUBE ||
2999 insn->tex.target == TEX_TARGET_CUBE_ARRAY) {
3000 target = 8;
3001 } else if (insn->tex.target == TEX_TARGET_3D) {
3002 target = 10;
3003 } else {
3004 assert(insn->tex.target == TEX_TARGET_1D);
3005 }
3006 emitField(0x20, 4, target);
3007 }
3008
3009 void
emitSUHandle(const int s)3010 CodeEmitterGM107::emitSUHandle(const int s)
3011 {
3012 const TexInstruction *insn = this->insn->asTex();
3013
3014 assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
3015
3016 if (insn->src(s).getFile() == FILE_GPR) {
3017 emitGPR(0x27, insn->src(s));
3018 } else {
3019 ImmediateValue *imm = insn->getSrc(s)->asImm();
3020 assert(imm);
3021 emitField(0x33, 1, 1);
3022 emitField(0x24, 13, imm->reg.data.u32);
3023 }
3024 }
3025
3026 void
emitSUSTx()3027 CodeEmitterGM107::emitSUSTx()
3028 {
3029 const TexInstruction *insn = this->insn->asTex();
3030
3031 emitInsn(0xeb200000);
3032 if (insn->op == OP_SUSTB)
3033 emitField(0x34, 1, 1);
3034 emitSUTarget();
3035
3036 emitLDSTc(0x18);
3037 emitField(0x14, 4, 0xf); // rgba
3038 emitGPR (0x08, insn->src(0));
3039 emitGPR (0x00, insn->src(1));
3040
3041 emitSUHandle(2);
3042 }
3043
3044 void
emitSULDx()3045 CodeEmitterGM107::emitSULDx()
3046 {
3047 const TexInstruction *insn = this->insn->asTex();
3048 int type = 0;
3049
3050 emitInsn(0xeb000000);
3051 if (insn->op == OP_SULDB)
3052 emitField(0x34, 1, 1);
3053 emitSUTarget();
3054
3055 switch (insn->dType) {
3056 case TYPE_S8: type = 1; break;
3057 case TYPE_U16: type = 2; break;
3058 case TYPE_S16: type = 3; break;
3059 case TYPE_U32: type = 4; break;
3060 case TYPE_U64: type = 5; break;
3061 case TYPE_B128: type = 6; break;
3062 default:
3063 assert(insn->dType == TYPE_U8);
3064 break;
3065 }
3066 emitLDSTc(0x18);
3067 emitField(0x14, 3, type);
3068 emitGPR (0x00, insn->def(0));
3069 emitGPR (0x08, insn->src(0));
3070
3071 emitSUHandle(1);
3072 }
3073
3074 void
emitSUREDx()3075 CodeEmitterGM107::emitSUREDx()
3076 {
3077 const TexInstruction *insn = this->insn->asTex();
3078 uint8_t type = 0, subOp;
3079
3080 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS)
3081 emitInsn(0xeac00000);
3082 else
3083 emitInsn(0xea600000);
3084
3085 if (insn->op == OP_SUREDB)
3086 emitField(0x34, 1, 1);
3087 emitSUTarget();
3088
3089 // destination type
3090 switch (insn->dType) {
3091 case TYPE_S32: type = 1; break;
3092 case TYPE_U64: type = 2; break;
3093 case TYPE_F32: type = 3; break;
3094 case TYPE_S64: type = 5; break;
3095 default:
3096 assert(insn->dType == TYPE_U32);
3097 break;
3098 }
3099
3100 // atomic operation
3101 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
3102 subOp = 0;
3103 } else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
3104 subOp = 8;
3105 } else {
3106 subOp = insn->subOp;
3107 }
3108
3109 emitField(0x24, 3, type);
3110 emitField(0x1d, 4, subOp);
3111 emitGPR (0x14, insn->src(1));
3112 emitGPR (0x08, insn->src(0));
3113 emitGPR (0x00, insn->def(0));
3114
3115 emitSUHandle(2);
3116 }
3117
3118 /*******************************************************************************
3119 * assembler front-end
3120 ******************************************************************************/
3121
// Encode one IR instruction into the output buffer.
//
// Every instruction occupies 8 bytes. When issue delays are written, an
// additional 8-byte scheduling word is inserted at each 32-byte boundary and
// the instruction's 21-bit sched value is packed into the current word.
//
// Returns false if the instruction's encSize is not 8, if the output buffer
// is too small, or if the opcode is not handled (a NOP is emitted in the
// last case); returns true otherwise.
bool
CodeEmitterGM107::emitInstruction(Instruction *i)
{
   // Space needed: 16 bytes if a new scheduling word has to be started here
   // (issue delays enabled and codeSize is 32-byte aligned), otherwise 8.
   const unsigned int size = (writeIssueDelays && !(codeSize & 0x1f)) ? 16 : 8;
   bool ret = true;

   insn = i;

   if (insn->encSize != 8) {
      ERROR("skipping undecodable instruction: "); insn->print();
      return false;
   } else
   if (codeSize + size > codeSizeLimit) {
      ERROR("code emitter output buffer too small\n");
      return false;
   }

   if (writeIssueDelays) {
      // Slot of this instruction inside the current 32-byte group;
      // -1 means the group starts here and needs a fresh scheduling word.
      int n = ((codeSize & 0x1f) / 8) - 1;
      if (n < 0) {
         // Reserve and zero the scheduling word, then advance past it.
         data = code;
         data[0] = 0x00000000;
         data[1] = 0x00000000;
         code += 2;
         codeSize += 8;
         n++;
      }

      // Each instruction owns 21 bits of the scheduling word.
      emitField(data, n * 21, 21, insn->sched);
   }

   // Dispatch on the opcode to the matching encoder.
   switch (insn->op) {
   case OP_EXIT:
      emitEXIT();
      break;
   case OP_BRA:
      emitBRA();
      break;
   case OP_CALL:
      emitCAL();
      break;
   case OP_PRECONT:
      emitPCNT();
      break;
   case OP_CONT:
      emitCONT();
      break;
   case OP_PREBREAK:
      emitPBK();
      break;
   case OP_BREAK:
      emitBRK();
      break;
   case OP_PRERET:
      emitPRET();
      break;
   case OP_RET:
      emitRET();
      break;
   case OP_JOINAT:
      emitSSY();
      break;
   case OP_JOIN:
      emitSYNC();
      break;
   case OP_QUADON:
      emitSAM();
      break;
   case OP_QUADPOP:
      emitRAM();
      break;
   case OP_MOV:
      emitMOV();
      break;
   case OP_RDSV:
      emitS2R();
      break;
   case OP_ABS:
   case OP_NEG:
   case OP_SAT:
   case OP_FLOOR:
   case OP_CEIL:
   case OP_TRUNC:
   case OP_CVT:
      // A CVT to or from a predicate is a plain move; everything else is a
      // conversion chosen by the float-ness of destination and source types.
      if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
                                 insn->src(0).getFile() == FILE_PREDICATE)) {
         emitMOV();
      } else if (isFloatType(insn->dType)) {
         if (isFloatType(insn->sType))
            emitF2F();
         else
            emitI2F();
      } else {
         if (isFloatType(insn->sType))
            emitF2I();
         else
            emitI2I();
      }
      break;
   case OP_SHFL:
      emitSHFL();
      break;
   case OP_ADD:
   case OP_SUB:
      if (isFloatType(insn->dType)) {
         if (insn->dType == TYPE_F64)
            emitDADD();
         else
            emitFADD();
      } else {
         emitIADD();
      }
      break;
   case OP_MUL:
      if (isFloatType(insn->dType)) {
         if (insn->dType == TYPE_F64)
            emitDMUL();
         else
            emitFMUL();
      } else {
         emitIMUL();
      }
      break;
   case OP_MAD:
   case OP_FMA:
      if (isFloatType(insn->dType)) {
         if (insn->dType == TYPE_F64)
            emitDFMA();
         else
            emitFFMA();
      } else {
         emitIMAD();
      }
      break;
   case OP_SHLADD:
      emitISCADD();
      break;
   case OP_MIN:
   case OP_MAX:
      if (isFloatType(insn->dType)) {
         if (insn->dType == TYPE_F64)
            emitDMNMX();
         else
            emitFMNMX();
      } else {
         emitIMNMX();
      }
      break;
   case OP_SHL:
      // 8-byte source types are shifted with SHF.
      if (typeSizeof(insn->sType) == 8)
         emitSHF();
      else
         emitSHL();
      break;
   case OP_SHR:
      // 8-byte source types are shifted with SHF.
      if (typeSizeof(insn->sType) == 8)
         emitSHF();
      else
         emitSHR();
      break;
   case OP_POPCNT:
      emitPOPC();
      break;
   case OP_INSBF:
      emitBFI();
      break;
   case OP_EXTBF:
      emitBFE();
      break;
   case OP_BFIND:
      emitFLO();
      break;
   case OP_SLCT:
      if (isFloatType(insn->dType))
         emitFCMP();
      else
         emitICMP();
      break;
   case OP_SET:
   case OP_SET_AND:
   case OP_SET_OR:
   case OP_SET_XOR:
      // Destination in a predicate selects the *SETP forms, otherwise *SET;
      // the source type selects the double, float or integer variant.
      if (insn->def(0).getFile() != FILE_PREDICATE) {
         if (isFloatType(insn->sType))
            if (insn->sType == TYPE_F64)
               emitDSET();
            else
               emitFSET();
         else
            emitISET();
      } else {
         if (isFloatType(insn->sType))
            if (insn->sType == TYPE_F64)
               emitDSETP();
            else
               emitFSETP();
         else
            emitISETP();
      }
      break;
   case OP_SELP:
      emitSEL();
      break;
   case OP_PRESIN:
   case OP_PREEX2:
      emitRRO();
      break;
   case OP_COS:
   case OP_SIN:
   case OP_EX2:
   case OP_LG2:
   case OP_RCP:
   case OP_RSQ:
      emitMUFU();
      break;
   case OP_AND:
   case OP_OR:
   case OP_XOR:
      emitLOP();
      break;
   case OP_NOT:
      emitNOT();
      break;
   case OP_LOAD:
      // The load encoding depends on the memory file of the address.
      switch (insn->src(0).getFile()) {
      case FILE_MEMORY_CONST : emitLDC(); break;
      case FILE_MEMORY_LOCAL : emitLDL(); break;
      case FILE_MEMORY_SHARED: emitLDS(); break;
      case FILE_MEMORY_GLOBAL: emitLD(); break;
      default:
         assert(!"invalid load");
         emitNOP();
         break;
      }
      break;
   case OP_STORE:
      // The store encoding depends on the memory file of the address.
      switch (insn->src(0).getFile()) {
      case FILE_MEMORY_LOCAL : emitSTL(); break;
      case FILE_MEMORY_SHARED: emitSTS(); break;
      case FILE_MEMORY_GLOBAL: emitST(); break;
      default:
         assert(!"invalid store");
         emitNOP();
         break;
      }
      break;
   case OP_ATOM:
      // Shared memory has its own encoding; otherwise an atomic without a
      // destination and with a sub-operation below CAS is emitted as RED.
      if (insn->src(0).getFile() == FILE_MEMORY_SHARED)
         emitATOMS();
      else
         if (!insn->defExists(0) && insn->subOp < NV50_IR_SUBOP_ATOM_CAS)
            emitRED();
         else
            emitATOM();
      break;
   case OP_CCTL:
      emitCCTL();
      break;
   case OP_VFETCH:
      emitALD();
      break;
   case OP_EXPORT:
      emitAST();
      break;
   case OP_PFETCH:
      emitISBERD();
      break;
   case OP_AFETCH:
      emitAL2P();
      break;
   case OP_LINTERP:
   case OP_PINTERP:
      emitIPA();
      break;
   case OP_PIXLD:
      emitPIXLD();
      break;
   case OP_TEX:
   case OP_TXB:
   case OP_TXL:
      emitTEX();
      break;
   case OP_TXF:
      emitTLD();
      break;
   case OP_TXG:
      emitTLD4();
      break;
   case OP_TXD:
      emitTXD();
      break;
   case OP_TXQ:
      emitTXQ();
      break;
   case OP_TXLQ:
      emitTMML();
      break;
   case OP_TEXBAR:
      emitDEPBAR();
      break;
   case OP_QUADOP:
      emitFSWZADD();
      break;
   case OP_NOP:
      emitNOP();
      break;
   case OP_DISCARD:
      emitKIL();
      break;
   case OP_EMIT:
   case OP_RESTART:
      emitOUT();
      break;
   case OP_BAR:
      emitBAR();
      break;
   case OP_MEMBAR:
      emitMEMBAR();
      break;
   case OP_VOTE:
      emitVOTE();
      break;
   case OP_SUSTB:
   case OP_SUSTP:
      emitSUSTx();
      break;
   case OP_SULDB:
   case OP_SULDP:
      emitSULDx();
      break;
   case OP_SUREDB:
   case OP_SUREDP:
      emitSUREDx();
      break;
   default:
      // Unknown opcode: still emit a NOP so the stream stays well-formed,
      // but report failure to the caller.
      assert(!"invalid opcode");
      emitNOP();
      ret = false;
      break;
   }

   // Join handling is not implemented here (placeholder kept as-is).
   if (insn->join) {
      /*XXX*/
   }

   // Advance past the 8-byte instruction (two 32-bit words).
   code += 2;
   codeSize += 8;
   return ret;
}
3471
3472 uint32_t
getMinEncodingSize(const Instruction * i) const3473 CodeEmitterGM107::getMinEncodingSize(const Instruction *i) const
3474 {
3475 return 8;
3476 }
3477
3478 /*******************************************************************************
3479 * sched data calculator
3480 ******************************************************************************/
3481
3482 class SchedDataCalculatorGM107 : public Pass
3483 {
3484 public:
SchedDataCalculatorGM107(const TargetGM107 * targ)3485 SchedDataCalculatorGM107(const TargetGM107 *targ) : targ(targ) {}
3486
3487 private:
3488 struct RegScores
3489 {
3490 struct ScoreData {
3491 int r[256];
3492 int p[8];
3493 int c;
3494 } rd, wr;
3495 int base;
3496
rebasenv50_ir::SchedDataCalculatorGM107::RegScores3497 void rebase(const int base)
3498 {
3499 const int delta = this->base - base;
3500 if (!delta)
3501 return;
3502 this->base = 0;
3503
3504 for (int i = 0; i < 256; ++i) {
3505 rd.r[i] += delta;
3506 wr.r[i] += delta;
3507 }
3508 for (int i = 0; i < 8; ++i) {
3509 rd.p[i] += delta;
3510 wr.p[i] += delta;
3511 }
3512 rd.c += delta;
3513 wr.c += delta;
3514 }
wipenv50_ir::SchedDataCalculatorGM107::RegScores3515 void wipe()
3516 {
3517 memset(&rd, 0, sizeof(rd));
3518 memset(&wr, 0, sizeof(wr));
3519 }
getLatestnv50_ir::SchedDataCalculatorGM107::RegScores3520 int getLatest(const ScoreData& d) const
3521 {
3522 int max = 0;
3523 for (int i = 0; i < 256; ++i)
3524 if (d.r[i] > max)
3525 max = d.r[i];
3526 for (int i = 0; i < 8; ++i)
3527 if (d.p[i] > max)
3528 max = d.p[i];
3529 if (d.c > max)
3530 max = d.c;
3531 return max;
3532 }
getLatestRdnv50_ir::SchedDataCalculatorGM107::RegScores3533 inline int getLatestRd() const
3534 {
3535 return getLatest(rd);
3536 }
getLatestWrnv50_ir::SchedDataCalculatorGM107::RegScores3537 inline int getLatestWr() const
3538 {
3539 return getLatest(wr);
3540 }
getLatestnv50_ir::SchedDataCalculatorGM107::RegScores3541 inline int getLatest() const
3542 {
3543 return MAX2(getLatestRd(), getLatestWr());
3544 }
setMaxnv50_ir::SchedDataCalculatorGM107::RegScores3545 void setMax(const RegScores *that)
3546 {
3547 for (int i = 0; i < 256; ++i) {
3548 rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
3549 wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
3550 }
3551 for (int i = 0; i < 8; ++i) {
3552 rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
3553 wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
3554 }
3555 rd.c = MAX2(rd.c, that->rd.c);
3556 wr.c = MAX2(wr.c, that->wr.c);
3557 }
printnv50_ir::SchedDataCalculatorGM107::RegScores3558 void print(int cycle)
3559 {
3560 for (int i = 0; i < 256; ++i) {
3561 if (rd.r[i] > cycle)
3562 INFO("rd $r%i @ %i\n", i, rd.r[i]);
3563 if (wr.r[i] > cycle)
3564 INFO("wr $r%i @ %i\n", i, wr.r[i]);
3565 }
3566 for (int i = 0; i < 8; ++i) {
3567 if (rd.p[i] > cycle)
3568 INFO("rd $p%i @ %i\n", i, rd.p[i]);
3569 if (wr.p[i] > cycle)
3570 INFO("wr $p%i @ %i\n", i, wr.p[i]);
3571 }
3572 if (rd.c > cycle)
3573 INFO("rd $c @ %i\n", rd.c);
3574 if (wr.c > cycle)
3575 INFO("wr $c @ %i\n", wr.c);
3576 }
3577 };
3578
3579 RegScores *score; // for current BB
3580 std::vector<RegScores> scoreBoards;
3581
3582 const TargetGM107 *targ;
3583 bool visit(Function *);
3584 bool visit(BasicBlock *);
3585
3586 void commitInsn(const Instruction *, int);
3587 int calcDelay(const Instruction *, int) const;
3588 void setDelay(Instruction *, int, const Instruction *);
3589 void recordWr(const Value *, int, int);
3590 void checkRd(const Value *, int, int&) const;
3591
3592 inline void emitYield(Instruction *);
3593 inline void emitStall(Instruction *, uint8_t);
3594 inline void emitReuse(Instruction *, uint8_t);
3595 inline void emitWrDepBar(Instruction *, uint8_t);
3596 inline void emitRdDepBar(Instruction *, uint8_t);
3597 inline void emitWtDepBar(Instruction *, uint8_t);
3598
3599 inline int getStall(const Instruction *) const;
3600 inline int getWrDepBar(const Instruction *) const;
3601 inline int getRdDepBar(const Instruction *) const;
3602 inline int getWtDepBar(const Instruction *) const;
3603
3604 void setReuseFlag(Instruction *);
3605
3606 inline void printSchedInfo(int, const Instruction *) const;
3607
3608 struct LiveBarUse {
LiveBarUsenv50_ir::SchedDataCalculatorGM107::LiveBarUse3609 LiveBarUse(Instruction *insn, Instruction *usei)
3610 : insn(insn), usei(usei) { }
3611 Instruction *insn;
3612 Instruction *usei;
3613 };
3614
3615 struct LiveBarDef {
LiveBarDefnv50_ir::SchedDataCalculatorGM107::LiveBarDef3616 LiveBarDef(Instruction *insn, Instruction *defi)
3617 : insn(insn), defi(defi) { }
3618 Instruction *insn;
3619 Instruction *defi;
3620 };
3621
3622 bool insertBarriers(BasicBlock *);
3623
3624 Instruction *findFirstUse(const Instruction *) const;
3625 Instruction *findFirstDef(const Instruction *) const;
3626
3627 bool needRdDepBar(const Instruction *) const;
3628 bool needWrDepBar(const Instruction *) const;
3629 };
3630
3631 inline void
emitStall(Instruction * insn,uint8_t cnt)3632 SchedDataCalculatorGM107::emitStall(Instruction *insn, uint8_t cnt)
3633 {
3634 assert(cnt < 16);
3635 insn->sched |= cnt;
3636 }
3637
3638 inline void
emitYield(Instruction * insn)3639 SchedDataCalculatorGM107::emitYield(Instruction *insn)
3640 {
3641 insn->sched |= 1 << 4;
3642 }
3643
3644 inline void
emitWrDepBar(Instruction * insn,uint8_t id)3645 SchedDataCalculatorGM107::emitWrDepBar(Instruction *insn, uint8_t id)
3646 {
3647 assert(id < 6);
3648 if ((insn->sched & 0xe0) == 0xe0)
3649 insn->sched ^= 0xe0;
3650 insn->sched |= id << 5;
3651 }
3652
3653 inline void
emitRdDepBar(Instruction * insn,uint8_t id)3654 SchedDataCalculatorGM107::emitRdDepBar(Instruction *insn, uint8_t id)
3655 {
3656 assert(id < 6);
3657 if ((insn->sched & 0x700) == 0x700)
3658 insn->sched ^= 0x700;
3659 insn->sched |= id << 8;
3660 }
3661
3662 inline void
emitWtDepBar(Instruction * insn,uint8_t id)3663 SchedDataCalculatorGM107::emitWtDepBar(Instruction *insn, uint8_t id)
3664 {
3665 assert(id < 6);
3666 insn->sched |= 1 << (11 + id);
3667 }
3668
3669 inline void
emitReuse(Instruction * insn,uint8_t id)3670 SchedDataCalculatorGM107::emitReuse(Instruction *insn, uint8_t id)
3671 {
3672 assert(id < 4);
3673 insn->sched |= 1 << (17 + id);
3674 }
3675
3676 inline void
printSchedInfo(int cycle,const Instruction * insn) const3677 SchedDataCalculatorGM107::printSchedInfo(int cycle,
3678 const Instruction *insn) const
3679 {
3680 uint8_t st, yl, wr, rd, wt, ru;
3681
3682 st = (insn->sched & 0x00000f) >> 0;
3683 yl = (insn->sched & 0x000010) >> 4;
3684 wr = (insn->sched & 0x0000e0) >> 5;
3685 rd = (insn->sched & 0x000700) >> 8;
3686 wt = (insn->sched & 0x01f800) >> 11;
3687 ru = (insn->sched & 0x1e0000) >> 17;
3688
3689 INFO("cycle %i, (st 0x%x, yl 0x%x, wr 0x%x, rd 0x%x, wt 0x%x, ru 0x%x)\n",
3690 cycle, st, yl, wr, rd, wt, ru);
3691 }
3692
3693 inline int
getStall(const Instruction * insn) const3694 SchedDataCalculatorGM107::getStall(const Instruction *insn) const
3695 {
3696 return insn->sched & 0xf;
3697 }
3698
3699 inline int
getWrDepBar(const Instruction * insn) const3700 SchedDataCalculatorGM107::getWrDepBar(const Instruction *insn) const
3701 {
3702 return (insn->sched & 0x0000e0) >> 5;
3703 }
3704
3705 inline int
getRdDepBar(const Instruction * insn) const3706 SchedDataCalculatorGM107::getRdDepBar(const Instruction *insn) const
3707 {
3708 return (insn->sched & 0x000700) >> 8;
3709 }
3710
3711 inline int
getWtDepBar(const Instruction * insn) const3712 SchedDataCalculatorGM107::getWtDepBar(const Instruction *insn) const
3713 {
3714 return (insn->sched & 0x01f800) >> 11;
3715 }
3716
3717 // Emit the reuse flag which allows to make use of the new memory hierarchy
3718 // introduced since Maxwell, the operand reuse cache.
3719 //
3720 // It allows to reduce bank conflicts by caching operands. Each time you issue
3721 // an instruction, that flag can tell the hw which operands are going to be
3722 // re-used by the next instruction. Note that the next instruction has to use
3723 // the same GPR id in the same operand slot.
3724 void
setReuseFlag(Instruction * insn)3725 SchedDataCalculatorGM107::setReuseFlag(Instruction *insn)
3726 {
3727 Instruction *next = insn->next;
3728 BitSet defs(255, 1);
3729
3730 if (!targ->isReuseSupported(insn))
3731 return;
3732
3733 for (int d = 0; insn->defExists(d); ++d) {
3734 const Value *def = insn->def(d).rep();
3735 if (insn->def(d).getFile() != FILE_GPR)
3736 continue;
3737 if (typeSizeof(insn->dType) != 4 || def->reg.data.id == 255)
3738 continue;
3739 defs.set(def->reg.data.id);
3740 }
3741
3742 for (int s = 0; insn->srcExists(s); s++) {
3743 const Value *src = insn->src(s).rep();
3744 if (insn->src(s).getFile() != FILE_GPR)
3745 continue;
3746 if (typeSizeof(insn->sType) != 4 || src->reg.data.id == 255)
3747 continue;
3748 if (defs.test(src->reg.data.id))
3749 continue;
3750 if (!next->srcExists(s) || next->src(s).getFile() != FILE_GPR)
3751 continue;
3752 if (src->reg.data.id != next->getSrc(s)->reg.data.id)
3753 continue;
3754 assert(s < 4);
3755 emitReuse(insn, s);
3756 }
3757 }
3758
3759 void
recordWr(const Value * v,int cycle,int ready)3760 SchedDataCalculatorGM107::recordWr(const Value *v, int cycle, int ready)
3761 {
3762 int a = v->reg.data.id, b;
3763
3764 switch (v->reg.file) {
3765 case FILE_GPR:
3766 b = a + v->reg.size / 4;
3767 for (int r = a; r < b; ++r)
3768 score->rd.r[r] = ready;
3769 break;
3770 case FILE_PREDICATE:
3771 // To immediately use a predicate set by any instructions, the minimum
3772 // number of stall counts is 13.
3773 score->rd.p[a] = cycle + 13;
3774 break;
3775 case FILE_FLAGS:
3776 score->rd.c = ready;
3777 break;
3778 default:
3779 break;
3780 }
3781 }
3782
3783 void
checkRd(const Value * v,int cycle,int & delay) const3784 SchedDataCalculatorGM107::checkRd(const Value *v, int cycle, int &delay) const
3785 {
3786 int a = v->reg.data.id, b;
3787 int ready = cycle;
3788
3789 switch (v->reg.file) {
3790 case FILE_GPR:
3791 b = a + v->reg.size / 4;
3792 for (int r = a; r < b; ++r)
3793 ready = MAX2(ready, score->rd.r[r]);
3794 break;
3795 case FILE_PREDICATE:
3796 ready = MAX2(ready, score->rd.p[a]);
3797 break;
3798 case FILE_FLAGS:
3799 ready = MAX2(ready, score->rd.c);
3800 break;
3801 default:
3802 break;
3803 }
3804 if (cycle < ready)
3805 delay = MAX2(delay, ready - cycle);
3806 }
3807
3808 void
commitInsn(const Instruction * insn,int cycle)3809 SchedDataCalculatorGM107::commitInsn(const Instruction *insn, int cycle)
3810 {
3811 const int ready = cycle + targ->getLatency(insn);
3812
3813 for (int d = 0; insn->defExists(d); ++d)
3814 recordWr(insn->getDef(d), cycle, ready);
3815
3816 #ifdef GM107_DEBUG_SCHED_DATA
3817 score->print(cycle);
3818 #endif
3819 }
3820
3821 #define GM107_MIN_ISSUE_DELAY 0x1
3822 #define GM107_MAX_ISSUE_DELAY 0xf
3823
3824 int
calcDelay(const Instruction * insn,int cycle) const3825 SchedDataCalculatorGM107::calcDelay(const Instruction *insn, int cycle) const
3826 {
3827 int delay = 0, ready = cycle;
3828
3829 for (int s = 0; insn->srcExists(s); ++s)
3830 checkRd(insn->getSrc(s), cycle, delay);
3831
3832 // TODO: make use of getReadLatency()!
3833
3834 return MAX2(delay, ready - cycle);
3835 }
3836
3837 void
setDelay(Instruction * insn,int delay,const Instruction * next)3838 SchedDataCalculatorGM107::setDelay(Instruction *insn, int delay,
3839 const Instruction *next)
3840 {
3841 const OpClass cl = targ->getOpClass(insn->op);
3842 int wr, rd;
3843
3844 if (insn->op == OP_EXIT ||
3845 insn->op == OP_BAR ||
3846 insn->op == OP_MEMBAR) {
3847 delay = GM107_MAX_ISSUE_DELAY;
3848 } else
3849 if (insn->op == OP_QUADON ||
3850 insn->op == OP_QUADPOP) {
3851 delay = 0xd;
3852 } else
3853 if (cl == OPCLASS_FLOW || insn->join) {
3854 delay = 0xd;
3855 }
3856
3857 if (!next || !targ->canDualIssue(insn, next)) {
3858 delay = CLAMP(delay, GM107_MIN_ISSUE_DELAY, GM107_MAX_ISSUE_DELAY);
3859 } else {
3860 delay = 0x0; // dual-issue
3861 }
3862
3863 wr = getWrDepBar(insn);
3864 rd = getRdDepBar(insn);
3865
3866 if (delay == GM107_MIN_ISSUE_DELAY && (wr & rd) != 7) {
3867 // Barriers take one additional clock cycle to become active on top of
3868 // the clock consumed by the instruction producing it.
3869 if (!next || insn->bb != next->bb) {
3870 delay = 0x2;
3871 } else {
3872 int wt = getWtDepBar(next);
3873 if ((wt & (1 << wr)) | (wt & (1 << rd)))
3874 delay = 0x2;
3875 }
3876 }
3877
3878 emitStall(insn, delay);
3879 }
3880
3881
3882 // Return true when the given instruction needs to emit a read dependency
3883 // barrier (for WaR hazards) because it doesn't operate at a fixed latency, and
3884 // setting the maximum number of stall counts is not enough.
3885 bool
needRdDepBar(const Instruction * insn) const3886 SchedDataCalculatorGM107::needRdDepBar(const Instruction *insn) const
3887 {
3888 BitSet srcs(255, 1), defs(255, 1);
3889 int a, b;
3890
3891 if (!targ->isBarrierRequired(insn))
3892 return false;
3893
3894 // Do not emit a read dependency barrier when the instruction doesn't use
3895 // any GPR (like st s[0x4] 0x0) as input because it's unnecessary.
3896 for (int s = 0; insn->srcExists(s); ++s) {
3897 const Value *src = insn->src(s).rep();
3898 if (insn->src(s).getFile() != FILE_GPR)
3899 continue;
3900 if (src->reg.data.id == 255)
3901 continue;
3902
3903 a = src->reg.data.id;
3904 b = a + src->reg.size / 4;
3905 for (int r = a; r < b; ++r)
3906 srcs.set(r);
3907 }
3908
3909 if (!srcs.popCount())
3910 return false;
3911
3912 // Do not emit a read dependency barrier when the output GPRs are equal to
3913 // the input GPRs (like rcp $r0 $r0) because a write dependency barrier will
3914 // be produced and WaR hazards are prevented.
3915 for (int d = 0; insn->defExists(d); ++d) {
3916 const Value *def = insn->def(d).rep();
3917 if (insn->def(d).getFile() != FILE_GPR)
3918 continue;
3919 if (def->reg.data.id == 255)
3920 continue;
3921
3922 a = def->reg.data.id;
3923 b = a + def->reg.size / 4;
3924 for (int r = a; r < b; ++r)
3925 defs.set(r);
3926 }
3927
3928 srcs.andNot(defs);
3929 if (!srcs.popCount())
3930 return false;
3931
3932 return true;
3933 }
3934
3935 // Return true when the given instruction needs to emit a write dependency
3936 // barrier (for RaW hazards) because it doesn't operate at a fixed latency, and
3937 // setting the maximum number of stall counts is not enough. This is only legal
3938 // if the instruction output something.
3939 bool
needWrDepBar(const Instruction * insn) const3940 SchedDataCalculatorGM107::needWrDepBar(const Instruction *insn) const
3941 {
3942 if (!targ->isBarrierRequired(insn))
3943 return false;
3944
3945 for (int d = 0; insn->defExists(d); ++d) {
3946 if (insn->def(d).getFile() == FILE_GPR ||
3947 insn->def(d).getFile() == FILE_PREDICATE)
3948 return true;
3949 }
3950 return false;
3951 }
3952
3953 // Find the next instruction inside the same basic block which uses the output
3954 // of the given instruction in order to avoid RaW hazards.
3955 Instruction *
findFirstUse(const Instruction * bari) const3956 SchedDataCalculatorGM107::findFirstUse(const Instruction *bari) const
3957 {
3958 Instruction *insn, *next;
3959 int minGPR, maxGPR;
3960
3961 if (!bari->defExists(0))
3962 return NULL;
3963
3964 minGPR = bari->def(0).rep()->reg.data.id;
3965 maxGPR = minGPR + bari->def(0).rep()->reg.size / 4 - 1;
3966
3967 for (insn = bari->next; insn != NULL; insn = next) {
3968 next = insn->next;
3969
3970 for (int s = 0; insn->srcExists(s); ++s) {
3971 const Value *src = insn->src(s).rep();
3972 if (bari->def(0).getFile() == FILE_GPR) {
3973 if (insn->src(s).getFile() != FILE_GPR ||
3974 src->reg.data.id + src->reg.size / 4 - 1 < minGPR ||
3975 src->reg.data.id > maxGPR)
3976 continue;
3977 return insn;
3978 } else
3979 if (bari->def(0).getFile() == FILE_PREDICATE) {
3980 if (insn->src(s).getFile() != FILE_PREDICATE ||
3981 src->reg.data.id != minGPR)
3982 continue;
3983 return insn;
3984 }
3985 }
3986 }
3987 return NULL;
3988 }
3989
3990 // Find the next instruction inside the same basic block which overwrites, at
3991 // least, one source of the given instruction in order to avoid WaR hazards.
3992 Instruction *
findFirstDef(const Instruction * bari) const3993 SchedDataCalculatorGM107::findFirstDef(const Instruction *bari) const
3994 {
3995 Instruction *insn, *next;
3996 int minGPR, maxGPR;
3997
3998 for (insn = bari->next; insn != NULL; insn = next) {
3999 next = insn->next;
4000
4001 for (int d = 0; insn->defExists(d); ++d) {
4002 const Value *def = insn->def(d).rep();
4003 if (insn->def(d).getFile() != FILE_GPR)
4004 continue;
4005
4006 minGPR = def->reg.data.id;
4007 maxGPR = minGPR + def->reg.size / 4 - 1;
4008
4009 for (int s = 0; bari->srcExists(s); ++s) {
4010 const Value *src = bari->src(s).rep();
4011 if (bari->src(s).getFile() != FILE_GPR ||
4012 src->reg.data.id + src->reg.size / 4 - 1 < minGPR ||
4013 src->reg.data.id > maxGPR)
4014 continue;
4015 return insn;
4016 }
4017 }
4018 }
4019 return NULL;
4020 }
4021
4022 // Dependency barriers:
4023 // This pass is a bit ugly and could probably be improved by performing a
4024 // better allocation.
4025 //
4026 // The main idea is to avoid WaR and RaW hazards by emitting read/write
4027 // dependency barriers using the control codes.
4028 bool
insertBarriers(BasicBlock * bb)4029 SchedDataCalculatorGM107::insertBarriers(BasicBlock *bb)
4030 {
4031 std::list<LiveBarUse> live_uses;
4032 std::list<LiveBarDef> live_defs;
4033 Instruction *insn, *next;
4034 BitSet bars(6, 1);
4035 int bar_id;
4036
4037 for (insn = bb->getEntry(); insn != NULL; insn = next) {
4038 Instruction *usei = NULL, *defi = NULL;
4039 bool need_wr_bar, need_rd_bar;
4040
4041 next = insn->next;
4042
4043 // Expire old barrier uses.
4044 for (std::list<LiveBarUse>::iterator it = live_uses.begin();
4045 it != live_uses.end();) {
4046 if (insn->serial >= it->usei->serial) {
4047 int wr = getWrDepBar(it->insn);
4048 emitWtDepBar(insn, wr);
4049 bars.clr(wr); // free barrier
4050 it = live_uses.erase(it);
4051 continue;
4052 }
4053 ++it;
4054 }
4055
4056 // Expire old barrier defs.
4057 for (std::list<LiveBarDef>::iterator it = live_defs.begin();
4058 it != live_defs.end();) {
4059 if (insn->serial >= it->defi->serial) {
4060 int rd = getRdDepBar(it->insn);
4061 emitWtDepBar(insn, rd);
4062 bars.clr(rd); // free barrier
4063 it = live_defs.erase(it);
4064 continue;
4065 }
4066 ++it;
4067 }
4068
4069 need_wr_bar = needWrDepBar(insn);
4070 need_rd_bar = needRdDepBar(insn);
4071
4072 if (need_wr_bar) {
4073 // When the instruction requires to emit a write dependency barrier
4074 // (all which write something at a variable latency), find the next
4075 // instruction which reads the outputs.
4076 usei = findFirstUse(insn);
4077
4078 // Allocate and emit a new barrier.
4079 bar_id = bars.findFreeRange(1);
4080 if (bar_id == -1)
4081 bar_id = 5;
4082 bars.set(bar_id);
4083 emitWrDepBar(insn, bar_id);
4084 if (usei)
4085 live_uses.push_back(LiveBarUse(insn, usei));
4086 }
4087
4088 if (need_rd_bar) {
4089 // When the instruction requires to emit a read dependency barrier
4090 // (all which read something at a variable latency), find the next
4091 // instruction which will write the inputs.
4092 defi = findFirstDef(insn);
4093
4094 if (usei && defi && usei->serial <= defi->serial)
4095 continue;
4096
4097 // Allocate and emit a new barrier.
4098 bar_id = bars.findFreeRange(1);
4099 if (bar_id == -1)
4100 bar_id = 5;
4101 bars.set(bar_id);
4102 emitRdDepBar(insn, bar_id);
4103 if (defi)
4104 live_defs.push_back(LiveBarDef(insn, defi));
4105 }
4106 }
4107
4108 // Remove unnecessary barrier waits.
4109 BitSet alive_bars(6, 1);
4110 for (insn = bb->getEntry(); insn != NULL; insn = next) {
4111 int wr, rd, wt;
4112
4113 next = insn->next;
4114
4115 wr = getWrDepBar(insn);
4116 rd = getRdDepBar(insn);
4117 wt = getWtDepBar(insn);
4118
4119 for (int idx = 0; idx < 6; ++idx) {
4120 if (!(wt & (1 << idx)))
4121 continue;
4122 if (!alive_bars.test(idx)) {
4123 insn->sched &= ~(1 << (11 + idx));
4124 } else {
4125 alive_bars.clr(idx);
4126 }
4127 }
4128
4129 if (wr < 6)
4130 alive_bars.set(wr);
4131 if (rd < 6)
4132 alive_bars.set(rd);
4133 }
4134
4135 return true;
4136 }
4137
4138 bool
visit(Function * func)4139 SchedDataCalculatorGM107::visit(Function *func)
4140 {
4141 ArrayList insns;
4142
4143 func->orderInstructions(insns);
4144
4145 scoreBoards.resize(func->cfg.getSize());
4146 for (size_t i = 0; i < scoreBoards.size(); ++i)
4147 scoreBoards[i].wipe();
4148 return true;
4149 }
4150
4151 bool
visit(BasicBlock * bb)4152 SchedDataCalculatorGM107::visit(BasicBlock *bb)
4153 {
4154 Instruction *insn, *next = NULL;
4155 int cycle = 0;
4156
4157 for (Instruction *insn = bb->getEntry(); insn; insn = insn->next) {
4158 /*XXX*/
4159 insn->sched = 0x7e0;
4160 }
4161
4162 if (!debug_get_bool_option("NV50_PROG_SCHED", true))
4163 return true;
4164
4165 // Insert read/write dependency barriers for instructions which don't
4166 // operate at a fixed latency.
4167 insertBarriers(bb);
4168
4169 score = &scoreBoards.at(bb->getId());
4170
4171 for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
4172 // back branches will wait until all target dependencies are satisfied
4173 if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
4174 continue;
4175 BasicBlock *in = BasicBlock::get(ei.getNode());
4176 score->setMax(&scoreBoards.at(in->getId()));
4177 }
4178
4179 #ifdef GM107_DEBUG_SCHED_DATA
4180 INFO("=== BB:%i initial scores\n", bb->getId());
4181 score->print(cycle);
4182 #endif
4183
4184 // Because barriers are allocated locally (intra-BB), we have to make sure
4185 // that all produced barriers have been consumed before entering inside a
4186 // new basic block. The best way is to do a global allocation pre RA but
4187 // it's really more difficult, especially because of the phi nodes. Anyways,
4188 // it seems like that waiting on a barrier which has already been consumed
4189 // doesn't add any additional cost, it's just not elegant!
4190 Instruction *start = bb->getEntry();
4191 if (start && bb->cfg.incidentCount() > 0) {
4192 for (int b = 0; b < 6; b++)
4193 emitWtDepBar(start, b);
4194 }
4195
4196 for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
4197 next = insn->next;
4198
4199 commitInsn(insn, cycle);
4200 int delay = calcDelay(next, cycle);
4201 setDelay(insn, delay, next);
4202 cycle += getStall(insn);
4203
4204 setReuseFlag(insn);
4205
4206 // XXX: The yield flag seems to destroy a bunch of things when it is
4207 // set on every instruction, need investigation.
4208 //emitYield(insn);
4209
4210 #ifdef GM107_DEBUG_SCHED_DATA
4211 printSchedInfo(cycle, insn);
4212 insn->print();
4213 next->print();
4214 #endif
4215 }
4216
4217 if (!insn)
4218 return true;
4219 commitInsn(insn, cycle);
4220
4221 int bbDelay = -1;
4222
4223 #ifdef GM107_DEBUG_SCHED_DATA
4224 fprintf(stderr, "last instruction is : ");
4225 insn->print();
4226 fprintf(stderr, "cycle=%d\n", cycle);
4227 #endif
4228
4229 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
4230 BasicBlock *out = BasicBlock::get(ei.getNode());
4231
4232 if (ei.getType() != Graph::Edge::BACK) {
4233 // Only test the first instruction of the outgoing block.
4234 next = out->getEntry();
4235 if (next) {
4236 bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
4237 } else {
4238 // When the outgoing BB is empty, make sure to set the number of
4239 // stall counts needed by the instruction because we don't know the
4240 // next instruction.
4241 bbDelay = MAX2(bbDelay, targ->getLatency(insn));
4242 }
4243 } else {
4244 // Wait until all dependencies are satisfied.
4245 const int regsFree = score->getLatest();
4246 next = out->getFirst();
4247 for (int c = cycle; next && c < regsFree; next = next->next) {
4248 bbDelay = MAX2(bbDelay, calcDelay(next, c));
4249 c += getStall(next);
4250 }
4251 next = NULL;
4252 }
4253 }
4254 if (bb->cfg.outgoingCount() != 1)
4255 next = NULL;
4256 setDelay(insn, bbDelay, next);
4257 cycle += getStall(insn);
4258
4259 score->rebase(cycle); // common base for initializing out blocks' scores
4260 return true;
4261 }
4262
4263 /*******************************************************************************
4264 * main
4265 ******************************************************************************/
4266
4267 void
prepareEmission(Function * func)4268 CodeEmitterGM107::prepareEmission(Function *func)
4269 {
4270 SchedDataCalculatorGM107 sched(targGM107);
4271 CodeEmitter::prepareEmission(func);
4272 sched.run(func, true, true);
4273 }
4274
// Return the number of 24-byte groups needed to hold @size bytes, i.e.
// ceil(size / 24).
//
// Computed from quotient and remainder rather than as (size + 23) / 24, so
// the result is correct for every uint32_t value: the addition wraps around
// for sizes within 23 of UINT32_MAX.
static inline uint32_t sizeToBundlesGM107(uint32_t size)
{
   return size / 24 + (size % 24 != 0 ? 1 : 0);
}
4279
4280 void
prepareEmission(Program * prog)4281 CodeEmitterGM107::prepareEmission(Program *prog)
4282 {
4283 for (ArrayList::Iterator fi = prog->allFuncs.iterator();
4284 !fi.end(); fi.next()) {
4285 Function *func = reinterpret_cast<Function *>(fi.get());
4286 func->binPos = prog->binSize;
4287 prepareEmission(func);
4288
4289 // adjust sizes & positions for schedulding info:
4290 if (prog->getTarget()->hasSWSched) {
4291 uint32_t adjPos = func->binPos;
4292 BasicBlock *bb = NULL;
4293 for (int i = 0; i < func->bbCount; ++i) {
4294 bb = func->bbArray[i];
4295 int32_t adjSize = bb->binSize;
4296 if (adjPos % 32) {
4297 adjSize -= 32 - adjPos % 32;
4298 if (adjSize < 0)
4299 adjSize = 0;
4300 }
4301 adjSize = bb->binSize + sizeToBundlesGM107(adjSize) * 8;
4302 bb->binPos = adjPos;
4303 bb->binSize = adjSize;
4304 adjPos += adjSize;
4305 }
4306 if (bb)
4307 func->binSize = adjPos - func->binPos;
4308 }
4309
4310 prog->binSize += func->binSize;
4311 }
4312 }
4313
CodeEmitterGM107(const TargetGM107 * target)4314 CodeEmitterGM107::CodeEmitterGM107(const TargetGM107 *target)
4315 : CodeEmitter(target),
4316 targGM107(target),
4317 writeIssueDelays(target->hasSWSched)
4318 {
4319 code = NULL;
4320 codeSize = codeSizeLimit = 0;
4321 relocInfo = NULL;
4322 }
4323
4324 CodeEmitter *
createCodeEmitterGM107(Program::Type type)4325 TargetGM107::createCodeEmitterGM107(Program::Type type)
4326 {
4327 CodeEmitterGM107 *emit = new CodeEmitterGM107(this);
4328 emit->setProgramType(type);
4329 return emit;
4330 }
4331
4332 } // namespace nv50_ir
4333