1 /*
2 * Copyright 2020 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 #include "nv50_ir_emit_gv100.h"
23 #include "nv50_ir_sched_gm107.h"
24
25 namespace nv50_ir {
26
27 /*******************************************************************************
28 * instruction format helpers
29 ******************************************************************************/
30
// Bits of the `forms` argument of emitFormA(). FA_NODEF suppresses the
// emission of def(0); the others name an operand layout that emitFormA()
// asserts is allowed before it selects the matching encoding. The letters
// follow the register files tested there for the src1/src2 arguments:
// R = FILE_GPR (or slot unused), I = FILE_IMMEDIATE, C = FILE_MEMORY_CONST.
#define FA_NODEF (1 << 0)
#define FA_RRR (1 << 1)
#define FA_RRI (1 << 2)
#define FA_RRC (1 << 3)
#define FA_RIR (1 << 4)
#define FA_RCR (1 << 5)

// A source argument of the emitFormA*() helpers keeps the instruction source
// index in the bits selected by FA_SRC_MASK; FA_SRC_NEG / FA_SRC_ABS are
// extracted separately and handed to emitNEG() / emitABS() as third argument.
#define FA_SRC_MASK 0x0ff
#define FA_SRC_NEG 0x100
#define FA_SRC_ABS 0x200

// EMPTY marks an unused source slot (the helpers test for "< 0"). The
// remaining macros wrap a source index and OR in the FA_SRC_* bits shown.
#define EMPTY -1
#define __(a) (a) // no source modifiers
#define _A(a) ((a) | FA_SRC_ABS)
#define N_(a) ((a) | FA_SRC_NEG)
#define NA(a) ((a) | FA_SRC_NEG | FA_SRC_ABS)
47
48 void
emitFormA_I32(int src)49 CodeEmitterGV100::emitFormA_I32(int src)
50 {
51 emitIMMD(32, 32, insn->src(src));
52 if (insn->src(src).mod.abs())
53 code[1] &= 0x7fffffff;
54 if (insn->src(src).mod.neg())
55 code[1] ^= 0x80000000;
56 }
57
58 void
emitFormA_RRC(uint16_t op,int src1,int src2)59 CodeEmitterGV100::emitFormA_RRC(uint16_t op, int src1, int src2)
60 {
61 emitInsn(op);
62 if (src1 >= 0) {
63 emitNEG (75, (src1 & FA_SRC_MASK), (src1 & FA_SRC_NEG));
64 emitABS (74, (src1 & FA_SRC_MASK), (src1 & FA_SRC_ABS));
65 emitGPR (64, insn->src(src1 & FA_SRC_MASK));
66 }
67 if (src2 >= 0) {
68 emitNEG (63, (src2 & FA_SRC_MASK), (src2 & FA_SRC_NEG));
69 emitABS (62, (src2 & FA_SRC_MASK), (src2 & FA_SRC_ABS));
70 emitCBUF(54, -1, 38, 0, 2, insn->src(src2 & FA_SRC_MASK));
71 }
72 }
73
74 void
emitFormA_RRI(uint16_t op,int src1,int src2)75 CodeEmitterGV100::emitFormA_RRI(uint16_t op, int src1, int src2)
76 {
77 emitInsn(op);
78 if (src1 >= 0) {
79 emitNEG (75, (src1 & FA_SRC_MASK), (src1 & FA_SRC_NEG));
80 emitABS (74, (src1 & FA_SRC_MASK), (src1 & FA_SRC_ABS));
81 emitGPR (64, insn->src(src1 & FA_SRC_MASK));
82 }
83 if (src2 >= 0)
84 emitFormA_I32(src2 & FA_SRC_MASK);
85 }
86
87 void
emitFormA_RRR(uint16_t op,int src1,int src2)88 CodeEmitterGV100::emitFormA_RRR(uint16_t op, int src1, int src2)
89 {
90 emitInsn(op);
91 if (src2 >= 0) {
92 emitNEG (75, (src2 & FA_SRC_MASK), (src2 & FA_SRC_NEG));
93 emitABS (74, (src2 & FA_SRC_MASK), (src2 & FA_SRC_ABS));
94 emitGPR (64, insn->src(src2 & FA_SRC_MASK));
95 }
96
97 if (src1 >= 0) {
98 emitNEG (63, (src1 & FA_SRC_MASK), (src1 & FA_SRC_NEG));
99 emitABS (62, (src1 & FA_SRC_MASK), (src1 & FA_SRC_ABS));
100 emitGPR (32, insn->src(src1 & FA_SRC_MASK));
101 }
102 }
103
104 void
emitFormA(uint16_t op,uint8_t forms,int src0,int src1,int src2)105 CodeEmitterGV100::emitFormA(uint16_t op, uint8_t forms,
106 int src0, int src1, int src2)
107 {
108 switch ((src1 < 0) ? FILE_GPR : insn->src(src1 & FA_SRC_MASK).getFile()) {
109 case FILE_GPR:
110 switch ((src2 < 0) ? FILE_GPR : insn->src(src2 & FA_SRC_MASK).getFile()) {
111 case FILE_GPR:
112 assert(forms & FA_RRR);
113 emitFormA_RRR((1 << 9) | op, src1, src2);
114 break;
115 case FILE_IMMEDIATE:
116 assert(forms & FA_RRI);
117 emitFormA_RRI((2 << 9) | op, src1, src2);
118 break;
119 case FILE_MEMORY_CONST:
120 assert(forms & FA_RRC);
121 emitFormA_RRC((3 << 9) | op, src1, src2);
122 break;
123 default:
124 assert(!"bad src2 file");
125 break;
126 }
127 break;
128 case FILE_IMMEDIATE:
129 assert((src2 < 0) || insn->src(src2 & FA_SRC_MASK).getFile() == FILE_GPR);
130 assert(forms & FA_RIR);
131 emitFormA_RRI((4 << 9) | op, src2, src1);
132 break;
133 case FILE_MEMORY_CONST:
134 assert((src2 < 0) || insn->src(src2 & FA_SRC_MASK).getFile() == FILE_GPR);
135 assert(forms & FA_RCR);
136 emitFormA_RRC((5 << 9) | op, src2, src1);
137 break;
138 default:
139 assert(!"bad src1 file");
140 break;
141 }
142
143 if (src0 >= 0) {
144 assert(insn->src(src0 & FA_SRC_MASK).getFile() == FILE_GPR);
145 emitABS(73, (src0 & FA_SRC_MASK), (src0 & FA_SRC_ABS));
146 emitNEG(72, (src0 & FA_SRC_MASK), (src0 & FA_SRC_NEG));
147 emitGPR(24, insn->src(src0 & FA_SRC_MASK));
148 }
149
150 if (!(forms & FA_NODEF))
151 emitGPR(16, insn->def(0));
152 }
153
154 /*******************************************************************************
155 * control
156 ******************************************************************************/
157
158 void
emitBRA()159 CodeEmitterGV100::emitBRA()
160 {
161 const FlowInstruction *insn = this->insn->asFlow();
162 int64_t target = ((int64_t)insn->target.bb->binPos - (codeSize + 0x10)) / 4;
163
164 assert(!insn->indirect && !insn->absolute);
165
166 emitInsn (0x947);
167 emitField(34, 48, target);
168 emitPRED (87);
169 emitField(86, 2, 0); // ./.INC/.DEC
170 }
171
172 void
emitEXIT()173 CodeEmitterGV100::emitEXIT()
174 {
175 emitInsn (0x94d);
176 emitNOT (90);
177 emitPRED (87);
178 emitField(85, 1, 0); // .NO_ATEXIT
179 emitField(84, 2, 0); // ./.KEEPREFCOUNT/.PREEMPTED/.INVALID3
180 }
181
182 void
emitKILL()183 CodeEmitterGV100::emitKILL()
184 {
185 emitInsn(0x95b);
186 emitPRED(87);
187 }
188
189 void
emitNOP()190 CodeEmitterGV100::emitNOP()
191 {
192 emitInsn(0x918);
193 }
194
195 void
emitWARPSYNC()196 CodeEmitterGV100::emitWARPSYNC()
197 {
198 emitFormA(0x148, FA_NODEF | FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
199 emitNOT (90);
200 emitPRED (87);
201 }
202
203 /*******************************************************************************
204 * movement / conversion
205 ******************************************************************************/
206
207 void
emitCS2R()208 CodeEmitterGV100::emitCS2R()
209 {
210 emitInsn(0x805);
211 emitSYS (72, insn->src(0));
212 emitGPR (16, insn->def(0));
213 }
214
215 void
emitF2F()216 CodeEmitterGV100::emitF2F()
217 {
218 if (typeSizeof(insn->sType) != 8 && typeSizeof(insn->dType) != 8)
219 emitFormA(0x104, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);
220 else
221 emitFormA(0x110, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);
222 emitField(84, 2, util_logbase2(typeSizeof(insn->sType)));
223 emitFMZ (80, 1);
224 emitRND (78);
225 emitField(75, 2, util_logbase2(typeSizeof(insn->dType)));
226 emitField(60, 2, insn->subOp); // ./.H1/.INVALID2/.INVALID3
227 }
228
229 void
emitF2I()230 CodeEmitterGV100::emitF2I()
231 {
232 if (typeSizeof(insn->sType) != 8 && typeSizeof(insn->dType) != 8)
233 emitFormA(0x105, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);
234 else
235 emitFormA(0x111, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);
236 emitField(84, 2, util_logbase2(typeSizeof(insn->sType)));
237 emitFMZ (80, 1);
238 emitRND (78);
239 emitField(77, 1, 0); // .NTZ
240 emitField(75, 2, util_logbase2(typeSizeof(insn->dType)));
241 emitField(72, 1, isSignedType(insn->dType));
242 }
243
244 void
emitFRND()245 CodeEmitterGV100::emitFRND()
246 {
247 int subop = 0;
248
249 switch (insn->op) {
250 case OP_CVT:
251 switch (insn->rnd) {
252 case ROUND_NI: subop = 0; break;
253 case ROUND_MI: subop = 1; break;
254 case ROUND_PI: subop = 2; break;
255 case ROUND_ZI: subop = 3; break;
256 default:
257 assert(!"invalid FRND mode");
258 break;
259 }
260 break;
261 case OP_FLOOR: subop = 1; break;
262 case OP_CEIL : subop = 2; break;
263 case OP_TRUNC: subop = 3; break;
264 default:
265 assert(!"invalid FRND opcode");
266 break;
267 }
268
269 if (typeSizeof(insn->sType) != 8 && typeSizeof(insn->dType) != 8)
270 emitFormA(0x107, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);
271 else
272 emitFormA(0x113, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);
273 emitField(84, 2, util_logbase2(typeSizeof(insn->sType)));
274 emitFMZ (80, 1);
275 emitField(78, 2, subop);
276 emitField(75, 2, util_logbase2(typeSizeof(insn->dType)));
277 }
278
279 void
emitI2F()280 CodeEmitterGV100::emitI2F()
281 {
282 if (typeSizeof(insn->sType) != 8 && typeSizeof(insn->dType) != 8)
283 emitFormA(0x106, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
284 else
285 emitFormA(0x112, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
286 emitField(84, 2, util_logbase2(typeSizeof(insn->sType)));
287 emitRND (78);
288 emitField(75, 2, util_logbase2(typeSizeof(insn->dType)));
289 emitField(74, 1, isSignedType(insn->sType));
290 if (typeSizeof(insn->sType) == 2)
291 emitField(60, 2, insn->subOp >> 1);
292 else
293 emitField(60, 2, insn->subOp); // ./.B1/.B2/.B3
294 }
295
296 void
emitMOV()297 CodeEmitterGV100::emitMOV()
298 {
299 switch (insn->def(0).getFile()) {
300 case FILE_GPR:
301 switch (insn->src(0).getFile()) {
302 case FILE_GPR:
303 case FILE_MEMORY_CONST:
304 case FILE_IMMEDIATE:
305 emitFormA(0x002, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
306 emitField(72, 4, insn->lanes);
307 break;
308 case FILE_PREDICATE:
309 emitInsn (0x807);
310 emitGPR (16, insn->def(0));
311 emitGPR (24);
312 emitField(32, 32, 0xffffffff);
313 emitField(90, 1, 1);
314 emitPRED (87, insn->src(0));
315 break;
316 case FILE_BARRIER:
317 case FILE_THREAD_STATE:
318 emitInsn (0x355);
319 emitBTS (24, insn->src(0));
320 emitGPR (16, insn->def(0));
321 break;
322 default:
323 assert(!"bad src file");
324 break;
325 }
326 break;
327 case FILE_PREDICATE:
328 emitInsn (0x20c);
329 emitPRED (87);
330 emitPRED (84);
331 emitNOT (71);
332 emitPRED (68);
333 emitPRED (81, insn->def(0));
334 emitCond3(76, CC_NE);
335 emitGPR (24, insn->src(0));
336 emitGPR (32);
337 break;
338 case FILE_BARRIER:
339 case FILE_THREAD_STATE:
340 switch (insn->src(0).getFile()) {
341 case FILE_GPR:
342 emitInsn (0x356);
343 emitGPR (32, insn->src(0));
344 emitBTS (24, insn->def(0));
345 break;
346 case FILE_BARRIER:
347 emitInsn (0xf56);
348 emitBTS (24, insn->def(0));
349 emitBTS (16, insn->src(0));
350 break;
351 case FILE_THREAD_STATE:
352 assert(insn->def(0).getFile() == FILE_BARRIER);
353 emitInsn (0xf55);
354 emitBTS (24, insn->src(0));
355 emitBTS (16, insn->def(0));
356 break;
357 default:
358 assert(!"bad src file");
359 break;
360 }
361 emitField(84, 1, insn->getDef(0)->reg.data.ts == TS_PQUAD_MACTIVE ? 1 : 0);
362 break;
363 default:
364 assert(!"bad dst file");
365 break;
366 }
367 }
368
369 void
emitPRMT()370 CodeEmitterGV100::emitPRMT()
371 {
372 emitFormA(0x016, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), __(2));
373 emitField(72, 3, insn->subOp);
374 }
375
376 void
emitS2R()377 CodeEmitterGV100::emitS2R()
378 {
379 emitInsn(0x919);
380 emitSYS (72, insn->src(0));
381 emitGPR (16, insn->def(0));
382 }
383
384 void
gv100_selpFlip(const FixupEntry * entry,uint32_t * code,const FixupData & data)385 gv100_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
386 {
387 int loc = entry->loc;
388 bool val = false;
389 switch (entry->ipa) {
390 case 0:
391 val = data.force_persample_interp;
392 break;
393 case 1:
394 val = data.msaa;
395 break;
396 }
397 if (val)
398 code[loc + 2] |= 1 << 26;
399 else
400 code[loc + 2] &= ~(1 << 26);
401 }
402
403 void
emitSEL()404 CodeEmitterGV100::emitSEL()
405 {
406 emitFormA(0x007, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY);
407 emitNOT (90, insn->src(2));
408 emitPRED (87, insn->src(2));
409 if (insn->subOp >= 1)
410 addInterp(insn->subOp - 1, 0, gv100_selpFlip);
411 }
412
413 void
emitSHFL()414 CodeEmitterGV100::emitSHFL()
415 {
416 switch (insn->src(1).getFile()) {
417 case FILE_GPR:
418 switch (insn->src(2).getFile()) {
419 case FILE_GPR:
420 emitInsn(0x389);
421 emitGPR (64, insn->src(2));
422 break;
423 case FILE_IMMEDIATE:
424 emitInsn(0x589);
425 emitIMMD(40, 13, insn->src(2));
426 break;
427 default:
428 assert(!"bad src2 file");
429 break;
430 }
431 emitGPR(32, insn->src(1));
432 break;
433 case FILE_IMMEDIATE:
434 switch (insn->src(2).getFile()) {
435 case FILE_GPR:
436 emitInsn(0x989);
437 emitGPR (64, insn->src(2));
438 break;
439 case FILE_IMMEDIATE:
440 emitInsn(0xf89);
441 emitIMMD(40, 13, insn->src(2));
442 break;
443 default:
444 assert(!"bad src2 file");
445 break;
446 }
447 emitIMMD(53, 5, insn->src(1));
448 break;
449 default:
450 assert(!"bad src1 file");
451 break;
452 }
453
454 if (insn->defExists(1))
455 emitPRED(81, insn->def(1));
456 else
457 emitPRED(81);
458
459 emitField(58, 2, insn->subOp);
460 emitGPR (24, insn->src(0));
461 emitGPR (16, insn->def(0));
462 }
463
464 /*******************************************************************************
465 * fp32
466 ******************************************************************************/
467
468 void
emitFADD()469 CodeEmitterGV100::emitFADD()
470 {
471 if (insn->src(1).getFile() == FILE_GPR)
472 emitFormA(0x021, FA_RRR , NA(0), NA(1), EMPTY);
473 else
474 emitFormA(0x021, FA_RRI | FA_RRC, NA(0), EMPTY, NA(1));
475 emitFMZ (80, 1);
476 emitRND (78);
477 emitSAT (77);
478 }
479
480 void
emitFFMA()481 CodeEmitterGV100::emitFFMA()
482 {
483 emitFormA(0x023, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, NA(0), NA(1), NA(2));
484 emitField(80, 1, insn->ftz);
485 emitRND (78);
486 emitSAT (77);
487 emitField(76, 1, insn->dnz);
488 }
489
490 void
emitFMNMX()491 CodeEmitterGV100::emitFMNMX()
492 {
493 emitFormA(0x009, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY);
494 emitField(90, 1, insn->op == OP_MAX);
495 emitPRED (87);
496 emitFMZ (80, 1);
497 }
498
499 void
emitFMUL()500 CodeEmitterGV100::emitFMUL()
501 {
502 emitFormA(0x020, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY);
503 emitField(80, 1, insn->ftz);
504 emitPDIV (84);
505 emitRND (78);
506 emitSAT (77);
507 emitField(76, 1, insn->dnz);
508 }
509
510 void
emitFSET_BF()511 CodeEmitterGV100::emitFSET_BF()
512 {
513 const CmpInstruction *insn = this->insn->asCmp();
514
515 emitFormA(0x00a, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY);
516 emitFMZ (80, 1);
517 emitCond4(76, insn->setCond);
518
519 if (insn->op != OP_SET) {
520 switch (insn->op) {
521 case OP_SET_AND: emitField(74, 2, 0); break;
522 case OP_SET_OR : emitField(74, 2, 1); break;
523 case OP_SET_XOR: emitField(74, 2, 2); break;
524 default:
525 assert(!"invalid set op");
526 break;
527 }
528 emitNOT (90, insn->src(2));
529 emitPRED(87, insn->src(2));
530 } else {
531 emitPRED(87);
532 }
533 }
534
535 void
emitFSETP()536 CodeEmitterGV100::emitFSETP()
537 {
538 const CmpInstruction *insn = this->insn->asCmp();
539
540 emitFormA(0x00b, FA_NODEF | FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY);
541 emitFMZ (80, 1);
542 emitCond4(76, insn->setCond);
543
544 if (insn->op != OP_SET) {
545 switch (insn->op) {
546 case OP_SET_AND: emitField(74, 2, 0); break;
547 case OP_SET_OR : emitField(74, 2, 1); break;
548 case OP_SET_XOR: emitField(74, 2, 2); break;
549 default:
550 assert(!"invalid set op");
551 break;
552 }
553 emitNOT (90, insn->src(2));
554 emitPRED(87, insn->src(2));
555 } else {
556 emitPRED(87);
557 }
558
559 if (insn->defExists(1))
560 emitPRED(84, insn->def(1));
561 else
562 emitPRED(84);
563 emitPRED(81, insn->def(0));
564 }
565
566 void
emitFSWZADD()567 CodeEmitterGV100::emitFSWZADD()
568 {
569 uint8_t subOp = 0;
570
571 // NP/PN swapped vs SM60
572 for (int i = 0; i < 4; i++) {
573 uint8_t p = ((insn->subOp >> (i * 2)) & 3);
574 if (p == 1 || p == 2)
575 p ^= 3;
576 subOp |= p << (i * 2);
577 }
578
579 emitInsn (0x822);
580 emitFMZ (80, 1);
581 emitRND (78);
582 emitField(77, 1, insn->lanes); /* abused for .ndv */
583 emitGPR (64, insn->src(1));
584 emitField(32, 8, subOp);
585 emitGPR (24, insn->src(0));
586 emitGPR (16, insn->def(0));
587 }
588
589 void
emitMUFU()590 CodeEmitterGV100::emitMUFU()
591 {
592 int mufu = 0;
593
594 switch (insn->op) {
595 case OP_COS : mufu = 0; break;
596 case OP_SIN : mufu = 1; break;
597 case OP_EX2 : mufu = 2; break;
598 case OP_LG2 : mufu = 3; break;
599 case OP_RCP : mufu = 4 + 2 * insn->subOp; break;
600 case OP_RSQ : mufu = 5 + 2 * insn->subOp; break;
601 case OP_SQRT: mufu = 8; break;
602 default:
603 assert(!"invalid mufu");
604 break;
605 }
606
607 emitFormA(0x108, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);
608 emitField(74, 4, mufu);
609 }
610
611 /*******************************************************************************
612 * fp64
613 ******************************************************************************/
614
615 void
emitDADD()616 CodeEmitterGV100::emitDADD()
617 {
618 emitFormA(0x029, FA_RRR | FA_RRI | FA_RRC, NA(0), EMPTY, NA(1));
619 emitRND(78);
620 }
621
622 void
emitDFMA()623 CodeEmitterGV100::emitDFMA()
624 {
625 emitFormA(0x02b, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, NA(0), NA(1), NA(2));
626 emitRND(78);
627 }
628
629 void
emitDMUL()630 CodeEmitterGV100::emitDMUL()
631 {
632 emitFormA(0x028, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY);
633 emitRND(78);
634 }
635
636 void
emitDSETP()637 CodeEmitterGV100::emitDSETP()
638 {
639 const CmpInstruction *insn = this->insn->asCmp();
640
641 if (insn->src(1).getFile() == FILE_GPR)
642 emitFormA(0x02a, FA_NODEF | FA_RRR , NA(0), NA(1), EMPTY);
643 else
644 emitFormA(0x02a, FA_NODEF | FA_RRI | FA_RRC, NA(0), EMPTY, NA(1));
645
646 if (insn->op != OP_SET) {
647 switch (insn->op) {
648 case OP_SET_AND: emitField(74, 2, 0); break;
649 case OP_SET_OR : emitField(74, 2, 1); break;
650 case OP_SET_XOR: emitField(74, 2, 2); break;
651 default:
652 assert(!"invalid set op");
653 break;
654 }
655 emitNOT (90, insn->src(2));
656 emitPRED(87, insn->src(2));
657 } else {
658 emitPRED(87);
659 }
660
661 if (insn->defExists(1))
662 emitPRED(84, insn->def(1));
663 else
664 emitPRED(84);
665 emitPRED (81, insn->def(0));
666 emitCond4(76, insn->setCond);
667 }
668
669 /*******************************************************************************
670 * integer
671 ******************************************************************************/
672
673 void
emitBMSK()674 CodeEmitterGV100::emitBMSK()
675 {
676 emitFormA(0x01b, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY);
677 emitField(75, 1, insn->subOp); // .C/.W
678 }
679
680 void
emitBREV()681 CodeEmitterGV100::emitBREV()
682 {
683 emitFormA(0x101, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
684 }
685
686 void
emitFLO()687 CodeEmitterGV100::emitFLO()
688 {
689 emitFormA(0x100, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
690 emitPRED (81);
691 emitField(74, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT);
692 emitField(73, 1, isSignedType(insn->dType));
693 emitNOT (63, insn->src(0));
694 }
695
696 void
emitIABS()697 CodeEmitterGV100::emitIABS()
698 {
699 emitFormA(0x013, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
700 }
701
702 void
emitIADD3()703 CodeEmitterGV100::emitIADD3()
704 {
705 // emitFormA(0x010, FA_RRR | FA_RIR | FA_RCR, N_(0), N_(1), N_(2));
706 emitFormA(0x010, FA_RRR | FA_RIR | FA_RCR, N_(0), N_(1), EMPTY);
707 emitGPR (64); //XXX: fix when switching back to N_(2)
708 emitPRED (84, NULL); // .CC1
709 emitPRED (81, insn->flagsDef >= 0 ? insn->getDef(insn->flagsDef) : NULL);
710 if (insn->flagsSrc >= 0) {
711 emitField(74, 1, 1); // .X
712 emitPRED (87, insn->getSrc(insn->flagsSrc));
713 emitField(77, 4, 0xf); // .X1
714 }
715 }
716
717 void
emitIMAD()718 CodeEmitterGV100::emitIMAD()
719 {
720 emitFormA(0x024, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), N_(2));
721 emitField(73, 1, isSignedType(insn->sType));
722 }
723
724 void
emitIMAD_WIDE()725 CodeEmitterGV100::emitIMAD_WIDE()
726 {
727 emitFormA(0x025, FA_RRR | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), N_(2));
728 emitPRED (81);
729 emitField(73, 1, isSignedType(insn->sType));
730 }
731
732 void
emitISETP()733 CodeEmitterGV100::emitISETP()
734 {
735 const CmpInstruction *insn = this->insn->asCmp();
736
737 emitFormA(0x00c, FA_NODEF | FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY);
738
739 if (insn->op != OP_SET) {
740 switch (insn->op) {
741 case OP_SET_AND: emitField(74, 2, 0); break;
742 case OP_SET_OR : emitField(74, 2, 1); break;
743 case OP_SET_XOR: emitField(74, 2, 2); break;
744 default:
745 assert(!"invalid set op");
746 break;
747 }
748 emitNOT (90, insn->src(2));
749 emitPRED(87, insn->src(2));
750 } else {
751 emitPRED(87);
752 }
753
754 //XXX: CC->pred
755 if (insn->flagsSrc >= 0) {
756 assert(0);
757 emitField(68, 4, 6);
758 } else {
759 emitNOT (71);
760 if (!insn->subOp)
761 emitPRED(68);
762 }
763
764 if (insn->defExists(1))
765 emitPRED(84, insn->def(1));
766 else
767 emitPRED(84);
768 emitPRED (81, insn->def(0));
769 emitCond3(76, insn->setCond);
770 emitField(73, 1, isSignedType(insn->sType));
771
772 if (insn->subOp) { // .EX
773 assert(0);
774 emitField(72, 1, 1);
775 emitPRED (68, insn->srcExists(3) ? insn->src(3) : insn->src(2));
776 }
777 }
778
779 void
emitLEA()780 CodeEmitterGV100::emitLEA()
781 {
782 assert(insn->src(1).get()->asImm());
783
784 emitFormA(0x011, FA_RRR | FA_RIR | FA_RCR, N_(0), N_(2), EMPTY);
785 emitPRED (81);
786 emitIMMD (75, 5, insn->src(1));
787 emitGPR (64);
788 }
789
790 void
emitLOP3_LUT()791 CodeEmitterGV100::emitLOP3_LUT()
792 {
793 emitFormA(0x012, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), __(2));
794 emitField(90, 1, 1);
795 emitPRED (87);
796 emitPRED (81);
797 emitField(80, 1, 0); // .PAND
798 emitField(72, 8, insn->subOp);
799 }
800
801 void
emitPOPC()802 CodeEmitterGV100::emitPOPC()
803 {
804 emitFormA(0x109, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
805 emitNOT (63, insn->src(0));
806 }
807
808 void
emitSGXT()809 CodeEmitterGV100::emitSGXT()
810 {
811 emitFormA(0x01a, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY);
812 emitField(75, 1, 0); // .W
813 emitField(73, 1, 1); // /.U32
814 }
815
816 void
emitSHF()817 CodeEmitterGV100::emitSHF()
818 {
819 emitFormA(0x019, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), __(2));
820 emitField(80, 1, !!(insn->subOp & NV50_IR_SUBOP_SHF_HI));
821 emitField(76, 1, !!(insn->subOp & NV50_IR_SUBOP_SHF_R));
822 emitField(75, 1, !!(insn->subOp & NV50_IR_SUBOP_SHF_W));
823
824 switch (insn->sType) {
825 case TYPE_S64: emitField(73, 2, 0); break;
826 case TYPE_U64: emitField(73, 2, 1); break;
827 case TYPE_S32: emitField(73, 2, 2); break;
828 case TYPE_U32:
829 default:
830 emitField(73, 2, 3);
831 break;
832 }
833 }
834
835 /*******************************************************************************
836 * load/stores
837 ******************************************************************************/
838
839 void
emitALD()840 CodeEmitterGV100::emitALD()
841 {
842 emitInsn (0x321);
843 emitField(74, 2, (insn->getDef(0)->reg.size / 4) - 1);
844 emitGPR (32, insn->src(0).getIndirect(1));
845 emitO (79);
846 emitField(77, 1, insn->subOp); // .PHYS
847 emitP (76);
848 emitADDR (24, 40, 10, 0, insn->src(0));
849 emitGPR (16, insn->def(0));
850 }
851
852 void
emitAST()853 CodeEmitterGV100::emitAST()
854 {
855 emitInsn (0x322);
856 emitField(74, 2, (typeSizeof(insn->dType) / 4) - 1);
857 emitGPR (64, insn->src(0).getIndirect(1));
858 emitField(77, 1, insn->subOp); // .PHYS
859 emitP (76);
860 emitADDR (24, 40, 10, 0, insn->src(0));
861 emitGPR (32, insn->src(1));
862 }
863
864 void
emitATOM()865 CodeEmitterGV100::emitATOM()
866 {
867 unsigned subOp, dType;
868
869 if (insn->subOp != NV50_IR_SUBOP_ATOM_CAS) {
870 emitInsn(0x38a);
871
872 if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
873 subOp = 8;
874 else
875 subOp = insn->subOp;
876 emitField(87, 4, subOp);
877
878 switch (insn->dType) {
879 case TYPE_U32 : dType = 0; break;
880 case TYPE_S32 : dType = 1; break;
881 case TYPE_U64 : dType = 2; break;
882 case TYPE_F32 : dType = 3; break;
883 case TYPE_B128: dType = 4; break;
884 case TYPE_S64 : dType = 5; break;
885 default:
886 assert(!"unexpected dType");
887 dType = 0;
888 break;
889 }
890 emitField(73, 3, dType);
891 } else {
892 emitInsn(0x38b);
893
894 switch (insn->dType) {
895 case TYPE_U32: dType = 0; break;
896 case TYPE_U64: dType = 2; break;
897 default:
898 assert(!"unexpected dType");
899 dType = 0;
900 break;
901 }
902 emitField(73, 3, dType);
903 emitGPR (64, insn->src(2));
904 }
905
906 emitPRED (81);
907 if (targ->getChipset() < 0x170) {
908 emitField(79, 2, 2); // .INVALID0/./.STRONG/.INVALID3
909 emitField(77, 2, 3); // .CTA/.SM/.GPU/.SYS
910 } else {
911 emitField(77, 4, 0xa); // .STRONG.SYS
912 }
913 emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8);
914 emitGPR (32, insn->src(1));
915 emitADDR (24, 40, 24, 0, insn->src(0));
916 emitGPR (16, insn->def(0));
917 }
918
919 void
emitATOMS()920 CodeEmitterGV100::emitATOMS()
921 {
922 unsigned dType, subOp;
923
924 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
925 switch (insn->dType) {
926 case TYPE_U32: dType = 0; break;
927 case TYPE_S32: dType = 1; break;
928 case TYPE_U64: dType = 2; break;
929 default: assert(!"unexpected dType"); dType = 0; break;
930 }
931
932 emitInsn (0x38d);
933 emitField(87, 1, 0); // ATOMS.CAS/ATOMS.CAST
934 emitField(73, 2, dType);
935 emitGPR (64, insn->src(2));
936 } else {
937 emitInsn(0x38c);
938
939 if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
940 subOp = 8;
941 else
942 subOp = insn->subOp;
943 emitField(87, 4, subOp);
944
945 switch (insn->dType) {
946 case TYPE_U32: dType = 0; break;
947 case TYPE_S32: dType = 1; break;
948 case TYPE_U64: dType = 2; break;
949 default: assert(!"unexpected dType"); dType = 0; break;
950 }
951
952 emitField(73, 2, dType);
953 }
954
955 emitGPR (32, insn->src(1));
956 emitADDR (24, 40, 24, 0, insn->src(0));
957 emitGPR (16, insn->def(0));
958 }
959
960 void
gv100_interpApply(const FixupEntry * entry,uint32_t * code,const FixupData & data)961 gv100_interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
962 {
963 int ipa = entry->ipa;
964 int loc = entry->loc;
965
966 if (data.force_persample_interp &&
967 (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
968 (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
969 ipa |= NV50_IR_INTERP_CENTROID;
970 }
971
972 int sample;
973 switch (ipa & NV50_IR_INTERP_SAMPLE_MASK) {
974 case NV50_IR_INTERP_DEFAULT : sample = 0; break;
975 case NV50_IR_INTERP_CENTROID: sample = 1; break;
976 case NV50_IR_INTERP_OFFSET : sample = 2; break;
977 default: unreachable("invalid sample mode");
978 }
979
980 int interp;
981 switch (ipa & NV50_IR_INTERP_MODE_MASK) {
982 case NV50_IR_INTERP_LINEAR :
983 case NV50_IR_INTERP_PERSPECTIVE: interp = 0; break;
984 case NV50_IR_INTERP_FLAT : interp = 1; break;
985 case NV50_IR_INTERP_SC : interp = 2; break;
986 default: unreachable("invalid ipa mode");
987 }
988
989 code[loc + 2] &= ~(0xf << 12);
990 code[loc + 2] |= sample << 12;
991 code[loc + 2] |= interp << 14;
992 }
993
994 void
emitIPA()995 CodeEmitterGV100::emitIPA()
996 {
997 emitInsn (0x326);
998 emitPRED (81, insn->defExists(1) ? insn->def(1) : NULL);
999
1000 switch (insn->getInterpMode()) {
1001 case NV50_IR_INTERP_LINEAR :
1002 case NV50_IR_INTERP_PERSPECTIVE: emitField(78, 2, 0); break;
1003 case NV50_IR_INTERP_FLAT : emitField(78, 2, 1); break;
1004 case NV50_IR_INTERP_SC : emitField(78, 2, 2); break;
1005 default:
1006 assert(!"invalid ipa mode");
1007 break;
1008 }
1009
1010 switch (insn->getSampleMode()) {
1011 case NV50_IR_INTERP_DEFAULT : emitField(76, 2, 0); break;
1012 case NV50_IR_INTERP_CENTROID: emitField(76, 2, 1); break;
1013 case NV50_IR_INTERP_OFFSET : emitField(76, 2, 2); break;
1014 default:
1015 assert(!"invalid sample mode");
1016 break;
1017 }
1018
1019 if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET) {
1020 emitGPR (32);
1021 addInterp(insn->ipa, 0xff, gv100_interpApply);
1022 } else {
1023 emitGPR (32, insn->src(1));
1024 addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, gv100_interpApply);
1025 }
1026
1027 assert(!insn->src(0).isIndirect(0));
1028 emitADDR (-1, 64, 8, 2, insn->src(0));
1029 emitGPR (16, insn->def(0));
1030 }
1031
1032 void
emitISBERD()1033 CodeEmitterGV100::emitISBERD()
1034 {
1035 emitInsn(0x923);
1036 emitGPR (24, insn->src(0));
1037 emitGPR (16, insn->def(0));
1038 }
1039
1040 void
emitLDSTc(int posm,int poso)1041 CodeEmitterGV100::emitLDSTc(int posm, int poso)
1042 {
1043 int mode = 0;
1044 int order = 1;
1045 int sm80 = 0;
1046
1047 switch (insn->cache) {
1048 case CACHE_CA: mode = 0; order = 1; sm80 = 0x0; break; // .CTA
1049 case CACHE_CG: mode = 2; order = 2; sm80 = 0x7; break; // .STRONG.GPU
1050 case CACHE_CV: mode = 3; order = 2; sm80 = 0xa; break; // .STRONG.SYS
1051 default:
1052 assert(!"invalid caching mode");
1053 break;
1054 }
1055
1056 if (targ->getChipset() < 0x170) {
1057 emitField(poso, 2, order);
1058 emitField(posm, 2, mode);
1059 } else {
1060 emitField(posm, 4, sm80);
1061 }
1062 }
1063
1064 void
emitLDSTs(int pos,DataType type)1065 CodeEmitterGV100::emitLDSTs(int pos, DataType type)
1066 {
1067 int data = 0;
1068
1069 switch (typeSizeof(type)) {
1070 case 1: data = isSignedType(type) ? 1 : 0; break;
1071 case 2: data = isSignedType(type) ? 3 : 2; break;
1072 case 4: data = 4; break;
1073 case 8: data = 5; break;
1074 case 16: data = 6; break;
1075 default:
1076 assert(!"bad type");
1077 break;
1078 }
1079
1080 emitField(pos, 3, data);
1081 }
1082
1083 void
emitLD()1084 CodeEmitterGV100::emitLD()
1085 {
1086 emitInsn (0x980);
1087 if (targ->getChipset() < 0x170) {
1088 emitField(79, 2, 2); // .CONSTANT/./.STRONG/.MMIO
1089 emitField(77, 2, 2); // .CTA/.SM/.GPU/.SYS
1090 } else {
1091 emitField(77, 4, 0x7); // .STRONG.GPU
1092 }
1093 emitLDSTs(73, insn->dType);
1094 emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8);
1095 emitADDR (24, 32, 32, 0, insn->src(0));
1096 emitGPR (16, insn->def(0));
1097 }
1098
1099 void
emitLDC()1100 CodeEmitterGV100::emitLDC()
1101 {
1102 emitFormA(0x182, FA_RCR, EMPTY, __(0), EMPTY);
1103 emitField(78, 2, insn->subOp);
1104 emitLDSTs(73, insn->dType);
1105 emitGPR (24, insn->src(0).getIndirect(0));
1106 }
1107
1108 void
emitLDL()1109 CodeEmitterGV100::emitLDL()
1110 {
1111 emitInsn (0x983);
1112 emitField(84, 3, 1); // .EF/./.EL/.LU/.EU/.NA/.INVALID6/.INVALID7
1113 emitLDSTs(73, insn->dType);
1114 emitADDR (24, 40, 24, 0, insn->src(0));
1115 emitGPR (16, insn->def(0));
1116 }
1117
1118 void
emitLDS()1119 CodeEmitterGV100::emitLDS()
1120 {
1121 emitInsn (0x984);
1122 emitLDSTs(73, insn->dType);
1123 emitADDR (24, 40, 24, 0, insn->src(0));
1124 emitGPR (16, insn->def(0));
1125 }
1126
1127 void
emitOUT()1128 CodeEmitterGV100::emitOUT()
1129 {
1130 const int cut = insn->op == OP_RESTART || insn->subOp;
1131 const int emit = insn->op == OP_EMIT;
1132
1133 if (insn->op != OP_FINAL)
1134 emitFormA(0x124, FA_RRR | FA_RIR, __(0), __(1), EMPTY);
1135 else {
1136 emitFormA(0x124, FA_RRR | FA_RIR, __(0), EMPTY, EMPTY);
1137 if (targ->getChipset() >= 0x170)
1138 emitGPR(32);
1139 }
1140 emitField(78, 2, (cut << 1) | emit);
1141 }
1142
1143 void
emitRED()1144 CodeEmitterGV100::emitRED()
1145 {
1146 unsigned dType;
1147
1148 switch (insn->dType) {
1149 case TYPE_U32: dType = 0; break;
1150 case TYPE_S32: dType = 1; break;
1151 case TYPE_U64: dType = 2; break;
1152 case TYPE_F32: dType = 3; break;
1153 case TYPE_B128: dType = 4; break;
1154 case TYPE_S64: dType = 5; break;
1155 default: assert(!"unexpected dType"); dType = 0; break;
1156 }
1157
1158 emitInsn (0x98e);
1159 emitField(87, 3, insn->subOp);
1160 emitField(84, 3, 1); // 0=.EF, 1=, 2=.EL, 3=.LU, 4=.EU, 5=.NA
1161 if (targ->getChipset() < 0x170) {
1162 emitField(79, 2, 2); // .INVALID0/./.STRONG/.INVALID3
1163 emitField(77, 2, 3); // .CTA/.SM/.GPU/.SYS
1164 } else {
1165 emitField(77, 4, 0xa); // .STRONG.SYS
1166 }
1167 emitField(73, 3, dType);
1168 emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8);
1169 emitGPR (32, insn->src(1));
1170 emitADDR (24, 40, 24, 0, insn->src(0));
1171 }
1172
1173 void
emitST()1174 CodeEmitterGV100::emitST()
1175 {
1176 emitInsn (0x385);
1177 if (targ->getChipset() < 0x170) {
1178 emitField(79, 2, 2); // .INVALID0/./.STRONG/.MMIO
1179 emitField(77, 2, 2); // .CTA/.SM/.GPU/.SYS
1180 } else {
1181 emitField(77, 4, 0x7); // .STRONG.GPU
1182 }
1183 emitLDSTs(73, insn->dType);
1184 emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8);
1185 emitGPR (64, insn->src(1));
1186 emitADDR (24, 32, 32, 0, insn->src(0));
1187 }
1188
1189 void
emitSTL()1190 CodeEmitterGV100::emitSTL()
1191 {
1192 emitInsn (0x387);
1193 emitField(84, 3, 1); // .EF/./.EL/.LU/.EU/.NA/.INVALID6/.INVALID7
1194 emitLDSTs(73, insn->dType);
1195 emitADDR (24, 40, 24, 0, insn->src(0));
1196 emitGPR (32, insn->src(1));
1197 }
1198
1199 void
emitSTS()1200 CodeEmitterGV100::emitSTS()
1201 {
1202 emitInsn (0x388);
1203 emitLDSTs(73, insn->dType);
1204 emitADDR (24, 40, 24, 0, insn->src(0));
1205 emitGPR (32, insn->src(1));
1206 }
1207
1208 /*******************************************************************************
1209 * texture
1210 ******************************************************************************/
1211
1212 void
emitTEXs(int pos)1213 CodeEmitterGV100::emitTEXs(int pos)
1214 {
1215 int src1 = insn->predSrc == 1 ? 2 : 1;
1216 if (insn->srcExists(src1))
1217 emitGPR(pos, insn->src(src1));
1218 else
1219 emitGPR(pos);
1220 }
1221
1222 void
emitTEX()1223 CodeEmitterGV100::emitTEX()
1224 {
1225 const TexInstruction *insn = this->insn->asTex();
1226 int lodm = 0;
1227
1228 if (!insn->tex.levelZero) {
1229 switch (insn->op) {
1230 case OP_TEX: lodm = 0; break;
1231 case OP_TXB: lodm = 2; break;
1232 case OP_TXL: lodm = 3; break;
1233 default:
1234 assert(!"invalid tex op");
1235 break;
1236 }
1237 } else {
1238 lodm = 1;
1239 }
1240
1241 if (insn->tex.rIndirectSrc < 0) {
1242 emitInsn (0xb60);
1243 emitField(54, 5, prog->driver->io.auxCBSlot);
1244 emitField(40, 14, insn->tex.r);
1245 } else {
1246 emitInsn (0x361);
1247 emitField(59, 1, 1); // .B
1248 }
1249 emitField(90, 1, insn->tex.liveOnly); // .NODEP
1250 emitField(87, 3, lodm);
1251 emitField(84, 3, 1); // 0=.EF, 1=, 2=.EL, 3=.LU, 4=.EU, 5=.NA
1252 emitField(78, 1, insn->tex.target.isShadow()); // .DC
1253 emitField(77, 1, insn->tex.derivAll); // .NDV
1254 emitField(76, 1, insn->tex.useOffsets == 1); // .AOFFI
1255 emitPRED (81);
1256 emitGPR (64, insn->def(1));
1257 emitGPR (16, insn->def(0));
1258 emitGPR (24, insn->src(0));
1259 emitTEXs (32);
1260 emitField(63, 1, insn->tex.target.isArray());
1261 emitField(61, 2, insn->tex.target.isCube() ? 3 :
1262 insn->tex.target.getDim() - 1);
1263 emitField(72, 4, insn->tex.mask);
1264 }
1265
1266 void
emitTLD()1267 CodeEmitterGV100::emitTLD()
1268 {
1269 const TexInstruction *insn = this->insn->asTex();
1270
1271 if (insn->tex.rIndirectSrc < 0) {
1272 emitInsn (0xb66);
1273 emitField(54, 5, prog->driver->io.auxCBSlot);
1274 emitField(40, 14, insn->tex.r);
1275 } else {
1276 emitInsn (0x367);
1277 emitField(59, 1, 1); // .B
1278 }
1279 emitField(90, 1, insn->tex.liveOnly);
1280 emitField(87, 3, insn->tex.levelZero ? 1 /* .LZ */ : 3 /* .LL */);
1281 emitPRED (81);
1282 emitField(78, 1, insn->tex.target.isMS());
1283 emitField(76, 1, insn->tex.useOffsets == 1);
1284 emitField(72, 4, insn->tex.mask);
1285 emitGPR (64, insn->def(1));
1286 emitField(63, 1, insn->tex.target.isArray());
1287 emitField(61, 2, insn->tex.target.isCube() ? 3 :
1288 insn->tex.target.getDim() - 1);
1289 emitTEXs (32);
1290 emitGPR (24, insn->src(0));
1291 emitGPR (16, insn->def(0));
1292 }
1293
1294 void
emitTLD4()1295 CodeEmitterGV100::emitTLD4()
1296 {
1297 const TexInstruction *insn = this->insn->asTex();
1298
1299 int offsets = 0;
1300 switch (insn->tex.useOffsets) {
1301 case 4: offsets = 2; break;
1302 case 1: offsets = 1; break;
1303 case 0: offsets = 0; break;
1304 default: assert(!"invalid offsets count"); break;
1305 }
1306
1307 if (insn->tex.rIndirectSrc < 0) {
1308 emitInsn (0xb63);
1309 emitField(54, 5, prog->driver->io.auxCBSlot);
1310 emitField(40, 14, insn->tex.r);
1311 } else {
1312 emitInsn (0x364);
1313 emitField(59, 1, 1); // .B
1314 }
1315 emitField(90, 1, insn->tex.liveOnly);
1316 emitField(87, 2, insn->tex.gatherComp);
1317 emitField(84, 1, 1); // !.EF
1318 emitPRED (81);
1319 emitField(78, 1, insn->tex.target.isShadow());
1320 emitField(76, 2, offsets);
1321 emitField(72, 4, insn->tex.mask);
1322 emitGPR (64, insn->def(1));
1323 emitField(63, 1, insn->tex.target.isArray());
1324 emitField(61, 2, insn->tex.target.isCube() ? 3 :
1325 insn->tex.target.getDim() - 1);
1326 emitTEXs (32);
1327 emitGPR (24, insn->src(0));
1328 emitGPR (16, insn->def(0));
1329 }
1330
1331 void
emitTMML()1332 CodeEmitterGV100::emitTMML()
1333 {
1334 const TexInstruction *insn = this->insn->asTex();
1335
1336 if (insn->tex.rIndirectSrc < 0) {
1337 emitInsn (0xb69);
1338 emitField(54, 5, prog->driver->io.auxCBSlot);
1339 emitField(40, 14, insn->tex.r);
1340 } else {
1341 emitInsn (0x36a);
1342 emitField(59, 1, 1); // .B
1343 }
1344 emitField(90, 1, insn->tex.liveOnly);
1345 emitField(77, 1, insn->tex.derivAll);
1346 emitField(72, 4, insn->tex.mask);
1347 emitGPR (64, insn->def(1));
1348 emitField(63, 1, insn->tex.target.isArray());
1349 emitField(61, 2, insn->tex.target.isCube() ? 3 :
1350 insn->tex.target.getDim() - 1);
1351 emitTEXs (32);
1352 emitGPR (24, insn->src(0));
1353 emitGPR (16, insn->def(0));
1354 }
1355
1356 void
emitTXD()1357 CodeEmitterGV100::emitTXD()
1358 {
1359 const TexInstruction *insn = this->insn->asTex();
1360
1361 if (insn->tex.rIndirectSrc < 0) {
1362 emitInsn (0xb6c);
1363 emitField(54, 5, prog->driver->io.auxCBSlot);
1364 emitField(40, 14, insn->tex.r);
1365 } else {
1366 emitInsn (0x36d);
1367 emitField(59, 1, 1); // .B
1368 }
1369 emitField(90, 1, insn->tex.liveOnly);
1370 emitPRED (81);
1371 emitField(76, 1, insn->tex.useOffsets == 1);
1372 emitField(72, 4, insn->tex.mask);
1373 emitGPR (64, insn->def(1));
1374 emitField(63, 1, insn->tex.target.isArray());
1375 emitField(61, 2, insn->tex.target.isCube() ? 3 :
1376 insn->tex.target.getDim() - 1);
1377 emitTEXs (32);
1378 emitGPR (24, insn->src(0));
1379 emitGPR (16, insn->def(0));
1380 }
1381
1382 void
emitTXQ()1383 CodeEmitterGV100::emitTXQ()
1384 {
1385 const TexInstruction *insn = this->insn->asTex();
1386 int type = 0;
1387
1388 switch (insn->tex.query) {
1389 case TXQ_DIMS : type = 0x00; break;
1390 case TXQ_TYPE : type = 0x01; break;
1391 case TXQ_SAMPLE_POSITION: type = 0x02; break;
1392 default:
1393 assert(!"invalid txq query");
1394 break;
1395 }
1396
1397 if (insn->tex.rIndirectSrc < 0) {
1398 emitInsn (0xb6f);
1399 emitField(54, 5, prog->driver->io.auxCBSlot);
1400 emitField(40, 14, insn->tex.r);
1401 } else {
1402 emitInsn (0x370);
1403 emitField(59, 1, 1); // .B
1404 }
1405 emitField(90, 1, insn->tex.liveOnly);
1406 emitField(72, 4, insn->tex.mask);
1407 emitGPR (64, insn->def(1));
1408 emitField(62, 2, type);
1409 emitGPR (24, insn->src(0));
1410 emitGPR (16, insn->def(0));
1411 }
1412
1413 /*******************************************************************************
1414 * surface
1415 ******************************************************************************/
1416
1417 void
emitSUHandle(const int s)1418 CodeEmitterGV100::emitSUHandle(const int s)
1419 {
1420 const TexInstruction *insn = this->insn->asTex();
1421
1422 assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
1423
1424 if (insn->src(s).getFile() == FILE_GPR) {
1425 emitGPR(64, insn->src(s));
1426 } else {
1427 assert(0);
1428 //XXX: not done
1429 ImmediateValue *imm = insn->getSrc(s)->asImm();
1430 assert(imm);
1431 emitField(0x33, 1, 1);
1432 emitField(0x24, 13, imm->reg.data.u32);
1433 }
1434 }
1435
1436 void
emitSUTarget()1437 CodeEmitterGV100::emitSUTarget()
1438 {
1439 const TexInstruction *insn = this->insn->asTex();
1440 int target = 0;
1441
1442 assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
1443
1444 if (insn->tex.target == TEX_TARGET_BUFFER) {
1445 target = 1;
1446 } else if (insn->tex.target == TEX_TARGET_1D_ARRAY) {
1447 target = 2;
1448 } else if (insn->tex.target == TEX_TARGET_2D ||
1449 insn->tex.target == TEX_TARGET_RECT) {
1450 target = 3;
1451 } else if (insn->tex.target == TEX_TARGET_2D_ARRAY ||
1452 insn->tex.target == TEX_TARGET_CUBE ||
1453 insn->tex.target == TEX_TARGET_CUBE_ARRAY) {
1454 target = 4;
1455 } else if (insn->tex.target == TEX_TARGET_3D) {
1456 target = 5;
1457 } else {
1458 assert(insn->tex.target == TEX_TARGET_1D);
1459 }
1460 emitField(61, 3, target);
1461 }
1462
1463 void
emitSUATOM()1464 CodeEmitterGV100::emitSUATOM()
1465 {
1466 const TexInstruction *insn = this->insn->asTex();
1467 uint8_t type = 0, subOp;
1468
1469 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS)
1470 emitInsn(0x396); // SUATOM.D.CAS
1471 else
1472 emitInsn(0x394); // SUATOM.D
1473
1474 emitSUTarget();
1475
1476 // destination type
1477 switch (insn->dType) {
1478 case TYPE_S32: type = 1; break;
1479 case TYPE_U64: type = 2; break;
1480 case TYPE_F32: type = 3; break;
1481 case TYPE_S64: type = 5; break;
1482 default:
1483 assert(insn->dType == TYPE_U32);
1484 break;
1485 }
1486
1487 // atomic operation
1488 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
1489 subOp = 0;
1490 } else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
1491 subOp = 8;
1492 } else {
1493 subOp = insn->subOp;
1494 }
1495
1496 emitField(87, 4, subOp);
1497 emitPRED (81);
1498 if (targ->getChipset() < 0x170)
1499 emitField(79, 2, 1);
1500 emitField(73, 3, type);
1501 emitField(72, 1, 0); // .BA
1502 emitGPR (32, insn->src(1));
1503 emitGPR (24, insn->src(0));
1504 emitGPR (16, insn->def(0));
1505
1506 emitSUHandle(2);
1507 }
1508
1509 void
emitSULD()1510 CodeEmitterGV100::emitSULD()
1511 {
1512 const TexInstruction *insn = this->insn->asTex();
1513 int type = 0;
1514
1515 if (insn->op == OP_SULDB) {
1516 emitInsn(0x99a);
1517 emitSUTarget();
1518
1519 switch (insn->dType) {
1520 case TYPE_U8: type = 0; break;
1521 case TYPE_S8: type = 1; break;
1522 case TYPE_U16: type = 2; break;
1523 case TYPE_S16: type = 3; break;
1524 case TYPE_U32: type = 4; break;
1525 case TYPE_U64: type = 5; break;
1526 case TYPE_B128: type = 6; break;
1527 default:
1528 assert(0);
1529 break;
1530 }
1531 emitField(73, 3, type);
1532 } else {
1533 emitInsn(0x998);
1534 emitSUTarget();
1535 emitField(72, 4, 0xf); // rgba
1536 }
1537
1538 emitPRED (81);
1539 emitLDSTc(77, 79);
1540
1541 emitGPR (16, insn->def(0));
1542 emitGPR (24, insn->src(0));
1543
1544 emitSUHandle(1);
1545 }
1546
1547 void
emitSUST()1548 CodeEmitterGV100::emitSUST()
1549 {
1550 const TexInstruction *insn = this->insn->asTex();
1551
1552 emitInsn(0x99c); // SUST.P
1553 #if 0
1554 if (insn->op == OP_SUSTB)
1555 emitField(0x34, 1, 1);
1556 #endif
1557 emitSUTarget();
1558
1559 emitLDSTc(77, 79);
1560 emitField(72, 4, 0xf); // rgba
1561 emitGPR(32, insn->src(1));
1562 emitGPR(24, insn->src(0));
1563 emitSUHandle(2);
1564 }
1565
1566 /*******************************************************************************
1567 * misc
1568 ******************************************************************************/
1569
1570 void
emitAL2P()1571 CodeEmitterGV100::emitAL2P()
1572 {
1573 emitInsn (0x920);
1574 emitO (79);
1575 emitField(74, 2, (insn->getDef(0)->reg.size / 4) - 1);
1576 emitField(40, 11, insn->src(0).get()->reg.data.offset);
1577 emitGPR (24, insn->src(0).getIndirect(0));
1578 emitGPR (16, insn->def(0));
1579 }
1580
1581 void
emitBAR()1582 CodeEmitterGV100::emitBAR()
1583 {
1584 uint8_t subop, redop = 0x00;
1585
1586 // 80
1587 // 01: DEFER_BLOCKING
1588 // 78:77
1589 // 00: SYNC
1590 // 01: ARV
1591 // 02: RED
1592 // 03: SCAN
1593 // 75:74
1594 // 00: RED.POPC
1595 // 01: RED.AND
1596 // 02: RED.OR
1597
1598 switch (insn->subOp) {
1599 case NV50_IR_SUBOP_BAR_RED_POPC: subop = 0x02; redop = 0x00; break;
1600 case NV50_IR_SUBOP_BAR_RED_AND : subop = 0x02; redop = 0x01; break;
1601 case NV50_IR_SUBOP_BAR_RED_OR : subop = 0x02; redop = 0x02; break;
1602 case NV50_IR_SUBOP_BAR_ARRIVE : subop = 0x01; break;
1603 default:
1604 subop = 0x00;
1605 assert(insn->subOp == NV50_IR_SUBOP_BAR_SYNC);
1606 break;
1607 }
1608
1609 if (insn->src(0).getFile() == FILE_GPR) {
1610 emitInsn ((1 << 9) | 0x11d);
1611 emitGPR (32, insn->src(0)); //XXX: nvdisasm shows src0==src1
1612 } else {
1613 ImmediateValue *imm = insn->getSrc(0)->asImm();
1614 assert(imm);
1615 if (insn->src(1).getFile() == FILE_GPR) {
1616 emitInsn ((4 << 9) | 0x11d);
1617 emitGPR (32, insn->src(1));
1618 } else {
1619 emitInsn ((5 << 9) | 0x11d);
1620 }
1621 emitField(54, 4, imm->reg.data.u32);
1622 }
1623
1624 emitField(77, 2, subop);
1625 emitField(74, 2, redop);
1626
1627 if (insn->srcExists(2) && (insn->predSrc != 2)) {
1628 emitField(90, 1, insn->src(2).mod == Modifier(NV50_IR_MOD_NOT));
1629 emitPRED (87, insn->src(2));
1630 } else {
1631 emitField(87, 3, 7);
1632 }
1633 }
1634
1635 void
emitCCTL()1636 CodeEmitterGV100::emitCCTL()
1637 {
1638 if (insn->src(0).getFile() == FILE_MEMORY_GLOBAL)
1639 emitInsn(0x98f);
1640 else
1641 emitInsn(0x990);
1642 emitField(87, 4, insn->subOp);
1643 emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8);
1644 emitADDR (24, 32, 32, 0, insn->src(0));
1645 }
1646
1647 void
emitMEMBAR()1648 CodeEmitterGV100::emitMEMBAR()
1649 {
1650 emitInsn (0x992);
1651 switch (NV50_IR_SUBOP_MEMBAR_SCOPE(insn->subOp)) {
1652 case NV50_IR_SUBOP_MEMBAR_CTA: emitField(76, 3, 0); break;
1653 case NV50_IR_SUBOP_MEMBAR_GL : emitField(76, 3, 2); break;
1654 case NV50_IR_SUBOP_MEMBAR_SYS: emitField(76, 3, 3); break;
1655 default:
1656 assert(!"invalid scope");
1657 break;
1658 }
1659 }
1660
1661 void
emitPIXLD()1662 CodeEmitterGV100::emitPIXLD()
1663 {
1664 emitInsn (0x925);
1665 switch (insn->subOp) {
1666 case NV50_IR_SUBOP_PIXLD_COVMASK : emitField(78, 3, 1); break; // .COVMASK
1667 case NV50_IR_SUBOP_PIXLD_SAMPLEID: emitField(78, 3, 3); break; // .MY_INDEX
1668 default:
1669 assert(0);
1670 break;
1671 }
1672 emitPRED (71);
1673 emitGPR (16, insn->def(0));
1674 }
1675
1676 void
emitPLOP3_LUT()1677 CodeEmitterGV100::emitPLOP3_LUT()
1678 {
1679 uint8_t op[2] = {};
1680
1681 switch (insn->op) {
1682 case OP_AND: op[0] = 0xf0 & 0xcc; break;
1683 case OP_OR : op[0] = 0xf0 | 0xcc; break;
1684 case OP_XOR: op[0] = 0xf0 ^ 0xcc; break;
1685 default:
1686 assert(!"invalid PLOP3");
1687 break;
1688 }
1689
1690 emitInsn(0x81c);
1691 emitNOT (90, insn->src(0));
1692 emitPRED(87, insn->src(0));
1693 emitPRED(84); // def(1)
1694 emitPRED(81, insn->def(0));
1695 emitNOT (80, insn->src(1));
1696 emitPRED(77, insn->src(1));
1697 emitField(72, 5, op[0] >> 3);
1698 emitNOT (71); // src(2)
1699 emitPRED(68); // src(2)
1700 emitField(64, 3, op[0] & 7);
1701 emitField(16, 8, op[1]);
1702 }
1703
1704 void
emitVOTE()1705 CodeEmitterGV100::emitVOTE()
1706 {
1707 const ImmediateValue *imm;
1708 uint32_t u32;
1709
1710 int r = -1, p = -1;
1711 for (int i = 0; insn->defExists(i); i++) {
1712 if (insn->def(i).getFile() == FILE_GPR)
1713 r = i;
1714 else if (insn->def(i).getFile() == FILE_PREDICATE)
1715 p = i;
1716 }
1717
1718 emitInsn (0x806);
1719 emitField(72, 2, insn->subOp);
1720 if (r >= 0)
1721 emitGPR (16, insn->def(r));
1722 else
1723 emitGPR (16);
1724 if (p >= 0)
1725 emitPRED (81, insn->def(p));
1726 else
1727 emitPRED (81);
1728
1729 switch (insn->src(0).getFile()) {
1730 case FILE_PREDICATE:
1731 emitField(90, 1, insn->src(0).mod == Modifier(NV50_IR_MOD_NOT));
1732 emitPRED (87, insn->src(0));
1733 break;
1734 case FILE_IMMEDIATE:
1735 imm = insn->getSrc(0)->asImm();
1736 assert(imm);
1737 u32 = imm->reg.data.u32;
1738 assert(u32 == 0 || u32 == 1);
1739 emitField(90, 1, u32 == 0);
1740 emitPRED (87);
1741 break;
1742 default:
1743 assert(!"Unhandled src");
1744 break;
1745 }
1746 }
1747
1748 bool
emitInstruction(Instruction * i)1749 CodeEmitterGV100::emitInstruction(Instruction *i)
1750 {
1751 insn = i;
1752
1753 switch (insn->op) {
1754 case OP_ABS:
1755 assert(!isFloatType(insn->dType));
1756 emitIABS();
1757 break;
1758 case OP_ADD:
1759 if (isFloatType(insn->dType)) {
1760 if (insn->dType == TYPE_F32)
1761 emitFADD();
1762 else
1763 emitDADD();
1764 } else {
1765 emitIADD3();
1766 }
1767 break;
1768 case OP_AFETCH:
1769 emitAL2P();
1770 break;
1771 case OP_AND:
1772 case OP_OR:
1773 case OP_XOR:
1774 if (insn->def(0).getFile() == FILE_PREDICATE) {
1775 emitPLOP3_LUT();
1776 } else {
1777 assert(!"invalid logop");
1778 emitNOP();
1779 }
1780 break;
1781 case OP_ATOM:
1782 if (insn->src(0).getFile() == FILE_MEMORY_SHARED)
1783 emitATOMS();
1784 else
1785 if (!insn->defExists(0) && insn->subOp < NV50_IR_SUBOP_ATOM_CAS)
1786 emitRED();
1787 else
1788 emitATOM();
1789 break;
1790 case OP_BAR:
1791 emitBAR();
1792 break;
1793 case OP_BFIND:
1794 emitFLO();
1795 break;
1796 case OP_BMSK:
1797 emitBMSK();
1798 break;
1799 case OP_BREV:
1800 emitBREV();
1801 break;
1802 case OP_BRA:
1803 case OP_JOIN: //XXX
1804 emitBRA();
1805 break;
1806 case OP_CCTL:
1807 emitCCTL();
1808 break;
1809 case OP_CEIL:
1810 case OP_CVT:
1811 case OP_FLOOR:
1812 case OP_TRUNC:
1813 if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
1814 insn->def(0).getFile() == FILE_BARRIER ||
1815 insn->def(0).getFile() == FILE_THREAD_STATE ||
1816 insn->src(0).getFile() == FILE_PREDICATE ||
1817 insn->src(0).getFile() == FILE_BARRIER ||
1818 insn->src(0).getFile() == FILE_THREAD_STATE)) {
1819 emitMOV();
1820 } else if (isFloatType(insn->dType)) {
1821 if (isFloatType(insn->sType)) {
1822 if (insn->sType == insn->dType)
1823 emitFRND();
1824 else
1825 emitF2F();
1826 } else {
1827 emitI2F();
1828 }
1829 } else {
1830 if (isFloatType(insn->sType)) {
1831 emitF2I();
1832 } else {
1833 assert(!"I2I");
1834 emitNOP();
1835 }
1836 }
1837 break;
1838 case OP_COS:
1839 case OP_EX2:
1840 case OP_LG2:
1841 case OP_RCP:
1842 case OP_RSQ:
1843 case OP_SIN:
1844 case OP_SQRT:
1845 emitMUFU();
1846 break;
1847 case OP_DISCARD:
1848 emitKILL();
1849 break;
1850 case OP_EMIT:
1851 case OP_FINAL:
1852 case OP_RESTART:
1853 emitOUT();
1854 break;
1855 case OP_EXIT:
1856 emitEXIT();
1857 break;
1858 case OP_EXPORT:
1859 emitAST();
1860 break;
1861 case OP_FMA:
1862 case OP_MAD:
1863 if (isFloatType(insn->dType)) {
1864 if (insn->dType == TYPE_F32)
1865 emitFFMA();
1866 else
1867 emitDFMA();
1868 } else {
1869 if (typeSizeof(insn->dType) != 8)
1870 emitIMAD();
1871 else
1872 emitIMAD_WIDE();
1873 }
1874 break;
1875 case OP_JOINAT: //XXX
1876 emitNOP();
1877 break;
1878 case OP_LINTERP:
1879 emitIPA();
1880 break;
1881 case OP_LOAD:
1882 switch (insn->src(0).getFile()) {
1883 case FILE_MEMORY_CONST : emitLDC(); break;
1884 case FILE_MEMORY_LOCAL : emitLDL(); break;
1885 case FILE_MEMORY_SHARED: emitLDS(); break;
1886 case FILE_MEMORY_GLOBAL: emitLD(); break;
1887 default:
1888 assert(!"invalid load");
1889 emitNOP();
1890 break;
1891 }
1892 break;
1893 case OP_LOP3_LUT:
1894 emitLOP3_LUT();
1895 break;
1896 case OP_MAX:
1897 case OP_MIN:
1898 if (isFloatType(insn->dType)) {
1899 if (insn->dType == TYPE_F32) {
1900 emitFMNMX();
1901 } else {
1902 assert(!"invalid FMNMX");
1903 emitNOP();
1904 }
1905 } else {
1906 assert(!"invalid MNMX");
1907 emitNOP();
1908 }
1909 break;
1910 case OP_MEMBAR:
1911 emitMEMBAR();
1912 break;
1913 case OP_MOV:
1914 emitMOV();
1915 break;
1916 case OP_MUL:
1917 if (isFloatType(insn->dType)) {
1918 if (insn->dType == TYPE_F32)
1919 emitFMUL();
1920 else
1921 emitDMUL();
1922 } else {
1923 assert(!"invalid IMUL");
1924 emitNOP();
1925 }
1926 break;
1927 case OP_PERMT:
1928 emitPRMT();
1929 break;
1930 case OP_PFETCH:
1931 emitISBERD();
1932 break;
1933 case OP_PIXLD:
1934 emitPIXLD();
1935 break;
1936 case OP_POPCNT:
1937 emitPOPC();
1938 break;
1939 case OP_QUADOP:
1940 emitFSWZADD();
1941 break;
1942 case OP_RDSV:
1943 if (targ->isCS2RSV(insn->getSrc(0)->reg.data.sv.sv))
1944 emitCS2R();
1945 else
1946 emitS2R();
1947 break;
1948 case OP_SELP:
1949 emitSEL();
1950 break;
1951 case OP_SET:
1952 case OP_SET_AND:
1953 case OP_SET_OR:
1954 case OP_SET_XOR:
1955 if (insn->def(0).getFile() != FILE_PREDICATE) {
1956 if (isFloatType(insn->dType)) {
1957 if (insn->dType == TYPE_F32) {
1958 emitFSET_BF();
1959 } else {
1960 assert(!"invalid FSET");
1961 emitNOP();
1962 }
1963 } else {
1964 assert(!"invalid SET");
1965 emitNOP();
1966 }
1967 } else {
1968 if (isFloatType(insn->sType))
1969 if (insn->sType == TYPE_F64)
1970 emitDSETP();
1971 else
1972 emitFSETP();
1973 else
1974 emitISETP();
1975 }
1976 break;
1977 case OP_SGXT:
1978 emitSGXT();
1979 break;
1980 case OP_SHF:
1981 emitSHF();
1982 break;
1983 case OP_SHFL:
1984 emitSHFL();
1985 break;
1986 case OP_SHLADD:
1987 emitLEA();
1988 break;
1989 case OP_STORE:
1990 switch (insn->src(0).getFile()) {
1991 case FILE_MEMORY_LOCAL : emitSTL(); break;
1992 case FILE_MEMORY_SHARED: emitSTS(); break;
1993 case FILE_MEMORY_GLOBAL: emitST(); break;
1994 default:
1995 assert(!"invalid store");
1996 emitNOP();
1997 break;
1998 }
1999 break;
2000 case OP_SULDB:
2001 case OP_SULDP:
2002 emitSULD();
2003 break;
2004 case OP_SUREDB:
2005 case OP_SUREDP:
2006 emitSUATOM();
2007 break;
2008 case OP_SUSTB:
2009 case OP_SUSTP:
2010 emitSUST();
2011 break;
2012 case OP_TEX:
2013 case OP_TXB:
2014 case OP_TXL:
2015 emitTEX();
2016 break;
2017 case OP_TXD:
2018 emitTXD();
2019 break;
2020 case OP_TXF:
2021 emitTLD();
2022 break;
2023 case OP_TXG:
2024 emitTLD4();
2025 break;
2026 case OP_TXLQ:
2027 emitTMML();
2028 break;
2029 case OP_TXQ:
2030 emitTXQ();
2031 break;
2032 case OP_VFETCH:
2033 emitALD();
2034 break;
2035 case OP_VOTE:
2036 emitVOTE();
2037 break;
2038 case OP_WARPSYNC:
2039 emitWARPSYNC();
2040 break;
2041 default:
2042 assert(!"invalid opcode");
2043 emitNOP();
2044 break;
2045 }
2046
2047 code[3] &= 0x000001ff;
2048 code[3] |= insn->sched << 9;
2049 code += 4;
2050 codeSize += 16;
2051 return true;
2052 }
2053
2054 void
prepareEmission(BasicBlock * bb)2055 CodeEmitterGV100::prepareEmission(BasicBlock *bb)
2056 {
2057 Function *func = bb->getFunction();
2058 Instruction *i;
2059 int j;
2060
2061 for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j);
2062
2063 for (; j >= 0; --j) {
2064 BasicBlock *in = func->bbArray[j];
2065 Instruction *exit = in->getExit();
2066
2067 if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) {
2068 in->binSize -= 16;
2069 func->binSize -= 16;
2070
2071 for (++j; j < func->bbCount; ++j)
2072 func->bbArray[j]->binPos -= 16;
2073
2074 in->remove(exit);
2075 }
2076 bb->binPos = in->binPos + in->binSize;
2077 if (in->binSize) // no more no-op branches to bb
2078 break;
2079 }
2080 func->bbArray[func->bbCount++] = bb;
2081
2082 if (!bb->getExit())
2083 return;
2084
2085 for (i = bb->getEntry(); i; i = i->next) {
2086 i->encSize = getMinEncodingSize(i);
2087 bb->binSize += i->encSize;
2088 }
2089
2090 assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 16));
2091
2092 func->binSize += bb->binSize;
2093 }
2094
2095 void
prepareEmission(Function * func)2096 CodeEmitterGV100::prepareEmission(Function *func)
2097 {
2098 SchedDataCalculatorGM107 sched(targ);
2099 CodeEmitter::prepareEmission(func);
2100 sched.run(func, true, true);
2101 }
2102
2103 void
prepareEmission(Program * prog)2104 CodeEmitterGV100::prepareEmission(Program *prog)
2105 {
2106 for (ArrayList::Iterator fi = prog->allFuncs.iterator();
2107 !fi.end(); fi.next()) {
2108 Function *func = reinterpret_cast<Function *>(fi.get());
2109 func->binPos = prog->binSize;
2110 prepareEmission(func);
2111 prog->binSize += func->binSize;
2112 }
2113
2114 this->prog = prog;
2115 }
2116
CodeEmitterGV100(TargetGV100 * target)2117 CodeEmitterGV100::CodeEmitterGV100(TargetGV100 *target)
2118 : CodeEmitter(target), prog(NULL), targ(target), insn(NULL)
2119 {
2120 code = NULL;
2121 codeSize = codeSizeLimit = 0;
2122 relocInfo = NULL;
2123 }
2124 };
2125