1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "pipe/p_shader_tokens.h"
29 #include "toy_compiler.h"
30 #include "toy_tgsi.h"
31 #include "toy_helpers.h"
32 #include "toy_legalize.h"
33
34 /**
35 * Lower an instruction to GEN6_OPCODE_SEND(C).
36 */
37 void
toy_compiler_lower_to_send(struct toy_compiler * tc,struct toy_inst * inst,bool sendc,unsigned sfid)38 toy_compiler_lower_to_send(struct toy_compiler *tc, struct toy_inst *inst,
39 bool sendc, unsigned sfid)
40 {
41 assert(inst->opcode >= 128);
42
43 inst->opcode = (sendc) ? GEN6_OPCODE_SENDC : GEN6_OPCODE_SEND;
44
45 /* thread control is reserved */
46 assert(inst->thread_ctrl == 0);
47
48 assert(inst->cond_modifier == GEN6_COND_NONE);
49 inst->cond_modifier = sfid;
50 }
51
52 static int
math_op_to_func(unsigned opcode)53 math_op_to_func(unsigned opcode)
54 {
55 switch (opcode) {
56 case TOY_OPCODE_INV: return GEN6_MATH_INV;
57 case TOY_OPCODE_LOG: return GEN6_MATH_LOG;
58 case TOY_OPCODE_EXP: return GEN6_MATH_EXP;
59 case TOY_OPCODE_SQRT: return GEN6_MATH_SQRT;
60 case TOY_OPCODE_RSQ: return GEN6_MATH_RSQ;
61 case TOY_OPCODE_SIN: return GEN6_MATH_SIN;
62 case TOY_OPCODE_COS: return GEN6_MATH_COS;
63 case TOY_OPCODE_FDIV: return GEN6_MATH_FDIV;
64 case TOY_OPCODE_POW: return GEN6_MATH_POW;
65 case TOY_OPCODE_INT_DIV_QUOTIENT: return GEN6_MATH_INT_DIV_QUOTIENT;
66 case TOY_OPCODE_INT_DIV_REMAINDER: return GEN6_MATH_INT_DIV_REMAINDER;
67 default:
68 assert(!"unknown math opcode");
69 return -1;
70 }
71 }
72
73 /**
74 * Lower virtual math opcodes to GEN6_OPCODE_MATH.
75 */
76 void
toy_compiler_lower_math(struct toy_compiler * tc,struct toy_inst * inst)77 toy_compiler_lower_math(struct toy_compiler *tc, struct toy_inst *inst)
78 {
79 struct toy_dst tmp;
80 int i;
81
82 /* see commit 250770b74d33bb8625c780a74a89477af033d13a */
83 for (i = 0; i < ARRAY_SIZE(inst->src); i++) {
84 if (tsrc_is_null(inst->src[i]))
85 break;
86
87 /* no swizzling in align1 */
88 /* XXX how about source modifiers? */
89 if (toy_file_is_virtual(inst->src[i].file) &&
90 !tsrc_is_swizzled(inst->src[i]) &&
91 !inst->src[i].absolute &&
92 !inst->src[i].negate)
93 continue;
94
95 tmp = tdst_type(tc_alloc_tmp(tc), inst->src[i].type);
96 tc_MOV(tc, tmp, inst->src[i]);
97 inst->src[i] = tsrc_from(tmp);
98 }
99
100 /* FC[0:3] */
101 assert(inst->cond_modifier == GEN6_COND_NONE);
102 inst->cond_modifier = math_op_to_func(inst->opcode);
103 /* FC[4:5] */
104 assert(inst->thread_ctrl == 0);
105 inst->thread_ctrl = 0;
106
107 inst->opcode = GEN6_OPCODE_MATH;
108 tc_move_inst(tc, inst);
109
110 /* no writemask in align1 */
111 if (inst->dst.writemask != TOY_WRITEMASK_XYZW) {
112 struct toy_dst dst = inst->dst;
113 struct toy_inst *inst2;
114
115 tmp = tc_alloc_tmp(tc);
116 tmp.type = inst->dst.type;
117 inst->dst = tmp;
118
119 inst2 = tc_MOV(tc, dst, tsrc_from(tmp));
120 inst2->pred_ctrl = inst->pred_ctrl;
121 }
122 }
123
124 static uint32_t
absolute_imm(uint32_t imm32,enum toy_type type)125 absolute_imm(uint32_t imm32, enum toy_type type)
126 {
127 union fi val = { .ui = imm32 };
128
129 switch (type) {
130 case TOY_TYPE_F:
131 val.f = fabs(val.f);
132 break;
133 case TOY_TYPE_D:
134 if (val.i < 0)
135 val.i = -val.i;
136 break;
137 case TOY_TYPE_W:
138 if ((int16_t) (val.ui & 0xffff) < 0)
139 val.i = -((int16_t) (val.ui & 0xffff));
140 break;
141 case TOY_TYPE_V:
142 assert(!"cannot take absoulte of immediates of type V");
143 break;
144 default:
145 break;
146 }
147
148 return val.ui;
149 }
150
151 static uint32_t
negate_imm(uint32_t imm32,enum toy_type type)152 negate_imm(uint32_t imm32, enum toy_type type)
153 {
154 union fi val = { .ui = imm32 };
155
156 switch (type) {
157 case TOY_TYPE_F:
158 val.f = -val.f;
159 break;
160 case TOY_TYPE_D:
161 case TOY_TYPE_UD:
162 val.i = -val.i;
163 break;
164 case TOY_TYPE_W:
165 case TOY_TYPE_UW:
166 val.i = -((int16_t) (val.ui & 0xffff));
167 break;
168 default:
169 assert(!"negate immediate of unknown type");
170 break;
171 }
172
173 return val.ui;
174 }
175
176 static void
validate_imm(struct toy_compiler * tc,struct toy_inst * inst)177 validate_imm(struct toy_compiler *tc, struct toy_inst *inst)
178 {
179 bool move_inst = false;
180 int i;
181
182 for (i = 0; i < ARRAY_SIZE(inst->src); i++) {
183 struct toy_dst tmp;
184
185 if (tsrc_is_null(inst->src[i]))
186 break;
187
188 if (inst->src[i].file != TOY_FILE_IMM)
189 continue;
190
191 if (inst->src[i].absolute) {
192 inst->src[i].val32 =
193 absolute_imm(inst->src[i].val32, inst->src[i].type);
194 inst->src[i].absolute = false;
195 }
196
197 if (inst->src[i].negate) {
198 inst->src[i].val32 =
199 negate_imm(inst->src[i].val32, inst->src[i].type);
200 inst->src[i].negate = false;
201 }
202
203 /* this is the last operand */
204 if (i + 1 == ARRAY_SIZE(inst->src) || tsrc_is_null(inst->src[i + 1]))
205 break;
206
207 /* need to use a temp if this imm is not the last operand */
208 /* TODO we should simply swap the operands if the op is commutative */
209 tmp = tc_alloc_tmp(tc);
210 tmp = tdst_type(tmp, inst->src[i].type);
211 tc_MOV(tc, tmp, inst->src[i]);
212 inst->src[i] = tsrc_from(tmp);
213
214 move_inst = true;
215 }
216
217 if (move_inst)
218 tc_move_inst(tc, inst);
219 }
220
221 static void
lower_opcode_mul(struct toy_compiler * tc,struct toy_inst * inst)222 lower_opcode_mul(struct toy_compiler *tc, struct toy_inst *inst)
223 {
224 const enum toy_type inst_type = inst->dst.type;
225 const struct toy_dst acc0 =
226 tdst_type(tdst(TOY_FILE_ARF, GEN6_ARF_ACC0, 0), inst_type);
227 struct toy_inst *inst2;
228
229 /* only need to take care of integer multiplications */
230 if (inst_type != TOY_TYPE_UD && inst_type != TOY_TYPE_D)
231 return;
232
233 /* acc0 = (src0 & 0x0000ffff) * src1 */
234 tc_MUL(tc, acc0, inst->src[0], inst->src[1]);
235
236 /* acc0 = (src0 & 0xffff0000) * src1 + acc0 */
237 inst2 = tc_add2(tc, GEN6_OPCODE_MACH, tdst_type(tdst_null(), inst_type),
238 inst->src[0], inst->src[1]);
239 inst2->acc_wr_ctrl = true;
240
241 /* dst = acc0 & 0xffffffff */
242 tc_MOV(tc, inst->dst, tsrc_from(acc0));
243
244 tc_discard_inst(tc, inst);
245 }
246
247 static void
lower_opcode_mac(struct toy_compiler * tc,struct toy_inst * inst)248 lower_opcode_mac(struct toy_compiler *tc, struct toy_inst *inst)
249 {
250 const enum toy_type inst_type = inst->dst.type;
251
252 if (inst_type != TOY_TYPE_UD && inst_type != TOY_TYPE_D) {
253 const struct toy_dst acc0 = tdst(TOY_FILE_ARF, GEN6_ARF_ACC0, 0);
254
255 tc_MOV(tc, acc0, inst->src[2]);
256 inst->src[2] = tsrc_null();
257 tc_move_inst(tc, inst);
258 }
259 else {
260 struct toy_dst tmp = tdst_type(tc_alloc_tmp(tc), inst_type);
261 struct toy_inst *inst2;
262
263 inst2 = tc_MUL(tc, tmp, inst->src[0], inst->src[1]);
264 lower_opcode_mul(tc, inst2);
265
266 tc_ADD(tc, inst->dst, tsrc_from(tmp), inst->src[2]);
267
268 tc_discard_inst(tc, inst);
269 }
270 }
271
272 /**
273 * Legalize the instructions for register allocation.
274 */
275 void
toy_compiler_legalize_for_ra(struct toy_compiler * tc)276 toy_compiler_legalize_for_ra(struct toy_compiler *tc)
277 {
278 struct toy_inst *inst;
279
280 tc_head(tc);
281 while ((inst = tc_next(tc)) != NULL) {
282 switch (inst->opcode) {
283 case GEN6_OPCODE_MAC:
284 lower_opcode_mac(tc, inst);
285 break;
286 case GEN6_OPCODE_MAD:
287 /* TODO operands must be floats */
288 break;
289 case GEN6_OPCODE_MUL:
290 lower_opcode_mul(tc, inst);
291 break;
292 default:
293 if (inst->opcode > TOY_OPCODE_LAST_HW)
294 tc_fail(tc, "internal opcodes not lowered");
295 }
296 }
297
298 /* loop again as the previous pass may add new instructions */
299 tc_head(tc);
300 while ((inst = tc_next(tc)) != NULL) {
301 validate_imm(tc, inst);
302 }
303 }
304
305 static void
patch_while_jip(struct toy_compiler * tc,struct toy_inst * inst)306 patch_while_jip(struct toy_compiler *tc, struct toy_inst *inst)
307 {
308 struct toy_inst *inst2;
309 int nest_level, dist;
310
311 nest_level = 0;
312 dist = -1;
313
314 /* search backward */
315 LIST_FOR_EACH_ENTRY_FROM_REV(inst2, inst->list.prev,
316 &tc->instructions, list) {
317 if (inst2->marker) {
318 if (inst2->opcode == TOY_OPCODE_DO) {
319 if (nest_level) {
320 nest_level--;
321 }
322 else {
323 /* the following instruction */
324 dist++;
325 break;
326 }
327 }
328
329 continue;
330 }
331
332 if (inst2->opcode == GEN6_OPCODE_WHILE)
333 nest_level++;
334
335 dist--;
336 }
337
338 if (ilo_dev_gen(tc->dev) >= ILO_GEN(8))
339 inst->src[1] = tsrc_imm_d(dist * 16);
340 else if (ilo_dev_gen(tc->dev) >= ILO_GEN(7))
341 inst->src[1] = tsrc_imm_w(dist * 2);
342 else
343 inst->dst = tdst_imm_w(dist * 2);
344 }
345
346 static void
patch_if_else_jip(struct toy_compiler * tc,struct toy_inst * inst)347 patch_if_else_jip(struct toy_compiler *tc, struct toy_inst *inst)
348 {
349 struct toy_inst *inst2;
350 int nest_level, dist;
351 int jip, uip;
352
353 nest_level = 0;
354 dist = 1;
355 jip = 0;
356 uip = 0;
357
358 /* search forward */
359 LIST_FOR_EACH_ENTRY_FROM(inst2, inst->list.next, &tc->instructions, list) {
360 if (inst2->marker)
361 continue;
362
363 if (inst2->opcode == GEN6_OPCODE_ENDIF) {
364 if (nest_level) {
365 nest_level--;
366 }
367 else {
368 uip = dist * 2;
369 if (!jip)
370 jip = uip;
371 break;
372 }
373 }
374 else if (inst2->opcode == GEN6_OPCODE_ELSE &&
375 inst->opcode == GEN6_OPCODE_IF) {
376 if (!nest_level) {
377 /* the following instruction */
378 jip = (dist + 1) * 2;
379
380 if (ilo_dev_gen(tc->dev) == ILO_GEN(6)) {
381 uip = jip;
382 break;
383 }
384 }
385 }
386 else if (inst2->opcode == GEN6_OPCODE_IF) {
387 nest_level++;
388 }
389
390 dist++;
391 }
392
393 if (ilo_dev_gen(tc->dev) >= ILO_GEN(8)) {
394 inst->dst.type = TOY_TYPE_D;
395 inst->src[0] = tsrc_imm_d(uip * 8);
396 inst->src[1] = tsrc_imm_d(jip * 8);
397 } else if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) {
398 /* what should the type be? */
399 inst->dst.type = TOY_TYPE_D;
400 inst->src[0].type = TOY_TYPE_D;
401 inst->src[1] = tsrc_imm_d(uip << 16 | jip);
402 } else {
403 inst->dst = tdst_imm_w(jip);
404 }
405 }
406
407 static void
patch_endif_jip(struct toy_compiler * tc,struct toy_inst * inst)408 patch_endif_jip(struct toy_compiler *tc, struct toy_inst *inst)
409 {
410 struct toy_inst *inst2;
411 bool found = false;
412 int dist = 1;
413
414 /* search forward for instructions that may enable channels */
415 LIST_FOR_EACH_ENTRY_FROM(inst2, inst->list.next, &tc->instructions, list) {
416 if (inst2->marker)
417 continue;
418
419 switch (inst2->opcode) {
420 case GEN6_OPCODE_ENDIF:
421 case GEN6_OPCODE_ELSE:
422 case GEN6_OPCODE_WHILE:
423 found = true;
424 break;
425 default:
426 break;
427 }
428
429 if (found)
430 break;
431
432 dist++;
433 }
434
435 /* should we set dist to (dist - 1) or 1? */
436 if (!found)
437 dist = 1;
438
439 if (ilo_dev_gen(tc->dev) >= ILO_GEN(8))
440 inst->src[1] = tsrc_imm_d(dist * 16);
441 else if (ilo_dev_gen(tc->dev) >= ILO_GEN(7))
442 inst->src[1] = tsrc_imm_w(dist * 2);
443 else
444 inst->dst = tdst_imm_w(dist * 2);
445 }
446
447 static void
patch_break_continue_jip(struct toy_compiler * tc,struct toy_inst * inst)448 patch_break_continue_jip(struct toy_compiler *tc, struct toy_inst *inst)
449 {
450 struct toy_inst *inst2, *inst3;
451 int nest_level, dist, jip, uip;
452
453 nest_level = 0;
454 dist = 1;
455 jip = 1 * 2;
456 uip = 1 * 2;
457
458 /* search forward */
459 LIST_FOR_EACH_ENTRY_FROM(inst2, inst->list.next, &tc->instructions, list) {
460 if (inst2->marker) {
461 if (inst2->opcode == TOY_OPCODE_DO)
462 nest_level++;
463 continue;
464 }
465
466 if (inst2->opcode == GEN6_OPCODE_ELSE ||
467 inst2->opcode == GEN6_OPCODE_ENDIF ||
468 inst2->opcode == GEN6_OPCODE_WHILE) {
469 jip = dist * 2;
470 break;
471 }
472
473 dist++;
474 }
475
476 /* go on to determine uip */
477 inst3 = inst2;
478 LIST_FOR_EACH_ENTRY_FROM(inst2, &inst3->list, &tc->instructions, list) {
479 if (inst2->marker) {
480 if (inst2->opcode == TOY_OPCODE_DO)
481 nest_level++;
482 continue;
483 }
484
485 if (inst2->opcode == GEN6_OPCODE_WHILE) {
486 if (nest_level) {
487 nest_level--;
488 }
489 else {
490 /* the following instruction */
491 if (ilo_dev_gen(tc->dev) == ILO_GEN(6) &&
492 inst->opcode == GEN6_OPCODE_BREAK)
493 dist++;
494
495 uip = dist * 2;
496 break;
497 }
498 }
499
500 dist++;
501 }
502
503 /* should the type be D or W? */
504 inst->dst.type = TOY_TYPE_D;
505 if (ilo_dev_gen(tc->dev) >= ILO_GEN(8)) {
506 inst->src[0] = tsrc_imm_d(uip * 8);
507 inst->src[1] = tsrc_imm_d(jip * 8);
508 } else {
509 inst->src[0].type = TOY_TYPE_D;
510 inst->src[1] = tsrc_imm_d(uip << 16 | jip);
511 }
512 }
513
514 /**
515 * Legalize the instructions for assembling.
516 */
517 void
toy_compiler_legalize_for_asm(struct toy_compiler * tc)518 toy_compiler_legalize_for_asm(struct toy_compiler *tc)
519 {
520 struct toy_inst *inst;
521 int pc = 0;
522
523 tc_head(tc);
524 while ((inst = tc_next(tc)) != NULL) {
525 int i;
526
527 pc++;
528
529 /*
530 * From the Sandy Bridge PRM, volume 4 part 2, page 112:
531 *
532 * "Specifically, for instructions with a single source, it only
533 * uses the first source operand <src0>. In this case, the second
534 * source operand <src1> must be set to null and also with the same
535 * type as the first source operand <src0>. It is a special case
536 * when <src0> is an immediate, as an immediate <src0> uses DW3 of
537 * the instruction word, which is normally used by <src1>. In this
538 * case, <src1> must be programmed with register file ARF and the
539 * same data type as <src0>."
540 *
541 * Since we already fill unused operands with null, we only need to take
542 * care of the type.
543 */
544 if (tsrc_is_null(inst->src[1]))
545 inst->src[1].type = inst->src[0].type;
546
547 switch (inst->opcode) {
548 case GEN6_OPCODE_MATH:
549 /* math does not support align16 nor exec_size > 8 */
550 inst->access_mode = GEN6_ALIGN_1;
551
552 if (inst->exec_size == GEN6_EXECSIZE_16) {
553 /*
554 * From the Ivy Bridge PRM, volume 4 part 3, page 192:
555 *
556 * "INT DIV function does not support SIMD16."
557 */
558 if (ilo_dev_gen(tc->dev) < ILO_GEN(7) ||
559 inst->cond_modifier == GEN6_MATH_INT_DIV_QUOTIENT ||
560 inst->cond_modifier == GEN6_MATH_INT_DIV_REMAINDER) {
561 struct toy_inst *inst2;
562
563 inst->exec_size = GEN6_EXECSIZE_8;
564 inst->qtr_ctrl = GEN6_QTRCTRL_1Q;
565
566 inst2 = tc_duplicate_inst(tc, inst);
567 inst2->qtr_ctrl = GEN6_QTRCTRL_2Q;
568 inst2->dst = tdst_offset(inst2->dst, 1, 0);
569 inst2->src[0] = tsrc_offset(inst2->src[0], 1, 0);
570 if (!tsrc_is_null(inst2->src[1]))
571 inst2->src[1] = tsrc_offset(inst2->src[1], 1, 0);
572
573 pc++;
574 }
575 }
576 break;
577 case GEN6_OPCODE_IF:
578 if (ilo_dev_gen(tc->dev) >= ILO_GEN(7) &&
579 inst->cond_modifier != GEN6_COND_NONE) {
580 struct toy_inst *inst2;
581
582 inst2 = tc_duplicate_inst(tc, inst);
583
584 /* replace the original IF by CMP */
585 inst->opcode = GEN6_OPCODE_CMP;
586
587 /* predicate control instead of condition modifier */
588 inst2->dst = tdst_null();
589 inst2->src[0] = tsrc_null();
590 inst2->src[1] = tsrc_null();
591 inst2->cond_modifier = GEN6_COND_NONE;
592 inst2->pred_ctrl = GEN6_PREDCTRL_NORMAL;
593
594 pc++;
595 }
596 break;
597 default:
598 break;
599 }
600
601 /* MRF to GRF */
602 if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) {
603 for (i = 0; i < ARRAY_SIZE(inst->src); i++) {
604 if (inst->src[i].file != TOY_FILE_MRF)
605 continue;
606 else if (tsrc_is_null(inst->src[i]))
607 break;
608
609 inst->src[i].file = TOY_FILE_GRF;
610 }
611
612 if (inst->dst.file == TOY_FILE_MRF)
613 inst->dst.file = TOY_FILE_GRF;
614 }
615 }
616
617 tc->num_instructions = pc;
618
619 /* set JIP/UIP */
620 tc_head(tc);
621 while ((inst = tc_next(tc)) != NULL) {
622 switch (inst->opcode) {
623 case GEN6_OPCODE_IF:
624 case GEN6_OPCODE_ELSE:
625 patch_if_else_jip(tc, inst);
626 break;
627 case GEN6_OPCODE_ENDIF:
628 patch_endif_jip(tc, inst);
629 break;
630 case GEN6_OPCODE_WHILE:
631 patch_while_jip(tc, inst);
632 break;
633 case GEN6_OPCODE_BREAK:
634 case GEN6_OPCODE_CONT:
635 patch_break_continue_jip(tc, inst);
636 break;
637 default:
638 break;
639 }
640 }
641 }
642