• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Mesa 3-D graphics library
3  *
4  * Copyright (C) 2012-2013 LunarG, Inc.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included
14  * in all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  * DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Chia-I Wu <olv@lunarg.com>
26  */
27 
28 #include "pipe/p_shader_tokens.h"
29 #include "toy_compiler.h"
30 #include "toy_tgsi.h"
31 #include "toy_helpers.h"
32 #include "toy_legalize.h"
33 
34 /**
35  * Lower an instruction to GEN6_OPCODE_SEND(C).
36  */
37 void
toy_compiler_lower_to_send(struct toy_compiler * tc,struct toy_inst * inst,bool sendc,unsigned sfid)38 toy_compiler_lower_to_send(struct toy_compiler *tc, struct toy_inst *inst,
39                            bool sendc, unsigned sfid)
40 {
41    assert(inst->opcode >= 128);
42 
43    inst->opcode = (sendc) ? GEN6_OPCODE_SENDC : GEN6_OPCODE_SEND;
44 
45    /* thread control is reserved */
46    assert(inst->thread_ctrl == 0);
47 
48    assert(inst->cond_modifier == GEN6_COND_NONE);
49    inst->cond_modifier = sfid;
50 }
51 
52 static int
math_op_to_func(unsigned opcode)53 math_op_to_func(unsigned opcode)
54 {
55    switch (opcode) {
56    case TOY_OPCODE_INV:    return GEN6_MATH_INV;
57    case TOY_OPCODE_LOG:    return GEN6_MATH_LOG;
58    case TOY_OPCODE_EXP:    return GEN6_MATH_EXP;
59    case TOY_OPCODE_SQRT:   return GEN6_MATH_SQRT;
60    case TOY_OPCODE_RSQ:    return GEN6_MATH_RSQ;
61    case TOY_OPCODE_SIN:    return GEN6_MATH_SIN;
62    case TOY_OPCODE_COS:    return GEN6_MATH_COS;
63    case TOY_OPCODE_FDIV:   return GEN6_MATH_FDIV;
64    case TOY_OPCODE_POW:    return GEN6_MATH_POW;
65    case TOY_OPCODE_INT_DIV_QUOTIENT:   return GEN6_MATH_INT_DIV_QUOTIENT;
66    case TOY_OPCODE_INT_DIV_REMAINDER:  return GEN6_MATH_INT_DIV_REMAINDER;
67    default:
68        assert(!"unknown math opcode");
69        return -1;
70    }
71 }
72 
73 /**
74  * Lower virtual math opcodes to GEN6_OPCODE_MATH.
75  */
76 void
toy_compiler_lower_math(struct toy_compiler * tc,struct toy_inst * inst)77 toy_compiler_lower_math(struct toy_compiler *tc, struct toy_inst *inst)
78 {
79    struct toy_dst tmp;
80    int i;
81 
82    /* see commit 250770b74d33bb8625c780a74a89477af033d13a */
83    for (i = 0; i < ARRAY_SIZE(inst->src); i++) {
84       if (tsrc_is_null(inst->src[i]))
85          break;
86 
87       /* no swizzling in align1 */
88       /* XXX how about source modifiers? */
89       if (toy_file_is_virtual(inst->src[i].file) &&
90           !tsrc_is_swizzled(inst->src[i]) &&
91           !inst->src[i].absolute &&
92           !inst->src[i].negate)
93          continue;
94 
95       tmp = tdst_type(tc_alloc_tmp(tc), inst->src[i].type);
96       tc_MOV(tc, tmp, inst->src[i]);
97       inst->src[i] = tsrc_from(tmp);
98    }
99 
100    /* FC[0:3] */
101    assert(inst->cond_modifier == GEN6_COND_NONE);
102    inst->cond_modifier = math_op_to_func(inst->opcode);
103    /* FC[4:5] */
104    assert(inst->thread_ctrl == 0);
105    inst->thread_ctrl = 0;
106 
107    inst->opcode = GEN6_OPCODE_MATH;
108    tc_move_inst(tc, inst);
109 
110    /* no writemask in align1 */
111    if (inst->dst.writemask != TOY_WRITEMASK_XYZW) {
112       struct toy_dst dst = inst->dst;
113       struct toy_inst *inst2;
114 
115       tmp = tc_alloc_tmp(tc);
116       tmp.type = inst->dst.type;
117       inst->dst = tmp;
118 
119       inst2 = tc_MOV(tc, dst, tsrc_from(tmp));
120       inst2->pred_ctrl = inst->pred_ctrl;
121    }
122 }
123 
124 static uint32_t
absolute_imm(uint32_t imm32,enum toy_type type)125 absolute_imm(uint32_t imm32, enum toy_type type)
126 {
127    union fi val = { .ui = imm32 };
128 
129    switch (type) {
130    case TOY_TYPE_F:
131       val.f = fabs(val.f);
132       break;
133    case TOY_TYPE_D:
134       if (val.i < 0)
135          val.i = -val.i;
136       break;
137    case TOY_TYPE_W:
138       if ((int16_t) (val.ui & 0xffff) < 0)
139          val.i = -((int16_t) (val.ui & 0xffff));
140       break;
141    case TOY_TYPE_V:
142       assert(!"cannot take absoulte of immediates of type V");
143       break;
144    default:
145       break;
146    }
147 
148    return val.ui;
149 }
150 
151 static uint32_t
negate_imm(uint32_t imm32,enum toy_type type)152 negate_imm(uint32_t imm32, enum toy_type type)
153 {
154    union fi val = { .ui = imm32 };
155 
156    switch (type) {
157    case TOY_TYPE_F:
158       val.f = -val.f;
159       break;
160    case TOY_TYPE_D:
161    case TOY_TYPE_UD:
162       val.i = -val.i;
163       break;
164    case TOY_TYPE_W:
165    case TOY_TYPE_UW:
166       val.i = -((int16_t) (val.ui & 0xffff));
167       break;
168    default:
169       assert(!"negate immediate of unknown type");
170       break;
171    }
172 
173    return val.ui;
174 }
175 
176 static void
validate_imm(struct toy_compiler * tc,struct toy_inst * inst)177 validate_imm(struct toy_compiler *tc, struct toy_inst *inst)
178 {
179    bool move_inst = false;
180    int i;
181 
182    for (i = 0; i < ARRAY_SIZE(inst->src); i++) {
183       struct toy_dst tmp;
184 
185       if (tsrc_is_null(inst->src[i]))
186          break;
187 
188       if (inst->src[i].file != TOY_FILE_IMM)
189          continue;
190 
191       if (inst->src[i].absolute) {
192          inst->src[i].val32 =
193             absolute_imm(inst->src[i].val32, inst->src[i].type);
194          inst->src[i].absolute = false;
195       }
196 
197       if (inst->src[i].negate) {
198          inst->src[i].val32 =
199             negate_imm(inst->src[i].val32, inst->src[i].type);
200          inst->src[i].negate = false;
201       }
202 
203       /* this is the last operand */
204       if (i + 1 == ARRAY_SIZE(inst->src) || tsrc_is_null(inst->src[i + 1]))
205          break;
206 
207       /* need to use a temp if this imm is not the last operand */
208       /* TODO we should simply swap the operands if the op is commutative */
209       tmp = tc_alloc_tmp(tc);
210       tmp = tdst_type(tmp, inst->src[i].type);
211       tc_MOV(tc, tmp, inst->src[i]);
212       inst->src[i] = tsrc_from(tmp);
213 
214       move_inst = true;
215    }
216 
217    if (move_inst)
218       tc_move_inst(tc, inst);
219 }
220 
221 static void
lower_opcode_mul(struct toy_compiler * tc,struct toy_inst * inst)222 lower_opcode_mul(struct toy_compiler *tc, struct toy_inst *inst)
223 {
224    const enum toy_type inst_type = inst->dst.type;
225    const struct toy_dst acc0 =
226       tdst_type(tdst(TOY_FILE_ARF, GEN6_ARF_ACC0, 0), inst_type);
227    struct toy_inst *inst2;
228 
229    /* only need to take care of integer multiplications */
230    if (inst_type != TOY_TYPE_UD && inst_type != TOY_TYPE_D)
231       return;
232 
233    /* acc0 = (src0 & 0x0000ffff) * src1 */
234    tc_MUL(tc, acc0, inst->src[0], inst->src[1]);
235 
236    /* acc0 = (src0 & 0xffff0000) * src1 + acc0 */
237    inst2 = tc_add2(tc, GEN6_OPCODE_MACH, tdst_type(tdst_null(), inst_type),
238          inst->src[0], inst->src[1]);
239    inst2->acc_wr_ctrl = true;
240 
241    /* dst = acc0 & 0xffffffff */
242    tc_MOV(tc, inst->dst, tsrc_from(acc0));
243 
244    tc_discard_inst(tc, inst);
245 }
246 
247 static void
lower_opcode_mac(struct toy_compiler * tc,struct toy_inst * inst)248 lower_opcode_mac(struct toy_compiler *tc, struct toy_inst *inst)
249 {
250    const enum toy_type inst_type = inst->dst.type;
251 
252    if (inst_type != TOY_TYPE_UD && inst_type != TOY_TYPE_D) {
253       const struct toy_dst acc0 = tdst(TOY_FILE_ARF, GEN6_ARF_ACC0, 0);
254 
255       tc_MOV(tc, acc0, inst->src[2]);
256       inst->src[2] = tsrc_null();
257       tc_move_inst(tc, inst);
258    }
259    else {
260       struct toy_dst tmp = tdst_type(tc_alloc_tmp(tc), inst_type);
261       struct toy_inst *inst2;
262 
263       inst2 = tc_MUL(tc, tmp, inst->src[0], inst->src[1]);
264       lower_opcode_mul(tc, inst2);
265 
266       tc_ADD(tc, inst->dst, tsrc_from(tmp), inst->src[2]);
267 
268       tc_discard_inst(tc, inst);
269    }
270 }
271 
272 /**
273  * Legalize the instructions for register allocation.
274  */
275 void
toy_compiler_legalize_for_ra(struct toy_compiler * tc)276 toy_compiler_legalize_for_ra(struct toy_compiler *tc)
277 {
278    struct toy_inst *inst;
279 
280    tc_head(tc);
281    while ((inst = tc_next(tc)) != NULL) {
282       switch (inst->opcode) {
283       case GEN6_OPCODE_MAC:
284          lower_opcode_mac(tc, inst);
285          break;
286       case GEN6_OPCODE_MAD:
287          /* TODO operands must be floats */
288          break;
289       case GEN6_OPCODE_MUL:
290          lower_opcode_mul(tc, inst);
291          break;
292       default:
293          if (inst->opcode > TOY_OPCODE_LAST_HW)
294             tc_fail(tc, "internal opcodes not lowered");
295       }
296    }
297 
298    /* loop again as the previous pass may add new instructions */
299    tc_head(tc);
300    while ((inst = tc_next(tc)) != NULL) {
301       validate_imm(tc, inst);
302    }
303 }
304 
305 static void
patch_while_jip(struct toy_compiler * tc,struct toy_inst * inst)306 patch_while_jip(struct toy_compiler *tc, struct toy_inst *inst)
307 {
308    struct toy_inst *inst2;
309    int nest_level, dist;
310 
311    nest_level = 0;
312    dist = -1;
313 
314    /* search backward */
315    LIST_FOR_EACH_ENTRY_FROM_REV(inst2, inst->list.prev,
316          &tc->instructions, list) {
317       if (inst2->marker) {
318          if (inst2->opcode == TOY_OPCODE_DO) {
319             if (nest_level) {
320                nest_level--;
321             }
322             else {
323                /* the following instruction */
324                dist++;
325                break;
326             }
327          }
328 
329          continue;
330       }
331 
332       if (inst2->opcode == GEN6_OPCODE_WHILE)
333          nest_level++;
334 
335       dist--;
336    }
337 
338    if (ilo_dev_gen(tc->dev) >= ILO_GEN(8))
339       inst->src[1] = tsrc_imm_d(dist * 16);
340    else if (ilo_dev_gen(tc->dev) >= ILO_GEN(7))
341       inst->src[1] = tsrc_imm_w(dist * 2);
342    else
343       inst->dst = tdst_imm_w(dist * 2);
344 }
345 
346 static void
patch_if_else_jip(struct toy_compiler * tc,struct toy_inst * inst)347 patch_if_else_jip(struct toy_compiler *tc, struct toy_inst *inst)
348 {
349    struct toy_inst *inst2;
350    int nest_level, dist;
351    int jip, uip;
352 
353    nest_level = 0;
354    dist = 1;
355    jip = 0;
356    uip = 0;
357 
358    /* search forward */
359    LIST_FOR_EACH_ENTRY_FROM(inst2, inst->list.next, &tc->instructions, list) {
360       if (inst2->marker)
361          continue;
362 
363       if (inst2->opcode == GEN6_OPCODE_ENDIF) {
364          if (nest_level) {
365             nest_level--;
366          }
367          else {
368             uip = dist * 2;
369             if (!jip)
370                jip = uip;
371             break;
372          }
373       }
374       else if (inst2->opcode == GEN6_OPCODE_ELSE &&
375                inst->opcode == GEN6_OPCODE_IF) {
376          if (!nest_level) {
377             /* the following instruction */
378             jip = (dist + 1) * 2;
379 
380             if (ilo_dev_gen(tc->dev) == ILO_GEN(6)) {
381                uip = jip;
382                break;
383             }
384          }
385       }
386       else if (inst2->opcode == GEN6_OPCODE_IF) {
387          nest_level++;
388       }
389 
390       dist++;
391    }
392 
393    if (ilo_dev_gen(tc->dev) >= ILO_GEN(8)) {
394       inst->dst.type = TOY_TYPE_D;
395       inst->src[0] = tsrc_imm_d(uip * 8);
396       inst->src[1] = tsrc_imm_d(jip * 8);
397    } else if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) {
398       /* what should the type be? */
399       inst->dst.type = TOY_TYPE_D;
400       inst->src[0].type = TOY_TYPE_D;
401       inst->src[1] = tsrc_imm_d(uip << 16 | jip);
402    } else {
403       inst->dst = tdst_imm_w(jip);
404    }
405 }
406 
407 static void
patch_endif_jip(struct toy_compiler * tc,struct toy_inst * inst)408 patch_endif_jip(struct toy_compiler *tc, struct toy_inst *inst)
409 {
410    struct toy_inst *inst2;
411    bool found = false;
412    int dist = 1;
413 
414    /* search forward for instructions that may enable channels */
415    LIST_FOR_EACH_ENTRY_FROM(inst2, inst->list.next, &tc->instructions, list) {
416       if (inst2->marker)
417          continue;
418 
419       switch (inst2->opcode) {
420       case GEN6_OPCODE_ENDIF:
421       case GEN6_OPCODE_ELSE:
422       case GEN6_OPCODE_WHILE:
423          found = true;
424          break;
425       default:
426          break;
427       }
428 
429       if (found)
430          break;
431 
432       dist++;
433    }
434 
435    /* should we set dist to (dist - 1) or 1? */
436    if (!found)
437       dist = 1;
438 
439    if (ilo_dev_gen(tc->dev) >= ILO_GEN(8))
440       inst->src[1] = tsrc_imm_d(dist * 16);
441    else if (ilo_dev_gen(tc->dev) >= ILO_GEN(7))
442       inst->src[1] = tsrc_imm_w(dist * 2);
443    else
444       inst->dst = tdst_imm_w(dist * 2);
445 }
446 
447 static void
patch_break_continue_jip(struct toy_compiler * tc,struct toy_inst * inst)448 patch_break_continue_jip(struct toy_compiler *tc, struct toy_inst *inst)
449 {
450    struct toy_inst *inst2, *inst3;
451    int nest_level, dist, jip, uip;
452 
453    nest_level = 0;
454    dist = 1;
455    jip = 1 * 2;
456    uip = 1 * 2;
457 
458    /* search forward */
459    LIST_FOR_EACH_ENTRY_FROM(inst2, inst->list.next, &tc->instructions, list) {
460       if (inst2->marker) {
461          if (inst2->opcode == TOY_OPCODE_DO)
462             nest_level++;
463          continue;
464       }
465 
466       if (inst2->opcode == GEN6_OPCODE_ELSE ||
467           inst2->opcode == GEN6_OPCODE_ENDIF ||
468           inst2->opcode == GEN6_OPCODE_WHILE) {
469          jip = dist * 2;
470          break;
471       }
472 
473       dist++;
474    }
475 
476    /* go on to determine uip */
477    inst3 = inst2;
478    LIST_FOR_EACH_ENTRY_FROM(inst2, &inst3->list, &tc->instructions, list) {
479       if (inst2->marker) {
480          if (inst2->opcode == TOY_OPCODE_DO)
481             nest_level++;
482          continue;
483       }
484 
485       if (inst2->opcode == GEN6_OPCODE_WHILE) {
486          if (nest_level) {
487             nest_level--;
488          }
489          else {
490             /* the following instruction */
491             if (ilo_dev_gen(tc->dev) == ILO_GEN(6) &&
492                 inst->opcode == GEN6_OPCODE_BREAK)
493                dist++;
494 
495             uip = dist * 2;
496             break;
497          }
498       }
499 
500       dist++;
501    }
502 
503    /* should the type be D or W? */
504    inst->dst.type = TOY_TYPE_D;
505    if (ilo_dev_gen(tc->dev) >= ILO_GEN(8)) {
506       inst->src[0] = tsrc_imm_d(uip * 8);
507       inst->src[1] = tsrc_imm_d(jip * 8);
508    } else {
509       inst->src[0].type = TOY_TYPE_D;
510       inst->src[1] = tsrc_imm_d(uip << 16 | jip);
511    }
512 }
513 
514 /**
515  * Legalize the instructions for assembling.
516  */
517 void
toy_compiler_legalize_for_asm(struct toy_compiler * tc)518 toy_compiler_legalize_for_asm(struct toy_compiler *tc)
519 {
520    struct toy_inst *inst;
521    int pc = 0;
522 
523    tc_head(tc);
524    while ((inst = tc_next(tc)) != NULL) {
525       int i;
526 
527       pc++;
528 
529       /*
530        * From the Sandy Bridge PRM, volume 4 part 2, page 112:
531        *
532        *     "Specifically, for instructions with a single source, it only
533        *      uses the first source operand <src0>. In this case, the second
534        *      source operand <src1> must be set to null and also with the same
535        *      type as the first source operand <src0>.  It is a special case
536        *      when <src0> is an immediate, as an immediate <src0> uses DW3 of
537        *      the instruction word, which is normally used by <src1>.  In this
538        *      case, <src1> must be programmed with register file ARF and the
539        *      same data type as <src0>."
540        *
541        * Since we already fill unused operands with null, we only need to take
542        * care of the type.
543        */
544       if (tsrc_is_null(inst->src[1]))
545          inst->src[1].type = inst->src[0].type;
546 
547       switch (inst->opcode) {
548       case GEN6_OPCODE_MATH:
549          /* math does not support align16 nor exec_size > 8 */
550          inst->access_mode = GEN6_ALIGN_1;
551 
552          if (inst->exec_size == GEN6_EXECSIZE_16) {
553             /*
554              * From the Ivy Bridge PRM, volume 4 part 3, page 192:
555              *
556              *     "INT DIV function does not support SIMD16."
557              */
558             if (ilo_dev_gen(tc->dev) < ILO_GEN(7) ||
559                 inst->cond_modifier == GEN6_MATH_INT_DIV_QUOTIENT ||
560                 inst->cond_modifier == GEN6_MATH_INT_DIV_REMAINDER) {
561                struct toy_inst *inst2;
562 
563                inst->exec_size = GEN6_EXECSIZE_8;
564                inst->qtr_ctrl = GEN6_QTRCTRL_1Q;
565 
566                inst2 = tc_duplicate_inst(tc, inst);
567                inst2->qtr_ctrl = GEN6_QTRCTRL_2Q;
568                inst2->dst = tdst_offset(inst2->dst, 1, 0);
569                inst2->src[0] = tsrc_offset(inst2->src[0], 1, 0);
570                if (!tsrc_is_null(inst2->src[1]))
571                   inst2->src[1] = tsrc_offset(inst2->src[1], 1, 0);
572 
573                pc++;
574             }
575          }
576          break;
577       case GEN6_OPCODE_IF:
578          if (ilo_dev_gen(tc->dev) >= ILO_GEN(7) &&
579              inst->cond_modifier != GEN6_COND_NONE) {
580             struct toy_inst *inst2;
581 
582             inst2 = tc_duplicate_inst(tc, inst);
583 
584             /* replace the original IF by CMP */
585             inst->opcode = GEN6_OPCODE_CMP;
586 
587             /* predicate control instead of condition modifier */
588             inst2->dst = tdst_null();
589             inst2->src[0] = tsrc_null();
590             inst2->src[1] = tsrc_null();
591             inst2->cond_modifier = GEN6_COND_NONE;
592             inst2->pred_ctrl = GEN6_PREDCTRL_NORMAL;
593 
594             pc++;
595          }
596          break;
597       default:
598          break;
599       }
600 
601       /* MRF to GRF */
602       if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) {
603          for (i = 0; i < ARRAY_SIZE(inst->src); i++) {
604             if (inst->src[i].file != TOY_FILE_MRF)
605                continue;
606             else if (tsrc_is_null(inst->src[i]))
607                break;
608 
609             inst->src[i].file = TOY_FILE_GRF;
610          }
611 
612          if (inst->dst.file == TOY_FILE_MRF)
613             inst->dst.file = TOY_FILE_GRF;
614       }
615    }
616 
617    tc->num_instructions = pc;
618 
619    /* set JIP/UIP */
620    tc_head(tc);
621    while ((inst = tc_next(tc)) != NULL) {
622       switch (inst->opcode) {
623       case GEN6_OPCODE_IF:
624       case GEN6_OPCODE_ELSE:
625          patch_if_else_jip(tc, inst);
626          break;
627       case GEN6_OPCODE_ENDIF:
628          patch_endif_jip(tc, inst);
629          break;
630       case GEN6_OPCODE_WHILE:
631          patch_while_jip(tc, inst);
632          break;
633       case GEN6_OPCODE_BREAK:
634       case GEN6_OPCODE_CONT:
635          patch_break_continue_jip(tc, inst);
636          break;
637       default:
638          break;
639       }
640    }
641 }
642