• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2011 The Chromium OS authors.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 #include "i915_context.h"
29 #include "i915_fpc.h"
30 #include "i915_reg.h"
31 
32 #include "pipe/p_shader_tokens.h"
33 #include "tgsi/tgsi_dump.h"
34 #include "tgsi/tgsi_exec.h"
35 #include "tgsi/tgsi_parse.h"
36 #include "util/u_math.h"
37 #include "util/u_memory.h"
38 #include "util/u_string.h"
39 
40 struct i915_optimize_context {
41    int first_write[TGSI_EXEC_NUM_TEMPS];
42    int last_read[TGSI_EXEC_NUM_TEMPS];
43 };
44 
45 static bool
same_src_dst_reg(struct i915_full_src_register * s1,struct i915_full_dst_register * d1)46 same_src_dst_reg(struct i915_full_src_register *s1,
47                  struct i915_full_dst_register *d1)
48 {
49    return (s1->Register.File == d1->Register.File &&
50            s1->Register.Indirect == d1->Register.Indirect &&
51            s1->Register.Dimension == d1->Register.Dimension &&
52            s1->Register.Index == d1->Register.Index);
53 }
54 
55 static bool
same_dst_reg(struct i915_full_dst_register * d1,struct i915_full_dst_register * d2)56 same_dst_reg(struct i915_full_dst_register *d1,
57              struct i915_full_dst_register *d2)
58 {
59    return (d1->Register.File == d2->Register.File &&
60            d1->Register.Indirect == d2->Register.Indirect &&
61            d1->Register.Dimension == d2->Register.Dimension &&
62            d1->Register.Index == d2->Register.Index);
63 }
64 
65 static bool
same_src_reg(struct i915_full_src_register * d1,struct i915_full_src_register * d2)66 same_src_reg(struct i915_full_src_register *d1,
67              struct i915_full_src_register *d2)
68 {
69    return (d1->Register.File == d2->Register.File &&
70            d1->Register.Indirect == d2->Register.Indirect &&
71            d1->Register.Dimension == d2->Register.Dimension &&
72            d1->Register.Index == d2->Register.Index &&
73            d1->Register.Absolute == d2->Register.Absolute &&
74            d1->Register.Negate == d2->Register.Negate);
75 }
76 
77 static const struct {
78    bool is_texture;
79    bool commutes;
80    unsigned neutral_element;
81    unsigned num_dst;
82    unsigned num_src;
83 } op_table[TGSI_OPCODE_LAST] = {
84    [TGSI_OPCODE_ADD] = {false, true, TGSI_SWIZZLE_ZERO, 1, 2},
85    [TGSI_OPCODE_CEIL] = {false, false, 0, 1, 1},
86    [TGSI_OPCODE_CMP] = {false, false, 0, 1, 2},
87    [TGSI_OPCODE_COS] = {false, false, 0, 1, 1},
88    [TGSI_OPCODE_DDX] = {false, false, 0, 1, 0},
89    [TGSI_OPCODE_DDY] = {false, false, 0, 1, 0},
90    [TGSI_OPCODE_DP2] = {false, true, TGSI_SWIZZLE_ONE, 1, 2},
91    [TGSI_OPCODE_DP3] = {false, true, TGSI_SWIZZLE_ONE, 1, 2},
92    [TGSI_OPCODE_DP4] = {false, true, TGSI_SWIZZLE_ONE, 1, 2},
93    [TGSI_OPCODE_DST] = {false, false, 0, 1, 2},
94    [TGSI_OPCODE_END] = {false, false, 0, 0, 0},
95    [TGSI_OPCODE_EX2] = {false, false, 0, 1, 1},
96    [TGSI_OPCODE_FLR] = {false, false, 0, 1, 1},
97    [TGSI_OPCODE_FRC] = {false, false, 0, 1, 1},
98    [TGSI_OPCODE_KILL_IF] = {false, false, 0, 0, 1},
99    [TGSI_OPCODE_KILL] = {false, false, 0, 0, 0},
100    [TGSI_OPCODE_LG2] = {false, false, 0, 1, 1},
101    [TGSI_OPCODE_LIT] = {false, false, 0, 1, 1},
102    [TGSI_OPCODE_LRP] = {false, false, 0, 1, 3},
103    [TGSI_OPCODE_MAX] = {false, false, 0, 1, 2},
104    [TGSI_OPCODE_MAD] = {false, false, 0, 1, 3},
105    [TGSI_OPCODE_MIN] = {false, false, 0, 1, 2},
106    [TGSI_OPCODE_MOV] = {false, false, 0, 1, 1},
107    [TGSI_OPCODE_MUL] = {false, true, TGSI_SWIZZLE_ONE, 1, 2},
108    [TGSI_OPCODE_NOP] = {false, false, 0, 0, 0},
109    [TGSI_OPCODE_POW] = {false, false, 0, 1, 2},
110    [TGSI_OPCODE_RCP] = {false, false, 0, 1, 1},
111    [TGSI_OPCODE_RET] = {false, false, 0, 0, 0},
112    [TGSI_OPCODE_RSQ] = {false, false, 0, 1, 1},
113    [TGSI_OPCODE_SEQ] = {false, false, 0, 1, 2},
114    [TGSI_OPCODE_SGE] = {false, false, 0, 1, 2},
115    [TGSI_OPCODE_SGT] = {false, false, 0, 1, 2},
116    [TGSI_OPCODE_SIN] = {false, false, 0, 1, 1},
117    [TGSI_OPCODE_SLE] = {false, false, 0, 1, 2},
118    [TGSI_OPCODE_SLT] = {false, false, 0, 1, 2},
119    [TGSI_OPCODE_SNE] = {false, false, 0, 1, 2},
120    [TGSI_OPCODE_SSG] = {false, false, 0, 1, 1},
121    [TGSI_OPCODE_TEX] = {true, false, 0, 1, 2},
122    [TGSI_OPCODE_TRUNC] = {false, false, 0, 1, 1},
123    [TGSI_OPCODE_TXB] = {true, false, 0, 1, 2},
124    [TGSI_OPCODE_TXP] = {true, false, 0, 1, 2},
125 };
126 
127 static bool
op_has_dst(unsigned opcode)128 op_has_dst(unsigned opcode)
129 {
130    return (op_table[opcode].num_dst > 0);
131 }
132 
133 static int
op_num_dst(unsigned opcode)134 op_num_dst(unsigned opcode)
135 {
136    return op_table[opcode].num_dst;
137 }
138 
139 static int
op_num_src(unsigned opcode)140 op_num_src(unsigned opcode)
141 {
142    return op_table[opcode].num_src;
143 }
144 
145 static bool
op_commutes(unsigned opcode)146 op_commutes(unsigned opcode)
147 {
148    return op_table[opcode].commutes;
149 }
150 
151 static bool
is_unswizzled(struct i915_full_src_register * r,unsigned write_mask)152 is_unswizzled(struct i915_full_src_register *r, unsigned write_mask)
153 {
154    if (write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X)
155       return false;
156    if (write_mask & TGSI_WRITEMASK_Y && r->Register.SwizzleY != TGSI_SWIZZLE_Y)
157       return false;
158    if (write_mask & TGSI_WRITEMASK_Z && r->Register.SwizzleZ != TGSI_SWIZZLE_Z)
159       return false;
160    if (write_mask & TGSI_WRITEMASK_W && r->Register.SwizzleW != TGSI_SWIZZLE_W)
161       return false;
162    return true;
163 }
164 
165 static bool
op_is_texture(unsigned opcode)166 op_is_texture(unsigned opcode)
167 {
168    return op_table[opcode].is_texture;
169 }
170 
171 static unsigned
op_neutral_element(unsigned opcode)172 op_neutral_element(unsigned opcode)
173 {
174    unsigned ne = op_table[opcode].neutral_element;
175    if (!ne) {
176       debug_printf("No neutral element for opcode %d\n", opcode);
177       ne = TGSI_SWIZZLE_ZERO;
178    }
179    return ne;
180 }
181 
182 /*
183  * Sets the swizzle to the neutral element for the operation for the bits
184  * of writemask which are set, swizzle to identity otherwise.
185  */
186 static void
set_neutral_element_swizzle(struct i915_full_src_register * r,unsigned write_mask,unsigned neutral)187 set_neutral_element_swizzle(struct i915_full_src_register *r,
188                             unsigned write_mask, unsigned neutral)
189 {
190    if (write_mask & TGSI_WRITEMASK_X)
191       r->Register.SwizzleX = neutral;
192    else
193       r->Register.SwizzleX = TGSI_SWIZZLE_X;
194 
195    if (write_mask & TGSI_WRITEMASK_Y)
196       r->Register.SwizzleY = neutral;
197    else
198       r->Register.SwizzleY = TGSI_SWIZZLE_Y;
199 
200    if (write_mask & TGSI_WRITEMASK_Z)
201       r->Register.SwizzleZ = neutral;
202    else
203       r->Register.SwizzleZ = TGSI_SWIZZLE_Z;
204 
205    if (write_mask & TGSI_WRITEMASK_W)
206       r->Register.SwizzleW = neutral;
207    else
208       r->Register.SwizzleW = TGSI_SWIZZLE_W;
209 }
210 
211 static void
copy_src_reg(struct i915_src_register * o,const struct tgsi_src_register * i)212 copy_src_reg(struct i915_src_register *o, const struct tgsi_src_register *i)
213 {
214    o->File = i->File;
215    o->Indirect = i->Indirect;
216    o->Dimension = i->Dimension;
217    o->Index = i->Index;
218    o->SwizzleX = i->SwizzleX;
219    o->SwizzleY = i->SwizzleY;
220    o->SwizzleZ = i->SwizzleZ;
221    o->SwizzleW = i->SwizzleW;
222    o->Absolute = i->Absolute;
223    o->Negate = i->Negate;
224 }
225 
226 static void
copy_dst_reg(struct i915_dst_register * o,const struct tgsi_dst_register * i)227 copy_dst_reg(struct i915_dst_register *o, const struct tgsi_dst_register *i)
228 {
229    o->File = i->File;
230    o->WriteMask = i->WriteMask;
231    o->Indirect = i->Indirect;
232    o->Dimension = i->Dimension;
233    o->Index = i->Index;
234 }
235 
236 static void
copy_instruction(struct i915_full_instruction * o,const struct tgsi_full_instruction * i)237 copy_instruction(struct i915_full_instruction *o,
238                  const struct tgsi_full_instruction *i)
239 {
240    memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction));
241    memcpy(&o->Texture, &i->Texture, sizeof(o->Texture));
242 
243    copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register);
244 
245    copy_src_reg(&o->Src[0].Register, &i->Src[0].Register);
246    copy_src_reg(&o->Src[1].Register, &i->Src[1].Register);
247    copy_src_reg(&o->Src[2].Register, &i->Src[2].Register);
248 }
249 
250 static void
copy_token(union i915_full_token * o,union tgsi_full_token * i)251 copy_token(union i915_full_token *o, union tgsi_full_token *i)
252 {
253    if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
254       memcpy(o, i, sizeof(*o));
255    else
256       copy_instruction(&o->FullInstruction, &i->FullInstruction);
257 }
258 
259 static void
liveness_mark_written(struct i915_optimize_context * ctx,struct i915_full_dst_register * dst_reg,int pos)260 liveness_mark_written(struct i915_optimize_context *ctx,
261                       struct i915_full_dst_register *dst_reg, int pos)
262 {
263    int dst_reg_index;
264    if (dst_reg->Register.File == TGSI_FILE_TEMPORARY) {
265       dst_reg_index = dst_reg->Register.Index;
266       assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);
267       /* dead -> live transition */
268       if (ctx->first_write[dst_reg_index] != -1)
269          ctx->first_write[dst_reg_index] = pos;
270    }
271 }
272 
273 static void
liveness_mark_read(struct i915_optimize_context * ctx,struct i915_full_src_register * src_reg,int pos)274 liveness_mark_read(struct i915_optimize_context *ctx,
275                    struct i915_full_src_register *src_reg, int pos)
276 {
277    int src_reg_index;
278    if (src_reg->Register.File == TGSI_FILE_TEMPORARY) {
279       src_reg_index = src_reg->Register.Index;
280       assert(src_reg_index < TGSI_EXEC_NUM_TEMPS);
281       /* live -> dead transition */
282       if (ctx->last_read[src_reg_index] != -1)
283          ctx->last_read[src_reg_index] = pos;
284    }
285 }
286 
287 static void
liveness_analysis(struct i915_optimize_context * ctx,struct i915_token_list * tokens)288 liveness_analysis(struct i915_optimize_context *ctx,
289                   struct i915_token_list *tokens)
290 {
291    struct i915_full_dst_register *dst_reg;
292    struct i915_full_src_register *src_reg;
293    union i915_full_token *current;
294    unsigned opcode;
295    int num_dst, num_src;
296    int i = 0;
297 
298    for (i = 0; i < TGSI_EXEC_NUM_TEMPS; i++) {
299       ctx->first_write[i] = -1;
300       ctx->last_read[i] = -1;
301    }
302 
303    for (i = 0; i < tokens->NumTokens; i++) {
304       current = &tokens->Tokens[i];
305 
306       if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
307          continue;
308 
309       opcode = current->FullInstruction.Instruction.Opcode;
310       num_dst = op_num_dst(opcode);
311 
312       switch (num_dst) {
313       case 1:
314          dst_reg = &current->FullInstruction.Dst[0];
315          liveness_mark_written(ctx, dst_reg, i);
316          FALLTHROUGH;
317       case 0:
318          break;
319       default:
320          debug_printf("Op %d has %d dst regs\n", opcode, num_dst);
321          break;
322       }
323    }
324 
325    for (i = tokens->NumTokens - 1; i >= 0; i--) {
326       current = &tokens->Tokens[i];
327 
328       if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
329          continue;
330 
331       opcode = current->FullInstruction.Instruction.Opcode;
332       num_src = op_num_src(opcode);
333 
334       switch (num_src) {
335       case 3:
336          src_reg = &current->FullInstruction.Src[2];
337          liveness_mark_read(ctx, src_reg, i);
338          FALLTHROUGH;
339       case 2:
340          src_reg = &current->FullInstruction.Src[1];
341          liveness_mark_read(ctx, src_reg, i);
342          FALLTHROUGH;
343       case 1:
344          src_reg = &current->FullInstruction.Src[0];
345          liveness_mark_read(ctx, src_reg, i);
346          FALLTHROUGH;
347       case 0:
348          break;
349       default:
350          debug_printf("Op %d has %d src regs\n", opcode, num_src);
351          break;
352       }
353    }
354 }
355 
356 static int
unused_from(struct i915_optimize_context * ctx,struct i915_full_dst_register * dst_reg,int from)357 unused_from(struct i915_optimize_context *ctx,
358             struct i915_full_dst_register *dst_reg, int from)
359 {
360    int dst_reg_index = dst_reg->Register.Index;
361    assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);
362    return (from >= ctx->last_read[dst_reg_index]);
363 }
364 
365 /* Returns a mask with the components used for a texture access instruction */
366 static unsigned
i915_tex_mask(union i915_full_token * instr)367 i915_tex_mask(union i915_full_token *instr)
368 {
369    return i915_coord_mask(instr->FullInstruction.Instruction.Opcode,
370                           instr->FullInstruction.Texture.Texture);
371 }
372 
373 static bool
target_is_texture2d(uint32_t tex)374 target_is_texture2d(uint32_t tex)
375 {
376    switch (tex) {
377    case TGSI_TEXTURE_2D:
378    case TGSI_TEXTURE_RECT:
379       return true;
380    default:
381       return false;
382    }
383 }
384 
385 /*
386  * Optimize away useless indirect texture reads:
387  *    MOV TEMP[0].xy, IN[0].xyyy
388  *    TEX TEMP[1], TEMP[0], SAMP[0], 2D
389  * into:
390  *    TEX TEMP[1], IN[0], SAMP[0], 2D
391  *
392  * note: this only seems to work on 2D/RECT textures, but not SHAADOW2D/1D/..
393  */
394 static void
i915_fpc_optimize_mov_before_tex(struct i915_optimize_context * ctx,struct i915_token_list * tokens,int index)395 i915_fpc_optimize_mov_before_tex(struct i915_optimize_context *ctx,
396                                  struct i915_token_list *tokens, int index)
397 {
398    union i915_full_token *current = &tokens->Tokens[index - 1];
399    union i915_full_token *next = &tokens->Tokens[index];
400 
401    if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
402        next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
403        current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
404        op_is_texture(next->FullInstruction.Instruction.Opcode) &&
405        target_is_texture2d(next->FullInstruction.Texture.Texture) &&
406        same_src_dst_reg(&next->FullInstruction.Src[0],
407                         &current->FullInstruction.Dst[0]) &&
408        is_unswizzled(&current->FullInstruction.Src[0], i915_tex_mask(next)) &&
409        unused_from(ctx, &current->FullInstruction.Dst[0], index)) {
410       memcpy(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0],
411              sizeof(struct i915_src_register));
412       current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
413    }
414 }
415 
416 /*
417  * Optimize away things like:
418  *    MOV TEMP[0].xy, TEMP[1].xyyy (first write for TEMP[0])
419  *    MOV TEMP[0].w, TEMP[1].wwww (last write for TEMP[0])
420  * into:
421  *    NOP
422  *    MOV OUT[0].xyw, TEMP[1].xyww
423  */
424 static void
i915_fpc_optimize_mov_after_mov(union i915_full_token * current,union i915_full_token * next)425 i915_fpc_optimize_mov_after_mov(union i915_full_token *current,
426                                 union i915_full_token *next)
427 {
428    struct i915_full_src_register *src_reg1, *src_reg2;
429    struct i915_full_dst_register *dst_reg1, *dst_reg2;
430    unsigned swizzle_x, swizzle_y, swizzle_z, swizzle_w;
431 
432    if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
433        next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
434        current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
435        next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
436        current->FullInstruction.Instruction.Saturate ==
437           next->FullInstruction.Instruction.Saturate &&
438        same_dst_reg(&next->FullInstruction.Dst[0],
439                     &current->FullInstruction.Dst[0]) &&
440        same_src_reg(&next->FullInstruction.Src[0],
441                     &current->FullInstruction.Src[0]) &&
442        !same_src_dst_reg(&current->FullInstruction.Src[0],
443                          &current->FullInstruction.Dst[0])) {
444       src_reg1 = &current->FullInstruction.Src[0];
445       dst_reg1 = &current->FullInstruction.Dst[0];
446       src_reg2 = &next->FullInstruction.Src[0];
447       dst_reg2 = &next->FullInstruction.Dst[0];
448 
449       /* Start with swizzles from the first mov */
450       swizzle_x = src_reg1->Register.SwizzleX;
451       swizzle_y = src_reg1->Register.SwizzleY;
452       swizzle_z = src_reg1->Register.SwizzleZ;
453       swizzle_w = src_reg1->Register.SwizzleW;
454 
455       /* Pile the second mov on top */
456       if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_X)
457          swizzle_x = src_reg2->Register.SwizzleX;
458       if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Y)
459          swizzle_y = src_reg2->Register.SwizzleY;
460       if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Z)
461          swizzle_z = src_reg2->Register.SwizzleZ;
462       if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_W)
463          swizzle_w = src_reg2->Register.SwizzleW;
464 
465       dst_reg2->Register.WriteMask |= dst_reg1->Register.WriteMask;
466       src_reg2->Register.SwizzleX = swizzle_x;
467       src_reg2->Register.SwizzleY = swizzle_y;
468       src_reg2->Register.SwizzleZ = swizzle_z;
469       src_reg2->Register.SwizzleW = swizzle_w;
470 
471       current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
472 
473       return;
474    }
475 }
476 
477 /*
478  * Optimize away things like:
479  *    MUL OUT[0].xyz, TEMP[1], TEMP[2]
480  *    MOV OUT[0].w, TEMP[2]
481  * into:
482  *    MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
483  * This is useful for optimizing texenv.
484  */
485 static void
i915_fpc_optimize_mov_after_alu(union i915_full_token * current,union i915_full_token * next)486 i915_fpc_optimize_mov_after_alu(union i915_full_token *current,
487                                 union i915_full_token *next)
488 {
489    if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
490        next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
491        op_commutes(current->FullInstruction.Instruction.Opcode) &&
492        current->FullInstruction.Instruction.Saturate ==
493           next->FullInstruction.Instruction.Saturate &&
494        next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
495        same_dst_reg(&next->FullInstruction.Dst[0],
496                     &current->FullInstruction.Dst[0]) &&
497        same_src_reg(&next->FullInstruction.Src[0],
498                     &current->FullInstruction.Src[1]) &&
499        !same_src_dst_reg(&next->FullInstruction.Src[0],
500                          &current->FullInstruction.Dst[0]) &&
501        is_unswizzled(&current->FullInstruction.Src[0],
502                      current->FullInstruction.Dst[0].Register.WriteMask) &&
503        is_unswizzled(&current->FullInstruction.Src[1],
504                      current->FullInstruction.Dst[0].Register.WriteMask) &&
505        is_unswizzled(&next->FullInstruction.Src[0],
506                      next->FullInstruction.Dst[0].Register.WriteMask)) {
507       next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
508 
509       set_neutral_element_swizzle(&current->FullInstruction.Src[1], 0, 0);
510       set_neutral_element_swizzle(
511          &current->FullInstruction.Src[0],
512          next->FullInstruction.Dst[0].Register.WriteMask,
513          op_neutral_element(current->FullInstruction.Instruction.Opcode));
514 
515       current->FullInstruction.Dst[0].Register.WriteMask =
516          current->FullInstruction.Dst[0].Register.WriteMask |
517          next->FullInstruction.Dst[0].Register.WriteMask;
518       return;
519    }
520 
521    if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
522        next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
523        op_commutes(current->FullInstruction.Instruction.Opcode) &&
524        current->FullInstruction.Instruction.Saturate ==
525           next->FullInstruction.Instruction.Saturate &&
526        next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
527        same_dst_reg(&next->FullInstruction.Dst[0],
528                     &current->FullInstruction.Dst[0]) &&
529        same_src_reg(&next->FullInstruction.Src[0],
530                     &current->FullInstruction.Src[0]) &&
531        !same_src_dst_reg(&next->FullInstruction.Src[0],
532                          &current->FullInstruction.Dst[0]) &&
533        is_unswizzled(&current->FullInstruction.Src[0],
534                      current->FullInstruction.Dst[0].Register.WriteMask) &&
535        is_unswizzled(&current->FullInstruction.Src[1],
536                      current->FullInstruction.Dst[0].Register.WriteMask) &&
537        is_unswizzled(&next->FullInstruction.Src[0],
538                      next->FullInstruction.Dst[0].Register.WriteMask)) {
539       next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
540 
541       set_neutral_element_swizzle(&current->FullInstruction.Src[0], 0, 0);
542       set_neutral_element_swizzle(
543          &current->FullInstruction.Src[1],
544          next->FullInstruction.Dst[0].Register.WriteMask,
545          op_neutral_element(current->FullInstruction.Instruction.Opcode));
546 
547       current->FullInstruction.Dst[0].Register.WriteMask =
548          current->FullInstruction.Dst[0].Register.WriteMask |
549          next->FullInstruction.Dst[0].Register.WriteMask;
550       return;
551    }
552 }
553 
554 /*
555  * Optimize away things like:
556  *    MOV TEMP[0].xyz TEMP[0].xyzx
557  * into:
558  *    NOP
559  */
560 static bool
i915_fpc_useless_mov(union tgsi_full_token * tgsi_current)561 i915_fpc_useless_mov(union tgsi_full_token *tgsi_current)
562 {
563    union i915_full_token current;
564    copy_token(&current, tgsi_current);
565    if (current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
566        current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
567        op_has_dst(current.FullInstruction.Instruction.Opcode) &&
568        !current.FullInstruction.Instruction.Saturate &&
569        current.FullInstruction.Src[0].Register.Absolute == 0 &&
570        current.FullInstruction.Src[0].Register.Negate == 0 &&
571        is_unswizzled(&current.FullInstruction.Src[0],
572                      current.FullInstruction.Dst[0].Register.WriteMask) &&
573        same_src_dst_reg(&current.FullInstruction.Src[0],
574                         &current.FullInstruction.Dst[0])) {
575       return true;
576    }
577    return false;
578 }
579 
580 /*
581  * Optimize away things like:
582  *    *** TEMP[0], TEMP[1], TEMP[2]
583  *    MOV OUT[0] TEMP[0]
584  * into:
585  *    *** OUT[0], TEMP[1], TEMP[2]
586  */
587 static void
i915_fpc_optimize_useless_mov_after_inst(struct i915_optimize_context * ctx,struct i915_token_list * tokens,int index)588 i915_fpc_optimize_useless_mov_after_inst(struct i915_optimize_context *ctx,
589                                          struct i915_token_list *tokens,
590                                          int index)
591 {
592    union i915_full_token *current = &tokens->Tokens[index - 1];
593    union i915_full_token *next = &tokens->Tokens[index];
594 
595    // &out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
596    if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
597        next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
598        next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
599        op_has_dst(current->FullInstruction.Instruction.Opcode) &&
600        !next->FullInstruction.Instruction.Saturate &&
601        next->FullInstruction.Src[0].Register.Absolute == 0 &&
602        next->FullInstruction.Src[0].Register.Negate == 0 &&
603        unused_from(ctx, &current->FullInstruction.Dst[0], index) &&
604        current->FullInstruction.Dst[0].Register.WriteMask ==
605           TGSI_WRITEMASK_XYZW &&
606        is_unswizzled(&next->FullInstruction.Src[0],
607                      next->FullInstruction.Dst[0].Register.WriteMask) &&
608        current->FullInstruction.Dst[0].Register.WriteMask ==
609           next->FullInstruction.Dst[0].Register.WriteMask &&
610        same_src_dst_reg(&next->FullInstruction.Src[0],
611                         &current->FullInstruction.Dst[0])) {
612       next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
613 
614       current->FullInstruction.Dst[0] = next->FullInstruction.Dst[0];
615       return;
616    }
617 }
618 
619 struct i915_token_list *
i915_optimize(const struct tgsi_token * tokens)620 i915_optimize(const struct tgsi_token *tokens)
621 {
622    struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list));
623    struct tgsi_parse_context parse;
624    struct i915_optimize_context *ctx;
625    int i = 0;
626 
627    ctx = malloc(sizeof(*ctx));
628 
629    out_tokens->NumTokens = 0;
630 
631    /* Count the tokens */
632    tgsi_parse_init(&parse, tokens);
633    while (!tgsi_parse_end_of_tokens(&parse)) {
634       tgsi_parse_token(&parse);
635       out_tokens->NumTokens++;
636    }
637    tgsi_parse_free(&parse);
638 
639    /* Allocate our tokens */
640    out_tokens->Tokens =
641       MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens);
642 
643    tgsi_parse_init(&parse, tokens);
644    while (!tgsi_parse_end_of_tokens(&parse)) {
645       tgsi_parse_token(&parse);
646 
647       if (i915_fpc_useless_mov(&parse.FullToken)) {
648          out_tokens->NumTokens--;
649          continue;
650       }
651 
652       copy_token(&out_tokens->Tokens[i], &parse.FullToken);
653 
654       i++;
655    }
656    tgsi_parse_free(&parse);
657 
658    liveness_analysis(ctx, out_tokens);
659 
660    i = 1;
661    while (i < out_tokens->NumTokens) {
662       i915_fpc_optimize_useless_mov_after_inst(ctx, out_tokens, i);
663       i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i - 1],
664                                       &out_tokens->Tokens[i]);
665       i915_fpc_optimize_mov_after_mov(&out_tokens->Tokens[i - 1],
666                                       &out_tokens->Tokens[i]);
667       i915_fpc_optimize_mov_before_tex(ctx, out_tokens, i);
668       i++;
669    }
670 
671    free(ctx);
672 
673    return out_tokens;
674 }
675 
676 void
i915_optimize_free(struct i915_token_list * tokens)677 i915_optimize_free(struct i915_token_list *tokens)
678 {
679    free(tokens->Tokens);
680    free(tokens);
681 }
682