• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2011 The Chromium OS authors.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 #include "i915_reg.h"
29 #include "i915_context.h"
30 #include "i915_fpc.h"
31 
32 #include "pipe/p_shader_tokens.h"
33 #include "util/u_math.h"
34 #include "util/u_memory.h"
35 #include "util/u_string.h"
36 #include "tgsi/tgsi_parse.h"
37 #include "tgsi/tgsi_dump.h"
38 #include "tgsi/tgsi_exec.h"
39 
40 struct i915_optimize_context
41 {
42    int first_write[TGSI_EXEC_NUM_TEMPS];
43    int last_read[TGSI_EXEC_NUM_TEMPS];
44 };
45 
same_src_dst_reg(struct i915_full_src_register * s1,struct i915_full_dst_register * d1)46 static boolean same_src_dst_reg(struct i915_full_src_register *s1, struct i915_full_dst_register *d1)
47 {
48    return (s1->Register.File == d1->Register.File &&
49            s1->Register.Indirect == d1->Register.Indirect &&
50            s1->Register.Dimension == d1->Register.Dimension &&
51            s1->Register.Index == d1->Register.Index);
52 }
53 
same_dst_reg(struct i915_full_dst_register * d1,struct i915_full_dst_register * d2)54 static boolean same_dst_reg(struct i915_full_dst_register *d1, struct i915_full_dst_register *d2)
55 {
56    return (d1->Register.File == d2->Register.File &&
57            d1->Register.Indirect == d2->Register.Indirect &&
58            d1->Register.Dimension == d2->Register.Dimension &&
59            d1->Register.Index == d2->Register.Index);
60 }
61 
same_src_reg(struct i915_full_src_register * d1,struct i915_full_src_register * d2)62 static boolean same_src_reg(struct i915_full_src_register *d1, struct i915_full_src_register *d2)
63 {
64    return (d1->Register.File == d2->Register.File &&
65            d1->Register.Indirect == d2->Register.Indirect &&
66            d1->Register.Dimension == d2->Register.Dimension &&
67            d1->Register.Index == d2->Register.Index &&
68            d1->Register.Absolute == d2->Register.Absolute &&
69            d1->Register.Negate == d2->Register.Negate);
70 }
71 
72 static const struct {
73    boolean is_texture;
74    boolean commutes;
75    unsigned neutral_element;
76    unsigned num_dst;
77    unsigned num_src;
78 } op_table [TGSI_OPCODE_LAST] = {
79    [ TGSI_OPCODE_ADD     ] = { false,   true,  TGSI_SWIZZLE_ZERO,  1,  2 },
80    [ TGSI_OPCODE_CEIL    ] = { false,  false,                  0,  1,  1 },
81    [ TGSI_OPCODE_CMP     ] = { false,  false,                  0,  1,  2 },
82    [ TGSI_OPCODE_COS     ] = { false,  false,                  0,  1,  1 },
83    [ TGSI_OPCODE_DDX     ] = { false,  false,                  0,  1,  0 },
84    [ TGSI_OPCODE_DDY     ] = { false,  false,                  0,  1,  0 },
85    [ TGSI_OPCODE_DP2     ] = { false,   true,   TGSI_SWIZZLE_ONE,  1,  2 },
86    [ TGSI_OPCODE_DP3     ] = { false,   true,   TGSI_SWIZZLE_ONE,  1,  2 },
87    [ TGSI_OPCODE_DP4     ] = { false,   true,   TGSI_SWIZZLE_ONE,  1,  2 },
88    [ TGSI_OPCODE_DPH     ] = { false,  false,                  0,  1,  2 },
89    [ TGSI_OPCODE_DST     ] = { false,  false,                  0,  1,  2 },
90    [ TGSI_OPCODE_END     ] = { false,  false,                  0,  0,  0 },
91    [ TGSI_OPCODE_EX2     ] = { false,  false,                  0,  1,  1 },
92    [ TGSI_OPCODE_FLR     ] = { false,  false,                  0,  1,  1 },
93    [ TGSI_OPCODE_FRC     ] = { false,  false,                  0,  1,  1 },
94    [ TGSI_OPCODE_KILL_IF ] = { false,  false,                  0,  0,  1 },
95    [ TGSI_OPCODE_KILL    ] = { false,  false,                  0,  0,  0 },
96    [ TGSI_OPCODE_LG2     ] = { false,  false,                  0,  1,  1 },
97    [ TGSI_OPCODE_LIT     ] = { false,  false,                  0,  1,  1 },
98    [ TGSI_OPCODE_LRP     ] = { false,  false,                  0,  1,  3 },
99    [ TGSI_OPCODE_MAX     ] = { false,  false,                  0,  1,  2 },
100    [ TGSI_OPCODE_MAD     ] = { false,  false,                  0,  1,  3 },
101    [ TGSI_OPCODE_MIN     ] = { false,  false,                  0,  1,  2 },
102    [ TGSI_OPCODE_MOV     ] = { false,  false,                  0,  1,  1 },
103    [ TGSI_OPCODE_MUL     ] = { false,   true,   TGSI_SWIZZLE_ONE,  1,  2 },
104    [ TGSI_OPCODE_NOP     ] = { false,  false,                  0,  0,  0 },
105    [ TGSI_OPCODE_POW     ] = { false,  false,                  0,  1,  2 },
106    [ TGSI_OPCODE_RCP     ] = { false,  false,                  0,  1,  1 },
107    [ TGSI_OPCODE_RET     ] = { false,  false,                  0,  0,  0 },
108    [ TGSI_OPCODE_RSQ     ] = { false,  false,                  0,  1,  1 },
109    [ TGSI_OPCODE_SCS     ] = { false,  false,                  0,  1,  1 },
110    [ TGSI_OPCODE_SEQ     ] = { false,  false,                  0,  1,  2 },
111    [ TGSI_OPCODE_SGE     ] = { false,  false,                  0,  1,  2 },
112    [ TGSI_OPCODE_SGT     ] = { false,  false,                  0,  1,  2 },
113    [ TGSI_OPCODE_SIN     ] = { false,  false,                  0,  1,  1 },
114    [ TGSI_OPCODE_SLE     ] = { false,  false,                  0,  1,  2 },
115    [ TGSI_OPCODE_SLT     ] = { false,  false,                  0,  1,  2 },
116    [ TGSI_OPCODE_SNE     ] = { false,  false,                  0,  1,  2 },
117    [ TGSI_OPCODE_SSG     ] = { false,  false,                  0,  1,  1 },
118    [ TGSI_OPCODE_TEX     ] = {  true,  false,                  0,  1,  2 },
119    [ TGSI_OPCODE_TRUNC   ] = { false,  false,                  0,  1,  1 },
120    [ TGSI_OPCODE_TXB     ] = {  true,  false,                  0,  1,  2 },
121    [ TGSI_OPCODE_TXP     ] = {  true,  false,                  0,  1,  2 },
122    [ TGSI_OPCODE_XPD     ] = { false,  false,                  0,  1,  2 },
123 };
124 
op_has_dst(unsigned opcode)125 static boolean op_has_dst(unsigned opcode)
126 {
127    return (op_table[opcode].num_dst > 0);
128 }
129 
op_num_dst(unsigned opcode)130 static int op_num_dst(unsigned opcode)
131 {
132    return op_table[opcode].num_dst;
133 }
134 
op_num_src(unsigned opcode)135 static int op_num_src(unsigned opcode)
136 {
137    return op_table[opcode].num_src;
138 }
139 
op_commutes(unsigned opcode)140 static boolean op_commutes(unsigned opcode)
141 {
142    return op_table[opcode].commutes;
143 }
144 
mask_for_unswizzled(int num_components)145 static unsigned mask_for_unswizzled(int num_components)
146 {
147    unsigned mask = 0;
148    switch(num_components)
149    {
150       case 4:
151          mask |= TGSI_WRITEMASK_W;
152       case 3:
153          mask |= TGSI_WRITEMASK_Z;
154       case 2:
155          mask |= TGSI_WRITEMASK_Y;
156       case 1:
157          mask |= TGSI_WRITEMASK_X;
158    }
159    return mask;
160 }
161 
is_unswizzled(struct i915_full_src_register * r,unsigned write_mask)162 static boolean is_unswizzled(struct i915_full_src_register *r,
163                              unsigned write_mask)
164 {
165    if ( write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X)
166       return FALSE;
167    if ( write_mask & TGSI_WRITEMASK_Y && r->Register.SwizzleY != TGSI_SWIZZLE_Y)
168       return FALSE;
169    if ( write_mask & TGSI_WRITEMASK_Z && r->Register.SwizzleZ != TGSI_SWIZZLE_Z)
170       return FALSE;
171    if ( write_mask & TGSI_WRITEMASK_W && r->Register.SwizzleW != TGSI_SWIZZLE_W)
172       return FALSE;
173    return TRUE;
174 }
175 
op_is_texture(unsigned opcode)176 static boolean op_is_texture(unsigned opcode)
177 {
178    return op_table[opcode].is_texture;
179 }
180 
op_neutral_element(unsigned opcode)181 static unsigned op_neutral_element(unsigned opcode)
182 {
183    unsigned ne = op_table[opcode].neutral_element;
184    if (!ne) {
185       debug_printf("No neutral element for opcode %d\n",opcode);
186       ne = TGSI_SWIZZLE_ZERO;
187    }
188    return ne;
189 }
190 
191 /*
192  * Sets the swizzle to the neutral element for the operation for the bits
193  * of writemask which are set, swizzle to identity otherwise.
194  */
set_neutral_element_swizzle(struct i915_full_src_register * r,unsigned write_mask,unsigned neutral)195 static void set_neutral_element_swizzle(struct i915_full_src_register *r,
196                                         unsigned write_mask,
197                                         unsigned neutral)
198 {
199    if ( write_mask & TGSI_WRITEMASK_X )
200       r->Register.SwizzleX = neutral;
201    else
202       r->Register.SwizzleX = TGSI_SWIZZLE_X;
203 
204    if ( write_mask & TGSI_WRITEMASK_Y )
205       r->Register.SwizzleY = neutral;
206    else
207       r->Register.SwizzleY = TGSI_SWIZZLE_Y;
208 
209    if ( write_mask & TGSI_WRITEMASK_Z )
210       r->Register.SwizzleZ = neutral;
211    else
212       r->Register.SwizzleZ = TGSI_SWIZZLE_Z;
213 
214    if ( write_mask & TGSI_WRITEMASK_W )
215       r->Register.SwizzleW = neutral;
216    else
217       r->Register.SwizzleW = TGSI_SWIZZLE_W;
218 }
219 
copy_src_reg(struct i915_src_register * o,const struct tgsi_src_register * i)220 static void copy_src_reg(struct i915_src_register *o, const struct tgsi_src_register *i)
221 {
222    o->File      = i->File;
223    o->Indirect  = i->Indirect;
224    o->Dimension = i->Dimension;
225    o->Index     = i->Index;
226    o->SwizzleX  = i->SwizzleX;
227    o->SwizzleY  = i->SwizzleY;
228    o->SwizzleZ  = i->SwizzleZ;
229    o->SwizzleW  = i->SwizzleW;
230    o->Absolute  = i->Absolute;
231    o->Negate    = i->Negate;
232 }
233 
copy_dst_reg(struct i915_dst_register * o,const struct tgsi_dst_register * i)234 static void copy_dst_reg(struct i915_dst_register *o, const struct tgsi_dst_register *i)
235 {
236    o->File      = i->File;
237    o->WriteMask = i->WriteMask;
238    o->Indirect  = i->Indirect;
239    o->Dimension = i->Dimension;
240    o->Index     = i->Index;
241 }
242 
copy_instruction(struct i915_full_instruction * o,const struct tgsi_full_instruction * i)243 static void copy_instruction(struct i915_full_instruction *o, const struct tgsi_full_instruction *i)
244 {
245    memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction));
246    memcpy(&o->Texture, &i->Texture, sizeof(o->Texture));
247 
248    copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register);
249 
250    copy_src_reg(&o->Src[0].Register, &i->Src[0].Register);
251    copy_src_reg(&o->Src[1].Register, &i->Src[1].Register);
252    copy_src_reg(&o->Src[2].Register, &i->Src[2].Register);
253 }
254 
copy_token(union i915_full_token * o,union tgsi_full_token * i)255 static void copy_token(union i915_full_token *o, union tgsi_full_token *i)
256 {
257    if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
258       memcpy(o, i, sizeof(*o));
259    else
260       copy_instruction(&o->FullInstruction, &i->FullInstruction);
261 
262 }
263 
liveness_mark_written(struct i915_optimize_context * ctx,struct i915_full_dst_register * dst_reg,int pos)264 static void liveness_mark_written(struct i915_optimize_context *ctx,
265                                   struct i915_full_dst_register *dst_reg,
266                                   int pos)
267 {
268    int dst_reg_index;
269    if (dst_reg->Register.File == TGSI_FILE_TEMPORARY) {
270       dst_reg_index = dst_reg->Register.Index;
271       assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);
272       /* dead -> live transition */
273       if (ctx->first_write[dst_reg_index] != -1)
274          ctx->first_write[dst_reg_index] = pos;
275    }
276 }
277 
liveness_mark_read(struct i915_optimize_context * ctx,struct i915_full_src_register * src_reg,int pos)278 static void liveness_mark_read(struct i915_optimize_context *ctx,
279                                struct i915_full_src_register *src_reg,
280                                int pos)
281 {
282    int src_reg_index;
283    if (src_reg->Register.File == TGSI_FILE_TEMPORARY) {
284       src_reg_index = src_reg->Register.Index;
285       assert(src_reg_index < TGSI_EXEC_NUM_TEMPS);
286       /* live -> dead transition */
287       if (ctx->last_read[src_reg_index] != -1)
288          ctx->last_read[src_reg_index] = pos;
289    }
290 }
291 
liveness_analysis(struct i915_optimize_context * ctx,struct i915_token_list * tokens)292 static void liveness_analysis(struct i915_optimize_context *ctx,
293                               struct i915_token_list *tokens)
294 {
295    struct i915_full_dst_register *dst_reg;
296    struct i915_full_src_register *src_reg;
297    union i915_full_token *current;
298    unsigned opcode;
299    int num_dst, num_src;
300    int i = 0;
301 
302    for(i = 0; i < TGSI_EXEC_NUM_TEMPS; i++)
303    {
304       ctx->first_write[i] = -1;
305       ctx->last_read[i] = -1;
306    }
307 
308    for(i = 0; i < tokens->NumTokens; i++)
309    {
310       current = &tokens->Tokens[i];
311 
312       if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
313          continue;
314 
315       opcode = current->FullInstruction.Instruction.Opcode;
316       num_dst = op_num_dst(opcode);
317 
318       switch(num_dst)
319       {
320          case 1:
321             dst_reg = &current->FullInstruction.Dst[0];
322             liveness_mark_written(ctx, dst_reg, i);
323          case 0:
324             break;
325          default:
326             debug_printf("Op %d has %d dst regs\n", opcode, num_dst);
327             break;
328       }
329    }
330 
331    for(i = tokens->NumTokens - 1; i >= 0; i--)
332    {
333       current = &tokens->Tokens[i];
334 
335       if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
336          continue;
337 
338       opcode = current->FullInstruction.Instruction.Opcode;
339       num_src = op_num_src(opcode);
340 
341       switch(num_src)
342       {
343          case 3:
344             src_reg = &current->FullInstruction.Src[2];
345             liveness_mark_read(ctx, src_reg, i);
346          case 2:
347             src_reg = &current->FullInstruction.Src[1];
348             liveness_mark_read(ctx, src_reg, i);
349          case 1:
350             src_reg = &current->FullInstruction.Src[0];
351             liveness_mark_read(ctx, src_reg, i);
352          case 0:
353             break;
354          default:
355             debug_printf("Op %d has %d src regs\n", opcode, num_src);
356             break;
357       }
358    }
359 }
360 
unused_from(struct i915_optimize_context * ctx,struct i915_full_dst_register * dst_reg,int from)361 static int unused_from(struct i915_optimize_context *ctx, struct i915_full_dst_register *dst_reg, int from)
362 {
363    int dst_reg_index = dst_reg->Register.Index;
364    assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);
365    return (from >= ctx->last_read[dst_reg_index]);
366 }
367 
368 /* Returns a mask with the components used for a texture access instruction */
i915_tex_mask(union i915_full_token * instr)369 static unsigned i915_tex_mask(union i915_full_token *instr)
370 {
371    unsigned mask;
372 
373    /* Get the number of coords */
374    mask = mask_for_unswizzled(i915_num_coords(instr->FullInstruction.Texture.Texture));
375 
376    /* Add the W component if projective */
377    if (instr->FullInstruction.Instruction.Opcode == TGSI_OPCODE_TXP)
378       mask |= TGSI_WRITEMASK_W;
379 
380    return mask;
381 }
382 
target_is_texture2d(uint tex)383 static boolean target_is_texture2d(uint tex)
384 {
385    switch (tex) {
386    case TGSI_TEXTURE_2D:
387    case TGSI_TEXTURE_RECT:
388       return true;
389    default:
390       return false;
391    }
392 }
393 
394 
395 /*
396  * Optimize away useless indirect texture reads:
397  *    MOV TEMP[0].xy, IN[0].xyyy
398  *    TEX TEMP[1], TEMP[0], SAMP[0], 2D
399  * into:
400  *    TEX TEMP[1], IN[0], SAMP[0], 2D
401  *
402  * note: this only seems to work on 2D/RECT textures, but not SHAADOW2D/1D/..
403  */
i915_fpc_optimize_mov_before_tex(struct i915_optimize_context * ctx,struct i915_token_list * tokens,int index)404 static void i915_fpc_optimize_mov_before_tex(struct i915_optimize_context *ctx,
405                                              struct i915_token_list *tokens,
406                                              int index)
407 {
408    union i915_full_token *current = &tokens->Tokens[index - 1];
409    union i915_full_token *next = &tokens->Tokens[index];
410 
411    if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
412         next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
413         current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
414         op_is_texture(next->FullInstruction.Instruction.Opcode) &&
415         target_is_texture2d(next->FullInstruction.Texture.Texture) &&
416         same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
417         is_unswizzled(&current->FullInstruction.Src[0], i915_tex_mask(next)) &&
418         unused_from(ctx, &current->FullInstruction.Dst[0], index))
419    {
420       memcpy(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0], sizeof(struct i915_src_register));
421       current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
422    }
423 }
424 
425 /*
426  * Optimize away things like:
427  *    MOV TEMP[0].xy, TEMP[1].xyyy (first write for TEMP[0])
428  *    MOV TEMP[0].w, TEMP[1].wwww (last write for TEMP[0])
429  * into:
430  *    NOP
431  *    MOV OUT[0].xyw, TEMP[1].xyww
432  */
i915_fpc_optimize_mov_after_mov(union i915_full_token * current,union i915_full_token * next)433 static void i915_fpc_optimize_mov_after_mov(union i915_full_token *current, union i915_full_token *next)
434 {
435    struct i915_full_src_register *src_reg1, *src_reg2;
436    struct i915_full_dst_register *dst_reg1, *dst_reg2;
437    unsigned swizzle_x, swizzle_y, swizzle_z, swizzle_w;
438 
439    if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
440         next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
441         current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
442         next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
443         current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
444         same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
445         same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0]) &&
446         !same_src_dst_reg(&current->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) )
447    {
448       src_reg1 = &current->FullInstruction.Src[0];
449       dst_reg1 = &current->FullInstruction.Dst[0];
450       src_reg2 = &next->FullInstruction.Src[0];
451       dst_reg2 = &next->FullInstruction.Dst[0];
452 
453       /* Start with swizzles from the first mov */
454       swizzle_x = src_reg1->Register.SwizzleX;
455       swizzle_y = src_reg1->Register.SwizzleY;
456       swizzle_z = src_reg1->Register.SwizzleZ;
457       swizzle_w = src_reg1->Register.SwizzleW;
458 
459       /* Pile the second mov on top */
460       if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_X)
461          swizzle_x = src_reg2->Register.SwizzleX;
462       if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Y)
463          swizzle_y = src_reg2->Register.SwizzleY;
464       if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Z)
465          swizzle_z = src_reg2->Register.SwizzleZ;
466       if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_W)
467          swizzle_w = src_reg2->Register.SwizzleW;
468 
469       dst_reg2->Register.WriteMask |= dst_reg1->Register.WriteMask;
470       src_reg2->Register.SwizzleX = swizzle_x;
471       src_reg2->Register.SwizzleY = swizzle_y;
472       src_reg2->Register.SwizzleZ = swizzle_z;
473       src_reg2->Register.SwizzleW = swizzle_w;
474 
475       current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
476 
477       return;
478    }
479 }
480 
481 /*
482  * Optimize away things like:
483  *    MUL OUT[0].xyz, TEMP[1], TEMP[2]
484  *    MOV OUT[0].w, TEMP[2]
485  * into:
486  *    MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
487  * This is useful for optimizing texenv.
488  */
i915_fpc_optimize_mov_after_alu(union i915_full_token * current,union i915_full_token * next)489 static void i915_fpc_optimize_mov_after_alu(union i915_full_token *current, union i915_full_token *next)
490 {
491    if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
492         next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
493         op_commutes(current->FullInstruction.Instruction.Opcode) &&
494         current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
495         next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
496         same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
497         same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[1]) &&
498         !same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
499         is_unswizzled(&current->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
500         is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
501         is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
502    {
503       next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
504 
505       set_neutral_element_swizzle(&current->FullInstruction.Src[1], 0, 0);
506       set_neutral_element_swizzle(&current->FullInstruction.Src[0],
507                                   next->FullInstruction.Dst[0].Register.WriteMask,
508                                   op_neutral_element(current->FullInstruction.Instruction.Opcode));
509 
510       current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask |
511                                                            next->FullInstruction.Dst[0].Register.WriteMask;
512       return;
513    }
514 
515    if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
516         next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
517         op_commutes(current->FullInstruction.Instruction.Opcode) &&
518         current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
519         next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
520         same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
521         same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0]) &&
522         !same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
523         is_unswizzled(&current->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
524         is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
525         is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
526    {
527       next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
528 
529       set_neutral_element_swizzle(&current->FullInstruction.Src[0], 0, 0);
530       set_neutral_element_swizzle(&current->FullInstruction.Src[1],
531                                   next->FullInstruction.Dst[0].Register.WriteMask,
532                                   op_neutral_element(current->FullInstruction.Instruction.Opcode));
533 
534       current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask |
535                                                            next->FullInstruction.Dst[0].Register.WriteMask;
536       return;
537    }
538 }
539 
540 /*
541  * Optimize away things like:
542  *    MOV TEMP[0].xyz TEMP[0].xyzx
543  * into:
544  *    NOP
545  */
i915_fpc_useless_mov(union tgsi_full_token * tgsi_current)546 static boolean i915_fpc_useless_mov(union tgsi_full_token *tgsi_current)
547 {
548    union i915_full_token current;
549    copy_token(&current , tgsi_current);
550    if ( current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
551         current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
552         op_has_dst(current.FullInstruction.Instruction.Opcode) &&
553         !current.FullInstruction.Instruction.Saturate &&
554         current.FullInstruction.Src[0].Register.Absolute == 0 &&
555         current.FullInstruction.Src[0].Register.Negate == 0 &&
556         is_unswizzled(&current.FullInstruction.Src[0], current.FullInstruction.Dst[0].Register.WriteMask) &&
557         same_src_dst_reg(&current.FullInstruction.Src[0], &current.FullInstruction.Dst[0]) )
558    {
559       return TRUE;
560    }
561    return FALSE;
562 }
563 
564 /*
565  * Optimize away things like:
566  *    *** TEMP[0], TEMP[1], TEMP[2]
567  *    MOV OUT[0] TEMP[0]
568  * into:
569  *    *** OUT[0], TEMP[1], TEMP[2]
570  */
i915_fpc_optimize_useless_mov_after_inst(struct i915_optimize_context * ctx,struct i915_token_list * tokens,int index)571 static void i915_fpc_optimize_useless_mov_after_inst(struct i915_optimize_context *ctx,
572                                                      struct i915_token_list *tokens,
573                                                      int index)
574 {
575    union i915_full_token *current = &tokens->Tokens[index - 1];
576    union i915_full_token *next = &tokens->Tokens[index];
577 
578    // &out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
579    if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
580         next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
581         next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
582         op_has_dst(current->FullInstruction.Instruction.Opcode) &&
583         !next->FullInstruction.Instruction.Saturate &&
584         next->FullInstruction.Src[0].Register.Absolute == 0 &&
585         next->FullInstruction.Src[0].Register.Negate == 0 &&
586         unused_from(ctx, &current->FullInstruction.Dst[0], index) &&
587         current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZW &&
588         is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) &&
589         current->FullInstruction.Dst[0].Register.WriteMask == next->FullInstruction.Dst[0].Register.WriteMask &&
590         same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) )
591    {
592       next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
593 
594       current->FullInstruction.Dst[0] = next->FullInstruction.Dst[0];
595       return;
596    }
597 }
598 
i915_optimize(const struct tgsi_token * tokens)599 struct i915_token_list* i915_optimize(const struct tgsi_token *tokens)
600 {
601    struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list));
602    struct tgsi_parse_context parse;
603    struct i915_optimize_context *ctx;
604    int i = 0;
605 
606    ctx = malloc(sizeof(*ctx));
607 
608    out_tokens->NumTokens = 0;
609 
610    /* Count the tokens */
611    tgsi_parse_init( &parse, tokens );
612    while( !tgsi_parse_end_of_tokens( &parse ) ) {
613       tgsi_parse_token( &parse );
614       out_tokens->NumTokens++;
615    }
616    tgsi_parse_free (&parse);
617 
618    /* Allocate our tokens */
619    out_tokens->Tokens = MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens);
620 
621    tgsi_parse_init( &parse, tokens );
622    while( !tgsi_parse_end_of_tokens( &parse ) ) {
623       tgsi_parse_token( &parse );
624 
625       if (i915_fpc_useless_mov(&parse.FullToken)) {
626          out_tokens->NumTokens--;
627          continue;
628       }
629 
630       copy_token(&out_tokens->Tokens[i] , &parse.FullToken);
631 
632       i++;
633    }
634    tgsi_parse_free (&parse);
635 
636    liveness_analysis(ctx, out_tokens);
637 
638    i = 1;
639    while( i < out_tokens->NumTokens) {
640       i915_fpc_optimize_useless_mov_after_inst(ctx, out_tokens, i);
641       i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
642       i915_fpc_optimize_mov_after_mov(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
643       i915_fpc_optimize_mov_before_tex(ctx, out_tokens, i);
644       i++;
645    }
646 
647    free(ctx);
648 
649    return out_tokens;
650 }
651 
i915_optimize_free(struct i915_token_list * tokens)652 void i915_optimize_free(struct i915_token_list *tokens)
653 {
654    free(tokens->Tokens);
655    free(tokens);
656 }
657 
658 
659