1 /**************************************************************************
2 *
3 * Copyright 2011 The Chromium OS authors.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "i915_context.h"
29 #include "i915_fpc.h"
30 #include "i915_reg.h"
31
32 #include "pipe/p_shader_tokens.h"
33 #include "tgsi/tgsi_dump.h"
34 #include "tgsi/tgsi_exec.h"
35 #include "tgsi/tgsi_parse.h"
36 #include "util/u_math.h"
37 #include "util/u_memory.h"
38 #include "util/u_string.h"
39
40 struct i915_optimize_context {
41 int first_write[TGSI_EXEC_NUM_TEMPS];
42 int last_read[TGSI_EXEC_NUM_TEMPS];
43 };
44
45 static bool
same_src_dst_reg(struct i915_full_src_register * s1,struct i915_full_dst_register * d1)46 same_src_dst_reg(struct i915_full_src_register *s1,
47 struct i915_full_dst_register *d1)
48 {
49 return (s1->Register.File == d1->Register.File &&
50 s1->Register.Indirect == d1->Register.Indirect &&
51 s1->Register.Dimension == d1->Register.Dimension &&
52 s1->Register.Index == d1->Register.Index);
53 }
54
55 static bool
same_dst_reg(struct i915_full_dst_register * d1,struct i915_full_dst_register * d2)56 same_dst_reg(struct i915_full_dst_register *d1,
57 struct i915_full_dst_register *d2)
58 {
59 return (d1->Register.File == d2->Register.File &&
60 d1->Register.Indirect == d2->Register.Indirect &&
61 d1->Register.Dimension == d2->Register.Dimension &&
62 d1->Register.Index == d2->Register.Index);
63 }
64
65 static bool
same_src_reg(struct i915_full_src_register * d1,struct i915_full_src_register * d2)66 same_src_reg(struct i915_full_src_register *d1,
67 struct i915_full_src_register *d2)
68 {
69 return (d1->Register.File == d2->Register.File &&
70 d1->Register.Indirect == d2->Register.Indirect &&
71 d1->Register.Dimension == d2->Register.Dimension &&
72 d1->Register.Index == d2->Register.Index &&
73 d1->Register.Absolute == d2->Register.Absolute &&
74 d1->Register.Negate == d2->Register.Negate);
75 }
76
77 static const struct {
78 bool is_texture;
79 bool commutes;
80 unsigned neutral_element;
81 unsigned num_dst;
82 unsigned num_src;
83 } op_table[TGSI_OPCODE_LAST] = {
84 [TGSI_OPCODE_ADD] = {false, true, TGSI_SWIZZLE_ZERO, 1, 2},
85 [TGSI_OPCODE_CEIL] = {false, false, 0, 1, 1},
86 [TGSI_OPCODE_CMP] = {false, false, 0, 1, 2},
87 [TGSI_OPCODE_COS] = {false, false, 0, 1, 1},
88 [TGSI_OPCODE_DDX] = {false, false, 0, 1, 0},
89 [TGSI_OPCODE_DDY] = {false, false, 0, 1, 0},
90 [TGSI_OPCODE_DP2] = {false, true, TGSI_SWIZZLE_ONE, 1, 2},
91 [TGSI_OPCODE_DP3] = {false, true, TGSI_SWIZZLE_ONE, 1, 2},
92 [TGSI_OPCODE_DP4] = {false, true, TGSI_SWIZZLE_ONE, 1, 2},
93 [TGSI_OPCODE_DST] = {false, false, 0, 1, 2},
94 [TGSI_OPCODE_END] = {false, false, 0, 0, 0},
95 [TGSI_OPCODE_EX2] = {false, false, 0, 1, 1},
96 [TGSI_OPCODE_FLR] = {false, false, 0, 1, 1},
97 [TGSI_OPCODE_FRC] = {false, false, 0, 1, 1},
98 [TGSI_OPCODE_KILL_IF] = {false, false, 0, 0, 1},
99 [TGSI_OPCODE_KILL] = {false, false, 0, 0, 0},
100 [TGSI_OPCODE_LG2] = {false, false, 0, 1, 1},
101 [TGSI_OPCODE_LIT] = {false, false, 0, 1, 1},
102 [TGSI_OPCODE_LRP] = {false, false, 0, 1, 3},
103 [TGSI_OPCODE_MAX] = {false, false, 0, 1, 2},
104 [TGSI_OPCODE_MAD] = {false, false, 0, 1, 3},
105 [TGSI_OPCODE_MIN] = {false, false, 0, 1, 2},
106 [TGSI_OPCODE_MOV] = {false, false, 0, 1, 1},
107 [TGSI_OPCODE_MUL] = {false, true, TGSI_SWIZZLE_ONE, 1, 2},
108 [TGSI_OPCODE_NOP] = {false, false, 0, 0, 0},
109 [TGSI_OPCODE_POW] = {false, false, 0, 1, 2},
110 [TGSI_OPCODE_RCP] = {false, false, 0, 1, 1},
111 [TGSI_OPCODE_RET] = {false, false, 0, 0, 0},
112 [TGSI_OPCODE_RSQ] = {false, false, 0, 1, 1},
113 [TGSI_OPCODE_SEQ] = {false, false, 0, 1, 2},
114 [TGSI_OPCODE_SGE] = {false, false, 0, 1, 2},
115 [TGSI_OPCODE_SGT] = {false, false, 0, 1, 2},
116 [TGSI_OPCODE_SIN] = {false, false, 0, 1, 1},
117 [TGSI_OPCODE_SLE] = {false, false, 0, 1, 2},
118 [TGSI_OPCODE_SLT] = {false, false, 0, 1, 2},
119 [TGSI_OPCODE_SNE] = {false, false, 0, 1, 2},
120 [TGSI_OPCODE_SSG] = {false, false, 0, 1, 1},
121 [TGSI_OPCODE_TEX] = {true, false, 0, 1, 2},
122 [TGSI_OPCODE_TRUNC] = {false, false, 0, 1, 1},
123 [TGSI_OPCODE_TXB] = {true, false, 0, 1, 2},
124 [TGSI_OPCODE_TXP] = {true, false, 0, 1, 2},
125 };
126
127 static bool
op_has_dst(unsigned opcode)128 op_has_dst(unsigned opcode)
129 {
130 return (op_table[opcode].num_dst > 0);
131 }
132
133 static int
op_num_dst(unsigned opcode)134 op_num_dst(unsigned opcode)
135 {
136 return op_table[opcode].num_dst;
137 }
138
139 static int
op_num_src(unsigned opcode)140 op_num_src(unsigned opcode)
141 {
142 return op_table[opcode].num_src;
143 }
144
145 static bool
op_commutes(unsigned opcode)146 op_commutes(unsigned opcode)
147 {
148 return op_table[opcode].commutes;
149 }
150
151 static unsigned
mask_for_unswizzled(int num_components)152 mask_for_unswizzled(int num_components)
153 {
154 unsigned mask = 0;
155 switch (num_components) {
156 case 4:
157 mask |= TGSI_WRITEMASK_W;
158 FALLTHROUGH;
159 case 3:
160 mask |= TGSI_WRITEMASK_Z;
161 FALLTHROUGH;
162 case 2:
163 mask |= TGSI_WRITEMASK_Y;
164 FALLTHROUGH;
165 case 1:
166 mask |= TGSI_WRITEMASK_X;
167 }
168 return mask;
169 }
170
171 static bool
is_unswizzled(struct i915_full_src_register * r,unsigned write_mask)172 is_unswizzled(struct i915_full_src_register *r, unsigned write_mask)
173 {
174 if (write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X)
175 return false;
176 if (write_mask & TGSI_WRITEMASK_Y && r->Register.SwizzleY != TGSI_SWIZZLE_Y)
177 return false;
178 if (write_mask & TGSI_WRITEMASK_Z && r->Register.SwizzleZ != TGSI_SWIZZLE_Z)
179 return false;
180 if (write_mask & TGSI_WRITEMASK_W && r->Register.SwizzleW != TGSI_SWIZZLE_W)
181 return false;
182 return true;
183 }
184
185 static bool
op_is_texture(unsigned opcode)186 op_is_texture(unsigned opcode)
187 {
188 return op_table[opcode].is_texture;
189 }
190
191 static unsigned
op_neutral_element(unsigned opcode)192 op_neutral_element(unsigned opcode)
193 {
194 unsigned ne = op_table[opcode].neutral_element;
195 if (!ne) {
196 debug_printf("No neutral element for opcode %d\n", opcode);
197 ne = TGSI_SWIZZLE_ZERO;
198 }
199 return ne;
200 }
201
202 /*
203 * Sets the swizzle to the neutral element for the operation for the bits
204 * of writemask which are set, swizzle to identity otherwise.
205 */
206 static void
set_neutral_element_swizzle(struct i915_full_src_register * r,unsigned write_mask,unsigned neutral)207 set_neutral_element_swizzle(struct i915_full_src_register *r,
208 unsigned write_mask, unsigned neutral)
209 {
210 if (write_mask & TGSI_WRITEMASK_X)
211 r->Register.SwizzleX = neutral;
212 else
213 r->Register.SwizzleX = TGSI_SWIZZLE_X;
214
215 if (write_mask & TGSI_WRITEMASK_Y)
216 r->Register.SwizzleY = neutral;
217 else
218 r->Register.SwizzleY = TGSI_SWIZZLE_Y;
219
220 if (write_mask & TGSI_WRITEMASK_Z)
221 r->Register.SwizzleZ = neutral;
222 else
223 r->Register.SwizzleZ = TGSI_SWIZZLE_Z;
224
225 if (write_mask & TGSI_WRITEMASK_W)
226 r->Register.SwizzleW = neutral;
227 else
228 r->Register.SwizzleW = TGSI_SWIZZLE_W;
229 }
230
231 static void
copy_src_reg(struct i915_src_register * o,const struct tgsi_src_register * i)232 copy_src_reg(struct i915_src_register *o, const struct tgsi_src_register *i)
233 {
234 o->File = i->File;
235 o->Indirect = i->Indirect;
236 o->Dimension = i->Dimension;
237 o->Index = i->Index;
238 o->SwizzleX = i->SwizzleX;
239 o->SwizzleY = i->SwizzleY;
240 o->SwizzleZ = i->SwizzleZ;
241 o->SwizzleW = i->SwizzleW;
242 o->Absolute = i->Absolute;
243 o->Negate = i->Negate;
244 }
245
246 static void
copy_dst_reg(struct i915_dst_register * o,const struct tgsi_dst_register * i)247 copy_dst_reg(struct i915_dst_register *o, const struct tgsi_dst_register *i)
248 {
249 o->File = i->File;
250 o->WriteMask = i->WriteMask;
251 o->Indirect = i->Indirect;
252 o->Dimension = i->Dimension;
253 o->Index = i->Index;
254 }
255
256 static void
copy_instruction(struct i915_full_instruction * o,const struct tgsi_full_instruction * i)257 copy_instruction(struct i915_full_instruction *o,
258 const struct tgsi_full_instruction *i)
259 {
260 memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction));
261 memcpy(&o->Texture, &i->Texture, sizeof(o->Texture));
262
263 copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register);
264
265 copy_src_reg(&o->Src[0].Register, &i->Src[0].Register);
266 copy_src_reg(&o->Src[1].Register, &i->Src[1].Register);
267 copy_src_reg(&o->Src[2].Register, &i->Src[2].Register);
268 }
269
270 static void
copy_token(union i915_full_token * o,union tgsi_full_token * i)271 copy_token(union i915_full_token *o, union tgsi_full_token *i)
272 {
273 if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
274 memcpy(o, i, sizeof(*o));
275 else
276 copy_instruction(&o->FullInstruction, &i->FullInstruction);
277 }
278
279 static void
liveness_mark_written(struct i915_optimize_context * ctx,struct i915_full_dst_register * dst_reg,int pos)280 liveness_mark_written(struct i915_optimize_context *ctx,
281 struct i915_full_dst_register *dst_reg, int pos)
282 {
283 int dst_reg_index;
284 if (dst_reg->Register.File == TGSI_FILE_TEMPORARY) {
285 dst_reg_index = dst_reg->Register.Index;
286 assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);
287 /* dead -> live transition */
288 if (ctx->first_write[dst_reg_index] != -1)
289 ctx->first_write[dst_reg_index] = pos;
290 }
291 }
292
293 static void
liveness_mark_read(struct i915_optimize_context * ctx,struct i915_full_src_register * src_reg,int pos)294 liveness_mark_read(struct i915_optimize_context *ctx,
295 struct i915_full_src_register *src_reg, int pos)
296 {
297 int src_reg_index;
298 if (src_reg->Register.File == TGSI_FILE_TEMPORARY) {
299 src_reg_index = src_reg->Register.Index;
300 assert(src_reg_index < TGSI_EXEC_NUM_TEMPS);
301 /* live -> dead transition */
302 if (ctx->last_read[src_reg_index] != -1)
303 ctx->last_read[src_reg_index] = pos;
304 }
305 }
306
307 static void
liveness_analysis(struct i915_optimize_context * ctx,struct i915_token_list * tokens)308 liveness_analysis(struct i915_optimize_context *ctx,
309 struct i915_token_list *tokens)
310 {
311 struct i915_full_dst_register *dst_reg;
312 struct i915_full_src_register *src_reg;
313 union i915_full_token *current;
314 unsigned opcode;
315 int num_dst, num_src;
316 int i = 0;
317
318 for (i = 0; i < TGSI_EXEC_NUM_TEMPS; i++) {
319 ctx->first_write[i] = -1;
320 ctx->last_read[i] = -1;
321 }
322
323 for (i = 0; i < tokens->NumTokens; i++) {
324 current = &tokens->Tokens[i];
325
326 if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
327 continue;
328
329 opcode = current->FullInstruction.Instruction.Opcode;
330 num_dst = op_num_dst(opcode);
331
332 switch (num_dst) {
333 case 1:
334 dst_reg = ¤t->FullInstruction.Dst[0];
335 liveness_mark_written(ctx, dst_reg, i);
336 case 0:
337 break;
338 default:
339 debug_printf("Op %d has %d dst regs\n", opcode, num_dst);
340 break;
341 }
342 }
343
344 for (i = tokens->NumTokens - 1; i >= 0; i--) {
345 current = &tokens->Tokens[i];
346
347 if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
348 continue;
349
350 opcode = current->FullInstruction.Instruction.Opcode;
351 num_src = op_num_src(opcode);
352
353 switch (num_src) {
354 case 3:
355 src_reg = ¤t->FullInstruction.Src[2];
356 liveness_mark_read(ctx, src_reg, i);
357 FALLTHROUGH;
358 case 2:
359 src_reg = ¤t->FullInstruction.Src[1];
360 liveness_mark_read(ctx, src_reg, i);
361 FALLTHROUGH;
362 case 1:
363 src_reg = ¤t->FullInstruction.Src[0];
364 liveness_mark_read(ctx, src_reg, i);
365 FALLTHROUGH;
366 case 0:
367 break;
368 default:
369 debug_printf("Op %d has %d src regs\n", opcode, num_src);
370 break;
371 }
372 }
373 }
374
375 static int
unused_from(struct i915_optimize_context * ctx,struct i915_full_dst_register * dst_reg,int from)376 unused_from(struct i915_optimize_context *ctx,
377 struct i915_full_dst_register *dst_reg, int from)
378 {
379 int dst_reg_index = dst_reg->Register.Index;
380 assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);
381 return (from >= ctx->last_read[dst_reg_index]);
382 }
383
384 /* Returns a mask with the components used for a texture access instruction */
385 static unsigned
i915_tex_mask(union i915_full_token * instr)386 i915_tex_mask(union i915_full_token *instr)
387 {
388 unsigned mask;
389
390 /* Get the number of coords */
391 mask = mask_for_unswizzled(
392 i915_num_coords(instr->FullInstruction.Texture.Texture));
393
394 /* Add the W component if projective */
395 if (instr->FullInstruction.Instruction.Opcode == TGSI_OPCODE_TXP)
396 mask |= TGSI_WRITEMASK_W;
397
398 return mask;
399 }
400
401 static bool
target_is_texture2d(uint32_t tex)402 target_is_texture2d(uint32_t tex)
403 {
404 switch (tex) {
405 case TGSI_TEXTURE_2D:
406 case TGSI_TEXTURE_RECT:
407 return true;
408 default:
409 return false;
410 }
411 }
412
413 /*
414 * Optimize away useless indirect texture reads:
415 * MOV TEMP[0].xy, IN[0].xyyy
416 * TEX TEMP[1], TEMP[0], SAMP[0], 2D
417 * into:
418 * TEX TEMP[1], IN[0], SAMP[0], 2D
419 *
420 * note: this only seems to work on 2D/RECT textures, but not SHAADOW2D/1D/..
421 */
422 static void
i915_fpc_optimize_mov_before_tex(struct i915_optimize_context * ctx,struct i915_token_list * tokens,int index)423 i915_fpc_optimize_mov_before_tex(struct i915_optimize_context *ctx,
424 struct i915_token_list *tokens, int index)
425 {
426 union i915_full_token *current = &tokens->Tokens[index - 1];
427 union i915_full_token *next = &tokens->Tokens[index];
428
429 if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
430 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
431 current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
432 op_is_texture(next->FullInstruction.Instruction.Opcode) &&
433 target_is_texture2d(next->FullInstruction.Texture.Texture) &&
434 same_src_dst_reg(&next->FullInstruction.Src[0],
435 ¤t->FullInstruction.Dst[0]) &&
436 is_unswizzled(¤t->FullInstruction.Src[0], i915_tex_mask(next)) &&
437 unused_from(ctx, ¤t->FullInstruction.Dst[0], index)) {
438 memcpy(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[0],
439 sizeof(struct i915_src_register));
440 current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
441 }
442 }
443
444 /*
445 * Optimize away things like:
446 * MOV TEMP[0].xy, TEMP[1].xyyy (first write for TEMP[0])
447 * MOV TEMP[0].w, TEMP[1].wwww (last write for TEMP[0])
448 * into:
449 * NOP
450 * MOV OUT[0].xyw, TEMP[1].xyww
451 */
452 static void
i915_fpc_optimize_mov_after_mov(union i915_full_token * current,union i915_full_token * next)453 i915_fpc_optimize_mov_after_mov(union i915_full_token *current,
454 union i915_full_token *next)
455 {
456 struct i915_full_src_register *src_reg1, *src_reg2;
457 struct i915_full_dst_register *dst_reg1, *dst_reg2;
458 unsigned swizzle_x, swizzle_y, swizzle_z, swizzle_w;
459
460 if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
461 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
462 current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
463 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
464 current->FullInstruction.Instruction.Saturate ==
465 next->FullInstruction.Instruction.Saturate &&
466 same_dst_reg(&next->FullInstruction.Dst[0],
467 ¤t->FullInstruction.Dst[0]) &&
468 same_src_reg(&next->FullInstruction.Src[0],
469 ¤t->FullInstruction.Src[0]) &&
470 !same_src_dst_reg(¤t->FullInstruction.Src[0],
471 ¤t->FullInstruction.Dst[0])) {
472 src_reg1 = ¤t->FullInstruction.Src[0];
473 dst_reg1 = ¤t->FullInstruction.Dst[0];
474 src_reg2 = &next->FullInstruction.Src[0];
475 dst_reg2 = &next->FullInstruction.Dst[0];
476
477 /* Start with swizzles from the first mov */
478 swizzle_x = src_reg1->Register.SwizzleX;
479 swizzle_y = src_reg1->Register.SwizzleY;
480 swizzle_z = src_reg1->Register.SwizzleZ;
481 swizzle_w = src_reg1->Register.SwizzleW;
482
483 /* Pile the second mov on top */
484 if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_X)
485 swizzle_x = src_reg2->Register.SwizzleX;
486 if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Y)
487 swizzle_y = src_reg2->Register.SwizzleY;
488 if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Z)
489 swizzle_z = src_reg2->Register.SwizzleZ;
490 if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_W)
491 swizzle_w = src_reg2->Register.SwizzleW;
492
493 dst_reg2->Register.WriteMask |= dst_reg1->Register.WriteMask;
494 src_reg2->Register.SwizzleX = swizzle_x;
495 src_reg2->Register.SwizzleY = swizzle_y;
496 src_reg2->Register.SwizzleZ = swizzle_z;
497 src_reg2->Register.SwizzleW = swizzle_w;
498
499 current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
500
501 return;
502 }
503 }
504
505 /*
506 * Optimize away things like:
507 * MUL OUT[0].xyz, TEMP[1], TEMP[2]
508 * MOV OUT[0].w, TEMP[2]
509 * into:
510 * MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
511 * This is useful for optimizing texenv.
512 */
513 static void
i915_fpc_optimize_mov_after_alu(union i915_full_token * current,union i915_full_token * next)514 i915_fpc_optimize_mov_after_alu(union i915_full_token *current,
515 union i915_full_token *next)
516 {
517 if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
518 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
519 op_commutes(current->FullInstruction.Instruction.Opcode) &&
520 current->FullInstruction.Instruction.Saturate ==
521 next->FullInstruction.Instruction.Saturate &&
522 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
523 same_dst_reg(&next->FullInstruction.Dst[0],
524 ¤t->FullInstruction.Dst[0]) &&
525 same_src_reg(&next->FullInstruction.Src[0],
526 ¤t->FullInstruction.Src[1]) &&
527 !same_src_dst_reg(&next->FullInstruction.Src[0],
528 ¤t->FullInstruction.Dst[0]) &&
529 is_unswizzled(¤t->FullInstruction.Src[0],
530 current->FullInstruction.Dst[0].Register.WriteMask) &&
531 is_unswizzled(¤t->FullInstruction.Src[1],
532 current->FullInstruction.Dst[0].Register.WriteMask) &&
533 is_unswizzled(&next->FullInstruction.Src[0],
534 next->FullInstruction.Dst[0].Register.WriteMask)) {
535 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
536
537 set_neutral_element_swizzle(¤t->FullInstruction.Src[1], 0, 0);
538 set_neutral_element_swizzle(
539 ¤t->FullInstruction.Src[0],
540 next->FullInstruction.Dst[0].Register.WriteMask,
541 op_neutral_element(current->FullInstruction.Instruction.Opcode));
542
543 current->FullInstruction.Dst[0].Register.WriteMask =
544 current->FullInstruction.Dst[0].Register.WriteMask |
545 next->FullInstruction.Dst[0].Register.WriteMask;
546 return;
547 }
548
549 if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
550 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
551 op_commutes(current->FullInstruction.Instruction.Opcode) &&
552 current->FullInstruction.Instruction.Saturate ==
553 next->FullInstruction.Instruction.Saturate &&
554 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
555 same_dst_reg(&next->FullInstruction.Dst[0],
556 ¤t->FullInstruction.Dst[0]) &&
557 same_src_reg(&next->FullInstruction.Src[0],
558 ¤t->FullInstruction.Src[0]) &&
559 !same_src_dst_reg(&next->FullInstruction.Src[0],
560 ¤t->FullInstruction.Dst[0]) &&
561 is_unswizzled(¤t->FullInstruction.Src[0],
562 current->FullInstruction.Dst[0].Register.WriteMask) &&
563 is_unswizzled(¤t->FullInstruction.Src[1],
564 current->FullInstruction.Dst[0].Register.WriteMask) &&
565 is_unswizzled(&next->FullInstruction.Src[0],
566 next->FullInstruction.Dst[0].Register.WriteMask)) {
567 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
568
569 set_neutral_element_swizzle(¤t->FullInstruction.Src[0], 0, 0);
570 set_neutral_element_swizzle(
571 ¤t->FullInstruction.Src[1],
572 next->FullInstruction.Dst[0].Register.WriteMask,
573 op_neutral_element(current->FullInstruction.Instruction.Opcode));
574
575 current->FullInstruction.Dst[0].Register.WriteMask =
576 current->FullInstruction.Dst[0].Register.WriteMask |
577 next->FullInstruction.Dst[0].Register.WriteMask;
578 return;
579 }
580 }
581
582 /*
583 * Optimize away things like:
584 * MOV TEMP[0].xyz TEMP[0].xyzx
585 * into:
586 * NOP
587 */
588 static bool
i915_fpc_useless_mov(union tgsi_full_token * tgsi_current)589 i915_fpc_useless_mov(union tgsi_full_token *tgsi_current)
590 {
591 union i915_full_token current;
592 copy_token(¤t, tgsi_current);
593 if (current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
594 current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
595 op_has_dst(current.FullInstruction.Instruction.Opcode) &&
596 !current.FullInstruction.Instruction.Saturate &&
597 current.FullInstruction.Src[0].Register.Absolute == 0 &&
598 current.FullInstruction.Src[0].Register.Negate == 0 &&
599 is_unswizzled(¤t.FullInstruction.Src[0],
600 current.FullInstruction.Dst[0].Register.WriteMask) &&
601 same_src_dst_reg(¤t.FullInstruction.Src[0],
602 ¤t.FullInstruction.Dst[0])) {
603 return true;
604 }
605 return false;
606 }
607
608 /*
609 * Optimize away things like:
610 * *** TEMP[0], TEMP[1], TEMP[2]
611 * MOV OUT[0] TEMP[0]
612 * into:
613 * *** OUT[0], TEMP[1], TEMP[2]
614 */
615 static void
i915_fpc_optimize_useless_mov_after_inst(struct i915_optimize_context * ctx,struct i915_token_list * tokens,int index)616 i915_fpc_optimize_useless_mov_after_inst(struct i915_optimize_context *ctx,
617 struct i915_token_list *tokens,
618 int index)
619 {
620 union i915_full_token *current = &tokens->Tokens[index - 1];
621 union i915_full_token *next = &tokens->Tokens[index];
622
623 // &out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
624 if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
625 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
626 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
627 op_has_dst(current->FullInstruction.Instruction.Opcode) &&
628 !next->FullInstruction.Instruction.Saturate &&
629 next->FullInstruction.Src[0].Register.Absolute == 0 &&
630 next->FullInstruction.Src[0].Register.Negate == 0 &&
631 unused_from(ctx, ¤t->FullInstruction.Dst[0], index) &&
632 current->FullInstruction.Dst[0].Register.WriteMask ==
633 TGSI_WRITEMASK_XYZW &&
634 is_unswizzled(&next->FullInstruction.Src[0],
635 next->FullInstruction.Dst[0].Register.WriteMask) &&
636 current->FullInstruction.Dst[0].Register.WriteMask ==
637 next->FullInstruction.Dst[0].Register.WriteMask &&
638 same_src_dst_reg(&next->FullInstruction.Src[0],
639 ¤t->FullInstruction.Dst[0])) {
640 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
641
642 current->FullInstruction.Dst[0] = next->FullInstruction.Dst[0];
643 return;
644 }
645 }
646
647 struct i915_token_list *
i915_optimize(const struct tgsi_token * tokens)648 i915_optimize(const struct tgsi_token *tokens)
649 {
650 struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list));
651 struct tgsi_parse_context parse;
652 struct i915_optimize_context *ctx;
653 int i = 0;
654
655 ctx = malloc(sizeof(*ctx));
656
657 out_tokens->NumTokens = 0;
658
659 /* Count the tokens */
660 tgsi_parse_init(&parse, tokens);
661 while (!tgsi_parse_end_of_tokens(&parse)) {
662 tgsi_parse_token(&parse);
663 out_tokens->NumTokens++;
664 }
665 tgsi_parse_free(&parse);
666
667 /* Allocate our tokens */
668 out_tokens->Tokens =
669 MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens);
670
671 tgsi_parse_init(&parse, tokens);
672 while (!tgsi_parse_end_of_tokens(&parse)) {
673 tgsi_parse_token(&parse);
674
675 if (i915_fpc_useless_mov(&parse.FullToken)) {
676 out_tokens->NumTokens--;
677 continue;
678 }
679
680 copy_token(&out_tokens->Tokens[i], &parse.FullToken);
681
682 i++;
683 }
684 tgsi_parse_free(&parse);
685
686 liveness_analysis(ctx, out_tokens);
687
688 i = 1;
689 while (i < out_tokens->NumTokens) {
690 i915_fpc_optimize_useless_mov_after_inst(ctx, out_tokens, i);
691 i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i - 1],
692 &out_tokens->Tokens[i]);
693 i915_fpc_optimize_mov_after_mov(&out_tokens->Tokens[i - 1],
694 &out_tokens->Tokens[i]);
695 i915_fpc_optimize_mov_before_tex(ctx, out_tokens, i);
696 i++;
697 }
698
699 free(ctx);
700
701 return out_tokens;
702 }
703
704 void
i915_optimize_free(struct i915_token_list * tokens)705 i915_optimize_free(struct i915_token_list *tokens)
706 {
707 free(tokens->Tokens);
708 free(tokens);
709 }
710