1 /**************************************************************************
2 *
3 * Copyright 2011 The Chromium OS authors.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "i915_context.h"
29 #include "i915_fpc.h"
30 #include "i915_reg.h"
31
32 #include "pipe/p_shader_tokens.h"
33 #include "tgsi/tgsi_dump.h"
34 #include "tgsi/tgsi_exec.h"
35 #include "tgsi/tgsi_parse.h"
36 #include "util/u_math.h"
37 #include "util/u_memory.h"
38 #include "util/u_string.h"
39
/*
 * Scratch state for the liveness pass that feeds the peephole optimizations.
 * Both arrays are indexed by TGSI temporary register index and hold token
 * positions; liveness_analysis() resets every entry to -1 before scanning.
 */
struct i915_optimize_context {
   /* Token position recorded by liveness_mark_written(); -1 if none. */
   int first_write[TGSI_EXEC_NUM_TEMPS];
   /* Token position recorded by liveness_mark_read(); -1 if none. */
   int last_read[TGSI_EXEC_NUM_TEMPS];
};
44
45 static bool
same_src_dst_reg(struct i915_full_src_register * s1,struct i915_full_dst_register * d1)46 same_src_dst_reg(struct i915_full_src_register *s1,
47 struct i915_full_dst_register *d1)
48 {
49 return (s1->Register.File == d1->Register.File &&
50 s1->Register.Indirect == d1->Register.Indirect &&
51 s1->Register.Dimension == d1->Register.Dimension &&
52 s1->Register.Index == d1->Register.Index);
53 }
54
55 static bool
same_dst_reg(struct i915_full_dst_register * d1,struct i915_full_dst_register * d2)56 same_dst_reg(struct i915_full_dst_register *d1,
57 struct i915_full_dst_register *d2)
58 {
59 return (d1->Register.File == d2->Register.File &&
60 d1->Register.Indirect == d2->Register.Indirect &&
61 d1->Register.Dimension == d2->Register.Dimension &&
62 d1->Register.Index == d2->Register.Index);
63 }
64
65 static bool
same_src_reg(struct i915_full_src_register * d1,struct i915_full_src_register * d2)66 same_src_reg(struct i915_full_src_register *d1,
67 struct i915_full_src_register *d2)
68 {
69 return (d1->Register.File == d2->Register.File &&
70 d1->Register.Indirect == d2->Register.Indirect &&
71 d1->Register.Dimension == d2->Register.Dimension &&
72 d1->Register.Index == d2->Register.Index &&
73 d1->Register.Absolute == d2->Register.Absolute &&
74 d1->Register.Negate == d2->Register.Negate);
75 }
76
77 static const struct {
78 bool is_texture;
79 bool commutes;
80 unsigned neutral_element;
81 unsigned num_dst;
82 unsigned num_src;
83 } op_table[TGSI_OPCODE_LAST] = {
84 [TGSI_OPCODE_ADD] = {false, true, TGSI_SWIZZLE_ZERO, 1, 2},
85 [TGSI_OPCODE_CEIL] = {false, false, 0, 1, 1},
86 [TGSI_OPCODE_CMP] = {false, false, 0, 1, 2},
87 [TGSI_OPCODE_COS] = {false, false, 0, 1, 1},
88 [TGSI_OPCODE_DDX] = {false, false, 0, 1, 0},
89 [TGSI_OPCODE_DDY] = {false, false, 0, 1, 0},
90 [TGSI_OPCODE_DP2] = {false, true, TGSI_SWIZZLE_ONE, 1, 2},
91 [TGSI_OPCODE_DP3] = {false, true, TGSI_SWIZZLE_ONE, 1, 2},
92 [TGSI_OPCODE_DP4] = {false, true, TGSI_SWIZZLE_ONE, 1, 2},
93 [TGSI_OPCODE_DST] = {false, false, 0, 1, 2},
94 [TGSI_OPCODE_END] = {false, false, 0, 0, 0},
95 [TGSI_OPCODE_EX2] = {false, false, 0, 1, 1},
96 [TGSI_OPCODE_FLR] = {false, false, 0, 1, 1},
97 [TGSI_OPCODE_FRC] = {false, false, 0, 1, 1},
98 [TGSI_OPCODE_KILL_IF] = {false, false, 0, 0, 1},
99 [TGSI_OPCODE_KILL] = {false, false, 0, 0, 0},
100 [TGSI_OPCODE_LG2] = {false, false, 0, 1, 1},
101 [TGSI_OPCODE_LIT] = {false, false, 0, 1, 1},
102 [TGSI_OPCODE_LRP] = {false, false, 0, 1, 3},
103 [TGSI_OPCODE_MAX] = {false, false, 0, 1, 2},
104 [TGSI_OPCODE_MAD] = {false, false, 0, 1, 3},
105 [TGSI_OPCODE_MIN] = {false, false, 0, 1, 2},
106 [TGSI_OPCODE_MOV] = {false, false, 0, 1, 1},
107 [TGSI_OPCODE_MUL] = {false, true, TGSI_SWIZZLE_ONE, 1, 2},
108 [TGSI_OPCODE_NOP] = {false, false, 0, 0, 0},
109 [TGSI_OPCODE_POW] = {false, false, 0, 1, 2},
110 [TGSI_OPCODE_RCP] = {false, false, 0, 1, 1},
111 [TGSI_OPCODE_RET] = {false, false, 0, 0, 0},
112 [TGSI_OPCODE_RSQ] = {false, false, 0, 1, 1},
113 [TGSI_OPCODE_SEQ] = {false, false, 0, 1, 2},
114 [TGSI_OPCODE_SGE] = {false, false, 0, 1, 2},
115 [TGSI_OPCODE_SGT] = {false, false, 0, 1, 2},
116 [TGSI_OPCODE_SIN] = {false, false, 0, 1, 1},
117 [TGSI_OPCODE_SLE] = {false, false, 0, 1, 2},
118 [TGSI_OPCODE_SLT] = {false, false, 0, 1, 2},
119 [TGSI_OPCODE_SNE] = {false, false, 0, 1, 2},
120 [TGSI_OPCODE_SSG] = {false, false, 0, 1, 1},
121 [TGSI_OPCODE_TEX] = {true, false, 0, 1, 2},
122 [TGSI_OPCODE_TRUNC] = {false, false, 0, 1, 1},
123 [TGSI_OPCODE_TXB] = {true, false, 0, 1, 2},
124 [TGSI_OPCODE_TXP] = {true, false, 0, 1, 2},
125 };
126
127 static bool
op_has_dst(unsigned opcode)128 op_has_dst(unsigned opcode)
129 {
130 return (op_table[opcode].num_dst > 0);
131 }
132
133 static int
op_num_dst(unsigned opcode)134 op_num_dst(unsigned opcode)
135 {
136 return op_table[opcode].num_dst;
137 }
138
139 static int
op_num_src(unsigned opcode)140 op_num_src(unsigned opcode)
141 {
142 return op_table[opcode].num_src;
143 }
144
145 static bool
op_commutes(unsigned opcode)146 op_commutes(unsigned opcode)
147 {
148 return op_table[opcode].commutes;
149 }
150
151 static bool
is_unswizzled(struct i915_full_src_register * r,unsigned write_mask)152 is_unswizzled(struct i915_full_src_register *r, unsigned write_mask)
153 {
154 if (write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X)
155 return false;
156 if (write_mask & TGSI_WRITEMASK_Y && r->Register.SwizzleY != TGSI_SWIZZLE_Y)
157 return false;
158 if (write_mask & TGSI_WRITEMASK_Z && r->Register.SwizzleZ != TGSI_SWIZZLE_Z)
159 return false;
160 if (write_mask & TGSI_WRITEMASK_W && r->Register.SwizzleW != TGSI_SWIZZLE_W)
161 return false;
162 return true;
163 }
164
165 static bool
op_is_texture(unsigned opcode)166 op_is_texture(unsigned opcode)
167 {
168 return op_table[opcode].is_texture;
169 }
170
171 static unsigned
op_neutral_element(unsigned opcode)172 op_neutral_element(unsigned opcode)
173 {
174 unsigned ne = op_table[opcode].neutral_element;
175 if (!ne) {
176 debug_printf("No neutral element for opcode %d\n", opcode);
177 ne = TGSI_SWIZZLE_ZERO;
178 }
179 return ne;
180 }
181
182 /*
183 * Sets the swizzle to the neutral element for the operation for the bits
184 * of writemask which are set, swizzle to identity otherwise.
185 */
186 static void
set_neutral_element_swizzle(struct i915_full_src_register * r,unsigned write_mask,unsigned neutral)187 set_neutral_element_swizzle(struct i915_full_src_register *r,
188 unsigned write_mask, unsigned neutral)
189 {
190 if (write_mask & TGSI_WRITEMASK_X)
191 r->Register.SwizzleX = neutral;
192 else
193 r->Register.SwizzleX = TGSI_SWIZZLE_X;
194
195 if (write_mask & TGSI_WRITEMASK_Y)
196 r->Register.SwizzleY = neutral;
197 else
198 r->Register.SwizzleY = TGSI_SWIZZLE_Y;
199
200 if (write_mask & TGSI_WRITEMASK_Z)
201 r->Register.SwizzleZ = neutral;
202 else
203 r->Register.SwizzleZ = TGSI_SWIZZLE_Z;
204
205 if (write_mask & TGSI_WRITEMASK_W)
206 r->Register.SwizzleW = neutral;
207 else
208 r->Register.SwizzleW = TGSI_SWIZZLE_W;
209 }
210
211 static void
copy_src_reg(struct i915_src_register * o,const struct tgsi_src_register * i)212 copy_src_reg(struct i915_src_register *o, const struct tgsi_src_register *i)
213 {
214 o->File = i->File;
215 o->Indirect = i->Indirect;
216 o->Dimension = i->Dimension;
217 o->Index = i->Index;
218 o->SwizzleX = i->SwizzleX;
219 o->SwizzleY = i->SwizzleY;
220 o->SwizzleZ = i->SwizzleZ;
221 o->SwizzleW = i->SwizzleW;
222 o->Absolute = i->Absolute;
223 o->Negate = i->Negate;
224 }
225
226 static void
copy_dst_reg(struct i915_dst_register * o,const struct tgsi_dst_register * i)227 copy_dst_reg(struct i915_dst_register *o, const struct tgsi_dst_register *i)
228 {
229 o->File = i->File;
230 o->WriteMask = i->WriteMask;
231 o->Indirect = i->Indirect;
232 o->Dimension = i->Dimension;
233 o->Index = i->Index;
234 }
235
236 static void
copy_instruction(struct i915_full_instruction * o,const struct tgsi_full_instruction * i)237 copy_instruction(struct i915_full_instruction *o,
238 const struct tgsi_full_instruction *i)
239 {
240 memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction));
241 memcpy(&o->Texture, &i->Texture, sizeof(o->Texture));
242
243 copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register);
244
245 copy_src_reg(&o->Src[0].Register, &i->Src[0].Register);
246 copy_src_reg(&o->Src[1].Register, &i->Src[1].Register);
247 copy_src_reg(&o->Src[2].Register, &i->Src[2].Register);
248 }
249
250 static void
copy_token(union i915_full_token * o,union tgsi_full_token * i)251 copy_token(union i915_full_token *o, union tgsi_full_token *i)
252 {
253 if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
254 memcpy(o, i, sizeof(*o));
255 else
256 copy_instruction(&o->FullInstruction, &i->FullInstruction);
257 }
258
259 static void
liveness_mark_written(struct i915_optimize_context * ctx,struct i915_full_dst_register * dst_reg,int pos)260 liveness_mark_written(struct i915_optimize_context *ctx,
261 struct i915_full_dst_register *dst_reg, int pos)
262 {
263 int dst_reg_index;
264 if (dst_reg->Register.File == TGSI_FILE_TEMPORARY) {
265 dst_reg_index = dst_reg->Register.Index;
266 assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);
267 /* dead -> live transition */
268 if (ctx->first_write[dst_reg_index] != -1)
269 ctx->first_write[dst_reg_index] = pos;
270 }
271 }
272
273 static void
liveness_mark_read(struct i915_optimize_context * ctx,struct i915_full_src_register * src_reg,int pos)274 liveness_mark_read(struct i915_optimize_context *ctx,
275 struct i915_full_src_register *src_reg, int pos)
276 {
277 int src_reg_index;
278 if (src_reg->Register.File == TGSI_FILE_TEMPORARY) {
279 src_reg_index = src_reg->Register.Index;
280 assert(src_reg_index < TGSI_EXEC_NUM_TEMPS);
281 /* live -> dead transition */
282 if (ctx->last_read[src_reg_index] != -1)
283 ctx->last_read[src_reg_index] = pos;
284 }
285 }
286
287 static void
liveness_analysis(struct i915_optimize_context * ctx,struct i915_token_list * tokens)288 liveness_analysis(struct i915_optimize_context *ctx,
289 struct i915_token_list *tokens)
290 {
291 struct i915_full_dst_register *dst_reg;
292 struct i915_full_src_register *src_reg;
293 union i915_full_token *current;
294 unsigned opcode;
295 int num_dst, num_src;
296 int i = 0;
297
298 for (i = 0; i < TGSI_EXEC_NUM_TEMPS; i++) {
299 ctx->first_write[i] = -1;
300 ctx->last_read[i] = -1;
301 }
302
303 for (i = 0; i < tokens->NumTokens; i++) {
304 current = &tokens->Tokens[i];
305
306 if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
307 continue;
308
309 opcode = current->FullInstruction.Instruction.Opcode;
310 num_dst = op_num_dst(opcode);
311
312 switch (num_dst) {
313 case 1:
314 dst_reg = ¤t->FullInstruction.Dst[0];
315 liveness_mark_written(ctx, dst_reg, i);
316 FALLTHROUGH;
317 case 0:
318 break;
319 default:
320 debug_printf("Op %d has %d dst regs\n", opcode, num_dst);
321 break;
322 }
323 }
324
325 for (i = tokens->NumTokens - 1; i >= 0; i--) {
326 current = &tokens->Tokens[i];
327
328 if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
329 continue;
330
331 opcode = current->FullInstruction.Instruction.Opcode;
332 num_src = op_num_src(opcode);
333
334 switch (num_src) {
335 case 3:
336 src_reg = ¤t->FullInstruction.Src[2];
337 liveness_mark_read(ctx, src_reg, i);
338 FALLTHROUGH;
339 case 2:
340 src_reg = ¤t->FullInstruction.Src[1];
341 liveness_mark_read(ctx, src_reg, i);
342 FALLTHROUGH;
343 case 1:
344 src_reg = ¤t->FullInstruction.Src[0];
345 liveness_mark_read(ctx, src_reg, i);
346 FALLTHROUGH;
347 case 0:
348 break;
349 default:
350 debug_printf("Op %d has %d src regs\n", opcode, num_src);
351 break;
352 }
353 }
354 }
355
356 static int
unused_from(struct i915_optimize_context * ctx,struct i915_full_dst_register * dst_reg,int from)357 unused_from(struct i915_optimize_context *ctx,
358 struct i915_full_dst_register *dst_reg, int from)
359 {
360 int dst_reg_index = dst_reg->Register.Index;
361 assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);
362 return (from >= ctx->last_read[dst_reg_index]);
363 }
364
365 /* Returns a mask with the components used for a texture access instruction */
366 static unsigned
i915_tex_mask(union i915_full_token * instr)367 i915_tex_mask(union i915_full_token *instr)
368 {
369 return i915_coord_mask(instr->FullInstruction.Instruction.Opcode,
370 instr->FullInstruction.Texture.Texture);
371 }
372
373 static bool
target_is_texture2d(uint32_t tex)374 target_is_texture2d(uint32_t tex)
375 {
376 switch (tex) {
377 case TGSI_TEXTURE_2D:
378 case TGSI_TEXTURE_RECT:
379 return true;
380 default:
381 return false;
382 }
383 }
384
385 /*
386 * Optimize away useless indirect texture reads:
387 * MOV TEMP[0].xy, IN[0].xyyy
388 * TEX TEMP[1], TEMP[0], SAMP[0], 2D
389 * into:
390 * TEX TEMP[1], IN[0], SAMP[0], 2D
391 *
392 * note: this only seems to work on 2D/RECT textures, but not SHAADOW2D/1D/..
393 */
394 static void
i915_fpc_optimize_mov_before_tex(struct i915_optimize_context * ctx,struct i915_token_list * tokens,int index)395 i915_fpc_optimize_mov_before_tex(struct i915_optimize_context *ctx,
396 struct i915_token_list *tokens, int index)
397 {
398 union i915_full_token *current = &tokens->Tokens[index - 1];
399 union i915_full_token *next = &tokens->Tokens[index];
400
401 if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
402 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
403 current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
404 op_is_texture(next->FullInstruction.Instruction.Opcode) &&
405 target_is_texture2d(next->FullInstruction.Texture.Texture) &&
406 same_src_dst_reg(&next->FullInstruction.Src[0],
407 ¤t->FullInstruction.Dst[0]) &&
408 is_unswizzled(¤t->FullInstruction.Src[0], i915_tex_mask(next)) &&
409 unused_from(ctx, ¤t->FullInstruction.Dst[0], index)) {
410 memcpy(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[0],
411 sizeof(struct i915_src_register));
412 current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
413 }
414 }
415
416 /*
417 * Optimize away things like:
418 * MOV TEMP[0].xy, TEMP[1].xyyy (first write for TEMP[0])
419 * MOV TEMP[0].w, TEMP[1].wwww (last write for TEMP[0])
420 * into:
421 * NOP
422 * MOV OUT[0].xyw, TEMP[1].xyww
423 */
424 static void
i915_fpc_optimize_mov_after_mov(union i915_full_token * current,union i915_full_token * next)425 i915_fpc_optimize_mov_after_mov(union i915_full_token *current,
426 union i915_full_token *next)
427 {
428 struct i915_full_src_register *src_reg1, *src_reg2;
429 struct i915_full_dst_register *dst_reg1, *dst_reg2;
430 unsigned swizzle_x, swizzle_y, swizzle_z, swizzle_w;
431
432 if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
433 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
434 current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
435 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
436 current->FullInstruction.Instruction.Saturate ==
437 next->FullInstruction.Instruction.Saturate &&
438 same_dst_reg(&next->FullInstruction.Dst[0],
439 ¤t->FullInstruction.Dst[0]) &&
440 same_src_reg(&next->FullInstruction.Src[0],
441 ¤t->FullInstruction.Src[0]) &&
442 !same_src_dst_reg(¤t->FullInstruction.Src[0],
443 ¤t->FullInstruction.Dst[0])) {
444 src_reg1 = ¤t->FullInstruction.Src[0];
445 dst_reg1 = ¤t->FullInstruction.Dst[0];
446 src_reg2 = &next->FullInstruction.Src[0];
447 dst_reg2 = &next->FullInstruction.Dst[0];
448
449 /* Start with swizzles from the first mov */
450 swizzle_x = src_reg1->Register.SwizzleX;
451 swizzle_y = src_reg1->Register.SwizzleY;
452 swizzle_z = src_reg1->Register.SwizzleZ;
453 swizzle_w = src_reg1->Register.SwizzleW;
454
455 /* Pile the second mov on top */
456 if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_X)
457 swizzle_x = src_reg2->Register.SwizzleX;
458 if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Y)
459 swizzle_y = src_reg2->Register.SwizzleY;
460 if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Z)
461 swizzle_z = src_reg2->Register.SwizzleZ;
462 if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_W)
463 swizzle_w = src_reg2->Register.SwizzleW;
464
465 dst_reg2->Register.WriteMask |= dst_reg1->Register.WriteMask;
466 src_reg2->Register.SwizzleX = swizzle_x;
467 src_reg2->Register.SwizzleY = swizzle_y;
468 src_reg2->Register.SwizzleZ = swizzle_z;
469 src_reg2->Register.SwizzleW = swizzle_w;
470
471 current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
472
473 return;
474 }
475 }
476
477 /*
478 * Optimize away things like:
479 * MUL OUT[0].xyz, TEMP[1], TEMP[2]
480 * MOV OUT[0].w, TEMP[2]
481 * into:
482 * MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
483 * This is useful for optimizing texenv.
484 */
485 static void
i915_fpc_optimize_mov_after_alu(union i915_full_token * current,union i915_full_token * next)486 i915_fpc_optimize_mov_after_alu(union i915_full_token *current,
487 union i915_full_token *next)
488 {
489 if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
490 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
491 op_commutes(current->FullInstruction.Instruction.Opcode) &&
492 current->FullInstruction.Instruction.Saturate ==
493 next->FullInstruction.Instruction.Saturate &&
494 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
495 same_dst_reg(&next->FullInstruction.Dst[0],
496 ¤t->FullInstruction.Dst[0]) &&
497 same_src_reg(&next->FullInstruction.Src[0],
498 ¤t->FullInstruction.Src[1]) &&
499 !same_src_dst_reg(&next->FullInstruction.Src[0],
500 ¤t->FullInstruction.Dst[0]) &&
501 is_unswizzled(¤t->FullInstruction.Src[0],
502 current->FullInstruction.Dst[0].Register.WriteMask) &&
503 is_unswizzled(¤t->FullInstruction.Src[1],
504 current->FullInstruction.Dst[0].Register.WriteMask) &&
505 is_unswizzled(&next->FullInstruction.Src[0],
506 next->FullInstruction.Dst[0].Register.WriteMask)) {
507 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
508
509 set_neutral_element_swizzle(¤t->FullInstruction.Src[1], 0, 0);
510 set_neutral_element_swizzle(
511 ¤t->FullInstruction.Src[0],
512 next->FullInstruction.Dst[0].Register.WriteMask,
513 op_neutral_element(current->FullInstruction.Instruction.Opcode));
514
515 current->FullInstruction.Dst[0].Register.WriteMask =
516 current->FullInstruction.Dst[0].Register.WriteMask |
517 next->FullInstruction.Dst[0].Register.WriteMask;
518 return;
519 }
520
521 if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
522 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
523 op_commutes(current->FullInstruction.Instruction.Opcode) &&
524 current->FullInstruction.Instruction.Saturate ==
525 next->FullInstruction.Instruction.Saturate &&
526 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
527 same_dst_reg(&next->FullInstruction.Dst[0],
528 ¤t->FullInstruction.Dst[0]) &&
529 same_src_reg(&next->FullInstruction.Src[0],
530 ¤t->FullInstruction.Src[0]) &&
531 !same_src_dst_reg(&next->FullInstruction.Src[0],
532 ¤t->FullInstruction.Dst[0]) &&
533 is_unswizzled(¤t->FullInstruction.Src[0],
534 current->FullInstruction.Dst[0].Register.WriteMask) &&
535 is_unswizzled(¤t->FullInstruction.Src[1],
536 current->FullInstruction.Dst[0].Register.WriteMask) &&
537 is_unswizzled(&next->FullInstruction.Src[0],
538 next->FullInstruction.Dst[0].Register.WriteMask)) {
539 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
540
541 set_neutral_element_swizzle(¤t->FullInstruction.Src[0], 0, 0);
542 set_neutral_element_swizzle(
543 ¤t->FullInstruction.Src[1],
544 next->FullInstruction.Dst[0].Register.WriteMask,
545 op_neutral_element(current->FullInstruction.Instruction.Opcode));
546
547 current->FullInstruction.Dst[0].Register.WriteMask =
548 current->FullInstruction.Dst[0].Register.WriteMask |
549 next->FullInstruction.Dst[0].Register.WriteMask;
550 return;
551 }
552 }
553
554 /*
555 * Optimize away things like:
556 * MOV TEMP[0].xyz TEMP[0].xyzx
557 * into:
558 * NOP
559 */
560 static bool
i915_fpc_useless_mov(union tgsi_full_token * tgsi_current)561 i915_fpc_useless_mov(union tgsi_full_token *tgsi_current)
562 {
563 union i915_full_token current;
564 copy_token(¤t, tgsi_current);
565 if (current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
566 current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
567 op_has_dst(current.FullInstruction.Instruction.Opcode) &&
568 !current.FullInstruction.Instruction.Saturate &&
569 current.FullInstruction.Src[0].Register.Absolute == 0 &&
570 current.FullInstruction.Src[0].Register.Negate == 0 &&
571 is_unswizzled(¤t.FullInstruction.Src[0],
572 current.FullInstruction.Dst[0].Register.WriteMask) &&
573 same_src_dst_reg(¤t.FullInstruction.Src[0],
574 ¤t.FullInstruction.Dst[0])) {
575 return true;
576 }
577 return false;
578 }
579
580 /*
581 * Optimize away things like:
582 * *** TEMP[0], TEMP[1], TEMP[2]
583 * MOV OUT[0] TEMP[0]
584 * into:
585 * *** OUT[0], TEMP[1], TEMP[2]
586 */
587 static void
i915_fpc_optimize_useless_mov_after_inst(struct i915_optimize_context * ctx,struct i915_token_list * tokens,int index)588 i915_fpc_optimize_useless_mov_after_inst(struct i915_optimize_context *ctx,
589 struct i915_token_list *tokens,
590 int index)
591 {
592 union i915_full_token *current = &tokens->Tokens[index - 1];
593 union i915_full_token *next = &tokens->Tokens[index];
594
595 // &out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
596 if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
597 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
598 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
599 op_has_dst(current->FullInstruction.Instruction.Opcode) &&
600 !next->FullInstruction.Instruction.Saturate &&
601 next->FullInstruction.Src[0].Register.Absolute == 0 &&
602 next->FullInstruction.Src[0].Register.Negate == 0 &&
603 unused_from(ctx, ¤t->FullInstruction.Dst[0], index) &&
604 current->FullInstruction.Dst[0].Register.WriteMask ==
605 TGSI_WRITEMASK_XYZW &&
606 is_unswizzled(&next->FullInstruction.Src[0],
607 next->FullInstruction.Dst[0].Register.WriteMask) &&
608 current->FullInstruction.Dst[0].Register.WriteMask ==
609 next->FullInstruction.Dst[0].Register.WriteMask &&
610 same_src_dst_reg(&next->FullInstruction.Src[0],
611 ¤t->FullInstruction.Dst[0])) {
612 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
613
614 current->FullInstruction.Dst[0] = next->FullInstruction.Dst[0];
615 return;
616 }
617 }
618
619 struct i915_token_list *
i915_optimize(const struct tgsi_token * tokens)620 i915_optimize(const struct tgsi_token *tokens)
621 {
622 struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list));
623 struct tgsi_parse_context parse;
624 struct i915_optimize_context *ctx;
625 int i = 0;
626
627 ctx = malloc(sizeof(*ctx));
628
629 out_tokens->NumTokens = 0;
630
631 /* Count the tokens */
632 tgsi_parse_init(&parse, tokens);
633 while (!tgsi_parse_end_of_tokens(&parse)) {
634 tgsi_parse_token(&parse);
635 out_tokens->NumTokens++;
636 }
637 tgsi_parse_free(&parse);
638
639 /* Allocate our tokens */
640 out_tokens->Tokens =
641 MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens);
642
643 tgsi_parse_init(&parse, tokens);
644 while (!tgsi_parse_end_of_tokens(&parse)) {
645 tgsi_parse_token(&parse);
646
647 if (i915_fpc_useless_mov(&parse.FullToken)) {
648 out_tokens->NumTokens--;
649 continue;
650 }
651
652 copy_token(&out_tokens->Tokens[i], &parse.FullToken);
653
654 i++;
655 }
656 tgsi_parse_free(&parse);
657
658 liveness_analysis(ctx, out_tokens);
659
660 i = 1;
661 while (i < out_tokens->NumTokens) {
662 i915_fpc_optimize_useless_mov_after_inst(ctx, out_tokens, i);
663 i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i - 1],
664 &out_tokens->Tokens[i]);
665 i915_fpc_optimize_mov_after_mov(&out_tokens->Tokens[i - 1],
666 &out_tokens->Tokens[i]);
667 i915_fpc_optimize_mov_before_tex(ctx, out_tokens, i);
668 i++;
669 }
670
671 free(ctx);
672
673 return out_tokens;
674 }
675
676 void
i915_optimize_free(struct i915_token_list * tokens)677 i915_optimize_free(struct i915_token_list *tokens)
678 {
679 free(tokens->Tokens);
680 free(tokens);
681 }
682