1 /*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 * Copyright 2010 Tom Stellard <tstellar@gmail.com>
4 *
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 */
28
29 #include "radeon_dataflow.h"
30
31 #include "radeon_compiler.h"
32 #include "radeon_compiler_util.h"
33 #include "radeon_list.h"
34 #include "radeon_swizzle.h"
35 #include "radeon_variable.h"
36
/**
 * Context passed to src_clobbered_reads_cb().
 *
 * is_src_clobbered_scan_write() fills it with the register write it was
 * notified about, plus the reader bookkeeping that must be flagged when that
 * write touches a register the tracked writer instruction reads.
 */
struct src_clobbered_reads_cb_data {
	/* Register file of the write being examined. */
	rc_register_file File;
	/* Register index of the write being examined. */
	unsigned int Index;
	/* Channels covered by the write being examined. */
	unsigned int Mask;
	/* Reader state whose AbortOnRead is set on a clobber. */
	struct rc_reader_data * ReaderData;
};
43
/**
 * Callback used by presub_helper() to rewrite one source of a reader.
 *
 * Arguments, in order: the ADD instruction being folded into a presubtract
 * operation, the reader instruction to patch, and the index of the reader's
 * source register that reads the ADD result.
 */
typedef void (*rc_presub_replace_fn)(struct rc_instruction *,
					struct rc_instruction *,
					unsigned int);
47
chain_srcregs(struct rc_src_register outer,struct rc_src_register inner)48 static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
49 {
50 struct rc_src_register combine;
51 combine.File = inner.File;
52 combine.Index = inner.Index;
53 combine.RelAddr = inner.RelAddr;
54 if (outer.Abs) {
55 combine.Abs = 1;
56 combine.Negate = outer.Negate;
57 } else {
58 combine.Abs = inner.Abs;
59 combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate);
60 combine.Negate ^= outer.Negate;
61 }
62 combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
63 return combine;
64 }
65
copy_propagate_scan_read(void * data,struct rc_instruction * inst,struct rc_src_register * src)66 static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
67 struct rc_src_register * src)
68 {
69 rc_register_file file = src->File;
70 struct rc_reader_data * reader_data = data;
71
72 if(!rc_inst_can_use_presub(inst,
73 reader_data->Writer->U.I.PreSub.Opcode,
74 rc_swizzle_to_writemask(src->Swizzle),
75 src,
76 &reader_data->Writer->U.I.PreSub.SrcReg[0],
77 &reader_data->Writer->U.I.PreSub.SrcReg[1])) {
78 reader_data->Abort = 1;
79 return;
80 }
81
82 /* XXX This could probably be handled better. */
83 if (file == RC_FILE_ADDRESS) {
84 reader_data->Abort = 1;
85 return;
86 }
87
88 /* These instructions cannot read from the constants file.
89 * see radeonTransformTEX()
90 */
91 if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
92 reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT &&
93 (inst->U.I.Opcode == RC_OPCODE_TEX ||
94 inst->U.I.Opcode == RC_OPCODE_TXB ||
95 inst->U.I.Opcode == RC_OPCODE_TXP ||
96 inst->U.I.Opcode == RC_OPCODE_TXD ||
97 inst->U.I.Opcode == RC_OPCODE_TXL ||
98 inst->U.I.Opcode == RC_OPCODE_KIL)){
99 reader_data->Abort = 1;
100 return;
101 }
102 }
103
src_clobbered_reads_cb(void * data,struct rc_instruction * inst,struct rc_src_register * src)104 static void src_clobbered_reads_cb(
105 void * data,
106 struct rc_instruction * inst,
107 struct rc_src_register * src)
108 {
109 struct src_clobbered_reads_cb_data * sc_data = data;
110
111 if (src->File == sc_data->File
112 && src->Index == sc_data->Index
113 && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) {
114
115 sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
116 }
117
118 if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) {
119 sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
120 }
121 }
122
/**
 * Write callback for rc_get_readers(): for each register write seen, runs
 * src_clobbered_reads_cb() over every source of the tracked writer
 * instruction so that overwritten sources are flagged.
 *
 * @param data  the struct rc_reader_data of the scan.
 * @param inst  the instruction performing the write (unused here).
 * @param file  register file being written.
 * @param index register index being written.
 * @param mask  channels being written.
 */
static void is_src_clobbered_scan_write(
	void * data,
	struct rc_instruction * inst,
	rc_register_file file,
	unsigned int index,
	unsigned int mask)
{
	struct rc_reader_data * reader_data = data;
	struct src_clobbered_reads_cb_data sc_data = {
		.File = file,
		.Index = index,
		.Mask = mask,
		.ReaderData = reader_data,
	};

	rc_for_all_reads_src(reader_data->Writer,
				src_clobbered_reads_cb, &sc_data);
}
139
copy_propagate(struct radeon_compiler * c,struct rc_instruction * inst_mov)140 static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
141 {
142 struct rc_reader_data reader_data;
143 unsigned int i;
144
145 if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
146 inst_mov->U.I.WriteALUResult)
147 return;
148
149 /* Get a list of all the readers of this MOV instruction. */
150 reader_data.ExitOnAbort = 1;
151 rc_get_readers(c, inst_mov, &reader_data,
152 copy_propagate_scan_read, NULL,
153 is_src_clobbered_scan_write);
154
155 if (reader_data.Abort || reader_data.ReaderCount == 0)
156 return;
157
158 /* We can propagate SaturateMode if all the readers are MOV instructions
159 * without a presubtract operation, source negation and absolute.
160 * In that case, we just move SaturateMode to all readers. */
161 if (inst_mov->U.I.SaturateMode) {
162 for (i = 0; i < reader_data.ReaderCount; i++) {
163 struct rc_instruction * inst = reader_data.Readers[i].Inst;
164
165 if (inst->U.I.Opcode != RC_OPCODE_MOV ||
166 inst->U.I.SrcReg[0].File == RC_FILE_PRESUB ||
167 inst->U.I.SrcReg[0].Abs ||
168 inst->U.I.SrcReg[0].Negate) {
169 return;
170 }
171 }
172 }
173
174 /* Propagate the MOV instruction. */
175 for (i = 0; i < reader_data.ReaderCount; i++) {
176 struct rc_instruction * inst = reader_data.Readers[i].Inst;
177 *reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]);
178
179 if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
180 inst->U.I.PreSub = inst_mov->U.I.PreSub;
181 if (!inst->U.I.SaturateMode)
182 inst->U.I.SaturateMode = inst_mov->U.I.SaturateMode;
183 }
184
185 /* Finally, remove the original MOV instruction */
186 rc_remove_instruction(inst_mov);
187 }
188
189 /**
190 * Check if a source register is actually always the same
191 * swizzle constant.
192 */
is_src_uniform_constant(struct rc_src_register src,rc_swizzle * pswz,unsigned int * pnegate)193 static int is_src_uniform_constant(struct rc_src_register src,
194 rc_swizzle * pswz, unsigned int * pnegate)
195 {
196 int have_used = 0;
197
198 if (src.File != RC_FILE_NONE) {
199 *pswz = 0;
200 return 0;
201 }
202
203 for(unsigned int chan = 0; chan < 4; ++chan) {
204 unsigned int swz = GET_SWZ(src.Swizzle, chan);
205 if (swz < 4) {
206 *pswz = 0;
207 return 0;
208 }
209 if (swz == RC_SWIZZLE_UNUSED)
210 continue;
211
212 if (!have_used) {
213 *pswz = swz;
214 *pnegate = GET_BIT(src.Negate, chan);
215 have_used = 1;
216 } else {
217 if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
218 *pswz = 0;
219 return 0;
220 }
221 }
222 }
223
224 return 1;
225 }
226
constant_folding_mad(struct rc_instruction * inst)227 static void constant_folding_mad(struct rc_instruction * inst)
228 {
229 rc_swizzle swz = 0;
230 unsigned int negate= 0;
231
232 if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
233 if (swz == RC_SWIZZLE_ZERO) {
234 inst->U.I.Opcode = RC_OPCODE_MUL;
235 return;
236 }
237 }
238
239 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
240 if (swz == RC_SWIZZLE_ONE) {
241 inst->U.I.Opcode = RC_OPCODE_ADD;
242 if (negate)
243 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
244 inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
245 return;
246 } else if (swz == RC_SWIZZLE_ZERO) {
247 inst->U.I.Opcode = RC_OPCODE_MOV;
248 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
249 return;
250 }
251 }
252
253 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
254 if (swz == RC_SWIZZLE_ONE) {
255 inst->U.I.Opcode = RC_OPCODE_ADD;
256 if (negate)
257 inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
258 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
259 return;
260 } else if (swz == RC_SWIZZLE_ZERO) {
261 inst->U.I.Opcode = RC_OPCODE_MOV;
262 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
263 return;
264 }
265 }
266 }
267
constant_folding_mul(struct rc_instruction * inst)268 static void constant_folding_mul(struct rc_instruction * inst)
269 {
270 rc_swizzle swz = 0;
271 unsigned int negate = 0;
272
273 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
274 if (swz == RC_SWIZZLE_ONE) {
275 inst->U.I.Opcode = RC_OPCODE_MOV;
276 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
277 if (negate)
278 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
279 return;
280 } else if (swz == RC_SWIZZLE_ZERO) {
281 inst->U.I.Opcode = RC_OPCODE_MOV;
282 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
283 return;
284 }
285 }
286
287 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
288 if (swz == RC_SWIZZLE_ONE) {
289 inst->U.I.Opcode = RC_OPCODE_MOV;
290 if (negate)
291 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
292 return;
293 } else if (swz == RC_SWIZZLE_ZERO) {
294 inst->U.I.Opcode = RC_OPCODE_MOV;
295 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
296 return;
297 }
298 }
299 }
300
constant_folding_add(struct rc_instruction * inst)301 static void constant_folding_add(struct rc_instruction * inst)
302 {
303 rc_swizzle swz = 0;
304 unsigned int negate = 0;
305
306 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
307 if (swz == RC_SWIZZLE_ZERO) {
308 inst->U.I.Opcode = RC_OPCODE_MOV;
309 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
310 return;
311 }
312 }
313
314 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
315 if (swz == RC_SWIZZLE_ZERO) {
316 inst->U.I.Opcode = RC_OPCODE_MOV;
317 return;
318 }
319 }
320 }
321
322 /**
323 * Replace 0.0, 1.0 and 0.5 immediate constants by their
324 * respective swizzles. Simplify instructions like ADD dst, src, 0;
325 */
constant_folding(struct radeon_compiler * c,struct rc_instruction * inst)326 static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
327 {
328 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
329 unsigned int i;
330
331 /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
332 for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
333 struct rc_constant * constant;
334 struct rc_src_register newsrc;
335 int have_real_reference;
336 unsigned int chan;
337
338 /* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */
339 for (chan = 0; chan < 4; ++chan)
340 if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3)
341 break;
342 if (chan == 4) {
343 inst->U.I.SrcReg[src].File = RC_FILE_NONE;
344 continue;
345 }
346
347 /* Convert immediates to swizzles. */
348 if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
349 inst->U.I.SrcReg[src].RelAddr ||
350 inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
351 continue;
352
353 constant =
354 &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
355
356 if (constant->Type != RC_CONSTANT_IMMEDIATE)
357 continue;
358
359 newsrc = inst->U.I.SrcReg[src];
360 have_real_reference = 0;
361 for (chan = 0; chan < 4; ++chan) {
362 unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
363 unsigned int newswz;
364 float imm;
365 float baseimm;
366
367 if (swz >= 4)
368 continue;
369
370 imm = constant->u.Immediate[swz];
371 baseimm = imm;
372 if (imm < 0.0)
373 baseimm = -baseimm;
374
375 if (baseimm == 0.0) {
376 newswz = RC_SWIZZLE_ZERO;
377 } else if (baseimm == 1.0) {
378 newswz = RC_SWIZZLE_ONE;
379 } else if (baseimm == 0.5 && c->has_half_swizzles) {
380 newswz = RC_SWIZZLE_HALF;
381 } else {
382 have_real_reference = 1;
383 continue;
384 }
385
386 SET_SWZ(newsrc.Swizzle, chan, newswz);
387 if (imm < 0.0 && !newsrc.Abs)
388 newsrc.Negate ^= 1 << chan;
389 }
390
391 if (!have_real_reference) {
392 newsrc.File = RC_FILE_NONE;
393 newsrc.Index = 0;
394 }
395
396 /* don't make the swizzle worse */
397 if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
398 c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
399 continue;
400
401 inst->U.I.SrcReg[src] = newsrc;
402 }
403
404 /* Simplify instructions based on constants */
405 if (inst->U.I.Opcode == RC_OPCODE_MAD)
406 constant_folding_mad(inst);
407
408 /* note: MAD can simplify to MUL or ADD */
409 if (inst->U.I.Opcode == RC_OPCODE_MUL)
410 constant_folding_mul(inst);
411 else if (inst->U.I.Opcode == RC_OPCODE_ADD)
412 constant_folding_add(inst);
413
414 /* In case this instruction has been converted, make sure all of the
415 * registers that are no longer used are empty. */
416 opcode = rc_get_opcode_info(inst->U.I.Opcode);
417 for(i = opcode->NumSrcRegs; i < 3; i++) {
418 memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register));
419 }
420 }
421
422 /**
423 * If src and dst use the same register, this function returns a writemask that
424 * indicates which components are read by src. Otherwise zero is returned.
425 */
src_reads_dst_mask(struct rc_src_register src,struct rc_dst_register dst)426 static unsigned int src_reads_dst_mask(struct rc_src_register src,
427 struct rc_dst_register dst)
428 {
429 if (dst.File != src.File || dst.Index != src.Index) {
430 return 0;
431 }
432 return rc_swizzle_to_writemask(src.Swizzle);
433 }
434
435 /* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
436 * in any of its channels. Return 0 otherwise. */
src_has_const_swz(struct rc_src_register src)437 static int src_has_const_swz(struct rc_src_register src) {
438 int chan;
439 for(chan = 0; chan < 4; chan++) {
440 unsigned int swz = GET_SWZ(src.Swizzle, chan);
441 if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF
442 || swz == RC_SWIZZLE_ONE) {
443 return 1;
444 }
445 }
446 return 0;
447 }
448
presub_scan_read(void * data,struct rc_instruction * inst,struct rc_src_register * src)449 static void presub_scan_read(
450 void * data,
451 struct rc_instruction * inst,
452 struct rc_src_register * src)
453 {
454 struct rc_reader_data * reader_data = data;
455 rc_presubtract_op * presub_opcode = reader_data->CbData;
456
457 if (!rc_inst_can_use_presub(inst, *presub_opcode,
458 reader_data->Writer->U.I.DstReg.WriteMask,
459 src,
460 &reader_data->Writer->U.I.SrcReg[0],
461 &reader_data->Writer->U.I.SrcReg[1])) {
462 reader_data->Abort = 1;
463 return;
464 }
465 }
466
/**
 * Rewrite every reader of inst_add to use a presubtract operation instead
 * of the ADD's result.
 *
 * @param c              the compiler instance.
 * @param inst_add       the ADD instruction being folded.
 * @param presub_opcode  the operation checked against each reader.
 * @param presub_replace callback that patches one reader source.
 * @return 1 if all readers were rewritten, 0 if the reader scan aborted or
 * found no readers (nothing is modified in that case). The ADD itself is
 * not removed here.
 */
static int presub_helper(
	struct radeon_compiler * c,
	struct rc_instruction * inst_add,
	rc_presubtract_op presub_opcode,
	rc_presub_replace_fn presub_replace)
{
	struct rc_reader_data reader_data;
	rc_presubtract_op cb_op = presub_opcode;
	unsigned int r;

	reader_data.CbData = &cb_op;
	reader_data.ExitOnAbort = 1;
	rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL,
						is_src_clobbered_scan_write);

	if (reader_data.Abort || reader_data.ReaderCount == 0)
		return 0;

	for (r = 0; r < reader_data.ReaderCount; r++) {
		struct rc_instruction * reader_inst = reader_data.Readers[r].Inst;
		struct rc_src_register * reader_src = reader_data.Readers[r].U.I.Src;
		const struct rc_opcode_info * info =
				rc_get_opcode_info(reader_inst->U.I.Opcode);
		unsigned int s;

		/* Find which source slot of the reader this entry refers to. */
		for (s = 0; s < info->NumSrcRegs; s++) {
			if (&reader_inst->U.I.SrcReg[s] != reader_src)
				continue;
			presub_replace(inst_add, reader_inst, s);
		}
	}
	return 1;
}
498
499 /* This function assumes that inst_add->U.I.SrcReg[0] and
500 * inst_add->U.I.SrcReg[1] aren't both negative. */
presub_replace_add(struct rc_instruction * inst_add,struct rc_instruction * inst_reader,unsigned int src_index)501 static void presub_replace_add(
502 struct rc_instruction * inst_add,
503 struct rc_instruction * inst_reader,
504 unsigned int src_index)
505 {
506 rc_presubtract_op presub_opcode;
507 if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate)
508 presub_opcode = RC_PRESUB_SUB;
509 else
510 presub_opcode = RC_PRESUB_ADD;
511
512 if (inst_add->U.I.SrcReg[1].Negate) {
513 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
514 inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0];
515 } else {
516 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0];
517 inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1];
518 }
519 inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
520 inst_reader->U.I.PreSub.SrcReg[1].Negate = 0;
521 inst_reader->U.I.PreSub.Opcode = presub_opcode;
522 inst_reader->U.I.SrcReg[src_index] =
523 chain_srcregs(inst_reader->U.I.SrcReg[src_index],
524 inst_reader->U.I.PreSub.SrcReg[0]);
525 inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
526 inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;
527 }
528
is_presub_candidate(struct radeon_compiler * c,struct rc_instruction * inst)529 static int is_presub_candidate(
530 struct radeon_compiler * c,
531 struct rc_instruction * inst)
532 {
533 const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
534 unsigned int i;
535 unsigned int is_constant[2] = {0, 0};
536
537 assert(inst->U.I.Opcode == RC_OPCODE_ADD);
538
539 if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE
540 || inst->U.I.SaturateMode
541 || inst->U.I.WriteALUResult
542 || inst->U.I.Omod) {
543 return 0;
544 }
545
546 /* If both sources use a constant swizzle, then we can't convert it to
547 * a presubtract operation. In fact for the ADD and SUB presubtract
548 * operations neither source can contain a constant swizzle. This
549 * specific case is checked in peephole_add_presub_add() when
550 * we make sure the swizzles for both sources are equal, so we
551 * don't need to worry about it here. */
552 for (i = 0; i < 2; i++) {
553 int chan;
554 for (chan = 0; chan < 4; chan++) {
555 rc_swizzle swz =
556 get_swz(inst->U.I.SrcReg[i].Swizzle, chan);
557 if (swz == RC_SWIZZLE_ONE
558 || swz == RC_SWIZZLE_ZERO
559 || swz == RC_SWIZZLE_HALF) {
560 is_constant[i] = 1;
561 }
562 }
563 }
564 if (is_constant[0] && is_constant[1])
565 return 0;
566
567 for(i = 0; i < info->NumSrcRegs; i++) {
568 struct rc_src_register src = inst->U.I.SrcReg[i];
569 if (src_reads_dst_mask(src, inst->U.I.DstReg))
570 return 0;
571
572 src.File = RC_FILE_PRESUB;
573 if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src))
574 return 0;
575 }
576 return 1;
577 }
578
peephole_add_presub_add(struct radeon_compiler * c,struct rc_instruction * inst_add)579 static int peephole_add_presub_add(
580 struct radeon_compiler * c,
581 struct rc_instruction * inst_add)
582 {
583 unsigned dstmask = inst_add->U.I.DstReg.WriteMask;
584 unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask;
585 unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask;
586
587 if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
588 return 0;
589
590 /* src0 and src1 can't have absolute values */
591 if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs)
592 return 0;
593
594 /* presub_replace_add() assumes only one is negative */
595 if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate)
596 return 0;
597
598 /* if src0 is negative, at least all bits of dstmask have to be set */
599 if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask)
600 return 0;
601
602 /* if src1 is negative, at least all bits of dstmask have to be set */
603 if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask)
604 return 0;
605
606 if (!is_presub_candidate(c, inst_add))
607 return 0;
608
609 if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
610 rc_remove_instruction(inst_add);
611 return 1;
612 }
613 return 0;
614 }
615
presub_replace_inv(struct rc_instruction * inst_add,struct rc_instruction * inst_reader,unsigned int src_index)616 static void presub_replace_inv(
617 struct rc_instruction * inst_add,
618 struct rc_instruction * inst_reader,
619 unsigned int src_index)
620 {
621 /* We must be careful not to modify inst_add, since it
622 * is possible it will remain part of the program.*/
623 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
624 inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
625 inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV;
626 inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],
627 inst_reader->U.I.PreSub.SrcReg[0]);
628
629 inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
630 inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
631 }
632
633 /**
634 * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
635 * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source
636 * of the add instruction must have the constatnt 1 swizzle. This function
637 * does not check const registers to see if their value is 1.0, so it should
638 * be called after the constant_folding optimization.
639 * @return
640 * 0 if the ADD instruction is still part of the program.
641 * 1 if the ADD instruction is no longer part of the program.
642 */
peephole_add_presub_inv(struct radeon_compiler * c,struct rc_instruction * inst_add)643 static int peephole_add_presub_inv(
644 struct radeon_compiler * c,
645 struct rc_instruction * inst_add)
646 {
647 unsigned int i, swz;
648
649 if (!is_presub_candidate(c, inst_add))
650 return 0;
651
652 /* Check if src0 is 1. */
653 /* XXX It would be nice to use is_src_uniform_constant here, but that
654 * function only works if the register's file is RC_FILE_NONE */
655 for(i = 0; i < 4; i++ ) {
656 if (!(inst_add->U.I.DstReg.WriteMask & (1 << i)))
657 continue;
658
659 swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
660 if (swz != RC_SWIZZLE_ONE || inst_add->U.I.SrcReg[0].Negate & (1 << i))
661 return 0;
662 }
663
664 /* Check src1. */
665 if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
666 inst_add->U.I.DstReg.WriteMask
667 || inst_add->U.I.SrcReg[1].Abs
668 || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
669 && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
670 || src_has_const_swz(inst_add->U.I.SrcReg[1])) {
671
672 return 0;
673 }
674
675 if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) {
676 rc_remove_instruction(inst_add);
677 return 1;
678 }
679 return 0;
680 }
681
/**
 * State shared with omod_filter_reader_cb() and omod_filter_writer_cb()
 * while peephole_mul_omod() scans the instructions before a MUL.
 */
struct peephole_mul_cb_data {
	/* Destination register of the MUL being folded. */
	struct rc_dst_register * Writer;
	/* Set to 1 by the callbacks when a scanned instruction reads or
	 * writes that destination. */
	unsigned int Clobbered;
};
686
/**
 * Read callback for the omod scan: marks the scan as clobbered when the
 * reported read overlaps the destination register stored in userdata.
 */
static void omod_filter_reader_cb(
	void * userdata,
	struct rc_instruction * inst,
	rc_register_file file,
	unsigned int index,
	unsigned int mask)
{
	struct peephole_mul_cb_data * cb = userdata;
	const struct rc_dst_register * dst = cb->Writer;

	if (!rc_src_reads_dst_mask(file, mask, index,
				dst->File, dst->Index, dst->WriteMask)) {
		return;
	}

	cb->Clobbered = 1;
}
701
/**
 * Write callback for the omod scan: marks the scan as clobbered when the
 * reported write hits the same file, index and at least one channel of the
 * destination register stored in userdata.
 */
static void omod_filter_writer_cb(
	void * userdata,
	struct rc_instruction * inst,
	rc_register_file file,
	unsigned int index,
	unsigned int mask)
{
	struct peephole_mul_cb_data * cb = userdata;
	const struct rc_dst_register * dst = cb->Writer;

	if (file != dst->File)
		return;
	if (index != dst->Index)
		return;
	if (!(mask & dst->WriteMask))
		return;

	cb->Clobbered = 1;
}
715
peephole_mul_omod(struct radeon_compiler * c,struct rc_instruction * inst_mul,struct rc_list * var_list)716 static int peephole_mul_omod(
717 struct radeon_compiler * c,
718 struct rc_instruction * inst_mul,
719 struct rc_list * var_list)
720 {
721 unsigned int chan = 0, swz, i;
722 int const_index = -1;
723 int temp_index = -1;
724 float const_value;
725 rc_omod_op omod_op = RC_OMOD_DISABLE;
726 struct rc_list * writer_list;
727 struct rc_variable * var;
728 struct peephole_mul_cb_data cb_data;
729 unsigned writemask_sum;
730
731 for (i = 0; i < 2; i++) {
732 unsigned int j;
733 if (inst_mul->U.I.SrcReg[i].File != RC_FILE_CONSTANT
734 && inst_mul->U.I.SrcReg[i].File != RC_FILE_TEMPORARY) {
735 return 0;
736 }
737 if (inst_mul->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
738 if (temp_index != -1) {
739 /* The instruction has two temp sources */
740 return 0;
741 } else {
742 temp_index = i;
743 continue;
744 }
745 }
746 /* If we get this far Src[i] must be a constant src */
747 if (inst_mul->U.I.SrcReg[i].Negate) {
748 return 0;
749 }
750 /* The constant src needs to read from the same swizzle */
751 swz = RC_SWIZZLE_UNUSED;
752 chan = 0;
753 for (j = 0; j < 4; j++) {
754 unsigned int j_swz =
755 GET_SWZ(inst_mul->U.I.SrcReg[i].Swizzle, j);
756 if (j_swz == RC_SWIZZLE_UNUSED) {
757 continue;
758 }
759 if (swz == RC_SWIZZLE_UNUSED) {
760 swz = j_swz;
761 chan = j;
762 } else if (j_swz != swz) {
763 return 0;
764 }
765 }
766
767 if (const_index != -1) {
768 /* The instruction has two constant sources */
769 return 0;
770 } else {
771 const_index = i;
772 }
773 }
774
775 if (!rc_src_reg_is_immediate(c, inst_mul->U.I.SrcReg[const_index].File,
776 inst_mul->U.I.SrcReg[const_index].Index)) {
777 return 0;
778 }
779 const_value = rc_get_constant_value(c,
780 inst_mul->U.I.SrcReg[const_index].Index,
781 inst_mul->U.I.SrcReg[const_index].Swizzle,
782 inst_mul->U.I.SrcReg[const_index].Negate,
783 chan);
784
785 if (const_value == 2.0f) {
786 omod_op = RC_OMOD_MUL_2;
787 } else if (const_value == 4.0f) {
788 omod_op = RC_OMOD_MUL_4;
789 } else if (const_value == 8.0f) {
790 omod_op = RC_OMOD_MUL_8;
791 } else if (const_value == (1.0f / 2.0f)) {
792 omod_op = RC_OMOD_DIV_2;
793 } else if (const_value == (1.0f / 4.0f)) {
794 omod_op = RC_OMOD_DIV_4;
795 } else if (const_value == (1.0f / 8.0f)) {
796 omod_op = RC_OMOD_DIV_8;
797 } else {
798 return 0;
799 }
800
801 writer_list = rc_variable_list_get_writers_one_reader(var_list,
802 RC_INSTRUCTION_NORMAL, &inst_mul->U.I.SrcReg[temp_index]);
803
804 if (!writer_list) {
805 return 0;
806 }
807
808 cb_data.Clobbered = 0;
809 cb_data.Writer = &inst_mul->U.I.DstReg;
810 for (var = writer_list->Item; var; var = var->Friend) {
811 struct rc_instruction * inst;
812 const struct rc_opcode_info * info = rc_get_opcode_info(
813 var->Inst->U.I.Opcode);
814 if (info->HasTexture) {
815 return 0;
816 }
817 if (var->Inst->U.I.SaturateMode != RC_SATURATE_NONE) {
818 return 0;
819 }
820 for (inst = inst_mul->Prev; inst != var->Inst;
821 inst = inst->Prev) {
822 rc_for_all_reads_mask(inst, omod_filter_reader_cb,
823 &cb_data);
824 rc_for_all_writes_mask(inst, omod_filter_writer_cb,
825 &cb_data);
826 if (cb_data.Clobbered) {
827 break;
828 }
829 }
830 }
831
832 if (cb_data.Clobbered) {
833 return 0;
834 }
835
836 /* Rewrite the instructions */
837 writemask_sum = rc_variable_writemask_sum(writer_list->Item);
838 for (var = writer_list->Item; var; var = var->Friend) {
839 struct rc_variable * writer = var;
840 unsigned conversion_swizzle = rc_make_conversion_swizzle(
841 writemask_sum,
842 inst_mul->U.I.DstReg.WriteMask);
843 writer->Inst->U.I.Omod = omod_op;
844 writer->Inst->U.I.DstReg.File = inst_mul->U.I.DstReg.File;
845 writer->Inst->U.I.DstReg.Index = inst_mul->U.I.DstReg.Index;
846 rc_normal_rewrite_writemask(writer->Inst, conversion_swizzle);
847 writer->Inst->U.I.SaturateMode = inst_mul->U.I.SaturateMode;
848 }
849
850 rc_remove_instruction(inst_mul);
851
852 return 1;
853 }
854
855 /**
856 * @return
857 * 0 if inst is still part of the program.
858 * 1 if inst is no longer part of the program.
859 */
peephole(struct radeon_compiler * c,struct rc_instruction * inst)860 static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
861 {
862 switch(inst->U.I.Opcode){
863 case RC_OPCODE_ADD:
864 if (c->has_presub) {
865 if(peephole_add_presub_inv(c, inst))
866 return 1;
867 if(peephole_add_presub_add(c, inst))
868 return 1;
869 }
870 break;
871 default:
872 break;
873 }
874 return 0;
875 }
876
rc_optimize(struct radeon_compiler * c,void * user)877 void rc_optimize(struct radeon_compiler * c, void *user)
878 {
879 struct rc_instruction * inst = c->Program.Instructions.Next;
880 struct rc_list * var_list;
881 while(inst != &c->Program.Instructions) {
882 struct rc_instruction * cur = inst;
883 inst = inst->Next;
884
885 constant_folding(c, cur);
886
887 if(peephole(c, cur))
888 continue;
889
890 if (cur->U.I.Opcode == RC_OPCODE_MOV) {
891 copy_propagate(c, cur);
892 /* cur may no longer be part of the program */
893 }
894 }
895
896 if (!c->has_omod) {
897 return;
898 }
899
900 inst = c->Program.Instructions.Next;
901 while(inst != &c->Program.Instructions) {
902 struct rc_instruction * cur = inst;
903 inst = inst->Next;
904 if (cur->U.I.Opcode == RC_OPCODE_MUL) {
905 var_list = rc_get_variables(c);
906 peephole_mul_omod(c, cur, var_list);
907 }
908 }
909 }
910