1 /*
2 * Copyright 2009 Nicolai Haehnle.
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "radeon_program_pair.h"
7
8 #include <stdio.h>
9
10 #include "radeon_compiler.h"
11 #include "radeon_compiler_util.h"
12 #include "radeon_dataflow.h"
13 #include "radeon_list.h"
14 #include "radeon_variable.h"
15
16 #include "util/u_debug.h"
17
/* Compile-time switch for the scheduler's debug trace; 0 disables it. */
#define VERBOSE 0

/* printf-style trace helper: forwards its arguments to fprintf(stderr, ...)
 * only when VERBOSE is non-zero. Wrapped in do/while(0) so it behaves as a
 * single statement. */
#define DBG(...)                                                                                   \
   do {                                                                                            \
      if (VERBOSE)                                                                                 \
         fprintf(stderr, __VA_ARGS__);                                                             \
   } while (0)
25
/**
 * Per-instruction bookkeeping used by the scheduler: the wrapped instruction,
 * the values it reads/writes, its outstanding dependency count and its
 * position in one of the ready lists.
 */
struct schedule_instruction {
   struct rc_instruction *Instruction;

   /** Next instruction in the linked list of ready instructions. */
   struct schedule_instruction *NextReady;

   /** Values that this instruction reads and writes.
    * NumWriteValues / NumReadValues give the number of valid entries in
    * WriteValues / ReadValues. */
   struct reg_value *WriteValues[4];
   struct reg_value *ReadValues[12];
   unsigned int NumWriteValues : 3;
   unsigned int NumReadValues : 4;

   /**
    * Number of (read and write) dependencies that must be resolved before
    * this instruction can be scheduled. When it drops to zero the
    * instruction is moved onto a ready list (see decrease_dependencies()).
    */
   unsigned int NumDependencies : 5;

   /** List of all readers (see rc_get_readers() for the definition of
    * "all readers"), even those outside the basic block this instruction
    * lives in. */
   struct rc_reader_data GlobalReaders;

   /** If the scheduler has paired an RGB and an Alpha instruction together,
    * PairedInst references the alpha instruction's dependency information.
    */
   struct schedule_instruction *PairedInst;

   /** This scheduler uses the value of Score to determine which
    * instruction to schedule. Instructions with a higher value of Score
    * will be scheduled first. */
   int Score;

   /** The number of components that read from a TEX instruction.
    * Decremented by notify_sem_wait() for every pending TEX that lists
    * this instruction as a reader. */
   unsigned TexReadCount;

   /** For TEX instructions a list of readers */
   struct rc_list *TexReaders;
};
65
/**
 * Node of the singly linked list (reg_value::Readers) that records which
 * instructions read a value.
 */
struct reg_value_reader {
   /** Instruction that reads the value. */
   struct schedule_instruction *Reader;
   /** Next reader of the same value, or NULL at the end of the list. */
   struct reg_value_reader *Next;
};
73
/**
 * Used to keep track which values are stored in each component of a
 * RC_FILE_TEMPORARY.
 */
struct reg_value {
   /** Instruction that produces this value. */
   struct schedule_instruction *Writer;

   /**
    * Unordered linked list of instructions that read from this value.
    * When this value becomes available (its writer is committed), the
    * dependency count of every reader is decreased.
    */
   struct reg_value_reader *Readers;

   /**
    * Number of readers of this value. This is decremented each time
    * a reader of the value is committed.
    * When the reader count reaches zero, the dependency count
    * of the instruction writing \ref Next is decremented.
    */
   unsigned int NumReaders;

   struct reg_value *Next; /**< Pointer to the next value to be written to the same register */
};
98
/**
 * Value tracking for one temporary register: one slot per channel, indexed
 * by the channel number passed to get_reg_valuep().
 */
struct register_state {
   struct reg_value *Values[4];
};
102
/**
 * Linked-list node describing a register remapping.
 *
 * NOTE(review): this struct is not referenced in the part of the file
 * reviewed here; the field meanings below are read off the field names only
 * and should be confirmed against the code that uses it.
 */
struct remap_reg {
   struct rc_instruction *Inst;
   /* Presumably the register index/swizzle before the remap. */
   unsigned int OldIndex : (RC_REGISTER_INDEX_BITS + 1);
   unsigned int OldSwizzle : 3;
   /* Presumably the register index/swizzle after the remap. */
   unsigned int NewIndex : (RC_REGISTER_INDEX_BITS + 1);
   unsigned int NewSwizzle : 3;
   unsigned int OnlyTexReads : 1;
   /* Next node in the list. */
   struct remap_reg *Next;
};
112
/**
 * Global state of the scheduler: the compiler context, per-register value
 * tracking and the ready/pending instruction lists.
 */
struct schedule_state {
   /** Compiler context; used for error reporting and pool allocation. */
   struct radeon_compiler *C;
   struct schedule_instruction *Current;
   /** Array of the previous writers of Current's destination register
    * indexed by channel. */
   struct schedule_instruction *PrevWriter[4];

   /** Value tracking for RC_FILE_TEMPORARY registers, indexed by register
    * index (see get_reg_valuep()). */
   struct register_state Temporary[RC_REGISTER_MAX_INDEX];

   /**
    * Linked lists of instructions that can be scheduled right now,
    * based on which ALU/TEX resources they require.
    */
   /*@{*/
   struct schedule_instruction *ReadyFullALU;
   struct schedule_instruction *ReadyRGB;
   struct schedule_instruction *ReadyAlpha;
   struct schedule_instruction *ReadyTEX;
   /*@}*/
   /** TEX instructions that emit_all_tex() has emitted and that
    * notify_sem_wait() has not yet processed. */
   struct rc_list *PendingTEX;

   /** Scoring heuristic used to order the ready lists. */
   void (*CalcScore)(struct schedule_instruction *);
   long max_tex_group;
   unsigned PrevBlockHasTex : 1;
   unsigned PrevBlockHasKil : 1;
   /* Number of TEX in the current block */
   unsigned TEXCount;
   /* Total number of TEX in the whole program.*/
   unsigned totalTEXCount;
   /** When zero, pair_instructions() stops after plain RGB/Alpha pairing and
    * does not attempt RGB-to-alpha conversions. */
   unsigned Opt : 1;
};
144
145 static struct reg_value **
get_reg_valuep(struct schedule_state * s,rc_register_file file,unsigned int index,unsigned int chan)146 get_reg_valuep(struct schedule_state *s, rc_register_file file, unsigned int index,
147 unsigned int chan)
148 {
149 if (file != RC_FILE_TEMPORARY)
150 return NULL;
151
152 if (index >= RC_REGISTER_MAX_INDEX) {
153 rc_error(s->C, "%s: index %i out of bounds\n", __func__, index);
154 return NULL;
155 }
156
157 return &s->Temporary[index].Values[chan];
158 }
159
160 static unsigned
get_tex_read_count(struct schedule_instruction * sinst)161 get_tex_read_count(struct schedule_instruction *sinst)
162 {
163 unsigned tex_read_count = sinst->TexReadCount;
164 if (sinst->PairedInst) {
165 tex_read_count += sinst->PairedInst->TexReadCount;
166 }
167 return tex_read_count;
168 }
169
#if VERBOSE
/**
 * Debug helper: dump a ready list to stderr as a sequence of
 * "IP (score) [tex read count]," entries followed by a newline.
 *
 * \param sinst head of the list (linked through NextReady); may be NULL.
 */
static void
print_list(struct schedule_instruction *sinst)
{
   for (struct schedule_instruction *ptr = sinst; ptr; ptr = ptr->NextReady) {
      /* Report the score of the entry being printed, not of the list head. */
      int score = ptr->Score;
      unsigned tex_read_count = get_tex_read_count(ptr);

      fprintf(stderr, "%u (%d) [%u],", ptr->Instruction->IP, score, tex_read_count);
   }
   fprintf(stderr, "\n");
}
#endif
183
184 static void
remove_inst_from_list(struct schedule_instruction ** list,struct schedule_instruction * inst)185 remove_inst_from_list(struct schedule_instruction **list, struct schedule_instruction *inst)
186 {
187 struct schedule_instruction *prev = NULL;
188 struct schedule_instruction *list_ptr;
189 for (list_ptr = *list; list_ptr; prev = list_ptr, list_ptr = list_ptr->NextReady) {
190 if (list_ptr == inst) {
191 if (prev) {
192 prev->NextReady = inst->NextReady;
193 } else {
194 *list = inst->NextReady;
195 }
196 inst->NextReady = NULL;
197 break;
198 }
199 }
200 }
201
202 static void
add_inst_to_list(struct schedule_instruction ** list,struct schedule_instruction * inst)203 add_inst_to_list(struct schedule_instruction **list, struct schedule_instruction *inst)
204 {
205 inst->NextReady = *list;
206 *list = inst;
207 }
208
209 static void
add_inst_to_list_score(struct schedule_instruction ** list,struct schedule_instruction * inst)210 add_inst_to_list_score(struct schedule_instruction **list, struct schedule_instruction *inst)
211 {
212 struct schedule_instruction *temp;
213 struct schedule_instruction *prev;
214 if (!*list) {
215 *list = inst;
216 return;
217 }
218 temp = *list;
219 prev = NULL;
220 while (temp && inst->Score <= temp->Score) {
221 prev = temp;
222 temp = temp->NextReady;
223 }
224
225 if (!prev) {
226 inst->NextReady = temp;
227 *list = inst;
228 } else {
229 prev->NextReady = inst;
230 inst->NextReady = temp;
231 }
232 }
233
234 static void
instruction_ready(struct schedule_state * s,struct schedule_instruction * sinst)235 instruction_ready(struct schedule_state *s, struct schedule_instruction *sinst)
236 {
237 DBG("%i is now ready\n", sinst->Instruction->IP);
238
239 /* Adding Ready TEX instructions to the end of the "Ready List" helps
240 * us emit TEX instructions in blocks without losing our place. */
241 if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL)
242 add_inst_to_list_score(&s->ReadyTEX, sinst);
243 else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP)
244 add_inst_to_list_score(&s->ReadyRGB, sinst);
245 else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP)
246 add_inst_to_list_score(&s->ReadyAlpha, sinst);
247 else
248 add_inst_to_list_score(&s->ReadyFullALU, sinst);
249 }
250
251 static void
decrease_dependencies(struct schedule_state * s,struct schedule_instruction * sinst)252 decrease_dependencies(struct schedule_state *s, struct schedule_instruction *sinst)
253 {
254 assert(sinst->NumDependencies > 0);
255 sinst->NumDependencies--;
256 if (!sinst->NumDependencies)
257 instruction_ready(s, sinst);
258 }
259
260 /* These functions provide different heuristics for scheduling instructions.
261 * The default is calc_score_readers. */
262
#if 0

/* Disabled heuristic: give every instruction the same score. */
static void calc_score_zero(struct schedule_instruction * sinst)
{
   sinst->Score = 0;
}

/* Disabled heuristic: score an instruction by the readers of the values it
 * writes. Each reader contributes its remaining dependency count, plus a
 * bonus of 100 when it has exactly one dependency left. Values with a zero
 * NumReaders are ignored. */
static void calc_score_deps(struct schedule_instruction * sinst)
{
   int i;
   sinst->Score = 0;
   for (i = 0; i < sinst->NumWriteValues; i++) {
      struct reg_value * v = sinst->WriteValues[i];
      if (v->NumReaders) {
         struct reg_value_reader * r;
         for (r = v->Readers; r; r = r->Next) {
            if (r->Reader->NumDependencies == 1) {
               sinst->Score += 100;
            }
            sinst->Score += r->Reader->NumDependencies;
         }
      }
   }
}

#endif
289
290 #define NO_OUTPUT_SCORE (1 << 24)
291
292 static void
score_no_output(struct schedule_instruction * sinst)293 score_no_output(struct schedule_instruction *sinst)
294 {
295 assert(sinst->Instruction->Type != RC_INSTRUCTION_NORMAL);
296 if (!sinst->Instruction->U.P.RGB.OutputWriteMask &&
297 !sinst->Instruction->U.P.Alpha.OutputWriteMask) {
298 if (sinst->PairedInst) {
299 if (!sinst->PairedInst->Instruction->U.P.RGB.OutputWriteMask &&
300 !sinst->PairedInst->Instruction->U.P.Alpha.OutputWriteMask) {
301 sinst->Score |= NO_OUTPUT_SCORE;
302 }
303
304 } else {
305 sinst->Score |= NO_OUTPUT_SCORE;
306 }
307 }
308 }
309
310 #define PAIRED_SCORE (1 << 16)
311
312 static void
calc_score_r300(struct schedule_instruction * sinst)313 calc_score_r300(struct schedule_instruction *sinst)
314 {
315 unsigned src_idx;
316
317 if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
318 sinst->Score = 0;
319 return;
320 }
321
322 score_no_output(sinst);
323
324 if (sinst->PairedInst) {
325 sinst->Score |= PAIRED_SCORE;
326 return;
327 }
328
329 for (src_idx = 0; src_idx < 4; src_idx++) {
330 sinst->Score += sinst->Instruction->U.P.RGB.Src[src_idx].Used +
331 sinst->Instruction->U.P.Alpha.Src[src_idx].Used;
332 }
333 }
334
335 #define NO_READ_TEX_SCORE (1 << 16)
336
337 static void
calc_score_readers(struct schedule_instruction * sinst)338 calc_score_readers(struct schedule_instruction *sinst)
339 {
340 if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
341 sinst->Score = 0;
342 } else {
343 sinst->Score = sinst->NumReadValues;
344 if (sinst->PairedInst) {
345 sinst->Score += sinst->PairedInst->NumReadValues;
346 }
347 if (get_tex_read_count(sinst) == 0) {
348 sinst->Score |= NO_READ_TEX_SCORE;
349 }
350 score_no_output(sinst);
351 }
352 }
353
354 /**
355 * This function decreases the dependencies of the next instruction that
356 * wants to write to each of sinst's read values.
357 */
358 static void
commit_update_reads(struct schedule_state * s,struct schedule_instruction * sinst)359 commit_update_reads(struct schedule_state *s, struct schedule_instruction *sinst)
360 {
361 do {
362 for (unsigned int i = 0; i < sinst->NumReadValues; ++i) {
363 struct reg_value *v = sinst->ReadValues[i];
364 assert(v->NumReaders > 0);
365 v->NumReaders--;
366 if (!v->NumReaders) {
367 if (v->Next) {
368 decrease_dependencies(s, v->Next->Writer);
369 }
370 }
371 }
372 } while ((sinst = sinst->PairedInst));
373 }
374
375 static void
commit_update_writes(struct schedule_state * s,struct schedule_instruction * sinst)376 commit_update_writes(struct schedule_state *s, struct schedule_instruction *sinst)
377 {
378 do {
379 for (unsigned int i = 0; i < sinst->NumWriteValues; ++i) {
380 struct reg_value *v = sinst->WriteValues[i];
381 if (v->NumReaders) {
382 for (struct reg_value_reader *r = v->Readers; r; r = r->Next) {
383 decrease_dependencies(s, r->Reader);
384 }
385 } else {
386 /* This happens in instruction sequences of the type
387 * OP r.x, ...;
388 * OP r.x, r.x, ...;
389 * See also the subtlety in how instructions that both
390 * read and write the same register are scanned.
391 */
392 if (v->Next)
393 decrease_dependencies(s, v->Next->Writer);
394 }
395 }
396 } while ((sinst = sinst->PairedInst));
397 }
398
399 static void
notify_sem_wait(struct schedule_state * s)400 notify_sem_wait(struct schedule_state *s)
401 {
402 struct rc_list *pend_ptr;
403 for (pend_ptr = s->PendingTEX; pend_ptr; pend_ptr = pend_ptr->Next) {
404 struct rc_list *read_ptr;
405 struct schedule_instruction *pending = pend_ptr->Item;
406 for (read_ptr = pending->TexReaders; read_ptr; read_ptr = read_ptr->Next) {
407 struct schedule_instruction *reader = read_ptr->Item;
408 reader->TexReadCount--;
409 }
410 }
411 s->PendingTEX = NULL;
412 }
413
414 static void
commit_alu_instruction(struct schedule_state * s,struct schedule_instruction * sinst)415 commit_alu_instruction(struct schedule_state *s, struct schedule_instruction *sinst)
416 {
417 DBG("%i: commit score = %d\n", sinst->Instruction->IP, sinst->Score);
418
419 commit_update_reads(s, sinst);
420
421 commit_update_writes(s, sinst);
422
423 if (get_tex_read_count(sinst) > 0) {
424 sinst->Instruction->U.P.SemWait = 1;
425 notify_sem_wait(s);
426 }
427 }
428
429 /**
430 * Emit all ready texture instructions in a single block.
431 *
432 * Emit as a single block to (hopefully) sample many textures in parallel,
433 * and to avoid hardware indirections on R300.
434 */
435 static void
emit_all_tex(struct schedule_state * s,struct rc_instruction * before)436 emit_all_tex(struct schedule_state *s, struct rc_instruction *before)
437 {
438 struct schedule_instruction *readytex;
439 struct rc_instruction *inst_begin;
440
441 assert(s->ReadyTEX);
442 notify_sem_wait(s);
443
444 /* Node marker for R300 */
445 inst_begin = rc_insert_new_instruction(s->C, before->Prev);
446 inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX;
447
448 /* Link texture instructions back in */
449 readytex = s->ReadyTEX;
450 while (readytex) {
451 rc_insert_instruction(before->Prev, readytex->Instruction);
452 DBG("%i: commit TEX reads\n", readytex->Instruction->IP);
453
454 /* All of the TEX instructions in the same TEX block have
455 * their source registers read from before any of the
456 * instructions in that block write to their destination
457 * registers. This means that when we commit a TEX
458 * instruction, any other TEX instruction that wants to write
459 * to one of the committed instruction's source register can be
460 * marked as ready and should be emitted in the same TEX
461 * block. This prevents the following sequence from being
462 * emitted in two different TEX blocks:
463 * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0];
464 * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0];
465 */
466 commit_update_reads(s, readytex);
467 readytex = readytex->NextReady;
468 }
469 readytex = s->ReadyTEX;
470 s->ReadyTEX = NULL;
471 while (readytex) {
472 DBG("%i: commit TEX writes\n", readytex->Instruction->IP);
473 commit_update_writes(s, readytex);
474 /* Set semaphore bits for last TEX instruction in the block */
475 if (!readytex->NextReady) {
476 readytex->Instruction->U.I.TexSemAcquire = 1;
477 readytex->Instruction->U.I.TexSemWait = 1;
478 }
479 rc_list_add(&s->PendingTEX, rc_list(&s->C->Pool, readytex));
480 readytex = readytex->NextReady;
481 }
482 }
483
/* This is a helper function for destructive_merge_instructions().  It helps
 * merge presubtract sources from two instructions and makes sure the
 * presubtract sources end up in the correct spot.  This function assumes that
 * dst_full is an rgb instruction, meaning that it has a vector instruction(rgb)
 * but no scalar instruction (alpha).
 *
 * dst_full is modified in place even when merging fails; callers that need
 * the original must keep their own copy (see merge_instructions()).
 *
 * @param dst_full instruction receiving the presubtract sources.
 * @param src sub-instruction (passed by value) whose presubtract sources are
 * merged into dst_full.
 * @param type RC_SOURCE_RGB or RC_SOURCE_ALPHA, selecting which half of
 * dst_full receives the sources; any other value asserts and fails.
 * @return 0 if merging the presubtract sources fails.
 * @return 1 if merging the presubtract sources succeeds.
 */
static int
merge_presub_sources(struct rc_pair_instruction *dst_full, struct rc_pair_sub_instruction src,
                     unsigned int type)
{
   unsigned int srcp_src, srcp_regs, is_rgb, is_alpha;
   struct rc_pair_sub_instruction *dst_sub;
   const struct rc_opcode_info *info;

   assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP);

   switch (type) {
   case RC_SOURCE_RGB:
      is_rgb = 1;
      is_alpha = 0;
      dst_sub = &dst_full->RGB;
      break;
   case RC_SOURCE_ALPHA:
      is_rgb = 0;
      is_alpha = 1;
      dst_sub = &dst_full->Alpha;
      break;
   default:
      assert(0);
      return 0;
   }

   /* Only the RGB opcode's arguments are rewritten below; the alpha half of
    * dst_full is a NOP (asserted above). */
   info = rc_get_opcode_info(dst_full->RGB.Opcode);

   /* The destination already uses its presubtract slot. */
   if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used)
      return 0;

   srcp_regs = rc_presubtract_src_reg_count(src.Src[RC_PAIR_PRESUB_SRC].Index);
   for (srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
      unsigned int arg;
      int free_source;
      /* Set when the displaced source was re-allocated to a new slot, in
       * which case args are only redirected away from srcp_src. */
      unsigned int one_way = 0;
      struct rc_pair_instruction_source srcp = src.Src[srcp_src];
      struct rc_pair_instruction_source temp;

      free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha, srcp.File, srcp.Index);

      /* If free_source < 0 then there are no free source
       * slots. */
      if (free_source < 0)
         return 0;

      /* Move the newly allocated source into slot srcp_src, remembering
       * what used to live there. */
      temp = dst_sub->Src[srcp_src];
      dst_sub->Src[srcp_src] = dst_sub->Src[free_source];

      /* srcp needs src0 and src1 to be the same */
      if (free_source < srcp_src) {
         if (!temp.Used)
            continue;
         /* Find a new home for the source that was displaced. */
         free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha, temp.File, temp.Index);
         if (free_source < 0)
            return 0;
         one_way = 1;
      } else {
         /* Plain swap of the two slots. */
         dst_sub->Src[free_source] = temp;
      }

      /* If free_source == srcp_src, then the presubtract
       * source is already in the correct place. */
      if (free_source == srcp_src)
         continue;

      /* Shuffle the sources, so we can put the
       * presubtract source in the correct place. */
      for (arg = 0; arg < info->NumSrcRegs; arg++) {
         /* If the arg does read both from rgb and alpha, then we need to rewrite
          * both sources and the code currently doesn't handle this.
          * FIXME: This is definitely solvable, however shader-db shows it is
          * not worth the effort.
          */
         if (rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle) & RC_SOURCE_ALPHA &&
             rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle) & RC_SOURCE_RGB)
            return 0;

         /* If this arg does not read from a source of the requested
          * type (rgb or alpha), do nothing. */
         if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle) & type)) {
            continue;
         }

         if (dst_full->RGB.Arg[arg].Source == srcp_src)
            dst_full->RGB.Arg[arg].Source = free_source;
         /* We need to do this just in case register
          * is one of the sources already, but in the
          * wrong spot. */
         else if (dst_full->RGB.Arg[arg].Source == free_source && !one_way) {
            dst_full->RGB.Arg[arg].Source = srcp_src;
         }
      }
   }
   return 1;
}
588
589 /* This function assumes that rgb.Alpha and alpha.RGB are unused */
590 static int
destructive_merge_instructions(struct rc_pair_instruction * rgb,struct rc_pair_instruction * alpha)591 destructive_merge_instructions(struct rc_pair_instruction *rgb, struct rc_pair_instruction *alpha)
592 {
593 const struct rc_opcode_info *opcode;
594
595 assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
596 assert(alpha->RGB.Opcode == RC_OPCODE_NOP);
597
598 /* Presubtract registers need to be merged first so that registers
599 * needed by the presubtract operation can be placed in src0 and/or
600 * src1. */
601
602 /* Merge the rgb presubtract registers. */
603 if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
604 if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) {
605 return 0;
606 }
607 }
608 /* Merge the alpha presubtract registers */
609 if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
610 if (!merge_presub_sources(rgb, alpha->Alpha, RC_SOURCE_ALPHA)) {
611 return 0;
612 }
613 }
614
615 /* Copy alpha args into rgb */
616 opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
617
618 for (unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
619 unsigned int srcrgb = 0;
620 unsigned int srcalpha = 0;
621 unsigned int oldsrc = alpha->Alpha.Arg[arg].Source;
622 rc_register_file file = 0;
623 unsigned int index = 0;
624 int source;
625
626 if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) {
627 srcrgb = 1;
628 file = alpha->RGB.Src[oldsrc].File;
629 index = alpha->RGB.Src[oldsrc].Index;
630 } else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) {
631 srcalpha = 1;
632 file = alpha->Alpha.Src[oldsrc].File;
633 index = alpha->Alpha.Src[oldsrc].Index;
634 }
635
636 source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index);
637 if (source < 0)
638 return 0;
639
640 rgb->Alpha.Arg[arg].Source = source;
641 rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle;
642 rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs;
643 rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate;
644 }
645
646 /* Copy alpha opcode into rgb */
647 rgb->Alpha.Opcode = alpha->Alpha.Opcode;
648 rgb->Alpha.DestIndex = alpha->Alpha.DestIndex;
649 rgb->Alpha.WriteMask = alpha->Alpha.WriteMask;
650 rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask;
651 rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask;
652 rgb->Alpha.Saturate = alpha->Alpha.Saturate;
653 rgb->Alpha.Omod = alpha->Alpha.Omod;
654
655 /* Merge ALU result writing */
656 if (alpha->WriteALUResult) {
657 if (rgb->WriteALUResult)
658 return 0;
659
660 rgb->WriteALUResult = alpha->WriteALUResult;
661 rgb->ALUResultCompare = alpha->ALUResultCompare;
662 }
663
664 /* Copy SemWait */
665 rgb->SemWait |= alpha->SemWait;
666
667 return 1;
668 }
669
670 /**
671 * Try to merge the given instructions into the rgb instructions.
672 *
673 * Return true on success; on failure, return false, and keep
674 * the instructions untouched.
675 */
676 static int
merge_instructions(struct rc_pair_instruction * rgb,struct rc_pair_instruction * alpha)677 merge_instructions(struct rc_pair_instruction *rgb, struct rc_pair_instruction *alpha)
678 {
679 struct rc_pair_instruction backup;
680
681 /*Instructions can't write output registers and ALU result at the
682 * same time. */
683 if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask) ||
684 (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) {
685 return 0;
686 }
687
688 /* Writing output registers in the middle of shaders is slow, so
689 * we don't want to pair output writes with temp writes. */
690 if ((rgb->RGB.OutputWriteMask && !alpha->Alpha.OutputWriteMask) ||
691 (!rgb->RGB.OutputWriteMask && alpha->Alpha.OutputWriteMask)) {
692 return 0;
693 }
694
695 memcpy(&backup, rgb, sizeof(struct rc_pair_instruction));
696
697 if (destructive_merge_instructions(rgb, alpha))
698 return 1;
699
700 memcpy(rgb, &backup, sizeof(struct rc_pair_instruction));
701 return 0;
702 }
703
704 static void
presub_nop(struct rc_instruction * emitted)705 presub_nop(struct rc_instruction *emitted)
706 {
707 int prev_rgb_index, prev_alpha_index, i, num_src;
708
709 /* We don't need a nop if the previous instruction is a TEX. */
710 if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) {
711 return;
712 }
713 if (emitted->Prev->U.P.RGB.WriteMask)
714 prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex;
715 else
716 prev_rgb_index = -1;
717 if (emitted->Prev->U.P.Alpha.WriteMask)
718 prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex;
719 else
720 prev_alpha_index = 1;
721
722 /* Check the previous rgb instruction */
723 if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
724 num_src = rc_presubtract_src_reg_count(emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index);
725 for (i = 0; i < num_src; i++) {
726 unsigned int index = emitted->U.P.RGB.Src[i].Index;
727 if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY &&
728 (index == prev_rgb_index || index == prev_alpha_index)) {
729 emitted->Prev->U.P.Nop = 1;
730 return;
731 }
732 }
733 }
734
735 /* Check the previous alpha instruction. */
736 if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
737 return;
738
739 num_src = rc_presubtract_src_reg_count(emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
740 for (i = 0; i < num_src; i++) {
741 unsigned int index = emitted->U.P.Alpha.Src[i].Index;
742 if (emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY &&
743 (index == prev_rgb_index || index == prev_alpha_index)) {
744 emitted->Prev->U.P.Nop = 1;
745 return;
746 }
747 }
748 }
749
750 static void
rgb_to_alpha_remap(struct schedule_state * s,struct rc_instruction * inst,struct rc_pair_instruction_arg * arg,rc_register_file old_file,rc_swizzle old_swz,unsigned int new_index)751 rgb_to_alpha_remap(struct schedule_state *s, struct rc_instruction *inst,
752 struct rc_pair_instruction_arg *arg, rc_register_file old_file,
753 rc_swizzle old_swz, unsigned int new_index)
754 {
755 int new_src_index;
756 unsigned int i;
757
758 for (i = 0; i < 3; i++) {
759 if (get_swz(arg->Swizzle, i) == old_swz) {
760 SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W);
761 }
762 }
763 new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1, old_file, new_index);
764 /* This conversion is not possible, we must have made a mistake in
765 * is_rgb_to_alpha_possible. */
766 if (new_src_index < 0) {
767 rc_error(s->C, "rgb_to_alpha_remap failed to allocate src.\n");
768 return;
769 }
770
771 arg->Source = new_src_index;
772 }
773
774 static int
can_remap(unsigned int opcode)775 can_remap(unsigned int opcode)
776 {
777 switch (opcode) {
778 case RC_OPCODE_DDX:
779 case RC_OPCODE_DDY:
780 return 0;
781 default:
782 return 1;
783 }
784 }
785
786 static int
can_convert_opcode_to_alpha(unsigned int opcode)787 can_convert_opcode_to_alpha(unsigned int opcode)
788 {
789 switch (opcode) {
790 case RC_OPCODE_DDX:
791 case RC_OPCODE_DDY:
792 case RC_OPCODE_DP2:
793 case RC_OPCODE_DP3:
794 case RC_OPCODE_DP4:
795 return 0;
796 default:
797 return 1;
798 }
799 }
800
801 static void
is_rgb_to_alpha_possible(void * userdata,struct rc_instruction * inst,struct rc_pair_instruction_arg * arg,struct rc_pair_instruction_source * src)802 is_rgb_to_alpha_possible(void *userdata, struct rc_instruction *inst,
803 struct rc_pair_instruction_arg *arg,
804 struct rc_pair_instruction_source *src)
805 {
806 unsigned int read_chan = RC_SWIZZLE_UNUSED;
807 unsigned int alpha_sources = 0;
808 unsigned int i;
809 struct rc_reader_data *reader_data = userdata;
810
811 if (!can_remap(inst->U.P.RGB.Opcode) || !can_remap(inst->U.P.Alpha.Opcode)) {
812 reader_data->Abort = 1;
813 return;
814 }
815
816 if (!src)
817 return;
818
819 /* XXX There are some cases where we can still do the conversion if
820 * a reader reads from a presubtract source, but for now we'll prevent
821 * it. */
822 if (arg->Source == RC_PAIR_PRESUB_SRC) {
823 reader_data->Abort = 1;
824 return;
825 }
826
827 /* Make sure the source only reads the register component that we
828 * are going to be converting from. It is OK if the instruction uses
829 * this component more than once.
830 * XXX If the index we will be converting to is the same as the
831 * current index, then it is OK to read from more than one component.
832 */
833 for (i = 0; i < 3; i++) {
834 rc_swizzle swz = get_swz(arg->Swizzle, i);
835 switch (swz) {
836 case RC_SWIZZLE_X:
837 case RC_SWIZZLE_Y:
838 case RC_SWIZZLE_Z:
839 case RC_SWIZZLE_W:
840 if (read_chan == RC_SWIZZLE_UNUSED) {
841 read_chan = swz;
842 } else if (read_chan != swz) {
843 reader_data->Abort = 1;
844 return;
845 }
846 break;
847 default:
848 break;
849 }
850 }
851
852 /* Make sure there are enough alpha sources.
853 * XXX If we know what register all the readers are going
854 * to be remapped to, then in some situations we can still do
855 * the substitution, even if all 3 alpha sources are being used.*/
856 for (i = 0; i < 3; i++) {
857 if (inst->U.P.Alpha.Src[i].Used) {
858 alpha_sources++;
859 }
860 }
861 if (alpha_sources > 2) {
862 reader_data->Abort = 1;
863 return;
864 }
865 }
866
/**
 * Try to turn an instruction that writes a single RGB channel into one that
 * writes the alpha (W) channel of a temporary, and redirect all of its
 * readers accordingly.
 *
 * All feasibility checks are performed before anything is modified, so a
 * return value of 0 leaves the instruction and the scheduler state untouched.
 *
 * \param s scheduler state; its value tracking table is updated on success.
 * \param sched_inst instruction to convert; expected to have exactly one
 *        write value.
 * \return 1 if the instruction was converted, 0 otherwise.
 */
static int
convert_rgb_to_alpha(struct schedule_state *s, struct schedule_instruction *sched_inst)
{
   struct rc_pair_instruction *pair_inst = &sched_inst->Instruction->U.P;
   unsigned int old_mask = pair_inst->RGB.WriteMask;
   unsigned int old_swz = rc_mask_to_swizzle(old_mask);
   const struct rc_opcode_info *info = rc_get_opcode_info(pair_inst->RGB.Opcode);
   int new_index = -1;
   unsigned int i;

   /* The reader scan flagged at least one reader that cannot be remapped. */
   if (sched_inst->GlobalReaders.Abort)
      return 0;

   /* Even though we checked that we can convert to alpha previously, it is
    * possible that another rgb source of the reader instructions was already
    * converted to alpha and we thus have no longer free alpha sources.
    */
   for (i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) {
      struct rc_reader reader = sched_inst->GlobalReaders.Readers[i];
      if (reader.Inst->U.P.Alpha.Src[2].Used)
         return 0;
   }

   /* Nothing to convert if the RGB half writes no channel. */
   if (!pair_inst->RGB.WriteMask)
      return 0;

   if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode) ||
       !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) {
      return 0;
   }

   assert(sched_inst->NumWriteValues == 1);

   if (!sched_inst->WriteValues[0]) {
      assert(0);
      return 0;
   }

   /* We start at the old index, because if we can reuse the same
    * register and just change the swizzle then it is more likely we
    * will be able to convert all the readers. */
   for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) {
      /* Channel 3 is the alpha (W) channel. */
      struct reg_value **new_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY, i, 3);
      if (!*new_regvalp) {
         /* Free alpha slot found: make it track the value that the old
          * RGB channel slot tracks. The old slot itself is not cleared. */
         struct reg_value **old_regvalp = get_reg_valuep(
            s, RC_FILE_TEMPORARY, pair_inst->RGB.DestIndex, rc_mask_to_swizzle(old_mask));
         new_index = i;
         *new_regvalp = *old_regvalp;
         break;
      }
   }
   if (new_index < 0) {
      return 0;
   }

   /* If we are converting a full instruction with RC_OPCODE_REPL_ALPHA
    * as the RGB opcode, then the Alpha instruction will already contain
    * the correct opcode and instruction args, so we do not want to
    * overwrite them.
    */
   if (pair_inst->RGB.Opcode != RC_OPCODE_REPL_ALPHA) {
      pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode;
      memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg, sizeof(pair_inst->Alpha.Arg));
   }
   /* Move the destination description from the RGB half to the alpha half. */
   pair_inst->Alpha.DestIndex = new_index;
   pair_inst->Alpha.WriteMask = RC_MASK_W;
   pair_inst->Alpha.Target = pair_inst->RGB.Target;
   pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask;
   pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask;
   pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate;
   pair_inst->Alpha.Omod = pair_inst->RGB.Omod;
   /* Move the swizzles into the first chan */
   for (i = 0; i < info->NumSrcRegs; i++) {
      unsigned int j;
      for (j = 0; j < 3; j++) {
         unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j);
         if (swz != RC_SWIZZLE_UNUSED) {
            pair_inst->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1);
            break;
         }
      }
   }
   /* Turn the RGB half into a NOP.
    * NOTE(review): RGB.Omod is copied to the alpha half above but is not
    * reset here, unlike the other RGB fields — confirm this is intended. */
   pair_inst->RGB.Opcode = RC_OPCODE_NOP;
   pair_inst->RGB.DestIndex = 0;
   pair_inst->RGB.WriteMask = 0;
   pair_inst->RGB.Target = 0;
   pair_inst->RGB.OutputWriteMask = 0;
   pair_inst->RGB.DepthWriteMask = 0;
   pair_inst->RGB.Saturate = 0;
   memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg));

   /* Point every reader at the new alpha register. */
   for (i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) {
      struct rc_reader reader = sched_inst->GlobalReaders.Readers[i];
      rgb_to_alpha_remap(s, reader.Inst, reader.U.P.Arg, RC_FILE_TEMPORARY, old_swz, new_index);
   }
   return 1;
}
964
/**
 * Walk \p inst_list looking for instructions that can be converted from RGB
 * to alpha (see convert_rgb_to_alpha()) and then paired with an instruction
 * from ReadyRGB.
 *
 * A converted instruction is moved from \p inst_list to ReadyAlpha. If it
 * can then be merged into an instruction from ReadyRGB, both are removed
 * from their lists, the RGB instruction records the converted one as its
 * PairedInst and is pushed onto ReadyFullALU, and the walk restarts from the
 * head of \p inst_list.
 *
 * The loop stops as soon as \p inst_list holds fewer than two instructions.
 */
static void
try_convert_and_pair(struct schedule_state *s, struct schedule_instruction **inst_list)
{
   struct schedule_instruction *list_ptr = *inst_list;
   while (list_ptr && *inst_list && (*inst_list)->NextReady) {
      int paired = 0;
      /* Only instructions with a free alpha slot, or full instructions whose
       * RGB opcode is REPL_ALPHA, are candidates. */
      if (list_ptr->Instruction->U.P.Alpha.Opcode != RC_OPCODE_NOP &&
          list_ptr->Instruction->U.P.RGB.Opcode != RC_OPCODE_REPL_ALPHA) {
         goto next;
      }
      if (list_ptr->NumWriteValues == 1 && convert_rgb_to_alpha(s, list_ptr)) {

         struct schedule_instruction *pair_ptr;
         /* The instruction is an alpha instruction now. */
         remove_inst_from_list(inst_list, list_ptr);
         add_inst_to_list_score(&s->ReadyAlpha, list_ptr);

         for (pair_ptr = s->ReadyRGB; pair_ptr; pair_ptr = pair_ptr->NextReady) {
            if (merge_instructions(&pair_ptr->Instruction->U.P, &list_ptr->Instruction->U.P)) {
               remove_inst_from_list(&s->ReadyAlpha, list_ptr);
               remove_inst_from_list(&s->ReadyRGB, pair_ptr);
               pair_ptr->PairedInst = list_ptr;

               add_inst_to_list(&s->ReadyFullALU, pair_ptr);
               /* Both lists changed; restart from the head. */
               list_ptr = *inst_list;
               paired = 1;
               break;
            }
         }
      }
      if (!paired) {
      next:
         /* NOTE(review): after a successful conversion that could not be
          * paired, list_ptr has been moved to ReadyAlpha, so NextReady here
          * follows ReadyAlpha rather than inst_list and the remaining
          * entries of inst_list are not visited — confirm this is intended. */
         list_ptr = list_ptr->NextReady;
      }
   }
}
1000
1001 /**
1002 * This function attempts to merge RGB and Alpha instructions together.
1003 */
1004 static void
pair_instructions(struct schedule_state * s)1005 pair_instructions(struct schedule_state *s)
1006 {
1007 struct schedule_instruction *rgb_ptr;
1008 struct schedule_instruction *alpha_ptr;
1009
1010 /* Some pairings might fail because they require too
1011 * many source slots; try all possible pairings if necessary */
1012 rgb_ptr = s->ReadyRGB;
1013 while (rgb_ptr) {
1014 struct schedule_instruction *rgb_next = rgb_ptr->NextReady;
1015 alpha_ptr = s->ReadyAlpha;
1016 while (alpha_ptr) {
1017 struct schedule_instruction *alpha_next = alpha_ptr->NextReady;
1018 if (merge_instructions(&rgb_ptr->Instruction->U.P, &alpha_ptr->Instruction->U.P)) {
1019 /* Remove RGB and Alpha from their ready lists.
1020 */
1021 remove_inst_from_list(&s->ReadyRGB, rgb_ptr);
1022 remove_inst_from_list(&s->ReadyAlpha, alpha_ptr);
1023 rgb_ptr->PairedInst = alpha_ptr;
1024 add_inst_to_list(&s->ReadyFullALU, rgb_ptr);
1025 break;
1026 }
1027 alpha_ptr = alpha_next;
1028 }
1029 rgb_ptr = rgb_next;
1030 }
1031
1032 if (!s->Opt) {
1033 return;
1034 }
1035
1036 /* Full instructions that have RC_OPCODE_REPL_ALPHA in the RGB
1037 * slot can be converted into Alpha instructions. */
1038 try_convert_and_pair(s, &s->ReadyFullALU);
1039
1040 /* Try to convert some of the RGB instructions to Alpha and
1041 * try to pair it with another RGB. */
1042 try_convert_and_pair(s, &s->ReadyRGB);
1043 }
1044
1045 static void
update_max_score(struct schedule_state * s,struct schedule_instruction ** list,int * max_score,struct schedule_instruction ** max_inst_out,struct schedule_instruction *** list_out)1046 update_max_score(struct schedule_state *s, struct schedule_instruction **list, int *max_score,
1047 struct schedule_instruction **max_inst_out,
1048 struct schedule_instruction ***list_out)
1049 {
1050 struct schedule_instruction *list_ptr;
1051 for (list_ptr = *list; list_ptr; list_ptr = list_ptr->NextReady) {
1052 int score;
1053 s->CalcScore(list_ptr);
1054 score = list_ptr->Score;
1055 if (!*max_inst_out || score > *max_score) {
1056 *max_score = score;
1057 *max_inst_out = list_ptr;
1058 *list_out = list;
1059 }
1060 }
1061 }
1062
1063 static void
emit_instruction(struct schedule_state * s,struct rc_instruction * before)1064 emit_instruction(struct schedule_state *s, struct rc_instruction *before)
1065 {
1066 int max_score = -1;
1067 struct schedule_instruction *max_inst = NULL;
1068 struct schedule_instruction **max_list = NULL;
1069 unsigned tex_count = 0;
1070 struct schedule_instruction *tex_ptr;
1071
1072 pair_instructions(s);
1073 #if VERBOSE
1074 fprintf(stderr, "Full:\n");
1075 print_list(s->ReadyFullALU);
1076 fprintf(stderr, "RGB:\n");
1077 print_list(s->ReadyRGB);
1078 fprintf(stderr, "Alpha:\n");
1079 print_list(s->ReadyAlpha);
1080 fprintf(stderr, "TEX:\n");
1081 print_list(s->ReadyTEX);
1082 #endif
1083
1084 for (tex_ptr = s->ReadyTEX; tex_ptr; tex_ptr = tex_ptr->NextReady) {
1085 /* In general we want to emit KIL ASAP, however KIL does count into
1086 * the indirection limit, so for R300/R400 we only do this if we
1087 * are sure we can fit in there.
1088 */
1089 if (tex_ptr->Instruction->U.I.Opcode == RC_OPCODE_KIL &&
1090 (s->C->is_r500 || s->totalTEXCount <= 3)) {
1091 emit_all_tex(s, before);
1092 s->PrevBlockHasKil = 1;
1093 return;
1094 }
1095 tex_count++;
1096 }
1097 update_max_score(s, &s->ReadyFullALU, &max_score, &max_inst, &max_list);
1098 update_max_score(s, &s->ReadyRGB, &max_score, &max_inst, &max_list);
1099 update_max_score(s, &s->ReadyAlpha, &max_score, &max_inst, &max_list);
1100
1101 if (tex_count >= s->max_tex_group || max_score == -1 ||
1102 (s->TEXCount > 0 && tex_count == s->TEXCount) ||
1103 (tex_count > 0 && max_score < NO_OUTPUT_SCORE)) {
1104 emit_all_tex(s, before);
1105 } else {
1106
1107 remove_inst_from_list(max_list, max_inst);
1108 rc_insert_instruction(before->Prev, max_inst->Instruction);
1109 commit_alu_instruction(s, max_inst);
1110
1111 presub_nop(before->Prev);
1112 }
1113 }
1114
1115 static void
add_tex_reader(struct schedule_state * s,struct schedule_instruction * writer,struct schedule_instruction * reader)1116 add_tex_reader(struct schedule_state *s, struct schedule_instruction *writer,
1117 struct schedule_instruction *reader)
1118 {
1119 if (!writer || writer->Instruction->Type != RC_INSTRUCTION_NORMAL) {
1120 /*Not a TEX instructions */
1121 return;
1122 }
1123 reader->TexReadCount++;
1124 rc_list_add(&writer->TexReaders, rc_list(&s->C->Pool, reader));
1125 }
1126
1127 static void
scan_read(void * data,struct rc_instruction * inst,rc_register_file file,unsigned int index,unsigned int chan)1128 scan_read(void *data, struct rc_instruction *inst, rc_register_file file, unsigned int index,
1129 unsigned int chan)
1130 {
1131 struct schedule_state *s = data;
1132 struct reg_value **v = get_reg_valuep(s, file, index, chan);
1133 struct reg_value_reader *reader;
1134
1135 if (!v)
1136 return;
1137
1138 if (*v && (*v)->Writer == s->Current) {
1139 /* The instruction reads and writes to a register component.
1140 * In this case, we only want to increment dependencies by one.
1141 * Why?
1142 * Because each instruction depends on the writers of its source
1143 * registers _and_ the most recent writer of its destination
1144 * register. In this case, the current instruction (s->Current)
1145 * has a dependency that both writes to one of its source
1146 * registers and was the most recent writer to its destination
1147 * register. We have already marked this dependency in
1148 * scan_write(), so we don't need to do it again.
1149 */
1150
1151 /* We need to make sure we are adding s->Current to the
1152 * previous writer's list of TexReaders, if the previous writer
1153 * was a TEX instruction.
1154 */
1155 add_tex_reader(s, s->PrevWriter[chan], s->Current);
1156
1157 return;
1158 }
1159
1160 DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
1161
1162 reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
1163 reader->Reader = s->Current;
1164 if (!*v) {
1165 /* In this situation, the instruction reads from a register
1166 * that hasn't been written to or read from in the current
1167 * block. */
1168 *v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value));
1169 memset(*v, 0, sizeof(struct reg_value));
1170 (*v)->Readers = reader;
1171 } else {
1172 reader->Next = (*v)->Readers;
1173 (*v)->Readers = reader;
1174 /* Only update the current instruction's dependencies if the
1175 * register it reads from has been written to in this block. */
1176 if ((*v)->Writer) {
1177 add_tex_reader(s, (*v)->Writer, s->Current);
1178 s->Current->NumDependencies++;
1179 }
1180 }
1181 (*v)->NumReaders++;
1182
1183 if (s->Current->NumReadValues >= 12) {
1184 rc_error(s->C, "%s: NumReadValues overflow\n", __func__);
1185 } else {
1186 s->Current->ReadValues[s->Current->NumReadValues++] = *v;
1187 }
1188 }
1189
1190 static void
scan_write(void * data,struct rc_instruction * inst,rc_register_file file,unsigned int index,unsigned int chan)1191 scan_write(void *data, struct rc_instruction *inst, rc_register_file file, unsigned int index,
1192 unsigned int chan)
1193 {
1194 struct schedule_state *s = data;
1195 struct reg_value **pv = get_reg_valuep(s, file, index, chan);
1196 struct reg_value *newv;
1197
1198 if (!pv)
1199 return;
1200
1201 DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
1202
1203 newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv));
1204 memset(newv, 0, sizeof(*newv));
1205
1206 newv->Writer = s->Current;
1207
1208 if (*pv) {
1209 (*pv)->Next = newv;
1210 s->Current->NumDependencies++;
1211 /* Keep track of the previous writer to s->Current's destination
1212 * register */
1213 s->PrevWriter[chan] = (*pv)->Writer;
1214 }
1215
1216 *pv = newv;
1217
1218 if (s->Current->NumWriteValues >= 4) {
1219 rc_error(s->C, "%s: NumWriteValues overflow\n", __func__);
1220 } else {
1221 s->Current->WriteValues[s->Current->NumWriteValues++] = newv;
1222 }
1223 }
1224
1225 static void
is_rgb_to_alpha_possible_normal(void * userdata,struct rc_instruction * inst,struct rc_src_register * src)1226 is_rgb_to_alpha_possible_normal(void *userdata, struct rc_instruction *inst,
1227 struct rc_src_register *src)
1228 {
1229 struct rc_reader_data *reader_data = userdata;
1230 reader_data->Abort = 1;
1231 }
1232
1233 static void
schedule_block(struct schedule_state * s,struct rc_instruction * begin,struct rc_instruction * end)1234 schedule_block(struct schedule_state *s, struct rc_instruction *begin, struct rc_instruction *end)
1235 {
1236 unsigned int ip;
1237
1238 /* Scan instructions for data dependencies */
1239 ip = 0;
1240 for (struct rc_instruction *inst = begin; inst != end; inst = inst->Next) {
1241 s->Current = memory_pool_malloc(&s->C->Pool, sizeof(*s->Current));
1242 memset(s->Current, 0, sizeof(struct schedule_instruction));
1243
1244 if (inst->Type == RC_INSTRUCTION_NORMAL) {
1245 const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode);
1246 if (info->HasTexture) {
1247 s->TEXCount++;
1248 }
1249 }
1250
1251 /* XXX: This causes SemWait to be set for all instructions in
1252 * a block if the previous block contained a TEX instruction.
1253 * We can do better here, but it will take a lot of work. */
1254 if (s->PrevBlockHasTex) {
1255 s->Current->TexReadCount = 1;
1256 }
1257
1258 s->Current->Instruction = inst;
1259 inst->IP = ip++;
1260
1261 DBG("%i: Scanning\n", inst->IP);
1262
1263 /* The order of things here is subtle and maybe slightly
1264 * counter-intuitive, to account for the case where an
1265 * instruction writes to the same register as it reads
1266 * from. */
1267 rc_for_all_writes_chan(inst, &scan_write, s);
1268 rc_for_all_reads_chan(inst, &scan_read, s);
1269
1270 DBG("%i: Has %i dependencies\n", inst->IP, s->Current->NumDependencies);
1271
1272 if (!s->Current->NumDependencies) {
1273 instruction_ready(s, s->Current);
1274 }
1275
1276 /* Get global readers for possible RGB->Alpha conversion. */
1277 s->Current->GlobalReaders.ExitOnAbort = 1;
1278 rc_get_readers(s->C, inst, &s->Current->GlobalReaders, is_rgb_to_alpha_possible_normal,
1279 is_rgb_to_alpha_possible, NULL);
1280 }
1281
1282 /* Temporarily unlink all instructions */
1283 begin->Prev->Next = end;
1284 end->Prev = begin->Prev;
1285
1286 /* Schedule instructions back */
1287 while (!s->C->Error && (s->ReadyTEX || s->ReadyRGB || s->ReadyAlpha || s->ReadyFullALU)) {
1288 emit_instruction(s, end);
1289 }
1290 }
1291
1292 static int
is_controlflow(struct rc_instruction * inst)1293 is_controlflow(struct rc_instruction *inst)
1294 {
1295 if (inst->Type == RC_INSTRUCTION_NORMAL) {
1296 const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
1297 return opcode->IsFlowControl;
1298 }
1299 return 0;
1300 }
1301
1302 void
rc_pair_schedule(struct radeon_compiler * cc,void * user)1303 rc_pair_schedule(struct radeon_compiler *cc, void *user)
1304 {
1305 struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler *)cc;
1306 struct schedule_state s;
1307 struct rc_instruction *inst = c->Base.Program.Instructions.Next;
1308 unsigned int *opt = user;
1309
1310 memset(&s, 0, sizeof(s));
1311 s.Opt = *opt;
1312 s.C = &c->Base;
1313 if (s.C->is_r500) {
1314 s.CalcScore = calc_score_readers;
1315 } else {
1316 s.CalcScore = calc_score_r300;
1317 }
1318 s.max_tex_group = debug_get_num_option("RADEON_TEX_GROUP", 8);
1319
1320 /* First go over and count all TEX. */
1321 while (inst != &c->Base.Program.Instructions) {
1322 if (inst->Type == RC_INSTRUCTION_NORMAL) {
1323 const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode);
1324 if (info->HasTexture) {
1325 s.totalTEXCount++;
1326 }
1327 }
1328 inst = inst->Next;
1329 }
1330
1331 inst = c->Base.Program.Instructions.Next;
1332 while (inst != &c->Base.Program.Instructions) {
1333 struct rc_instruction *first;
1334
1335 if (is_controlflow(inst)) {
1336 /* The TexSemWait flag is already properly set for ALU
1337 * instructions using the results of normal TEX lookup,
1338 * however it was found empirically that TEXKIL also needs
1339 * synchronization with the control flow. This might not be optimal,
1340 * however the docs don't offer any guidance in this matter.
1341 */
1342 if (s.PrevBlockHasKil) {
1343 inst->U.I.TexSemWait = 1;
1344 s.PrevBlockHasKil = 0;
1345 }
1346 inst = inst->Next;
1347 continue;
1348 }
1349
1350 first = inst;
1351
1352 while (inst != &c->Base.Program.Instructions && !is_controlflow(inst))
1353 inst = inst->Next;
1354
1355 DBG("Schedule one block\n");
1356 memset(s.Temporary, 0, sizeof(s.Temporary));
1357 s.TEXCount = 0;
1358 schedule_block(&s, first, inst);
1359 if (s.PendingTEX) {
1360 s.PrevBlockHasTex = 1;
1361 }
1362 }
1363 }
1364