• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2009 Nicolai Haehnle.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "radeon_program_pair.h"
7 
8 #include <stdio.h>
9 
10 #include "radeon_compiler.h"
11 #include "radeon_compiler_util.h"
12 #include "radeon_dataflow.h"
13 #include "radeon_list.h"
14 #include "radeon_variable.h"
15 
16 #include "util/u_debug.h"
17 
/* Set to a non-zero value to compile in the scheduler's debug traces. */
#define VERBOSE 0

/* printf-style trace to stderr; emits nothing while VERBOSE is 0. */
#define DBG(...)                                                                                   \
   do {                                                                                            \
      if (VERBOSE) {                                                                               \
         fprintf(stderr, __VA_ARGS__);                                                             \
      }                                                                                            \
   } while (0)
25 
26 struct schedule_instruction {
27    struct rc_instruction *Instruction;
28 
29    /** Next instruction in the linked list of ready instructions. */
30    struct schedule_instruction *NextReady;
31 
32    /** Values that this instruction reads and writes */
33    struct reg_value *WriteValues[4];
34    struct reg_value *ReadValues[12];
35    unsigned int NumWriteValues : 3;
36    unsigned int NumReadValues : 4;
37 
38    /**
39     * Number of (read and write) dependencies that must be resolved before
40     * this instruction can be scheduled.
41     */
42    unsigned int NumDependencies : 5;
43 
44    /** List of all readers (see rc_get_readers() for the definition of
45     * "all readers"), even those outside the basic block this instruction
46     * lives in. */
47    struct rc_reader_data GlobalReaders;
48 
49    /** If the scheduler has paired an RGB and an Alpha instruction together,
50     * PairedInst references the alpha instruction's dependency information.
51     */
52    struct schedule_instruction *PairedInst;
53 
54    /** This scheduler uses the value of Score to determine which
55     * instruction to schedule.  Instructions with a higher value of Score
56     * will be scheduled first. */
57    int Score;
58 
59    /** The number of components that read from a TEX instruction. */
60    unsigned TexReadCount;
61 
62    /** For TEX instructions a list of readers */
63    struct rc_list *TexReaders;
64 };
65 
/**
 * Node of the singly linked list that records which instructions read a
 * given register value.
 */
struct reg_value_reader {
   struct schedule_instruction *Reader; /**< Instruction that reads the value. */
   struct reg_value_reader *Next;       /**< Next reader of the same value, or NULL. */
};
73 
/**
 * One value held by a single component of an RC_FILE_TEMPORARY register.
 * Successive values written to the same component are chained through Next.
 */
struct reg_value {
   /** Instruction that produces this value. */
   struct schedule_instruction *Writer;

   /**
    * Unordered linked list of the instructions that read this value.
    * When the writer is committed, each reader's dependency count is
    * decreased (see commit_update_writes()).
    */
   struct reg_value_reader *Readers;

   /**
    * Number of readers that have not been committed yet.  Decremented each
    * time a reader is committed; when it reaches zero, the dependency count
    * of the instruction writing \ref Next is decremented.
    */
   unsigned int NumReaders;

   /** Next value to be written to the same register component, if any. */
   struct reg_value *Next;
};
98 
/** Value-tracking slots of one register, one slot per channel. */
struct register_state {
   struct reg_value *Values[4];
};
102 
103 struct remap_reg {
104    struct rc_instruction *Inst;
105    unsigned int OldIndex : (RC_REGISTER_INDEX_BITS + 1);
106    unsigned int OldSwizzle : 3;
107    unsigned int NewIndex : (RC_REGISTER_INDEX_BITS + 1);
108    unsigned int NewSwizzle : 3;
109    unsigned int OnlyTexReads : 1;
110    struct remap_reg *Next;
111 };
112 
113 struct schedule_state {
114    struct radeon_compiler *C;
115    struct schedule_instruction *Current;
116    /** Array of the previous writers of Current's destination register
117     * indexed by channel. */
118    struct schedule_instruction *PrevWriter[4];
119 
120    struct register_state Temporary[RC_REGISTER_MAX_INDEX];
121 
122    /**
123     * Linked lists of instructions that can be scheduled right now,
124     * based on which ALU/TEX resources they require.
125     */
126    /*@{*/
127    struct schedule_instruction *ReadyFullALU;
128    struct schedule_instruction *ReadyRGB;
129    struct schedule_instruction *ReadyAlpha;
130    struct schedule_instruction *ReadyTEX;
131    /*@}*/
132    struct rc_list *PendingTEX;
133 
134    void (*CalcScore)(struct schedule_instruction *);
135    long max_tex_group;
136    unsigned PrevBlockHasTex : 1;
137    unsigned PrevBlockHasKil : 1;
138    /* Number of TEX in the current block */
139    unsigned TEXCount;
140    /* Total number of TEX in the whole program.*/
141    unsigned totalTEXCount;
142    unsigned Opt : 1;
143 };
144 
145 static struct reg_value **
get_reg_valuep(struct schedule_state * s,rc_register_file file,unsigned int index,unsigned int chan)146 get_reg_valuep(struct schedule_state *s, rc_register_file file, unsigned int index,
147                unsigned int chan)
148 {
149    if (file != RC_FILE_TEMPORARY)
150       return NULL;
151 
152    if (index >= RC_REGISTER_MAX_INDEX) {
153       rc_error(s->C, "%s: index %i out of bounds\n", __func__, index);
154       return NULL;
155    }
156 
157    return &s->Temporary[index].Values[chan];
158 }
159 
160 static unsigned
get_tex_read_count(struct schedule_instruction * sinst)161 get_tex_read_count(struct schedule_instruction *sinst)
162 {
163    unsigned tex_read_count = sinst->TexReadCount;
164    if (sinst->PairedInst) {
165       tex_read_count += sinst->PairedInst->TexReadCount;
166    }
167    return tex_read_count;
168 }
169 
#if VERBOSE
/**
 * Debug helper: print every entry of a ready list to stderr as
 * "IP (score) [tex read count],".
 *
 * @param sinst Head of the list to print; may be NULL for an empty list.
 */
static void
print_list(struct schedule_instruction *sinst)
{
   for (struct schedule_instruction *ptr = sinst; ptr; ptr = ptr->NextReady) {
      /* Report the score of the entry being printed, not of the list head. */
      fprintf(stderr, "%u (%d) [%u],", ptr->Instruction->IP, ptr->Score,
              get_tex_read_count(ptr));
   }
   fprintf(stderr, "\n");
}
#endif
183 
184 static void
remove_inst_from_list(struct schedule_instruction ** list,struct schedule_instruction * inst)185 remove_inst_from_list(struct schedule_instruction **list, struct schedule_instruction *inst)
186 {
187    struct schedule_instruction *prev = NULL;
188    struct schedule_instruction *list_ptr;
189    for (list_ptr = *list; list_ptr; prev = list_ptr, list_ptr = list_ptr->NextReady) {
190       if (list_ptr == inst) {
191          if (prev) {
192             prev->NextReady = inst->NextReady;
193          } else {
194             *list = inst->NextReady;
195          }
196          inst->NextReady = NULL;
197          break;
198       }
199    }
200 }
201 
202 static void
add_inst_to_list(struct schedule_instruction ** list,struct schedule_instruction * inst)203 add_inst_to_list(struct schedule_instruction **list, struct schedule_instruction *inst)
204 {
205    inst->NextReady = *list;
206    *list = inst;
207 }
208 
209 static void
add_inst_to_list_score(struct schedule_instruction ** list,struct schedule_instruction * inst)210 add_inst_to_list_score(struct schedule_instruction **list, struct schedule_instruction *inst)
211 {
212    struct schedule_instruction *temp;
213    struct schedule_instruction *prev;
214    if (!*list) {
215       *list = inst;
216       return;
217    }
218    temp = *list;
219    prev = NULL;
220    while (temp && inst->Score <= temp->Score) {
221       prev = temp;
222       temp = temp->NextReady;
223    }
224 
225    if (!prev) {
226       inst->NextReady = temp;
227       *list = inst;
228    } else {
229       prev->NextReady = inst;
230       inst->NextReady = temp;
231    }
232 }
233 
234 static void
instruction_ready(struct schedule_state * s,struct schedule_instruction * sinst)235 instruction_ready(struct schedule_state *s, struct schedule_instruction *sinst)
236 {
237    DBG("%i is now ready\n", sinst->Instruction->IP);
238 
239    /* Adding Ready TEX instructions to the end of the "Ready List" helps
240     * us emit TEX instructions in blocks without losing our place. */
241    if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL)
242       add_inst_to_list_score(&s->ReadyTEX, sinst);
243    else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP)
244       add_inst_to_list_score(&s->ReadyRGB, sinst);
245    else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP)
246       add_inst_to_list_score(&s->ReadyAlpha, sinst);
247    else
248       add_inst_to_list_score(&s->ReadyFullALU, sinst);
249 }
250 
251 static void
decrease_dependencies(struct schedule_state * s,struct schedule_instruction * sinst)252 decrease_dependencies(struct schedule_state *s, struct schedule_instruction *sinst)
253 {
254    assert(sinst->NumDependencies > 0);
255    sinst->NumDependencies--;
256    if (!sinst->NumDependencies)
257       instruction_ready(s, sinst);
258 }
259 
260 /* These functions provide different heuristics for scheduling instructions.
261  * The default is calc_score_readers. */
262 
#if 0

/* Disabled heuristic: every instruction gets the same score. */
static void calc_score_zero(struct schedule_instruction * sinst)
{
   sinst->Score = 0;
}

/* Disabled heuristic: score an instruction by the dependency counts of the
 * readers of the values it writes, with a bonus of 100 for every reader that
 * has exactly one dependency left. */
static void calc_score_deps(struct schedule_instruction * sinst)
{
   int i;

   sinst->Score = 0;
   for (i = 0; i < sinst->NumWriteValues; i++) {
      struct reg_value * v = sinst->WriteValues[i];
      struct reg_value_reader * r;

      if (!v->NumReaders)
         continue;

      for (r = v->Readers; r; r = r->Next) {
         if (r->Reader->NumDependencies == 1)
            sinst->Score += 100;
         sinst->Score += r->Reader->NumDependencies;
      }
   }
}

#endif
289 
290 #define NO_OUTPUT_SCORE (1 << 24)
291 
292 static void
score_no_output(struct schedule_instruction * sinst)293 score_no_output(struct schedule_instruction *sinst)
294 {
295    assert(sinst->Instruction->Type != RC_INSTRUCTION_NORMAL);
296    if (!sinst->Instruction->U.P.RGB.OutputWriteMask &&
297        !sinst->Instruction->U.P.Alpha.OutputWriteMask) {
298       if (sinst->PairedInst) {
299          if (!sinst->PairedInst->Instruction->U.P.RGB.OutputWriteMask &&
300              !sinst->PairedInst->Instruction->U.P.Alpha.OutputWriteMask) {
301             sinst->Score |= NO_OUTPUT_SCORE;
302          }
303 
304       } else {
305          sinst->Score |= NO_OUTPUT_SCORE;
306       }
307    }
308 }
309 
310 #define PAIRED_SCORE (1 << 16)
311 
312 static void
calc_score_r300(struct schedule_instruction * sinst)313 calc_score_r300(struct schedule_instruction *sinst)
314 {
315    unsigned src_idx;
316 
317    if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
318       sinst->Score = 0;
319       return;
320    }
321 
322    score_no_output(sinst);
323 
324    if (sinst->PairedInst) {
325       sinst->Score |= PAIRED_SCORE;
326       return;
327    }
328 
329    for (src_idx = 0; src_idx < 4; src_idx++) {
330       sinst->Score += sinst->Instruction->U.P.RGB.Src[src_idx].Used +
331                       sinst->Instruction->U.P.Alpha.Src[src_idx].Used;
332    }
333 }
334 
335 #define NO_READ_TEX_SCORE (1 << 16)
336 
337 static void
calc_score_readers(struct schedule_instruction * sinst)338 calc_score_readers(struct schedule_instruction *sinst)
339 {
340    if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
341       sinst->Score = 0;
342    } else {
343       sinst->Score = sinst->NumReadValues;
344       if (sinst->PairedInst) {
345          sinst->Score += sinst->PairedInst->NumReadValues;
346       }
347       if (get_tex_read_count(sinst) == 0) {
348          sinst->Score |= NO_READ_TEX_SCORE;
349       }
350       score_no_output(sinst);
351    }
352 }
353 
354 /**
355  * This function decreases the dependencies of the next instruction that
356  * wants to write to each of sinst's read values.
357  */
358 static void
commit_update_reads(struct schedule_state * s,struct schedule_instruction * sinst)359 commit_update_reads(struct schedule_state *s, struct schedule_instruction *sinst)
360 {
361    do {
362       for (unsigned int i = 0; i < sinst->NumReadValues; ++i) {
363          struct reg_value *v = sinst->ReadValues[i];
364          assert(v->NumReaders > 0);
365          v->NumReaders--;
366          if (!v->NumReaders) {
367             if (v->Next) {
368                decrease_dependencies(s, v->Next->Writer);
369             }
370          }
371       }
372    } while ((sinst = sinst->PairedInst));
373 }
374 
375 static void
commit_update_writes(struct schedule_state * s,struct schedule_instruction * sinst)376 commit_update_writes(struct schedule_state *s, struct schedule_instruction *sinst)
377 {
378    do {
379       for (unsigned int i = 0; i < sinst->NumWriteValues; ++i) {
380          struct reg_value *v = sinst->WriteValues[i];
381          if (v->NumReaders) {
382             for (struct reg_value_reader *r = v->Readers; r; r = r->Next) {
383                decrease_dependencies(s, r->Reader);
384             }
385          } else {
386             /* This happens in instruction sequences of the type
387              *  OP r.x, ...;
388              *  OP r.x, r.x, ...;
389              * See also the subtlety in how instructions that both
390              * read and write the same register are scanned.
391              */
392             if (v->Next)
393                decrease_dependencies(s, v->Next->Writer);
394          }
395       }
396    } while ((sinst = sinst->PairedInst));
397 }
398 
399 static void
notify_sem_wait(struct schedule_state * s)400 notify_sem_wait(struct schedule_state *s)
401 {
402    struct rc_list *pend_ptr;
403    for (pend_ptr = s->PendingTEX; pend_ptr; pend_ptr = pend_ptr->Next) {
404       struct rc_list *read_ptr;
405       struct schedule_instruction *pending = pend_ptr->Item;
406       for (read_ptr = pending->TexReaders; read_ptr; read_ptr = read_ptr->Next) {
407          struct schedule_instruction *reader = read_ptr->Item;
408          reader->TexReadCount--;
409       }
410    }
411    s->PendingTEX = NULL;
412 }
413 
414 static void
commit_alu_instruction(struct schedule_state * s,struct schedule_instruction * sinst)415 commit_alu_instruction(struct schedule_state *s, struct schedule_instruction *sinst)
416 {
417    DBG("%i: commit score = %d\n", sinst->Instruction->IP, sinst->Score);
418 
419    commit_update_reads(s, sinst);
420 
421    commit_update_writes(s, sinst);
422 
423    if (get_tex_read_count(sinst) > 0) {
424       sinst->Instruction->U.P.SemWait = 1;
425       notify_sem_wait(s);
426    }
427 }
428 
429 /**
430  * Emit all ready texture instructions in a single block.
431  *
432  * Emit as a single block to (hopefully) sample many textures in parallel,
433  * and to avoid hardware indirections on R300.
434  */
435 static void
emit_all_tex(struct schedule_state * s,struct rc_instruction * before)436 emit_all_tex(struct schedule_state *s, struct rc_instruction *before)
437 {
438    struct schedule_instruction *readytex;
439    struct rc_instruction *inst_begin;
440 
441    assert(s->ReadyTEX);
442    notify_sem_wait(s);
443 
444    /* Node marker for R300 */
445    inst_begin = rc_insert_new_instruction(s->C, before->Prev);
446    inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX;
447 
448    /* Link texture instructions back in */
449    readytex = s->ReadyTEX;
450    while (readytex) {
451       rc_insert_instruction(before->Prev, readytex->Instruction);
452       DBG("%i: commit TEX reads\n", readytex->Instruction->IP);
453 
454       /* All of the TEX instructions in the same TEX block have
455        * their source registers read from before any of the
456        * instructions in that block write to their destination
457        * registers.  This means that when we commit a TEX
458        * instruction, any other TEX instruction that wants to write
459        * to one of the committed instruction's source register can be
460        * marked as ready and should be emitted in the same TEX
461        * block. This prevents the following sequence from being
462        * emitted in two different TEX blocks:
463        * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0];
464        * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0];
465        */
466       commit_update_reads(s, readytex);
467       readytex = readytex->NextReady;
468    }
469    readytex = s->ReadyTEX;
470    s->ReadyTEX = NULL;
471    while (readytex) {
472       DBG("%i: commit TEX writes\n", readytex->Instruction->IP);
473       commit_update_writes(s, readytex);
474       /* Set semaphore bits for last TEX instruction in the block */
475       if (!readytex->NextReady) {
476          readytex->Instruction->U.I.TexSemAcquire = 1;
477          readytex->Instruction->U.I.TexSemWait = 1;
478       }
479       rc_list_add(&s->PendingTEX, rc_list(&s->C->Pool, readytex));
480       readytex = readytex->NextReady;
481    }
482 }
483 
484 /* This is a helper function for destructive_merge_instructions().  It helps
485  * merge presubtract sources from two instructions and makes sure the
486  * presubtract sources end up in the correct spot.  This function assumes that
487  * dst_full is an rgb instruction, meaning that it has a vector instruction(rgb)
488  * but no scalar instruction (alpha).
489  * @return 0 if merging the presubtract sources fails.
490  * @return 1 if merging the presubtract sources succeeds.
491  */
492 static int
merge_presub_sources(struct rc_pair_instruction * dst_full,struct rc_pair_sub_instruction src,unsigned int type)493 merge_presub_sources(struct rc_pair_instruction *dst_full, struct rc_pair_sub_instruction src,
494                      unsigned int type)
495 {
496    unsigned int srcp_src, srcp_regs, is_rgb, is_alpha;
497    struct rc_pair_sub_instruction *dst_sub;
498    const struct rc_opcode_info *info;
499 
500    assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP);
501 
502    switch (type) {
503    case RC_SOURCE_RGB:
504       is_rgb = 1;
505       is_alpha = 0;
506       dst_sub = &dst_full->RGB;
507       break;
508    case RC_SOURCE_ALPHA:
509       is_rgb = 0;
510       is_alpha = 1;
511       dst_sub = &dst_full->Alpha;
512       break;
513    default:
514       assert(0);
515       return 0;
516    }
517 
518    info = rc_get_opcode_info(dst_full->RGB.Opcode);
519 
520    if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used)
521       return 0;
522 
523    srcp_regs = rc_presubtract_src_reg_count(src.Src[RC_PAIR_PRESUB_SRC].Index);
524    for (srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
525       unsigned int arg;
526       int free_source;
527       unsigned int one_way = 0;
528       struct rc_pair_instruction_source srcp = src.Src[srcp_src];
529       struct rc_pair_instruction_source temp;
530 
531       free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha, srcp.File, srcp.Index);
532 
533       /* If free_source < 0 then there are no free source
534        * slots. */
535       if (free_source < 0)
536          return 0;
537 
538       temp = dst_sub->Src[srcp_src];
539       dst_sub->Src[srcp_src] = dst_sub->Src[free_source];
540 
541       /* srcp needs src0 and src1 to be the same */
542       if (free_source < srcp_src) {
543          if (!temp.Used)
544             continue;
545          free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha, temp.File, temp.Index);
546          if (free_source < 0)
547             return 0;
548          one_way = 1;
549       } else {
550          dst_sub->Src[free_source] = temp;
551       }
552 
553       /* If free_source == srcp_src, then the presubtract
554        * source is already in the correct place. */
555       if (free_source == srcp_src)
556          continue;
557 
558       /* Shuffle the sources, so we can put the
559        * presubtract source in the correct place. */
560       for (arg = 0; arg < info->NumSrcRegs; arg++) {
561          /* If the arg does read both from rgb and alpha, then we need to rewrite
562           * both sources and the code currently doesn't handle this.
563           * FIXME: This is definitely solvable, however shader-db shows it is
564           * not worth the effort.
565           */
566          if (rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle) & RC_SOURCE_ALPHA &&
567              rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle) & RC_SOURCE_RGB)
568             return 0;
569 
570          /*If this arg does not read from an rgb source,
571           * do nothing. */
572          if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle) & type)) {
573             continue;
574          }
575 
576          if (dst_full->RGB.Arg[arg].Source == srcp_src)
577             dst_full->RGB.Arg[arg].Source = free_source;
578          /* We need to do this just in case register
579           * is one of the sources already, but in the
580           * wrong spot. */
581          else if (dst_full->RGB.Arg[arg].Source == free_source && !one_way) {
582             dst_full->RGB.Arg[arg].Source = srcp_src;
583          }
584       }
585    }
586    return 1;
587 }
588 
589 /* This function assumes that rgb.Alpha and alpha.RGB are unused */
590 static int
destructive_merge_instructions(struct rc_pair_instruction * rgb,struct rc_pair_instruction * alpha)591 destructive_merge_instructions(struct rc_pair_instruction *rgb, struct rc_pair_instruction *alpha)
592 {
593    const struct rc_opcode_info *opcode;
594 
595    assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
596    assert(alpha->RGB.Opcode == RC_OPCODE_NOP);
597 
598    /* Presubtract registers need to be merged first so that registers
599     * needed by the presubtract operation can be placed in src0 and/or
600     * src1. */
601 
602    /* Merge the rgb presubtract registers. */
603    if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
604       if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) {
605          return 0;
606       }
607    }
608    /* Merge the alpha presubtract registers */
609    if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
610       if (!merge_presub_sources(rgb, alpha->Alpha, RC_SOURCE_ALPHA)) {
611          return 0;
612       }
613    }
614 
615    /* Copy alpha args into rgb */
616    opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
617 
618    for (unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
619       unsigned int srcrgb = 0;
620       unsigned int srcalpha = 0;
621       unsigned int oldsrc = alpha->Alpha.Arg[arg].Source;
622       rc_register_file file = 0;
623       unsigned int index = 0;
624       int source;
625 
626       if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) {
627          srcrgb = 1;
628          file = alpha->RGB.Src[oldsrc].File;
629          index = alpha->RGB.Src[oldsrc].Index;
630       } else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) {
631          srcalpha = 1;
632          file = alpha->Alpha.Src[oldsrc].File;
633          index = alpha->Alpha.Src[oldsrc].Index;
634       }
635 
636       source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index);
637       if (source < 0)
638          return 0;
639 
640       rgb->Alpha.Arg[arg].Source = source;
641       rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle;
642       rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs;
643       rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate;
644    }
645 
646    /* Copy alpha opcode into rgb */
647    rgb->Alpha.Opcode = alpha->Alpha.Opcode;
648    rgb->Alpha.DestIndex = alpha->Alpha.DestIndex;
649    rgb->Alpha.WriteMask = alpha->Alpha.WriteMask;
650    rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask;
651    rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask;
652    rgb->Alpha.Saturate = alpha->Alpha.Saturate;
653    rgb->Alpha.Omod = alpha->Alpha.Omod;
654 
655    /* Merge ALU result writing */
656    if (alpha->WriteALUResult) {
657       if (rgb->WriteALUResult)
658          return 0;
659 
660       rgb->WriteALUResult = alpha->WriteALUResult;
661       rgb->ALUResultCompare = alpha->ALUResultCompare;
662    }
663 
664    /* Copy SemWait */
665    rgb->SemWait |= alpha->SemWait;
666 
667    return 1;
668 }
669 
670 /**
671  * Try to merge the given instructions into the rgb instructions.
672  *
673  * Return true on success; on failure, return false, and keep
674  * the instructions untouched.
675  */
676 static int
merge_instructions(struct rc_pair_instruction * rgb,struct rc_pair_instruction * alpha)677 merge_instructions(struct rc_pair_instruction *rgb, struct rc_pair_instruction *alpha)
678 {
679    struct rc_pair_instruction backup;
680 
681    /*Instructions can't write output registers and ALU result at the
682     * same time. */
683    if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask) ||
684        (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) {
685       return 0;
686    }
687 
688    /* Writing output registers in the middle of shaders is slow, so
689     * we don't want to pair output writes with temp writes. */
690    if ((rgb->RGB.OutputWriteMask && !alpha->Alpha.OutputWriteMask) ||
691        (!rgb->RGB.OutputWriteMask && alpha->Alpha.OutputWriteMask)) {
692       return 0;
693    }
694 
695    memcpy(&backup, rgb, sizeof(struct rc_pair_instruction));
696 
697    if (destructive_merge_instructions(rgb, alpha))
698       return 1;
699 
700    memcpy(rgb, &backup, sizeof(struct rc_pair_instruction));
701    return 0;
702 }
703 
704 static void
presub_nop(struct rc_instruction * emitted)705 presub_nop(struct rc_instruction *emitted)
706 {
707    int prev_rgb_index, prev_alpha_index, i, num_src;
708 
709    /* We don't need a nop if the previous instruction is a TEX. */
710    if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) {
711       return;
712    }
713    if (emitted->Prev->U.P.RGB.WriteMask)
714       prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex;
715    else
716       prev_rgb_index = -1;
717    if (emitted->Prev->U.P.Alpha.WriteMask)
718       prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex;
719    else
720       prev_alpha_index = 1;
721 
722    /* Check the previous rgb instruction */
723    if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
724       num_src = rc_presubtract_src_reg_count(emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index);
725       for (i = 0; i < num_src; i++) {
726          unsigned int index = emitted->U.P.RGB.Src[i].Index;
727          if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY &&
728              (index == prev_rgb_index || index == prev_alpha_index)) {
729             emitted->Prev->U.P.Nop = 1;
730             return;
731          }
732       }
733    }
734 
735    /* Check the previous alpha instruction. */
736    if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
737       return;
738 
739    num_src = rc_presubtract_src_reg_count(emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
740    for (i = 0; i < num_src; i++) {
741       unsigned int index = emitted->U.P.Alpha.Src[i].Index;
742       if (emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY &&
743           (index == prev_rgb_index || index == prev_alpha_index)) {
744          emitted->Prev->U.P.Nop = 1;
745          return;
746       }
747    }
748 }
749 
750 static void
rgb_to_alpha_remap(struct schedule_state * s,struct rc_instruction * inst,struct rc_pair_instruction_arg * arg,rc_register_file old_file,rc_swizzle old_swz,unsigned int new_index)751 rgb_to_alpha_remap(struct schedule_state *s, struct rc_instruction *inst,
752                    struct rc_pair_instruction_arg *arg, rc_register_file old_file,
753                    rc_swizzle old_swz, unsigned int new_index)
754 {
755    int new_src_index;
756    unsigned int i;
757 
758    for (i = 0; i < 3; i++) {
759       if (get_swz(arg->Swizzle, i) == old_swz) {
760          SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W);
761       }
762    }
763    new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1, old_file, new_index);
764    /* This conversion is not possible, we must have made a mistake in
765     * is_rgb_to_alpha_possible. */
766    if (new_src_index < 0) {
767       rc_error(s->C, "rgb_to_alpha_remap failed to allocate src.\n");
768       return;
769    }
770 
771    arg->Source = new_src_index;
772 }
773 
774 static int
can_remap(unsigned int opcode)775 can_remap(unsigned int opcode)
776 {
777    switch (opcode) {
778    case RC_OPCODE_DDX:
779    case RC_OPCODE_DDY:
780       return 0;
781    default:
782       return 1;
783    }
784 }
785 
786 static int
can_convert_opcode_to_alpha(unsigned int opcode)787 can_convert_opcode_to_alpha(unsigned int opcode)
788 {
789    switch (opcode) {
790    case RC_OPCODE_DDX:
791    case RC_OPCODE_DDY:
792    case RC_OPCODE_DP2:
793    case RC_OPCODE_DP3:
794    case RC_OPCODE_DP4:
795       return 0;
796    default:
797       return 1;
798    }
799 }
800 
801 static void
is_rgb_to_alpha_possible(void * userdata,struct rc_instruction * inst,struct rc_pair_instruction_arg * arg,struct rc_pair_instruction_source * src)802 is_rgb_to_alpha_possible(void *userdata, struct rc_instruction *inst,
803                          struct rc_pair_instruction_arg *arg,
804                          struct rc_pair_instruction_source *src)
805 {
806    unsigned int read_chan = RC_SWIZZLE_UNUSED;
807    unsigned int alpha_sources = 0;
808    unsigned int i;
809    struct rc_reader_data *reader_data = userdata;
810 
811    if (!can_remap(inst->U.P.RGB.Opcode) || !can_remap(inst->U.P.Alpha.Opcode)) {
812       reader_data->Abort = 1;
813       return;
814    }
815 
816    if (!src)
817       return;
818 
819    /* XXX There are some cases where we can still do the conversion if
820     * a reader reads from a presubtract source, but for now we'll prevent
821     * it. */
822    if (arg->Source == RC_PAIR_PRESUB_SRC) {
823       reader_data->Abort = 1;
824       return;
825    }
826 
827    /* Make sure the source only reads the register component that we
828     * are going to be converting from.  It is OK if the instruction uses
829     * this component more than once.
830     * XXX If the index we will be converting to is the same as the
831     * current index, then it is OK to read from more than one component.
832     */
833    for (i = 0; i < 3; i++) {
834       rc_swizzle swz = get_swz(arg->Swizzle, i);
835       switch (swz) {
836       case RC_SWIZZLE_X:
837       case RC_SWIZZLE_Y:
838       case RC_SWIZZLE_Z:
839       case RC_SWIZZLE_W:
840          if (read_chan == RC_SWIZZLE_UNUSED) {
841             read_chan = swz;
842          } else if (read_chan != swz) {
843             reader_data->Abort = 1;
844             return;
845          }
846          break;
847       default:
848          break;
849       }
850    }
851 
852    /* Make sure there are enough alpha sources.
853     * XXX If we know what register all the readers are going
854     * to be remapped to, then in some situations we can still do
855     * the substitution, even if all 3 alpha sources are being used.*/
856    for (i = 0; i < 3; i++) {
857       if (inst->U.P.Alpha.Src[i].Used) {
858          alpha_sources++;
859       }
860    }
861    if (alpha_sources > 2) {
862       reader_data->Abort = 1;
863       return;
864    }
865 }
866 
867 static int
convert_rgb_to_alpha(struct schedule_state * s,struct schedule_instruction * sched_inst)868 convert_rgb_to_alpha(struct schedule_state *s, struct schedule_instruction *sched_inst)
869 {
870    struct rc_pair_instruction *pair_inst = &sched_inst->Instruction->U.P;
871    unsigned int old_mask = pair_inst->RGB.WriteMask;
872    unsigned int old_swz = rc_mask_to_swizzle(old_mask);
873    const struct rc_opcode_info *info = rc_get_opcode_info(pair_inst->RGB.Opcode);
874    int new_index = -1;
875    unsigned int i;
876 
877    if (sched_inst->GlobalReaders.Abort)
878       return 0;
879 
880    /* Even though we checked that we can convert to alpha previously, it is
881     * possible that another rgb source of the reader instructions was already
882     * converted to alpha and we thus have no longer free alpha sources.
883     */
884    for (i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) {
885       struct rc_reader reader = sched_inst->GlobalReaders.Readers[i];
886       if (reader.Inst->U.P.Alpha.Src[2].Used)
887          return 0;
888    }
889 
890    if (!pair_inst->RGB.WriteMask)
891       return 0;
892 
893    if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode) ||
894        !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) {
895       return 0;
896    }
897 
898    assert(sched_inst->NumWriteValues == 1);
899 
900    if (!sched_inst->WriteValues[0]) {
901       assert(0);
902       return 0;
903    }
904 
905    /* We start at the old index, because if we can reuse the same
906     * register and just change the swizzle then it is more likely we
907     * will be able to convert all the readers. */
908    for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) {
909       struct reg_value **new_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY, i, 3);
910       if (!*new_regvalp) {
911          struct reg_value **old_regvalp = get_reg_valuep(
912             s, RC_FILE_TEMPORARY, pair_inst->RGB.DestIndex, rc_mask_to_swizzle(old_mask));
913          new_index = i;
914          *new_regvalp = *old_regvalp;
915          break;
916       }
917    }
918    if (new_index < 0) {
919       return 0;
920    }
921 
922    /* If we are converting a full instruction with RC_OPCODE_REPL_ALPHA
923     * as the RGB opcode, then the Alpha instruction will already contain
924     * the correct opcode and instruction args, so we do not want to
925     * overwrite them.
926     */
927    if (pair_inst->RGB.Opcode != RC_OPCODE_REPL_ALPHA) {
928       pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode;
929       memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg, sizeof(pair_inst->Alpha.Arg));
930    }
931    pair_inst->Alpha.DestIndex = new_index;
932    pair_inst->Alpha.WriteMask = RC_MASK_W;
933    pair_inst->Alpha.Target = pair_inst->RGB.Target;
934    pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask;
935    pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask;
936    pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate;
937    pair_inst->Alpha.Omod = pair_inst->RGB.Omod;
938    /* Move the swizzles into the first chan */
939    for (i = 0; i < info->NumSrcRegs; i++) {
940       unsigned int j;
941       for (j = 0; j < 3; j++) {
942          unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j);
943          if (swz != RC_SWIZZLE_UNUSED) {
944             pair_inst->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1);
945             break;
946          }
947       }
948    }
949    pair_inst->RGB.Opcode = RC_OPCODE_NOP;
950    pair_inst->RGB.DestIndex = 0;
951    pair_inst->RGB.WriteMask = 0;
952    pair_inst->RGB.Target = 0;
953    pair_inst->RGB.OutputWriteMask = 0;
954    pair_inst->RGB.DepthWriteMask = 0;
955    pair_inst->RGB.Saturate = 0;
956    memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg));
957 
958    for (i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) {
959       struct rc_reader reader = sched_inst->GlobalReaders.Readers[i];
960       rgb_to_alpha_remap(s, reader.Inst, reader.U.P.Arg, RC_FILE_TEMPORARY, old_swz, new_index);
961    }
962    return 1;
963 }
964 
965 static void
try_convert_and_pair(struct schedule_state * s,struct schedule_instruction ** inst_list)966 try_convert_and_pair(struct schedule_state *s, struct schedule_instruction **inst_list)
967 {
968    struct schedule_instruction *list_ptr = *inst_list;
969    while (list_ptr && *inst_list && (*inst_list)->NextReady) {
970       int paired = 0;
971       if (list_ptr->Instruction->U.P.Alpha.Opcode != RC_OPCODE_NOP &&
972           list_ptr->Instruction->U.P.RGB.Opcode != RC_OPCODE_REPL_ALPHA) {
973          goto next;
974       }
975       if (list_ptr->NumWriteValues == 1 && convert_rgb_to_alpha(s, list_ptr)) {
976 
977          struct schedule_instruction *pair_ptr;
978          remove_inst_from_list(inst_list, list_ptr);
979          add_inst_to_list_score(&s->ReadyAlpha, list_ptr);
980 
981          for (pair_ptr = s->ReadyRGB; pair_ptr; pair_ptr = pair_ptr->NextReady) {
982             if (merge_instructions(&pair_ptr->Instruction->U.P, &list_ptr->Instruction->U.P)) {
983                remove_inst_from_list(&s->ReadyAlpha, list_ptr);
984                remove_inst_from_list(&s->ReadyRGB, pair_ptr);
985                pair_ptr->PairedInst = list_ptr;
986 
987                add_inst_to_list(&s->ReadyFullALU, pair_ptr);
988                list_ptr = *inst_list;
989                paired = 1;
990                break;
991             }
992          }
993       }
994       if (!paired) {
995       next:
996          list_ptr = list_ptr->NextReady;
997       }
998    }
999 }
1000 
1001 /**
1002  * This function attempts to merge RGB and Alpha instructions together.
1003  */
1004 static void
pair_instructions(struct schedule_state * s)1005 pair_instructions(struct schedule_state *s)
1006 {
1007    struct schedule_instruction *rgb_ptr;
1008    struct schedule_instruction *alpha_ptr;
1009 
1010    /* Some pairings might fail because they require too
1011     * many source slots; try all possible pairings if necessary */
1012    rgb_ptr = s->ReadyRGB;
1013    while (rgb_ptr) {
1014       struct schedule_instruction *rgb_next = rgb_ptr->NextReady;
1015       alpha_ptr = s->ReadyAlpha;
1016       while (alpha_ptr) {
1017          struct schedule_instruction *alpha_next = alpha_ptr->NextReady;
1018          if (merge_instructions(&rgb_ptr->Instruction->U.P, &alpha_ptr->Instruction->U.P)) {
1019             /* Remove RGB and Alpha from their ready lists.
1020              */
1021             remove_inst_from_list(&s->ReadyRGB, rgb_ptr);
1022             remove_inst_from_list(&s->ReadyAlpha, alpha_ptr);
1023             rgb_ptr->PairedInst = alpha_ptr;
1024             add_inst_to_list(&s->ReadyFullALU, rgb_ptr);
1025             break;
1026          }
1027          alpha_ptr = alpha_next;
1028       }
1029       rgb_ptr = rgb_next;
1030    }
1031 
1032    if (!s->Opt) {
1033       return;
1034    }
1035 
1036    /* Full instructions that have RC_OPCODE_REPL_ALPHA in the RGB
1037     * slot can be converted into Alpha instructions. */
1038    try_convert_and_pair(s, &s->ReadyFullALU);
1039 
1040    /* Try to convert some of the RGB instructions to Alpha and
1041     * try to pair it with another RGB. */
1042    try_convert_and_pair(s, &s->ReadyRGB);
1043 }
1044 
1045 static void
update_max_score(struct schedule_state * s,struct schedule_instruction ** list,int * max_score,struct schedule_instruction ** max_inst_out,struct schedule_instruction *** list_out)1046 update_max_score(struct schedule_state *s, struct schedule_instruction **list, int *max_score,
1047                  struct schedule_instruction **max_inst_out,
1048                  struct schedule_instruction ***list_out)
1049 {
1050    struct schedule_instruction *list_ptr;
1051    for (list_ptr = *list; list_ptr; list_ptr = list_ptr->NextReady) {
1052       int score;
1053       s->CalcScore(list_ptr);
1054       score = list_ptr->Score;
1055       if (!*max_inst_out || score > *max_score) {
1056          *max_score = score;
1057          *max_inst_out = list_ptr;
1058          *list_out = list;
1059       }
1060    }
1061 }
1062 
1063 static void
emit_instruction(struct schedule_state * s,struct rc_instruction * before)1064 emit_instruction(struct schedule_state *s, struct rc_instruction *before)
1065 {
1066    int max_score = -1;
1067    struct schedule_instruction *max_inst = NULL;
1068    struct schedule_instruction **max_list = NULL;
1069    unsigned tex_count = 0;
1070    struct schedule_instruction *tex_ptr;
1071 
1072    pair_instructions(s);
1073 #if VERBOSE
1074    fprintf(stderr, "Full:\n");
1075    print_list(s->ReadyFullALU);
1076    fprintf(stderr, "RGB:\n");
1077    print_list(s->ReadyRGB);
1078    fprintf(stderr, "Alpha:\n");
1079    print_list(s->ReadyAlpha);
1080    fprintf(stderr, "TEX:\n");
1081    print_list(s->ReadyTEX);
1082 #endif
1083 
1084    for (tex_ptr = s->ReadyTEX; tex_ptr; tex_ptr = tex_ptr->NextReady) {
1085       /* In general we want to emit KIL ASAP, however KIL does count into
1086        * the indirection limit, so for R300/R400 we only do this if we
1087        * are sure we can fit in there.
1088        */
1089       if (tex_ptr->Instruction->U.I.Opcode == RC_OPCODE_KIL &&
1090           (s->C->is_r500 || s->totalTEXCount <= 3)) {
1091          emit_all_tex(s, before);
1092          s->PrevBlockHasKil = 1;
1093          return;
1094       }
1095       tex_count++;
1096    }
1097    update_max_score(s, &s->ReadyFullALU, &max_score, &max_inst, &max_list);
1098    update_max_score(s, &s->ReadyRGB, &max_score, &max_inst, &max_list);
1099    update_max_score(s, &s->ReadyAlpha, &max_score, &max_inst, &max_list);
1100 
1101    if (tex_count >= s->max_tex_group || max_score == -1 ||
1102        (s->TEXCount > 0 && tex_count == s->TEXCount) ||
1103        (tex_count > 0 && max_score < NO_OUTPUT_SCORE)) {
1104       emit_all_tex(s, before);
1105    } else {
1106 
1107       remove_inst_from_list(max_list, max_inst);
1108       rc_insert_instruction(before->Prev, max_inst->Instruction);
1109       commit_alu_instruction(s, max_inst);
1110 
1111       presub_nop(before->Prev);
1112    }
1113 }
1114 
1115 static void
add_tex_reader(struct schedule_state * s,struct schedule_instruction * writer,struct schedule_instruction * reader)1116 add_tex_reader(struct schedule_state *s, struct schedule_instruction *writer,
1117                struct schedule_instruction *reader)
1118 {
1119    if (!writer || writer->Instruction->Type != RC_INSTRUCTION_NORMAL) {
1120       /*Not a TEX instructions */
1121       return;
1122    }
1123    reader->TexReadCount++;
1124    rc_list_add(&writer->TexReaders, rc_list(&s->C->Pool, reader));
1125 }
1126 
1127 static void
scan_read(void * data,struct rc_instruction * inst,rc_register_file file,unsigned int index,unsigned int chan)1128 scan_read(void *data, struct rc_instruction *inst, rc_register_file file, unsigned int index,
1129           unsigned int chan)
1130 {
1131    struct schedule_state *s = data;
1132    struct reg_value **v = get_reg_valuep(s, file, index, chan);
1133    struct reg_value_reader *reader;
1134 
1135    if (!v)
1136       return;
1137 
1138    if (*v && (*v)->Writer == s->Current) {
1139       /* The instruction reads and writes to a register component.
1140        * In this case, we only want to increment dependencies by one.
1141        * Why?
1142        * Because each instruction depends on the writers of its source
1143        * registers _and_ the most recent writer of its destination
1144        * register.  In this case, the current instruction (s->Current)
1145        * has a dependency that both writes to one of its source
1146        * registers and was the most recent writer to its destination
1147        * register.  We have already marked this dependency in
1148        * scan_write(), so we don't need to do it again.
1149        */
1150 
1151       /* We need to make sure we are adding s->Current to the
1152        * previous writer's list of TexReaders, if the previous writer
1153        * was a TEX instruction.
1154        */
1155       add_tex_reader(s, s->PrevWriter[chan], s->Current);
1156 
1157       return;
1158    }
1159 
1160    DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
1161 
1162    reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
1163    reader->Reader = s->Current;
1164    if (!*v) {
1165       /* In this situation, the instruction reads from a register
1166        * that hasn't been written to or read from in the current
1167        * block. */
1168       *v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value));
1169       memset(*v, 0, sizeof(struct reg_value));
1170       (*v)->Readers = reader;
1171    } else {
1172       reader->Next = (*v)->Readers;
1173       (*v)->Readers = reader;
1174       /* Only update the current instruction's dependencies if the
1175        * register it reads from has been written to in this block. */
1176       if ((*v)->Writer) {
1177          add_tex_reader(s, (*v)->Writer, s->Current);
1178          s->Current->NumDependencies++;
1179       }
1180    }
1181    (*v)->NumReaders++;
1182 
1183    if (s->Current->NumReadValues >= 12) {
1184       rc_error(s->C, "%s: NumReadValues overflow\n", __func__);
1185    } else {
1186       s->Current->ReadValues[s->Current->NumReadValues++] = *v;
1187    }
1188 }
1189 
1190 static void
scan_write(void * data,struct rc_instruction * inst,rc_register_file file,unsigned int index,unsigned int chan)1191 scan_write(void *data, struct rc_instruction *inst, rc_register_file file, unsigned int index,
1192            unsigned int chan)
1193 {
1194    struct schedule_state *s = data;
1195    struct reg_value **pv = get_reg_valuep(s, file, index, chan);
1196    struct reg_value *newv;
1197 
1198    if (!pv)
1199       return;
1200 
1201    DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
1202 
1203    newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv));
1204    memset(newv, 0, sizeof(*newv));
1205 
1206    newv->Writer = s->Current;
1207 
1208    if (*pv) {
1209       (*pv)->Next = newv;
1210       s->Current->NumDependencies++;
1211       /* Keep track of the previous writer to s->Current's destination
1212        * register */
1213       s->PrevWriter[chan] = (*pv)->Writer;
1214    }
1215 
1216    *pv = newv;
1217 
1218    if (s->Current->NumWriteValues >= 4) {
1219       rc_error(s->C, "%s: NumWriteValues overflow\n", __func__);
1220    } else {
1221       s->Current->WriteValues[s->Current->NumWriteValues++] = newv;
1222    }
1223 }
1224 
1225 static void
is_rgb_to_alpha_possible_normal(void * userdata,struct rc_instruction * inst,struct rc_src_register * src)1226 is_rgb_to_alpha_possible_normal(void *userdata, struct rc_instruction *inst,
1227                                 struct rc_src_register *src)
1228 {
1229    struct rc_reader_data *reader_data = userdata;
1230    reader_data->Abort = 1;
1231 }
1232 
1233 static void
schedule_block(struct schedule_state * s,struct rc_instruction * begin,struct rc_instruction * end)1234 schedule_block(struct schedule_state *s, struct rc_instruction *begin, struct rc_instruction *end)
1235 {
1236    unsigned int ip;
1237 
1238    /* Scan instructions for data dependencies */
1239    ip = 0;
1240    for (struct rc_instruction *inst = begin; inst != end; inst = inst->Next) {
1241       s->Current = memory_pool_malloc(&s->C->Pool, sizeof(*s->Current));
1242       memset(s->Current, 0, sizeof(struct schedule_instruction));
1243 
1244       if (inst->Type == RC_INSTRUCTION_NORMAL) {
1245          const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode);
1246          if (info->HasTexture) {
1247             s->TEXCount++;
1248          }
1249       }
1250 
1251       /* XXX: This causes SemWait to be set for all instructions in
1252        * a block if the previous block contained a TEX instruction.
1253        * We can do better here, but it will take a lot of work. */
1254       if (s->PrevBlockHasTex) {
1255          s->Current->TexReadCount = 1;
1256       }
1257 
1258       s->Current->Instruction = inst;
1259       inst->IP = ip++;
1260 
1261       DBG("%i: Scanning\n", inst->IP);
1262 
1263       /* The order of things here is subtle and maybe slightly
1264        * counter-intuitive, to account for the case where an
1265        * instruction writes to the same register as it reads
1266        * from. */
1267       rc_for_all_writes_chan(inst, &scan_write, s);
1268       rc_for_all_reads_chan(inst, &scan_read, s);
1269 
1270       DBG("%i: Has %i dependencies\n", inst->IP, s->Current->NumDependencies);
1271 
1272       if (!s->Current->NumDependencies) {
1273          instruction_ready(s, s->Current);
1274       }
1275 
1276       /* Get global readers for possible RGB->Alpha conversion. */
1277       s->Current->GlobalReaders.ExitOnAbort = 1;
1278       rc_get_readers(s->C, inst, &s->Current->GlobalReaders, is_rgb_to_alpha_possible_normal,
1279                      is_rgb_to_alpha_possible, NULL);
1280    }
1281 
1282    /* Temporarily unlink all instructions */
1283    begin->Prev->Next = end;
1284    end->Prev = begin->Prev;
1285 
1286    /* Schedule instructions back */
1287    while (!s->C->Error && (s->ReadyTEX || s->ReadyRGB || s->ReadyAlpha || s->ReadyFullALU)) {
1288       emit_instruction(s, end);
1289    }
1290 }
1291 
1292 static int
is_controlflow(struct rc_instruction * inst)1293 is_controlflow(struct rc_instruction *inst)
1294 {
1295    if (inst->Type == RC_INSTRUCTION_NORMAL) {
1296       const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
1297       return opcode->IsFlowControl;
1298    }
1299    return 0;
1300 }
1301 
1302 void
rc_pair_schedule(struct radeon_compiler * cc,void * user)1303 rc_pair_schedule(struct radeon_compiler *cc, void *user)
1304 {
1305    struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler *)cc;
1306    struct schedule_state s;
1307    struct rc_instruction *inst = c->Base.Program.Instructions.Next;
1308    unsigned int *opt = user;
1309 
1310    memset(&s, 0, sizeof(s));
1311    s.Opt = *opt;
1312    s.C = &c->Base;
1313    if (s.C->is_r500) {
1314       s.CalcScore = calc_score_readers;
1315    } else {
1316       s.CalcScore = calc_score_r300;
1317    }
1318    s.max_tex_group = debug_get_num_option("RADEON_TEX_GROUP", 8);
1319 
1320    /* First go over and count all TEX. */
1321    while (inst != &c->Base.Program.Instructions) {
1322       if (inst->Type == RC_INSTRUCTION_NORMAL) {
1323          const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode);
1324          if (info->HasTexture) {
1325             s.totalTEXCount++;
1326          }
1327       }
1328       inst = inst->Next;
1329    }
1330 
1331    inst = c->Base.Program.Instructions.Next;
1332    while (inst != &c->Base.Program.Instructions) {
1333       struct rc_instruction *first;
1334 
1335       if (is_controlflow(inst)) {
1336          /* The TexSemWait flag is already properly set for ALU
1337           * instructions using the results of normal TEX lookup,
1338           * however it was found empirically that TEXKIL also needs
1339           * synchronization with the control flow. This might not be optimal,
1340           * however the docs don't offer any guidance in this matter.
1341           */
1342          if (s.PrevBlockHasKil) {
1343             inst->U.I.TexSemWait = 1;
1344             s.PrevBlockHasKil = 0;
1345          }
1346          inst = inst->Next;
1347          continue;
1348       }
1349 
1350       first = inst;
1351 
1352       while (inst != &c->Base.Program.Instructions && !is_controlflow(inst))
1353          inst = inst->Next;
1354 
1355       DBG("Schedule one block\n");
1356       memset(s.Temporary, 0, sizeof(s.Temporary));
1357       s.TEXCount = 0;
1358       schedule_block(&s, first, inst);
1359       if (s.PendingTEX) {
1360          s.PrevBlockHasTex = 1;
1361       }
1362    }
1363 }
1364