1 /*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 #include "radeon_program_pair.h"
29
30 #include <stdio.h>
31
32 #include "radeon_compiler.h"
33 #include "radeon_compiler_util.h"
34 #include "radeon_dataflow.h"
35 #include "radeon_list.h"
36 #include "radeon_variable.h"
37
38 #include "util/u_debug.h"
39
/* Set to 1 to enable the scheduler's debug tracing (and print_list()). */
#define VERBOSE 0

/* printf-style trace to stderr. The call is always compiled, but it is only
 * executed when VERBOSE is non-zero. */
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
43
/**
 * Scheduler bookkeeping attached to one program instruction.
 */
struct schedule_instruction {
	/** The program instruction this record describes. */
	struct rc_instruction * Instruction;

	/** Next instruction in the linked list of ready instructions. */
	struct schedule_instruction *NextReady;

	/** Values that this instruction reads and writes */
	struct reg_value * WriteValues[4];
	struct reg_value * ReadValues[12];
	/* Number of valid entries in WriteValues and ReadValues. */
	unsigned int NumWriteValues:3;
	unsigned int NumReadValues:4;

	/**
	 * Number of (read and write) dependencies that must be resolved before
	 * this instruction can be scheduled. When it reaches zero,
	 * decrease_dependencies() puts the instruction on a ready list.
	 */
	unsigned int NumDependencies:5;

	/** List of all readers (see rc_get_readers() for the definition of
	 * "all readers"), even those outside the basic block this instruction
	 * lives in. */
	struct rc_reader_data GlobalReaders;

	/** If the scheduler has paired an RGB and an Alpha instruction together,
	 * PairedInst references the alpha instruction's dependency information.
	 * NULL when the instruction has not been paired.
	 */
	struct schedule_instruction * PairedInst;

	/** This scheduler uses the value of Score to determine which
	 * instruction to schedule.  Instructions with a higher value of Score
	 * will be scheduled first. */
	int Score;

	/** The number of components that read from a TEX instruction.
	 * Decremented by notify_sem_wait(). */
	unsigned TexReadCount;

	/** For TEX instructions a list of readers */
	struct rc_list * TexReaders;
};
83
84
/**
 * Used to keep track of which instructions read a value.
 * One node of the singly linked list headed by reg_value::Readers.
 */
struct reg_value_reader {
	/** Instruction that reads the value. */
	struct schedule_instruction *Reader;
	/** Next reader of the same value; NULL terminates the list. */
	struct reg_value_reader *Next;
};
92
/**
 * Used to keep track which values are stored in each component of a
 * RC_FILE_TEMPORARY.
 */
struct reg_value {
	/** Instruction that writes this value. */
	struct schedule_instruction * Writer;

	/**
	 * Unordered linked list of instructions that read from this value.
	 * When this value becomes available (its writer is committed), we
	 * decrease all readers' dependency count.
	 */
	struct reg_value_reader *Readers;

	/**
	 * Number of readers of this value. This is decremented each time
	 * a reader of the value is committed.
	 * When the reader count reaches zero, the dependency count
	 * of the instruction writing \ref Next is decremented.
	 */
	unsigned int NumReaders;

	struct reg_value *Next; /**< Pointer to the next value to be written to the same register */
};
117
/**
 * Tracked values of one register, indexed by channel (see
 * get_reg_valuep()). convert_rgb_to_alpha() treats a NULL entry as a free
 * channel.
 */
struct register_state {
	struct reg_value * Values[4];
};
121
/*
 * Linked-list node describing a register/swizzle remapping.
 * NOTE(review): this struct is not referenced in the visible part of the
 * file; the exact meaning of each field should be confirmed at its users.
 */
struct remap_reg {
	struct rc_instruction * Inst;
	unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1);
	unsigned int OldSwizzle:3;
	unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1);
	unsigned int NewSwizzle:3;
	unsigned int OnlyTexReads:1;
	struct remap_reg * Next;
};
131
/**
 * Global state of the scheduler.
 */
struct schedule_state {
	/** Compiler context; used for rc_error() and for pool allocations. */
	struct radeon_compiler * C;
	struct schedule_instruction * Current;
	/** Array of the previous writers of Current's destination register
	 * indexed by channel. */
	struct schedule_instruction * PrevWriter[4];

	/** Per-channel value tracking for RC_FILE_TEMPORARY, indexed by
	 * register index (see get_reg_valuep()). */
	struct register_state Temporary[RC_REGISTER_MAX_INDEX];

	/**
	 * Linked lists of instructions that can be scheduled right now,
	 * based on which ALU/TEX resources they require.
	 */
	/*@{*/
	struct schedule_instruction *ReadyFullALU;
	struct schedule_instruction *ReadyRGB;
	struct schedule_instruction *ReadyAlpha;
	struct schedule_instruction *ReadyTEX;
	/*@}*/
	/** TEX instructions added by emit_all_tex(); the list is walked and
	 * then cleared by notify_sem_wait(). */
	struct rc_list *PendingTEX;

	/** Scoring heuristic applied to a schedule_instruction.
	 * NOTE(review): presumably one of the calc_score_*() functions; the
	 * assignment is outside the visible part of this file. */
	void (*CalcScore)(struct schedule_instruction *);
	/* NOTE(review): the remaining fields are not used in the visible part
	 * of this file; confirm their meaning at their users. */
	long max_tex_group;
	unsigned PrevBlockHasTex:1;
	unsigned TEXCount;
	unsigned Opt:1;
};
159
/**
 * Look up the slot that tracks the value held in one channel of a
 * temporary register.
 *
 * @param s     scheduler state that owns the register table
 * @param file  register file; only RC_FILE_TEMPORARY is tracked
 * @param index register index; must be below RC_REGISTER_MAX_INDEX
 * @param chan  channel; used unchecked as an index into the 4-entry
 *              Values array, so callers must pass 0..3
 * @return pointer to the slot, or NULL when the file is not tracked or the
 *         index is out of range (the latter is also reported via rc_error()).
 */
static struct reg_value ** get_reg_valuep(struct schedule_state * s,
		rc_register_file file, unsigned int index, unsigned int chan)
{
	if (file != RC_FILE_TEMPORARY)
		return NULL;

	if (index >= RC_REGISTER_MAX_INDEX) {
		/* index is unsigned, so it has to be formatted with %u. */
		rc_error(s->C, "%s: index %u out of bounds\n", __func__, index);
		return NULL;
	}

	return &s->Temporary[index].Values[chan];
}
173
get_tex_read_count(struct schedule_instruction * sinst)174 static unsigned get_tex_read_count(struct schedule_instruction * sinst)
175 {
176 unsigned tex_read_count = sinst->TexReadCount;
177 if (sinst->PairedInst) {
178 tex_read_count += sinst->PairedInst->TexReadCount;
179 }
180 return tex_read_count;
181 }
182
#if VERBOSE
/**
 * Debug helper: print every entry of a ready list to stderr as
 * "IP (score) [tex read count]," and finish with a newline.
 *
 * @param sinst head of the list to print; may be NULL.
 */
static void print_list(struct schedule_instruction * sinst)
{
	struct schedule_instruction * ptr;
	for (ptr = sinst; ptr; ptr = ptr->NextReady) {
		unsigned tex_read_count = get_tex_read_count(ptr);
		/* Report the score of the entry being printed, not the score
		 * of the list head. Score is a signed int, matching %d. */
		int score = ptr->Score;
		fprintf(stderr,"%u (%d) [%u],", ptr->Instruction->IP, score,
						tex_read_count);
	}
	fprintf(stderr, "\n");
}
#endif
196
remove_inst_from_list(struct schedule_instruction ** list,struct schedule_instruction * inst)197 static void remove_inst_from_list(struct schedule_instruction ** list,
198 struct schedule_instruction * inst)
199 {
200 struct schedule_instruction * prev = NULL;
201 struct schedule_instruction * list_ptr;
202 for (list_ptr = *list; list_ptr; prev = list_ptr,
203 list_ptr = list_ptr->NextReady) {
204 if (list_ptr == inst) {
205 if (prev) {
206 prev->NextReady = inst->NextReady;
207 } else {
208 *list = inst->NextReady;
209 }
210 inst->NextReady = NULL;
211 break;
212 }
213 }
214 }
215
add_inst_to_list(struct schedule_instruction ** list,struct schedule_instruction * inst)216 static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst)
217 {
218 inst->NextReady = *list;
219 *list = inst;
220 }
221
add_inst_to_list_score(struct schedule_instruction ** list,struct schedule_instruction * inst)222 static void add_inst_to_list_score(struct schedule_instruction ** list,
223 struct schedule_instruction * inst)
224 {
225 struct schedule_instruction * temp;
226 struct schedule_instruction * prev;
227 if (!*list) {
228 *list = inst;
229 return;
230 }
231 temp = *list;
232 prev = NULL;
233 while(temp && inst->Score <= temp->Score) {
234 prev = temp;
235 temp = temp->NextReady;
236 }
237
238 if (!prev) {
239 inst->NextReady = temp;
240 *list = inst;
241 } else {
242 prev->NextReady = inst;
243 inst->NextReady = temp;
244 }
245 }
246
instruction_ready(struct schedule_state * s,struct schedule_instruction * sinst)247 static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst)
248 {
249 DBG("%i is now ready\n", sinst->Instruction->IP);
250
251 /* Adding Ready TEX instructions to the end of the "Ready List" helps
252 * us emit TEX instructions in blocks without losing our place. */
253 if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL)
254 add_inst_to_list_score(&s->ReadyTEX, sinst);
255 else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP)
256 add_inst_to_list_score(&s->ReadyRGB, sinst);
257 else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP)
258 add_inst_to_list_score(&s->ReadyAlpha, sinst);
259 else
260 add_inst_to_list_score(&s->ReadyFullALU, sinst);
261 }
262
decrease_dependencies(struct schedule_state * s,struct schedule_instruction * sinst)263 static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst)
264 {
265 assert(sinst->NumDependencies > 0);
266 sinst->NumDependencies--;
267 if (!sinst->NumDependencies)
268 instruction_ready(s, sinst);
269 }
270
271 /* These functions provide different heuristics for scheduling instructions.
272 * The default is calc_score_readers. */
273
#if 0
/* Compiled out: alternative scoring heuristics. */

/** Heuristic that gives every instruction the same score (0). */
static void calc_score_zero(struct schedule_instruction * sinst)
{
	sinst->Score = 0;
}

/**
 * Heuristic based on the readers of the values sinst writes: every reader
 * adds its remaining dependency count to the score, plus a bonus of 100
 * when it has exactly one dependency left.
 */
static void calc_score_deps(struct schedule_instruction * sinst)
{
	int i;
	sinst->Score = 0;
	for (i = 0; i < sinst->NumWriteValues; i++) {
		struct reg_value * v = sinst->WriteValues[i];
		if (v->NumReaders) {
			struct reg_value_reader * r;
			for (r = v->Readers; r; r = r->Next) {
				if (r->Reader->NumDependencies == 1) {
					sinst->Score += 100;
				}
				sinst->Score += r->Reader->NumDependencies;
			}
		}
	}
}

#endif
300
301 #define NO_OUTPUT_SCORE (1 << 24)
302
score_no_output(struct schedule_instruction * sinst)303 static void score_no_output(struct schedule_instruction * sinst)
304 {
305 assert(sinst->Instruction->Type != RC_INSTRUCTION_NORMAL);
306 if (!sinst->Instruction->U.P.RGB.OutputWriteMask &&
307 !sinst->Instruction->U.P.Alpha.OutputWriteMask) {
308 if (sinst->PairedInst) {
309 if (!sinst->PairedInst->Instruction->U.P.
310 RGB.OutputWriteMask
311 && !sinst->PairedInst->Instruction->U.P.
312 Alpha.OutputWriteMask) {
313 sinst->Score |= NO_OUTPUT_SCORE;
314 }
315
316 } else {
317 sinst->Score |= NO_OUTPUT_SCORE;
318 }
319 }
320 }
321
322 #define PAIRED_SCORE (1 << 16)
323
calc_score_r300(struct schedule_instruction * sinst)324 static void calc_score_r300(struct schedule_instruction * sinst)
325 {
326 unsigned src_idx;
327
328 if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
329 sinst->Score = 0;
330 return;
331 }
332
333 score_no_output(sinst);
334
335 if (sinst->PairedInst) {
336 sinst->Score |= PAIRED_SCORE;
337 return;
338 }
339
340 for (src_idx = 0; src_idx < 4; src_idx++) {
341 sinst->Score += sinst->Instruction->U.P.RGB.Src[src_idx].Used +
342 sinst->Instruction->U.P.Alpha.Src[src_idx].Used;
343 }
344 }
345
346 #define NO_READ_TEX_SCORE (1 << 16)
347
calc_score_readers(struct schedule_instruction * sinst)348 static void calc_score_readers(struct schedule_instruction * sinst)
349 {
350 if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
351 sinst->Score = 0;
352 } else {
353 sinst->Score = sinst->NumReadValues;
354 if (sinst->PairedInst) {
355 sinst->Score += sinst->PairedInst->NumReadValues;
356 }
357 if (get_tex_read_count(sinst) == 0) {
358 sinst->Score |= NO_READ_TEX_SCORE;
359 }
360 score_no_output(sinst);
361 }
362 }
363
364 /**
365 * This function decreases the dependencies of the next instruction that
366 * wants to write to each of sinst's read values.
367 */
commit_update_reads(struct schedule_state * s,struct schedule_instruction * sinst)368 static void commit_update_reads(struct schedule_state * s,
369 struct schedule_instruction * sinst){
370 do {
371 for(unsigned int i = 0; i < sinst->NumReadValues; ++i) {
372 struct reg_value * v = sinst->ReadValues[i];
373 assert(v->NumReaders > 0);
374 v->NumReaders--;
375 if (!v->NumReaders) {
376 if (v->Next) {
377 decrease_dependencies(s, v->Next->Writer);
378 }
379 }
380 }
381 } while ((sinst = sinst->PairedInst));
382 }
383
commit_update_writes(struct schedule_state * s,struct schedule_instruction * sinst)384 static void commit_update_writes(struct schedule_state * s,
385 struct schedule_instruction * sinst){
386 do {
387 for(unsigned int i = 0; i < sinst->NumWriteValues; ++i) {
388 struct reg_value * v = sinst->WriteValues[i];
389 if (v->NumReaders) {
390 for(struct reg_value_reader * r = v->Readers; r; r = r->Next) {
391 decrease_dependencies(s, r->Reader);
392 }
393 } else {
394 /* This happens in instruction sequences of the type
395 * OP r.x, ...;
396 * OP r.x, r.x, ...;
397 * See also the subtlety in how instructions that both
398 * read and write the same register are scanned.
399 */
400 if (v->Next)
401 decrease_dependencies(s, v->Next->Writer);
402 }
403 }
404 } while ((sinst = sinst->PairedInst));
405 }
406
notify_sem_wait(struct schedule_state * s)407 static void notify_sem_wait(struct schedule_state *s)
408 {
409 struct rc_list * pend_ptr;
410 for (pend_ptr = s->PendingTEX; pend_ptr; pend_ptr = pend_ptr->Next) {
411 struct rc_list * read_ptr;
412 struct schedule_instruction * pending = pend_ptr->Item;
413 for (read_ptr = pending->TexReaders; read_ptr;
414 read_ptr = read_ptr->Next) {
415 struct schedule_instruction * reader = read_ptr->Item;
416 reader->TexReadCount--;
417 }
418 }
419 s->PendingTEX = NULL;
420 }
421
commit_alu_instruction(struct schedule_state * s,struct schedule_instruction * sinst)422 static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst)
423 {
424 DBG("%i: commit score = %d\n", sinst->Instruction->IP, sinst->Score);
425
426 commit_update_reads(s, sinst);
427
428 commit_update_writes(s, sinst);
429
430 if (get_tex_read_count(sinst) > 0) {
431 sinst->Instruction->U.P.SemWait = 1;
432 notify_sem_wait(s);
433 }
434 }
435
436 /**
437 * Emit all ready texture instructions in a single block.
438 *
439 * Emit as a single block to (hopefully) sample many textures in parallel,
440 * and to avoid hardware indirections on R300.
441 */
emit_all_tex(struct schedule_state * s,struct rc_instruction * before)442 static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before)
443 {
444 struct schedule_instruction *readytex;
445 struct rc_instruction * inst_begin;
446
447 assert(s->ReadyTEX);
448 notify_sem_wait(s);
449
450 /* Node marker for R300 */
451 inst_begin = rc_insert_new_instruction(s->C, before->Prev);
452 inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX;
453
454 /* Link texture instructions back in */
455 readytex = s->ReadyTEX;
456 while(readytex) {
457 rc_insert_instruction(before->Prev, readytex->Instruction);
458 DBG("%i: commit TEX reads\n", readytex->Instruction->IP);
459
460 /* All of the TEX instructions in the same TEX block have
461 * their source registers read from before any of the
462 * instructions in that block write to their destination
463 * registers. This means that when we commit a TEX
464 * instruction, any other TEX instruction that wants to write
465 * to one of the committed instruction's source register can be
466 * marked as ready and should be emitted in the same TEX
467 * block. This prevents the following sequence from being
468 * emitted in two different TEX blocks:
469 * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0];
470 * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0];
471 */
472 commit_update_reads(s, readytex);
473 readytex = readytex->NextReady;
474 }
475 readytex = s->ReadyTEX;
476 s->ReadyTEX = NULL;
477 while(readytex){
478 DBG("%i: commit TEX writes\n", readytex->Instruction->IP);
479 commit_update_writes(s, readytex);
480 /* Set semaphore bits for last TEX instruction in the block */
481 if (!readytex->NextReady) {
482 readytex->Instruction->U.I.TexSemAcquire = 1;
483 readytex->Instruction->U.I.TexSemWait = 1;
484 }
485 rc_list_add(&s->PendingTEX, rc_list(&s->C->Pool, readytex));
486 readytex = readytex->NextReady;
487 }
488 }
489
/* This is a helper function for destructive_merge_instructions().  It helps
 * merge presubtract sources from two instructions and makes sure the
 * presubtract sources end up in the correct spot.  This function assumes that
 * dst_full is an rgb instruction, meaning that it has a vector instruction(rgb)
 * but no scalar instruction (alpha).
 *
 * @param dst_full instruction that receives the presubtract sources
 * @param src      sub-instruction whose presubtract sources are merged in
 * @param type     RC_SOURCE_RGB or RC_SOURCE_ALPHA; selects which source
 *                 bank of dst_full is filled
 * @return 0 if merging the presubtract sources fails.
 * @return 1 if merging the presubtract sources succeeds.
 *
 * dst_full may already have been modified when 0 is returned.
 */
static int merge_presub_sources(
	struct rc_pair_instruction * dst_full,
	struct rc_pair_sub_instruction src,
	unsigned int type)
{
	unsigned int srcp_src, srcp_regs, is_rgb, is_alpha;
	struct rc_pair_sub_instruction * dst_sub;
	const struct rc_opcode_info * info;

	assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP);

	switch(type) {
	case RC_SOURCE_RGB:
		is_rgb = 1;
		is_alpha = 0;
		dst_sub = &dst_full->RGB;
		break;
	case RC_SOURCE_ALPHA:
		is_rgb = 0;
		is_alpha = 1;
		dst_sub = &dst_full->Alpha;
		break;
	default:
		assert(0);
		return 0;
	}

	/* Only the RGB args exist in dst_full (Alpha is NOP, see the assert
	 * above), so the arg shuffling below always works on RGB.Arg. */
	info = rc_get_opcode_info(dst_full->RGB.Opcode);

	/* The destination bank already has a presubtract operation. */
	if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used)
		return 0;

	srcp_regs = rc_presubtract_src_reg_count(
					src.Src[RC_PAIR_PRESUB_SRC].Index);
	for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
		unsigned int arg;
		int free_source;
		unsigned int one_way = 0;
		struct rc_pair_instruction_source srcp = src.Src[srcp_src];
		struct rc_pair_instruction_source temp;

		free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha,
							srcp.File, srcp.Index);

		/* If free_source < 0 then there are no free source
		 * slots. */
		if (free_source < 0)
			return 0;

		/* Move the newly allocated source into slot srcp_src and
		 * remember what that slot held before. */
		temp = dst_sub->Src[srcp_src];
		dst_sub->Src[srcp_src] = dst_sub->Src[free_source];

		/* srcp needs src0 and src1 to be the same */
		if (free_source < srcp_src) {
			if (!temp.Used)
				continue;
			/* The displaced source needs a new slot of its own. */
			free_source = rc_pair_alloc_source(dst_full, is_rgb,
					is_alpha, temp.File, temp.Index);
			if (free_source < 0)
				return 0;
			one_way = 1;
		} else {
			dst_sub->Src[free_source] = temp;
		}

		/* If free_source == srcp_src, then the presubtract
		 * source is already in the correct place. */
		if (free_source == srcp_src)
			continue;

		/* Shuffle the sources, so we can put the
		 * presubtract source in the correct place. */
		for(arg = 0; arg < info->NumSrcRegs; arg++) {
			/* If this arg does not read from a source of the
			 * requested type, do nothing. */
			if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle)
								& type)) {
				continue;
			}

			if (dst_full->RGB.Arg[arg].Source == srcp_src)
				dst_full->RGB.Arg[arg].Source = free_source;
			/* We need to do this just in case register
			 * is one of the sources already, but in the
			 * wrong spot. */
			else if(dst_full->RGB.Arg[arg].Source == free_source
							&& !one_way) {
				dst_full->RGB.Arg[arg].Source = srcp_src;
			}
		}
	}
	return 1;
}
591
592
593 /* This function assumes that rgb.Alpha and alpha.RGB are unused */
destructive_merge_instructions(struct rc_pair_instruction * rgb,struct rc_pair_instruction * alpha)594 static int destructive_merge_instructions(
595 struct rc_pair_instruction * rgb,
596 struct rc_pair_instruction * alpha)
597 {
598 const struct rc_opcode_info * opcode;
599
600 assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
601 assert(alpha->RGB.Opcode == RC_OPCODE_NOP);
602
603 /* Presubtract registers need to be merged first so that registers
604 * needed by the presubtract operation can be placed in src0 and/or
605 * src1. */
606
607 /* Merge the rgb presubtract registers. */
608 if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
609 if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) {
610 return 0;
611 }
612 }
613 /* Merge the alpha presubtract registers */
614 if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
615 if(!merge_presub_sources(rgb, alpha->Alpha, RC_SOURCE_ALPHA)){
616 return 0;
617 }
618 }
619
620 /* Copy alpha args into rgb */
621 opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
622
623 for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
624 unsigned int srcrgb = 0;
625 unsigned int srcalpha = 0;
626 unsigned int oldsrc = alpha->Alpha.Arg[arg].Source;
627 rc_register_file file = 0;
628 unsigned int index = 0;
629 int source;
630
631 if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) {
632 srcrgb = 1;
633 file = alpha->RGB.Src[oldsrc].File;
634 index = alpha->RGB.Src[oldsrc].Index;
635 } else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) {
636 srcalpha = 1;
637 file = alpha->Alpha.Src[oldsrc].File;
638 index = alpha->Alpha.Src[oldsrc].Index;
639 }
640
641 source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index);
642 if (source < 0)
643 return 0;
644
645 rgb->Alpha.Arg[arg].Source = source;
646 rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle;
647 rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs;
648 rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate;
649 }
650
651 /* Copy alpha opcode into rgb */
652 rgb->Alpha.Opcode = alpha->Alpha.Opcode;
653 rgb->Alpha.DestIndex = alpha->Alpha.DestIndex;
654 rgb->Alpha.WriteMask = alpha->Alpha.WriteMask;
655 rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask;
656 rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask;
657 rgb->Alpha.Saturate = alpha->Alpha.Saturate;
658 rgb->Alpha.Omod = alpha->Alpha.Omod;
659
660 /* Merge ALU result writing */
661 if (alpha->WriteALUResult) {
662 if (rgb->WriteALUResult)
663 return 0;
664
665 rgb->WriteALUResult = alpha->WriteALUResult;
666 rgb->ALUResultCompare = alpha->ALUResultCompare;
667 }
668
669 /* Copy SemWait */
670 rgb->SemWait |= alpha->SemWait;
671
672 return 1;
673 }
674
675 /**
676 * Try to merge the given instructions into the rgb instructions.
677 *
678 * Return true on success; on failure, return false, and keep
679 * the instructions untouched.
680 */
merge_instructions(struct rc_pair_instruction * rgb,struct rc_pair_instruction * alpha)681 static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha)
682 {
683 struct rc_pair_instruction backup;
684
685 /*Instructions can't write output registers and ALU result at the
686 * same time. */
687 if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask)
688 || (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) {
689 return 0;
690 }
691
692 /* Writing output registers in the middle of shaders is slow, so
693 * we don't want to pair output writes with temp writes. */
694 if ((rgb->RGB.OutputWriteMask && !alpha->Alpha.OutputWriteMask)
695 || (!rgb->RGB.OutputWriteMask && alpha->Alpha.OutputWriteMask)) {
696 return 0;
697 }
698
699 memcpy(&backup, rgb, sizeof(struct rc_pair_instruction));
700
701 if (destructive_merge_instructions(rgb, alpha))
702 return 1;
703
704 memcpy(rgb, &backup, sizeof(struct rc_pair_instruction));
705 return 0;
706 }
707
presub_nop(struct rc_instruction * emitted)708 static void presub_nop(struct rc_instruction * emitted) {
709 int prev_rgb_index, prev_alpha_index, i, num_src;
710
711 /* We don't need a nop if the previous instruction is a TEX. */
712 if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) {
713 return;
714 }
715 if (emitted->Prev->U.P.RGB.WriteMask)
716 prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex;
717 else
718 prev_rgb_index = -1;
719 if (emitted->Prev->U.P.Alpha.WriteMask)
720 prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex;
721 else
722 prev_alpha_index = 1;
723
724 /* Check the previous rgb instruction */
725 if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
726 num_src = rc_presubtract_src_reg_count(
727 emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index);
728 for (i = 0; i < num_src; i++) {
729 unsigned int index = emitted->U.P.RGB.Src[i].Index;
730 if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY
731 && (index == prev_rgb_index
732 || index == prev_alpha_index)) {
733 emitted->Prev->U.P.Nop = 1;
734 return;
735 }
736 }
737 }
738
739 /* Check the previous alpha instruction. */
740 if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
741 return;
742
743 num_src = rc_presubtract_src_reg_count(
744 emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
745 for (i = 0; i < num_src; i++) {
746 unsigned int index = emitted->U.P.Alpha.Src[i].Index;
747 if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY
748 && (index == prev_rgb_index || index == prev_alpha_index)) {
749 emitted->Prev->U.P.Nop = 1;
750 return;
751 }
752 }
753 }
754
/**
 * Rewrite one reader argument after its source value moved to the W
 * channel of temporary register new_index: every swizzle component (of the
 * first three) that selected old_swz now selects W, and the argument is
 * pointed at a newly allocated alpha source for old_file/new_index.
 *
 * If no alpha source can be allocated an error is reported and the
 * argument's Source is left unchanged (its swizzle has already been
 * rewritten at that point).
 */
static void rgb_to_alpha_remap (
	struct schedule_state * s,
	struct rc_instruction * inst,
	struct rc_pair_instruction_arg * arg,
	rc_register_file old_file,
	rc_swizzle old_swz,
	unsigned int new_index)
{
	unsigned int chan;
	int alpha_src;

	for (chan = 0; chan < 3; chan++) {
		if (get_swz(arg->Swizzle, chan) == old_swz) {
			SET_SWZ(arg->Swizzle, chan, RC_SWIZZLE_W);
		}
	}

	alpha_src = rc_pair_alloc_source(&inst->U.P, 0, 1,
							old_file, new_index);
	if (alpha_src >= 0) {
		arg->Source = alpha_src;
		return;
	}

	/* This conversion is not possible, we must have made a mistake in
	 * is_rgb_to_alpha_possible. */
	rc_error(s->C, "rgb_to_alpha_remap failed to allocate src.\n");
}
782
can_remap(unsigned int opcode)783 static int can_remap(unsigned int opcode)
784 {
785 switch(opcode) {
786 case RC_OPCODE_DDX:
787 case RC_OPCODE_DDY:
788 return 0;
789 default:
790 return 1;
791 }
792 }
793
can_convert_opcode_to_alpha(unsigned int opcode)794 static int can_convert_opcode_to_alpha(unsigned int opcode)
795 {
796 switch(opcode) {
797 case RC_OPCODE_DDX:
798 case RC_OPCODE_DDY:
799 case RC_OPCODE_DP2:
800 case RC_OPCODE_DP3:
801 case RC_OPCODE_DP4:
802 return 0;
803 default:
804 return 1;
805 }
806 }
807
is_rgb_to_alpha_possible(void * userdata,struct rc_instruction * inst,struct rc_pair_instruction_arg * arg,struct rc_pair_instruction_source * src)808 static void is_rgb_to_alpha_possible(
809 void * userdata,
810 struct rc_instruction * inst,
811 struct rc_pair_instruction_arg * arg,
812 struct rc_pair_instruction_source * src)
813 {
814 unsigned int read_chan = RC_SWIZZLE_UNUSED;
815 unsigned int alpha_sources = 0;
816 unsigned int i;
817 struct rc_reader_data * reader_data = userdata;
818
819 if (!can_remap(inst->U.P.RGB.Opcode)
820 || !can_remap(inst->U.P.Alpha.Opcode)) {
821 reader_data->Abort = 1;
822 return;
823 }
824
825 if (!src)
826 return;
827
828 /* XXX There are some cases where we can still do the conversion if
829 * a reader reads from a presubtract source, but for now we'll prevent
830 * it. */
831 if (arg->Source == RC_PAIR_PRESUB_SRC) {
832 reader_data->Abort = 1;
833 return;
834 }
835
836 /* Make sure the source only reads the register component that we
837 * are going to be convering from. It is OK if the instruction uses
838 * this component more than once.
839 * XXX If the index we will be converting to is the same as the
840 * current index, then it is OK to read from more than one component.
841 */
842 for (i = 0; i < 3; i++) {
843 rc_swizzle swz = get_swz(arg->Swizzle, i);
844 switch(swz) {
845 case RC_SWIZZLE_X:
846 case RC_SWIZZLE_Y:
847 case RC_SWIZZLE_Z:
848 case RC_SWIZZLE_W:
849 if (read_chan == RC_SWIZZLE_UNUSED) {
850 read_chan = swz;
851 } else if (read_chan != swz) {
852 reader_data->Abort = 1;
853 return;
854 }
855 break;
856 default:
857 break;
858 }
859 }
860
861 /* Make sure there are enough alpha sources.
862 * XXX If we know what register all the readers are going
863 * to be remapped to, then in some situations we can still do
864 * the substitution, even if all 3 alpha sources are being used.*/
865 for (i = 0; i < 3; i++) {
866 if (inst->U.P.Alpha.Src[i].Used) {
867 alpha_sources++;
868 }
869 }
870 if (alpha_sources > 2) {
871 reader_data->Abort = 1;
872 return;
873 }
874 }
875
/**
 * Try to turn an instruction that writes a single RGB channel into one that
 * writes the W channel through the alpha half of the pair instruction.
 *
 * On success the tracked register value is moved to the W channel of the
 * chosen temporary, the RGB half is reset to NOP, and every reader listed
 * in sched_inst->GlobalReaders is rewritten via rgb_to_alpha_remap().
 *
 * @param s          scheduler state
 * @param sched_inst instruction to convert
 * @return 1 if the instruction was converted, 0 if it was left unchanged.
 */
static int convert_rgb_to_alpha(
	struct schedule_state * s,
	struct schedule_instruction * sched_inst)
{
	struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P;
	unsigned int old_mask = pair_inst->RGB.WriteMask;
	unsigned int old_swz = rc_mask_to_swizzle(old_mask);
	const struct rc_opcode_info * info =
				rc_get_opcode_info(pair_inst->RGB.Opcode);
	int new_index = -1;
	unsigned int i;

	/* One of the readers has vetoed the conversion. */
	if (sched_inst->GlobalReaders.Abort)
		return 0;

	/* Nothing is written through the RGB half. */
	if (!pair_inst->RGB.WriteMask)
		return 0;

	if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode)
	    || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) {
		return 0;
	}

	assert(sched_inst->NumWriteValues == 1);

	if (!sched_inst->WriteValues[0]) {
		assert(0);
		return 0;
	}

	/* We start at the old index, because if we can reuse the same
	 * register and just change the swizzle then it is more likely we
	 * will be able to convert all the readers. */
	for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) {
		/* Channel 3 is the W channel; an empty slot means it is free. */
		struct reg_value ** new_regvalp = get_reg_valuep(
						s, RC_FILE_TEMPORARY, i, 3);
		if (!*new_regvalp) {
			struct reg_value ** old_regvalp =
				get_reg_valuep(s,
					RC_FILE_TEMPORARY,
					pair_inst->RGB.DestIndex,
					rc_mask_to_swizzle(old_mask));
			new_index = i;
			/* Move the tracked value from the old channel to W. */
			*new_regvalp = *old_regvalp;
			*old_regvalp = NULL;
			/* NOTE(review): the pointer fetched here is not used
			 * before the loop is left. */
			new_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY, i, 3);
			break;
		}
	}
	/* No temporary with a free W channel was found. */
	if (new_index < 0) {
		return 0;
	}

	/* If we are converting a full instruction with RC_OPCODE_REPL_ALPHA
	 * as the RGB opcode, then the Alpha instruction will already contain
	 * the correct opcode and instruction args, so we do not want to
	 * overwrite them.
	 */
	if (pair_inst->RGB.Opcode != RC_OPCODE_REPL_ALPHA) {
		pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode;
		memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg,
						sizeof(pair_inst->Alpha.Arg));
	}
	pair_inst->Alpha.DestIndex = new_index;
	pair_inst->Alpha.WriteMask = RC_MASK_W;
	pair_inst->Alpha.Target = pair_inst->RGB.Target;
	pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask;
	pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask;
	pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate;
	pair_inst->Alpha.Omod = pair_inst->RGB.Omod;
	/* Move the swizzles into the first chan */
	for (i = 0; i < info->NumSrcRegs; i++) {
		unsigned int j;
		for (j = 0; j < 3; j++) {
			unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j);
			if (swz != RC_SWIZZLE_UNUSED) {
				pair_inst->Alpha.Arg[i].Swizzle =
							rc_init_swizzle(swz, 1);
				break;
			}
		}
	}
	/* Reset the RGB half to an empty NOP. */
	pair_inst->RGB.Opcode = RC_OPCODE_NOP;
	pair_inst->RGB.DestIndex = 0;
	pair_inst->RGB.WriteMask = 0;
	pair_inst->RGB.Target = 0;
	pair_inst->RGB.OutputWriteMask = 0;
	pair_inst->RGB.DepthWriteMask = 0;
	pair_inst->RGB.Saturate = 0;
	memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg));

	/* Point every reader at the new register/channel. */
	for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) {
		struct rc_reader reader = sched_inst->GlobalReaders.Readers[i];
		rgb_to_alpha_remap(s, reader.Inst, reader.U.P.Arg,
					RC_FILE_TEMPORARY, old_swz, new_index);
	}
	return 1;
}
974
/**
 * Walk a ready list and try to convert its entries to alpha instructions
 * so that they can be paired with a ready RGB instruction.
 *
 * An entry is considered when its Alpha opcode is NOP or its RGB opcode is
 * REPL_ALPHA, and it writes exactly one value. A converted entry is moved
 * to ReadyAlpha and then tried against every entry of ReadyRGB; on a
 * successful merge the RGB instruction (with the converted one as its
 * PairedInst) moves to ReadyFullALU and the walk restarts at the head of
 * inst_list. The loop only runs while inst_list holds at least two entries.
 *
 * @param s         scheduler state
 * @param inst_list head pointer of the ready list to process
 */
static void try_convert_and_pair(
	struct schedule_state *s,
	struct schedule_instruction ** inst_list)
{
	struct schedule_instruction * list_ptr = *inst_list;
	while (list_ptr && *inst_list && (*inst_list)->NextReady) {
		int paired = 0;
		/* Skip entries whose alpha half is occupied, unless the RGB
		 * half merely replicates alpha. */
		if (list_ptr->Instruction->U.P.Alpha.Opcode != RC_OPCODE_NOP
			&& list_ptr->Instruction->U.P.RGB.Opcode
						!= RC_OPCODE_REPL_ALPHA) {
			goto next;
		}
		if (list_ptr->NumWriteValues == 1
					&& convert_rgb_to_alpha(s, list_ptr)) {

			struct schedule_instruction * pair_ptr;
			/* The entry is an alpha instruction now. */
			remove_inst_from_list(inst_list, list_ptr);
			add_inst_to_list_score(&s->ReadyAlpha, list_ptr);

			for (pair_ptr = s->ReadyRGB; pair_ptr;
					pair_ptr = pair_ptr->NextReady) {
				if (merge_instructions(&pair_ptr->Instruction->U.P,
					&list_ptr->Instruction->U.P)) {
					remove_inst_from_list(&s->ReadyAlpha, list_ptr);
					remove_inst_from_list(&s->ReadyRGB, pair_ptr);
					pair_ptr->PairedInst = list_ptr;

					add_inst_to_list(&s->ReadyFullALU, pair_ptr);
					/* The lists changed; restart from the head. */
					list_ptr = *inst_list;
					paired = 1;
					break;
				}

			}
		}
		if (!paired) {
			/* NOTE(review): after a conversion without pairing,
			 * list_ptr has been moved to ReadyAlpha, so this step
			 * appears to follow the ReadyAlpha chain rather than
			 * inst_list -- confirm that this is intended. */
next:
			list_ptr = list_ptr->NextReady;
		}
	}
}
1016
1017 /**
1018 * This function attempts to merge RGB and Alpha instructions together.
1019 */
pair_instructions(struct schedule_state * s)1020 static void pair_instructions(struct schedule_state * s)
1021 {
1022 struct schedule_instruction *rgb_ptr;
1023 struct schedule_instruction *alpha_ptr;
1024
1025 /* Some pairings might fail because they require too
1026 * many source slots; try all possible pairings if necessary */
1027 rgb_ptr = s->ReadyRGB;
1028 while(rgb_ptr) {
1029 struct schedule_instruction * rgb_next = rgb_ptr->NextReady;
1030 alpha_ptr = s->ReadyAlpha;
1031 while(alpha_ptr) {
1032 struct schedule_instruction * alpha_next = alpha_ptr->NextReady;
1033 if (merge_instructions(&rgb_ptr->Instruction->U.P, &alpha_ptr->Instruction->U.P)) {
1034 /* Remove RGB and Alpha from their ready lists.
1035 */
1036 remove_inst_from_list(&s->ReadyRGB, rgb_ptr);
1037 remove_inst_from_list(&s->ReadyAlpha, alpha_ptr);
1038 rgb_ptr->PairedInst = alpha_ptr;
1039 add_inst_to_list(&s->ReadyFullALU, rgb_ptr);
1040 break;
1041 }
1042 alpha_ptr = alpha_next;
1043 }
1044 rgb_ptr = rgb_next;
1045 }
1046
1047 if (!s->Opt) {
1048 return;
1049 }
1050
1051 /* Full instructions that have RC_OPCODE_REPL_ALPHA in the RGB
1052 * slot can be converted into Alpha instructions. */
1053 try_convert_and_pair(s, &s->ReadyFullALU);
1054
1055 /* Try to convert some of the RGB instructions to Alpha and
1056 * try to pair it with another RGB. */
1057 try_convert_and_pair(s, &s->ReadyRGB);
1058 }
1059
update_max_score(struct schedule_state * s,struct schedule_instruction ** list,int * max_score,struct schedule_instruction ** max_inst_out,struct schedule_instruction *** list_out)1060 static void update_max_score(
1061 struct schedule_state * s,
1062 struct schedule_instruction ** list,
1063 int * max_score,
1064 struct schedule_instruction ** max_inst_out,
1065 struct schedule_instruction *** list_out)
1066 {
1067 struct schedule_instruction * list_ptr;
1068 for (list_ptr = *list; list_ptr; list_ptr = list_ptr->NextReady) {
1069 int score;
1070 s->CalcScore(list_ptr);
1071 score = list_ptr->Score;
1072 if (!*max_inst_out || score > *max_score) {
1073 *max_score = score;
1074 *max_inst_out = list_ptr;
1075 *list_out = list;
1076 }
1077 }
1078 }
1079
emit_instruction(struct schedule_state * s,struct rc_instruction * before)1080 static void emit_instruction(
1081 struct schedule_state * s,
1082 struct rc_instruction * before)
1083 {
1084 int max_score = -1;
1085 struct schedule_instruction * max_inst = NULL;
1086 struct schedule_instruction ** max_list = NULL;
1087 unsigned tex_count = 0;
1088 struct schedule_instruction * tex_ptr;
1089
1090 pair_instructions(s);
1091 #if VERBOSE
1092 fprintf(stderr, "Full:\n");
1093 print_list(s->ReadyFullALU);
1094 fprintf(stderr, "RGB:\n");
1095 print_list(s->ReadyRGB);
1096 fprintf(stderr, "Alpha:\n");
1097 print_list(s->ReadyAlpha);
1098 fprintf(stderr, "TEX:\n");
1099 print_list(s->ReadyTEX);
1100 #endif
1101
1102 for (tex_ptr = s->ReadyTEX; tex_ptr; tex_ptr = tex_ptr->NextReady) {
1103 if (tex_ptr->Instruction->U.I.Opcode == RC_OPCODE_KIL) {
1104 emit_all_tex(s, before);
1105 return;
1106 }
1107 tex_count++;
1108 }
1109 update_max_score(s, &s->ReadyFullALU, &max_score, &max_inst, &max_list);
1110 update_max_score(s, &s->ReadyRGB, &max_score, &max_inst, &max_list);
1111 update_max_score(s, &s->ReadyAlpha, &max_score, &max_inst, &max_list);
1112
1113 if (tex_count >= s->max_tex_group || max_score == -1
1114 || (s->TEXCount > 0 && tex_count == s->TEXCount)
1115 || (tex_count > 0 && max_score < NO_OUTPUT_SCORE)) {
1116 emit_all_tex(s, before);
1117 } else {
1118
1119
1120 remove_inst_from_list(max_list, max_inst);
1121 rc_insert_instruction(before->Prev, max_inst->Instruction);
1122 commit_alu_instruction(s, max_inst);
1123
1124 presub_nop(before->Prev);
1125 }
1126 }
1127
add_tex_reader(struct schedule_state * s,struct schedule_instruction * writer,struct schedule_instruction * reader)1128 static void add_tex_reader(
1129 struct schedule_state * s,
1130 struct schedule_instruction * writer,
1131 struct schedule_instruction * reader)
1132 {
1133 if (!writer || writer->Instruction->Type != RC_INSTRUCTION_NORMAL) {
1134 /*Not a TEX instructions */
1135 return;
1136 }
1137 reader->TexReadCount++;
1138 rc_list_add(&writer->TexReaders, rc_list(&s->C->Pool, reader));
1139 }
1140
/**
 * Per-channel read callback (passed to rc_for_all_reads_chan): record that
 * s->Current reads channel \p chan of register \p file[\p index].
 *
 * The read is linked into the reader list of the register's current value,
 * the value's reader count is bumped, and - when the value was written
 * earlier in this block - s->Current gains one dependency. The value is also
 * appended to s->Current->ReadValues; running out of room there is reported
 * through rc_error().
 *
 * \param data   the struct schedule_state driving the scan
 * \param inst   instruction being scanned (not used here)
 * \param file   register file of the component being read
 * \param index  register index of the component being read
 * \param chan   channel of the component being read
 */
static void scan_read(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int chan)
{
	struct schedule_state * s = data;
	struct reg_value ** v = get_reg_valuep(s, file, index, chan);
	struct reg_value_reader * reader;

	if (!v)
		return;

	if (*v && (*v)->Writer == s->Current) {
		/* The instruction reads and writes the same register
		 * component. In this case we only want to count one
		 * dependency.
		 * Why?
		 * Because each instruction depends on the writers of its
		 * source registers _and_ the most recent writer of its
		 * destination register. Here a single earlier instruction
		 * plays both roles, and scan_write() has already counted it,
		 * so it must not be counted again.
		 */

		/* s->Current still has to be added to the previous writer's
		 * list of TexReaders if that writer was a TEX instruction.
		 */
		add_tex_reader(s, s->PrevWriter[chan], s->Current);

		return;
	}

	DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);

	reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
	/* Clear the node like the other allocations here do, so that Next is
	 * a valid list terminator even when this is the first reader of a
	 * freshly created value. */
	memset(reader, 0, sizeof(*reader));
	reader->Reader = s->Current;
	if (!*v) {
		/* In this situation, the instruction reads from a register
		 * that hasn't been written to or read from in the current
		 * block. */
		*v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value));
		memset(*v, 0, sizeof(struct reg_value));
		(*v)->Readers = reader;
	} else {
		reader->Next = (*v)->Readers;
		(*v)->Readers = reader;
		/* Only update the current instruction's dependencies if the
		 * register it reads from has been written to in this block. */
		if ((*v)->Writer) {
			add_tex_reader(s, (*v)->Writer, s->Current);
			s->Current->NumDependencies++;
		}
	}
	(*v)->NumReaders++;

	/* Bound taken from the array itself so it cannot drift from the
	 * struct definition. */
	if (s->Current->NumReadValues >=
	    sizeof(s->Current->ReadValues) / sizeof(s->Current->ReadValues[0])) {
		rc_error(s->C, "%s: NumReadValues overflow\n", __func__);
	} else {
		s->Current->ReadValues[s->Current->NumReadValues++] = *v;
	}
}
1202
/**
 * Per-channel write callback (passed to rc_for_all_writes_chan): record that
 * s->Current writes channel \p chan of register \p file[\p index].
 *
 * A fresh reg_value owned by s->Current replaces the register's current
 * value. If a previous value existed in this block it is chained to the new
 * one through Next, s->Current gains one dependency, and the previous
 * value's writer is remembered in s->PrevWriter[chan] for scan_read(). The
 * new value is also appended to s->Current->WriteValues; running out of room
 * there is reported through rc_error().
 *
 * \param data   the struct schedule_state driving the scan
 * \param inst   instruction being scanned (not used here)
 * \param file   register file of the component being written
 * \param index  register index of the component being written
 * \param chan   channel of the component being written
 */
static void scan_write(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int chan)
{
	struct schedule_state * s = data;
	struct reg_value ** pv = get_reg_valuep(s, file, index, chan);
	struct reg_value * newv;

	if (!pv)
		return;

	DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);

	newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv));
	memset(newv, 0, sizeof(*newv));

	newv->Writer = s->Current;

	if (*pv) {
		(*pv)->Next = newv;
		s->Current->NumDependencies++;
		/* Keep track of the previous writer to s->Current's
		 * destination register */
		s->PrevWriter[chan] = (*pv)->Writer;
	} else {
		/* No earlier value in this block, hence no previous writer.
		 * Clear the slot so that scan_read() does not pick up a
		 * writer left behind by an earlier instruction. */
		s->PrevWriter[chan] = NULL;
	}

	*pv = newv;

	/* Bound taken from the array itself so it cannot drift from the
	 * struct definition. */
	if (s->Current->NumWriteValues >=
	    sizeof(s->Current->WriteValues) / sizeof(s->Current->WriteValues[0])) {
		rc_error(s->C, "%s: NumWriteValues overflow\n", __func__);
	} else {
		s->Current->WriteValues[s->Current->NumWriteValues++] = newv;
	}
}
1236
is_rgb_to_alpha_possible_normal(void * userdata,struct rc_instruction * inst,struct rc_src_register * src)1237 static void is_rgb_to_alpha_possible_normal(
1238 void * userdata,
1239 struct rc_instruction * inst,
1240 struct rc_src_register * src)
1241 {
1242 struct rc_reader_data * reader_data = userdata;
1243 reader_data->Abort = 1;
1244
1245 }
1246
schedule_block(struct schedule_state * s,struct rc_instruction * begin,struct rc_instruction * end)1247 static void schedule_block(struct schedule_state * s,
1248 struct rc_instruction * begin, struct rc_instruction * end)
1249 {
1250 unsigned int ip;
1251
1252 /* Scan instructions for data dependencies */
1253 ip = 0;
1254 for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
1255 s->Current = memory_pool_malloc(&s->C->Pool, sizeof(*s->Current));
1256 memset(s->Current, 0, sizeof(struct schedule_instruction));
1257
1258 if (inst->Type == RC_INSTRUCTION_NORMAL) {
1259 const struct rc_opcode_info * info =
1260 rc_get_opcode_info(inst->U.I.Opcode);
1261 if (info->HasTexture) {
1262 s->TEXCount++;
1263 }
1264 }
1265
1266 /* XXX: This causes SemWait to be set for all instructions in
1267 * a block if the previous block contained a TEX instruction.
1268 * We can do better here, but it will take a lot of work. */
1269 if (s->PrevBlockHasTex) {
1270 s->Current->TexReadCount = 1;
1271 }
1272
1273 s->Current->Instruction = inst;
1274 inst->IP = ip++;
1275
1276 DBG("%i: Scanning\n", inst->IP);
1277
1278 /* The order of things here is subtle and maybe slightly
1279 * counter-intuitive, to account for the case where an
1280 * instruction writes to the same register as it reads
1281 * from. */
1282 rc_for_all_writes_chan(inst, &scan_write, s);
1283 rc_for_all_reads_chan(inst, &scan_read, s);
1284
1285 DBG("%i: Has %i dependencies\n", inst->IP, s->Current->NumDependencies);
1286
1287 if (!s->Current->NumDependencies) {
1288 instruction_ready(s, s->Current);
1289 }
1290
1291 /* Get global readers for possible RGB->Alpha conversion. */
1292 s->Current->GlobalReaders.ExitOnAbort = 1;
1293 rc_get_readers(s->C, inst, &s->Current->GlobalReaders,
1294 is_rgb_to_alpha_possible_normal,
1295 is_rgb_to_alpha_possible, NULL);
1296 }
1297
1298 /* Temporarily unlink all instructions */
1299 begin->Prev->Next = end;
1300 end->Prev = begin->Prev;
1301
1302 /* Schedule instructions back */
1303 while(!s->C->Error &&
1304 (s->ReadyTEX || s->ReadyRGB || s->ReadyAlpha || s->ReadyFullALU)) {
1305 emit_instruction(s, end);
1306 }
1307 }
1308
is_controlflow(struct rc_instruction * inst)1309 static int is_controlflow(struct rc_instruction * inst)
1310 {
1311 if (inst->Type == RC_INSTRUCTION_NORMAL) {
1312 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
1313 return opcode->IsFlowControl;
1314 }
1315 return 0;
1316 }
1317
rc_pair_schedule(struct radeon_compiler * cc,void * user)1318 void rc_pair_schedule(struct radeon_compiler *cc, void *user)
1319 {
1320 struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
1321 struct schedule_state s;
1322 struct rc_instruction * inst = c->Base.Program.Instructions.Next;
1323 unsigned int * opt = user;
1324
1325 memset(&s, 0, sizeof(s));
1326 s.Opt = *opt;
1327 s.C = &c->Base;
1328 if (s.C->is_r500) {
1329 s.CalcScore = calc_score_readers;
1330 } else {
1331 s.CalcScore = calc_score_r300;
1332 }
1333 s.max_tex_group = debug_get_num_option("RADEON_TEX_GROUP", 8);
1334 while(inst != &c->Base.Program.Instructions) {
1335 struct rc_instruction * first;
1336
1337 if (is_controlflow(inst)) {
1338 inst = inst->Next;
1339 continue;
1340 }
1341
1342 first = inst;
1343
1344 while(inst != &c->Base.Program.Instructions && !is_controlflow(inst))
1345 inst = inst->Next;
1346
1347 DBG("Schedule one block\n");
1348 memset(s.Temporary, 0, sizeof(s.Temporary));
1349 s.TEXCount = 0;
1350 schedule_block(&s, first, inst);
1351 if (s.PendingTEX) {
1352 s.PrevBlockHasTex = 1;
1353 }
1354 }
1355 }
1356