• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2009 Nicolai Haehnle.
3  *
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining
7  * a copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sublicense, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial
16  * portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  */
27 
28 #include "radeon_program_pair.h"
29 
30 #include <stdio.h>
31 
32 #include "radeon_compiler.h"
33 #include "radeon_compiler_util.h"
34 #include "radeon_dataflow.h"
35 #include "radeon_list.h"
36 #include "radeon_variable.h"
37 
38 #include "util/u_debug.h"
39 
/* Set to a non-zero value to get the scheduler's debug trace on stderr. */
#define VERBOSE 0

/* printf-style trace helper.  The fprintf() call is always compiled, so its
 * arguments stay type-checked, but it is only executed when VERBOSE is
 * non-zero. */
#define DBG(...) \
	do { \
		if (VERBOSE) \
			fprintf(stderr, __VA_ARGS__); \
	} while (0)
43 
44 struct schedule_instruction {
45 	struct rc_instruction * Instruction;
46 
47 	/** Next instruction in the linked list of ready instructions. */
48 	struct schedule_instruction *NextReady;
49 
50 	/** Values that this instruction reads and writes */
51 	struct reg_value * WriteValues[4];
52 	struct reg_value * ReadValues[12];
53 	unsigned int NumWriteValues:3;
54 	unsigned int NumReadValues:4;
55 
56 	/**
57 	 * Number of (read and write) dependencies that must be resolved before
58 	 * this instruction can be scheduled.
59 	 */
60 	unsigned int NumDependencies:5;
61 
62 	/** List of all readers (see rc_get_readers() for the definition of
63 	 * "all readers"), even those outside the basic block this instruction
64 	 * lives in. */
65 	struct rc_reader_data GlobalReaders;
66 
67 	/** If the scheduler has paired an RGB and an Alpha instruction together,
68 	 * PairedInst references the alpha instruction's dependency information.
69 	 */
70 	struct schedule_instruction * PairedInst;
71 
72 	/** This scheduler uses the value of Score to determine which
73 	 * instruction to schedule.  Instructions with a higher value of Score
74 	 * will be scheduled first. */
75 	int Score;
76 
77 	/** The number of components that read from a TEX instruction. */
78 	unsigned TexReadCount;
79 
80 	/** For TEX instructions a list of readers */
81 	struct rc_list * TexReaders;
82 };
83 
84 
/**
 * Node of the singly linked list of instructions that read one value.
 */
struct reg_value_reader {
	/** The reading instruction. */
	struct schedule_instruction *Reader;
	/** Next reader of the same value, or NULL. */
	struct reg_value_reader *Next;
};
92 
/**
 * Tracks one value stored in a single component of an RC_FILE_TEMPORARY
 * register.
 */
struct reg_value {
	/** The instruction that produces this value. */
	struct schedule_instruction *Writer;

	/**
	 * Unordered linked list of the instructions reading this value.
	 * Once the writer has been committed, the dependency count of every
	 * reader on this list is decreased.
	 */
	struct reg_value_reader *Readers;

	/**
	 * Number of readers that have not been committed yet.  It is
	 * decremented each time a reader of the value is committed; when it
	 * reaches zero, the dependency count of the instruction writing
	 * \ref Next is decremented.
	 */
	unsigned int NumReaders;

	/** The next value that will be written to the same register. */
	struct reg_value *Next;
};
117 
/** Tracked values of one register, one slot per channel. */
struct register_state {
	struct reg_value *Values[4];
};
121 
122 struct remap_reg {
123 	struct rc_instruction * Inst;
124 	unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1);
125 	unsigned int OldSwizzle:3;
126 	unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1);
127 	unsigned int NewSwizzle:3;
128 	unsigned int OnlyTexReads:1;
129 	struct remap_reg * Next;
130 };
131 
132 struct schedule_state {
133 	struct radeon_compiler * C;
134 	struct schedule_instruction * Current;
135 	/** Array of the previous writers of Current's destination register
136 	 * indexed by channel. */
137 	struct schedule_instruction * PrevWriter[4];
138 
139 	struct register_state Temporary[RC_REGISTER_MAX_INDEX];
140 
141 	/**
142 	 * Linked lists of instructions that can be scheduled right now,
143 	 * based on which ALU/TEX resources they require.
144 	 */
145 	/*@{*/
146 	struct schedule_instruction *ReadyFullALU;
147 	struct schedule_instruction *ReadyRGB;
148 	struct schedule_instruction *ReadyAlpha;
149 	struct schedule_instruction *ReadyTEX;
150 	/*@}*/
151 	struct rc_list *PendingTEX;
152 
153 	void (*CalcScore)(struct schedule_instruction *);
154 	long max_tex_group;
155 	unsigned PrevBlockHasTex:1;
156 	unsigned PrevBlockHasKil:1;
157 	unsigned TEXCount;
158 	unsigned Opt:1;
159 };
160 
/**
 * Look up the slot that tracks the current value of one register channel.
 *
 * Only RC_FILE_TEMPORARY registers are tracked.
 *
 * @return the address of the slot, or NULL when file is not
 * RC_FILE_TEMPORARY or when index is not below RC_REGISTER_MAX_INDEX (the
 * latter is also reported through rc_error()).  chan is not range checked.
 */
static struct reg_value **get_reg_valuep(struct schedule_state *s,
		rc_register_file file, unsigned int index, unsigned int chan)
{
	if (file == RC_FILE_TEMPORARY) {
		if (index < RC_REGISTER_MAX_INDEX) {
			return &s->Temporary[index].Values[chan];
		}

		rc_error(s->C, "%s: index %i out of bounds\n", __func__, index);
	}

	return NULL;
}
174 
get_tex_read_count(struct schedule_instruction * sinst)175 static unsigned get_tex_read_count(struct schedule_instruction * sinst)
176 {
177 	unsigned tex_read_count = sinst->TexReadCount;
178 	if (sinst->PairedInst) {
179 		tex_read_count += sinst->PairedInst->TexReadCount;
180 	}
181 	return tex_read_count;
182 }
183 
#if VERBOSE
/**
 * Debug helper: dump a ready list to stderr as
 * "IP (score) [tex read count]," entries followed by a newline.
 *
 * @param sinst head of the list (may be NULL, which prints an empty line).
 */
static void print_list(struct schedule_instruction *sinst)
{
	struct schedule_instruction *ptr;

	for (ptr = sinst; ptr; ptr = ptr->NextReady) {
		unsigned tex_read_count = get_tex_read_count(ptr);
		/* Report the score of the entry being printed, not the score
		 * of the list head, and keep it signed to match "%d". */
		int score = ptr->Score;

		fprintf(stderr,"%u (%d) [%u],", ptr->Instruction->IP, score,
						tex_read_count);
	}
	fprintf(stderr, "\n");
}
#endif
197 
remove_inst_from_list(struct schedule_instruction ** list,struct schedule_instruction * inst)198 static void remove_inst_from_list(struct schedule_instruction ** list,
199 					struct schedule_instruction * inst)
200 {
201 	struct schedule_instruction * prev = NULL;
202 	struct schedule_instruction * list_ptr;
203 	for (list_ptr = *list; list_ptr; prev = list_ptr,
204 					list_ptr = list_ptr->NextReady) {
205 		if (list_ptr == inst) {
206 			if (prev) {
207 				prev->NextReady = inst->NextReady;
208 			} else {
209 				*list = inst->NextReady;
210 			}
211 			inst->NextReady = NULL;
212 			break;
213 		}
214 	}
215 }
216 
add_inst_to_list(struct schedule_instruction ** list,struct schedule_instruction * inst)217 static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst)
218 {
219 	inst->NextReady = *list;
220 	*list = inst;
221 }
222 
add_inst_to_list_score(struct schedule_instruction ** list,struct schedule_instruction * inst)223 static void add_inst_to_list_score(struct schedule_instruction ** list,
224 					struct schedule_instruction * inst)
225 {
226 	struct schedule_instruction * temp;
227 	struct schedule_instruction * prev;
228 	if (!*list) {
229 		*list = inst;
230 		return;
231 	}
232 	temp = *list;
233 	prev = NULL;
234 	while(temp && inst->Score <= temp->Score) {
235 		prev = temp;
236 		temp = temp->NextReady;
237 	}
238 
239 	if (!prev) {
240 		inst->NextReady = temp;
241 		*list = inst;
242 	} else {
243 		prev->NextReady = inst;
244 		inst->NextReady = temp;
245 	}
246 }
247 
instruction_ready(struct schedule_state * s,struct schedule_instruction * sinst)248 static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst)
249 {
250 	DBG("%i is now ready\n", sinst->Instruction->IP);
251 
252 	/* Adding Ready TEX instructions to the end of the "Ready List" helps
253 	 * us emit TEX instructions in blocks without losing our place. */
254 	if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL)
255 		add_inst_to_list_score(&s->ReadyTEX, sinst);
256 	else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP)
257 		add_inst_to_list_score(&s->ReadyRGB, sinst);
258 	else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP)
259 		add_inst_to_list_score(&s->ReadyAlpha, sinst);
260 	else
261 		add_inst_to_list_score(&s->ReadyFullALU, sinst);
262 }
263 
decrease_dependencies(struct schedule_state * s,struct schedule_instruction * sinst)264 static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst)
265 {
266 	assert(sinst->NumDependencies > 0);
267 	sinst->NumDependencies--;
268 	if (!sinst->NumDependencies)
269 		instruction_ready(s, sinst);
270 }
271 
272 /* These functions provide different heuristics for scheduling instructions.
273  * The default is calc_score_readers. */
274 
#if 0

/* Disabled heuristic: every instruction gets the same score. */
static void calc_score_zero(struct schedule_instruction *sinst)
{
	sinst->Score = 0;
}

/* Disabled heuristic: score an instruction by the open dependencies of the
 * readers of the values it writes, with a bonus of 100 for each reader that
 * has exactly one dependency left. */
static void calc_score_deps(struct schedule_instruction *sinst)
{
	int i;

	sinst->Score = 0;
	for (i = 0; i < sinst->NumWriteValues; i++) {
		struct reg_value *v = sinst->WriteValues[i];
		struct reg_value_reader *r;

		if (!v->NumReaders)
			continue;

		for (r = v->Readers; r; r = r->Next) {
			if (r->Reader->NumDependencies == 1)
				sinst->Score += 100;
			sinst->Score += r->Reader->NumDependencies;
		}
	}
}

#endif
301 
302 #define NO_OUTPUT_SCORE (1 << 24)
303 
score_no_output(struct schedule_instruction * sinst)304 static void score_no_output(struct schedule_instruction * sinst)
305 {
306 	assert(sinst->Instruction->Type != RC_INSTRUCTION_NORMAL);
307 	if (!sinst->Instruction->U.P.RGB.OutputWriteMask &&
308 			!sinst->Instruction->U.P.Alpha.OutputWriteMask) {
309 		if (sinst->PairedInst) {
310 			if (!sinst->PairedInst->Instruction->U.P.
311 							RGB.OutputWriteMask
312 					&& !sinst->PairedInst->Instruction->U.P.
313 							Alpha.OutputWriteMask) {
314 				sinst->Score |= NO_OUTPUT_SCORE;
315 			}
316 
317 		} else {
318 			sinst->Score |= NO_OUTPUT_SCORE;
319 		}
320 	}
321 }
322 
323 #define PAIRED_SCORE (1 << 16)
324 
calc_score_r300(struct schedule_instruction * sinst)325 static void calc_score_r300(struct schedule_instruction * sinst)
326 {
327 	unsigned src_idx;
328 
329 	if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
330 		sinst->Score = 0;
331 		return;
332 	}
333 
334 	score_no_output(sinst);
335 
336 	if (sinst->PairedInst) {
337 		sinst->Score |= PAIRED_SCORE;
338 		return;
339 	}
340 
341 	for (src_idx = 0; src_idx < 4; src_idx++) {
342 		sinst->Score += sinst->Instruction->U.P.RGB.Src[src_idx].Used +
343 				sinst->Instruction->U.P.Alpha.Src[src_idx].Used;
344 	}
345 }
346 
347 #define NO_READ_TEX_SCORE (1 << 16)
348 
calc_score_readers(struct schedule_instruction * sinst)349 static void calc_score_readers(struct schedule_instruction * sinst)
350 {
351 	if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
352 		sinst->Score = 0;
353 	} else {
354 		sinst->Score = sinst->NumReadValues;
355 		if (sinst->PairedInst) {
356 			sinst->Score += sinst->PairedInst->NumReadValues;
357 		}
358 		if (get_tex_read_count(sinst) == 0) {
359 			sinst->Score |= NO_READ_TEX_SCORE;
360 		}
361 		score_no_output(sinst);
362 	}
363 }
364 
365 /**
366  * This function decreases the dependencies of the next instruction that
367  * wants to write to each of sinst's read values.
368  */
commit_update_reads(struct schedule_state * s,struct schedule_instruction * sinst)369 static void commit_update_reads(struct schedule_state * s,
370 					struct schedule_instruction * sinst){
371 	do {
372 		for(unsigned int i = 0; i < sinst->NumReadValues; ++i) {
373 			struct reg_value * v = sinst->ReadValues[i];
374 			assert(v->NumReaders > 0);
375 			v->NumReaders--;
376 			if (!v->NumReaders) {
377 				if (v->Next) {
378 					decrease_dependencies(s, v->Next->Writer);
379 				}
380 			}
381 		}
382 	} while ((sinst = sinst->PairedInst));
383 }
384 
commit_update_writes(struct schedule_state * s,struct schedule_instruction * sinst)385 static void commit_update_writes(struct schedule_state * s,
386 					struct schedule_instruction * sinst){
387 	do {
388 		for(unsigned int i = 0; i < sinst->NumWriteValues; ++i) {
389 			struct reg_value * v = sinst->WriteValues[i];
390 			if (v->NumReaders) {
391 				for(struct reg_value_reader * r = v->Readers; r; r = r->Next) {
392 					decrease_dependencies(s, r->Reader);
393 				}
394 			} else {
395 				/* This happens in instruction sequences of the type
396 				 *  OP r.x, ...;
397 				 *  OP r.x, r.x, ...;
398 				 * See also the subtlety in how instructions that both
399 				 * read and write the same register are scanned.
400 				 */
401 				if (v->Next)
402 					decrease_dependencies(s, v->Next->Writer);
403 			}
404 		}
405 	} while ((sinst = sinst->PairedInst));
406 }
407 
notify_sem_wait(struct schedule_state * s)408 static void notify_sem_wait(struct schedule_state *s)
409 {
410 	struct rc_list * pend_ptr;
411 	for (pend_ptr = s->PendingTEX; pend_ptr; pend_ptr = pend_ptr->Next) {
412 		struct rc_list * read_ptr;
413 		struct schedule_instruction * pending = pend_ptr->Item;
414 		for (read_ptr = pending->TexReaders; read_ptr;
415 						read_ptr = read_ptr->Next) {
416 			struct schedule_instruction * reader = read_ptr->Item;
417 			reader->TexReadCount--;
418 		}
419 	}
420 	s->PendingTEX = NULL;
421 }
422 
commit_alu_instruction(struct schedule_state * s,struct schedule_instruction * sinst)423 static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst)
424 {
425 	DBG("%i: commit score = %d\n", sinst->Instruction->IP, sinst->Score);
426 
427 	commit_update_reads(s, sinst);
428 
429 	commit_update_writes(s, sinst);
430 
431 	if (get_tex_read_count(sinst) > 0) {
432 		sinst->Instruction->U.P.SemWait = 1;
433 		notify_sem_wait(s);
434 	}
435 }
436 
437 /**
438  * Emit all ready texture instructions in a single block.
439  *
440  * Emit as a single block to (hopefully) sample many textures in parallel,
441  * and to avoid hardware indirections on R300.
442  */
emit_all_tex(struct schedule_state * s,struct rc_instruction * before)443 static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before)
444 {
445 	struct schedule_instruction *readytex;
446 	struct rc_instruction * inst_begin;
447 
448 	assert(s->ReadyTEX);
449 	notify_sem_wait(s);
450 
451 	/* Node marker for R300 */
452 	inst_begin = rc_insert_new_instruction(s->C, before->Prev);
453 	inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX;
454 
455 	/* Link texture instructions back in */
456 	readytex = s->ReadyTEX;
457 	while(readytex) {
458 		rc_insert_instruction(before->Prev, readytex->Instruction);
459 		DBG("%i: commit TEX reads\n", readytex->Instruction->IP);
460 
461 		/* All of the TEX instructions in the same TEX block have
462 		 * their source registers read from before any of the
463 		 * instructions in that block write to their destination
464 		 * registers.  This means that when we commit a TEX
465 		 * instruction, any other TEX instruction that wants to write
466 		 * to one of the committed instruction's source register can be
467 		 * marked as ready and should be emitted in the same TEX
468 		 * block. This prevents the following sequence from being
469 		 * emitted in two different TEX blocks:
470 		 * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0];
471 		 * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0];
472 		 */
473 		commit_update_reads(s, readytex);
474 		readytex = readytex->NextReady;
475 	}
476 	readytex = s->ReadyTEX;
477 	s->ReadyTEX = NULL;
478 	while(readytex){
479 		DBG("%i: commit TEX writes\n", readytex->Instruction->IP);
480 		commit_update_writes(s, readytex);
481 		/* Set semaphore bits for last TEX instruction in the block */
482 		if (!readytex->NextReady) {
483 			readytex->Instruction->U.I.TexSemAcquire = 1;
484 			readytex->Instruction->U.I.TexSemWait = 1;
485 		}
486 		rc_list_add(&s->PendingTEX, rc_list(&s->C->Pool, readytex));
487 		readytex = readytex->NextReady;
488 	}
489 }
490 
491 /* This is a helper function for destructive_merge_instructions().  It helps
492  * merge presubtract sources from two instructions and makes sure the
493  * presubtract sources end up in the correct spot.  This function assumes that
494  * dst_full is an rgb instruction, meaning that it has a vector instruction(rgb)
495  * but no scalar instruction (alpha).
496  * @return 0 if merging the presubtract sources fails.
497  * @return 1 if merging the presubtract sources succeeds.
498  */
merge_presub_sources(struct rc_pair_instruction * dst_full,struct rc_pair_sub_instruction src,unsigned int type)499 static int merge_presub_sources(
500 	struct rc_pair_instruction * dst_full,
501 	struct rc_pair_sub_instruction src,
502 	unsigned int type)
503 {
504 	unsigned int srcp_src, srcp_regs, is_rgb, is_alpha;
505 	struct rc_pair_sub_instruction * dst_sub;
506 	const struct rc_opcode_info * info;
507 
508 	assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP);
509 
510 	switch(type) {
511 	case RC_SOURCE_RGB:
512 		is_rgb = 1;
513 		is_alpha = 0;
514 		dst_sub = &dst_full->RGB;
515 		break;
516 	case RC_SOURCE_ALPHA:
517 		is_rgb = 0;
518 		is_alpha = 1;
519 		dst_sub = &dst_full->Alpha;
520 		break;
521 	default:
522 		assert(0);
523 		return 0;
524 	}
525 
526 	info = rc_get_opcode_info(dst_full->RGB.Opcode);
527 
528 	if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used)
529 		return 0;
530 
531 	srcp_regs = rc_presubtract_src_reg_count(
532 					src.Src[RC_PAIR_PRESUB_SRC].Index);
533 	for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
534 		unsigned int arg;
535 		int free_source;
536 		unsigned int one_way = 0;
537 		struct rc_pair_instruction_source srcp = src.Src[srcp_src];
538 		struct rc_pair_instruction_source temp;
539 
540 		free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha,
541 							srcp.File, srcp.Index);
542 
543 		/* If free_source < 0 then there are no free source
544 		 * slots. */
545 		if (free_source < 0)
546 			return 0;
547 
548 		temp = dst_sub->Src[srcp_src];
549 		dst_sub->Src[srcp_src] = dst_sub->Src[free_source];
550 
551 		/* srcp needs src0 and src1 to be the same */
552 		if (free_source < srcp_src) {
553 			if (!temp.Used)
554 				continue;
555 			free_source = rc_pair_alloc_source(dst_full, is_rgb,
556 					is_alpha, temp.File, temp.Index);
557 			if (free_source < 0)
558 				return 0;
559 			one_way = 1;
560 		} else {
561 			dst_sub->Src[free_source] = temp;
562 		}
563 
564 		/* If free_source == srcp_src, then the presubtract
565 		 * source is already in the correct place. */
566 		if (free_source == srcp_src)
567 			continue;
568 
569 		/* Shuffle the sources, so we can put the
570 		 * presubtract source in the correct place. */
571 		for(arg = 0; arg < info->NumSrcRegs; arg++) {
572 			/* If the arg does read both from rgb and alpha, then we need to rewrite
573 			 * both sources and the code currently doesn't handle this.
574 			 * FIXME: This is definitely solvable, however shader-db shows it is
575 			 * not worth the effort.
576 			 */
577 			if (rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle) & RC_SOURCE_ALPHA &&
578 				rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle) & RC_SOURCE_RGB)
579 				return 0;
580 
581 			/*If this arg does not read from an rgb source,
582 			 * do nothing. */
583 			if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle)
584 								& type)) {
585 				continue;
586 			}
587 
588 			if (dst_full->RGB.Arg[arg].Source == srcp_src)
589 				dst_full->RGB.Arg[arg].Source = free_source;
590 			/* We need to do this just in case register
591 			 * is one of the sources already, but in the
592 			 * wrong spot. */
593 			else if(dst_full->RGB.Arg[arg].Source == free_source
594 							&& !one_way) {
595 				dst_full->RGB.Arg[arg].Source = srcp_src;
596 			}
597 		}
598 	}
599 	return 1;
600 }
601 
602 
603 /* This function assumes that rgb.Alpha and alpha.RGB are unused */
destructive_merge_instructions(struct rc_pair_instruction * rgb,struct rc_pair_instruction * alpha)604 static int destructive_merge_instructions(
605 		struct rc_pair_instruction * rgb,
606 		struct rc_pair_instruction * alpha)
607 {
608 	const struct rc_opcode_info * opcode;
609 
610 	assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
611 	assert(alpha->RGB.Opcode == RC_OPCODE_NOP);
612 
613 	/* Presubtract registers need to be merged first so that registers
614 	 * needed by the presubtract operation can be placed in src0 and/or
615 	 * src1. */
616 
617 	/* Merge the rgb presubtract registers. */
618 	if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
619 		if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) {
620 			return 0;
621 		}
622 	}
623 	/* Merge the alpha presubtract registers */
624 	if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
625 		if(!merge_presub_sources(rgb,  alpha->Alpha, RC_SOURCE_ALPHA)){
626 			return 0;
627 		}
628 	}
629 
630 	/* Copy alpha args into rgb */
631 	opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
632 
633 	for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
634 		unsigned int srcrgb = 0;
635 		unsigned int srcalpha = 0;
636 		unsigned int oldsrc = alpha->Alpha.Arg[arg].Source;
637 		rc_register_file file = 0;
638 		unsigned int index = 0;
639 		int source;
640 
641 		if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) {
642 			srcrgb = 1;
643 			file = alpha->RGB.Src[oldsrc].File;
644 			index = alpha->RGB.Src[oldsrc].Index;
645 		} else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) {
646 			srcalpha = 1;
647 			file = alpha->Alpha.Src[oldsrc].File;
648 			index = alpha->Alpha.Src[oldsrc].Index;
649 		}
650 
651 		source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index);
652 		if (source < 0)
653 			return 0;
654 
655 		rgb->Alpha.Arg[arg].Source = source;
656 		rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle;
657 		rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs;
658 		rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate;
659 	}
660 
661 	/* Copy alpha opcode into rgb */
662 	rgb->Alpha.Opcode = alpha->Alpha.Opcode;
663 	rgb->Alpha.DestIndex = alpha->Alpha.DestIndex;
664 	rgb->Alpha.WriteMask = alpha->Alpha.WriteMask;
665 	rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask;
666 	rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask;
667 	rgb->Alpha.Saturate = alpha->Alpha.Saturate;
668 	rgb->Alpha.Omod = alpha->Alpha.Omod;
669 
670 	/* Merge ALU result writing */
671 	if (alpha->WriteALUResult) {
672 		if (rgb->WriteALUResult)
673 			return 0;
674 
675 		rgb->WriteALUResult = alpha->WriteALUResult;
676 		rgb->ALUResultCompare = alpha->ALUResultCompare;
677 	}
678 
679 	/* Copy SemWait */
680 	rgb->SemWait |= alpha->SemWait;
681 
682 	return 1;
683 }
684 
685 /**
686  * Try to merge the given instructions into the rgb instructions.
687  *
688  * Return true on success; on failure, return false, and keep
689  * the instructions untouched.
690  */
merge_instructions(struct rc_pair_instruction * rgb,struct rc_pair_instruction * alpha)691 static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha)
692 {
693 	struct rc_pair_instruction backup;
694 
695 	/*Instructions can't write output registers and ALU result at the
696 	 * same time. */
697 	if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask)
698 		|| (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) {
699 		return 0;
700 	}
701 
702 	/* Writing output registers in the middle of shaders is slow, so
703 	 * we don't want to pair output writes with temp writes. */
704 	if ((rgb->RGB.OutputWriteMask && !alpha->Alpha.OutputWriteMask)
705 		|| (!rgb->RGB.OutputWriteMask && alpha->Alpha.OutputWriteMask)) {
706 		return 0;
707 	}
708 
709 	memcpy(&backup, rgb, sizeof(struct rc_pair_instruction));
710 
711 	if (destructive_merge_instructions(rgb, alpha))
712 		return 1;
713 
714 	memcpy(rgb, &backup, sizeof(struct rc_pair_instruction));
715 	return 0;
716 }
717 
presub_nop(struct rc_instruction * emitted)718 static void presub_nop(struct rc_instruction * emitted) {
719 	int prev_rgb_index, prev_alpha_index, i, num_src;
720 
721 	/* We don't need a nop if the previous instruction is a TEX. */
722 	if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) {
723 		return;
724 	}
725 	if (emitted->Prev->U.P.RGB.WriteMask)
726 		prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex;
727 	else
728 		prev_rgb_index = -1;
729 	if (emitted->Prev->U.P.Alpha.WriteMask)
730 		prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex;
731 	else
732 		prev_alpha_index = 1;
733 
734 	/* Check the previous rgb instruction */
735 	if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
736 		num_src = rc_presubtract_src_reg_count(
737 				emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index);
738 		for (i = 0; i < num_src; i++) {
739 			unsigned int index = emitted->U.P.RGB.Src[i].Index;
740 			if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY
741 			    && (index  == prev_rgb_index
742 				|| index == prev_alpha_index)) {
743 				emitted->Prev->U.P.Nop = 1;
744 				return;
745 			}
746 		}
747 	}
748 
749 	/* Check the previous alpha instruction. */
750 	if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
751 		return;
752 
753 	num_src = rc_presubtract_src_reg_count(
754 				emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
755 	for (i = 0; i < num_src; i++) {
756 		unsigned int index = emitted->U.P.Alpha.Src[i].Index;
757 		if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY
758 		   && (index == prev_rgb_index || index == prev_alpha_index)) {
759 			emitted->Prev->U.P.Nop = 1;
760 			return;
761 		}
762 	}
763 }
764 
/**
 * Rewrite one argument of a reader so that it reads the alpha channel of
 * register new_index instead of channel old_swz.
 *
 * Each of the first three swizzle components equal to old_swz is replaced
 * by RC_SWIZZLE_W, then an alpha source for (old_file, new_index) is
 * allocated in inst and arg is pointed at it.  If no source can be
 * allocated, the error is reported through rc_error() and arg->Source is
 * left unchanged (the swizzle has already been rewritten at that point).
 */
static void rgb_to_alpha_remap (
	struct schedule_state *s,
	struct rc_instruction *inst,
	struct rc_pair_instruction_arg *arg,
	rc_register_file old_file,
	rc_swizzle old_swz,
	unsigned int new_index)
{
	unsigned int chan;
	int alpha_src;

	for (chan = 0; chan < 3; chan++) {
		if (get_swz(arg->Swizzle, chan) != old_swz)
			continue;

		SET_SWZ(arg->Swizzle, chan, RC_SWIZZLE_W);
	}

	alpha_src = rc_pair_alloc_source(&inst->U.P, 0, 1,
							old_file, new_index);
	if (alpha_src >= 0) {
		arg->Source = alpha_src;
		return;
	}

	/* This conversion is not possible, we must have made a mistake in
	 * is_rgb_to_alpha_possible. */
	rc_error(s->C, "rgb_to_alpha_remap failed to allocate src.\n");
}
792 
can_remap(unsigned int opcode)793 static int can_remap(unsigned int opcode)
794 {
795 	switch(opcode) {
796 	case RC_OPCODE_DDX:
797 	case RC_OPCODE_DDY:
798 		return 0;
799 	default:
800 		return 1;
801 	}
802 }
803 
can_convert_opcode_to_alpha(unsigned int opcode)804 static int can_convert_opcode_to_alpha(unsigned int opcode)
805 {
806 	switch(opcode) {
807 	case RC_OPCODE_DDX:
808 	case RC_OPCODE_DDY:
809 	case RC_OPCODE_DP2:
810 	case RC_OPCODE_DP3:
811 	case RC_OPCODE_DP4:
812 		return 0;
813 	default:
814 		return 1;
815 	}
816 }
817 
is_rgb_to_alpha_possible(void * userdata,struct rc_instruction * inst,struct rc_pair_instruction_arg * arg,struct rc_pair_instruction_source * src)818 static void is_rgb_to_alpha_possible(
819 	void * userdata,
820 	struct rc_instruction * inst,
821 	struct rc_pair_instruction_arg * arg,
822 	struct rc_pair_instruction_source * src)
823 {
824 	unsigned int read_chan = RC_SWIZZLE_UNUSED;
825 	unsigned int alpha_sources = 0;
826 	unsigned int i;
827 	struct rc_reader_data * reader_data = userdata;
828 
829 	if (!can_remap(inst->U.P.RGB.Opcode)
830 	    || !can_remap(inst->U.P.Alpha.Opcode)) {
831 		reader_data->Abort = 1;
832 		return;
833 	}
834 
835 	if (!src)
836 		return;
837 
838 	/* XXX There are some cases where we can still do the conversion if
839 	 * a reader reads from a presubtract source, but for now we'll prevent
840 	 * it. */
841 	if (arg->Source == RC_PAIR_PRESUB_SRC) {
842 		reader_data->Abort = 1;
843 		return;
844 	}
845 
846 	/* Make sure the source only reads the register component that we
847 	 * are going to be converting from.  It is OK if the instruction uses
848 	 * this component more than once.
849 	 * XXX If the index we will be converting to is the same as the
850 	 * current index, then it is OK to read from more than one component.
851 	 */
852 	for (i = 0; i < 3; i++) {
853 		rc_swizzle swz = get_swz(arg->Swizzle, i);
854 		switch(swz) {
855 		case RC_SWIZZLE_X:
856 		case RC_SWIZZLE_Y:
857 		case RC_SWIZZLE_Z:
858 		case RC_SWIZZLE_W:
859 			if (read_chan == RC_SWIZZLE_UNUSED) {
860 				read_chan = swz;
861 			} else if (read_chan != swz) {
862 				reader_data->Abort = 1;
863 				return;
864 			}
865 			break;
866 		default:
867 			break;
868 		}
869 	}
870 
871 	/* Make sure there are enough alpha sources.
872 	 * XXX If we know what register all the readers are going
873 	 * to be remapped to, then in some situations we can still do
874 	 * the substitution, even if all 3 alpha sources are being used.*/
875 	for (i = 0; i < 3; i++) {
876 		if (inst->U.P.Alpha.Src[i].Used) {
877 			alpha_sources++;
878 		}
879 	}
880 	if (alpha_sources > 2) {
881 		reader_data->Abort = 1;
882 		return;
883 	}
884 }
885 
convert_rgb_to_alpha(struct schedule_state * s,struct schedule_instruction * sched_inst)886 static int convert_rgb_to_alpha(
887 	struct schedule_state * s,
888 	struct schedule_instruction * sched_inst)
889 {
890 	struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P;
891 	unsigned int old_mask = pair_inst->RGB.WriteMask;
892 	unsigned int old_swz = rc_mask_to_swizzle(old_mask);
893 	const struct rc_opcode_info * info =
894 				rc_get_opcode_info(pair_inst->RGB.Opcode);
895 	int new_index = -1;
896 	unsigned int i;
897 
898 	if (sched_inst->GlobalReaders.Abort)
899 		return 0;
900 
901 	/* Even though we checked that we can convert to alpha previously, it is
902 	 * possible that another rgb source of the reader instructions was already
903 	 * converted to alpha and we thus have no longer free alpha sources.
904 	 */
905 	for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) {
906 		struct rc_reader reader = sched_inst->GlobalReaders.Readers[i];
907 		if (reader.Inst->U.P.Alpha.Src[2].Used)
908 			return 0;
909 	}
910 
911 	if (!pair_inst->RGB.WriteMask)
912 		return 0;
913 
914 	if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode)
915 	    || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) {
916 		return 0;
917 	}
918 
919 	assert(sched_inst->NumWriteValues == 1);
920 
921 	if (!sched_inst->WriteValues[0]) {
922 		assert(0);
923 		return 0;
924 	}
925 
926 	/* We start at the old index, because if we can reuse the same
927 	 * register and just change the swizzle then it is more likely we
928 	 * will be able to convert all the readers. */
929 	for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) {
930 		struct reg_value ** new_regvalp = get_reg_valuep(
931 						s, RC_FILE_TEMPORARY, i, 3);
932 		if (!*new_regvalp) {
933 			struct reg_value ** old_regvalp =
934 				get_reg_valuep(s,
935 					RC_FILE_TEMPORARY,
936 					pair_inst->RGB.DestIndex,
937 					rc_mask_to_swizzle(old_mask));
938 			new_index = i;
939 			*new_regvalp = *old_regvalp;
940 			break;
941 		}
942 	}
943 	if (new_index < 0) {
944 		return 0;
945 	}
946 
947 	/* If we are converting a full instruction with RC_OPCODE_REPL_ALPHA
948 	 * as the RGB opcode, then the Alpha instruction will already contain
949 	 * the correct opcode and instruction args, so we do not want to
950 	 * overwrite them.
951 	 */
952 	if (pair_inst->RGB.Opcode != RC_OPCODE_REPL_ALPHA) {
953 		pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode;
954 		memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg,
955 						sizeof(pair_inst->Alpha.Arg));
956 	}
957 	pair_inst->Alpha.DestIndex = new_index;
958 	pair_inst->Alpha.WriteMask = RC_MASK_W;
959 	pair_inst->Alpha.Target = pair_inst->RGB.Target;
960 	pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask;
961 	pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask;
962 	pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate;
963 	pair_inst->Alpha.Omod = pair_inst->RGB.Omod;
964 	/* Move the swizzles into the first chan */
965 	for (i = 0; i < info->NumSrcRegs; i++) {
966 		unsigned int j;
967 		for (j = 0; j < 3; j++) {
968 			unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j);
969 			if (swz != RC_SWIZZLE_UNUSED) {
970 				pair_inst->Alpha.Arg[i].Swizzle =
971 							rc_init_swizzle(swz, 1);
972 				break;
973 			}
974 		}
975 	}
976 	pair_inst->RGB.Opcode = RC_OPCODE_NOP;
977 	pair_inst->RGB.DestIndex = 0;
978 	pair_inst->RGB.WriteMask = 0;
979 	pair_inst->RGB.Target = 0;
980 	pair_inst->RGB.OutputWriteMask = 0;
981 	pair_inst->RGB.DepthWriteMask = 0;
982 	pair_inst->RGB.Saturate = 0;
983 	memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg));
984 
985 	for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) {
986 		struct rc_reader reader = sched_inst->GlobalReaders.Readers[i];
987 		rgb_to_alpha_remap(s, reader.Inst, reader.U.P.Arg,
988 					RC_FILE_TEMPORARY, old_swz, new_index);
989 	}
990 	return 1;
991 }
992 
try_convert_and_pair(struct schedule_state * s,struct schedule_instruction ** inst_list)993 static void try_convert_and_pair(
994 	struct schedule_state *s,
995 	struct schedule_instruction ** inst_list)
996 {
997 	struct schedule_instruction * list_ptr = *inst_list;
998 	while (list_ptr && *inst_list && (*inst_list)->NextReady) {
999 		int paired = 0;
1000 		if (list_ptr->Instruction->U.P.Alpha.Opcode != RC_OPCODE_NOP
1001 			&& list_ptr->Instruction->U.P.RGB.Opcode
1002 						!= RC_OPCODE_REPL_ALPHA) {
1003 				goto next;
1004 		}
1005 		if (list_ptr->NumWriteValues == 1
1006 					&& convert_rgb_to_alpha(s, list_ptr)) {
1007 
1008 			struct schedule_instruction * pair_ptr;
1009 			remove_inst_from_list(inst_list, list_ptr);
1010 			add_inst_to_list_score(&s->ReadyAlpha, list_ptr);
1011 
1012 			for (pair_ptr = s->ReadyRGB; pair_ptr;
1013 					pair_ptr = pair_ptr->NextReady) {
1014 				if (merge_instructions(&pair_ptr->Instruction->U.P,
1015 						&list_ptr->Instruction->U.P)) {
1016 					remove_inst_from_list(&s->ReadyAlpha, list_ptr);
1017 					remove_inst_from_list(&s->ReadyRGB, pair_ptr);
1018 					pair_ptr->PairedInst = list_ptr;
1019 
1020 					add_inst_to_list(&s->ReadyFullALU, pair_ptr);
1021 					list_ptr = *inst_list;
1022 					paired = 1;
1023 					break;
1024 				}
1025 
1026 			}
1027 		}
1028 		if (!paired) {
1029 next:
1030 			list_ptr = list_ptr->NextReady;
1031 		}
1032 	}
1033 }
1034 
1035 /**
1036  * This function attempts to merge RGB and Alpha instructions together.
1037  */
pair_instructions(struct schedule_state * s)1038 static void pair_instructions(struct schedule_state * s)
1039 {
1040 	struct schedule_instruction *rgb_ptr;
1041 	struct schedule_instruction *alpha_ptr;
1042 
1043 	/* Some pairings might fail because they require too
1044 	 * many source slots; try all possible pairings if necessary */
1045 	rgb_ptr = s->ReadyRGB;
1046 	while(rgb_ptr) {
1047 		struct schedule_instruction * rgb_next = rgb_ptr->NextReady;
1048 		alpha_ptr = s->ReadyAlpha;
1049 		while(alpha_ptr) {
1050 			struct schedule_instruction * alpha_next = alpha_ptr->NextReady;
1051 			if (merge_instructions(&rgb_ptr->Instruction->U.P, &alpha_ptr->Instruction->U.P)) {
1052 				/* Remove RGB and Alpha from their ready lists.
1053 				 */
1054 				remove_inst_from_list(&s->ReadyRGB, rgb_ptr);
1055 				remove_inst_from_list(&s->ReadyAlpha, alpha_ptr);
1056 				rgb_ptr->PairedInst = alpha_ptr;
1057 				add_inst_to_list(&s->ReadyFullALU, rgb_ptr);
1058 				break;
1059 			}
1060 			alpha_ptr = alpha_next;
1061 		}
1062 		rgb_ptr = rgb_next;
1063 	}
1064 
1065 	if (!s->Opt) {
1066 		return;
1067 	}
1068 
1069 	/* Full instructions that have RC_OPCODE_REPL_ALPHA in the RGB
1070 	 * slot can be converted into Alpha instructions. */
1071 	try_convert_and_pair(s, &s->ReadyFullALU);
1072 
1073 	/* Try to convert some of the RGB instructions to Alpha and
1074 	 * try to pair it with another RGB. */
1075 	try_convert_and_pair(s, &s->ReadyRGB);
1076 }
1077 
update_max_score(struct schedule_state * s,struct schedule_instruction ** list,int * max_score,struct schedule_instruction ** max_inst_out,struct schedule_instruction *** list_out)1078 static void update_max_score(
1079 	struct schedule_state * s,
1080 	struct schedule_instruction ** list,
1081 	int * max_score,
1082 	struct schedule_instruction ** max_inst_out,
1083 	struct schedule_instruction *** list_out)
1084 {
1085 	struct schedule_instruction * list_ptr;
1086 	for (list_ptr = *list; list_ptr; list_ptr = list_ptr->NextReady) {
1087 		int score;
1088 		s->CalcScore(list_ptr);
1089 		score = list_ptr->Score;
1090 		if (!*max_inst_out || score > *max_score) {
1091 			*max_score = score;
1092 			*max_inst_out = list_ptr;
1093 			*list_out = list;
1094 		}
1095 	}
1096 }
1097 
emit_instruction(struct schedule_state * s,struct rc_instruction * before)1098 static void emit_instruction(
1099 	struct schedule_state * s,
1100 	struct rc_instruction * before)
1101 {
1102 	int max_score = -1;
1103 	struct schedule_instruction * max_inst = NULL;
1104 	struct schedule_instruction ** max_list = NULL;
1105 	unsigned tex_count = 0;
1106 	struct schedule_instruction * tex_ptr;
1107 
1108 	pair_instructions(s);
1109 #if VERBOSE
1110 	fprintf(stderr, "Full:\n");
1111 	print_list(s->ReadyFullALU);
1112 	fprintf(stderr, "RGB:\n");
1113 	print_list(s->ReadyRGB);
1114 	fprintf(stderr, "Alpha:\n");
1115 	print_list(s->ReadyAlpha);
1116 	fprintf(stderr, "TEX:\n");
1117 	print_list(s->ReadyTEX);
1118 #endif
1119 
1120 	for (tex_ptr = s->ReadyTEX; tex_ptr; tex_ptr = tex_ptr->NextReady) {
1121 		if (tex_ptr->Instruction->U.I.Opcode == RC_OPCODE_KIL) {
1122 			emit_all_tex(s, before);
1123 			s->PrevBlockHasKil = 1;
1124 			return;
1125 		}
1126 		tex_count++;
1127 	}
1128 	update_max_score(s, &s->ReadyFullALU, &max_score, &max_inst, &max_list);
1129 	update_max_score(s, &s->ReadyRGB, &max_score, &max_inst, &max_list);
1130 	update_max_score(s, &s->ReadyAlpha, &max_score, &max_inst, &max_list);
1131 
1132 	if (tex_count >= s->max_tex_group || max_score == -1
1133 		|| (s->TEXCount > 0 && tex_count == s->TEXCount)
1134 		|| (tex_count > 0 && max_score < NO_OUTPUT_SCORE)) {
1135 		emit_all_tex(s, before);
1136 	} else {
1137 
1138 
1139 		remove_inst_from_list(max_list, max_inst);
1140 		rc_insert_instruction(before->Prev, max_inst->Instruction);
1141 		commit_alu_instruction(s, max_inst);
1142 
1143 		presub_nop(before->Prev);
1144 	}
1145 }
1146 
add_tex_reader(struct schedule_state * s,struct schedule_instruction * writer,struct schedule_instruction * reader)1147 static void add_tex_reader(
1148 	struct schedule_state * s,
1149 	struct schedule_instruction * writer,
1150 	struct schedule_instruction * reader)
1151 {
1152 	if (!writer || writer->Instruction->Type != RC_INSTRUCTION_NORMAL) {
1153 		/*Not a TEX instructions */
1154 		return;
1155 	}
1156 	reader->TexReadCount++;
1157 	rc_list_add(&writer->TexReaders, rc_list(&s->C->Pool, reader));
1158 }
1159 
/**
 * Per-channel read callback for rc_for_all_reads_chan(): records that
 * s->Current reads the given register channel.
 *
 * The reader is linked into the value's Readers list, the value's NumReaders
 * is incremented, the value is appended to s->Current->ReadValues and, when
 * the value has a writer in this block, s->Current gains one dependency.
 *
 * \param data   the struct schedule_state of the running scheduling pass
 * \param inst   instruction being scanned (not used here; s->Current is)
 * \param file   register file of the source
 * \param index  register index of the source
 * \param chan   channel of the source
 */
static void scan_read(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int chan)
{
	struct schedule_state * s = data;
	struct reg_value ** v = get_reg_valuep(s, file, index, chan);
	struct reg_value_reader * reader;

	/* No value slot is kept for this register. */
	if (!v)
		return;

	if (*v && (*v)->Writer == s->Current) {
		/* The instruction reads and writes to a register component.
		 * In this case, we only want to increment dependencies by one.
		 * Why?
		 * Because each instruction depends on the writers of its source
		 * registers _and_ the most recent writer of its destination
		 * register.  In this case, the current instruction (s->Current)
		 * has a dependency that both writes to one of its source
		 * registers and was the most recent writer to its destination
		 * register.  We have already marked this dependency in
		 * scan_write(), so we don't need to do it again.
		 */

		/* We need to make sure we are adding s->Current to the
		 * previous writer's list of TexReaders, if the previous writer
		 * was a TEX instruction.
		 */
		add_tex_reader(s, s->PrevWriter[chan], s->Current);

		return;
	}

	DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);

	reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
	/* Zero the node like every other pool allocation in this file. Without
	 * this, reader->Next stays uninitialized in the "no value yet" branch
	 * below and the Readers list would have no valid terminator. */
	memset(reader, 0, sizeof(*reader));
	reader->Reader = s->Current;
	if (!*v) {
		/* In this situation, the instruction reads from a register
		 * that hasn't been written to or read from in the current
		 * block. */
		*v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value));
		memset(*v, 0, sizeof(struct reg_value));
		(*v)->Readers = reader;
	} else {
		/* Push the new reader onto the front of the existing list. */
		reader->Next = (*v)->Readers;
		(*v)->Readers = reader;
		/* Only update the current instruction's dependencies if the
		 * register it reads from has been written to in this block. */
		if ((*v)->Writer) {
			add_tex_reader(s, (*v)->Writer, s->Current);
			s->Current->NumDependencies++;
		}
	}
	(*v)->NumReaders++;

	/* Report instead of writing past the 12 tracked read values. */
	if (s->Current->NumReadValues >= 12) {
		rc_error(s->C, "%s: NumReadValues overflow\n", __func__);
	} else {
		s->Current->ReadValues[s->Current->NumReadValues++] = *v;
	}
}
1221 
/**
 * Per-channel write callback for rc_for_all_writes_chan(): records that
 * s->Current writes the given register channel.
 *
 * A fresh, zeroed reg_value with s->Current as its writer replaces the
 * channel's current value. If a value was already present, it is chained to
 * the new one through Next, s->Current gains a dependency and the previous
 * writer is remembered in s->PrevWriter[chan].
 *
 * \param data   the struct schedule_state of the running scheduling pass
 * \param inst   instruction being scanned (not used here; s->Current is)
 * \param file   register file of the destination
 * \param index  register index of the destination
 * \param chan   channel of the destination
 */
static void scan_write(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int chan)
{
	struct schedule_state * s = data;
	struct reg_value ** slot = get_reg_valuep(s, file, index, chan);
	struct reg_value * fresh;
	struct reg_value * prior;

	/* No value slot is kept for this register. */
	if (slot == NULL)
		return;

	DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);

	fresh = memory_pool_malloc(&s->C->Pool, sizeof(*fresh));
	memset(fresh, 0, sizeof(*fresh));
	fresh->Writer = s->Current;

	prior = *slot;
	if (prior != NULL) {
		prior->Next = fresh;
		s->Current->NumDependencies++;
		/* Remember who last wrote s->Current's destination
		 * register. */
		s->PrevWriter[chan] = prior->Writer;
	}

	*slot = fresh;

	/* Report instead of writing past the 4 tracked write values. */
	if (s->Current->NumWriteValues < 4) {
		s->Current->WriteValues[s->Current->NumWriteValues++] = fresh;
	} else {
		rc_error(s->C, "%s: NumWriteValues overflow\n", __func__);
	}
}
1255 
is_rgb_to_alpha_possible_normal(void * userdata,struct rc_instruction * inst,struct rc_src_register * src)1256 static void is_rgb_to_alpha_possible_normal(
1257 	void * userdata,
1258 	struct rc_instruction * inst,
1259 	struct rc_src_register * src)
1260 {
1261 	struct rc_reader_data * reader_data = userdata;
1262 	reader_data->Abort = 1;
1263 
1264 }
1265 
schedule_block(struct schedule_state * s,struct rc_instruction * begin,struct rc_instruction * end)1266 static void schedule_block(struct schedule_state * s,
1267 		struct rc_instruction * begin, struct rc_instruction * end)
1268 {
1269 	unsigned int ip;
1270 
1271 	/* Scan instructions for data dependencies */
1272 	ip = 0;
1273 	for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
1274 		s->Current = memory_pool_malloc(&s->C->Pool, sizeof(*s->Current));
1275 		memset(s->Current, 0, sizeof(struct schedule_instruction));
1276 
1277 		if (inst->Type == RC_INSTRUCTION_NORMAL) {
1278 			const struct rc_opcode_info * info =
1279 					rc_get_opcode_info(inst->U.I.Opcode);
1280 			if (info->HasTexture) {
1281 				s->TEXCount++;
1282 			}
1283 		}
1284 
1285 		/* XXX: This causes SemWait to be set for all instructions in
1286 		 * a block if the previous block contained a TEX instruction.
1287 		 * We can do better here, but it will take a lot of work. */
1288 		if (s->PrevBlockHasTex) {
1289 			s->Current->TexReadCount = 1;
1290 		}
1291 
1292 		s->Current->Instruction = inst;
1293 		inst->IP = ip++;
1294 
1295 		DBG("%i: Scanning\n", inst->IP);
1296 
1297 		/* The order of things here is subtle and maybe slightly
1298 		 * counter-intuitive, to account for the case where an
1299 		 * instruction writes to the same register as it reads
1300 		 * from. */
1301 		rc_for_all_writes_chan(inst, &scan_write, s);
1302 		rc_for_all_reads_chan(inst, &scan_read, s);
1303 
1304 		DBG("%i: Has %i dependencies\n", inst->IP, s->Current->NumDependencies);
1305 
1306 		if (!s->Current->NumDependencies) {
1307 			instruction_ready(s, s->Current);
1308 		}
1309 
1310 		/* Get global readers for possible RGB->Alpha conversion. */
1311 		s->Current->GlobalReaders.ExitOnAbort = 1;
1312 		rc_get_readers(s->C, inst, &s->Current->GlobalReaders,
1313 				is_rgb_to_alpha_possible_normal,
1314 				is_rgb_to_alpha_possible, NULL);
1315 	}
1316 
1317 	/* Temporarily unlink all instructions */
1318 	begin->Prev->Next = end;
1319 	end->Prev = begin->Prev;
1320 
1321 	/* Schedule instructions back */
1322 	while(!s->C->Error &&
1323 	      (s->ReadyTEX || s->ReadyRGB || s->ReadyAlpha || s->ReadyFullALU)) {
1324 		emit_instruction(s, end);
1325 	}
1326 }
1327 
is_controlflow(struct rc_instruction * inst)1328 static int is_controlflow(struct rc_instruction * inst)
1329 {
1330 	if (inst->Type == RC_INSTRUCTION_NORMAL) {
1331 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
1332 		return opcode->IsFlowControl;
1333 	}
1334 	return 0;
1335 }
1336 
rc_pair_schedule(struct radeon_compiler * cc,void * user)1337 void rc_pair_schedule(struct radeon_compiler *cc, void *user)
1338 {
1339 	struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
1340 	struct schedule_state s;
1341 	struct rc_instruction * inst = c->Base.Program.Instructions.Next;
1342 	unsigned int * opt = user;
1343 
1344 	memset(&s, 0, sizeof(s));
1345 	s.Opt = *opt;
1346 	s.C = &c->Base;
1347 	if (s.C->is_r500) {
1348 		s.CalcScore = calc_score_readers;
1349 	} else {
1350 		s.CalcScore = calc_score_r300;
1351 	}
1352 	s.max_tex_group = debug_get_num_option("RADEON_TEX_GROUP", 8);
1353 	while(inst != &c->Base.Program.Instructions) {
1354 		struct rc_instruction * first;
1355 
1356 		if (is_controlflow(inst)) {
1357 			/* The TexSemWait flag is already properly set for ALU
1358 			 * instructions using the results of normal TEX lookup,
1359 			 * however it was found empirically that TEXKIL also needs
1360 			 * synchronization with the control flow. This might not be optimal,
1361 			 * however the docs don't offer any guidance in this matter.
1362 			 */
1363 			if (s.PrevBlockHasKil) {
1364 				inst->U.I.TexSemWait = 1;
1365 				s.PrevBlockHasKil = 0;
1366 			}
1367 			inst = inst->Next;
1368 			continue;
1369 		}
1370 
1371 		first = inst;
1372 
1373 		while(inst != &c->Base.Program.Instructions && !is_controlflow(inst))
1374 			inst = inst->Next;
1375 
1376 		DBG("Schedule one block\n");
1377 		memset(s.Temporary, 0, sizeof(s.Temporary));
1378 		s.TEXCount = 0;
1379 		schedule_block(&s, first, inst);
1380 		if (s.PendingTEX) {
1381 			s.PrevBlockHasTex = 1;
1382 		}
1383 	}
1384 }
1385