• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2009 Nicolai Haehnle.
3  *
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining
7  * a copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sublicense, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial
16  * portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  */
27 
28 #include "radeon_program_pair.h"
29 
30 #include <stdio.h>
31 
32 #include "radeon_compiler.h"
33 #include "radeon_compiler_util.h"
34 #include "radeon_dataflow.h"
35 #include "radeon_list.h"
36 #include "radeon_variable.h"
37 
38 #include "util/u_debug.h"
39 
40 #define VERBOSE 0
41 
42 #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
43 
44 struct schedule_instruction {
45 	struct rc_instruction * Instruction;
46 
47 	/** Next instruction in the linked list of ready instructions. */
48 	struct schedule_instruction *NextReady;
49 
50 	/** Values that this instruction reads and writes */
51 	struct reg_value * WriteValues[4];
52 	struct reg_value * ReadValues[12];
53 	unsigned int NumWriteValues:3;
54 	unsigned int NumReadValues:4;
55 
56 	/**
57 	 * Number of (read and write) dependencies that must be resolved before
58 	 * this instruction can be scheduled.
59 	 */
60 	unsigned int NumDependencies:5;
61 
62 	/** List of all readers (see rc_get_readers() for the definition of
63 	 * "all readers"), even those outside the basic block this instruction
64 	 * lives in. */
65 	struct rc_reader_data GlobalReaders;
66 
67 	/** If the scheduler has paired an RGB and an Alpha instruction together,
68 	 * PairedInst references the alpha instruction's dependency information.
69 	 */
70 	struct schedule_instruction * PairedInst;
71 
72 	/** This scheduler uses the value of Score to determine which
73 	 * instruction to schedule.  Instructions with a higher value of Score
74 	 * will be scheduled first. */
75 	int Score;
76 
77 	/** The number of components that read from a TEX instruction. */
78 	unsigned TexReadCount;
79 
80 	/** For TEX instructions a list of readers */
81 	struct rc_list * TexReaders;
82 };
83 
84 
/**
 * One node of the singly linked list recording which instructions read a
 * given value.
 */
struct reg_value_reader {
	/** Instruction that reads the value. */
	struct schedule_instruction *Reader;
	/** Next node in the list, or NULL at the end. */
	struct reg_value_reader *Next;
};
92 
/**
 * Used to keep track of which values are stored in each component of a
 * RC_FILE_TEMPORARY.
 */
struct reg_value {
	/** Instruction that produces this value. */
	struct schedule_instruction * Writer;

	/**
	 * Unordered linked list of instructions that read from this value.
	 * When this value becomes available (its writer is committed), every
	 * reader's dependency count is decreased.
	 */
	struct reg_value_reader *Readers;

	/**
	 * Number of readers of this value. This is decremented each time
	 * a reader of the value is committed.
	 * When the reader count reaches zero, the dependency count
	 * of the instruction writing \ref Next is decremented.
	 */
	unsigned int NumReaders;

	struct reg_value *Next; /**< Pointer to the next value to be written to the same register */
};
117 
/**
 * Per-register tracking: one reg_value slot for each of the register's
 * four channels (indexed by channel in get_reg_valuep()).
 */
struct register_state {
	struct reg_value *Values[4];
};
121 
122 struct remap_reg {
123 	struct rc_instruction * Inst;
124 	unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1);
125 	unsigned int OldSwizzle:3;
126 	unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1);
127 	unsigned int NewSwizzle:3;
128 	unsigned int OnlyTexReads:1;
129 	struct remap_reg * Next;
130 };
131 
132 struct schedule_state {
133 	struct radeon_compiler * C;
134 	struct schedule_instruction * Current;
135 	/** Array of the previous writers of Current's destination register
136 	 * indexed by channel. */
137 	struct schedule_instruction * PrevWriter[4];
138 
139 	struct register_state Temporary[RC_REGISTER_MAX_INDEX];
140 
141 	/**
142 	 * Linked lists of instructions that can be scheduled right now,
143 	 * based on which ALU/TEX resources they require.
144 	 */
145 	/*@{*/
146 	struct schedule_instruction *ReadyFullALU;
147 	struct schedule_instruction *ReadyRGB;
148 	struct schedule_instruction *ReadyAlpha;
149 	struct schedule_instruction *ReadyTEX;
150 	/*@}*/
151 	struct rc_list *PendingTEX;
152 
153 	void (*CalcScore)(struct schedule_instruction *);
154 	long max_tex_group;
155 	unsigned PrevBlockHasTex:1;
156 	unsigned TEXCount;
157 	unsigned Opt:1;
158 };
159 
/**
 * Look up the tracking slot for one channel of a temporary register.
 *
 * @param s      scheduler state owning the Temporary[] table
 * @param file   register file; only RC_FILE_TEMPORARY is tracked
 * @param index  register index, must be below RC_REGISTER_MAX_INDEX
 * @param chan   channel, must be a valid index into register_state::Values
 * @return address of the slot, or NULL when the file is not tracked or when
 *         index/chan is out of range (the latter is reported via rc_error()).
 */
static struct reg_value ** get_reg_valuep(struct schedule_state * s,
		rc_register_file file, unsigned int index, unsigned int chan)
{
	const unsigned int num_chans = sizeof(s->Temporary[0].Values) /
					sizeof(s->Temporary[0].Values[0]);

	if (file != RC_FILE_TEMPORARY)
		return NULL;

	if (index >= RC_REGISTER_MAX_INDEX) {
		rc_error(s->C, "%s: index %u out of bounds\n", __FUNCTION__, index);
		return NULL;
	}

	/* Guard the per-channel array as well; an unchecked chan would
	 * produce a pointer outside Values[]. */
	if (chan >= num_chans) {
		rc_error(s->C, "%s: channel %u out of bounds\n", __FUNCTION__, chan);
		return NULL;
	}

	return &s->Temporary[index].Values[chan];
}
173 
get_tex_read_count(struct schedule_instruction * sinst)174 static unsigned get_tex_read_count(struct schedule_instruction * sinst)
175 {
176 	unsigned tex_read_count = sinst->TexReadCount;
177 	if (sinst->PairedInst) {
178 		tex_read_count += sinst->PairedInst->TexReadCount;
179 	}
180 	return tex_read_count;
181 }
182 
183 #if VERBOSE
print_list(struct schedule_instruction * sinst)184 static void print_list(struct schedule_instruction * sinst)
185 {
186 	struct schedule_instruction * ptr;
187 	for (ptr = sinst; ptr; ptr=ptr->NextReady) {
188 		unsigned tex_read_count = get_tex_read_count(ptr);
189 		unsigned score = sinst->Score;
190 		fprintf(stderr,"%u (%d) [%u],", ptr->Instruction->IP, score,
191 						tex_read_count);
192 	}
193 	fprintf(stderr, "\n");
194 }
195 #endif
196 
remove_inst_from_list(struct schedule_instruction ** list,struct schedule_instruction * inst)197 static void remove_inst_from_list(struct schedule_instruction ** list,
198 					struct schedule_instruction * inst)
199 {
200 	struct schedule_instruction * prev = NULL;
201 	struct schedule_instruction * list_ptr;
202 	for (list_ptr = *list; list_ptr; prev = list_ptr,
203 					list_ptr = list_ptr->NextReady) {
204 		if (list_ptr == inst) {
205 			if (prev) {
206 				prev->NextReady = inst->NextReady;
207 			} else {
208 				*list = inst->NextReady;
209 			}
210 			inst->NextReady = NULL;
211 			break;
212 		}
213 	}
214 }
215 
add_inst_to_list(struct schedule_instruction ** list,struct schedule_instruction * inst)216 static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst)
217 {
218 	inst->NextReady = *list;
219 	*list = inst;
220 }
221 
add_inst_to_list_score(struct schedule_instruction ** list,struct schedule_instruction * inst)222 static void add_inst_to_list_score(struct schedule_instruction ** list,
223 					struct schedule_instruction * inst)
224 {
225 	struct schedule_instruction * temp;
226 	struct schedule_instruction * prev;
227 	if (!*list) {
228 		*list = inst;
229 		return;
230 	}
231 	temp = *list;
232 	prev = NULL;
233 	while(temp && inst->Score <= temp->Score) {
234 		prev = temp;
235 		temp = temp->NextReady;
236 	}
237 
238 	if (!prev) {
239 		inst->NextReady = temp;
240 		*list = inst;
241 	} else {
242 		prev->NextReady = inst;
243 		inst->NextReady = temp;
244 	}
245 }
246 
instruction_ready(struct schedule_state * s,struct schedule_instruction * sinst)247 static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst)
248 {
249 	DBG("%i is now ready\n", sinst->Instruction->IP);
250 
251 	/* Adding Ready TEX instructions to the end of the "Ready List" helps
252 	 * us emit TEX instructions in blocks without losing our place. */
253 	if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL)
254 		add_inst_to_list_score(&s->ReadyTEX, sinst);
255 	else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP)
256 		add_inst_to_list_score(&s->ReadyRGB, sinst);
257 	else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP)
258 		add_inst_to_list_score(&s->ReadyAlpha, sinst);
259 	else
260 		add_inst_to_list_score(&s->ReadyFullALU, sinst);
261 }
262 
decrease_dependencies(struct schedule_state * s,struct schedule_instruction * sinst)263 static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst)
264 {
265 	assert(sinst->NumDependencies > 0);
266 	sinst->NumDependencies--;
267 	if (!sinst->NumDependencies)
268 		instruction_ready(s, sinst);
269 }
270 
271 /* These functions provide different heuristics for scheduling instructions.
272  * The default is calc_score_readers. */
273 
274 #if 0
275 
276 static void calc_score_zero(struct schedule_instruction * sinst)
277 {
278 	sinst->Score = 0;
279 }
280 
281 static void calc_score_deps(struct schedule_instruction * sinst)
282 {
283 	int i;
284 	sinst->Score = 0;
285 	for (i = 0; i < sinst->NumWriteValues; i++) {
286 		struct reg_value * v = sinst->WriteValues[i];
287 		if (v->NumReaders) {
288 			struct reg_value_reader * r;
289 			for (r = v->Readers; r; r = r->Next) {
290 				if (r->Reader->NumDependencies == 1) {
291 					sinst->Score += 100;
292 				}
293 				sinst->Score += r->Reader->NumDependencies;
294 			}
295 		}
296 	}
297 }
298 
299 #endif
300 
301 #define NO_OUTPUT_SCORE (1 << 24)
302 
score_no_output(struct schedule_instruction * sinst)303 static void score_no_output(struct schedule_instruction * sinst)
304 {
305 	assert(sinst->Instruction->Type != RC_INSTRUCTION_NORMAL);
306 	if (!sinst->Instruction->U.P.RGB.OutputWriteMask &&
307 			!sinst->Instruction->U.P.Alpha.OutputWriteMask) {
308 		if (sinst->PairedInst) {
309 			if (!sinst->PairedInst->Instruction->U.P.
310 							RGB.OutputWriteMask
311 					&& !sinst->PairedInst->Instruction->U.P.
312 							Alpha.OutputWriteMask) {
313 				sinst->Score |= NO_OUTPUT_SCORE;
314 			}
315 
316 		} else {
317 			sinst->Score |= NO_OUTPUT_SCORE;
318 		}
319 	}
320 }
321 
322 #define PAIRED_SCORE (1 << 16)
323 
calc_score_r300(struct schedule_instruction * sinst)324 static void calc_score_r300(struct schedule_instruction * sinst)
325 {
326 	unsigned src_idx;
327 
328 	if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
329 		sinst->Score = 0;
330 		return;
331 	}
332 
333 	score_no_output(sinst);
334 
335 	if (sinst->PairedInst) {
336 		sinst->Score |= PAIRED_SCORE;
337 		return;
338 	}
339 
340 	for (src_idx = 0; src_idx < 4; src_idx++) {
341 		sinst->Score += sinst->Instruction->U.P.RGB.Src[src_idx].Used +
342 				sinst->Instruction->U.P.Alpha.Src[src_idx].Used;
343 	}
344 }
345 
346 #define NO_READ_TEX_SCORE (1 << 16)
347 
calc_score_readers(struct schedule_instruction * sinst)348 static void calc_score_readers(struct schedule_instruction * sinst)
349 {
350 	if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
351 		sinst->Score = 0;
352 	} else {
353 		sinst->Score = sinst->NumReadValues;
354 		if (sinst->PairedInst) {
355 			sinst->Score += sinst->PairedInst->NumReadValues;
356 		}
357 		if (get_tex_read_count(sinst) == 0) {
358 			sinst->Score |= NO_READ_TEX_SCORE;
359 		}
360 		score_no_output(sinst);
361 	}
362 }
363 
364 /**
365  * This function decreases the dependencies of the next instruction that
366  * wants to write to each of sinst's read values.
367  */
commit_update_reads(struct schedule_state * s,struct schedule_instruction * sinst)368 static void commit_update_reads(struct schedule_state * s,
369 					struct schedule_instruction * sinst){
370 	unsigned int i;
371 	for(i = 0; i < sinst->NumReadValues; ++i) {
372 		struct reg_value * v = sinst->ReadValues[i];
373 		assert(v->NumReaders > 0);
374 		v->NumReaders--;
375 		if (!v->NumReaders) {
376 			if (v->Next) {
377 				decrease_dependencies(s, v->Next->Writer);
378 			}
379 		}
380 	}
381 	if (sinst->PairedInst) {
382 		commit_update_reads(s, sinst->PairedInst);
383 	}
384 }
385 
commit_update_writes(struct schedule_state * s,struct schedule_instruction * sinst)386 static void commit_update_writes(struct schedule_state * s,
387 					struct schedule_instruction * sinst){
388 	unsigned int i;
389 	for(i = 0; i < sinst->NumWriteValues; ++i) {
390 		struct reg_value * v = sinst->WriteValues[i];
391 		if (v->NumReaders) {
392 			for(struct reg_value_reader * r = v->Readers; r; r = r->Next) {
393 				decrease_dependencies(s, r->Reader);
394 			}
395 		} else {
396 			/* This happens in instruction sequences of the type
397 			 *  OP r.x, ...;
398 			 *  OP r.x, r.x, ...;
399 			 * See also the subtlety in how instructions that both
400 			 * read and write the same register are scanned.
401 			 */
402 			if (v->Next)
403 				decrease_dependencies(s, v->Next->Writer);
404 		}
405 	}
406 	if (sinst->PairedInst) {
407 		commit_update_writes(s, sinst->PairedInst);
408 	}
409 }
410 
notify_sem_wait(struct schedule_state * s)411 static void notify_sem_wait(struct schedule_state *s)
412 {
413 	struct rc_list * pend_ptr;
414 	for (pend_ptr = s->PendingTEX; pend_ptr; pend_ptr = pend_ptr->Next) {
415 		struct rc_list * read_ptr;
416 		struct schedule_instruction * pending = pend_ptr->Item;
417 		for (read_ptr = pending->TexReaders; read_ptr;
418 						read_ptr = read_ptr->Next) {
419 			struct schedule_instruction * reader = read_ptr->Item;
420 			reader->TexReadCount--;
421 		}
422 	}
423 	s->PendingTEX = NULL;
424 }
425 
commit_alu_instruction(struct schedule_state * s,struct schedule_instruction * sinst)426 static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst)
427 {
428 	DBG("%i: commit score = %d\n", sinst->Instruction->IP, sinst->Score);
429 
430 	commit_update_reads(s, sinst);
431 
432 	commit_update_writes(s, sinst);
433 
434 	if (get_tex_read_count(sinst) > 0) {
435 		sinst->Instruction->U.P.SemWait = 1;
436 		notify_sem_wait(s);
437 	}
438 }
439 
440 /**
441  * Emit all ready texture instructions in a single block.
442  *
443  * Emit as a single block to (hopefully) sample many textures in parallel,
444  * and to avoid hardware indirections on R300.
445  */
emit_all_tex(struct schedule_state * s,struct rc_instruction * before)446 static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before)
447 {
448 	struct schedule_instruction *readytex;
449 	struct rc_instruction * inst_begin;
450 
451 	assert(s->ReadyTEX);
452 	notify_sem_wait(s);
453 
454 	/* Node marker for R300 */
455 	inst_begin = rc_insert_new_instruction(s->C, before->Prev);
456 	inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX;
457 
458 	/* Link texture instructions back in */
459 	readytex = s->ReadyTEX;
460 	while(readytex) {
461 		rc_insert_instruction(before->Prev, readytex->Instruction);
462 		DBG("%i: commit TEX reads\n", readytex->Instruction->IP);
463 
464 		/* All of the TEX instructions in the same TEX block have
465 		 * their source registers read from before any of the
466 		 * instructions in that block write to their destination
467 		 * registers.  This means that when we commit a TEX
468 		 * instruction, any other TEX instruction that wants to write
469 		 * to one of the committed instruction's source register can be
470 		 * marked as ready and should be emitted in the same TEX
471 		 * block. This prevents the following sequence from being
472 		 * emitted in two different TEX blocks:
473 		 * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0];
474 		 * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0];
475 		 */
476 		commit_update_reads(s, readytex);
477 		readytex = readytex->NextReady;
478 	}
479 	readytex = s->ReadyTEX;
480 	s->ReadyTEX = 0;
481 	while(readytex){
482 		DBG("%i: commit TEX writes\n", readytex->Instruction->IP);
483 		commit_update_writes(s, readytex);
484 		/* Set semaphore bits for last TEX instruction in the block */
485 		if (!readytex->NextReady) {
486 			readytex->Instruction->U.I.TexSemAcquire = 1;
487 			readytex->Instruction->U.I.TexSemWait = 1;
488 		}
489 		rc_list_add(&s->PendingTEX, rc_list(&s->C->Pool, readytex));
490 		readytex = readytex->NextReady;
491 	}
492 }
493 
494 /* This is a helper function for destructive_merge_instructions().  It helps
495  * merge presubtract sources from two instructions and makes sure the
496  * presubtract sources end up in the correct spot.  This function assumes that
497  * dst_full is an rgb instruction, meaning that it has a vector instruction(rgb)
498  * but no scalar instruction (alpha).
499  * @return 0 if merging the presubtract sources fails.
500  * @retrun 1 if merging the presubtract sources succeeds.
501  */
merge_presub_sources(struct rc_pair_instruction * dst_full,struct rc_pair_sub_instruction src,unsigned int type)502 static int merge_presub_sources(
503 	struct rc_pair_instruction * dst_full,
504 	struct rc_pair_sub_instruction src,
505 	unsigned int type)
506 {
507 	unsigned int srcp_src, srcp_regs, is_rgb, is_alpha;
508 	struct rc_pair_sub_instruction * dst_sub;
509 	const struct rc_opcode_info * info;
510 
511 	assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP);
512 
513 	switch(type) {
514 	case RC_SOURCE_RGB:
515 		is_rgb = 1;
516 		is_alpha = 0;
517 		dst_sub = &dst_full->RGB;
518 		break;
519 	case RC_SOURCE_ALPHA:
520 		is_rgb = 0;
521 		is_alpha = 1;
522 		dst_sub = &dst_full->Alpha;
523 		break;
524 	default:
525 		assert(0);
526 		return 0;
527 	}
528 
529 	info = rc_get_opcode_info(dst_full->RGB.Opcode);
530 
531 	if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used)
532 		return 0;
533 
534 	srcp_regs = rc_presubtract_src_reg_count(
535 					src.Src[RC_PAIR_PRESUB_SRC].Index);
536 	for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
537 		unsigned int arg;
538 		int free_source;
539 		unsigned int one_way = 0;
540 		struct rc_pair_instruction_source srcp = src.Src[srcp_src];
541 		struct rc_pair_instruction_source temp;
542 
543 		free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha,
544 							srcp.File, srcp.Index);
545 
546 		/* If free_source < 0 then there are no free source
547 		 * slots. */
548 		if (free_source < 0)
549 			return 0;
550 
551 		temp = dst_sub->Src[srcp_src];
552 		dst_sub->Src[srcp_src] = dst_sub->Src[free_source];
553 
554 		/* srcp needs src0 and src1 to be the same */
555 		if (free_source < srcp_src) {
556 			if (!temp.Used)
557 				continue;
558 			free_source = rc_pair_alloc_source(dst_full, is_rgb,
559 					is_alpha, temp.File, temp.Index);
560 			if (free_source < 0)
561 				return 0;
562 			one_way = 1;
563 		} else {
564 			dst_sub->Src[free_source] = temp;
565 		}
566 
567 		/* If free_source == srcp_src, then the presubtract
568 		 * source is already in the correct place. */
569 		if (free_source == srcp_src)
570 			continue;
571 
572 		/* Shuffle the sources, so we can put the
573 		 * presubtract source in the correct place. */
574 		for(arg = 0; arg < info->NumSrcRegs; arg++) {
575 			/*If this arg does not read from an rgb source,
576 			 * do nothing. */
577 			if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle)
578 								& type)) {
579 				continue;
580 			}
581 
582 			if (dst_full->RGB.Arg[arg].Source == srcp_src)
583 				dst_full->RGB.Arg[arg].Source = free_source;
584 			/* We need to do this just in case register
585 			 * is one of the sources already, but in the
586 			 * wrong spot. */
587 			else if(dst_full->RGB.Arg[arg].Source == free_source
588 							&& !one_way) {
589 				dst_full->RGB.Arg[arg].Source = srcp_src;
590 			}
591 		}
592 	}
593 	return 1;
594 }
595 
596 
597 /* This function assumes that rgb.Alpha and alpha.RGB are unused */
destructive_merge_instructions(struct rc_pair_instruction * rgb,struct rc_pair_instruction * alpha)598 static int destructive_merge_instructions(
599 		struct rc_pair_instruction * rgb,
600 		struct rc_pair_instruction * alpha)
601 {
602 	const struct rc_opcode_info * opcode;
603 
604 	assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
605 	assert(alpha->RGB.Opcode == RC_OPCODE_NOP);
606 
607 	/* Presubtract registers need to be merged first so that registers
608 	 * needed by the presubtract operation can be placed in src0 and/or
609 	 * src1. */
610 
611 	/* Merge the rgb presubtract registers. */
612 	if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
613 		if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) {
614 			return 0;
615 		}
616 	}
617 	/* Merge the alpha presubtract registers */
618 	if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
619 		if(!merge_presub_sources(rgb,  alpha->Alpha, RC_SOURCE_ALPHA)){
620 			return 0;
621 		}
622 	}
623 
624 	/* Copy alpha args into rgb */
625 	opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
626 
627 	for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
628 		unsigned int srcrgb = 0;
629 		unsigned int srcalpha = 0;
630 		unsigned int oldsrc = alpha->Alpha.Arg[arg].Source;
631 		rc_register_file file = 0;
632 		unsigned int index = 0;
633 		int source;
634 
635 		if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) {
636 			srcrgb = 1;
637 			file = alpha->RGB.Src[oldsrc].File;
638 			index = alpha->RGB.Src[oldsrc].Index;
639 		} else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) {
640 			srcalpha = 1;
641 			file = alpha->Alpha.Src[oldsrc].File;
642 			index = alpha->Alpha.Src[oldsrc].Index;
643 		}
644 
645 		source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index);
646 		if (source < 0)
647 			return 0;
648 
649 		rgb->Alpha.Arg[arg].Source = source;
650 		rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle;
651 		rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs;
652 		rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate;
653 	}
654 
655 	/* Copy alpha opcode into rgb */
656 	rgb->Alpha.Opcode = alpha->Alpha.Opcode;
657 	rgb->Alpha.DestIndex = alpha->Alpha.DestIndex;
658 	rgb->Alpha.WriteMask = alpha->Alpha.WriteMask;
659 	rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask;
660 	rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask;
661 	rgb->Alpha.Saturate = alpha->Alpha.Saturate;
662 	rgb->Alpha.Omod = alpha->Alpha.Omod;
663 
664 	/* Merge ALU result writing */
665 	if (alpha->WriteALUResult) {
666 		if (rgb->WriteALUResult)
667 			return 0;
668 
669 		rgb->WriteALUResult = alpha->WriteALUResult;
670 		rgb->ALUResultCompare = alpha->ALUResultCompare;
671 	}
672 
673 	/* Copy SemWait */
674 	rgb->SemWait |= alpha->SemWait;
675 
676 	return 1;
677 }
678 
679 /**
680  * Try to merge the given instructions into the rgb instructions.
681  *
682  * Return true on success; on failure, return false, and keep
683  * the instructions untouched.
684  */
merge_instructions(struct rc_pair_instruction * rgb,struct rc_pair_instruction * alpha)685 static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha)
686 {
687 	struct rc_pair_instruction backup;
688 
689 	/*Instructions can't write output registers and ALU result at the
690 	 * same time. */
691 	if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask)
692 		|| (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) {
693 		return 0;
694 	}
695 
696 	/* Writing output registers in the middle of shaders is slow, so
697 	 * we don't want to pair output writes with temp writes. */
698 	if ((rgb->RGB.OutputWriteMask && !alpha->Alpha.OutputWriteMask)
699 		|| (!rgb->RGB.OutputWriteMask && alpha->Alpha.OutputWriteMask)) {
700 		return 0;
701 	}
702 
703 	memcpy(&backup, rgb, sizeof(struct rc_pair_instruction));
704 
705 	if (destructive_merge_instructions(rgb, alpha))
706 		return 1;
707 
708 	memcpy(rgb, &backup, sizeof(struct rc_pair_instruction));
709 	return 0;
710 }
711 
presub_nop(struct rc_instruction * emitted)712 static void presub_nop(struct rc_instruction * emitted) {
713 	int prev_rgb_index, prev_alpha_index, i, num_src;
714 
715 	/* We don't need a nop if the previous instruction is a TEX. */
716 	if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) {
717 		return;
718 	}
719 	if (emitted->Prev->U.P.RGB.WriteMask)
720 		prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex;
721 	else
722 		prev_rgb_index = -1;
723 	if (emitted->Prev->U.P.Alpha.WriteMask)
724 		prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex;
725 	else
726 		prev_alpha_index = 1;
727 
728 	/* Check the previous rgb instruction */
729 	if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
730 		num_src = rc_presubtract_src_reg_count(
731 				emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index);
732 		for (i = 0; i < num_src; i++) {
733 			unsigned int index = emitted->U.P.RGB.Src[i].Index;
734 			if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY
735 			    && (index  == prev_rgb_index
736 				|| index == prev_alpha_index)) {
737 				emitted->Prev->U.P.Nop = 1;
738 				return;
739 			}
740 		}
741 	}
742 
743 	/* Check the previous alpha instruction. */
744 	if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
745 		return;
746 
747 	num_src = rc_presubtract_src_reg_count(
748 				emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
749 	for (i = 0; i < num_src; i++) {
750 		unsigned int index = emitted->U.P.Alpha.Src[i].Index;
751 		if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY
752 		   && (index == prev_rgb_index || index == prev_alpha_index)) {
753 			emitted->Prev->U.P.Nop = 1;
754 			return;
755 		}
756 	}
757 }
758 
/**
 * Rewrite one reader argument after its source value has moved to the
 * alpha (W) channel of register new_index: every one of the first three
 * swizzle components equal to old_swz becomes W, and the argument is
 * pointed at an alpha source slot allocated for (old_file, new_index).
 */
static void rgb_to_alpha_remap (
	struct rc_instruction * inst,
	struct rc_pair_instruction_arg * arg,
	rc_register_file old_file,
	rc_swizzle old_swz,
	unsigned int new_index)
{
	unsigned int chan;
	int alpha_src;

	for (chan = 0; chan < 3; chan++) {
		if (get_swz(arg->Swizzle, chan) != old_swz)
			continue;
		SET_SWZ(arg->Swizzle, chan, RC_SWIZZLE_W);
	}

	alpha_src = rc_pair_alloc_source(&inst->U.P, 0, 1,
							old_file, new_index);
	if (alpha_src < 0) {
		/* This conversion is not possible, so
		 * is_rgb_to_alpha_possible must have let a case through that
		 * it should have rejected. */
		assert(0);
		return;
	}

	arg->Source = alpha_src;
}
785 
can_remap(unsigned int opcode)786 static int can_remap(unsigned int opcode)
787 {
788 	switch(opcode) {
789 	case RC_OPCODE_DDX:
790 	case RC_OPCODE_DDY:
791 		return 0;
792 	default:
793 		return 1;
794 	}
795 }
796 
can_convert_opcode_to_alpha(unsigned int opcode)797 static int can_convert_opcode_to_alpha(unsigned int opcode)
798 {
799 	switch(opcode) {
800 	case RC_OPCODE_DDX:
801 	case RC_OPCODE_DDY:
802 	case RC_OPCODE_DP2:
803 	case RC_OPCODE_DP3:
804 	case RC_OPCODE_DP4:
805 	case RC_OPCODE_DPH:
806 		return 0;
807 	default:
808 		return 1;
809 	}
810 }
811 
is_rgb_to_alpha_possible(void * userdata,struct rc_instruction * inst,struct rc_pair_instruction_arg * arg,struct rc_pair_instruction_source * src)812 static void is_rgb_to_alpha_possible(
813 	void * userdata,
814 	struct rc_instruction * inst,
815 	struct rc_pair_instruction_arg * arg,
816 	struct rc_pair_instruction_source * src)
817 {
818 	unsigned int read_chan = RC_SWIZZLE_UNUSED;
819 	unsigned int alpha_sources = 0;
820 	unsigned int i;
821 	struct rc_reader_data * reader_data = userdata;
822 
823 	if (!can_remap(inst->U.P.RGB.Opcode)
824 	    || !can_remap(inst->U.P.Alpha.Opcode)) {
825 		reader_data->Abort = 1;
826 		return;
827 	}
828 
829 	if (!src)
830 		return;
831 
832 	/* XXX There are some cases where we can still do the conversion if
833 	 * a reader reads from a presubtract source, but for now we'll prevent
834 	 * it. */
835 	if (arg->Source == RC_PAIR_PRESUB_SRC) {
836 		reader_data->Abort = 1;
837 		return;
838 	}
839 
840 	/* Make sure the source only reads the register component that we
841 	 * are going to be convering from.  It is OK if the instruction uses
842 	 * this component more than once.
843 	 * XXX If the index we will be converting to is the same as the
844 	 * current index, then it is OK to read from more than one component.
845 	 */
846 	for (i = 0; i < 3; i++) {
847 		rc_swizzle swz = get_swz(arg->Swizzle, i);
848 		switch(swz) {
849 		case RC_SWIZZLE_X:
850 		case RC_SWIZZLE_Y:
851 		case RC_SWIZZLE_Z:
852 		case RC_SWIZZLE_W:
853 			if (read_chan == RC_SWIZZLE_UNUSED) {
854 				read_chan = swz;
855 			} else if (read_chan != swz) {
856 				reader_data->Abort = 1;
857 				return;
858 			}
859 			break;
860 		default:
861 			break;
862 		}
863 	}
864 
865 	/* Make sure there are enough alpha sources.
866 	 * XXX If we know what register all the readers are going
867 	 * to be remapped to, then in some situations we can still do
868 	 * the substitution, even if all 3 alpha sources are being used.*/
869 	for (i = 0; i < 3; i++) {
870 		if (inst->U.P.Alpha.Src[i].Used) {
871 			alpha_sources++;
872 		}
873 	}
874 	if (alpha_sources > 2) {
875 		reader_data->Abort = 1;
876 		return;
877 	}
878 }
879 
convert_rgb_to_alpha(struct schedule_state * s,struct schedule_instruction * sched_inst)880 static int convert_rgb_to_alpha(
881 	struct schedule_state * s,
882 	struct schedule_instruction * sched_inst)
883 {
884 	struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P;
885 	unsigned int old_mask = pair_inst->RGB.WriteMask;
886 	unsigned int old_swz = rc_mask_to_swizzle(old_mask);
887 	const struct rc_opcode_info * info =
888 				rc_get_opcode_info(pair_inst->RGB.Opcode);
889 	int new_index = -1;
890 	unsigned int i;
891 
892 	if (sched_inst->GlobalReaders.Abort)
893 		return 0;
894 
895 	if (!pair_inst->RGB.WriteMask)
896 		return 0;
897 
898 	if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode)
899 	    || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) {
900 		return 0;
901 	}
902 
903 	assert(sched_inst->NumWriteValues == 1);
904 
905 	if (!sched_inst->WriteValues[0]) {
906 		assert(0);
907 		return 0;
908 	}
909 
910 	/* We start at the old index, because if we can reuse the same
911 	 * register and just change the swizzle then it is more likely we
912 	 * will be able to convert all the readers. */
913 	for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) {
914 		struct reg_value ** new_regvalp = get_reg_valuep(
915 						s, RC_FILE_TEMPORARY, i, 3);
916 		if (!*new_regvalp) {
917 			struct reg_value ** old_regvalp =
918 				get_reg_valuep(s,
919 					RC_FILE_TEMPORARY,
920 					pair_inst->RGB.DestIndex,
921 					rc_mask_to_swizzle(old_mask));
922 			new_index = i;
923 			*new_regvalp = *old_regvalp;
924 			*old_regvalp = NULL;
925 			new_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY, i, 3);
926 			break;
927 		}
928 	}
929 	if (new_index < 0) {
930 		return 0;
931 	}
932 
933 	/* If we are converting a full instruction with RC_OPCODE_REPL_ALPHA
934 	 * as the RGB opcode, then the Alpha instruction will already contain
935 	 * the correct opcode and instruction args, so we do not want to
936 	 * overwrite them.
937 	 */
938 	if (pair_inst->RGB.Opcode != RC_OPCODE_REPL_ALPHA) {
939 		pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode;
940 		memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg,
941 						sizeof(pair_inst->Alpha.Arg));
942 	}
943 	pair_inst->Alpha.DestIndex = new_index;
944 	pair_inst->Alpha.WriteMask = RC_MASK_W;
945 	pair_inst->Alpha.Target = pair_inst->RGB.Target;
946 	pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask;
947 	pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask;
948 	pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate;
949 	pair_inst->Alpha.Omod = pair_inst->RGB.Omod;
950 	/* Move the swizzles into the first chan */
951 	for (i = 0; i < info->NumSrcRegs; i++) {
952 		unsigned int j;
953 		for (j = 0; j < 3; j++) {
954 			unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j);
955 			if (swz != RC_SWIZZLE_UNUSED) {
956 				pair_inst->Alpha.Arg[i].Swizzle =
957 							rc_init_swizzle(swz, 1);
958 				break;
959 			}
960 		}
961 	}
962 	pair_inst->RGB.Opcode = RC_OPCODE_NOP;
963 	pair_inst->RGB.DestIndex = 0;
964 	pair_inst->RGB.WriteMask = 0;
965 	pair_inst->RGB.Target = 0;
966 	pair_inst->RGB.OutputWriteMask = 0;
967 	pair_inst->RGB.DepthWriteMask = 0;
968 	pair_inst->RGB.Saturate = 0;
969 	memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg));
970 
971 	for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) {
972 		struct rc_reader reader = sched_inst->GlobalReaders.Readers[i];
973 		rgb_to_alpha_remap(reader.Inst, reader.U.P.Arg,
974 					RC_FILE_TEMPORARY, old_swz, new_index);
975 	}
976 	return 1;
977 }
978 
try_convert_and_pair(struct schedule_state * s,struct schedule_instruction ** inst_list)979 static void try_convert_and_pair(
980 	struct schedule_state *s,
981 	struct schedule_instruction ** inst_list)
982 {
983 	struct schedule_instruction * list_ptr = *inst_list;
984 	while (list_ptr && *inst_list && (*inst_list)->NextReady) {
985 		int paired = 0;
986 		if (list_ptr->Instruction->U.P.Alpha.Opcode != RC_OPCODE_NOP
987 			&& list_ptr->Instruction->U.P.RGB.Opcode
988 						!= RC_OPCODE_REPL_ALPHA) {
989 				goto next;
990 		}
991 		if (list_ptr->NumWriteValues == 1
992 					&& convert_rgb_to_alpha(s, list_ptr)) {
993 
994 			struct schedule_instruction * pair_ptr;
995 			remove_inst_from_list(inst_list, list_ptr);
996 			add_inst_to_list_score(&s->ReadyAlpha, list_ptr);
997 
998 			for (pair_ptr = s->ReadyRGB; pair_ptr;
999 					pair_ptr = pair_ptr->NextReady) {
1000 				if (merge_instructions(&pair_ptr->Instruction->U.P,
1001 						&list_ptr->Instruction->U.P)) {
1002 					remove_inst_from_list(&s->ReadyAlpha, list_ptr);
1003 					remove_inst_from_list(&s->ReadyRGB, pair_ptr);
1004 					pair_ptr->PairedInst = list_ptr;
1005 
1006 					add_inst_to_list(&s->ReadyFullALU, pair_ptr);
1007 					list_ptr = *inst_list;
1008 					paired = 1;
1009 					break;
1010 				}
1011 
1012 			}
1013 		}
1014 		if (!paired) {
1015 next:
1016 			list_ptr = list_ptr->NextReady;
1017 		}
1018 	}
1019 }
1020 
1021 /**
1022  * This function attempts to merge RGB and Alpha instructions together.
1023  */
pair_instructions(struct schedule_state * s)1024 static void pair_instructions(struct schedule_state * s)
1025 {
1026 	struct schedule_instruction *rgb_ptr;
1027 	struct schedule_instruction *alpha_ptr;
1028 
1029 	/* Some pairings might fail because they require too
1030 	 * many source slots; try all possible pairings if necessary */
1031 	rgb_ptr = s->ReadyRGB;
1032 	while(rgb_ptr) {
1033 		struct schedule_instruction * rgb_next = rgb_ptr->NextReady;
1034 		alpha_ptr = s->ReadyAlpha;
1035 		while(alpha_ptr) {
1036 			struct schedule_instruction * alpha_next = alpha_ptr->NextReady;
1037 			if (merge_instructions(&rgb_ptr->Instruction->U.P, &alpha_ptr->Instruction->U.P)) {
1038 				/* Remove RGB and Alpha from their ready lists.
1039 				 */
1040 				remove_inst_from_list(&s->ReadyRGB, rgb_ptr);
1041 				remove_inst_from_list(&s->ReadyAlpha, alpha_ptr);
1042 				rgb_ptr->PairedInst = alpha_ptr;
1043 				add_inst_to_list(&s->ReadyFullALU, rgb_ptr);
1044 				break;
1045 			}
1046 			alpha_ptr = alpha_next;
1047 		}
1048 		rgb_ptr = rgb_next;
1049 	}
1050 
1051 	if (!s->Opt) {
1052 		return;
1053 	}
1054 
1055 	/* Full instructions that have RC_OPCODE_REPL_ALPHA in the RGB
1056 	 * slot can be converted into Alpha instructions. */
1057 	try_convert_and_pair(s, &s->ReadyFullALU);
1058 
1059 	/* Try to convert some of the RGB instructions to Alpha and
1060 	 * try to pair it with another RGB. */
1061 	try_convert_and_pair(s, &s->ReadyRGB);
1062 }
1063 
update_max_score(struct schedule_state * s,struct schedule_instruction ** list,int * max_score,struct schedule_instruction ** max_inst_out,struct schedule_instruction *** list_out)1064 static void update_max_score(
1065 	struct schedule_state * s,
1066 	struct schedule_instruction ** list,
1067 	int * max_score,
1068 	struct schedule_instruction ** max_inst_out,
1069 	struct schedule_instruction *** list_out)
1070 {
1071 	struct schedule_instruction * list_ptr;
1072 	for (list_ptr = *list; list_ptr; list_ptr = list_ptr->NextReady) {
1073 		int score;
1074 		s->CalcScore(list_ptr);
1075 		score = list_ptr->Score;
1076 		if (!*max_inst_out || score > *max_score) {
1077 			*max_score = score;
1078 			*max_inst_out = list_ptr;
1079 			*list_out = list;
1080 		}
1081 	}
1082 }
1083 
emit_instruction(struct schedule_state * s,struct rc_instruction * before)1084 static void emit_instruction(
1085 	struct schedule_state * s,
1086 	struct rc_instruction * before)
1087 {
1088 	int max_score = -1;
1089 	struct schedule_instruction * max_inst = NULL;
1090 	struct schedule_instruction ** max_list = NULL;
1091 	unsigned tex_count = 0;
1092 	struct schedule_instruction * tex_ptr;
1093 
1094 	pair_instructions(s);
1095 #if VERBOSE
1096 	fprintf(stderr, "Full:\n");
1097 	print_list(s->ReadyFullALU);
1098 	fprintf(stderr, "RGB:\n");
1099 	print_list(s->ReadyRGB);
1100 	fprintf(stderr, "Alpha:\n");
1101 	print_list(s->ReadyAlpha);
1102 	fprintf(stderr, "TEX:\n");
1103 	print_list(s->ReadyTEX);
1104 #endif
1105 
1106 	for (tex_ptr = s->ReadyTEX; tex_ptr; tex_ptr = tex_ptr->NextReady) {
1107 		if (tex_ptr->Instruction->U.I.Opcode == RC_OPCODE_KIL) {
1108 			emit_all_tex(s, before);
1109 			return;
1110 		}
1111 		tex_count++;
1112 	}
1113 	update_max_score(s, &s->ReadyFullALU, &max_score, &max_inst, &max_list);
1114 	update_max_score(s, &s->ReadyRGB, &max_score, &max_inst, &max_list);
1115 	update_max_score(s, &s->ReadyAlpha, &max_score, &max_inst, &max_list);
1116 
1117 	if (tex_count >= s->max_tex_group || max_score == -1
1118 		|| (s->TEXCount > 0 && tex_count == s->TEXCount)
1119 		|| (!s->C->is_r500 && tex_count > 0 && max_score == -1)) {
1120 		emit_all_tex(s, before);
1121 	} else {
1122 
1123 
1124 		remove_inst_from_list(max_list, max_inst);
1125 		rc_insert_instruction(before->Prev, max_inst->Instruction);
1126 		commit_alu_instruction(s, max_inst);
1127 
1128 		presub_nop(before->Prev);
1129 	}
1130 }
1131 
add_tex_reader(struct schedule_state * s,struct schedule_instruction * writer,struct schedule_instruction * reader)1132 static void add_tex_reader(
1133 	struct schedule_state * s,
1134 	struct schedule_instruction * writer,
1135 	struct schedule_instruction * reader)
1136 {
1137 	if (!writer || writer->Instruction->Type != RC_INSTRUCTION_NORMAL) {
1138 		/*Not a TEX instructions */
1139 		return;
1140 	}
1141 	reader->TexReadCount++;
1142 	rc_list_add(&writer->TexReaders, rc_list(&s->C->Pool, reader));
1143 }
1144 
/**
 * Per-channel read callback (passed to rc_for_all_reads_chan() by
 * schedule_block()): records that s->Current reads component \p chan of
 * register \p file[\p index].
 *
 * The current instruction is added to the reader list of the register value,
 * a dependency is counted if that value has a writer in this block, and the
 * value is appended to s->Current->ReadValues.
 *
 * \param data   the struct schedule_state of the running pass
 * \param inst   instruction being scanned (unused; s->Current is used)
 * \param file   register file of the component being read
 * \param index  register index of the component being read
 * \param chan   channel of the component being read
 */
static void scan_read(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int chan)
{
	struct schedule_state * s = data;
	struct reg_value ** v = get_reg_valuep(s, file, index, chan);
	struct reg_value_reader * reader;

	/* No value slot for this register component: nothing to record. */
	if (!v)
		return;

	if (*v && (*v)->Writer == s->Current) {
		/* The instruction reads and writes to a register component.
		 * In this case, we only want to increment dependencies by one.
		 * Why?
		 * Because each instruction depends on the writers of its source
		 * registers _and_ the most recent writer of its destination
		 * register.  In this case, the current instruction (s->Current)
		 * has a dependency that both writes to one of its source
		 * registers and was the most recent writer to its destination
		 * register.  We have already marked this dependency in
		 * scan_write(), so we don't need to do it again.
		 */

		/* We need to make sure we are adding s->Current to the
		 * previous writer's list of TexReaders, if the previous writer
		 * was a TEX instruction.
		 */
		add_tex_reader(s, s->PrevWriter[chan], s->Current);

		return;
	}

	DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);

	reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
	reader->Reader = s->Current;

	if (!*v) {
		/* In this situation, the instruction reads from a register
		 * that hasn't been written to or read from in the current
		 * block. */
		*v = memory_pool_malloc(&s->C->Pool, sizeof(**v));
		memset(*v, 0, sizeof(**v));
	} else if ((*v)->Writer) {
		/* Only update the current instruction's dependencies if the
		 * register it reads from has been written to in this block. */
		add_tex_reader(s, (*v)->Writer, s->Current);
		s->Current->NumDependencies++;
	}

	/* Push the reader onto the front of the value's reader list.  Doing
	 * this on both paths guarantees reader->Next is always initialized:
	 * pool memory is not assumed to be zeroed (the other allocations in
	 * this file memset explicitly), and for a freshly created value
	 * (*v)->Readers is NULL, which terminates the list properly. */
	reader->Next = (*v)->Readers;
	(*v)->Readers = reader;
	(*v)->NumReaders++;

	if (s->Current->NumReadValues >= 12) {
		rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__);
	} else {
		s->Current->ReadValues[s->Current->NumReadValues++] = *v;
	}
}
1206 
/**
 * Per-channel write callback (passed to rc_for_all_writes_chan() by
 * schedule_block()): records that s->Current writes component \p chan of
 * register \p file[\p index].
 *
 * A new zero-initialized reg_value with s->Current as its Writer replaces
 * the value currently stored for the component.  If there was a previous
 * value, the new one is chained behind it through Next and the current
 * instruction gains one dependency.  The new value is appended to
 * s->Current->WriteValues.
 *
 * \param data   the struct schedule_state of the running pass
 * \param inst   instruction being scanned (unused; s->Current is used)
 * \param file   register file of the component being written
 * \param index  register index of the component being written
 * \param chan   channel of the component being written
 */
static void scan_write(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int chan)
{
	struct schedule_state * s = data;
	struct reg_value ** pv = get_reg_valuep(s, file, index, chan);
	struct reg_value * newv;

	/* No value slot for this register component: nothing to record. */
	if (!pv)
		return;

	DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);

	newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv));
	memset(newv, 0, sizeof(*newv));

	newv->Writer = s->Current;

	if (*pv) {
		(*pv)->Next = newv;
		s->Current->NumDependencies++;
		/* Keep track of the previous writer to s->Current's destination
		 * register */
		s->PrevWriter[chan] = (*pv)->Writer;
	} else {
		/* There is no previous value for this component in the
		 * current block.  Clear the slot so that scan_read() does not
		 * pick up a writer left behind by an earlier instruction that
		 * happened to write the same channel. */
		s->PrevWriter[chan] = NULL;
	}

	*pv = newv;

	if (s->Current->NumWriteValues >= 4) {
		rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__);
	} else {
		s->Current->WriteValues[s->Current->NumWriteValues++] = newv;
	}
}
1240 
is_rgb_to_alpha_possible_normal(void * userdata,struct rc_instruction * inst,struct rc_src_register * src)1241 static void is_rgb_to_alpha_possible_normal(
1242 	void * userdata,
1243 	struct rc_instruction * inst,
1244 	struct rc_src_register * src)
1245 {
1246 	struct rc_reader_data * reader_data = userdata;
1247 	reader_data->Abort = 1;
1248 
1249 }
1250 
schedule_block(struct schedule_state * s,struct rc_instruction * begin,struct rc_instruction * end)1251 static void schedule_block(struct schedule_state * s,
1252 		struct rc_instruction * begin, struct rc_instruction * end)
1253 {
1254 	unsigned int ip;
1255 
1256 	/* Scan instructions for data dependencies */
1257 	ip = 0;
1258 	for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
1259 		s->Current = memory_pool_malloc(&s->C->Pool, sizeof(*s->Current));
1260 		memset(s->Current, 0, sizeof(struct schedule_instruction));
1261 
1262 		if (inst->Type == RC_INSTRUCTION_NORMAL) {
1263 			const struct rc_opcode_info * info =
1264 					rc_get_opcode_info(inst->U.I.Opcode);
1265 			if (info->HasTexture) {
1266 				s->TEXCount++;
1267 			}
1268 		}
1269 
1270 		/* XXX: This causes SemWait to be set for all instructions in
1271 		 * a block if the previous block contained a TEX instruction.
1272 		 * We can do better here, but it will take a lot of work. */
1273 		if (s->PrevBlockHasTex) {
1274 			s->Current->TexReadCount = 1;
1275 		}
1276 
1277 		s->Current->Instruction = inst;
1278 		inst->IP = ip++;
1279 
1280 		DBG("%i: Scanning\n", inst->IP);
1281 
1282 		/* The order of things here is subtle and maybe slightly
1283 		 * counter-intuitive, to account for the case where an
1284 		 * instruction writes to the same register as it reads
1285 		 * from. */
1286 		rc_for_all_writes_chan(inst, &scan_write, s);
1287 		rc_for_all_reads_chan(inst, &scan_read, s);
1288 
1289 		DBG("%i: Has %i dependencies\n", inst->IP, s->Current->NumDependencies);
1290 
1291 		if (!s->Current->NumDependencies) {
1292 			instruction_ready(s, s->Current);
1293 		}
1294 
1295 		/* Get global readers for possible RGB->Alpha conversion. */
1296 		s->Current->GlobalReaders.ExitOnAbort = 1;
1297 		rc_get_readers(s->C, inst, &s->Current->GlobalReaders,
1298 				is_rgb_to_alpha_possible_normal,
1299 				is_rgb_to_alpha_possible, NULL);
1300 	}
1301 
1302 	/* Temporarily unlink all instructions */
1303 	begin->Prev->Next = end;
1304 	end->Prev = begin->Prev;
1305 
1306 	/* Schedule instructions back */
1307 	while(!s->C->Error &&
1308 	      (s->ReadyTEX || s->ReadyRGB || s->ReadyAlpha || s->ReadyFullALU)) {
1309 		emit_instruction(s, end);
1310 	}
1311 }
1312 
is_controlflow(struct rc_instruction * inst)1313 static int is_controlflow(struct rc_instruction * inst)
1314 {
1315 	if (inst->Type == RC_INSTRUCTION_NORMAL) {
1316 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
1317 		return opcode->IsFlowControl;
1318 	}
1319 	return 0;
1320 }
1321 
rc_pair_schedule(struct radeon_compiler * cc,void * user)1322 void rc_pair_schedule(struct radeon_compiler *cc, void *user)
1323 {
1324 	struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
1325 	struct schedule_state s;
1326 	struct rc_instruction * inst = c->Base.Program.Instructions.Next;
1327 	unsigned int * opt = user;
1328 
1329 	memset(&s, 0, sizeof(s));
1330 	s.Opt = *opt;
1331 	s.C = &c->Base;
1332 	if (s.C->is_r500) {
1333 		s.CalcScore = calc_score_readers;
1334 	} else {
1335 		s.CalcScore = calc_score_r300;
1336 	}
1337 	s.max_tex_group = debug_get_num_option("RADEON_TEX_GROUP", 8);
1338 	while(inst != &c->Base.Program.Instructions) {
1339 		struct rc_instruction * first;
1340 
1341 		if (is_controlflow(inst)) {
1342 			inst = inst->Next;
1343 			continue;
1344 		}
1345 
1346 		first = inst;
1347 
1348 		while(inst != &c->Base.Program.Instructions && !is_controlflow(inst))
1349 			inst = inst->Next;
1350 
1351 		DBG("Schedule one block\n");
1352 		memset(s.Temporary, 0, sizeof(s.Temporary));
1353 		s.TEXCount = 0;
1354 		schedule_block(&s, first, inst);
1355 		if (s.PendingTEX) {
1356 			s.PrevBlockHasTex = 1;
1357 		}
1358 	}
1359 }
1360