1 /*
2 * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
22
23 #include "radeon_compiler.h"
24
25 #include <stdarg.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29
30 #include "util/u_debug.h"
31 #include "pipe/p_state.h"
32 #include "radeon_dataflow.h"
33 #include "radeon_program.h"
34 #include "radeon_program_pair.h"
35 #include "radeon_regalloc.h"
36 #include "radeon_compiler_util.h"
37
38
rc_init(struct radeon_compiler * c,const struct rc_regalloc_state * rs)39 void rc_init(struct radeon_compiler * c, const struct rc_regalloc_state *rs)
40 {
41 memset(c, 0, sizeof(*c));
42
43 memory_pool_init(&c->Pool);
44 c->Program.Instructions.Prev = &c->Program.Instructions;
45 c->Program.Instructions.Next = &c->Program.Instructions;
46 c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
47 c->regalloc_state = rs;
48 c->max_temp_index = -1;
49 }
50
rc_destroy(struct radeon_compiler * c)51 void rc_destroy(struct radeon_compiler * c)
52 {
53 rc_constants_destroy(&c->Program.Constants);
54 memory_pool_destroy(&c->Pool);
55 free(c->ErrorMsg);
56 }
57
rc_debug(struct radeon_compiler * c,const char * fmt,...)58 void rc_debug(struct radeon_compiler * c, const char * fmt, ...)
59 {
60 va_list ap;
61
62 if (!(c->Debug & RC_DBG_LOG))
63 return;
64
65 va_start(ap, fmt);
66 vfprintf(stderr, fmt, ap);
67 va_end(ap);
68 }
69
rc_error(struct radeon_compiler * c,const char * fmt,...)70 void rc_error(struct radeon_compiler * c, const char * fmt, ...)
71 {
72 va_list ap;
73
74 c->Error = 1;
75
76 if (!c->ErrorMsg) {
77 /* Only remember the first error */
78 char buf[1024];
79 int written;
80
81 va_start(ap, fmt);
82 written = vsnprintf(buf, sizeof(buf), fmt, ap);
83 va_end(ap);
84
85 if (written < sizeof(buf)) {
86 c->ErrorMsg = strdup(buf);
87 } else {
88 c->ErrorMsg = malloc(written + 1);
89
90 va_start(ap, fmt);
91 vsnprintf(c->ErrorMsg, written + 1, fmt, ap);
92 va_end(ap);
93 }
94 }
95
96 if (c->Debug & RC_DBG_LOG) {
97 fprintf(stderr, "r300compiler error: ");
98
99 va_start(ap, fmt);
100 vfprintf(stderr, fmt, ap);
101 va_end(ap);
102 }
103 }
104
/**
 * Report a failed internal assertion through rc_error().
 *
 * \param c          compiler context that receives the error
 * \param file       source file name of the failed assertion
 * \param line       source line of the failed assertion
 * \param assertion  text of the assertion that failed
 * \return always 1
 */
int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion)
{
	rc_error(c, "ICE at %s:%i: assertion failed: %s\n",
		 file, line, assertion);

	return 1;
}
110
rc_mark_unused_channels(struct radeon_compiler * c,void * user)111 void rc_mark_unused_channels(struct radeon_compiler * c, void *user)
112 {
113 unsigned int srcmasks[3];
114
115 for(struct rc_instruction * inst = c->Program.Instructions.Next;
116 inst != &c->Program.Instructions;
117 inst = inst->Next) {
118
119 rc_compute_sources_for_writemask(inst, inst->U.I.DstReg.WriteMask, srcmasks);
120
121 for(unsigned int src = 0; src < 3; ++src) {
122 for(unsigned int chan = 0; chan < 4; ++chan) {
123 if (!GET_BIT(srcmasks[src], chan))
124 SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
125 }
126 }
127 }
128 }
129
130 /**
131 * Recompute c->Program.InputsRead and c->Program.OutputsWritten
132 * based on which inputs and outputs are actually referenced
133 * in program instructions.
134 */
rc_calculate_inputs_outputs(struct radeon_compiler * c)135 void rc_calculate_inputs_outputs(struct radeon_compiler * c)
136 {
137 struct rc_instruction *inst;
138
139 c->Program.InputsRead = 0;
140 c->Program.OutputsWritten = 0;
141
142 for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)
143 {
144 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
145 int i;
146
147 for (i = 0; i < opcode->NumSrcRegs; ++i) {
148 if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT)
149 c->Program.InputsRead |= 1U << inst->U.I.SrcReg[i].Index;
150 }
151
152 if (opcode->HasDstReg) {
153 if (inst->U.I.DstReg.File == RC_FILE_OUTPUT)
154 c->Program.OutputsWritten |= 1U << inst->U.I.DstReg.Index;
155 }
156 }
157 }
158
159 /**
160 * Rewrite the program such that a given output is duplicated.
161 */
rc_copy_output(struct radeon_compiler * c,unsigned output,unsigned dup_output)162 void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output)
163 {
164 unsigned tempreg = rc_find_free_temporary(c);
165 struct rc_instruction * inst;
166 struct rc_instruction * insert_pos = c->Program.Instructions.Prev;
167 struct rc_instruction * last_write_inst = NULL;
168 unsigned branch_depth = 0;
169 unsigned loop_depth = 0;
170 bool emit_after_control_flow = false;
171 unsigned num_writes = 0;
172
173 for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
174 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
175
176 if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP)
177 loop_depth++;
178 if (inst->U.I.Opcode == RC_OPCODE_IF)
179 branch_depth++;
180 if ((inst->U.I.Opcode == RC_OPCODE_ENDLOOP && loop_depth--) ||
181 (inst->U.I.Opcode == RC_OPCODE_ENDIF && branch_depth--))
182 if (emit_after_control_flow && loop_depth == 0 && branch_depth == 0) {
183 insert_pos = inst;
184 emit_after_control_flow = false;
185 }
186
187 if (opcode->HasDstReg) {
188 if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
189 num_writes++;
190 inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
191 inst->U.I.DstReg.Index = tempreg;
192 insert_pos = inst;
193 last_write_inst = inst;
194 if (loop_depth != 0 && branch_depth != 0)
195 emit_after_control_flow = true;
196 }
197 }
198 }
199
200 /* If there is only a single write, just duplicate the whole instruction instead.
201 * We can do this even when the single write was is a control flow.
202 */
203 if (num_writes == 1) {
204 last_write_inst->U.I.DstReg.File = RC_FILE_OUTPUT;
205 last_write_inst->U.I.DstReg.Index = output;
206
207 inst = rc_insert_new_instruction(c, last_write_inst);
208 struct rc_instruction * prev = inst->Prev;
209 struct rc_instruction * next = inst->Next;
210 memcpy(inst, last_write_inst, sizeof(struct rc_instruction));
211 inst->Prev = prev;
212 inst->Next = next;
213 inst->U.I.DstReg.Index = dup_output;
214 } else {
215 inst = rc_insert_new_instruction(c, insert_pos);
216 inst->U.I.Opcode = RC_OPCODE_MOV;
217 inst->U.I.DstReg.File = RC_FILE_OUTPUT;
218 inst->U.I.DstReg.Index = output;
219
220 inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
221 inst->U.I.SrcReg[0].Index = tempreg;
222 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
223
224 inst = rc_insert_new_instruction(c, inst);
225 inst->U.I.Opcode = RC_OPCODE_MOV;
226 inst->U.I.DstReg.File = RC_FILE_OUTPUT;
227 inst->U.I.DstReg.Index = dup_output;
228
229 inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
230 inst->U.I.SrcReg[0].Index = tempreg;
231 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
232 }
233
234 c->Program.OutputsWritten |= 1U << dup_output;
235 }
236
237
238 /**
239 * Introduce standard code fragment to deal with fragment.position.
240 */
rc_transform_fragment_wpos(struct radeon_compiler * c,unsigned wpos,unsigned new_input,int full_vtransform)241 void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
242 int full_vtransform)
243 {
244 unsigned tempregi = rc_find_free_temporary(c);
245 struct rc_instruction * inst_rcp;
246 struct rc_instruction * inst_mul;
247 struct rc_instruction * inst_mad;
248 struct rc_instruction * inst;
249
250 c->Program.InputsRead &= ~(1U << wpos);
251 c->Program.InputsRead |= 1U << new_input;
252
253 /* perspective divide */
254 inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions);
255 inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
256
257 inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
258 inst_rcp->U.I.DstReg.Index = tempregi;
259 inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
260
261 inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT;
262 inst_rcp->U.I.SrcReg[0].Index = new_input;
263 inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
264
265 inst_mul = rc_insert_new_instruction(c, inst_rcp);
266 inst_mul->U.I.Opcode = RC_OPCODE_MUL;
267
268 inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
269 inst_mul->U.I.DstReg.Index = tempregi;
270 inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
271
272 inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT;
273 inst_mul->U.I.SrcReg[0].Index = new_input;
274
275 inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
276 inst_mul->U.I.SrcReg[1].Index = tempregi;
277 inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
278
279 /* viewport transformation */
280 inst_mad = rc_insert_new_instruction(c, inst_mul);
281 inst_mad->U.I.Opcode = RC_OPCODE_MAD;
282
283 inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
284 inst_mad->U.I.DstReg.Index = tempregi;
285 inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
286
287 inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
288 inst_mad->U.I.SrcReg[0].Index = tempregi;
289 inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
290
291 inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
292 inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
293
294 inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT;
295 inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0;
296
297 if (full_vtransform) {
298 inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0);
299 inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0);
300 } else {
301 inst_mad->U.I.SrcReg[1].Index =
302 inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0);
303 }
304
305 for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) {
306 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
307 unsigned i;
308
309 for(i = 0; i < opcode->NumSrcRegs; i++) {
310 if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
311 inst->U.I.SrcReg[i].Index == wpos) {
312 inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
313 inst->U.I.SrcReg[i].Index = tempregi;
314 }
315 }
316 }
317 }
318
319
320 /**
321 * The FACE input in hardware contains 1 if it's a back face, 0 otherwise.
322 * Gallium and OpenGL define it the other way around.
323 *
324 * So let's just negate FACE at the beginning of the shader and rewrite the rest
325 * of the shader to read from the newly allocated temporary.
326 */
rc_transform_fragment_face(struct radeon_compiler * c,unsigned face)327 void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
328 {
329 unsigned tempregi = rc_find_free_temporary(c);
330 struct rc_instruction *inst_add;
331 struct rc_instruction *inst;
332
333 /* perspective divide */
334 inst_add = rc_insert_new_instruction(c, &c->Program.Instructions);
335 inst_add->U.I.Opcode = RC_OPCODE_ADD;
336
337 inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
338 inst_add->U.I.DstReg.Index = tempregi;
339 inst_add->U.I.DstReg.WriteMask = RC_MASK_X;
340
341 inst_add->U.I.SrcReg[0].File = RC_FILE_NONE;
342 inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
343
344 inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT;
345 inst_add->U.I.SrcReg[1].Index = face;
346 inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
347 inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
348
349 for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) {
350 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
351 unsigned i;
352
353 for(i = 0; i < opcode->NumSrcRegs; i++) {
354 if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
355 inst->U.I.SrcReg[i].Index == face) {
356 inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
357 inst->U.I.SrcReg[i].Index = tempregi;
358 }
359 }
360 }
361 }
362
/**
 * Register-read callback used by rc_get_stats().
 *
 * Tracks the highest temporary register index seen, counts reads of inline
 * literals, and keeps num_consts at least one past the highest constant
 * index seen.
 *
 * \param userdata  the struct rc_program_stats being filled in
 * \param inst      instruction performing the read (not used here)
 * \param file      register file of the register being read
 * \param index     index of the register being read
 * \param mask      channel mask of the read (not used here)
 */
static void reg_count_callback(void * userdata, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int mask)
{
	struct rc_program_stats *stats = userdata;

	switch (file) {
	case RC_FILE_TEMPORARY:
		/* Stores the maximum index, not a count. */
		if ((int)index > stats->num_temp_regs)
			stats->num_temp_regs = index;
		break;
	case RC_FILE_INLINE:
		stats->num_inline_literals++;
		break;
	case RC_FILE_CONSTANT:
		stats->num_consts = MAX2(stats->num_consts, index + 1);
		break;
	default:
		break;
	}
}
374
rc_get_stats(struct radeon_compiler * c,struct rc_program_stats * s)375 void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
376 {
377 struct rc_instruction * tmp;
378 memset(s, 0, sizeof(*s));
379 unsigned ip = 0;
380 int last_begintex = -1;
381
382 for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions;
383 tmp = tmp->Next, ip++){
384 const struct rc_opcode_info * info;
385 rc_for_all_reads_mask(tmp, reg_count_callback, s);
386 if (tmp->Type == RC_INSTRUCTION_NORMAL) {
387 info = rc_get_opcode_info(tmp->U.I.Opcode);
388 if (info->Opcode == RC_OPCODE_BEGIN_TEX) {
389 /* The R5xx docs mention ~30 cycles in section 8.3.1 */
390 s->num_cycles += 30;
391 last_begintex = ip;
392 continue;
393 }
394 if (info->Opcode == RC_OPCODE_MAD &&
395 rc_inst_has_three_diff_temp_srcs(tmp))
396 s->num_cycles++;
397 } else {
398 if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used)
399 s->num_presub_ops++;
400 if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
401 s->num_presub_ops++;
402 /* Assuming alpha will never be a flow control or
403 * a tex instruction. */
404 if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP)
405 s->num_alpha_insts++;
406 if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP)
407 s->num_rgb_insts++;
408 if (tmp->U.P.RGB.Omod != RC_OMOD_MUL_1 &&
409 tmp->U.P.RGB.Omod != RC_OMOD_DISABLE) {
410 s->num_omod_ops++;
411 }
412 if (tmp->U.P.Alpha.Omod != RC_OMOD_MUL_1 &&
413 tmp->U.P.Alpha.Omod != RC_OMOD_DISABLE) {
414 s->num_omod_ops++;
415 }
416 if (tmp->U.P.Nop)
417 s->num_cycles++;
418 /* SemWait has effect only on R500, the more instructions we can put
419 * between the tex block and the first texture semaphore, the better.
420 */
421 if (tmp->U.P.SemWait && c->is_r500 && last_begintex != -1) {
422 s->num_cycles -= MIN2(30, ip - last_begintex);
423 last_begintex = -1;
424 }
425 info = rc_get_opcode_info(tmp->U.P.RGB.Opcode);
426 }
427 if (info->IsFlowControl) {
428 s->num_fc_insts++;
429 if (info->Opcode == RC_OPCODE_BGNLOOP)
430 s->num_loops++;
431 }
432 /* VS flow control was already translated to the predicate instructions */
433 if (c->type == RC_VERTEX_PROGRAM)
434 if (strstr(info->Name, "PRED") != NULL)
435 s->num_pred_insts++;
436
437 if (info->HasTexture)
438 s->num_tex_insts++;
439 s->num_insts++;
440 s->num_cycles++;
441 }
442 /* Increment here because the reg_count_callback store the max
443 * temporary reg index in s->nun_temp_regs. */
444 s->num_temp_regs++;
445 }
446
print_stats(struct radeon_compiler * c)447 static void print_stats(struct radeon_compiler * c)
448 {
449 struct rc_program_stats s;
450
451 rc_get_stats(c, &s);
452
453 /* Note that we print some dummy values for instruction categories that
454 * only the FS has, because shader-db's report.py wants all shaders to
455 * have the same set.
456 */
457 util_debug_message(c->debug, SHADER_INFO,
458 "%s shader: %u inst, %u vinst, %u sinst, %u predicate, %u flowcontrol,"
459 "%u loops, %u tex, %u presub, %u omod, %u temps, %u consts, %u lits, %u cycles",
460 c->type == RC_VERTEX_PROGRAM ? "VS" : "FS",
461 s.num_insts, s.num_rgb_insts, s.num_alpha_insts, s.num_pred_insts,
462 s.num_fc_insts, s.num_loops, s.num_tex_insts, s.num_presub_ops,
463 s.num_omod_ops, s.num_temp_regs, s.num_consts, s.num_inline_literals,
464 s.num_cycles);
465 }
466
/* Human-readable program type names, indexed by c->type when a program dump
 * is labelled in the debug log.
 * NOTE(review): the order assumes the vertex program type has value 0 and
 * the fragment program type value 1 - confirm against the enum. */
static const char *shader_name[RC_NUM_PROGRAM_TYPES] = {
	"Vertex Program",
	"Fragment Program"
};
471
rc_run_compiler_passes(struct radeon_compiler * c,struct radeon_compiler_pass * list)472 bool rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list)
473 {
474 for (unsigned i = 0; list[i].name; i++) {
475 if (list[i].predicate) {
476 list[i].run(c, list[i].user);
477
478 if (c->Error)
479 return false;
480
481 if ((c->Debug & RC_DBG_LOG) && list[i].dump) {
482 fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name);
483 rc_print_program(&c->Program);
484 }
485 }
486 }
487 return true;
488 }
489
490 /* Executes a list of compiler passes given in the parameter 'list'. */
rc_run_compiler(struct radeon_compiler * c,struct radeon_compiler_pass * list)491 void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list)
492 {
493 if (c->Debug & RC_DBG_LOG) {
494 fprintf(stderr, "%s: before compilation\n", shader_name[c->type]);
495 rc_print_program(&c->Program);
496 }
497
498 if(rc_run_compiler_passes(c, list)) {
499 print_stats(c);
500 }
501 }
502
rc_validate_final_shader(struct radeon_compiler * c,void * user)503 void rc_validate_final_shader(struct radeon_compiler *c, void *user)
504 {
505 /* Check the number of constants. */
506 if (c->Program.Constants.Count > c->max_constants) {
507 rc_error(c, "Too many constants. Max: %i, Got: %i\n",
508 c->max_constants, c->Program.Constants.Count);
509 }
510 }
511