1 /*
2 * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
22
23 #include "radeon_compiler.h"
24
25 #include <stdarg.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29
30 #include "util/u_debug.h"
31 #include "pipe/p_state.h"
32 #include "radeon_dataflow.h"
33 #include "radeon_program.h"
34 #include "radeon_program_pair.h"
35 #include "radeon_regalloc.h"
36 #include "radeon_compiler_util.h"
37
38
rc_init(struct radeon_compiler * c,const struct rc_regalloc_state * rs)39 void rc_init(struct radeon_compiler * c, const struct rc_regalloc_state *rs)
40 {
41 memset(c, 0, sizeof(*c));
42
43 memory_pool_init(&c->Pool);
44 c->Program.Instructions.Prev = &c->Program.Instructions;
45 c->Program.Instructions.Next = &c->Program.Instructions;
46 c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
47 c->regalloc_state = rs;
48 }
49
rc_destroy(struct radeon_compiler * c)50 void rc_destroy(struct radeon_compiler * c)
51 {
52 rc_constants_destroy(&c->Program.Constants);
53 memory_pool_destroy(&c->Pool);
54 free(c->ErrorMsg);
55 }
56
rc_debug(struct radeon_compiler * c,const char * fmt,...)57 void rc_debug(struct radeon_compiler * c, const char * fmt, ...)
58 {
59 va_list ap;
60
61 if (!(c->Debug & RC_DBG_LOG))
62 return;
63
64 va_start(ap, fmt);
65 vfprintf(stderr, fmt, ap);
66 va_end(ap);
67 }
68
rc_error(struct radeon_compiler * c,const char * fmt,...)69 void rc_error(struct radeon_compiler * c, const char * fmt, ...)
70 {
71 va_list ap;
72
73 c->Error = 1;
74
75 if (!c->ErrorMsg) {
76 /* Only remember the first error */
77 char buf[1024];
78 int written;
79
80 va_start(ap, fmt);
81 written = vsnprintf(buf, sizeof(buf), fmt, ap);
82 va_end(ap);
83
84 if (written < sizeof(buf)) {
85 c->ErrorMsg = strdup(buf);
86 } else {
87 c->ErrorMsg = malloc(written + 1);
88
89 va_start(ap, fmt);
90 vsnprintf(c->ErrorMsg, written + 1, fmt, ap);
91 va_end(ap);
92 }
93 }
94
95 if (c->Debug & RC_DBG_LOG) {
96 fprintf(stderr, "r300compiler error: ");
97
98 va_start(ap, fmt);
99 vfprintf(stderr, fmt, ap);
100 va_end(ap);
101 }
102 }
103
/**
 * Report a failed internal assertion as a compiler error.
 *
 * The source location and the assertion text are passed on to
 * rc_error(), which marks the compilation as failed.
 *
 * \param c          compiler context
 * \param file       source file name of the failed check
 * \param line       source line of the failed check
 * \param assertion  text of the condition that did not hold
 * \return always 1
 */
int rc_if_fail_helper(struct radeon_compiler *c, const char *file, int line, const char *assertion)
{
	rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion);

	return 1;
}
109
110 /**
111 * Recompute c->Program.InputsRead and c->Program.OutputsWritten
112 * based on which inputs and outputs are actually referenced
113 * in program instructions.
114 */
rc_calculate_inputs_outputs(struct radeon_compiler * c)115 void rc_calculate_inputs_outputs(struct radeon_compiler * c)
116 {
117 struct rc_instruction *inst;
118
119 c->Program.InputsRead = 0;
120 c->Program.OutputsWritten = 0;
121
122 for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)
123 {
124 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
125 int i;
126
127 for (i = 0; i < opcode->NumSrcRegs; ++i) {
128 if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT)
129 c->Program.InputsRead |= 1U << inst->U.I.SrcReg[i].Index;
130 }
131
132 if (opcode->HasDstReg) {
133 if (inst->U.I.DstReg.File == RC_FILE_OUTPUT)
134 c->Program.OutputsWritten |= 1U << inst->U.I.DstReg.Index;
135 }
136 }
137 }
138
139 /**
140 * Rewrite the program such that a given output is duplicated.
141 */
rc_copy_output(struct radeon_compiler * c,unsigned output,unsigned dup_output)142 void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output)
143 {
144 unsigned tempreg = rc_find_free_temporary(c);
145 struct rc_instruction * inst;
146 struct rc_instruction * insert_pos = c->Program.Instructions.Prev;
147 struct rc_instruction * last_write_inst = NULL;
148 unsigned branch_depth = 0;
149 unsigned loop_depth = 0;
150 bool emit_after_control_flow = false;
151 unsigned num_writes = 0;
152
153 for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
154 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
155
156 if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP)
157 loop_depth++;
158 if (inst->U.I.Opcode == RC_OPCODE_IF)
159 branch_depth++;
160 if ((inst->U.I.Opcode == RC_OPCODE_ENDLOOP && loop_depth--) ||
161 (inst->U.I.Opcode == RC_OPCODE_ENDIF && branch_depth--))
162 if (emit_after_control_flow && loop_depth == 0 && branch_depth == 0) {
163 insert_pos = inst;
164 emit_after_control_flow = false;
165 }
166
167 if (opcode->HasDstReg) {
168 if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
169 num_writes++;
170 inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
171 inst->U.I.DstReg.Index = tempreg;
172 insert_pos = inst;
173 last_write_inst = inst;
174 if (loop_depth != 0 && branch_depth != 0)
175 emit_after_control_flow = true;
176 }
177 }
178 }
179
180 /* If there is only a single write, just duplicate the whole instruction instead.
181 * We can do this even when the single write was is a control flow.
182 */
183 if (num_writes == 1) {
184 last_write_inst->U.I.DstReg.File = RC_FILE_OUTPUT;
185 last_write_inst->U.I.DstReg.Index = output;
186
187 inst = rc_insert_new_instruction(c, last_write_inst);
188 struct rc_instruction * prev = inst->Prev;
189 struct rc_instruction * next = inst->Next;
190 memcpy(inst, last_write_inst, sizeof(struct rc_instruction));
191 inst->Prev = prev;
192 inst->Next = next;
193 inst->U.I.DstReg.Index = dup_output;
194 } else {
195 inst = rc_insert_new_instruction(c, insert_pos);
196 inst->U.I.Opcode = RC_OPCODE_MOV;
197 inst->U.I.DstReg.File = RC_FILE_OUTPUT;
198 inst->U.I.DstReg.Index = output;
199
200 inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
201 inst->U.I.SrcReg[0].Index = tempreg;
202 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
203
204 inst = rc_insert_new_instruction(c, inst);
205 inst->U.I.Opcode = RC_OPCODE_MOV;
206 inst->U.I.DstReg.File = RC_FILE_OUTPUT;
207 inst->U.I.DstReg.Index = dup_output;
208
209 inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
210 inst->U.I.SrcReg[0].Index = tempreg;
211 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
212 }
213
214 c->Program.OutputsWritten |= 1U << dup_output;
215 }
216
217
218 /**
219 * Introduce standard code fragment to deal with fragment.position.
220 */
rc_transform_fragment_wpos(struct radeon_compiler * c,unsigned wpos,unsigned new_input,int full_vtransform)221 void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
222 int full_vtransform)
223 {
224 unsigned tempregi = rc_find_free_temporary(c);
225 struct rc_instruction * inst_rcp;
226 struct rc_instruction * inst_mul;
227 struct rc_instruction * inst_mad;
228 struct rc_instruction * inst;
229
230 c->Program.InputsRead &= ~(1U << wpos);
231 c->Program.InputsRead |= 1U << new_input;
232
233 /* perspective divide */
234 inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions);
235 inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
236
237 inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
238 inst_rcp->U.I.DstReg.Index = tempregi;
239 inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
240
241 inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT;
242 inst_rcp->U.I.SrcReg[0].Index = new_input;
243 inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
244
245 inst_mul = rc_insert_new_instruction(c, inst_rcp);
246 inst_mul->U.I.Opcode = RC_OPCODE_MUL;
247
248 inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
249 inst_mul->U.I.DstReg.Index = tempregi;
250 inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
251
252 inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT;
253 inst_mul->U.I.SrcReg[0].Index = new_input;
254
255 inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
256 inst_mul->U.I.SrcReg[1].Index = tempregi;
257 inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
258
259 /* viewport transformation */
260 inst_mad = rc_insert_new_instruction(c, inst_mul);
261 inst_mad->U.I.Opcode = RC_OPCODE_MAD;
262
263 inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
264 inst_mad->U.I.DstReg.Index = tempregi;
265 inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
266
267 inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
268 inst_mad->U.I.SrcReg[0].Index = tempregi;
269 inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
270
271 inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
272 inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
273
274 inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT;
275 inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0;
276
277 if (full_vtransform) {
278 inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0);
279 inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0);
280 } else {
281 inst_mad->U.I.SrcReg[1].Index =
282 inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0);
283 }
284
285 for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) {
286 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
287 unsigned i;
288
289 for(i = 0; i < opcode->NumSrcRegs; i++) {
290 if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
291 inst->U.I.SrcReg[i].Index == wpos) {
292 inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
293 inst->U.I.SrcReg[i].Index = tempregi;
294 }
295 }
296 }
297 }
298
299
300 /**
301 * The FACE input in hardware contains 1 if it's a back face, 0 otherwise.
302 * Gallium and OpenGL define it the other way around.
303 *
304 * So let's just negate FACE at the beginning of the shader and rewrite the rest
305 * of the shader to read from the newly allocated temporary.
306 */
rc_transform_fragment_face(struct radeon_compiler * c,unsigned face)307 void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
308 {
309 unsigned tempregi = rc_find_free_temporary(c);
310 struct rc_instruction *inst_add;
311 struct rc_instruction *inst;
312
313 /* perspective divide */
314 inst_add = rc_insert_new_instruction(c, &c->Program.Instructions);
315 inst_add->U.I.Opcode = RC_OPCODE_ADD;
316
317 inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
318 inst_add->U.I.DstReg.Index = tempregi;
319 inst_add->U.I.DstReg.WriteMask = RC_MASK_X;
320
321 inst_add->U.I.SrcReg[0].File = RC_FILE_NONE;
322 inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
323
324 inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT;
325 inst_add->U.I.SrcReg[1].Index = face;
326 inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
327 inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
328
329 for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) {
330 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
331 unsigned i;
332
333 for(i = 0; i < opcode->NumSrcRegs; i++) {
334 if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
335 inst->U.I.SrcReg[i].Index == face) {
336 inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
337 inst->U.I.SrcReg[i].Index = tempregi;
338 }
339 }
340 }
341 }
342
/**
 * Register-read callback used by rc_get_stats() to gather register usage.
 *
 * \param userdata  the struct rc_program_stats being filled in
 * \param inst      instruction performing the read (unused)
 * \param file      register file that is read
 * \param index     register index that is read
 * \param mask      channel mask of the read (unused)
 *
 * For temporaries the highest index seen so far is tracked (not a count),
 * inline literals are counted per read, and for constants the highest
 * index plus one is tracked.
 */
static void reg_count_callback(void *userdata, struct rc_instruction *inst,
			       rc_register_file file, unsigned int index, unsigned int mask)
{
	struct rc_program_stats *stats = userdata;

	switch (file) {
	case RC_FILE_TEMPORARY:
		if ((int)index > stats->num_temp_regs)
			stats->num_temp_regs = index;
		break;
	case RC_FILE_INLINE:
		stats->num_inline_literals++;
		break;
	case RC_FILE_CONSTANT:
		stats->num_consts = MAX2(stats->num_consts, index + 1);
		break;
	default:
		break;
	}
}
354
rc_get_stats(struct radeon_compiler * c,struct rc_program_stats * s)355 void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
356 {
357 struct rc_instruction * tmp;
358 memset(s, 0, sizeof(*s));
359
360 for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions;
361 tmp = tmp->Next){
362 const struct rc_opcode_info * info;
363 rc_for_all_reads_mask(tmp, reg_count_callback, s);
364 if (tmp->Type == RC_INSTRUCTION_NORMAL) {
365 info = rc_get_opcode_info(tmp->U.I.Opcode);
366 if (info->Opcode == RC_OPCODE_BEGIN_TEX)
367 continue;
368 if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE)
369 s->num_presub_ops++;
370 } else {
371 if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used)
372 s->num_presub_ops++;
373 if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
374 s->num_presub_ops++;
375 /* Assuming alpha will never be a flow control or
376 * a tex instruction. */
377 if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP)
378 s->num_alpha_insts++;
379 if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP)
380 s->num_rgb_insts++;
381 if (tmp->U.P.RGB.Omod != RC_OMOD_MUL_1 &&
382 tmp->U.P.RGB.Omod != RC_OMOD_DISABLE) {
383 s->num_omod_ops++;
384 }
385 if (tmp->U.P.Alpha.Omod != RC_OMOD_MUL_1 &&
386 tmp->U.P.Alpha.Omod != RC_OMOD_DISABLE) {
387 s->num_omod_ops++;
388 }
389 info = rc_get_opcode_info(tmp->U.P.RGB.Opcode);
390 }
391 if (info->IsFlowControl) {
392 s->num_fc_insts++;
393 if (info->Opcode == RC_OPCODE_BGNLOOP)
394 s->num_loops++;
395 }
396 /* VS flow control was already translated to the predicate instructions */
397 if (c->type == RC_VERTEX_PROGRAM)
398 if (strstr(info->Name, "PRED") != NULL)
399 s->num_pred_insts++;
400
401 if (info->HasTexture)
402 s->num_tex_insts++;
403 s->num_insts++;
404 }
405 /* Increment here because the reg_count_callback store the max
406 * temporary reg index in s->nun_temp_regs. */
407 s->num_temp_regs++;
408 }
409
print_stats(struct radeon_compiler * c)410 static void print_stats(struct radeon_compiler * c)
411 {
412 struct rc_program_stats s;
413
414 rc_get_stats(c, &s);
415
416 /* Note that we print some dummy values for instruction categories that
417 * only the FS has, becasue shader-db's report.py wants all shaders to
418 * have the same set.
419 */
420 util_debug_message(c->debug, SHADER_INFO, "%s shader: %u inst, %u vinst, %u sinst, %u predicate, %u flowcontrol, %u loops, %u tex, %u presub, %u omod, %u temps, %u consts, %u lits",
421 c->type == RC_VERTEX_PROGRAM ? "VS" : "FS",
422 s.num_insts, s.num_rgb_insts, s.num_alpha_insts, s.num_pred_insts,
423 s.num_fc_insts, s.num_loops, s.num_tex_insts, s.num_presub_ops,
424 s.num_omod_ops, s.num_temp_regs, s.num_consts, s.num_inline_literals);
425 }
426
427 static const char *shader_name[RC_NUM_PROGRAM_TYPES] = {
428 "Vertex Program",
429 "Fragment Program"
430 };
431
rc_run_compiler_passes(struct radeon_compiler * c,struct radeon_compiler_pass * list)432 bool rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list)
433 {
434 for (unsigned i = 0; list[i].name; i++) {
435 if (list[i].predicate) {
436 list[i].run(c, list[i].user);
437
438 if (c->Error)
439 return false;
440
441 if ((c->Debug & RC_DBG_LOG) && list[i].dump) {
442 fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name);
443 rc_print_program(&c->Program);
444 }
445 }
446 }
447 return true;
448 }
449
450 /* Executes a list of compiler passes given in the parameter 'list'. */
rc_run_compiler(struct radeon_compiler * c,struct radeon_compiler_pass * list)451 void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list)
452 {
453 if (c->Debug & RC_DBG_LOG) {
454 fprintf(stderr, "%s: before compilation\n", shader_name[c->type]);
455 rc_print_program(&c->Program);
456 }
457
458 if(rc_run_compiler_passes(c, list)) {
459 print_stats(c);
460 }
461 }
462
rc_validate_final_shader(struct radeon_compiler * c,void * user)463 void rc_validate_final_shader(struct radeon_compiler *c, void *user)
464 {
465 /* Check the number of constants. */
466 if (c->Program.Constants.Count > c->max_constants) {
467 rc_error(c, "Too many constants. Max: %i, Got: %i\n",
468 c->max_constants, c->Program.Constants.Count);
469 }
470 }
471