1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "main/glheader.h"
25 #include "main/context.h"
26 #include "main/macros.h"
27 #include "program.h"
28 #include "prog_instruction.h"
29 #include "prog_optimize.h"
30 #include "prog_parameter.h"
31 #include <stdbool.h>
32
33 static bool
src_regs_are_constant(const struct prog_instruction * inst,unsigned num_srcs)34 src_regs_are_constant(const struct prog_instruction *inst, unsigned num_srcs)
35 {
36 unsigned i;
37
38 for (i = 0; i < num_srcs; i++) {
39 if (inst->SrcReg[i].File != PROGRAM_CONSTANT)
40 return false;
41 }
42
43 return true;
44 }
45
46 static struct prog_src_register
src_reg_for_float(struct gl_program * prog,float val)47 src_reg_for_float(struct gl_program *prog, float val)
48 {
49 struct prog_src_register src;
50 unsigned swiz;
51
52 memset(&src, 0, sizeof(src));
53
54 src.File = PROGRAM_CONSTANT;
55 src.Index = _mesa_add_unnamed_constant(prog->Parameters,
56 (gl_constant_value *) &val, 1, &swiz);
57 src.Swizzle = swiz;
58 return src;
59 }
60
61 static struct prog_src_register
src_reg_for_vec4(struct gl_program * prog,const float * val)62 src_reg_for_vec4(struct gl_program *prog, const float *val)
63 {
64 struct prog_src_register src;
65 unsigned swiz;
66
67 memset(&src, 0, sizeof(src));
68
69 src.File = PROGRAM_CONSTANT;
70 src.Index = _mesa_add_unnamed_constant(prog->Parameters,
71 (gl_constant_value *) val, 4, &swiz);
72 src.Swizzle = swiz;
73 return src;
74 }
75
76 static bool
src_regs_are_same(const struct prog_src_register * a,const struct prog_src_register * b)77 src_regs_are_same(const struct prog_src_register *a,
78 const struct prog_src_register *b)
79 {
80 return (a->File == b->File)
81 && (a->Index == b->Index)
82 && (a->Swizzle == b->Swizzle)
83 && (a->Abs == b->Abs)
84 && (a->Negate == b->Negate)
85 && (a->RelAddr == 0)
86 && (b->RelAddr == 0);
87 }
88
89 static void
get_value(struct gl_program * prog,struct prog_src_register * r,float * data)90 get_value(struct gl_program *prog, struct prog_src_register *r, float *data)
91 {
92 const gl_constant_value *const value =
93 prog->Parameters->ParameterValues[r->Index];
94
95 data[0] = value[GET_SWZ(r->Swizzle, 0)].f;
96 data[1] = value[GET_SWZ(r->Swizzle, 1)].f;
97 data[2] = value[GET_SWZ(r->Swizzle, 2)].f;
98 data[3] = value[GET_SWZ(r->Swizzle, 3)].f;
99
100 if (r->Abs) {
101 data[0] = fabsf(data[0]);
102 data[1] = fabsf(data[1]);
103 data[2] = fabsf(data[2]);
104 data[3] = fabsf(data[3]);
105 }
106
107 if (r->Negate & 0x01) {
108 data[0] = -data[0];
109 }
110
111 if (r->Negate & 0x02) {
112 data[1] = -data[1];
113 }
114
115 if (r->Negate & 0x04) {
116 data[2] = -data[2];
117 }
118
119 if (r->Negate & 0x08) {
120 data[3] = -data[3];
121 }
122 }
123
124 /**
125 * Try to replace instructions that produce a constant result with simple moves
126 *
127 * The hope is that a following copy propagation pass will eliminate the
128 * unnecessary move instructions.
129 */
130 GLboolean
_mesa_constant_fold(struct gl_program * prog)131 _mesa_constant_fold(struct gl_program *prog)
132 {
133 bool progress = false;
134 unsigned i;
135
136 for (i = 0; i < prog->NumInstructions; i++) {
137 struct prog_instruction *const inst = &prog->Instructions[i];
138
139 switch (inst->Opcode) {
140 case OPCODE_ADD:
141 if (src_regs_are_constant(inst, 2)) {
142 float a[4];
143 float b[4];
144 float result[4];
145
146 get_value(prog, &inst->SrcReg[0], a);
147 get_value(prog, &inst->SrcReg[1], b);
148
149 result[0] = a[0] + b[0];
150 result[1] = a[1] + b[1];
151 result[2] = a[2] + b[2];
152 result[3] = a[3] + b[3];
153
154 inst->Opcode = OPCODE_MOV;
155 inst->SrcReg[0] = src_reg_for_vec4(prog, result);
156
157 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
158 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
159
160 progress = true;
161 }
162 break;
163
164 case OPCODE_CMP:
165 /* FINISHME: We could also optimize CMP instructions where the first
166 * FINISHME: source is a constant that is either all < 0.0 or all
167 * FINISHME: >= 0.0.
168 */
169 if (src_regs_are_constant(inst, 3)) {
170 float a[4];
171 float b[4];
172 float c[4];
173 float result[4];
174
175 get_value(prog, &inst->SrcReg[0], a);
176 get_value(prog, &inst->SrcReg[1], b);
177 get_value(prog, &inst->SrcReg[2], c);
178
179 result[0] = a[0] < 0.0f ? b[0] : c[0];
180 result[1] = a[1] < 0.0f ? b[1] : c[1];
181 result[2] = a[2] < 0.0f ? b[2] : c[2];
182 result[3] = a[3] < 0.0f ? b[3] : c[3];
183
184 inst->Opcode = OPCODE_MOV;
185 inst->SrcReg[0] = src_reg_for_vec4(prog, result);
186
187 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
188 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
189 inst->SrcReg[2].File = PROGRAM_UNDEFINED;
190 inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
191
192 progress = true;
193 }
194 break;
195
196 case OPCODE_DP2:
197 case OPCODE_DP3:
198 case OPCODE_DP4:
199 if (src_regs_are_constant(inst, 2)) {
200 float a[4];
201 float b[4];
202 float result;
203
204 get_value(prog, &inst->SrcReg[0], a);
205 get_value(prog, &inst->SrcReg[1], b);
206
207 /* It seems like a loop could be used here, but we cleverly put
208 * DP2A between DP2 and DP3. Subtracting DP2 (or similar) from
209 * the opcode results in various failures of the loop control.
210 */
211 result = (a[0] * b[0]) + (a[1] * b[1]);
212
213 if (inst->Opcode >= OPCODE_DP3)
214 result += a[2] * b[2];
215
216 if (inst->Opcode == OPCODE_DP4)
217 result += a[3] * b[3];
218
219 inst->Opcode = OPCODE_MOV;
220 inst->SrcReg[0] = src_reg_for_float(prog, result);
221
222 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
223 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
224
225 progress = true;
226 }
227 break;
228
229 case OPCODE_MUL:
230 if (src_regs_are_constant(inst, 2)) {
231 float a[4];
232 float b[4];
233 float result[4];
234
235 get_value(prog, &inst->SrcReg[0], a);
236 get_value(prog, &inst->SrcReg[1], b);
237
238 result[0] = a[0] * b[0];
239 result[1] = a[1] * b[1];
240 result[2] = a[2] * b[2];
241 result[3] = a[3] * b[3];
242
243 inst->Opcode = OPCODE_MOV;
244 inst->SrcReg[0] = src_reg_for_vec4(prog, result);
245
246 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
247 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
248
249 progress = true;
250 }
251 break;
252
253 case OPCODE_SEQ:
254 if (src_regs_are_constant(inst, 2)) {
255 float a[4];
256 float b[4];
257 float result[4];
258
259 get_value(prog, &inst->SrcReg[0], a);
260 get_value(prog, &inst->SrcReg[1], b);
261
262 result[0] = (a[0] == b[0]) ? 1.0f : 0.0f;
263 result[1] = (a[1] == b[1]) ? 1.0f : 0.0f;
264 result[2] = (a[2] == b[2]) ? 1.0f : 0.0f;
265 result[3] = (a[3] == b[3]) ? 1.0f : 0.0f;
266
267 inst->Opcode = OPCODE_MOV;
268 inst->SrcReg[0] = src_reg_for_vec4(prog, result);
269
270 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
271 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
272
273 progress = true;
274 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
275 inst->Opcode = OPCODE_MOV;
276 inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
277
278 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
279 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
280
281 progress = true;
282 }
283 break;
284
285 case OPCODE_SGE:
286 if (src_regs_are_constant(inst, 2)) {
287 float a[4];
288 float b[4];
289 float result[4];
290
291 get_value(prog, &inst->SrcReg[0], a);
292 get_value(prog, &inst->SrcReg[1], b);
293
294 result[0] = (a[0] >= b[0]) ? 1.0f : 0.0f;
295 result[1] = (a[1] >= b[1]) ? 1.0f : 0.0f;
296 result[2] = (a[2] >= b[2]) ? 1.0f : 0.0f;
297 result[3] = (a[3] >= b[3]) ? 1.0f : 0.0f;
298
299 inst->Opcode = OPCODE_MOV;
300 inst->SrcReg[0] = src_reg_for_vec4(prog, result);
301
302 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
303 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
304
305 progress = true;
306 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
307 inst->Opcode = OPCODE_MOV;
308 inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
309
310 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
311 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
312
313 progress = true;
314 }
315 break;
316
317 case OPCODE_SGT:
318 if (src_regs_are_constant(inst, 2)) {
319 float a[4];
320 float b[4];
321 float result[4];
322
323 get_value(prog, &inst->SrcReg[0], a);
324 get_value(prog, &inst->SrcReg[1], b);
325
326 result[0] = (a[0] > b[0]) ? 1.0f : 0.0f;
327 result[1] = (a[1] > b[1]) ? 1.0f : 0.0f;
328 result[2] = (a[2] > b[2]) ? 1.0f : 0.0f;
329 result[3] = (a[3] > b[3]) ? 1.0f : 0.0f;
330
331 inst->Opcode = OPCODE_MOV;
332 inst->SrcReg[0] = src_reg_for_vec4(prog, result);
333
334 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
335 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
336
337 progress = true;
338 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
339 inst->Opcode = OPCODE_MOV;
340 inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
341
342 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
343 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
344
345 progress = true;
346 }
347 break;
348
349 case OPCODE_SLE:
350 if (src_regs_are_constant(inst, 2)) {
351 float a[4];
352 float b[4];
353 float result[4];
354
355 get_value(prog, &inst->SrcReg[0], a);
356 get_value(prog, &inst->SrcReg[1], b);
357
358 result[0] = (a[0] <= b[0]) ? 1.0f : 0.0f;
359 result[1] = (a[1] <= b[1]) ? 1.0f : 0.0f;
360 result[2] = (a[2] <= b[2]) ? 1.0f : 0.0f;
361 result[3] = (a[3] <= b[3]) ? 1.0f : 0.0f;
362
363 inst->Opcode = OPCODE_MOV;
364 inst->SrcReg[0] = src_reg_for_vec4(prog, result);
365
366 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
367 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
368
369 progress = true;
370 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
371 inst->Opcode = OPCODE_MOV;
372 inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
373
374 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
375 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
376
377 progress = true;
378 }
379 break;
380
381 case OPCODE_SLT:
382 if (src_regs_are_constant(inst, 2)) {
383 float a[4];
384 float b[4];
385 float result[4];
386
387 get_value(prog, &inst->SrcReg[0], a);
388 get_value(prog, &inst->SrcReg[1], b);
389
390 result[0] = (a[0] < b[0]) ? 1.0f : 0.0f;
391 result[1] = (a[1] < b[1]) ? 1.0f : 0.0f;
392 result[2] = (a[2] < b[2]) ? 1.0f : 0.0f;
393 result[3] = (a[3] < b[3]) ? 1.0f : 0.0f;
394
395 inst->Opcode = OPCODE_MOV;
396 inst->SrcReg[0] = src_reg_for_vec4(prog, result);
397
398 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
399 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
400
401 progress = true;
402 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
403 inst->Opcode = OPCODE_MOV;
404 inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
405
406 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
407 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
408
409 progress = true;
410 }
411 break;
412
413 case OPCODE_SNE:
414 if (src_regs_are_constant(inst, 2)) {
415 float a[4];
416 float b[4];
417 float result[4];
418
419 get_value(prog, &inst->SrcReg[0], a);
420 get_value(prog, &inst->SrcReg[1], b);
421
422 result[0] = (a[0] != b[0]) ? 1.0f : 0.0f;
423 result[1] = (a[1] != b[1]) ? 1.0f : 0.0f;
424 result[2] = (a[2] != b[2]) ? 1.0f : 0.0f;
425 result[3] = (a[3] != b[3]) ? 1.0f : 0.0f;
426
427 inst->Opcode = OPCODE_MOV;
428 inst->SrcReg[0] = src_reg_for_vec4(prog, result);
429
430 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
431 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
432
433 progress = true;
434 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
435 inst->Opcode = OPCODE_MOV;
436 inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
437
438 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
439 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
440
441 progress = true;
442 }
443 break;
444
445 default:
446 break;
447 }
448 }
449
450 return progress;
451 }
452