1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "main/glheader.h"
25 #include "main/context.h"
26 #include "main/macros.h"
27 #include "program.h"
28 #include "prog_instruction.h"
29 #include "prog_optimize.h"
30 #include "prog_parameter.h"
31 #include <stdbool.h>
32
33 static bool
src_regs_are_constant(const struct prog_instruction * inst,unsigned num_srcs)34 src_regs_are_constant(const struct prog_instruction *inst, unsigned num_srcs)
35 {
36 unsigned i;
37
38 for (i = 0; i < num_srcs; i++) {
39 if (inst->SrcReg[i].File != PROGRAM_CONSTANT)
40 return false;
41 if (inst->SrcReg[i].RelAddr)
42 return false;
43 }
44
45 return true;
46 }
47
48 static struct prog_src_register
src_reg_for_float(struct gl_program * prog,float val)49 src_reg_for_float(struct gl_program *prog, float val)
50 {
51 struct prog_src_register src;
52 unsigned swiz;
53
54 memset(&src, 0, sizeof(src));
55
56 src.File = PROGRAM_CONSTANT;
57 src.Index = _mesa_add_unnamed_constant(prog->Parameters,
58 (gl_constant_value *) &val, 1, &swiz);
59 src.Swizzle = swiz;
60 return src;
61 }
62
63 static struct prog_src_register
src_reg_for_vec4(struct gl_program * prog,const float * val)64 src_reg_for_vec4(struct gl_program *prog, const float *val)
65 {
66 struct prog_src_register src;
67 unsigned swiz;
68
69 memset(&src, 0, sizeof(src));
70
71 src.File = PROGRAM_CONSTANT;
72 src.Index = _mesa_add_unnamed_constant(prog->Parameters,
73 (gl_constant_value *) val, 4, &swiz);
74 src.Swizzle = swiz;
75 return src;
76 }
77
78 static bool
src_regs_are_same(const struct prog_src_register * a,const struct prog_src_register * b)79 src_regs_are_same(const struct prog_src_register *a,
80 const struct prog_src_register *b)
81 {
82 return (a->File == b->File)
83 && (a->Index == b->Index)
84 && (a->Swizzle == b->Swizzle)
85 && (a->Negate == b->Negate)
86 && (a->RelAddr == 0)
87 && (b->RelAddr == 0);
88 }
89
90 static void
get_value(struct gl_program * prog,struct prog_src_register * r,float * data)91 get_value(struct gl_program *prog, struct prog_src_register *r, float *data)
92 {
93 const gl_constant_value *const value =
94 prog->Parameters->ParameterValues[r->Index];
95
96 data[0] = value[GET_SWZ(r->Swizzle, 0)].f;
97 data[1] = value[GET_SWZ(r->Swizzle, 1)].f;
98 data[2] = value[GET_SWZ(r->Swizzle, 2)].f;
99 data[3] = value[GET_SWZ(r->Swizzle, 3)].f;
100
101 if (r->Negate & 0x01) {
102 data[0] = -data[0];
103 }
104
105 if (r->Negate & 0x02) {
106 data[1] = -data[1];
107 }
108
109 if (r->Negate & 0x04) {
110 data[2] = -data[2];
111 }
112
113 if (r->Negate & 0x08) {
114 data[3] = -data[3];
115 }
116 }
117
118 /**
119 * Try to replace instructions that produce a constant result with simple moves
120 *
121 * The hope is that a following copy propagation pass will eliminate the
122 * unnecessary move instructions.
123 */
124 GLboolean
_mesa_constant_fold(struct gl_program * prog)125 _mesa_constant_fold(struct gl_program *prog)
126 {
127 bool progress = false;
128 unsigned i;
129
130 for (i = 0; i < prog->arb.NumInstructions; i++) {
131 struct prog_instruction *const inst = &prog->arb.Instructions[i];
132
133 switch (inst->Opcode) {
134 case OPCODE_ADD:
135 if (src_regs_are_constant(inst, 2)) {
136 float a[4];
137 float b[4];
138 float result[4];
139
140 get_value(prog, &inst->SrcReg[0], a);
141 get_value(prog, &inst->SrcReg[1], b);
142
143 result[0] = a[0] + b[0];
144 result[1] = a[1] + b[1];
145 result[2] = a[2] + b[2];
146 result[3] = a[3] + b[3];
147
148 inst->Opcode = OPCODE_MOV;
149 inst->SrcReg[0] = src_reg_for_vec4(prog, result);
150
151 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
152 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
153
154 progress = true;
155 }
156 break;
157
158 case OPCODE_CMP:
159 /* FINISHME: We could also optimize CMP instructions where the first
160 * FINISHME: source is a constant that is either all < 0.0 or all
161 * FINISHME: >= 0.0.
162 */
163 if (src_regs_are_constant(inst, 3)) {
164 float a[4];
165 float b[4];
166 float c[4];
167 float result[4];
168
169 get_value(prog, &inst->SrcReg[0], a);
170 get_value(prog, &inst->SrcReg[1], b);
171 get_value(prog, &inst->SrcReg[2], c);
172
173 result[0] = a[0] < 0.0f ? b[0] : c[0];
174 result[1] = a[1] < 0.0f ? b[1] : c[1];
175 result[2] = a[2] < 0.0f ? b[2] : c[2];
176 result[3] = a[3] < 0.0f ? b[3] : c[3];
177
178 inst->Opcode = OPCODE_MOV;
179 inst->SrcReg[0] = src_reg_for_vec4(prog, result);
180
181 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
182 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
183 inst->SrcReg[2].File = PROGRAM_UNDEFINED;
184 inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
185
186 progress = true;
187 }
188 break;
189
190 case OPCODE_DP2:
191 case OPCODE_DP3:
192 case OPCODE_DP4:
193 if (src_regs_are_constant(inst, 2)) {
194 float a[4];
195 float b[4];
196 float result;
197
198 get_value(prog, &inst->SrcReg[0], a);
199 get_value(prog, &inst->SrcReg[1], b);
200
201 result = (a[0] * b[0]) + (a[1] * b[1]);
202
203 if (inst->Opcode >= OPCODE_DP3)
204 result += a[2] * b[2];
205
206 if (inst->Opcode == OPCODE_DP4)
207 result += a[3] * b[3];
208
209 inst->Opcode = OPCODE_MOV;
210 inst->SrcReg[0] = src_reg_for_float(prog, result);
211
212 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
213 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
214
215 progress = true;
216 }
217 break;
218
219 case OPCODE_MUL:
220 if (src_regs_are_constant(inst, 2)) {
221 float a[4];
222 float b[4];
223 float result[4];
224
225 get_value(prog, &inst->SrcReg[0], a);
226 get_value(prog, &inst->SrcReg[1], b);
227
228 result[0] = a[0] * b[0];
229 result[1] = a[1] * b[1];
230 result[2] = a[2] * b[2];
231 result[3] = a[3] * b[3];
232
233 inst->Opcode = OPCODE_MOV;
234 inst->SrcReg[0] = src_reg_for_vec4(prog, result);
235
236 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
237 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
238
239 progress = true;
240 }
241 break;
242
243 case OPCODE_SGE:
244 if (src_regs_are_constant(inst, 2)) {
245 float a[4];
246 float b[4];
247 float result[4];
248
249 get_value(prog, &inst->SrcReg[0], a);
250 get_value(prog, &inst->SrcReg[1], b);
251
252 result[0] = (a[0] >= b[0]) ? 1.0f : 0.0f;
253 result[1] = (a[1] >= b[1]) ? 1.0f : 0.0f;
254 result[2] = (a[2] >= b[2]) ? 1.0f : 0.0f;
255 result[3] = (a[3] >= b[3]) ? 1.0f : 0.0f;
256
257 inst->Opcode = OPCODE_MOV;
258 inst->SrcReg[0] = src_reg_for_vec4(prog, result);
259
260 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
261 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
262
263 progress = true;
264 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
265 inst->Opcode = OPCODE_MOV;
266 inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
267
268 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
269 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
270
271 progress = true;
272 }
273 break;
274
275 case OPCODE_SLT:
276 if (src_regs_are_constant(inst, 2)) {
277 float a[4];
278 float b[4];
279 float result[4];
280
281 get_value(prog, &inst->SrcReg[0], a);
282 get_value(prog, &inst->SrcReg[1], b);
283
284 result[0] = (a[0] < b[0]) ? 1.0f : 0.0f;
285 result[1] = (a[1] < b[1]) ? 1.0f : 0.0f;
286 result[2] = (a[2] < b[2]) ? 1.0f : 0.0f;
287 result[3] = (a[3] < b[3]) ? 1.0f : 0.0f;
288
289 inst->Opcode = OPCODE_MOV;
290 inst->SrcReg[0] = src_reg_for_vec4(prog, result);
291
292 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
293 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
294
295 progress = true;
296 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
297 inst->Opcode = OPCODE_MOV;
298 inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
299
300 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
301 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
302
303 progress = true;
304 }
305 break;
306
307 default:
308 break;
309 }
310 }
311
312 return progress;
313 }
314