1 /**************************************************************************
2
3 Copyright (C) 2005 Aapo Tahkola.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Aapo Tahkola <aet@rasterburn.org>
31 * Roland Scheidegger <rscheidegger_lists@hispeed.ch>
32 */
33
34 #include "main/errors.h"
35 #include "main/glheader.h"
36 #include "main/macros.h"
37 #include "main/enums.h"
38 #include "program/program.h"
39 #include "program/prog_instruction.h"
40 #include "program/prog_parameter.h"
41 #include "program/prog_statevars.h"
42 #include "program/programopt.h"
43 #include "tnl/tnl.h"
44
45 #include "r200_context.h"
46 #include "r200_vertprog.h"
47 #include "r200_ioctl.h"
48 #include "r200_tcl.h"
49
/*
 * Compile-time guard: the helpers in this file hand Mesa's SWIZZLE_* and
 * WRITEMASK_* values to the hardware encoding unchanged (see t_swizzle()
 * and t_dst_mask()), which is only valid while each Mesa value equals its
 * VSF_IN_COMPONENT_* / VSF_FLAG_* counterpart.
 */
#if !(SWIZZLE_X == VSF_IN_COMPONENT_X && \
      SWIZZLE_Y == VSF_IN_COMPONENT_Y && \
      SWIZZLE_Z == VSF_IN_COMPONENT_Z && \
      SWIZZLE_W == VSF_IN_COMPONENT_W && \
      SWIZZLE_ZERO == VSF_IN_COMPONENT_ZERO && \
      SWIZZLE_ONE == VSF_IN_COMPONENT_ONE && \
      WRITEMASK_X == VSF_FLAG_X && \
      WRITEMASK_Y == VSF_FLAG_Y && \
      WRITEMASK_Z == VSF_FLAG_Z && \
      WRITEMASK_W == VSF_FLAG_W)
#error Cannot change these!
#endif
62
63 #define SCALAR_FLAG (1<<31)
64 #define FLAG_MASK (1<<31)
65 #define OP_MASK (0xf) /* we are unlikely to have more than 15 */
66 #define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
67
68 static struct{
69 char *name;
70 int opcode;
71 unsigned long ip; /* number of input operands and flags */
72 }op_names[]={
73 OPN(ABS, 1),
74 OPN(ADD, 2),
75 OPN(ARL, 1|SCALAR_FLAG),
76 OPN(DP3, 2),
77 OPN(DP4, 2),
78 OPN(DPH, 2),
79 OPN(DST, 2),
80 OPN(EX2, 1|SCALAR_FLAG),
81 OPN(EXP, 1|SCALAR_FLAG),
82 OPN(FLR, 1),
83 OPN(FRC, 1),
84 OPN(LG2, 1|SCALAR_FLAG),
85 OPN(LIT, 1),
86 OPN(LOG, 1|SCALAR_FLAG),
87 OPN(MAD, 3),
88 OPN(MAX, 2),
89 OPN(MIN, 2),
90 OPN(MOV, 1),
91 OPN(MUL, 2),
92 OPN(POW, 2|SCALAR_FLAG),
93 OPN(RCP, 1|SCALAR_FLAG),
94 OPN(RSQ, 1|SCALAR_FLAG),
95 OPN(SGE, 2),
96 OPN(SLT, 2),
97 OPN(SUB, 2),
98 OPN(SWZ, 1),
99 OPN(XPD, 2),
100 OPN(END, 0),
101 };
102 #undef OPN
103
r200VertexProgUpdateParams(struct gl_context * ctx,struct r200_vertex_program * vp)104 static GLboolean r200VertexProgUpdateParams(struct gl_context *ctx, struct r200_vertex_program *vp)
105 {
106 r200ContextPtr rmesa = R200_CONTEXT( ctx );
107 GLfloat *fcmd = (GLfloat *)&rmesa->hw.vpp[0].cmd[VPP_CMD_0 + 1];
108 int pi;
109 struct gl_program *mesa_vp = &vp->mesa_program;
110 struct gl_program_parameter_list *paramList;
111 drm_radeon_cmd_header_t tmp;
112
113 R200_STATECHANGE( rmesa, vpp[0] );
114 R200_STATECHANGE( rmesa, vpp[1] );
115 assert(mesa_vp->Parameters);
116 _mesa_load_state_parameters(ctx, mesa_vp->Parameters);
117 paramList = mesa_vp->Parameters;
118
119 if(paramList->NumParameters > R200_VSF_MAX_PARAM){
120 fprintf(stderr, "%s:Params exhausted\n", __func__);
121 return GL_FALSE;
122 }
123
124 for(pi = 0; pi < paramList->NumParameters; pi++) {
125 unsigned pvo = paramList->Parameters[pi].ValueOffset;
126
127 switch(paramList->Parameters[pi].Type) {
128 case PROGRAM_STATE_VAR:
129 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
130 case PROGRAM_CONSTANT:
131 *fcmd++ = paramList->ParameterValues[pvo + 0].f;
132 *fcmd++ = paramList->ParameterValues[pvo + 1].f;
133 *fcmd++ = paramList->ParameterValues[pvo + 2].f;
134 *fcmd++ = paramList->ParameterValues[pvo + 3].f;
135 break;
136 default:
137 _mesa_problem(NULL, "Bad param type in %s", __func__);
138 break;
139 }
140 if (pi == 95) {
141 fcmd = (GLfloat *)&rmesa->hw.vpp[1].cmd[VPP_CMD_0 + 1];
142 }
143 }
144 /* hack up the cmd_size so not the whole state atom is emitted always. */
145 rmesa->hw.vpp[0].cmd_size =
146 1 + 4 * ((paramList->NumParameters > 96) ? 96 : paramList->NumParameters);
147 tmp.i = rmesa->hw.vpp[0].cmd[VPP_CMD_0];
148 tmp.veclinear.count = (paramList->NumParameters > 96) ? 96 : paramList->NumParameters;
149 rmesa->hw.vpp[0].cmd[VPP_CMD_0] = tmp.i;
150 if (paramList->NumParameters > 96) {
151 rmesa->hw.vpp[1].cmd_size = 1 + 4 * (paramList->NumParameters - 96);
152 tmp.i = rmesa->hw.vpp[1].cmd[VPP_CMD_0];
153 tmp.veclinear.count = paramList->NumParameters - 96;
154 rmesa->hw.vpp[1].cmd[VPP_CMD_0] = tmp.i;
155 }
156 return GL_TRUE;
157 }
158
/**
 * Translate a Mesa write mask into the hardware write-mask flags.
 *
 * Only the bits in VSF_FLAG_ALL are kept; no remapping is needed because
 * WRITEMASK_* is equivalent to VSF_FLAG_*.
 */
static inline unsigned long t_dst_mask(GLuint mask)
{
   const unsigned long hw_mask = mask & VSF_FLAG_ALL;

   return hw_mask;
}
164
t_dst(struct prog_dst_register * dst)165 static unsigned long t_dst(struct prog_dst_register *dst)
166 {
167 switch(dst->File) {
168 case PROGRAM_TEMPORARY:
169 return ((dst->Index << R200_VPI_OUT_REG_INDEX_SHIFT)
170 | R200_VSF_OUT_CLASS_TMP);
171 case PROGRAM_OUTPUT:
172 switch (dst->Index) {
173 case VARYING_SLOT_POS:
174 return R200_VSF_OUT_CLASS_RESULT_POS;
175 case VARYING_SLOT_COL0:
176 return R200_VSF_OUT_CLASS_RESULT_COLOR;
177 case VARYING_SLOT_COL1:
178 return ((1 << R200_VPI_OUT_REG_INDEX_SHIFT)
179 | R200_VSF_OUT_CLASS_RESULT_COLOR);
180 case VARYING_SLOT_FOGC:
181 return R200_VSF_OUT_CLASS_RESULT_FOGC;
182 case VARYING_SLOT_TEX0:
183 case VARYING_SLOT_TEX1:
184 case VARYING_SLOT_TEX2:
185 case VARYING_SLOT_TEX3:
186 case VARYING_SLOT_TEX4:
187 case VARYING_SLOT_TEX5:
188 return (((dst->Index - VARYING_SLOT_TEX0) << R200_VPI_OUT_REG_INDEX_SHIFT)
189 | R200_VSF_OUT_CLASS_RESULT_TEXC);
190 case VARYING_SLOT_PSIZ:
191 return R200_VSF_OUT_CLASS_RESULT_POINTSIZE;
192 default:
193 fprintf(stderr, "problem in %s, unknown dst output reg %d\n", __func__, dst->Index);
194 exit(0);
195 return 0;
196 }
197 case PROGRAM_ADDRESS:
198 assert (dst->Index == 0);
199 return R200_VSF_OUT_CLASS_ADDR;
200 default:
201 fprintf(stderr, "problem in %s, unknown register type %d\n", __func__, dst->File);
202 exit(0);
203 return 0;
204 }
205 }
206
/**
 * Map a Mesa register file to the hardware source operand class.
 *
 * Constants and state variables share the parameter class.  Any other
 * register file (including PROGRAM_OUTPUT and PROGRAM_ADDRESS) is not a
 * valid source here: a diagnostic naming the offending file is printed and
 * the process exits with a failure status.
 *
 * \param file  register file of the source operand.
 * \return one of VSF_IN_CLASS_TMP, VSF_IN_CLASS_ATTR, VSF_IN_CLASS_PARAM.
 */
static unsigned long t_src_class(gl_register_file file)
{
   switch(file){
   case PROGRAM_TEMPORARY:
      return VSF_IN_CLASS_TMP;

   case PROGRAM_INPUT:
      return VSF_IN_CLASS_ATTR;

   case PROGRAM_CONSTANT:
   case PROGRAM_STATE_VAR:
      return VSF_IN_CLASS_PARAM;

   default:
      fprintf(stderr, "problem in %s, unknown register file %d\n", __func__, (int)file);
      /* fatal error: must not report success to the parent process */
      exit(-1);
      return 0;
   }
}
229
/**
 * Translate a single Mesa swizzle selector into the hardware component
 * selector.
 *
 * The value is returned unchanged: the Mesa SWIZZLE_* values are all
 * identical to VSF_IN_COMPONENT_*.
 */
static inline unsigned long t_swizzle(GLubyte swizzle)
{
   const unsigned long hw_component = swizzle;

   return hw_component;
}
235
#if 0
/* Debug helper (compiled out): print every entry of vp->inputs to stderr,
 * prefixed with the name of the calling function. */
static void vp_dump_inputs(struct r200_vertex_program *vp, char *caller)
{
   int attr = 0;

   if (!vp) {
      fprintf(stderr, "vp null in call to %s from %s\n", __func__, caller);
      return;
   }

   fprintf(stderr, "%s:<", caller);
   while (attr < VERT_ATTRIB_MAX) {
      fprintf(stderr, "%d ", vp->inputs[attr]);
      attr++;
   }
   fprintf(stderr, ">\n");
}
#endif
253
t_src_index(struct r200_vertex_program * vp,struct prog_src_register * src)254 static unsigned long t_src_index(struct r200_vertex_program *vp, struct prog_src_register *src)
255 {
256 /*
257 int i;
258 int max_reg = -1;
259 */
260 if(src->File == PROGRAM_INPUT){
261 /* if(vp->inputs[src->Index] != -1)
262 return vp->inputs[src->Index];
263
264 for(i=0; i < VERT_ATTRIB_MAX; i++)
265 if(vp->inputs[i] > max_reg)
266 max_reg = vp->inputs[i];
267
268 vp->inputs[src->Index] = max_reg+1;*/
269
270 //vp_dump_inputs(vp, __func__);
271 assert(vp->inputs[src->Index] != -1);
272 return vp->inputs[src->Index];
273 } else {
274 if (src->Index < 0) {
275 fprintf(stderr, "WARNING negative offsets for indirect addressing do not work\n");
276 return 0;
277 }
278 return src->Index;
279 }
280 }
281
t_src(struct r200_vertex_program * vp,struct prog_src_register * src)282 static unsigned long t_src(struct r200_vertex_program *vp, struct prog_src_register *src)
283 {
284
285 return MAKE_VSF_SOURCE(t_src_index(vp, src),
286 t_swizzle(GET_SWZ(src->Swizzle, 0)),
287 t_swizzle(GET_SWZ(src->Swizzle, 1)),
288 t_swizzle(GET_SWZ(src->Swizzle, 2)),
289 t_swizzle(GET_SWZ(src->Swizzle, 3)),
290 t_src_class(src->File),
291 src->Negate) | (src->RelAddr << 4);
292 }
293
t_src_scalar(struct r200_vertex_program * vp,struct prog_src_register * src)294 static unsigned long t_src_scalar(struct r200_vertex_program *vp, struct prog_src_register *src)
295 {
296
297 return MAKE_VSF_SOURCE(t_src_index(vp, src),
298 t_swizzle(GET_SWZ(src->Swizzle, 0)),
299 t_swizzle(GET_SWZ(src->Swizzle, 0)),
300 t_swizzle(GET_SWZ(src->Swizzle, 0)),
301 t_swizzle(GET_SWZ(src->Swizzle, 0)),
302 t_src_class(src->File),
303 src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4);
304 }
305
t_opcode(enum prog_opcode opcode)306 static unsigned long t_opcode(enum prog_opcode opcode)
307 {
308
309 switch(opcode){
310 case OPCODE_ADD: return R200_VPI_OUT_OP_ADD;
311 /* FIXME: ARL works fine, but negative offsets won't work - fglrx just
312 * seems to ignore neg offsets which isn't quite correct...
313 */
314 case OPCODE_ARL: return R200_VPI_OUT_OP_ARL;
315 case OPCODE_DP4: return R200_VPI_OUT_OP_DOT;
316 case OPCODE_DST: return R200_VPI_OUT_OP_DST;
317 case OPCODE_EX2: return R200_VPI_OUT_OP_EX2;
318 case OPCODE_EXP: return R200_VPI_OUT_OP_EXP;
319 case OPCODE_FRC: return R200_VPI_OUT_OP_FRC;
320 case OPCODE_LG2: return R200_VPI_OUT_OP_LG2;
321 case OPCODE_LIT: return R200_VPI_OUT_OP_LIT;
322 case OPCODE_LOG: return R200_VPI_OUT_OP_LOG;
323 case OPCODE_MAX: return R200_VPI_OUT_OP_MAX;
324 case OPCODE_MIN: return R200_VPI_OUT_OP_MIN;
325 case OPCODE_MUL: return R200_VPI_OUT_OP_MUL;
326 case OPCODE_RCP: return R200_VPI_OUT_OP_RCP;
327 case OPCODE_RSQ: return R200_VPI_OUT_OP_RSQ;
328 case OPCODE_SGE: return R200_VPI_OUT_OP_SGE;
329 case OPCODE_SLT: return R200_VPI_OUT_OP_SLT;
330
331 default:
332 fprintf(stderr, "%s: Should not be called with opcode %d!", __func__, opcode);
333 }
334 exit(-1);
335 return 0;
336 }
337
op_operands(enum prog_opcode opcode)338 static unsigned long op_operands(enum prog_opcode opcode)
339 {
340 int i;
341
342 /* Can we trust mesas opcodes to be in order ? */
343 for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++)
344 if(op_names[i].opcode == opcode)
345 return op_names[i].ip;
346
347 fprintf(stderr, "op %d not found in op_names\n", opcode);
348 exit(-1);
349 return 0;
350 }
351
/*
 * True when the source registers a and b differ (in RelAddr or Index) and
 * are both of the parameter class or both of the attribute class.
 * The arguments are evaluated more than once, so they must be free of side
 * effects.
 * TODO: Get rid of t_src_class call
 */
#define CMP_SRCS(a, b) ((((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index)) && \
                        ((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
                          t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
                         (t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
                          t_src_class((b).File) == VSF_IN_CLASS_ATTR)))

/* fglrx on rv250 codes up unused sources as follows:
   unused but necessary sources are the same as the previous source, zeroed out.
   unnecessary sources are the same as the previous source but with VSF_IN_CLASS_NONE set.
   i.e. an add (2 args) has its 2nd arg (if you use it as mov) zeroed out, and its 3rd arg
   set to VSF_IN_CLASS_NONE. Not sure if strictly necessary. */

/* Use these simpler definitions. They read an already written source word of
   the instruction at o_inst, so they obviously must not be used with regs that
   are not yet set up.
   Those are NOT semantically equivalent to the r300 ones, requires code changes */

/* Copy of the source word with all four component selects forced to ZERO. */
#define ZERO_SRC_WORD(word) \
   (((word) & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
    | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
       | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
       | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
       | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT)))

/* Copy of the source word with its low four bits replaced by 9
   (presumably VSF_IN_CLASS_NONE, per the note above - TODO confirm). */
#define UNUSED_SRC_WORD(word) (((word) & ~15) | 9)

#define ZERO_SRC_0 ZERO_SRC_WORD(o_inst->src0)

#define ZERO_SRC_1 ZERO_SRC_WORD(o_inst->src1)

#define ZERO_SRC_2 ZERO_SRC_WORD(o_inst->src2)

#define UNUSED_SRC_0 UNUSED_SRC_WORD(o_inst->src0)

#define UNUSED_SRC_1 UNUSED_SRC_WORD(o_inst->src1)

#define UNUSED_SRC_2 UNUSED_SRC_WORD(o_inst->src2)
390
391
392 /**
393 * Generate an R200 vertex program from Mesa's internal representation.
394 *
395 * \return GL_TRUE for success, GL_FALSE for failure.
396 */
r200_translate_vertex_program(struct gl_context * ctx,struct r200_vertex_program * vp)397 static GLboolean r200_translate_vertex_program(struct gl_context *ctx, struct r200_vertex_program *vp)
398 {
399 struct gl_program *mesa_vp = &vp->mesa_program;
400 struct prog_instruction *vpi;
401 int i;
402 VERTEX_SHADER_INSTRUCTION *o_inst;
403 unsigned long operands;
404 int are_srcs_scalar;
405 unsigned long hw_op;
406 int dofogfix = 0;
407 int fog_temp_i = 0;
408 int free_inputs;
409 int array_count = 0;
410 int u_temp_used;
411
412 vp->native = GL_FALSE;
413 vp->translated = GL_TRUE;
414 vp->fogmode = ctx->Fog.Mode;
415
416 if (mesa_vp->arb.NumInstructions == 0)
417 return GL_FALSE;
418
419 #if 0
420 if ((mesa_vp->info.inputs_read &
421 ~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 |
422 VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 |
423 VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) != 0) {
424 if (R200_DEBUG & RADEON_FALLBACKS) {
425 fprintf(stderr, "can't handle vert prog inputs 0x%x\n",
426 mesa_vp->info.inputs_read);
427 }
428 return GL_FALSE;
429 }
430 #endif
431
432 if ((mesa_vp->info.outputs_written &
433 ~((1 << VARYING_SLOT_POS) | (1 << VARYING_SLOT_COL0) | (1 << VARYING_SLOT_COL1) |
434 (1 << VARYING_SLOT_FOGC) | (1 << VARYING_SLOT_TEX0) | (1 << VARYING_SLOT_TEX1) |
435 (1 << VARYING_SLOT_TEX2) | (1 << VARYING_SLOT_TEX3) | (1 << VARYING_SLOT_TEX4) |
436 (1 << VARYING_SLOT_TEX5) | (1 << VARYING_SLOT_PSIZ))) != 0) {
437 if (R200_DEBUG & RADEON_FALLBACKS) {
438 fprintf(stderr, "can't handle vert prog outputs 0x%llx\n",
439 (unsigned long long) mesa_vp->info.outputs_written);
440 }
441 return GL_FALSE;
442 }
443
444 /* Initial value should be last tmp reg that hw supports.
445 Strangely enough r300 doesnt mind even though these would be out of range.
446 Smart enough to realize that it doesnt need it? */
447 int u_temp_i = R200_VSF_MAX_TEMPS - 1;
448 struct prog_src_register src[3];
449 struct prog_dst_register dst;
450
451 /* FIXME: is changing the prog safe to do here? */
452 if (mesa_vp->arb.IsPositionInvariant &&
453 /* make sure we only do this once */
454 !(mesa_vp->info.outputs_written & (1 << VARYING_SLOT_POS))) {
455 _mesa_insert_mvp_code(ctx, mesa_vp);
456 }
457
458 /* for fogc, can't change mesa_vp, as it would hose swtnl, and exp with
459 base e isn't directly available neither. */
460 if ((mesa_vp->info.outputs_written & (1 << VARYING_SLOT_FOGC)) &&
461 !vp->fogpidx) {
462 struct gl_program_parameter_list *paramList;
463 gl_state_index16 tokens[STATE_LENGTH] = { STATE_FOG_PARAMS, 0, 0, 0 };
464 paramList = mesa_vp->Parameters;
465 vp->fogpidx = _mesa_add_state_reference(paramList, tokens);
466 }
467
468 vp->pos_end = 0;
469 mesa_vp->arb.NumNativeInstructions = 0;
470 if (mesa_vp->Parameters)
471 mesa_vp->arb.NumNativeParameters = mesa_vp->Parameters->NumParameters;
472 else
473 mesa_vp->arb.NumNativeParameters = 0;
474
475 for(i = 0; i < VERT_ATTRIB_MAX; i++)
476 vp->inputs[i] = -1;
477 for(i = 0; i < 15; i++)
478 vp->inputmap_rev[i] = 255;
479 free_inputs = 0x2ffd;
480
481 /* fglrx uses fixed inputs as follows for conventional attribs.
482 generic attribs use non-fixed assignment, fglrx will always use the
483 lowest attrib values available. We'll just do the same.
484 There are 12 generic attribs possible, corresponding to attrib 0, 2-11
485 and 13 in a hw vertex prog.
486 attr 1 and 12 aren't used for generic attribs as those cannot be made vec4
487 (correspond to vertex normal/weight - maybe weight actually could be made vec4).
488 Additionally, not more than 12 arrays in total are possible I think.
489 attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0
490 attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0)
491 attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1)
492 attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0)
493 */
494
495 /* attr 4,5 and 13 are only used with generic attribs.
496 Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is
497 not possibe to use with vertex progs as it is lacking in vert prog specification) */
498 /* may look different when using idx buf / input_route instead of se_vtx_fmt? */
499 if (mesa_vp->info.inputs_read & VERT_BIT_POS) {
500 vp->inputs[VERT_ATTRIB_POS] = 0;
501 vp->inputmap_rev[0] = VERT_ATTRIB_POS;
502 free_inputs &= ~(1 << 0);
503 array_count++;
504 }
505 if (mesa_vp->info.inputs_read & VERT_BIT_NORMAL) {
506 vp->inputs[VERT_ATTRIB_NORMAL] = 1;
507 vp->inputmap_rev[2] = VERT_ATTRIB_NORMAL;
508 array_count++;
509 }
510 if (mesa_vp->info.inputs_read & VERT_BIT_COLOR0) {
511 vp->inputs[VERT_ATTRIB_COLOR0] = 2;
512 vp->inputmap_rev[4] = VERT_ATTRIB_COLOR0;
513 free_inputs &= ~(1 << 2);
514 array_count++;
515 }
516 if (mesa_vp->info.inputs_read & VERT_BIT_COLOR1) {
517 vp->inputs[VERT_ATTRIB_COLOR1] = 3;
518 vp->inputmap_rev[5] = VERT_ATTRIB_COLOR1;
519 free_inputs &= ~(1 << 3);
520 array_count++;
521 }
522 if (mesa_vp->info.inputs_read & VERT_BIT_FOG) {
523 vp->inputs[VERT_ATTRIB_FOG] = 15; array_count++;
524 vp->inputmap_rev[3] = VERT_ATTRIB_FOG;
525 array_count++;
526 }
527 /* VERT_ATTRIB_TEX0-5 */
528 for (i = 0; i <= 5; i++) {
529 if (mesa_vp->info.inputs_read & VERT_BIT_TEX(i)) {
530 vp->inputs[VERT_ATTRIB_TEX(i)] = i + 6;
531 vp->inputmap_rev[8 + i] = VERT_ATTRIB_TEX(i);
532 free_inputs &= ~(1 << (i + 6));
533 array_count++;
534 }
535 }
536 /* using VERT_ATTRIB_TEX6/7 would be illegal */
537 for (; i < VERT_ATTRIB_TEX_MAX; i++) {
538 if (mesa_vp->info.inputs_read & VERT_BIT_TEX(i)) {
539 if (R200_DEBUG & RADEON_FALLBACKS) {
540 fprintf(stderr, "texture attribute %d in vert prog\n", i);
541 }
542 return GL_FALSE;
543 }
544 }
545 /* completely ignore aliasing? */
546 for (i = 0; i < VERT_ATTRIB_GENERIC_MAX; i++) {
547 int j;
548 /* completely ignore aliasing? */
549 if (mesa_vp->info.inputs_read & VERT_BIT_GENERIC(i)) {
550 array_count++;
551 if (array_count > 12) {
552 if (R200_DEBUG & RADEON_FALLBACKS) {
553 fprintf(stderr, "more than 12 attribs used in vert prog\n");
554 }
555 return GL_FALSE;
556 }
557 for (j = 0; j < 14; j++) {
558 /* will always find one due to limited array_count */
559 if (free_inputs & (1 << j)) {
560 free_inputs &= ~(1 << j);
561 vp->inputs[VERT_ATTRIB_GENERIC(i)] = j;
562 if (j == 0) {
563 /* mapped to pos */
564 vp->inputmap_rev[j] = VERT_ATTRIB_GENERIC(i);
565 } else if (j < 12) {
566 /* mapped to col/tex */
567 vp->inputmap_rev[j + 2] = VERT_ATTRIB_GENERIC(i);
568 } else {
569 /* mapped to pos1 */
570 vp->inputmap_rev[j + 1] = VERT_ATTRIB_GENERIC(i);
571 }
572 break;
573 }
574 }
575 }
576 }
577
578 if (!(mesa_vp->info.outputs_written & (1 << VARYING_SLOT_POS))) {
579 if (R200_DEBUG & RADEON_FALLBACKS) {
580 fprintf(stderr, "can't handle vert prog without position output\n");
581 }
582 return GL_FALSE;
583 }
584 if (free_inputs & 1) {
585 if (R200_DEBUG & RADEON_FALLBACKS) {
586 fprintf(stderr, "can't handle vert prog without position input\n");
587 }
588 return GL_FALSE;
589 }
590
591 o_inst = vp->instr;
592 for (vpi = mesa_vp->arb.Instructions; vpi->Opcode != OPCODE_END; vpi++, o_inst++){
593 operands = op_operands(vpi->Opcode);
594 are_srcs_scalar = operands & SCALAR_FLAG;
595 operands &= OP_MASK;
596
597 for(i = 0; i < operands; i++) {
598 src[i] = vpi->SrcReg[i];
599 /* hack up default attrib values as per spec as swizzling.
600 normal, fog, secondary color. Crazy?
601 May need more if we don't submit vec4 elements? */
602 if (src[i].File == PROGRAM_INPUT) {
603 if (src[i].Index == VERT_ATTRIB_NORMAL) {
604 int j;
605 for (j = 0; j < 4; j++) {
606 if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
607 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
608 src[i].Swizzle |= SWIZZLE_ONE << (j*3);
609 }
610 }
611 }
612 else if (src[i].Index == VERT_ATTRIB_COLOR1) {
613 int j;
614 for (j = 0; j < 4; j++) {
615 if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
616 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
617 src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
618 }
619 }
620 }
621 else if (src[i].Index == VERT_ATTRIB_FOG) {
622 int j;
623 for (j = 0; j < 4; j++) {
624 if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
625 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
626 src[i].Swizzle |= SWIZZLE_ONE << (j*3);
627 }
628 else if ((GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Y) ||
629 GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Z) {
630 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
631 src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
632 }
633 }
634 }
635 }
636 }
637
638 if(operands == 3){
639 if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){
640 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
641 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
642 VSF_FLAG_ALL);
643
644 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
645 SWIZZLE_X, SWIZZLE_Y,
646 SWIZZLE_Z, SWIZZLE_W,
647 t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4);
648
649 o_inst->src1 = ZERO_SRC_0;
650 o_inst->src2 = UNUSED_SRC_1;
651 o_inst++;
652
653 src[2].File = PROGRAM_TEMPORARY;
654 src[2].Index = u_temp_i;
655 src[2].RelAddr = 0;
656 u_temp_i--;
657 }
658 }
659
660 if(operands >= 2){
661 if( CMP_SRCS(src[1], src[0]) ){
662 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
663 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
664 VSF_FLAG_ALL);
665
666 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
667 SWIZZLE_X, SWIZZLE_Y,
668 SWIZZLE_Z, SWIZZLE_W,
669 t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4);
670
671 o_inst->src1 = ZERO_SRC_0;
672 o_inst->src2 = UNUSED_SRC_1;
673 o_inst++;
674
675 src[0].File = PROGRAM_TEMPORARY;
676 src[0].Index = u_temp_i;
677 src[0].RelAddr = 0;
678 u_temp_i--;
679 }
680 }
681
682 dst = vpi->DstReg;
683 if (dst.File == PROGRAM_OUTPUT &&
684 dst.Index == VARYING_SLOT_FOGC &&
685 dst.WriteMask & WRITEMASK_X) {
686 fog_temp_i = u_temp_i;
687 dst.File = PROGRAM_TEMPORARY;
688 dst.Index = fog_temp_i;
689 dofogfix = 1;
690 u_temp_i--;
691 }
692
693 /* These ops need special handling. */
694 switch(vpi->Opcode){
695 case OPCODE_POW:
696 /* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter).
697 So may need to insert additional instruction */
698 if ((src[0].File == src[1].File) &&
699 (src[0].Index == src[1].Index)) {
700 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
701 t_dst_mask(dst.WriteMask));
702 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
703 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
704 SWIZZLE_ZERO,
705 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
706 SWIZZLE_ZERO,
707 t_src_class(src[0].File),
708 src[0].Negate) | (src[0].RelAddr << 4);
709 o_inst->src1 = UNUSED_SRC_0;
710 o_inst->src2 = UNUSED_SRC_0;
711 }
712 else {
713 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
714 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
715 VSF_FLAG_ALL);
716 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
717 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
718 SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO,
719 t_src_class(src[0].File),
720 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
721 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
722 SWIZZLE_ZERO, SWIZZLE_ZERO,
723 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO,
724 t_src_class(src[1].File),
725 src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
726 o_inst->src2 = UNUSED_SRC_1;
727 o_inst++;
728
729 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
730 t_dst_mask(dst.WriteMask));
731 o_inst->src0 = MAKE_VSF_SOURCE(u_temp_i,
732 VSF_IN_COMPONENT_X,
733 VSF_IN_COMPONENT_Y,
734 VSF_IN_COMPONENT_Z,
735 VSF_IN_COMPONENT_W,
736 VSF_IN_CLASS_TMP,
737 VSF_FLAG_NONE);
738 o_inst->src1 = UNUSED_SRC_0;
739 o_inst->src2 = UNUSED_SRC_0;
740 u_temp_i--;
741 }
742 goto next;
743
744 case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
745 case OPCODE_SWZ:
746 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
747 t_dst_mask(dst.WriteMask));
748 o_inst->src0 = t_src(vp, &src[0]);
749 o_inst->src1 = ZERO_SRC_0;
750 o_inst->src2 = UNUSED_SRC_1;
751 goto next;
752
753 case OPCODE_MAD:
754 /* only 2 read ports into temp memory thus may need the macro op MAD_2
755 instead (requiring 2 clocks) if all inputs are in temp memory
756 (and, only if they actually reference 3 distinct temps) */
757 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
758 src[1].File == PROGRAM_TEMPORARY &&
759 src[2].File == PROGRAM_TEMPORARY &&
760 (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index)) &&
761 (((src[0].RelAddr << 8) | src[0].Index) != ((src[2].RelAddr << 8) | src[2].Index)) &&
762 (((src[1].RelAddr << 8) | src[1].Index) != ((src[2].RelAddr << 8) | src[2].Index))) ?
763 R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
764
765 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
766 t_dst_mask(dst.WriteMask));
767 o_inst->src0 = t_src(vp, &src[0]);
768 #if 0
769 if ((o_inst - vp->instr) == 31) {
770 /* fix up the broken vertex program of quake4 demo... */
771 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
772 SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X,
773 t_src_class(src[1].File),
774 src[1].Negate) | (src[1].RelAddr << 4);
775 o_inst->src2 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
776 SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y,
777 t_src_class(src[1].File),
778 src[1].Negate) | (src[1].RelAddr << 4);
779 }
780 else {
781 o_inst->src1 = t_src(vp, &src[1]);
782 o_inst->src2 = t_src(vp, &src[2]);
783 }
784 #else
785 o_inst->src1 = t_src(vp, &src[1]);
786 o_inst->src2 = t_src(vp, &src[2]);
787 #endif
788 goto next;
789
790 case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
791 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
792 t_dst_mask(dst.WriteMask));
793
794 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
795 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
796 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
797 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
798 SWIZZLE_ZERO,
799 t_src_class(src[0].File),
800 src[0].Negate) | (src[0].RelAddr << 4);
801
802 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
803 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
804 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
805 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
806 SWIZZLE_ZERO,
807 t_src_class(src[1].File),
808 src[1].Negate) | (src[1].RelAddr << 4);
809
810 o_inst->src2 = UNUSED_SRC_1;
811 goto next;
812
813 case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
814 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
815 t_dst_mask(dst.WriteMask));
816
817 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
818 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
819 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
820 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
821 VSF_IN_COMPONENT_ONE,
822 t_src_class(src[0].File),
823 src[0].Negate) | (src[0].RelAddr << 4);
824 o_inst->src1 = t_src(vp, &src[1]);
825 o_inst->src2 = UNUSED_SRC_1;
826 goto next;
827
828 case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
829 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
830 t_dst_mask(dst.WriteMask));
831
832 o_inst->src0 = t_src(vp, &src[0]);
833 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
834 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
835 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
836 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
837 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
838 t_src_class(src[1].File),
839 (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
840 o_inst->src2 = UNUSED_SRC_1;
841 goto next;
842
843 case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
844 o_inst->op=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX, t_dst(&dst),
845 t_dst_mask(dst.WriteMask));
846
847 o_inst->src0=t_src(vp, &src[0]);
848 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
849 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
850 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
851 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
852 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
853 t_src_class(src[0].File),
854 (!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
855 o_inst->src2 = UNUSED_SRC_1;
856 goto next;
857
858 case OPCODE_FLR:
859 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
860 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
861
862 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_FRC,
863 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
864 t_dst_mask(dst.WriteMask));
865
866 o_inst->src0 = t_src(vp, &src[0]);
867 o_inst->src1 = UNUSED_SRC_0;
868 o_inst->src2 = UNUSED_SRC_1;
869 o_inst++;
870
871 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
872 t_dst_mask(dst.WriteMask));
873
874 o_inst->src0 = t_src(vp, &src[0]);
875 o_inst->src1 = MAKE_VSF_SOURCE(u_temp_i,
876 VSF_IN_COMPONENT_X,
877 VSF_IN_COMPONENT_Y,
878 VSF_IN_COMPONENT_Z,
879 VSF_IN_COMPONENT_W,
880 VSF_IN_CLASS_TMP,
881 /* Not 100% sure about this */
882 (!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
883
884 o_inst->src2 = UNUSED_SRC_0;
885 u_temp_i--;
886 goto next;
887
888 case OPCODE_XPD:
889 /* mul r0, r1.yzxw, r2.zxyw
890 mad r0, -r2.yzxw, r1.zxyw, r0
891 */
892 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
893 src[1].File == PROGRAM_TEMPORARY &&
894 (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index))) ?
895 R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
896
897 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
898 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
899 t_dst_mask(dst.WriteMask));
900
901 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
902 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
903 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
904 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
905 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
906 t_src_class(src[0].File),
907 src[0].Negate) | (src[0].RelAddr << 4);
908
909 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
910 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
911 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
912 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
913 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
914 t_src_class(src[1].File),
915 src[1].Negate) | (src[1].RelAddr << 4);
916
917 o_inst->src2 = UNUSED_SRC_1;
918 o_inst++;
919 u_temp_i--;
920
921 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
922 t_dst_mask(dst.WriteMask));
923
924 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
925 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
926 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
927 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
928 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
929 t_src_class(src[1].File),
930 (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
931
932 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
933 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
934 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
935 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
936 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
937 t_src_class(src[0].File),
938 src[0].Negate) | (src[0].RelAddr << 4);
939
940 o_inst->src2 = MAKE_VSF_SOURCE(u_temp_i+1,
941 VSF_IN_COMPONENT_X,
942 VSF_IN_COMPONENT_Y,
943 VSF_IN_COMPONENT_Z,
944 VSF_IN_COMPONENT_W,
945 VSF_IN_CLASS_TMP,
946 VSF_FLAG_NONE);
947 goto next;
948
949 case OPCODE_END:
950 assert(0);
951 default:
952 break;
953 }
954
955 o_inst->op = MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst(&dst),
956 t_dst_mask(dst.WriteMask));
957
958 if(are_srcs_scalar){
959 switch(operands){
960 case 1:
961 o_inst->src0 = t_src_scalar(vp, &src[0]);
962 o_inst->src1 = UNUSED_SRC_0;
963 o_inst->src2 = UNUSED_SRC_1;
964 break;
965
966 case 2:
967 o_inst->src0 = t_src_scalar(vp, &src[0]);
968 o_inst->src1 = t_src_scalar(vp, &src[1]);
969 o_inst->src2 = UNUSED_SRC_1;
970 break;
971
972 case 3:
973 o_inst->src0 = t_src_scalar(vp, &src[0]);
974 o_inst->src1 = t_src_scalar(vp, &src[1]);
975 o_inst->src2 = t_src_scalar(vp, &src[2]);
976 break;
977
978 default:
979 fprintf(stderr, "illegal number of operands %lu\n", operands);
980 exit(-1);
981 break;
982 }
983 } else {
984 switch(operands){
985 case 1:
986 o_inst->src0 = t_src(vp, &src[0]);
987 o_inst->src1 = UNUSED_SRC_0;
988 o_inst->src2 = UNUSED_SRC_1;
989 break;
990
991 case 2:
992 o_inst->src0 = t_src(vp, &src[0]);
993 o_inst->src1 = t_src(vp, &src[1]);
994 o_inst->src2 = UNUSED_SRC_1;
995 break;
996
997 case 3:
998 o_inst->src0 = t_src(vp, &src[0]);
999 o_inst->src1 = t_src(vp, &src[1]);
1000 o_inst->src2 = t_src(vp, &src[2]);
1001 break;
1002
1003 default:
1004 fprintf(stderr, "illegal number of operands %lu\n", operands);
1005 exit(-1);
1006 break;
1007 }
1008 }
1009 next:
1010
1011 if (dofogfix) {
1012 o_inst++;
1013 if (vp->fogmode == GL_EXP) {
1014 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1015 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1016 VSF_FLAG_X);
1017 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1018 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
1019 o_inst->src2 = UNUSED_SRC_1;
1020 o_inst++;
1021 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
1022 R200_VSF_OUT_CLASS_RESULT_FOGC,
1023 VSF_FLAG_X);
1024 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1025 o_inst->src1 = UNUSED_SRC_0;
1026 o_inst->src2 = UNUSED_SRC_1;
1027 }
1028 else if (vp->fogmode == GL_EXP2) {
1029 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1030 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1031 VSF_FLAG_X);
1032 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1033 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
1034 o_inst->src2 = UNUSED_SRC_1;
1035 o_inst++;
1036 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1037 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1038 VSF_FLAG_X);
1039 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1040 o_inst->src1 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1041 o_inst->src2 = UNUSED_SRC_1;
1042 o_inst++;
1043 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
1044 R200_VSF_OUT_CLASS_RESULT_FOGC,
1045 VSF_FLAG_X);
1046 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1047 o_inst->src1 = UNUSED_SRC_0;
1048 o_inst->src2 = UNUSED_SRC_1;
1049 }
1050 else { /* fogmode == GL_LINEAR */
1051 /* could do that with single op (dot) if using params like
1052 with fixed function pipeline fog */
1053 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
1054 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1055 VSF_FLAG_X);
1056 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1057 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, Z, Z, Z, Z, PARAM, NONE);
1058 o_inst->src2 = UNUSED_SRC_1;
1059 o_inst++;
1060 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1061 R200_VSF_OUT_CLASS_RESULT_FOGC,
1062 VSF_FLAG_X);
1063 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1064 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, W, W, W, W, PARAM, NONE);
1065 o_inst->src2 = UNUSED_SRC_1;
1066
1067 }
1068 dofogfix = 0;
1069 }
1070
1071 u_temp_used = (R200_VSF_MAX_TEMPS - 1) - u_temp_i;
1072 if (mesa_vp->arb.NumNativeTemporaries <
1073 (mesa_vp->arb.NumTemporaries + u_temp_used)) {
1074 mesa_vp->arb.NumNativeTemporaries =
1075 mesa_vp->arb.NumTemporaries + u_temp_used;
1076 }
1077 if ((mesa_vp->arb.NumTemporaries + u_temp_used) > R200_VSF_MAX_TEMPS) {
1078 if (R200_DEBUG & RADEON_FALLBACKS) {
1079 fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->arb.NumTemporaries, u_temp_used);
1080 }
1081 return GL_FALSE;
1082 }
1083 u_temp_i = R200_VSF_MAX_TEMPS - 1;
1084 if(o_inst - vp->instr >= R200_VSF_MAX_INST) {
1085 mesa_vp->arb.NumNativeInstructions = 129;
1086 if (R200_DEBUG & RADEON_FALLBACKS) {
1087 fprintf(stderr, "more than 128 native instructions\n");
1088 }
1089 return GL_FALSE;
1090 }
1091 if ((o_inst->op & R200_VSF_OUT_CLASS_MASK) == R200_VSF_OUT_CLASS_RESULT_POS) {
1092 vp->pos_end = (o_inst - vp->instr);
1093 }
1094 }
1095
1096 vp->native = GL_TRUE;
1097 mesa_vp->arb.NumNativeInstructions = (o_inst - vp->instr);
1098 #if 0
1099 fprintf(stderr, "hw program:\n");
1100 for(i=0; i < vp->program.length; i++)
1101 fprintf(stderr, "%08x\n", vp->instr[i]);
1102 #endif
1103 return GL_TRUE;
1104 }
1105
r200SetupVertexProg(struct gl_context * ctx)1106 void r200SetupVertexProg( struct gl_context *ctx ) {
1107 r200ContextPtr rmesa = R200_CONTEXT(ctx);
1108 struct r200_vertex_program *vp = (struct r200_vertex_program *)ctx->VertexProgram.Current;
1109 GLboolean fallback;
1110 GLint i;
1111
1112 if (!vp->translated || (ctx->Fog.Enabled && ctx->Fog.Mode != vp->fogmode)) {
1113 rmesa->curr_vp_hw = NULL;
1114 r200_translate_vertex_program(ctx, vp);
1115 }
1116 /* could optimize setting up vertex progs away for non-tcl hw */
1117 fallback = !(vp->native && r200VertexProgUpdateParams(ctx, vp));
1118 TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, fallback);
1119 if (rmesa->radeon.TclFallback) return;
1120
1121 R200_STATECHANGE( rmesa, vap );
1122 /* FIXME: fglrx sets R200_VAP_SINGLE_BUF_STATE_ENABLE too. Do we need it?
1123 maybe only when using more than 64 inst / 96 param? */
1124 rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_PROG_VTX_SHADER_ENABLE /*| R200_VAP_SINGLE_BUF_STATE_ENABLE*/;
1125
1126 R200_STATECHANGE( rmesa, pvs );
1127
1128 rmesa->hw.pvs.cmd[PVS_CNTL_1] = (0 << R200_PVS_CNTL_1_PROGRAM_START_SHIFT) |
1129 ((vp->mesa_program.arb.NumNativeInstructions - 1) << R200_PVS_CNTL_1_PROGRAM_END_SHIFT) |
1130 (vp->pos_end << R200_PVS_CNTL_1_POS_END_SHIFT);
1131 rmesa->hw.pvs.cmd[PVS_CNTL_2] = (0 << R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT) |
1132 (vp->mesa_program.arb.NumNativeParameters << R200_PVS_CNTL_2_PARAM_COUNT_SHIFT);
1133
1134 /* maybe user clip planes just work with vertex progs... untested */
1135 if (ctx->Transform.ClipPlanesEnabled) {
1136 R200_STATECHANGE( rmesa, tcl );
1137 if (vp->mesa_program.arb.IsPositionInvariant) {
1138 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (ctx->Transform.ClipPlanesEnabled << 2);
1139 }
1140 else {
1141 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(0xfc);
1142 }
1143 }
1144
1145 if (vp != rmesa->curr_vp_hw) {
1146 GLuint count = vp->mesa_program.arb.NumNativeInstructions;
1147 drm_radeon_cmd_header_t tmp;
1148
1149 R200_STATECHANGE( rmesa, vpi[0] );
1150 R200_STATECHANGE( rmesa, vpi[1] );
1151
1152 /* FIXME: what about using a memcopy... */
1153 for (i = 0; (i < 64) && i < count; i++) {
1154 rmesa->hw.vpi[0].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i].op;
1155 rmesa->hw.vpi[0].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i].src0;
1156 rmesa->hw.vpi[0].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i].src1;
1157 rmesa->hw.vpi[0].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i].src2;
1158 }
1159 /* hack up the cmd_size so not the whole state atom is emitted always.
1160 This may require some more thought, we may emit half progs on lost state, but
1161 hopefully it won't matter?
1162 WARNING: must not use R200_DB_STATECHANGE, this will produce bogus (and rejected)
1163 packet emits (due to the mismatched cmd_size and count in cmd/last_cmd) */
1164 rmesa->hw.vpi[0].cmd_size = 1 + 4 * ((count > 64) ? 64 : count);
1165 tmp.i = rmesa->hw.vpi[0].cmd[VPI_CMD_0];
1166 tmp.veclinear.count = (count > 64) ? 64 : count;
1167 rmesa->hw.vpi[0].cmd[VPI_CMD_0] = tmp.i;
1168 if (count > 64) {
1169 for (i = 0; i < (count - 64); i++) {
1170 rmesa->hw.vpi[1].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i + 64].op;
1171 rmesa->hw.vpi[1].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i + 64].src0;
1172 rmesa->hw.vpi[1].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i + 64].src1;
1173 rmesa->hw.vpi[1].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i + 64].src2;
1174 }
1175 rmesa->hw.vpi[1].cmd_size = 1 + 4 * (count - 64);
1176 tmp.i = rmesa->hw.vpi[1].cmd[VPI_CMD_0];
1177 tmp.veclinear.count = count - 64;
1178 rmesa->hw.vpi[1].cmd[VPI_CMD_0] = tmp.i;
1179 }
1180 rmesa->curr_vp_hw = vp;
1181 }
1182 }
1183
1184
1185 static struct gl_program *
r200NewProgram(struct gl_context * ctx,gl_shader_stage stage,GLuint id,bool is_arb_asm)1186 r200NewProgram(struct gl_context *ctx, gl_shader_stage stage, GLuint id,
1187 bool is_arb_asm)
1188 {
1189 switch(stage){
1190 case MESA_SHADER_VERTEX: {
1191 struct r200_vertex_program *vp = rzalloc(NULL,
1192 struct r200_vertex_program);
1193 return _mesa_init_gl_program(&vp->mesa_program, stage, id, is_arb_asm);
1194 }
1195 case MESA_SHADER_FRAGMENT: {
1196 struct gl_program *prog = rzalloc(NULL, struct gl_program);
1197 return _mesa_init_gl_program(prog, stage, id, is_arb_asm);
1198 }
1199 default:
1200 _mesa_problem(ctx, "Bad target in r200NewProgram");
1201 return NULL;
1202 }
1203 }
1204
1205
/**
 * Driver hook for destroying a program object.
 *
 * This function does no driver-specific teardown of its own; the request is
 * forwarded unchanged to _mesa_delete_program().
 *
 * \param ctx   GL context the program belongs to.
 * \param prog  program to delete.
 */
static void
r200DeleteProgram(struct gl_context *ctx, struct gl_program *prog)
{
   _mesa_delete_program(ctx, prog);
}
1211
1212 static GLboolean
r200ProgramStringNotify(struct gl_context * ctx,GLenum target,struct gl_program * prog)1213 r200ProgramStringNotify(struct gl_context *ctx, GLenum target, struct gl_program *prog)
1214 {
1215 struct r200_vertex_program *vp = (void *)prog;
1216 r200ContextPtr rmesa = R200_CONTEXT(ctx);
1217
1218 switch(target) {
1219 case GL_VERTEX_PROGRAM_ARB:
1220 vp->translated = GL_FALSE;
1221 vp->fogpidx = 0;
1222 /* memset(&vp->translated, 0, sizeof(struct r200_vertex_program) - sizeof(struct gl_program));*/
1223 r200_translate_vertex_program(ctx, vp);
1224 rmesa->curr_vp_hw = NULL;
1225 break;
1226 case GL_FRAGMENT_SHADER_ATI:
1227 rmesa->afs_loaded = NULL;
1228 break;
1229 }
1230 /* need this for tcl fallbacks */
1231 (void) _tnl_program_string(ctx, target, prog);
1232
1233 /* XXX check if program is legal, within limits */
1234 return GL_TRUE;
1235 }
1236
1237 static GLboolean
r200IsProgramNative(struct gl_context * ctx,GLenum target,struct gl_program * prog)1238 r200IsProgramNative(struct gl_context *ctx, GLenum target, struct gl_program *prog)
1239 {
1240 struct r200_vertex_program *vp = (void *)prog;
1241
1242 switch(target){
1243 case GL_VERTEX_PROGRAM_ARB:
1244 if (!vp->translated) {
1245 r200_translate_vertex_program(ctx, vp);
1246 }
1247 /* does not take parameters etc. into account */
1248 return vp->native;
1249 default:
1250 _mesa_problem(ctx, "Bad target in r200NewProgram");
1251 }
1252 return 0;
1253 }
1254
r200InitShaderFuncs(struct dd_function_table * functions)1255 void r200InitShaderFuncs(struct dd_function_table *functions)
1256 {
1257 functions->NewProgram = r200NewProgram;
1258 functions->DeleteProgram = r200DeleteProgram;
1259 functions->ProgramStringNotify = r200ProgramStringNotify;
1260 functions->IsProgramNative = r200IsProgramNative;
1261 }
1262