1 /**************************************************************************
2
3 Copyright (C) 2005 Aapo Tahkola.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Aapo Tahkola <aet@rasterburn.org>
31 * Roland Scheidegger <rscheidegger_lists@hispeed.ch>
32 */
33 #include "main/glheader.h"
34 #include "main/macros.h"
35 #include "main/enums.h"
36 #include "program/program.h"
37 #include "program/prog_instruction.h"
38 #include "program/prog_parameter.h"
39 #include "program/prog_statevars.h"
40 #include "program/programopt.h"
41 #include "tnl/tnl.h"
42
43 #include "r200_context.h"
44 #include "r200_vertprog.h"
45 #include "r200_ioctl.h"
46 #include "r200_tcl.h"
47
/*
 * Build-time guard: t_swizzle() and t_dst_mask() pass Mesa's SWIZZLE_* and
 * WRITEMASK_* values to the hardware encoding unchanged, so every Mesa
 * constant has to be numerically equal to its VSF_* counterpart.
 */
#if !(SWIZZLE_X == VSF_IN_COMPONENT_X && \
      SWIZZLE_Y == VSF_IN_COMPONENT_Y && \
      SWIZZLE_Z == VSF_IN_COMPONENT_Z && \
      SWIZZLE_W == VSF_IN_COMPONENT_W && \
      SWIZZLE_ZERO == VSF_IN_COMPONENT_ZERO && \
      SWIZZLE_ONE == VSF_IN_COMPONENT_ONE && \
      WRITEMASK_X == VSF_FLAG_X && \
      WRITEMASK_Y == VSF_FLAG_Y && \
      WRITEMASK_Z == VSF_FLAG_Z && \
      WRITEMASK_W == VSF_FLAG_W)
#error Cannot change these!
#endif
60
61 #define SCALAR_FLAG (1<<31)
62 #define FLAG_MASK (1<<31)
63 #define OP_MASK (0xf) /* we are unlikely to have more than 15 */
64 #define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
65
66 static struct{
67 char *name;
68 int opcode;
69 unsigned long ip; /* number of input operands and flags */
70 }op_names[]={
71 OPN(ABS, 1),
72 OPN(ADD, 2),
73 OPN(ARL, 1|SCALAR_FLAG),
74 OPN(DP3, 2),
75 OPN(DP4, 2),
76 OPN(DPH, 2),
77 OPN(DST, 2),
78 OPN(EX2, 1|SCALAR_FLAG),
79 OPN(EXP, 1|SCALAR_FLAG),
80 OPN(FLR, 1),
81 OPN(FRC, 1),
82 OPN(LG2, 1|SCALAR_FLAG),
83 OPN(LIT, 1),
84 OPN(LOG, 1|SCALAR_FLAG),
85 OPN(MAD, 3),
86 OPN(MAX, 2),
87 OPN(MIN, 2),
88 OPN(MOV, 1),
89 OPN(MUL, 2),
90 OPN(POW, 2|SCALAR_FLAG),
91 OPN(RCP, 1|SCALAR_FLAG),
92 OPN(RSQ, 1|SCALAR_FLAG),
93 OPN(SGE, 2),
94 OPN(SLT, 2),
95 OPN(SUB, 2),
96 OPN(SWZ, 1),
97 OPN(XPD, 2),
98 OPN(END, 0),
99 };
100 #undef OPN
101
r200VertexProgUpdateParams(struct gl_context * ctx,struct r200_vertex_program * vp)102 static GLboolean r200VertexProgUpdateParams(struct gl_context *ctx, struct r200_vertex_program *vp)
103 {
104 r200ContextPtr rmesa = R200_CONTEXT( ctx );
105 GLfloat *fcmd = (GLfloat *)&rmesa->hw.vpp[0].cmd[VPP_CMD_0 + 1];
106 int pi;
107 struct gl_program *mesa_vp = &vp->mesa_program;
108 struct gl_program_parameter_list *paramList;
109 drm_radeon_cmd_header_t tmp;
110
111 R200_STATECHANGE( rmesa, vpp[0] );
112 R200_STATECHANGE( rmesa, vpp[1] );
113 assert(mesa_vp->Parameters);
114 _mesa_load_state_parameters(ctx, mesa_vp->Parameters);
115 paramList = mesa_vp->Parameters;
116
117 if(paramList->NumParameters > R200_VSF_MAX_PARAM){
118 fprintf(stderr, "%s:Params exhausted\n", __func__);
119 return GL_FALSE;
120 }
121
122 for(pi = 0; pi < paramList->NumParameters; pi++) {
123 switch(paramList->Parameters[pi].Type) {
124 case PROGRAM_STATE_VAR:
125 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
126 case PROGRAM_CONSTANT:
127 *fcmd++ = paramList->ParameterValues[pi][0].f;
128 *fcmd++ = paramList->ParameterValues[pi][1].f;
129 *fcmd++ = paramList->ParameterValues[pi][2].f;
130 *fcmd++ = paramList->ParameterValues[pi][3].f;
131 break;
132 default:
133 _mesa_problem(NULL, "Bad param type in %s", __func__);
134 break;
135 }
136 if (pi == 95) {
137 fcmd = (GLfloat *)&rmesa->hw.vpp[1].cmd[VPP_CMD_0 + 1];
138 }
139 }
140 /* hack up the cmd_size so not the whole state atom is emitted always. */
141 rmesa->hw.vpp[0].cmd_size =
142 1 + 4 * ((paramList->NumParameters > 96) ? 96 : paramList->NumParameters);
143 tmp.i = rmesa->hw.vpp[0].cmd[VPP_CMD_0];
144 tmp.veclinear.count = (paramList->NumParameters > 96) ? 96 : paramList->NumParameters;
145 rmesa->hw.vpp[0].cmd[VPP_CMD_0] = tmp.i;
146 if (paramList->NumParameters > 96) {
147 rmesa->hw.vpp[1].cmd_size = 1 + 4 * (paramList->NumParameters - 96);
148 tmp.i = rmesa->hw.vpp[1].cmd[VPP_CMD_0];
149 tmp.veclinear.count = paramList->NumParameters - 96;
150 rmesa->hw.vpp[1].cmd[VPP_CMD_0] = tmp.i;
151 }
152 return GL_TRUE;
153 }
154
/**
 * Convert a Mesa destination write mask to the hardware write-enable bits.
 *
 * WRITEMASK_* and VSF_FLAG_* are numerically identical (enforced by the
 * preprocessor check near the top of the file), so the conversion only
 * discards bits outside VSF_FLAG_ALL.
 */
static inline unsigned long t_dst_mask(GLuint mask)
{
   const unsigned long hw_mask = mask & VSF_FLAG_ALL;

   return hw_mask;
}
160
t_dst(struct prog_dst_register * dst)161 static unsigned long t_dst(struct prog_dst_register *dst)
162 {
163 switch(dst->File) {
164 case PROGRAM_TEMPORARY:
165 return ((dst->Index << R200_VPI_OUT_REG_INDEX_SHIFT)
166 | R200_VSF_OUT_CLASS_TMP);
167 case PROGRAM_OUTPUT:
168 switch (dst->Index) {
169 case VARYING_SLOT_POS:
170 return R200_VSF_OUT_CLASS_RESULT_POS;
171 case VARYING_SLOT_COL0:
172 return R200_VSF_OUT_CLASS_RESULT_COLOR;
173 case VARYING_SLOT_COL1:
174 return ((1 << R200_VPI_OUT_REG_INDEX_SHIFT)
175 | R200_VSF_OUT_CLASS_RESULT_COLOR);
176 case VARYING_SLOT_FOGC:
177 return R200_VSF_OUT_CLASS_RESULT_FOGC;
178 case VARYING_SLOT_TEX0:
179 case VARYING_SLOT_TEX1:
180 case VARYING_SLOT_TEX2:
181 case VARYING_SLOT_TEX3:
182 case VARYING_SLOT_TEX4:
183 case VARYING_SLOT_TEX5:
184 return (((dst->Index - VARYING_SLOT_TEX0) << R200_VPI_OUT_REG_INDEX_SHIFT)
185 | R200_VSF_OUT_CLASS_RESULT_TEXC);
186 case VARYING_SLOT_PSIZ:
187 return R200_VSF_OUT_CLASS_RESULT_POINTSIZE;
188 default:
189 fprintf(stderr, "problem in %s, unknown dst output reg %d\n", __func__, dst->Index);
190 exit(0);
191 return 0;
192 }
193 case PROGRAM_ADDRESS:
194 assert (dst->Index == 0);
195 return R200_VSF_OUT_CLASS_ADDR;
196 default:
197 fprintf(stderr, "problem in %s, unknown register type %d\n", __func__, dst->File);
198 exit(0);
199 return 0;
200 }
201 }
202
/**
 * Map a Mesa register file to the hardware source-register class.
 *
 * Temporaries, vertex inputs and parameters (constants and state
 * variables) are supported.  Any other file, including PROGRAM_OUTPUT and
 * PROGRAM_ADDRESS, is fatal: a diagnostic is printed and the process exits
 * with a failure status.
 *
 * \param file  register file of a source operand.
 * \return one of VSF_IN_CLASS_TMP, VSF_IN_CLASS_ATTR or VSF_IN_CLASS_PARAM.
 */
static unsigned long t_src_class(gl_register_file file)
{
   switch(file){
   case PROGRAM_TEMPORARY:
      return VSF_IN_CLASS_TMP;

   case PROGRAM_INPUT:
      return VSF_IN_CLASS_ATTR;

   case PROGRAM_CONSTANT:
   case PROGRAM_STATE_VAR:
      return VSF_IN_CLASS_PARAM;

   default:
      fprintf(stderr, "problem in %s, unknown register type %d\n", __func__, (int) file);
      /* fatal error: exit with a failure status, as t_opcode() and
       * op_operands() do */
      exit(-1);
      return 0;
   }
}
225
/**
 * Convert a single Mesa swizzle selector to the hardware component
 * selector.
 *
 * The Mesa SWIZZLE_* values are identical to VSF_IN_COMPONENT_*, so the
 * value is only widened to the return type.
 */
static inline unsigned long t_swizzle(GLubyte swizzle)
{
   const unsigned long hw_component = swizzle;

   return hw_component;
}
231
#if 0
/* Debug helper (compiled out): print every entry of vp->inputs[] to
 * stderr, prefixed with the caller's name. */
static void vp_dump_inputs(struct r200_vertex_program *vp, char *caller)
{
   int attr;

   if (!vp) {
      fprintf(stderr, "vp null in call to %s from %s\n", __func__, caller);
      return;
   }

   fprintf(stderr, "%s:<", caller);
   for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
      fprintf(stderr, "%d ", vp->inputs[attr]);
   }
   fprintf(stderr, ">\n");
}
#endif
249
t_src_index(struct r200_vertex_program * vp,struct prog_src_register * src)250 static unsigned long t_src_index(struct r200_vertex_program *vp, struct prog_src_register *src)
251 {
252 /*
253 int i;
254 int max_reg = -1;
255 */
256 if(src->File == PROGRAM_INPUT){
257 /* if(vp->inputs[src->Index] != -1)
258 return vp->inputs[src->Index];
259
260 for(i=0; i < VERT_ATTRIB_MAX; i++)
261 if(vp->inputs[i] > max_reg)
262 max_reg = vp->inputs[i];
263
264 vp->inputs[src->Index] = max_reg+1;*/
265
266 //vp_dump_inputs(vp, __func__);
267 assert(vp->inputs[src->Index] != -1);
268 return vp->inputs[src->Index];
269 } else {
270 if (src->Index < 0) {
271 fprintf(stderr, "WARNING negative offsets for indirect addressing do not work\n");
272 return 0;
273 }
274 return src->Index;
275 }
276 }
277
t_src(struct r200_vertex_program * vp,struct prog_src_register * src)278 static unsigned long t_src(struct r200_vertex_program *vp, struct prog_src_register *src)
279 {
280
281 return MAKE_VSF_SOURCE(t_src_index(vp, src),
282 t_swizzle(GET_SWZ(src->Swizzle, 0)),
283 t_swizzle(GET_SWZ(src->Swizzle, 1)),
284 t_swizzle(GET_SWZ(src->Swizzle, 2)),
285 t_swizzle(GET_SWZ(src->Swizzle, 3)),
286 t_src_class(src->File),
287 src->Negate) | (src->RelAddr << 4);
288 }
289
t_src_scalar(struct r200_vertex_program * vp,struct prog_src_register * src)290 static unsigned long t_src_scalar(struct r200_vertex_program *vp, struct prog_src_register *src)
291 {
292
293 return MAKE_VSF_SOURCE(t_src_index(vp, src),
294 t_swizzle(GET_SWZ(src->Swizzle, 0)),
295 t_swizzle(GET_SWZ(src->Swizzle, 0)),
296 t_swizzle(GET_SWZ(src->Swizzle, 0)),
297 t_swizzle(GET_SWZ(src->Swizzle, 0)),
298 t_src_class(src->File),
299 src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4);
300 }
301
t_opcode(enum prog_opcode opcode)302 static unsigned long t_opcode(enum prog_opcode opcode)
303 {
304
305 switch(opcode){
306 case OPCODE_ADD: return R200_VPI_OUT_OP_ADD;
307 /* FIXME: ARL works fine, but negative offsets won't work - fglrx just
308 * seems to ignore neg offsets which isn't quite correct...
309 */
310 case OPCODE_ARL: return R200_VPI_OUT_OP_ARL;
311 case OPCODE_DP4: return R200_VPI_OUT_OP_DOT;
312 case OPCODE_DST: return R200_VPI_OUT_OP_DST;
313 case OPCODE_EX2: return R200_VPI_OUT_OP_EX2;
314 case OPCODE_EXP: return R200_VPI_OUT_OP_EXP;
315 case OPCODE_FRC: return R200_VPI_OUT_OP_FRC;
316 case OPCODE_LG2: return R200_VPI_OUT_OP_LG2;
317 case OPCODE_LIT: return R200_VPI_OUT_OP_LIT;
318 case OPCODE_LOG: return R200_VPI_OUT_OP_LOG;
319 case OPCODE_MAX: return R200_VPI_OUT_OP_MAX;
320 case OPCODE_MIN: return R200_VPI_OUT_OP_MIN;
321 case OPCODE_MUL: return R200_VPI_OUT_OP_MUL;
322 case OPCODE_RCP: return R200_VPI_OUT_OP_RCP;
323 case OPCODE_RSQ: return R200_VPI_OUT_OP_RSQ;
324 case OPCODE_SGE: return R200_VPI_OUT_OP_SGE;
325 case OPCODE_SLT: return R200_VPI_OUT_OP_SLT;
326
327 default:
328 fprintf(stderr, "%s: Should not be called with opcode %d!", __func__, opcode);
329 }
330 exit(-1);
331 return 0;
332 }
333
op_operands(enum prog_opcode opcode)334 static unsigned long op_operands(enum prog_opcode opcode)
335 {
336 int i;
337
338 /* Can we trust mesas opcodes to be in order ? */
339 for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++)
340 if(op_names[i].opcode == opcode)
341 return op_names[i].ip;
342
343 fprintf(stderr, "op %d not found in op_names\n", opcode);
344 exit(-1);
345 return 0;
346 }
347
/* TODO: Get rid of t_src_class call */
/*
 * True when source registers a and b differ (different Index or different
 * RelAddr) while both are parameters or both are vertex attributes.  The
 * translator copies one of the two sources to a temporary first when this
 * holds.
 *
 * Both arguments are evaluated several times, so they must not have side
 * effects.
 */
#define CMP_SRCS(a, b) ((((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index)) && \
                        ((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
                          t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
                         (t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
                          t_src_class((b).File) == VSF_IN_CLASS_ATTR)))

/* fglrx on rv250 encodes unused sources as follows:
   unused but necessary sources are the same as the previous source, zeroed out.
   unnecessary sources are the same as the previous source but with VSF_IN_CLASS_NONE set.
   i.e. an add (2 args) has its 2nd arg (if you use it as mov) zeroed out, and its 3rd arg
   set to VSF_IN_CLASS_NONE. Not sure if strictly necessary. */

/* Use these simpler definitions. They obviously must not be used with registers
   that have not been set up yet.
   They are NOT semantically equivalent to the r300 ones; using those requires code changes. */

/* Copy of an already encoded source word with all four component selectors
   replaced by R200_VPI_IN_SELECT_ZERO. */
#define VP_ZERO_SRC(word) ((((word) & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
                            | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
                               | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
                               | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
                               | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))))

/* Copy of an already encoded source word with its low four bits replaced
   by 9 -- presumably the VSF_IN_CLASS_NONE encoding mentioned above; TODO
   confirm against the register header. */
#define VP_UNUSED_SRC(word) (((word) & ~15) | 9)

/* The shorthands below read the current output instruction through a local
   variable named o_inst, which must be in scope where they are used. */
#define ZERO_SRC_0 VP_ZERO_SRC(o_inst->src0)

#define ZERO_SRC_1 VP_ZERO_SRC(o_inst->src1)

#define ZERO_SRC_2 VP_ZERO_SRC(o_inst->src2)

#define UNUSED_SRC_0 VP_UNUSED_SRC(o_inst->src0)

#define UNUSED_SRC_1 VP_UNUSED_SRC(o_inst->src1)

#define UNUSED_SRC_2 VP_UNUSED_SRC(o_inst->src2)
386
387
388 /**
389 * Generate an R200 vertex program from Mesa's internal representation.
390 *
391 * \return GL_TRUE for success, GL_FALSE for failure.
392 */
r200_translate_vertex_program(struct gl_context * ctx,struct r200_vertex_program * vp)393 static GLboolean r200_translate_vertex_program(struct gl_context *ctx, struct r200_vertex_program *vp)
394 {
395 struct gl_program *mesa_vp = &vp->mesa_program;
396 struct prog_instruction *vpi;
397 int i;
398 VERTEX_SHADER_INSTRUCTION *o_inst;
399 unsigned long operands;
400 int are_srcs_scalar;
401 unsigned long hw_op;
402 int dofogfix = 0;
403 int fog_temp_i = 0;
404 int free_inputs;
405 int array_count = 0;
406 int u_temp_used;
407
408 vp->native = GL_FALSE;
409 vp->translated = GL_TRUE;
410 vp->fogmode = ctx->Fog.Mode;
411
412 if (mesa_vp->arb.NumInstructions == 0)
413 return GL_FALSE;
414
415 #if 0
416 if ((mesa_vp->info.inputs_read &
417 ~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 |
418 VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 |
419 VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) != 0) {
420 if (R200_DEBUG & RADEON_FALLBACKS) {
421 fprintf(stderr, "can't handle vert prog inputs 0x%x\n",
422 mesa_vp->info.inputs_read);
423 }
424 return GL_FALSE;
425 }
426 #endif
427
428 if ((mesa_vp->info.outputs_written &
429 ~((1 << VARYING_SLOT_POS) | (1 << VARYING_SLOT_COL0) | (1 << VARYING_SLOT_COL1) |
430 (1 << VARYING_SLOT_FOGC) | (1 << VARYING_SLOT_TEX0) | (1 << VARYING_SLOT_TEX1) |
431 (1 << VARYING_SLOT_TEX2) | (1 << VARYING_SLOT_TEX3) | (1 << VARYING_SLOT_TEX4) |
432 (1 << VARYING_SLOT_TEX5) | (1 << VARYING_SLOT_PSIZ))) != 0) {
433 if (R200_DEBUG & RADEON_FALLBACKS) {
434 fprintf(stderr, "can't handle vert prog outputs 0x%llx\n",
435 (unsigned long long) mesa_vp->info.outputs_written);
436 }
437 return GL_FALSE;
438 }
439
440 /* Initial value should be last tmp reg that hw supports.
441 Strangely enough r300 doesnt mind even though these would be out of range.
442 Smart enough to realize that it doesnt need it? */
443 int u_temp_i = R200_VSF_MAX_TEMPS - 1;
444 struct prog_src_register src[3];
445 struct prog_dst_register dst;
446
447 /* FIXME: is changing the prog safe to do here? */
448 if (mesa_vp->arb.IsPositionInvariant &&
449 /* make sure we only do this once */
450 !(mesa_vp->info.outputs_written & (1 << VARYING_SLOT_POS))) {
451 _mesa_insert_mvp_code(ctx, mesa_vp);
452 }
453
454 /* for fogc, can't change mesa_vp, as it would hose swtnl, and exp with
455 base e isn't directly available neither. */
456 if ((mesa_vp->info.outputs_written & (1 << VARYING_SLOT_FOGC)) &&
457 !vp->fogpidx) {
458 struct gl_program_parameter_list *paramList;
459 gl_state_index tokens[STATE_LENGTH] = { STATE_FOG_PARAMS, 0, 0, 0, 0 };
460 paramList = mesa_vp->Parameters;
461 vp->fogpidx = _mesa_add_state_reference(paramList, tokens);
462 }
463
464 vp->pos_end = 0;
465 mesa_vp->arb.NumNativeInstructions = 0;
466 if (mesa_vp->Parameters)
467 mesa_vp->arb.NumNativeParameters = mesa_vp->Parameters->NumParameters;
468 else
469 mesa_vp->arb.NumNativeParameters = 0;
470
471 for(i = 0; i < VERT_ATTRIB_MAX; i++)
472 vp->inputs[i] = -1;
473 for(i = 0; i < 15; i++)
474 vp->inputmap_rev[i] = 255;
475 free_inputs = 0x2ffd;
476
477 /* fglrx uses fixed inputs as follows for conventional attribs.
478 generic attribs use non-fixed assignment, fglrx will always use the
479 lowest attrib values available. We'll just do the same.
480 There are 12 generic attribs possible, corresponding to attrib 0, 2-11
481 and 13 in a hw vertex prog.
482 attr 1 and 12 aren't used for generic attribs as those cannot be made vec4
483 (correspond to vertex normal/weight - maybe weight actually could be made vec4).
484 Additionally, not more than 12 arrays in total are possible I think.
485 attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0
486 attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0)
487 attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1)
488 attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0)
489 */
490
491 /* attr 4,5 and 13 are only used with generic attribs.
492 Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is
493 not possibe to use with vertex progs as it is lacking in vert prog specification) */
494 /* may look different when using idx buf / input_route instead of se_vtx_fmt? */
495 if (mesa_vp->info.inputs_read & VERT_BIT_POS) {
496 vp->inputs[VERT_ATTRIB_POS] = 0;
497 vp->inputmap_rev[0] = VERT_ATTRIB_POS;
498 free_inputs &= ~(1 << 0);
499 array_count++;
500 }
501 if (mesa_vp->info.inputs_read & VERT_BIT_NORMAL) {
502 vp->inputs[VERT_ATTRIB_NORMAL] = 1;
503 vp->inputmap_rev[2] = VERT_ATTRIB_NORMAL;
504 array_count++;
505 }
506 if (mesa_vp->info.inputs_read & VERT_BIT_COLOR0) {
507 vp->inputs[VERT_ATTRIB_COLOR0] = 2;
508 vp->inputmap_rev[4] = VERT_ATTRIB_COLOR0;
509 free_inputs &= ~(1 << 2);
510 array_count++;
511 }
512 if (mesa_vp->info.inputs_read & VERT_BIT_COLOR1) {
513 vp->inputs[VERT_ATTRIB_COLOR1] = 3;
514 vp->inputmap_rev[5] = VERT_ATTRIB_COLOR1;
515 free_inputs &= ~(1 << 3);
516 array_count++;
517 }
518 if (mesa_vp->info.inputs_read & VERT_BIT_FOG) {
519 vp->inputs[VERT_ATTRIB_FOG] = 15; array_count++;
520 vp->inputmap_rev[3] = VERT_ATTRIB_FOG;
521 array_count++;
522 }
523 /* VERT_ATTRIB_TEX0-5 */
524 for (i = 0; i <= 5; i++) {
525 if (mesa_vp->info.inputs_read & VERT_BIT_TEX(i)) {
526 vp->inputs[VERT_ATTRIB_TEX(i)] = i + 6;
527 vp->inputmap_rev[8 + i] = VERT_ATTRIB_TEX(i);
528 free_inputs &= ~(1 << (i + 6));
529 array_count++;
530 }
531 }
532 /* using VERT_ATTRIB_TEX6/7 would be illegal */
533 for (; i < VERT_ATTRIB_TEX_MAX; i++) {
534 if (mesa_vp->info.inputs_read & VERT_BIT_TEX(i)) {
535 if (R200_DEBUG & RADEON_FALLBACKS) {
536 fprintf(stderr, "texture attribute %d in vert prog\n", i);
537 }
538 return GL_FALSE;
539 }
540 }
541 /* completely ignore aliasing? */
542 for (i = 0; i < VERT_ATTRIB_GENERIC_MAX; i++) {
543 int j;
544 /* completely ignore aliasing? */
545 if (mesa_vp->info.inputs_read & VERT_BIT_GENERIC(i)) {
546 array_count++;
547 if (array_count > 12) {
548 if (R200_DEBUG & RADEON_FALLBACKS) {
549 fprintf(stderr, "more than 12 attribs used in vert prog\n");
550 }
551 return GL_FALSE;
552 }
553 for (j = 0; j < 14; j++) {
554 /* will always find one due to limited array_count */
555 if (free_inputs & (1 << j)) {
556 free_inputs &= ~(1 << j);
557 vp->inputs[VERT_ATTRIB_GENERIC(i)] = j;
558 if (j == 0) {
559 /* mapped to pos */
560 vp->inputmap_rev[j] = VERT_ATTRIB_GENERIC(i);
561 } else if (j < 12) {
562 /* mapped to col/tex */
563 vp->inputmap_rev[j + 2] = VERT_ATTRIB_GENERIC(i);
564 } else {
565 /* mapped to pos1 */
566 vp->inputmap_rev[j + 1] = VERT_ATTRIB_GENERIC(i);
567 }
568 break;
569 }
570 }
571 }
572 }
573
574 if (!(mesa_vp->info.outputs_written & (1 << VARYING_SLOT_POS))) {
575 if (R200_DEBUG & RADEON_FALLBACKS) {
576 fprintf(stderr, "can't handle vert prog without position output\n");
577 }
578 return GL_FALSE;
579 }
580 if (free_inputs & 1) {
581 if (R200_DEBUG & RADEON_FALLBACKS) {
582 fprintf(stderr, "can't handle vert prog without position input\n");
583 }
584 return GL_FALSE;
585 }
586
587 o_inst = vp->instr;
588 for (vpi = mesa_vp->arb.Instructions; vpi->Opcode != OPCODE_END; vpi++, o_inst++){
589 operands = op_operands(vpi->Opcode);
590 are_srcs_scalar = operands & SCALAR_FLAG;
591 operands &= OP_MASK;
592
593 for(i = 0; i < operands; i++) {
594 src[i] = vpi->SrcReg[i];
595 /* hack up default attrib values as per spec as swizzling.
596 normal, fog, secondary color. Crazy?
597 May need more if we don't submit vec4 elements? */
598 if (src[i].File == PROGRAM_INPUT) {
599 if (src[i].Index == VERT_ATTRIB_NORMAL) {
600 int j;
601 for (j = 0; j < 4; j++) {
602 if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
603 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
604 src[i].Swizzle |= SWIZZLE_ONE << (j*3);
605 }
606 }
607 }
608 else if (src[i].Index == VERT_ATTRIB_COLOR1) {
609 int j;
610 for (j = 0; j < 4; j++) {
611 if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
612 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
613 src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
614 }
615 }
616 }
617 else if (src[i].Index == VERT_ATTRIB_FOG) {
618 int j;
619 for (j = 0; j < 4; j++) {
620 if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
621 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
622 src[i].Swizzle |= SWIZZLE_ONE << (j*3);
623 }
624 else if ((GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Y) ||
625 GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Z) {
626 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
627 src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
628 }
629 }
630 }
631 }
632 }
633
634 if(operands == 3){
635 if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){
636 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
637 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
638 VSF_FLAG_ALL);
639
640 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
641 SWIZZLE_X, SWIZZLE_Y,
642 SWIZZLE_Z, SWIZZLE_W,
643 t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4);
644
645 o_inst->src1 = ZERO_SRC_0;
646 o_inst->src2 = UNUSED_SRC_1;
647 o_inst++;
648
649 src[2].File = PROGRAM_TEMPORARY;
650 src[2].Index = u_temp_i;
651 src[2].RelAddr = 0;
652 u_temp_i--;
653 }
654 }
655
656 if(operands >= 2){
657 if( CMP_SRCS(src[1], src[0]) ){
658 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
659 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
660 VSF_FLAG_ALL);
661
662 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
663 SWIZZLE_X, SWIZZLE_Y,
664 SWIZZLE_Z, SWIZZLE_W,
665 t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4);
666
667 o_inst->src1 = ZERO_SRC_0;
668 o_inst->src2 = UNUSED_SRC_1;
669 o_inst++;
670
671 src[0].File = PROGRAM_TEMPORARY;
672 src[0].Index = u_temp_i;
673 src[0].RelAddr = 0;
674 u_temp_i--;
675 }
676 }
677
678 dst = vpi->DstReg;
679 if (dst.File == PROGRAM_OUTPUT &&
680 dst.Index == VARYING_SLOT_FOGC &&
681 dst.WriteMask & WRITEMASK_X) {
682 fog_temp_i = u_temp_i;
683 dst.File = PROGRAM_TEMPORARY;
684 dst.Index = fog_temp_i;
685 dofogfix = 1;
686 u_temp_i--;
687 }
688
689 /* These ops need special handling. */
690 switch(vpi->Opcode){
691 case OPCODE_POW:
692 /* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter).
693 So may need to insert additional instruction */
694 if ((src[0].File == src[1].File) &&
695 (src[0].Index == src[1].Index)) {
696 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
697 t_dst_mask(dst.WriteMask));
698 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
699 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
700 SWIZZLE_ZERO,
701 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
702 SWIZZLE_ZERO,
703 t_src_class(src[0].File),
704 src[0].Negate) | (src[0].RelAddr << 4);
705 o_inst->src1 = UNUSED_SRC_0;
706 o_inst->src2 = UNUSED_SRC_0;
707 }
708 else {
709 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
710 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
711 VSF_FLAG_ALL);
712 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
713 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
714 SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO,
715 t_src_class(src[0].File),
716 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
717 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
718 SWIZZLE_ZERO, SWIZZLE_ZERO,
719 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO,
720 t_src_class(src[1].File),
721 src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
722 o_inst->src2 = UNUSED_SRC_1;
723 o_inst++;
724
725 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
726 t_dst_mask(dst.WriteMask));
727 o_inst->src0 = MAKE_VSF_SOURCE(u_temp_i,
728 VSF_IN_COMPONENT_X,
729 VSF_IN_COMPONENT_Y,
730 VSF_IN_COMPONENT_Z,
731 VSF_IN_COMPONENT_W,
732 VSF_IN_CLASS_TMP,
733 VSF_FLAG_NONE);
734 o_inst->src1 = UNUSED_SRC_0;
735 o_inst->src2 = UNUSED_SRC_0;
736 u_temp_i--;
737 }
738 goto next;
739
740 case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
741 case OPCODE_SWZ:
742 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
743 t_dst_mask(dst.WriteMask));
744 o_inst->src0 = t_src(vp, &src[0]);
745 o_inst->src1 = ZERO_SRC_0;
746 o_inst->src2 = UNUSED_SRC_1;
747 goto next;
748
749 case OPCODE_MAD:
750 /* only 2 read ports into temp memory thus may need the macro op MAD_2
751 instead (requiring 2 clocks) if all inputs are in temp memory
752 (and, only if they actually reference 3 distinct temps) */
753 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
754 src[1].File == PROGRAM_TEMPORARY &&
755 src[2].File == PROGRAM_TEMPORARY &&
756 (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index)) &&
757 (((src[0].RelAddr << 8) | src[0].Index) != ((src[2].RelAddr << 8) | src[2].Index)) &&
758 (((src[1].RelAddr << 8) | src[1].Index) != ((src[2].RelAddr << 8) | src[2].Index))) ?
759 R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
760
761 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
762 t_dst_mask(dst.WriteMask));
763 o_inst->src0 = t_src(vp, &src[0]);
764 #if 0
765 if ((o_inst - vp->instr) == 31) {
766 /* fix up the broken vertex program of quake4 demo... */
767 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
768 SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X,
769 t_src_class(src[1].File),
770 src[1].Negate) | (src[1].RelAddr << 4);
771 o_inst->src2 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
772 SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y,
773 t_src_class(src[1].File),
774 src[1].Negate) | (src[1].RelAddr << 4);
775 }
776 else {
777 o_inst->src1 = t_src(vp, &src[1]);
778 o_inst->src2 = t_src(vp, &src[2]);
779 }
780 #else
781 o_inst->src1 = t_src(vp, &src[1]);
782 o_inst->src2 = t_src(vp, &src[2]);
783 #endif
784 goto next;
785
786 case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
787 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
788 t_dst_mask(dst.WriteMask));
789
790 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
791 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
792 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
793 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
794 SWIZZLE_ZERO,
795 t_src_class(src[0].File),
796 src[0].Negate) | (src[0].RelAddr << 4);
797
798 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
799 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
800 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
801 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
802 SWIZZLE_ZERO,
803 t_src_class(src[1].File),
804 src[1].Negate) | (src[1].RelAddr << 4);
805
806 o_inst->src2 = UNUSED_SRC_1;
807 goto next;
808
809 case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
810 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
811 t_dst_mask(dst.WriteMask));
812
813 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
814 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
815 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
816 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
817 VSF_IN_COMPONENT_ONE,
818 t_src_class(src[0].File),
819 src[0].Negate) | (src[0].RelAddr << 4);
820 o_inst->src1 = t_src(vp, &src[1]);
821 o_inst->src2 = UNUSED_SRC_1;
822 goto next;
823
824 case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
825 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
826 t_dst_mask(dst.WriteMask));
827
828 o_inst->src0 = t_src(vp, &src[0]);
829 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
830 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
831 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
832 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
833 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
834 t_src_class(src[1].File),
835 (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
836 o_inst->src2 = UNUSED_SRC_1;
837 goto next;
838
839 case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
840 o_inst->op=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX, t_dst(&dst),
841 t_dst_mask(dst.WriteMask));
842
843 o_inst->src0=t_src(vp, &src[0]);
844 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
845 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
846 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
847 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
848 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
849 t_src_class(src[0].File),
850 (!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
851 o_inst->src2 = UNUSED_SRC_1;
852 goto next;
853
854 case OPCODE_FLR:
855 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
856 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
857
858 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_FRC,
859 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
860 t_dst_mask(dst.WriteMask));
861
862 o_inst->src0 = t_src(vp, &src[0]);
863 o_inst->src1 = UNUSED_SRC_0;
864 o_inst->src2 = UNUSED_SRC_1;
865 o_inst++;
866
867 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
868 t_dst_mask(dst.WriteMask));
869
870 o_inst->src0 = t_src(vp, &src[0]);
871 o_inst->src1 = MAKE_VSF_SOURCE(u_temp_i,
872 VSF_IN_COMPONENT_X,
873 VSF_IN_COMPONENT_Y,
874 VSF_IN_COMPONENT_Z,
875 VSF_IN_COMPONENT_W,
876 VSF_IN_CLASS_TMP,
877 /* Not 100% sure about this */
878 (!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
879
880 o_inst->src2 = UNUSED_SRC_0;
881 u_temp_i--;
882 goto next;
883
884 case OPCODE_XPD:
885 /* mul r0, r1.yzxw, r2.zxyw
886 mad r0, -r2.yzxw, r1.zxyw, r0
887 */
888 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
889 src[1].File == PROGRAM_TEMPORARY &&
890 (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index))) ?
891 R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
892
893 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
894 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
895 t_dst_mask(dst.WriteMask));
896
897 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
898 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
899 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
900 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
901 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
902 t_src_class(src[0].File),
903 src[0].Negate) | (src[0].RelAddr << 4);
904
905 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
906 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
907 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
908 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
909 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
910 t_src_class(src[1].File),
911 src[1].Negate) | (src[1].RelAddr << 4);
912
913 o_inst->src2 = UNUSED_SRC_1;
914 o_inst++;
915 u_temp_i--;
916
917 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
918 t_dst_mask(dst.WriteMask));
919
920 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
921 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
922 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
923 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
924 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
925 t_src_class(src[1].File),
926 (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
927
928 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
929 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
930 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
931 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
932 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
933 t_src_class(src[0].File),
934 src[0].Negate) | (src[0].RelAddr << 4);
935
936 o_inst->src2 = MAKE_VSF_SOURCE(u_temp_i+1,
937 VSF_IN_COMPONENT_X,
938 VSF_IN_COMPONENT_Y,
939 VSF_IN_COMPONENT_Z,
940 VSF_IN_COMPONENT_W,
941 VSF_IN_CLASS_TMP,
942 VSF_FLAG_NONE);
943 goto next;
944
945 case OPCODE_END:
946 assert(0);
947 default:
948 break;
949 }
950
951 o_inst->op = MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst(&dst),
952 t_dst_mask(dst.WriteMask));
953
954 if(are_srcs_scalar){
955 switch(operands){
956 case 1:
957 o_inst->src0 = t_src_scalar(vp, &src[0]);
958 o_inst->src1 = UNUSED_SRC_0;
959 o_inst->src2 = UNUSED_SRC_1;
960 break;
961
962 case 2:
963 o_inst->src0 = t_src_scalar(vp, &src[0]);
964 o_inst->src1 = t_src_scalar(vp, &src[1]);
965 o_inst->src2 = UNUSED_SRC_1;
966 break;
967
968 case 3:
969 o_inst->src0 = t_src_scalar(vp, &src[0]);
970 o_inst->src1 = t_src_scalar(vp, &src[1]);
971 o_inst->src2 = t_src_scalar(vp, &src[2]);
972 break;
973
974 default:
975 fprintf(stderr, "illegal number of operands %lu\n", operands);
976 exit(-1);
977 break;
978 }
979 } else {
980 switch(operands){
981 case 1:
982 o_inst->src0 = t_src(vp, &src[0]);
983 o_inst->src1 = UNUSED_SRC_0;
984 o_inst->src2 = UNUSED_SRC_1;
985 break;
986
987 case 2:
988 o_inst->src0 = t_src(vp, &src[0]);
989 o_inst->src1 = t_src(vp, &src[1]);
990 o_inst->src2 = UNUSED_SRC_1;
991 break;
992
993 case 3:
994 o_inst->src0 = t_src(vp, &src[0]);
995 o_inst->src1 = t_src(vp, &src[1]);
996 o_inst->src2 = t_src(vp, &src[2]);
997 break;
998
999 default:
1000 fprintf(stderr, "illegal number of operands %lu\n", operands);
1001 exit(-1);
1002 break;
1003 }
1004 }
1005 next:
1006
1007 if (dofogfix) {
1008 o_inst++;
1009 if (vp->fogmode == GL_EXP) {
1010 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1011 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1012 VSF_FLAG_X);
1013 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1014 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
1015 o_inst->src2 = UNUSED_SRC_1;
1016 o_inst++;
1017 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
1018 R200_VSF_OUT_CLASS_RESULT_FOGC,
1019 VSF_FLAG_X);
1020 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1021 o_inst->src1 = UNUSED_SRC_0;
1022 o_inst->src2 = UNUSED_SRC_1;
1023 }
1024 else if (vp->fogmode == GL_EXP2) {
1025 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1026 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1027 VSF_FLAG_X);
1028 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1029 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
1030 o_inst->src2 = UNUSED_SRC_1;
1031 o_inst++;
1032 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1033 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1034 VSF_FLAG_X);
1035 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1036 o_inst->src1 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1037 o_inst->src2 = UNUSED_SRC_1;
1038 o_inst++;
1039 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
1040 R200_VSF_OUT_CLASS_RESULT_FOGC,
1041 VSF_FLAG_X);
1042 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1043 o_inst->src1 = UNUSED_SRC_0;
1044 o_inst->src2 = UNUSED_SRC_1;
1045 }
1046 else { /* fogmode == GL_LINEAR */
1047 /* could do that with single op (dot) if using params like
1048 with fixed function pipeline fog */
1049 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
1050 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1051 VSF_FLAG_X);
1052 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1053 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, Z, Z, Z, Z, PARAM, NONE);
1054 o_inst->src2 = UNUSED_SRC_1;
1055 o_inst++;
1056 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1057 R200_VSF_OUT_CLASS_RESULT_FOGC,
1058 VSF_FLAG_X);
1059 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1060 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, W, W, W, W, PARAM, NONE);
1061 o_inst->src2 = UNUSED_SRC_1;
1062
1063 }
1064 dofogfix = 0;
1065 }
1066
1067 u_temp_used = (R200_VSF_MAX_TEMPS - 1) - u_temp_i;
1068 if (mesa_vp->arb.NumNativeTemporaries <
1069 (mesa_vp->arb.NumTemporaries + u_temp_used)) {
1070 mesa_vp->arb.NumNativeTemporaries =
1071 mesa_vp->arb.NumTemporaries + u_temp_used;
1072 }
1073 if ((mesa_vp->arb.NumTemporaries + u_temp_used) > R200_VSF_MAX_TEMPS) {
1074 if (R200_DEBUG & RADEON_FALLBACKS) {
1075 fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->arb.NumTemporaries, u_temp_used);
1076 }
1077 return GL_FALSE;
1078 }
1079 u_temp_i = R200_VSF_MAX_TEMPS - 1;
1080 if(o_inst - vp->instr >= R200_VSF_MAX_INST) {
1081 mesa_vp->arb.NumNativeInstructions = 129;
1082 if (R200_DEBUG & RADEON_FALLBACKS) {
1083 fprintf(stderr, "more than 128 native instructions\n");
1084 }
1085 return GL_FALSE;
1086 }
1087 if ((o_inst->op & R200_VSF_OUT_CLASS_MASK) == R200_VSF_OUT_CLASS_RESULT_POS) {
1088 vp->pos_end = (o_inst - vp->instr);
1089 }
1090 }
1091
1092 vp->native = GL_TRUE;
1093 mesa_vp->arb.NumNativeInstructions = (o_inst - vp->instr);
1094 #if 0
1095 fprintf(stderr, "hw program:\n");
1096 for(i=0; i < vp->program.length; i++)
1097 fprintf(stderr, "%08x\n", vp->instr[i]);
1098 #endif
1099 return GL_TRUE;
1100 }
1101
r200SetupVertexProg(struct gl_context * ctx)1102 void r200SetupVertexProg( struct gl_context *ctx ) {
1103 r200ContextPtr rmesa = R200_CONTEXT(ctx);
1104 struct r200_vertex_program *vp = (struct r200_vertex_program *)ctx->VertexProgram.Current;
1105 GLboolean fallback;
1106 GLint i;
1107
1108 if (!vp->translated || (ctx->Fog.Enabled && ctx->Fog.Mode != vp->fogmode)) {
1109 rmesa->curr_vp_hw = NULL;
1110 r200_translate_vertex_program(ctx, vp);
1111 }
1112 /* could optimize setting up vertex progs away for non-tcl hw */
1113 fallback = !(vp->native && r200VertexProgUpdateParams(ctx, vp));
1114 TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, fallback);
1115 if (rmesa->radeon.TclFallback) return;
1116
1117 R200_STATECHANGE( rmesa, vap );
1118 /* FIXME: fglrx sets R200_VAP_SINGLE_BUF_STATE_ENABLE too. Do we need it?
1119 maybe only when using more than 64 inst / 96 param? */
1120 rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_PROG_VTX_SHADER_ENABLE /*| R200_VAP_SINGLE_BUF_STATE_ENABLE*/;
1121
1122 R200_STATECHANGE( rmesa, pvs );
1123
1124 rmesa->hw.pvs.cmd[PVS_CNTL_1] = (0 << R200_PVS_CNTL_1_PROGRAM_START_SHIFT) |
1125 ((vp->mesa_program.arb.NumNativeInstructions - 1) << R200_PVS_CNTL_1_PROGRAM_END_SHIFT) |
1126 (vp->pos_end << R200_PVS_CNTL_1_POS_END_SHIFT);
1127 rmesa->hw.pvs.cmd[PVS_CNTL_2] = (0 << R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT) |
1128 (vp->mesa_program.arb.NumNativeParameters << R200_PVS_CNTL_2_PARAM_COUNT_SHIFT);
1129
1130 /* maybe user clip planes just work with vertex progs... untested */
1131 if (ctx->Transform.ClipPlanesEnabled) {
1132 R200_STATECHANGE( rmesa, tcl );
1133 if (vp->mesa_program.arb.IsPositionInvariant) {
1134 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (ctx->Transform.ClipPlanesEnabled << 2);
1135 }
1136 else {
1137 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(0xfc);
1138 }
1139 }
1140
1141 if (vp != rmesa->curr_vp_hw) {
1142 GLuint count = vp->mesa_program.arb.NumNativeInstructions;
1143 drm_radeon_cmd_header_t tmp;
1144
1145 R200_STATECHANGE( rmesa, vpi[0] );
1146 R200_STATECHANGE( rmesa, vpi[1] );
1147
1148 /* FIXME: what about using a memcopy... */
1149 for (i = 0; (i < 64) && i < count; i++) {
1150 rmesa->hw.vpi[0].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i].op;
1151 rmesa->hw.vpi[0].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i].src0;
1152 rmesa->hw.vpi[0].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i].src1;
1153 rmesa->hw.vpi[0].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i].src2;
1154 }
1155 /* hack up the cmd_size so not the whole state atom is emitted always.
1156 This may require some more thought, we may emit half progs on lost state, but
1157 hopefully it won't matter?
1158 WARNING: must not use R200_DB_STATECHANGE, this will produce bogus (and rejected)
1159 packet emits (due to the mismatched cmd_size and count in cmd/last_cmd) */
1160 rmesa->hw.vpi[0].cmd_size = 1 + 4 * ((count > 64) ? 64 : count);
1161 tmp.i = rmesa->hw.vpi[0].cmd[VPI_CMD_0];
1162 tmp.veclinear.count = (count > 64) ? 64 : count;
1163 rmesa->hw.vpi[0].cmd[VPI_CMD_0] = tmp.i;
1164 if (count > 64) {
1165 for (i = 0; i < (count - 64); i++) {
1166 rmesa->hw.vpi[1].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i + 64].op;
1167 rmesa->hw.vpi[1].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i + 64].src0;
1168 rmesa->hw.vpi[1].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i + 64].src1;
1169 rmesa->hw.vpi[1].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i + 64].src2;
1170 }
1171 rmesa->hw.vpi[1].cmd_size = 1 + 4 * (count - 64);
1172 tmp.i = rmesa->hw.vpi[1].cmd[VPI_CMD_0];
1173 tmp.veclinear.count = count - 64;
1174 rmesa->hw.vpi[1].cmd[VPI_CMD_0] = tmp.i;
1175 }
1176 rmesa->curr_vp_hw = vp;
1177 }
1178 }
1179
1180
1181 static struct gl_program *
r200NewProgram(struct gl_context * ctx,GLenum target,GLuint id,bool is_arb_asm)1182 r200NewProgram(struct gl_context *ctx, GLenum target, GLuint id,
1183 bool is_arb_asm)
1184 {
1185 switch(target){
1186 case GL_VERTEX_PROGRAM_ARB: {
1187 struct r200_vertex_program *vp = rzalloc(NULL,
1188 struct r200_vertex_program);
1189 return _mesa_init_gl_program(&vp->mesa_program, target, id, is_arb_asm);
1190 }
1191 case GL_FRAGMENT_PROGRAM_ARB: {
1192 struct gl_program *prog = rzalloc(NULL, struct gl_program);
1193 return _mesa_init_gl_program(prog, target, id, is_arb_asm);
1194 }
1195 default:
1196 _mesa_problem(ctx, "Bad target in r200NewProgram");
1197 return NULL;
1198 }
1199 }
1200
1201
/**
 * Driver hook: destroy a program object.
 *
 * No driver-specific work is done here; the call is forwarded unchanged to
 * _mesa_delete_program().
 *
 * \param ctx   GL context.
 * \param prog  program to delete.
 */
static void
r200DeleteProgram(struct gl_context *ctx, struct gl_program *prog)
{
   _mesa_delete_program(ctx, prog);
}
1207
1208 static GLboolean
r200ProgramStringNotify(struct gl_context * ctx,GLenum target,struct gl_program * prog)1209 r200ProgramStringNotify(struct gl_context *ctx, GLenum target, struct gl_program *prog)
1210 {
1211 struct r200_vertex_program *vp = (void *)prog;
1212 r200ContextPtr rmesa = R200_CONTEXT(ctx);
1213
1214 switch(target) {
1215 case GL_VERTEX_PROGRAM_ARB:
1216 vp->translated = GL_FALSE;
1217 vp->fogpidx = 0;
1218 /* memset(&vp->translated, 0, sizeof(struct r200_vertex_program) - sizeof(struct gl_program));*/
1219 r200_translate_vertex_program(ctx, vp);
1220 rmesa->curr_vp_hw = NULL;
1221 break;
1222 case GL_FRAGMENT_SHADER_ATI:
1223 rmesa->afs_loaded = NULL;
1224 break;
1225 }
1226 /* need this for tcl fallbacks */
1227 (void) _tnl_program_string(ctx, target, prog);
1228
1229 /* XXX check if program is legal, within limits */
1230 return GL_TRUE;
1231 }
1232
1233 static GLboolean
r200IsProgramNative(struct gl_context * ctx,GLenum target,struct gl_program * prog)1234 r200IsProgramNative(struct gl_context *ctx, GLenum target, struct gl_program *prog)
1235 {
1236 struct r200_vertex_program *vp = (void *)prog;
1237
1238 switch(target){
1239 case GL_VERTEX_PROGRAM_ARB:
1240 if (!vp->translated) {
1241 r200_translate_vertex_program(ctx, vp);
1242 }
1243 /* does not take parameters etc. into account */
1244 return vp->native;
1245 default:
1246 _mesa_problem(ctx, "Bad target in r200NewProgram");
1247 }
1248 return 0;
1249 }
1250
r200InitShaderFuncs(struct dd_function_table * functions)1251 void r200InitShaderFuncs(struct dd_function_table *functions)
1252 {
1253 functions->NewProgram = r200NewProgram;
1254 functions->DeleteProgram = r200DeleteProgram;
1255 functions->ProgramStringNotify = r200ProgramStringNotify;
1256 functions->IsProgramNative = r200IsProgramNative;
1257 }
1258