• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4  develop this 3D driver.
5 
6  Permission is hereby granted, free of charge, to any person obtaining
7  a copy of this software and associated documentation files (the
8  "Software"), to deal in the Software without restriction, including
9  without limitation the rights to use, copy, modify, merge, publish,
10  distribute, sublicense, and/or sell copies of the Software, and to
11  permit persons to whom the Software is furnished to do so, subject to
12  the following conditions:
13 
14  The above copyright notice and this permission notice (including the
15  next paragraph) shall be included in all copies or substantial
16  portions of the Software.
17 
18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 
26  **********************************************************************/
27  /*
28   * Authors:
29   *   Keith Whitwell <keith@tungstengraphics.com>
30   */
31 
32 
33 #include "main/glheader.h"
34 #include "main/macros.h"
35 #include "main/enums.h"
36 #include "brw_context.h"
37 #include "brw_wm.h"
38 #include "brw_util.h"
39 
40 #include "program/prog_parameter.h"
41 #include "program/prog_print.h"
42 #include "program/prog_statevars.h"
43 
44 
/** Sentinel texture target: the value just past the valid targets. */
#define TEX_TARGET_NONE NUM_TEXTURE_TARGETS

/** Sentinel texture unit: the value just past the valid units. */
#define TEX_UNIT_NONE BRW_MAX_TEX_UNIT

/** Register index of the first temporary handed out by get_temp(). */
#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS

/* Channel numbers, used as swizzle selectors in this file. */
#define X    0
#define Y    1
#define Z    2
#define W    3
57 
58 
/* Printable names for the WM-specific opcodes.  print_insns() indexes
 * this table with (opcode - MAX_OPCODE), so the order matters.
 */
static const char *wm_opcode_strings[] = {
   "PIXELXY",
   "DELTAXY",
   "PIXELW",
   "LINTERP",
   "PINTERP",
   "CINTERP",
   "WPOSXY",
   "FB_WRITE",
   "FRONTFACING",
};
70 
71 #if 0
72 static const char *wm_file_strings[] = {
73    "PAYLOAD"
74 };
75 #endif
76 
77 
78 /***********************************************************************
79  * Source regs
80  */
81 
/**
 * Build a source register referring to (file, idx) with all modifiers
 * neutral: SWIZZLE_NOOP, no negate, no abs, no relative addressing and
 * no second index.
 */
static struct prog_src_register src_reg(GLuint file, GLuint idx)
{
   struct prog_src_register src;

   /* Which register. */
   src.File = file;
   src.Index = idx;

   /* Value modifiers. */
   src.Swizzle = SWIZZLE_NOOP;
   src.Negate = NEGATE_NONE;
   src.Abs = 0;

   /* Addressing modes. */
   src.RelAddr = 0;
   src.HasIndex2 = 0;
   src.RelAddr2 = 0;
   src.Index2 = 0;

   return src;
}
96 
src_reg_from_dst(struct prog_dst_register dst)97 static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
98 {
99    return src_reg(dst.File, dst.Index);
100 }
101 
src_undef(void)102 static struct prog_src_register src_undef( void )
103 {
104    return src_reg(PROGRAM_UNDEFINED, 0);
105 }
106 
src_is_undef(struct prog_src_register src)107 static bool src_is_undef(struct prog_src_register src)
108 {
109    return src.File == PROGRAM_UNDEFINED;
110 }
111 
src_swizzle(struct prog_src_register reg,int x,int y,int z,int w)112 static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
113 {
114    reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
115    return reg;
116 }
117 
src_swizzle1(struct prog_src_register reg,int x)118 static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
119 {
120    return src_swizzle(reg, x, x, x, x);
121 }
122 
/** Return a copy of reg using an already-packed swizzle value. */
static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle )
{
   struct prog_src_register swizzled = reg;

   swizzled.Swizzle = swizzle;
   return swizzled;
}
128 
129 
130 /***********************************************************************
131  * Dest regs
132  */
133 
/**
 * Build a destination register for (file, idx) that writes all four
 * channels, with no relative addressing and condition mask COND_TR.
 */
static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
{
   struct prog_dst_register dst;

   /* Which register, and which channels. */
   dst.File = file;
   dst.Index = idx;
   dst.WriteMask = WRITEMASK_XYZW;
   dst.RelAddr = 0;

   /* Condition-code fields. */
   dst.CondMask = COND_TR;
   dst.CondSwizzle = 0;
   dst.CondSrc = 0;

   return dst;
}
146 
dst_mask(struct prog_dst_register reg,int mask)147 static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
148 {
149    reg.WriteMask &= mask;
150    return reg;
151 }
152 
dst_undef(void)153 static struct prog_dst_register dst_undef( void )
154 {
155    return dst_reg(PROGRAM_UNDEFINED, 0);
156 }
157 
158 
159 
get_temp(struct brw_wm_compile * c)160 static struct prog_dst_register get_temp( struct brw_wm_compile *c )
161 {
162    int bit = ffs( ~c->fp_temp );
163 
164    if (!bit) {
165       printf("%s: out of temporaries\n", __FILE__);
166       exit(1);
167    }
168 
169    c->fp_temp |= 1<<(bit-1);
170    return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
171 }
172 
173 
release_temp(struct brw_wm_compile * c,struct prog_dst_register temp)174 static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
175 {
176    c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP));
177 }
178 
179 
180 /***********************************************************************
181  * Instructions
182  */
183 
get_fp_inst(struct brw_wm_compile * c)184 static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
185 {
186    assert(c->nr_fp_insns < BRW_WM_MAX_INSN);
187    memset(&c->prog_instructions[c->nr_fp_insns], 0,
188 	  sizeof(*c->prog_instructions));
189    return &c->prog_instructions[c->nr_fp_insns++];
190 }
191 
emit_insn(struct brw_wm_compile * c,const struct prog_instruction * inst0)192 static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
193 					const struct prog_instruction *inst0)
194 {
195    struct prog_instruction *inst = get_fp_inst(c);
196    *inst = *inst0;
197    return inst;
198 }
199 
/**
 * Append a new instruction built from the given opcode, destination,
 * saturate mode, texture fields and up to three source operands.
 *
 * tex_src_unit / tex_src_target must be valid values or the
 * TEX_UNIT_NONE / TEX_TARGET_NONE sentinels.
 *
 * \return the newly emitted instruction.
 */
static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c,
                                             GLuint op,
                                             struct prog_dst_register dest,
                                             GLuint saturate,
                                             GLuint tex_src_unit,
                                             GLuint tex_src_target,
                                             GLuint tex_shadow,
                                             struct prog_src_register src0,
                                             struct prog_src_register src1,
                                             struct prog_src_register src2 )
{
   struct prog_instruction *insn = get_fp_inst(c);

   assert(tex_src_unit < BRW_MAX_TEX_UNIT ||
          tex_src_unit == TEX_UNIT_NONE);
   assert(tex_src_target < NUM_TEXTURE_TARGETS ||
          tex_src_target == TEX_TARGET_NONE);

   /* Start from an all-zero instruction. */
   memset(insn, 0, sizeof(*insn));

   /* Operation and result. */
   insn->Opcode = op;
   insn->SaturateMode = saturate;
   insn->DstReg = dest;

   /* Operands. */
   insn->SrcReg[0] = src0;
   insn->SrcReg[1] = src1;
   insn->SrcReg[2] = src2;

   /* Texture state. */
   insn->TexSrcUnit = tex_src_unit;
   insn->TexSrcTarget = tex_src_target;
   insn->TexShadow = tex_shadow;

   return insn;
}
231 
232 
/**
 * Append a non-texture instruction: same as emit_tex_op() with the
 * texture unit and target set to their "none" sentinels and shadow 0.
 */
static struct prog_instruction * emit_op(struct brw_wm_compile *c,
                                         GLuint op,
                                         struct prog_dst_register dest,
                                         GLuint saturate,
                                         struct prog_src_register src0,
                                         struct prog_src_register src1,
                                         struct prog_src_register src2 )
{
   const GLuint no_unit = TEX_UNIT_NONE;
   const GLuint no_target = TEX_TARGET_NONE;
   const GLuint no_shadow = 0;

   return emit_tex_op(c, op, dest, saturate,
                      no_unit, no_target, no_shadow,
                      src0, src1, src2);
}
245 
246 
247 /* Many Mesa opcodes produce the same value across all the result channels.
248  * We'd rather not have to support that splatting in the opcode implementations,
249  * and brw_wm_pass*.c wants to optimize them out by shuffling references around
250  * anyway.  We can easily get both by emitting the opcode to one channel, and
251  * then MOVing it to the others, which brw_wm_pass*.c already understands.
252  */
emit_scalar_insn(struct brw_wm_compile * c,const struct prog_instruction * inst0)253 static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c,
254 						 const struct prog_instruction *inst0)
255 {
256    struct prog_instruction *inst;
257    unsigned int dst_chan;
258    unsigned int other_channel_mask;
259 
260    if (inst0->DstReg.WriteMask == 0)
261       return NULL;
262 
263    dst_chan = ffs(inst0->DstReg.WriteMask) - 1;
264    inst = get_fp_inst(c);
265    *inst = *inst0;
266    inst->DstReg.WriteMask = 1 << dst_chan;
267 
268    other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan);
269    if (other_channel_mask != 0) {
270       inst = emit_op(c,
271 		     OPCODE_MOV,
272 		     dst_mask(inst0->DstReg, other_channel_mask),
273 		     0,
274 		     src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan),
275 		     src_undef(),
276 		     src_undef());
277    }
278    return inst;
279 }
280 
281 
282 /***********************************************************************
283  * Special instructions for interpolation and other tasks
284  */
285 
get_pixel_xy(struct brw_wm_compile * c)286 static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
287 {
288    if (src_is_undef(c->pixel_xy)) {
289       struct prog_dst_register pixel_xy = get_temp(c);
290       struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
291 
292 
293       /* Emit the out calculations, and hold onto the results.  Use
294        * two instructions as a temporary is required.
295        */
296       /* pixel_xy.xy = PIXELXY payload[0];
297        */
298       emit_op(c,
299 	      WM_PIXELXY,
300 	      dst_mask(pixel_xy, WRITEMASK_XY),
301 	      0,
302 	      payload_r0_depth,
303 	      src_undef(),
304 	      src_undef());
305 
306       c->pixel_xy = src_reg_from_dst(pixel_xy);
307    }
308 
309    return c->pixel_xy;
310 }
311 
get_delta_xy(struct brw_wm_compile * c)312 static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
313 {
314    if (src_is_undef(c->delta_xy)) {
315       struct prog_dst_register delta_xy = get_temp(c);
316       struct prog_src_register pixel_xy = get_pixel_xy(c);
317       struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
318 
319       /* deltas.xy = DELTAXY pixel_xy, payload[0]
320        */
321       emit_op(c,
322 	      WM_DELTAXY,
323 	      dst_mask(delta_xy, WRITEMASK_XY),
324 	      0,
325 	      pixel_xy,
326 	      payload_r0_depth,
327 	      src_undef());
328 
329       c->delta_xy = src_reg_from_dst(delta_xy);
330    }
331 
332    return c->delta_xy;
333 }
334 
get_pixel_w(struct brw_wm_compile * c)335 static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
336 {
337    /* This is called for producing 1/w in pre-gen6 interp.  for gen6,
338     * the interp opcodes don't use this argument.  But to keep the
339     * nr_args = 3 expectations of pinterp happy, just stuff delta_xy
340     * into the slot.
341     */
342    if (c->func.brw->intel.gen >= 6)
343       return c->delta_xy;
344 
345    if (src_is_undef(c->pixel_w)) {
346       struct prog_dst_register pixel_w = get_temp(c);
347       struct prog_src_register deltas = get_delta_xy(c);
348       struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
349 
350       /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
351        */
352       emit_op(c,
353 	      WM_PIXELW,
354 	      dst_mask(pixel_w, WRITEMASK_W),
355 	      0,
356 	      interp_wpos,
357 	      deltas,
358 	      src_undef());
359 
360 
361       c->pixel_w = src_reg_from_dst(pixel_w);
362    }
363 
364    return c->pixel_w;
365 }
366 
/**
 * Emit the instruction(s) that produce fragment input attribute idx in
 * PROGRAM_INPUT[idx] from the payload, then mark the attribute as done
 * in c->fp_interp_emitted.
 *
 * The opcode depends on the attribute: WPOS, colors, fog coordinate,
 * facing and point coordinate each have a dedicated sequence; every
 * other attribute uses WM_PINTERP on all four channels.
 */
static void emit_interp( struct brw_wm_compile *c,
                         GLuint idx )
{
   struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
   struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
   struct prog_src_register deltas;

   deltas = get_delta_xy(c);

   /* Need to use PINTERP on attributes which have been
    * multiplied by 1/W in the SF program, and LINTERP on those
    * which have not:
    */
   switch (idx) {
   case FRAG_ATTRIB_WPOS:
      /* Have to treat wpos.xy specially:
       */
      emit_op(c,
              WM_WPOSXY,
              dst_mask(dst, WRITEMASK_XY),
              0,
              get_pixel_xy(c),
              src_undef(),
              src_undef());

      dst = dst_mask(dst, WRITEMASK_ZW);

      /* PROGRAM_INPUT.attr.zw = LINTERP payload.interp[attr], deltas
       */
      emit_op(c,
              WM_LINTERP,
              dst,
              0,
              interp,
              deltas,
              src_undef());
      break;
   case FRAG_ATTRIB_COL0:
   case FRAG_ATTRIB_COL1:
      if (c->key.flat_shade) {
         emit_op(c,
                 WM_CINTERP,
                 dst,
                 0,
                 interp,
                 src_undef(),
                 src_undef());
      }
      else {
         /* perspective-corrected color interpolation */
         emit_op(c,
                 WM_PINTERP,
                 dst,
                 0,
                 interp,
                 deltas,
                 get_pixel_w(c));
      }
      break;
   case FRAG_ATTRIB_FOGC:
      /* Interpolate the fog coordinate into .x ... */
      emit_op(c,
              WM_PINTERP,
              dst_mask(dst, WRITEMASK_X),
              0,
              interp,
              deltas,
              get_pixel_w(c));

      /* ... and fill .yzw with the constants (0, 0, 1). */
      emit_op(c,
              OPCODE_MOV,
              dst_mask(dst, WRITEMASK_YZW),
              0,
              src_swizzle(interp,
                          SWIZZLE_ZERO,
                          SWIZZLE_ZERO,
                          SWIZZLE_ZERO,
                          SWIZZLE_ONE),
              src_undef(),
              src_undef());
      break;

   case FRAG_ATTRIB_FACE:
      emit_op(c,
              WM_FRONTFACING,
              dst_mask(dst, WRITEMASK_X),
              0,
              src_undef(),
              src_undef(),
              src_undef());
      break;

   case FRAG_ATTRIB_PNTC:
      /* XXX review/test this case */
      emit_op(c,
              WM_PINTERP,
              dst_mask(dst, WRITEMASK_XY),
              0,
              interp,
              deltas,
              get_pixel_w(c));

      /* Fill .zw with the constants (0, 1). */
      emit_op(c,
              OPCODE_MOV,
              dst_mask(dst, WRITEMASK_ZW),
              0,
              src_swizzle(interp,
                          SWIZZLE_ZERO,
                          SWIZZLE_ZERO,
                          SWIZZLE_ZERO,
                          SWIZZLE_ONE),
              src_undef(),
              src_undef());
      break;

   default:
      emit_op(c,
              WM_PINTERP,
              dst,
              0,
              interp,
              deltas,
              get_pixel_w(c));
      break;
   }

   /* Unsigned shift: a signed "1 << 31" would be undefined behaviour
    * for the highest attribute index.
    */
   c->fp_interp_emitted |= 1u << idx;
}
495 
496 /***********************************************************************
497  * Hacks to extend the program parameter and constant lists.
498  */
499 
500 /* Add the fog parameters to the parameter list of the original
501  * program, rather than creating a new list.  Doesn't really do any
502  * harm and it's not as if the parameter handling isn't a big hack
503  * anyway.
504  */
search_or_add_param5(struct brw_wm_compile * c,GLint s0,GLint s1,GLint s2,GLint s3,GLint s4)505 static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c,
506                                                      GLint s0,
507                                                      GLint s1,
508                                                      GLint s2,
509                                                      GLint s3,
510                                                      GLint s4)
511 {
512    struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
513    gl_state_index tokens[STATE_LENGTH];
514    GLuint idx;
515    tokens[0] = s0;
516    tokens[1] = s1;
517    tokens[2] = s2;
518    tokens[3] = s3;
519    tokens[4] = s4;
520 
521    idx = _mesa_add_state_reference( paramList, tokens );
522 
523    return src_reg(PROGRAM_STATE_VAR, idx);
524 }
525 
526 
/**
 * Add the four-float constant (s0, s1, s2, s3) to the program's
 * parameter list via _mesa_add_unnamed_constant() and return a
 * PROGRAM_STATE_VAR source register for it, carrying the swizzle that
 * call reports.
 */
static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
                                                       GLfloat s0,
                                                       GLfloat s1,
                                                       GLfloat s2,
                                                       GLfloat s3)
{
   struct gl_program_parameter_list *params = c->fp->program.Base.Parameters;
   gl_constant_value vec[4];
   struct prog_src_register constant;
   GLuint const_swizzle;
   GLuint const_index;

   vec[0].f = s0;
   vec[1].f = s1;
   vec[2].f = s2;
   vec[3].f = s3;

   const_index = _mesa_add_unnamed_constant( params, vec, 4, &const_swizzle );

   constant = src_reg(PROGRAM_STATE_VAR, const_index);
   constant.Swizzle = const_swizzle;
   return constant;
}
550 
551 
552 
553 /***********************************************************************
554  * Expand various instructions here to simpler forms.
555  */
precalc_dst(struct brw_wm_compile * c,const struct prog_instruction * inst)556 static void precalc_dst( struct brw_wm_compile *c,
557 			       const struct prog_instruction *inst )
558 {
559    struct prog_src_register src0 = inst->SrcReg[0];
560    struct prog_src_register src1 = inst->SrcReg[1];
561    struct prog_dst_register dst = inst->DstReg;
562    struct prog_dst_register temp = get_temp(c);
563 
564    if (dst.WriteMask & WRITEMASK_Y) {
565       /* dst.y = mul src0.y, src1.y
566        */
567       emit_op(c,
568 	      OPCODE_MUL,
569 	      dst_mask(temp, WRITEMASK_Y),
570 	      inst->SaturateMode,
571 	      src0,
572 	      src1,
573 	      src_undef());
574    }
575 
576    if (dst.WriteMask & WRITEMASK_XZ) {
577       struct prog_instruction *swz;
578       GLuint z = GET_SWZ(src0.Swizzle, Z);
579 
580       /* dst.xz = swz src0.1zzz
581        */
582       swz = emit_op(c,
583 		    OPCODE_SWZ,
584 		    dst_mask(temp, WRITEMASK_XZ),
585 		    inst->SaturateMode,
586 		    src_swizzle(src0, SWIZZLE_ONE, z, z, z),
587 		    src_undef(),
588 		    src_undef());
589       /* Avoid letting negation flag of src0 affect our 1 constant. */
590       swz->SrcReg[0].Negate &= ~NEGATE_X;
591    }
592    if (dst.WriteMask & WRITEMASK_W) {
593       /* dst.w = mov src1.w
594        */
595       emit_op(c,
596 	      OPCODE_MOV,
597 	      dst_mask(temp, WRITEMASK_W),
598 	      inst->SaturateMode,
599 	      src1,
600 	      src_undef(),
601 	      src_undef());
602    }
603 
604    /* This will get optimized out in general, but it ensures that we
605     * don't overwrite src operands in our channel-wise splitting
606     * above.  See piglit fp-dst-aliasing-[12].
607     */
608    emit_op(c,
609 	   OPCODE_MOV,
610 	   dst,
611 	   0,
612 	   src_reg_from_dst(temp),
613 	   src_undef(),
614 	   src_undef());
615 
616    release_temp(c, temp);
617 }
618 
619 
precalc_lit(struct brw_wm_compile * c,const struct prog_instruction * inst)620 static void precalc_lit( struct brw_wm_compile *c,
621 			 const struct prog_instruction *inst )
622 {
623    struct prog_src_register src0 = inst->SrcReg[0];
624    struct prog_dst_register dst = inst->DstReg;
625 
626    if (dst.WriteMask & WRITEMASK_YZ) {
627       emit_op(c,
628 	      OPCODE_LIT,
629 	      dst_mask(dst, WRITEMASK_YZ),
630 	      inst->SaturateMode,
631 	      src0,
632 	      src_undef(),
633 	      src_undef());
634    }
635 
636    if (dst.WriteMask & WRITEMASK_XW) {
637       struct prog_instruction *swz;
638 
639       /* dst.xw = swz src0.1111
640        */
641       swz = emit_op(c,
642 		    OPCODE_SWZ,
643 		    dst_mask(dst, WRITEMASK_XW),
644 		    0,
645 		    src_swizzle1(src0, SWIZZLE_ONE),
646 		    src_undef(),
647 		    src_undef());
648       /* Avoid letting the negation flag of src0 affect our 1 constant. */
649       swz->SrcReg[0].Negate = NEGATE_NONE;
650    }
651 }
652 
653 
654 /**
655  * Some TEX instructions require extra code, cube map coordinate
656  * normalization, or coordinate scaling for RECT textures, etc.
657  * This function emits those extra instructions and the TEX
658  * instruction itself.
659  */
precalc_tex(struct brw_wm_compile * c,const struct prog_instruction * inst)660 static void precalc_tex( struct brw_wm_compile *c,
661 			 const struct prog_instruction *inst )
662 {
663    struct brw_compile *p = &c->func;
664    struct intel_context *intel = &p->brw->intel;
665    struct prog_src_register coord;
666    struct prog_dst_register tmpcoord = { 0 };
667    const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
668    struct prog_dst_register unswizzled_tmp;
669 
670    /* If we are doing EXT_texture_swizzle, we need to write our result into a
671     * temporary, otherwise writemasking of the real dst could lose some of our
672     * channels.
673     */
674    if (c->key.tex.swizzles[unit] != SWIZZLE_NOOP) {
675       unswizzled_tmp = get_temp(c);
676    } else {
677       unswizzled_tmp = inst->DstReg;
678    }
679 
680    assert(unit < BRW_MAX_TEX_UNIT);
681 
682    if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
683        struct prog_instruction *out;
684        struct prog_dst_register tmp0 = get_temp(c);
685        struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
686        struct prog_dst_register tmp1 = get_temp(c);
687        struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
688        struct prog_src_register src0 = inst->SrcReg[0];
689 
690        /* find longest component of coord vector and normalize it */
691        tmpcoord = get_temp(c);
692        coord = src_reg_from_dst(tmpcoord);
693 
694        /* tmpcoord = src0 (i.e.: coord = src0) */
695        out = emit_op(c, OPCODE_MOV,
696                      tmpcoord,
697                      0,
698                      src0,
699                      src_undef(),
700                      src_undef());
701        out->SrcReg[0].Negate = NEGATE_NONE;
702        out->SrcReg[0].Abs = 1;
703 
704        /* tmp0 = MAX(coord.X, coord.Y) */
705        emit_op(c, OPCODE_MAX,
706                tmp0,
707                0,
708                src_swizzle1(coord, X),
709                src_swizzle1(coord, Y),
710                src_undef());
711 
712        /* tmp1 = MAX(tmp0, coord.Z) */
713        emit_op(c, OPCODE_MAX,
714                tmp1,
715                0,
716                tmp0src,
717                src_swizzle1(coord, Z),
718                src_undef());
719 
720        /* tmp0 = 1 / tmp1 */
721        emit_op(c, OPCODE_RCP,
722                dst_mask(tmp0, WRITEMASK_X),
723                0,
724                tmp1src,
725                src_undef(),
726                src_undef());
727 
728        /* tmpCoord = src0 * tmp0 */
729        emit_op(c, OPCODE_MUL,
730                tmpcoord,
731                0,
732                src0,
733                src_swizzle1(tmp0src, SWIZZLE_X),
734                src_undef());
735 
736        release_temp(c, tmp0);
737        release_temp(c, tmp1);
738    }
739    else if (intel->gen < 6 && inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
740       struct prog_src_register scale =
741 	 search_or_add_param5( c,
742 			       STATE_INTERNAL,
743 			       STATE_TEXRECT_SCALE,
744 			       unit,
745 			       0,0 );
746 
747       tmpcoord = get_temp(c);
748 
749       /* coord.xy   = MUL inst->SrcReg[0], { 1/width, 1/height }
750        */
751       emit_op(c,
752 	      OPCODE_MUL,
753 	      tmpcoord,
754 	      0,
755 	      inst->SrcReg[0],
756 	      src_swizzle(scale,
757 			  SWIZZLE_X,
758 			  SWIZZLE_Y,
759 			  SWIZZLE_ONE,
760 			  SWIZZLE_ONE),
761 	      src_undef());
762 
763       coord = src_reg_from_dst(tmpcoord);
764    }
765    else {
766       coord = inst->SrcReg[0];
767    }
768 
769    /* Need to emit YUV texture conversions by hand.  Probably need to
770     * do this here - the alternative is in brw_wm_emit.c, but the
771     * conversion requires allocating a temporary variable which we
772     * don't have the facility to do that late in the compilation.
773     */
774    if (c->key.tex.yuvtex_mask & (1 << unit)) {
775       /* convert ycbcr to RGBA */
776       bool swap_uv = c->key.tex.yuvtex_swap_mask & (1 << unit);
777 
778       /*
779 	 CONST C0 = { -.5, -.0625,  -.5, 1.164 }
780 	 CONST C1 = { 1.596, -0.813, 2.018, -.391 }
781 	 UYV     = TEX ...
782 	 UYV.xyz = ADD UYV,     C0
783 	 UYV.y   = MUL UYV.y,   C0.w
784  	 if (UV swaped)
785 	    RGB.xyz = MAD UYV.zzx, C1,   UYV.y
786 	 else
787 	    RGB.xyz = MAD UYV.xxz, C1,   UYV.y
788 	 RGB.y   = MAD UYV.z,   C1.w, RGB.y
789       */
790       struct prog_dst_register tmp = get_temp(c);
791       struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
792       struct prog_src_register C0 = search_or_add_const4f( c,  -.5, -.0625, -.5, 1.164 );
793       struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );
794 
795       /* tmp     = TEX ...
796        */
797       emit_tex_op(c,
798                   OPCODE_TEX,
799                   tmp,
800                   inst->SaturateMode,
801                   unit,
802                   inst->TexSrcTarget,
803                   inst->TexShadow,
804                   coord,
805                   src_undef(),
806                   src_undef());
807 
808       /* tmp.xyz =  ADD TMP, C0
809        */
810       emit_op(c,
811 	      OPCODE_ADD,
812 	      dst_mask(tmp, WRITEMASK_XYZ),
813 	      0,
814 	      tmpsrc,
815 	      C0,
816 	      src_undef());
817 
818       /* YUV.y   = MUL YUV.y, C0.w
819        */
820 
821       emit_op(c,
822 	      OPCODE_MUL,
823 	      dst_mask(tmp, WRITEMASK_Y),
824 	      0,
825 	      tmpsrc,
826 	      src_swizzle1(C0, W),
827 	      src_undef());
828 
829       /*
830        * if (UV swaped)
831        *     RGB.xyz = MAD YUV.zzx, C1, YUV.y
832        * else
833        *     RGB.xyz = MAD YUV.xxz, C1, YUV.y
834        */
835 
836       emit_op(c,
837 	      OPCODE_MAD,
838 	      dst_mask(unswizzled_tmp, WRITEMASK_XYZ),
839 	      0,
840 	      swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
841 	      C1,
842 	      src_swizzle1(tmpsrc, Y));
843 
844       /*  RGB.y   = MAD YUV.z, C1.w, RGB.y
845        */
846       emit_op(c,
847 	      OPCODE_MAD,
848 	      dst_mask(unswizzled_tmp, WRITEMASK_Y),
849 	      0,
850 	      src_swizzle1(tmpsrc, Z),
851 	      src_swizzle1(C1, W),
852 	      src_swizzle1(src_reg_from_dst(unswizzled_tmp), Y));
853 
854       release_temp(c, tmp);
855    }
856    else {
857       /* ordinary RGBA tex instruction */
858       emit_tex_op(c,
859                   OPCODE_TEX,
860                   unswizzled_tmp,
861                   inst->SaturateMode,
862                   unit,
863                   inst->TexSrcTarget,
864                   inst->TexShadow,
865                   coord,
866                   src_undef(),
867                   src_undef());
868    }
869 
870    /* For GL_EXT_texture_swizzle: */
871    if (c->key.tex.swizzles[unit] != SWIZZLE_NOOP) {
872       /* swizzle the result of the TEX instruction */
873       struct prog_src_register tmpsrc = src_reg_from_dst(unswizzled_tmp);
874       emit_op(c, OPCODE_SWZ,
875               inst->DstReg,
876               SATURATE_OFF, /* saturate already done above */
877               src_swizzle4(tmpsrc, c->key.tex.swizzles[unit]),
878               src_undef(),
879               src_undef());
880    }
881 
882    if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) ||
883        (inst->TexSrcTarget == TEXTURE_CUBE_INDEX))
884       release_temp(c, tmpcoord);
885 }
886 
887 
888 /**
889  * Check if the given TXP instruction really needs the divide-by-W step.
890  */
891 static bool
projtex(struct brw_wm_compile * c,const struct prog_instruction * inst)892 projtex(struct brw_wm_compile *c, const struct prog_instruction *inst)
893 {
894    const struct prog_src_register src = inst->SrcReg[0];
895    bool retVal;
896 
897    assert(inst->Opcode == OPCODE_TXP);
898 
899    /* Only try to detect the simplest cases.  Could detect (later)
900     * cases where we are trying to emit code like RCP {1.0}, MUL x,
901     * {1.0}, and so on.
902     *
903     * More complex cases than this typically only arise from
904     * user-provided fragment programs anyway:
905     */
906    if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
907       retVal = false;  /* ut2004 gun rendering !?! */
908    else if (src.File == PROGRAM_INPUT &&
909 	    GET_SWZ(src.Swizzle, W) == W &&
910             (c->key.proj_attrib_mask & (1 << src.Index)) == 0)
911       retVal = false;
912    else
913       retVal = true;
914 
915    return retVal;
916 }
917 
918 
919 /**
920  * Emit code for TXP.
921  */
precalc_txp(struct brw_wm_compile * c,const struct prog_instruction * inst)922 static void precalc_txp( struct brw_wm_compile *c,
923 			       const struct prog_instruction *inst )
924 {
925    struct prog_src_register src0 = inst->SrcReg[0];
926 
927    if (projtex(c, inst)) {
928       struct prog_dst_register tmp = get_temp(c);
929       struct prog_instruction tmp_inst;
930 
931       /* tmp0.w = RCP inst.arg[0][3]
932        */
933       emit_op(c,
934 	      OPCODE_RCP,
935 	      dst_mask(tmp, WRITEMASK_W),
936 	      0,
937 	      src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
938 	      src_undef(),
939 	      src_undef());
940 
941       /* tmp0.xyz =  MUL inst.arg[0], tmp0.wwww
942        */
943       emit_op(c,
944 	      OPCODE_MUL,
945 	      dst_mask(tmp, WRITEMASK_XYZ),
946 	      0,
947 	      src0,
948 	      src_swizzle1(src_reg_from_dst(tmp), W),
949 	      src_undef());
950 
951       /* dst = precalc(TEX tmp0)
952        */
953       tmp_inst = *inst;
954       tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
955       precalc_tex(c, &tmp_inst);
956 
957       release_temp(c, tmp);
958    }
959    else
960    {
961       /* dst = precalc(TEX src0)
962        */
963       precalc_tex(c, inst);
964    }
965 }
966 
967 
968 
emit_render_target_writes(struct brw_wm_compile * c)969 static void emit_render_target_writes( struct brw_wm_compile *c )
970 {
971    struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
972    struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH);
973    struct prog_src_register outcolor;
974    GLuint i;
975 
976    struct prog_instruction *inst = NULL;
977 
978    /* The inst->Aux field is used for FB write target and the EOT marker */
979 
980    for (i = 0; i < c->key.nr_color_regions; i++) {
981       if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_COLOR)) {
982 	 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
983       } else {
984 	 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
985       }
986       inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
987 		     0, outcolor, payload_r0_depth, outdepth);
988       inst->Aux = INST_AUX_TARGET(i);
989    }
990 
991    /* Mark the last FB write as final, or emit a dummy write if we had
992     * no render targets bound.
993     */
994    if (c->key.nr_color_regions != 0) {
995       inst->Aux |= INST_AUX_EOT;
996    } else {
997       inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
998 		     0, src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR),
999 		     payload_r0_depth, outdepth);
1000       inst->Aux = INST_AUX_TARGET(0) | INST_AUX_EOT;
1001    }
1002 }
1003 
1004 
1005 
1006 
1007 /***********************************************************************
1008  * Emit INTERP instructions ahead of first use of each attrib.
1009  */
1010 
validate_src_regs(struct brw_wm_compile * c,const struct prog_instruction * inst)1011 static void validate_src_regs( struct brw_wm_compile *c,
1012 			       const struct prog_instruction *inst )
1013 {
1014    GLuint nr_args = brw_wm_nr_args( inst->Opcode );
1015    GLuint i;
1016 
1017    for (i = 0; i < nr_args; i++) {
1018       if (inst->SrcReg[i].File == PROGRAM_INPUT) {
1019 	 GLuint idx = inst->SrcReg[i].Index;
1020 	 if (!(c->fp_interp_emitted & (1<<idx))) {
1021 	    emit_interp(c, idx);
1022 	 }
1023       }
1024    }
1025 }
1026 
/**
 * Dump \p nr instructions starting at \p insn to stdout, one per line,
 * each prefixed with its index.
 *
 * Opcodes below MAX_OPCODE are printed with the generic Mesa instruction
 * printer.  Opcodes in [MAX_OPCODE, MAX_WM_OPCODE) are printed as ALU
 * instructions using the name from wm_opcode_strings.  Anything else is
 * printed as a raw opcode number.
 */
static void print_insns( const struct prog_instruction *insn,
			 GLuint nr )
{
   GLuint i;

   for (i = 0; i < nr; i++, insn++) {
      /* i is unsigned, so use an unsigned conversion. */
      printf("%3u: ", i);

      if (insn->Opcode < MAX_OPCODE) {
	 _mesa_fprint_instruction_opt(stdout, insn, 0, PROG_PRINT_DEBUG, NULL);
      }
      else if (insn->Opcode < MAX_WM_OPCODE) {
	 /* Driver-private opcodes are numbered from MAX_OPCODE upwards. */
	 const GLuint idx = insn->Opcode - MAX_OPCODE;

	 _mesa_fprint_alu_instruction(stdout, insn, wm_opcode_strings[idx],
				      3, PROG_PRINT_DEBUG, NULL);
      }
      else {
	 printf("965 Opcode %d\n", insn->Opcode);
      }
   }
}
1045 
1046 
1047 /**
1048  * Initial pass for fragment program code generation.
1049  * This function is used by both the GLSL and non-GLSL paths.
1050  */
brw_wm_pass_fp(struct brw_wm_compile * c)1051 void brw_wm_pass_fp( struct brw_wm_compile *c )
1052 {
1053    struct intel_context *intel = &c->func.brw->intel;
1054    struct brw_fragment_program *fp = c->fp;
1055    GLuint insn;
1056 
1057    if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
1058       printf("pre-fp:\n");
1059       _mesa_fprint_program_opt(stdout, &fp->program.Base, PROG_PRINT_DEBUG,
1060 			       true);
1061       printf("\n");
1062    }
1063 
1064    c->pixel_xy = src_undef();
1065    if (intel->gen >= 6) {
1066       /* The interpolation deltas come in as the perspective pixel
1067        * location barycentric params.
1068        */
1069       c->delta_xy = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
1070    } else {
1071       c->delta_xy = src_undef();
1072    }
1073    c->pixel_w = src_undef();
1074    c->nr_fp_insns = 0;
1075 
1076    /* Emit preamble instructions.  This is where special instructions such as
1077     * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
1078     * compute shader inputs from varying vars.
1079     */
1080    for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
1081       const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
1082       validate_src_regs(c, inst);
1083    }
1084 
1085    /* Loop over all instructions doing assorted simplifications and
1086     * transformations.
1087     */
1088    for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
1089       const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
1090       struct prog_instruction *out;
1091 
1092       /* Check for INPUT values, emit INTERP instructions where
1093        * necessary:
1094        */
1095 
1096       switch (inst->Opcode) {
1097       case OPCODE_SWZ:
1098 	 out = emit_insn(c, inst);
1099 	 out->Opcode = OPCODE_MOV;
1100 	 break;
1101 
1102       case OPCODE_ABS:
1103 	 out = emit_insn(c, inst);
1104 	 out->Opcode = OPCODE_MOV;
1105 	 out->SrcReg[0].Negate = NEGATE_NONE;
1106 	 out->SrcReg[0].Abs = 1;
1107 	 break;
1108 
1109       case OPCODE_SUB:
1110 	 out = emit_insn(c, inst);
1111 	 out->Opcode = OPCODE_ADD;
1112 	 out->SrcReg[1].Negate ^= NEGATE_XYZW;
1113 	 break;
1114 
1115       case OPCODE_SCS:
1116 	 out = emit_insn(c, inst);
1117 	 /* This should probably be done in the parser.
1118 	  */
1119 	 out->DstReg.WriteMask &= WRITEMASK_XY;
1120 	 break;
1121 
1122       case OPCODE_DST:
1123 	 precalc_dst(c, inst);
1124 	 break;
1125 
1126       case OPCODE_LIT:
1127 	 precalc_lit(c, inst);
1128 	 break;
1129 
1130       case OPCODE_RSQ:
1131 	 out = emit_scalar_insn(c, inst);
1132 	 out->SrcReg[0].Abs = true;
1133 	 break;
1134 
1135       case OPCODE_TEX:
1136 	 precalc_tex(c, inst);
1137 	 break;
1138 
1139       case OPCODE_TXP:
1140 	 precalc_txp(c, inst);
1141 	 break;
1142 
1143       case OPCODE_TXB:
1144 	 out = emit_insn(c, inst);
1145 	 out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
1146          assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT);
1147 	 break;
1148 
1149       case OPCODE_XPD:
1150 	 out = emit_insn(c, inst);
1151 	 /* This should probably be done in the parser.
1152 	  */
1153 	 out->DstReg.WriteMask &= WRITEMASK_XYZ;
1154 	 break;
1155 
1156       case OPCODE_KIL:
1157 	 out = emit_insn(c, inst);
1158 	 /* This should probably be done in the parser.
1159 	  */
1160 	 out->DstReg.WriteMask = 0;
1161 	 break;
1162       case OPCODE_END:
1163 	 emit_render_target_writes(c);
1164 	 break;
1165       case OPCODE_PRINT:
1166 	 break;
1167       default:
1168 	 if (brw_wm_is_scalar_result(inst->Opcode))
1169 	    emit_scalar_insn(c, inst);
1170 	 else
1171 	    emit_insn(c, inst);
1172 	 break;
1173       }
1174    }
1175 
1176    if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
1177       printf("pass_fp:\n");
1178       print_insns( c->prog_instructions, c->nr_fp_insns );
1179       printf("\n");
1180    }
1181 }
1182 
1183