• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2006 - 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "brw_compiler.h"
25 #include "brw_eu.h"
26 #include "brw_prim.h"
27 
28 #include "dev/intel_debug.h"
29 
30 struct brw_sf_compile {
31    struct brw_codegen func;
32    struct brw_sf_prog_key key;
33    struct brw_sf_prog_data prog_data;
34 
35    struct brw_reg pv;
36    struct brw_reg det;
37    struct brw_reg dx0;
38    struct brw_reg dx2;
39    struct brw_reg dy0;
40    struct brw_reg dy2;
41 
42    /* z and 1/w passed in separately:
43     */
44    struct brw_reg z[3];
45    struct brw_reg inv_w[3];
46 
47    /* The vertices:
48     */
49    struct brw_reg vert[3];
50 
51     /* Temporaries, allocated after last vertex reg.
52     */
53    struct brw_reg inv_det;
54    struct brw_reg a1_sub_a0;
55    struct brw_reg a2_sub_a0;
56    struct brw_reg tmp;
57 
58    struct brw_reg m1Cx;
59    struct brw_reg m2Cy;
60    struct brw_reg m3C0;
61 
62    GLuint nr_verts;
63    GLuint nr_attr_regs;
64    GLuint nr_setup_regs;
65    int urb_entry_read_offset;
66 
67    /** The last known value of the f0.0 flag register. */
68    unsigned flag_value;
69 
70    struct brw_vue_map vue_map;
71 };
72 
73 /**
74  * Determine the vue slot corresponding to the given half of the given register.
75  */
vert_reg_to_vue_slot(struct brw_sf_compile * c,GLuint reg,int half)76 static inline int vert_reg_to_vue_slot(struct brw_sf_compile *c, GLuint reg,
77                                        int half)
78 {
79    return (reg + c->urb_entry_read_offset) * 2 + half;
80 }
81 
82 /**
83  * Determine the varying corresponding to the given half of the given
84  * register.  half=0 means the first half of a register, half=1 means the
85  * second half.
86  */
vert_reg_to_varying(struct brw_sf_compile * c,GLuint reg,int half)87 static inline int vert_reg_to_varying(struct brw_sf_compile *c, GLuint reg,
88                                       int half)
89 {
90    int vue_slot = vert_reg_to_vue_slot(c, reg, half);
91    return c->vue_map.slot_to_varying[vue_slot];
92 }
93 
94 /**
95  * Determine the register corresponding to the given vue slot
96  */
get_vue_slot(struct brw_sf_compile * c,struct brw_reg vert,int vue_slot)97 static struct brw_reg get_vue_slot(struct brw_sf_compile *c,
98                                    struct brw_reg vert,
99                                    int vue_slot)
100 {
101    GLuint off = vue_slot / 2 - c->urb_entry_read_offset;
102    GLuint sub = vue_slot % 2;
103 
104    return brw_vec4_grf(vert.nr + off, sub * 4);
105 }
106 
107 /**
108  * Determine the register corresponding to the given varying.
109  */
get_varying(struct brw_sf_compile * c,struct brw_reg vert,GLuint varying)110 static struct brw_reg get_varying(struct brw_sf_compile *c,
111                                   struct brw_reg vert,
112                                   GLuint varying)
113 {
114    int vue_slot = c->vue_map.varying_to_slot[varying];
115    assert (vue_slot >= c->urb_entry_read_offset);
116    return get_vue_slot(c, vert, vue_slot);
117 }
118 
119 static bool
have_attr(struct brw_sf_compile * c,GLuint attr)120 have_attr(struct brw_sf_compile *c, GLuint attr)
121 {
122    return (c->key.attrs & BITFIELD64_BIT(attr)) ? 1 : 0;
123 }
124 
125 /***********************************************************************
126  * Twoside lighting
127  */
copy_bfc(struct brw_sf_compile * c,struct brw_reg vert)128 static void copy_bfc( struct brw_sf_compile *c,
129 		      struct brw_reg vert )
130 {
131    struct brw_codegen *p = &c->func;
132    GLuint i;
133 
134    for (i = 0; i < 2; i++) {
135       if (have_attr(c, VARYING_SLOT_COL0+i) &&
136 	  have_attr(c, VARYING_SLOT_BFC0+i))
137 	 brw_MOV(p,
138 		 get_varying(c, vert, VARYING_SLOT_COL0+i),
139 		 get_varying(c, vert, VARYING_SLOT_BFC0+i));
140    }
141 }
142 
143 
do_twoside_color(struct brw_sf_compile * c)144 static void do_twoside_color( struct brw_sf_compile *c )
145 {
146    struct brw_codegen *p = &c->func;
147    GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L;
148 
149    /* Already done in clip program:
150     */
151    if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS)
152       return;
153 
154    /* If the vertex shader provides backface color, do the selection. The VS
155     * promises to set up the front color if the backface color is provided, but
156     * it may contain junk if never written to.
157     */
158    if (!(have_attr(c, VARYING_SLOT_COL0) && have_attr(c, VARYING_SLOT_BFC0)) &&
159        !(have_attr(c, VARYING_SLOT_COL1) && have_attr(c, VARYING_SLOT_BFC1)))
160       return;
161 
162    /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order
163     * to get all channels active inside the IF.  In the clipping code
164     * we run with NoMask, so it's not an option and we can use
165     * BRW_EXECUTE_1 for all comparisons.
166     */
167    brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0));
168    brw_IF(p, BRW_EXECUTE_4);
169    {
170       switch (c->nr_verts) {
171       case 3: copy_bfc(c, c->vert[2]); FALLTHROUGH;
172       case 2: copy_bfc(c, c->vert[1]); FALLTHROUGH;
173       case 1: copy_bfc(c, c->vert[0]);
174       }
175    }
176    brw_ENDIF(p);
177 }
178 
179 
180 
181 /***********************************************************************
182  * Flat shading
183  */
184 
copy_flatshaded_attributes(struct brw_sf_compile * c,struct brw_reg dst,struct brw_reg src)185 static void copy_flatshaded_attributes(struct brw_sf_compile *c,
186                                        struct brw_reg dst,
187                                        struct brw_reg src)
188 {
189    struct brw_codegen *p = &c->func;
190    int i;
191 
192    for (i = 0; i < c->vue_map.num_slots; i++) {
193       if (c->key.interp_mode[i] == INTERP_MODE_FLAT) {
194          brw_MOV(p,
195                  get_vue_slot(c, dst, i),
196                  get_vue_slot(c, src, i));
197       }
198    }
199 }
200 
count_flatshaded_attributes(struct brw_sf_compile * c)201 static int count_flatshaded_attributes(struct brw_sf_compile *c)
202 {
203    int i;
204    int count = 0;
205 
206    for (i = 0; i < c->vue_map.num_slots; i++)
207       if (c->key.interp_mode[i] == INTERP_MODE_FLAT)
208          count++;
209 
210    return count;
211 }
212 
213 
214 
215 /* Need to use a computed jump to copy flatshaded attributes as the
216  * vertices are ordered according to y-coordinate before reaching this
217  * point, so the PV could be anywhere.
218  */
do_flatshade_triangle(struct brw_sf_compile * c)219 static void do_flatshade_triangle( struct brw_sf_compile *c )
220 {
221    struct brw_codegen *p = &c->func;
222    GLuint nr;
223    GLuint jmpi = 1;
224 
225    /* Already done in clip program:
226     */
227    if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS)
228       return;
229 
230    if (p->devinfo->ver == 5)
231        jmpi = 2;
232 
233    nr = count_flatshaded_attributes(c);
234 
235    brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1)));
236    brw_JMPI(p, c->pv, BRW_PREDICATE_NONE);
237 
238    copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
239    copy_flatshaded_attributes(c, c->vert[2], c->vert[0]);
240    brw_JMPI(p, brw_imm_d(jmpi*(nr*4+1)), BRW_PREDICATE_NONE);
241 
242    copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
243    copy_flatshaded_attributes(c, c->vert[2], c->vert[1]);
244    brw_JMPI(p, brw_imm_d(jmpi*nr*2), BRW_PREDICATE_NONE);
245 
246    copy_flatshaded_attributes(c, c->vert[0], c->vert[2]);
247    copy_flatshaded_attributes(c, c->vert[1], c->vert[2]);
248 }
249 
250 
do_flatshade_line(struct brw_sf_compile * c)251 static void do_flatshade_line( struct brw_sf_compile *c )
252 {
253    struct brw_codegen *p = &c->func;
254    GLuint nr;
255    GLuint jmpi = 1;
256 
257    /* Already done in clip program:
258     */
259    if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS)
260       return;
261 
262    if (p->devinfo->ver == 5)
263        jmpi = 2;
264 
265    nr = count_flatshaded_attributes(c);
266 
267    brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1)));
268    brw_JMPI(p, c->pv, BRW_PREDICATE_NONE);
269    copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
270 
271    brw_JMPI(p, brw_imm_ud(jmpi*nr), BRW_PREDICATE_NONE);
272    copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
273 }
274 
275 
276 /***********************************************************************
277  * Triangle setup.
278  */
279 
280 
alloc_regs(struct brw_sf_compile * c)281 static void alloc_regs( struct brw_sf_compile *c )
282 {
283    GLuint reg, i;
284 
285    /* Values computed by fixed function unit:
286     */
287    c->pv  = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D);
288    c->det = brw_vec1_grf(1, 2);
289    c->dx0 = brw_vec1_grf(1, 3);
290    c->dx2 = brw_vec1_grf(1, 4);
291    c->dy0 = brw_vec1_grf(1, 5);
292    c->dy2 = brw_vec1_grf(1, 6);
293 
294    /* z and 1/w passed in separately:
295     */
296    c->z[0]     = brw_vec1_grf(2, 0);
297    c->inv_w[0] = brw_vec1_grf(2, 1);
298    c->z[1]     = brw_vec1_grf(2, 2);
299    c->inv_w[1] = brw_vec1_grf(2, 3);
300    c->z[2]     = brw_vec1_grf(2, 4);
301    c->inv_w[2] = brw_vec1_grf(2, 5);
302 
303    /* The vertices:
304     */
305    reg = 3;
306    for (i = 0; i < c->nr_verts; i++) {
307       c->vert[i] = brw_vec8_grf(reg, 0);
308       reg += c->nr_attr_regs;
309    }
310 
311    /* Temporaries, allocated after last vertex reg.
312     */
313    c->inv_det = brw_vec1_grf(reg, 0);  reg++;
314    c->a1_sub_a0 = brw_vec8_grf(reg, 0);  reg++;
315    c->a2_sub_a0 = brw_vec8_grf(reg, 0);  reg++;
316    c->tmp = brw_vec8_grf(reg, 0);  reg++;
317 
318    /* Note grf allocation:
319     */
320    c->prog_data.total_grf = reg;
321 
322 
323    /* Outputs of this program - interpolation coefficients for
324     * rasterization:
325     */
326    c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0);
327    c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0);
328    c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0);
329 }
330 
331 
copy_z_inv_w(struct brw_sf_compile * c)332 static void copy_z_inv_w( struct brw_sf_compile *c )
333 {
334    struct brw_codegen *p = &c->func;
335    GLuint i;
336 
337    /* Copy both scalars with a single MOV:
338     */
339    for (i = 0; i < c->nr_verts; i++)
340       brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i]));
341 }
342 
343 
invert_det(struct brw_sf_compile * c)344 static void invert_det( struct brw_sf_compile *c)
345 {
346    /* Looks like we invert all 8 elements just to get 1/det in
347     * position 2 !?!
348     */
349    gfx4_math(&c->func,
350 	     c->inv_det,
351 	     BRW_MATH_FUNCTION_INV,
352 	     0,
353 	     c->det,
354 	     BRW_MATH_PRECISION_FULL);
355 
356 }
357 
358 
359 static bool
calculate_masks(struct brw_sf_compile * c,GLuint reg,GLushort * pc,GLushort * pc_persp,GLushort * pc_linear)360 calculate_masks(struct brw_sf_compile *c,
361                 GLuint reg,
362                 GLushort *pc,
363                 GLushort *pc_persp,
364                 GLushort *pc_linear)
365 {
366    bool is_last_attr = (reg == c->nr_setup_regs - 1);
367    enum glsl_interp_mode interp;
368 
369    *pc_persp = 0;
370    *pc_linear = 0;
371    *pc = 0xf;
372 
373    interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 0)];
374    if (interp == INTERP_MODE_SMOOTH) {
375       *pc_linear = 0xf;
376       *pc_persp = 0xf;
377    } else if (interp == INTERP_MODE_NOPERSPECTIVE)
378       *pc_linear = 0xf;
379 
380    /* Maybe only process one attribute on the final round:
381     */
382    if (vert_reg_to_varying(c, reg, 1) != BRW_VARYING_SLOT_COUNT) {
383       *pc |= 0xf0;
384 
385       interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 1)];
386       if (interp == INTERP_MODE_SMOOTH) {
387          *pc_linear |= 0xf0;
388          *pc_persp |= 0xf0;
389       } else if (interp == INTERP_MODE_NOPERSPECTIVE)
390          *pc_linear |= 0xf0;
391    }
392 
393    return is_last_attr;
394 }
395 
396 /* Calculates the predicate control for which channels of a reg
397  * (containing 2 attrs) to do point sprite coordinate replacement on.
398  */
399 static uint16_t
calculate_point_sprite_mask(struct brw_sf_compile * c,GLuint reg)400 calculate_point_sprite_mask(struct brw_sf_compile *c, GLuint reg)
401 {
402    int varying1, varying2;
403    uint16_t pc = 0;
404 
405    varying1 = vert_reg_to_varying(c, reg, 0);
406    if (varying1 >= VARYING_SLOT_TEX0 && varying1 <= VARYING_SLOT_TEX7) {
407       if (c->key.point_sprite_coord_replace & (1 << (varying1 - VARYING_SLOT_TEX0)))
408 	 pc |= 0x0f;
409    }
410    if (varying1 == BRW_VARYING_SLOT_PNTC)
411       pc |= 0x0f;
412 
413    varying2 = vert_reg_to_varying(c, reg, 1);
414    if (varying2 >= VARYING_SLOT_TEX0 && varying2 <= VARYING_SLOT_TEX7) {
415       if (c->key.point_sprite_coord_replace & (1 << (varying2 -
416                                                      VARYING_SLOT_TEX0)))
417          pc |= 0xf0;
418    }
419    if (varying2 == BRW_VARYING_SLOT_PNTC)
420       pc |= 0xf0;
421 
422    return pc;
423 }
424 
425 static void
set_predicate_control_flag_value(struct brw_codegen * p,struct brw_sf_compile * c,unsigned value)426 set_predicate_control_flag_value(struct brw_codegen *p,
427                                  struct brw_sf_compile *c,
428                                  unsigned value)
429 {
430    brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
431 
432    if (value != 0xff) {
433       if (value != c->flag_value) {
434          brw_MOV(p, brw_flag_reg(0, 0), brw_imm_uw(value));
435          c->flag_value = value;
436       }
437 
438       brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL);
439    }
440 }
441 
brw_emit_tri_setup(struct brw_sf_compile * c,bool allocate)442 static void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate)
443 {
444    struct brw_codegen *p = &c->func;
445    GLuint i;
446 
447    c->flag_value = 0xff;
448    c->nr_verts = 3;
449 
450    if (allocate)
451       alloc_regs(c);
452 
453    invert_det(c);
454    copy_z_inv_w(c);
455 
456    if (c->key.do_twoside_color)
457       do_twoside_color(c);
458 
459    if (c->key.contains_flat_varying)
460       do_flatshade_triangle(c);
461 
462 
463    for (i = 0; i < c->nr_setup_regs; i++)
464    {
465       /* Pair of incoming attributes:
466        */
467       struct brw_reg a0 = offset(c->vert[0], i);
468       struct brw_reg a1 = offset(c->vert[1], i);
469       struct brw_reg a2 = offset(c->vert[2], i);
470       GLushort pc, pc_persp, pc_linear;
471       bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
472 
473       if (pc_persp)
474       {
475 	 set_predicate_control_flag_value(p, c, pc_persp);
476 	 brw_MUL(p, a0, a0, c->inv_w[0]);
477 	 brw_MUL(p, a1, a1, c->inv_w[1]);
478 	 brw_MUL(p, a2, a2, c->inv_w[2]);
479       }
480 
481 
482       /* Calculate coefficients for interpolated values:
483        */
484       if (pc_linear)
485       {
486 	 set_predicate_control_flag_value(p, c, pc_linear);
487 
488 	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
489 	 brw_ADD(p, c->a2_sub_a0, a2, negate(a0));
490 
491 	 /* calculate dA/dx
492 	  */
493 	 brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2);
494 	 brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
495 	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
496 
497 	 /* calculate dA/dy
498 	  */
499 	 brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0);
500 	 brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));
501 	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
502       }
503 
504       {
505 	 set_predicate_control_flag_value(p, c, pc);
506 	 /* start point for interpolation
507 	  */
508 	 brw_MOV(p, c->m3C0, a0);
509 
510 	 /* Copy m0..m3 to URB.  m0 is implicitly copied from r0 in
511 	  * the send instruction:
512 	  */
513 	 brw_urb_WRITE(p,
514 		       brw_null_reg(),
515 		       0,
516 		       brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
517                        last ? BRW_URB_WRITE_EOT_COMPLETE
518                        : BRW_URB_WRITE_NO_FLAGS,
519 		       4, 	/* msg len */
520 		       0,	/* response len */
521 		       i*4,	/* offset */
522 		       BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
523       }
524    }
525 
526    brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
527 }
528 
529 
530 
brw_emit_line_setup(struct brw_sf_compile * c,bool allocate)531 static void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate)
532 {
533    struct brw_codegen *p = &c->func;
534    GLuint i;
535 
536    c->flag_value = 0xff;
537    c->nr_verts = 2;
538 
539    if (allocate)
540       alloc_regs(c);
541 
542    invert_det(c);
543    copy_z_inv_w(c);
544 
545    if (c->key.contains_flat_varying)
546       do_flatshade_line(c);
547 
548    for (i = 0; i < c->nr_setup_regs; i++)
549    {
550       /* Pair of incoming attributes:
551        */
552       struct brw_reg a0 = offset(c->vert[0], i);
553       struct brw_reg a1 = offset(c->vert[1], i);
554       GLushort pc, pc_persp, pc_linear;
555       bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
556 
557       if (pc_persp)
558       {
559 	 set_predicate_control_flag_value(p, c, pc_persp);
560 	 brw_MUL(p, a0, a0, c->inv_w[0]);
561 	 brw_MUL(p, a1, a1, c->inv_w[1]);
562       }
563 
564       /* Calculate coefficients for position, color:
565        */
566       if (pc_linear) {
567 	 set_predicate_control_flag_value(p, c, pc_linear);
568 
569 	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
570 
571 	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);
572 	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
573 
574 	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
575 	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
576       }
577 
578       {
579 	 set_predicate_control_flag_value(p, c, pc);
580 
581 	 /* start point for interpolation
582 	  */
583 	 brw_MOV(p, c->m3C0, a0);
584 
585 	 /* Copy m0..m3 to URB.
586 	  */
587 	 brw_urb_WRITE(p,
588 		       brw_null_reg(),
589 		       0,
590 		       brw_vec8_grf(0, 0),
591                        last ? BRW_URB_WRITE_EOT_COMPLETE
592                        : BRW_URB_WRITE_NO_FLAGS,
593 		       4, 	/* msg len */
594 		       0,	/* response len */
595 		       i*4,	/* urb destination offset */
596 		       BRW_URB_SWIZZLE_TRANSPOSE);
597       }
598    }
599 
600    brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
601 }
602 
brw_emit_point_sprite_setup(struct brw_sf_compile * c,bool allocate)603 static void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate)
604 {
605    struct brw_codegen *p = &c->func;
606    GLuint i;
607 
608    c->flag_value = 0xff;
609    c->nr_verts = 1;
610 
611    if (allocate)
612       alloc_regs(c);
613 
614    copy_z_inv_w(c);
615    for (i = 0; i < c->nr_setup_regs; i++)
616    {
617       struct brw_reg a0 = offset(c->vert[0], i);
618       GLushort pc, pc_persp, pc_linear, pc_coord_replace;
619       bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
620 
621       pc_coord_replace = calculate_point_sprite_mask(c, i);
622       pc_persp &= ~pc_coord_replace;
623 
624       if (pc_persp) {
625 	 set_predicate_control_flag_value(p, c, pc_persp);
626 	 brw_MUL(p, a0, a0, c->inv_w[0]);
627       }
628 
629       /* Point sprite coordinate replacement: A texcoord with this
630        * enabled gets replaced with the value (x, y, 0, 1) where x and
631        * y vary from 0 to 1 across the horizontal and vertical of the
632        * point.
633        */
634       if (pc_coord_replace) {
635 	 set_predicate_control_flag_value(p, c, pc_coord_replace);
636 	 /* Calculate 1.0/PointWidth */
637 	 gfx4_math(&c->func,
638 		   c->tmp,
639 		   BRW_MATH_FUNCTION_INV,
640 		   0,
641 		   c->dx0,
642 		   BRW_MATH_PRECISION_FULL);
643 
644 	 brw_set_default_access_mode(p, BRW_ALIGN_16);
645 
646 	 /* dA/dx, dA/dy */
647 	 brw_MOV(p, c->m1Cx, brw_imm_f(0.0));
648 	 brw_MOV(p, c->m2Cy, brw_imm_f(0.0));
649 	 brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp);
650 	 if (c->key.sprite_origin_lower_left) {
651 	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp));
652 	 } else {
653 	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp);
654 	 }
655 
656 	 /* attribute constant offset */
657 	 brw_MOV(p, c->m3C0, brw_imm_f(0.0));
658 	 if (c->key.sprite_origin_lower_left) {
659 	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0));
660 	 } else {
661 	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0));
662 	 }
663 
664 	 brw_set_default_access_mode(p, BRW_ALIGN_1);
665       }
666 
667       if (pc & ~pc_coord_replace) {
668 	 set_predicate_control_flag_value(p, c, pc & ~pc_coord_replace);
669 	 brw_MOV(p, c->m1Cx, brw_imm_ud(0));
670 	 brw_MOV(p, c->m2Cy, brw_imm_ud(0));
671 	 brw_MOV(p, c->m3C0, a0); /* constant value */
672       }
673 
674 
675       set_predicate_control_flag_value(p, c, pc);
676       /* Copy m0..m3 to URB. */
677       brw_urb_WRITE(p,
678 		    brw_null_reg(),
679 		    0,
680 		    brw_vec8_grf(0, 0),
681                     last ? BRW_URB_WRITE_EOT_COMPLETE
682                     : BRW_URB_WRITE_NO_FLAGS,
683 		    4, 	/* msg len */
684 		    0,	/* response len */
685 		    i*4,	/* urb destination offset */
686 		    BRW_URB_SWIZZLE_TRANSPOSE);
687    }
688 
689    brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
690 }
691 
692 /* Points setup - several simplifications as all attributes are
693  * constant across the face of the point (point sprites excluded!)
694  */
brw_emit_point_setup(struct brw_sf_compile * c,bool allocate)695 static void brw_emit_point_setup(struct brw_sf_compile *c, bool allocate)
696 {
697    struct brw_codegen *p = &c->func;
698    GLuint i;
699 
700    c->flag_value = 0xff;
701    c->nr_verts = 1;
702 
703    if (allocate)
704       alloc_regs(c);
705 
706    copy_z_inv_w(c);
707 
708    brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */
709    brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */
710 
711    for (i = 0; i < c->nr_setup_regs; i++)
712    {
713       struct brw_reg a0 = offset(c->vert[0], i);
714       GLushort pc, pc_persp, pc_linear;
715       bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
716 
717       if (pc_persp)
718       {
719 	 /* This seems odd as the values are all constant, but the
720 	  * fragment shader will be expecting it:
721 	  */
722 	 set_predicate_control_flag_value(p, c, pc_persp);
723 	 brw_MUL(p, a0, a0, c->inv_w[0]);
724       }
725 
726 
727       /* The delta values are always zero, just send the starting
728        * coordinate.  Again, this is to fit in with the interpolation
729        * code in the fragment shader.
730        */
731       {
732 	 set_predicate_control_flag_value(p, c, pc);
733 
734 	 brw_MOV(p, c->m3C0, a0); /* constant value */
735 
736 	 /* Copy m0..m3 to URB.
737 	  */
738 	 brw_urb_WRITE(p,
739 		       brw_null_reg(),
740 		       0,
741 		       brw_vec8_grf(0, 0),
742                        last ? BRW_URB_WRITE_EOT_COMPLETE
743                        : BRW_URB_WRITE_NO_FLAGS,
744 		       4, 	/* msg len */
745 		       0,	/* response len */
746 		       i*4,	/* urb destination offset */
747 		       BRW_URB_SWIZZLE_TRANSPOSE);
748       }
749    }
750 
751    brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
752 }
753 
brw_emit_anyprim_setup(struct brw_sf_compile * c)754 static void brw_emit_anyprim_setup( struct brw_sf_compile *c )
755 {
756    struct brw_codegen *p = &c->func;
757    struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);
758    struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0);
759    struct brw_reg primmask;
760    int jmp;
761    struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
762 
763    c->nr_verts = 3;
764    alloc_regs(c);
765 
766    primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD);
767 
768    brw_MOV(p, primmask, brw_imm_ud(1));
769    brw_SHL(p, primmask, primmask, payload_prim);
770 
771    brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) |
772 					       (1<<_3DPRIM_TRISTRIP) |
773 					       (1<<_3DPRIM_TRIFAN) |
774 					       (1<<_3DPRIM_TRISTRIP_REVERSE) |
775 					       (1<<_3DPRIM_POLYGON) |
776 					       (1<<_3DPRIM_RECTLIST) |
777 					       (1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
778    brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
779    jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
780    brw_emit_tri_setup(c, false);
781    brw_land_fwd_jump(p, jmp);
782 
783    brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) |
784 					       (1<<_3DPRIM_LINESTRIP) |
785 					       (1<<_3DPRIM_LINELOOP) |
786 					       (1<<_3DPRIM_LINESTRIP_CONT) |
787 					       (1<<_3DPRIM_LINESTRIP_BF) |
788 					       (1<<_3DPRIM_LINESTRIP_CONT_BF)));
789    brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
790    jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
791    brw_emit_line_setup(c, false);
792    brw_land_fwd_jump(p, jmp);
793 
794    brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
795    brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
796    jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
797    brw_emit_point_sprite_setup(c, false);
798    brw_land_fwd_jump(p, jmp);
799 
800    brw_emit_point_setup( c, false );
801 }
802 
803 const unsigned *
brw_compile_sf(const struct brw_compiler * compiler,void * mem_ctx,const struct brw_sf_prog_key * key,struct brw_sf_prog_data * prog_data,struct brw_vue_map * vue_map,unsigned * final_assembly_size)804 brw_compile_sf(const struct brw_compiler *compiler,
805                void *mem_ctx,
806                const struct brw_sf_prog_key *key,
807                struct brw_sf_prog_data *prog_data,
808                struct brw_vue_map *vue_map,
809                unsigned *final_assembly_size)
810 {
811    struct brw_sf_compile c;
812    memset(&c, 0, sizeof(c));
813 
814    /* Begin the compilation:
815     */
816    brw_init_codegen(&compiler->isa, &c.func, mem_ctx);
817 
818    c.key = *key;
819    c.vue_map = *vue_map;
820    if (c.key.do_point_coord) {
821       /*
822        * gl_PointCoord is a FS instead of VS builtin variable, thus it's
823        * not included in c.vue_map generated in VS stage. Here we add
824        * it manually to let SF shader generate the needed interpolation
825        * coefficient for FS shader.
826        */
827       c.vue_map.varying_to_slot[BRW_VARYING_SLOT_PNTC] = c.vue_map.num_slots;
828       c.vue_map.slot_to_varying[c.vue_map.num_slots++] = BRW_VARYING_SLOT_PNTC;
829    }
830    c.urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
831    c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset;
832    c.nr_setup_regs = c.nr_attr_regs;
833 
834    c.prog_data.urb_read_length = c.nr_attr_regs;
835    c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
836 
837    /* Which primitive?  Or all three?
838     */
839    switch (key->primitive) {
840    case BRW_SF_PRIM_TRIANGLES:
841       c.nr_verts = 3;
842       brw_emit_tri_setup( &c, true );
843       break;
844    case BRW_SF_PRIM_LINES:
845       c.nr_verts = 2;
846       brw_emit_line_setup( &c, true );
847       break;
848    case BRW_SF_PRIM_POINTS:
849       c.nr_verts = 1;
850       if (key->do_point_sprite)
851 	  brw_emit_point_sprite_setup( &c, true );
852       else
853 	  brw_emit_point_setup( &c, true );
854       break;
855    case BRW_SF_PRIM_UNFILLED_TRIS:
856       c.nr_verts = 3;
857       brw_emit_anyprim_setup( &c );
858       break;
859    default:
860       unreachable("not reached");
861    }
862 
863    /* FINISHME: SF programs use calculated jumps (i.e., JMPI with a register
864     * source). Compacting would be difficult.
865     */
866    /* brw_compact_instructions(&c.func, 0, 0, NULL); */
867 
868    *prog_data = c.prog_data;
869 
870    const unsigned *program = brw_get_program(&c.func, final_assembly_size);
871 
872    if (INTEL_DEBUG(DEBUG_SF)) {
873       fprintf(stderr, "sf:\n");
874       brw_disassemble_with_labels(&compiler->isa,
875                                   program, 0, *final_assembly_size, stderr);
876       fprintf(stderr, "\n");
877    }
878 
879    return program;
880 }
881