1 /*
2 * Copyright © 2006 - 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_compiler.h"
25 #include "brw_eu.h"
26 #include "brw_prim.h"
27
28 #include "dev/intel_debug.h"
29
/**
 * State carried through the generation of a single SF program by
 * brw_compile_sf().  The structure is zero-filled before use.
 */
struct brw_sf_compile {
   /* Instruction emitter that all brw_* emit helpers in this file write to. */
   struct brw_codegen func;
   /* Private copy of the caller's program key. */
   struct brw_sf_prog_key key;
   /* Results (GRF count, URB sizes) copied back to the caller at the end. */
   struct brw_sf_prog_data prog_data;

   /* Scalars located in r1 by alloc_regs(), which describes them as values
    * computed by the fixed function unit.  pv is retyped to D and is used
    * as the selector for the computed jump in the flat-shading code.
    */
   struct brw_reg pv;
   struct brw_reg det;
   struct brw_reg dx0;
   struct brw_reg dx2;
   struct brw_reg dy0;
   struct brw_reg dy2;

   /* z and 1/w passed in separately (interleaved in r2, one pair per
    * vertex):
    */
   struct brw_reg z[3];
   struct brw_reg inv_w[3];

   /* The vertices (first register of each vertex; only the first nr_verts
    * entries are assigned by alloc_regs()):
    */
   struct brw_reg vert[3];

   /* Temporaries, allocated after last vertex reg.
    */
   struct brw_reg inv_det;
   struct brw_reg a1_sub_a0;
   struct brw_reg a2_sub_a0;
   struct brw_reg tmp;

   /* Message registers m1..m3 holding the interpolation coefficients that
    * are written to the URB for each attribute pair.
    */
   struct brw_reg m1Cx;
   struct brw_reg m2Cy;
   struct brw_reg m3C0;

   /* Vertex count of the primitive currently being set up (1, 2 or 3). */
   GLuint nr_verts;
   /* Registers per vertex; used as the per-vertex stride in alloc_regs(). */
   GLuint nr_attr_regs;
   /* Number of attribute registers processed (and URB writes issued) by
    * each setup routine.
    */
   GLuint nr_setup_regs;
   /* Offset, in registers of two VUE slots each, between a vertex-relative
    * register index and its position in the VUE.
    */
   int urb_entry_read_offset;

   /** The last known value of the f0.0 flag register. */
   unsigned flag_value;

   /* VUE layout; brw_compile_sf() may append a slot for gl_PointCoord. */
   struct brw_vue_map vue_map;
};
72
73 /**
74 * Determine the vue slot corresponding to the given half of the given register.
75 */
vert_reg_to_vue_slot(struct brw_sf_compile * c,GLuint reg,int half)76 static inline int vert_reg_to_vue_slot(struct brw_sf_compile *c, GLuint reg,
77 int half)
78 {
79 return (reg + c->urb_entry_read_offset) * 2 + half;
80 }
81
82 /**
83 * Determine the varying corresponding to the given half of the given
84 * register. half=0 means the first half of a register, half=1 means the
85 * second half.
86 */
vert_reg_to_varying(struct brw_sf_compile * c,GLuint reg,int half)87 static inline int vert_reg_to_varying(struct brw_sf_compile *c, GLuint reg,
88 int half)
89 {
90 int vue_slot = vert_reg_to_vue_slot(c, reg, half);
91 return c->vue_map.slot_to_varying[vue_slot];
92 }
93
94 /**
95 * Determine the register corresponding to the given vue slot
96 */
get_vue_slot(struct brw_sf_compile * c,struct brw_reg vert,int vue_slot)97 static struct brw_reg get_vue_slot(struct brw_sf_compile *c,
98 struct brw_reg vert,
99 int vue_slot)
100 {
101 GLuint off = vue_slot / 2 - c->urb_entry_read_offset;
102 GLuint sub = vue_slot % 2;
103
104 return brw_vec4_grf(vert.nr + off, sub * 4);
105 }
106
107 /**
108 * Determine the register corresponding to the given varying.
109 */
get_varying(struct brw_sf_compile * c,struct brw_reg vert,GLuint varying)110 static struct brw_reg get_varying(struct brw_sf_compile *c,
111 struct brw_reg vert,
112 GLuint varying)
113 {
114 int vue_slot = c->vue_map.varying_to_slot[varying];
115 assert (vue_slot >= c->urb_entry_read_offset);
116 return get_vue_slot(c, vert, vue_slot);
117 }
118
119 static bool
have_attr(struct brw_sf_compile * c,GLuint attr)120 have_attr(struct brw_sf_compile *c, GLuint attr)
121 {
122 return (c->key.attrs & BITFIELD64_BIT(attr)) ? 1 : 0;
123 }
124
125 /***********************************************************************
126 * Twoside lighting
127 */
copy_bfc(struct brw_sf_compile * c,struct brw_reg vert)128 static void copy_bfc( struct brw_sf_compile *c,
129 struct brw_reg vert )
130 {
131 struct brw_codegen *p = &c->func;
132 GLuint i;
133
134 for (i = 0; i < 2; i++) {
135 if (have_attr(c, VARYING_SLOT_COL0+i) &&
136 have_attr(c, VARYING_SLOT_BFC0+i))
137 brw_MOV(p,
138 get_varying(c, vert, VARYING_SLOT_COL0+i),
139 get_varying(c, vert, VARYING_SLOT_BFC0+i));
140 }
141 }
142
143
do_twoside_color(struct brw_sf_compile * c)144 static void do_twoside_color( struct brw_sf_compile *c )
145 {
146 struct brw_codegen *p = &c->func;
147 GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L;
148
149 /* Already done in clip program:
150 */
151 if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS)
152 return;
153
154 /* If the vertex shader provides backface color, do the selection. The VS
155 * promises to set up the front color if the backface color is provided, but
156 * it may contain junk if never written to.
157 */
158 if (!(have_attr(c, VARYING_SLOT_COL0) && have_attr(c, VARYING_SLOT_BFC0)) &&
159 !(have_attr(c, VARYING_SLOT_COL1) && have_attr(c, VARYING_SLOT_BFC1)))
160 return;
161
162 /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order
163 * to get all channels active inside the IF. In the clipping code
164 * we run with NoMask, so it's not an option and we can use
165 * BRW_EXECUTE_1 for all comparisons.
166 */
167 brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0));
168 brw_IF(p, BRW_EXECUTE_4);
169 {
170 switch (c->nr_verts) {
171 case 3: copy_bfc(c, c->vert[2]); FALLTHROUGH;
172 case 2: copy_bfc(c, c->vert[1]); FALLTHROUGH;
173 case 1: copy_bfc(c, c->vert[0]);
174 }
175 }
176 brw_ENDIF(p);
177 }
178
179
180
181 /***********************************************************************
182 * Flat shading
183 */
184
copy_flatshaded_attributes(struct brw_sf_compile * c,struct brw_reg dst,struct brw_reg src)185 static void copy_flatshaded_attributes(struct brw_sf_compile *c,
186 struct brw_reg dst,
187 struct brw_reg src)
188 {
189 struct brw_codegen *p = &c->func;
190 int i;
191
192 for (i = 0; i < c->vue_map.num_slots; i++) {
193 if (c->key.interp_mode[i] == INTERP_MODE_FLAT) {
194 brw_MOV(p,
195 get_vue_slot(c, dst, i),
196 get_vue_slot(c, src, i));
197 }
198 }
199 }
200
count_flatshaded_attributes(struct brw_sf_compile * c)201 static int count_flatshaded_attributes(struct brw_sf_compile *c)
202 {
203 int i;
204 int count = 0;
205
206 for (i = 0; i < c->vue_map.num_slots; i++)
207 if (c->key.interp_mode[i] == INTERP_MODE_FLAT)
208 count++;
209
210 return count;
211 }
212
213
214
215 /* Need to use a computed jump to copy flatshaded attributes as the
216 * vertices are ordered according to y-coordinate before reaching this
217 * point, so the PV could be anywhere.
218 */
do_flatshade_triangle(struct brw_sf_compile * c)219 static void do_flatshade_triangle( struct brw_sf_compile *c )
220 {
221 struct brw_codegen *p = &c->func;
222 GLuint nr;
223 GLuint jmpi = 1;
224
225 /* Already done in clip program:
226 */
227 if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS)
228 return;
229
230 if (p->devinfo->ver == 5)
231 jmpi = 2;
232
233 nr = count_flatshaded_attributes(c);
234
235 brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1)));
236 brw_JMPI(p, c->pv, BRW_PREDICATE_NONE);
237
238 copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
239 copy_flatshaded_attributes(c, c->vert[2], c->vert[0]);
240 brw_JMPI(p, brw_imm_d(jmpi*(nr*4+1)), BRW_PREDICATE_NONE);
241
242 copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
243 copy_flatshaded_attributes(c, c->vert[2], c->vert[1]);
244 brw_JMPI(p, brw_imm_d(jmpi*nr*2), BRW_PREDICATE_NONE);
245
246 copy_flatshaded_attributes(c, c->vert[0], c->vert[2]);
247 copy_flatshaded_attributes(c, c->vert[1], c->vert[2]);
248 }
249
250
do_flatshade_line(struct brw_sf_compile * c)251 static void do_flatshade_line( struct brw_sf_compile *c )
252 {
253 struct brw_codegen *p = &c->func;
254 GLuint nr;
255 GLuint jmpi = 1;
256
257 /* Already done in clip program:
258 */
259 if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS)
260 return;
261
262 if (p->devinfo->ver == 5)
263 jmpi = 2;
264
265 nr = count_flatshaded_attributes(c);
266
267 brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1)));
268 brw_JMPI(p, c->pv, BRW_PREDICATE_NONE);
269 copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
270
271 brw_JMPI(p, brw_imm_ud(jmpi*nr), BRW_PREDICATE_NONE);
272 copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
273 }
274
275
276 /***********************************************************************
277 * Triangle setup.
278 */
279
280
alloc_regs(struct brw_sf_compile * c)281 static void alloc_regs( struct brw_sf_compile *c )
282 {
283 GLuint reg, i;
284
285 /* Values computed by fixed function unit:
286 */
287 c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D);
288 c->det = brw_vec1_grf(1, 2);
289 c->dx0 = brw_vec1_grf(1, 3);
290 c->dx2 = brw_vec1_grf(1, 4);
291 c->dy0 = brw_vec1_grf(1, 5);
292 c->dy2 = brw_vec1_grf(1, 6);
293
294 /* z and 1/w passed in separately:
295 */
296 c->z[0] = brw_vec1_grf(2, 0);
297 c->inv_w[0] = brw_vec1_grf(2, 1);
298 c->z[1] = brw_vec1_grf(2, 2);
299 c->inv_w[1] = brw_vec1_grf(2, 3);
300 c->z[2] = brw_vec1_grf(2, 4);
301 c->inv_w[2] = brw_vec1_grf(2, 5);
302
303 /* The vertices:
304 */
305 reg = 3;
306 for (i = 0; i < c->nr_verts; i++) {
307 c->vert[i] = brw_vec8_grf(reg, 0);
308 reg += c->nr_attr_regs;
309 }
310
311 /* Temporaries, allocated after last vertex reg.
312 */
313 c->inv_det = brw_vec1_grf(reg, 0); reg++;
314 c->a1_sub_a0 = brw_vec8_grf(reg, 0); reg++;
315 c->a2_sub_a0 = brw_vec8_grf(reg, 0); reg++;
316 c->tmp = brw_vec8_grf(reg, 0); reg++;
317
318 /* Note grf allocation:
319 */
320 c->prog_data.total_grf = reg;
321
322
323 /* Outputs of this program - interpolation coefficients for
324 * rasterization:
325 */
326 c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0);
327 c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0);
328 c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0);
329 }
330
331
copy_z_inv_w(struct brw_sf_compile * c)332 static void copy_z_inv_w( struct brw_sf_compile *c )
333 {
334 struct brw_codegen *p = &c->func;
335 GLuint i;
336
337 /* Copy both scalars with a single MOV:
338 */
339 for (i = 0; i < c->nr_verts; i++)
340 brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i]));
341 }
342
343
invert_det(struct brw_sf_compile * c)344 static void invert_det( struct brw_sf_compile *c)
345 {
346 /* Looks like we invert all 8 elements just to get 1/det in
347 * position 2 !?!
348 */
349 gfx4_math(&c->func,
350 c->inv_det,
351 BRW_MATH_FUNCTION_INV,
352 0,
353 c->det,
354 BRW_MATH_PRECISION_FULL);
355
356 }
357
358
359 static bool
calculate_masks(struct brw_sf_compile * c,GLuint reg,GLushort * pc,GLushort * pc_persp,GLushort * pc_linear)360 calculate_masks(struct brw_sf_compile *c,
361 GLuint reg,
362 GLushort *pc,
363 GLushort *pc_persp,
364 GLushort *pc_linear)
365 {
366 bool is_last_attr = (reg == c->nr_setup_regs - 1);
367 enum glsl_interp_mode interp;
368
369 *pc_persp = 0;
370 *pc_linear = 0;
371 *pc = 0xf;
372
373 interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 0)];
374 if (interp == INTERP_MODE_SMOOTH) {
375 *pc_linear = 0xf;
376 *pc_persp = 0xf;
377 } else if (interp == INTERP_MODE_NOPERSPECTIVE)
378 *pc_linear = 0xf;
379
380 /* Maybe only process one attribute on the final round:
381 */
382 if (vert_reg_to_varying(c, reg, 1) != BRW_VARYING_SLOT_COUNT) {
383 *pc |= 0xf0;
384
385 interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 1)];
386 if (interp == INTERP_MODE_SMOOTH) {
387 *pc_linear |= 0xf0;
388 *pc_persp |= 0xf0;
389 } else if (interp == INTERP_MODE_NOPERSPECTIVE)
390 *pc_linear |= 0xf0;
391 }
392
393 return is_last_attr;
394 }
395
396 /* Calculates the predicate control for which channels of a reg
397 * (containing 2 attrs) to do point sprite coordinate replacement on.
398 */
399 static uint16_t
calculate_point_sprite_mask(struct brw_sf_compile * c,GLuint reg)400 calculate_point_sprite_mask(struct brw_sf_compile *c, GLuint reg)
401 {
402 int varying1, varying2;
403 uint16_t pc = 0;
404
405 varying1 = vert_reg_to_varying(c, reg, 0);
406 if (varying1 >= VARYING_SLOT_TEX0 && varying1 <= VARYING_SLOT_TEX7) {
407 if (c->key.point_sprite_coord_replace & (1 << (varying1 - VARYING_SLOT_TEX0)))
408 pc |= 0x0f;
409 }
410 if (varying1 == BRW_VARYING_SLOT_PNTC)
411 pc |= 0x0f;
412
413 varying2 = vert_reg_to_varying(c, reg, 1);
414 if (varying2 >= VARYING_SLOT_TEX0 && varying2 <= VARYING_SLOT_TEX7) {
415 if (c->key.point_sprite_coord_replace & (1 << (varying2 -
416 VARYING_SLOT_TEX0)))
417 pc |= 0xf0;
418 }
419 if (varying2 == BRW_VARYING_SLOT_PNTC)
420 pc |= 0xf0;
421
422 return pc;
423 }
424
425 static void
set_predicate_control_flag_value(struct brw_codegen * p,struct brw_sf_compile * c,unsigned value)426 set_predicate_control_flag_value(struct brw_codegen *p,
427 struct brw_sf_compile *c,
428 unsigned value)
429 {
430 brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
431
432 if (value != 0xff) {
433 if (value != c->flag_value) {
434 brw_MOV(p, brw_flag_reg(0, 0), brw_imm_uw(value));
435 c->flag_value = value;
436 }
437
438 brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL);
439 }
440 }
441
/**
 * Emit the setup code for a triangle.
 *
 * For each attribute register (a pair of attributes) this emits the
 * optional multiply by 1/w, the computation of the dA/dx and dA/dy
 * coefficients into m1/m2, the copy of vertex 0's value into m3, and a URB
 * write of the result.  The write for the last register carries
 * BRW_URB_WRITE_EOT_COMPLETE.
 *
 * \param allocate  when true, registers are assigned here via alloc_regs();
 *                  pass false when the caller has already done so.
 */
static void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate)
{
   struct brw_codegen *p = &c->func;
   GLuint i;

   /* 0xff is never loaded by set_predicate_control_flag_value(), so this
    * guarantees the first real mask is written to the flag register.
    */
   c->flag_value = 0xff;
   c->nr_verts = 3;

   if (allocate)
      alloc_regs(c);

   invert_det(c);
   copy_z_inv_w(c);

   if (c->key.do_twoside_color)
      do_twoside_color(c);

   if (c->key.contains_flat_varying)
      do_flatshade_triangle(c);


   for (i = 0; i < c->nr_setup_regs; i++)
   {
      /* Pair of incoming attributes:
       */
      struct brw_reg a0 = offset(c->vert[0], i);
      struct brw_reg a1 = offset(c->vert[1], i);
      struct brw_reg a2 = offset(c->vert[2], i);
      GLushort pc, pc_persp, pc_linear;
      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);

      /* Perspective-interpolated channels are scaled in place by each
       * vertex's 1/w.
       */
      if (pc_persp)
      {
         set_predicate_control_flag_value(p, c, pc_persp);
         brw_MUL(p, a0, a0, c->inv_w[0]);
         brw_MUL(p, a1, a1, c->inv_w[1]);
         brw_MUL(p, a2, a2, c->inv_w[2]);
      }


      /* Calculate coefficients for interpolated values:
       */
      if (pc_linear)
      {
         set_predicate_control_flag_value(p, c, pc_linear);

         brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
         brw_ADD(p, c->a2_sub_a0, a2, negate(a0));

         /* calculate dA/dx
          *
          * NOTE(review): the MUL targets the null register, presumably to
          * leave its product in the accumulator for the following MAC -
          * confirm against the EU documentation.
          */
         brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2);
         brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
         brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);

         /* calculate dA/dy (same MUL/MAC pattern as above)
          */
         brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0);
         brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));
         brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
      }

      {
         /* Restrict the remaining instructions, including the URB write,
          * to the channels that hold a real attribute.
          */
         set_predicate_control_flag_value(p, c, pc);
         /* start point for interpolation
          */
         brw_MOV(p, c->m3C0, a0);

         /* Copy m0..m3 to URB.  m0 is implicitly copied from r0 in
          * the send instruction:
          */
         brw_urb_WRITE(p,
                       brw_null_reg(),
                       0,
                       brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
                       last ? BRW_URB_WRITE_EOT_COMPLETE
                            : BRW_URB_WRITE_NO_FLAGS,
                       4,   /* msg len */
                       0,   /* response len */
                       i*4, /* offset */
                       BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
      }
   }

   /* Leave predication off for whatever is emitted next. */
   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
}
528
529
530
/**
 * Emit the setup code for a line.
 *
 * Same structure as the triangle path with two vertices: for each
 * attribute register, the optional multiply by 1/w, the dA/dx and dA/dy
 * coefficients computed from (a1 - a0), dx0/dy0 and inv_det, the copy of
 * vertex 0's value into m3, and a URB write.  The write for the last
 * register carries BRW_URB_WRITE_EOT_COMPLETE.
 *
 * \param allocate  when true, registers are assigned here via alloc_regs();
 *                  pass false when the caller has already done so.
 */
static void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate)
{
   struct brw_codegen *p = &c->func;
   GLuint i;

   /* 0xff is never loaded by set_predicate_control_flag_value(), so this
    * guarantees the first real mask is written to the flag register.
    */
   c->flag_value = 0xff;
   c->nr_verts = 2;

   if (allocate)
      alloc_regs(c);

   invert_det(c);
   copy_z_inv_w(c);

   if (c->key.contains_flat_varying)
      do_flatshade_line(c);

   for (i = 0; i < c->nr_setup_regs; i++)
   {
      /* Pair of incoming attributes:
       */
      struct brw_reg a0 = offset(c->vert[0], i);
      struct brw_reg a1 = offset(c->vert[1], i);
      GLushort pc, pc_persp, pc_linear;
      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);

      /* Perspective-interpolated channels are scaled in place by each
       * vertex's 1/w.
       */
      if (pc_persp)
      {
         set_predicate_control_flag_value(p, c, pc_persp);
         brw_MUL(p, a0, a0, c->inv_w[0]);
         brw_MUL(p, a1, a1, c->inv_w[1]);
      }

      /* Calculate coefficients for position, color:
       */
      if (pc_linear) {
         set_predicate_control_flag_value(p, c, pc_linear);

         brw_ADD(p, c->a1_sub_a0, a1, negate(a0));

         /* dA/dx = (a1 - a0) * dx0 * inv_det */
         brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);
         brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);

         /* dA/dy = (a1 - a0) * dy0 * inv_det */
         brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
         brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
      }

      {
         /* Restrict the remaining instructions, including the URB write,
          * to the channels that hold a real attribute.
          */
         set_predicate_control_flag_value(p, c, pc);

         /* start point for interpolation
          */
         brw_MOV(p, c->m3C0, a0);

         /* Copy m0..m3 to URB.
          */
         brw_urb_WRITE(p,
                       brw_null_reg(),
                       0,
                       brw_vec8_grf(0, 0),
                       last ? BRW_URB_WRITE_EOT_COMPLETE
                            : BRW_URB_WRITE_NO_FLAGS,
                       4,   /* msg len */
                       0,   /* response len */
                       i*4, /* urb destination offset */
                       BRW_URB_SWIZZLE_TRANSPOSE);
      }
   }

   /* Leave predication off for whatever is emitted next. */
   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
}
602
/**
 * Emit the setup code for a point with point-sprite coordinate replacement.
 *
 * For each attribute register the channels are split in two groups using
 * calculate_point_sprite_mask(): replaced channels get generated
 * coefficients, all other enabled channels get zero deltas and vertex 0's
 * value as the constant.  The URB write for the last register carries
 * BRW_URB_WRITE_EOT_COMPLETE.
 *
 * \param allocate  when true, registers are assigned here via alloc_regs();
 *                  pass false when the caller has already done so.
 */
static void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate)
{
   struct brw_codegen *p = &c->func;
   GLuint i;

   /* 0xff is never loaded by set_predicate_control_flag_value(), so this
    * guarantees the first real mask is written to the flag register.
    */
   c->flag_value = 0xff;
   c->nr_verts = 1;

   if (allocate)
      alloc_regs(c);

   copy_z_inv_w(c);
   for (i = 0; i < c->nr_setup_regs; i++)
   {
      struct brw_reg a0 = offset(c->vert[0], i);
      GLushort pc, pc_persp, pc_linear, pc_coord_replace;
      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);

      /* Replaced coordinates are generated below, so they are excluded
       * from the multiply by 1/w.
       */
      pc_coord_replace = calculate_point_sprite_mask(c, i);
      pc_persp &= ~pc_coord_replace;

      if (pc_persp) {
         set_predicate_control_flag_value(p, c, pc_persp);
         brw_MUL(p, a0, a0, c->inv_w[0]);
      }

      /* Point sprite coordinate replacement: A texcoord with this
       * enabled gets replaced with the value (x, y, 0, 1) where x and
       * y vary from 0 to 1 across the horizontal and vertical of the
       * point.
       */
      if (pc_coord_replace) {
         set_predicate_control_flag_value(p, c, pc_coord_replace);
         /* Calculate 1.0/PointWidth */
         gfx4_math(&c->func,
                   c->tmp,
                   BRW_MATH_FUNCTION_INV,
                   0,
                   c->dx0,
                   BRW_MATH_PRECISION_FULL);

         /* NOTE(review): Align16 is presumably selected because the
          * per-component writemasks below require it - confirm.
          */
         brw_set_default_access_mode(p, BRW_ALIGN_16);

         /* dA/dx, dA/dy: zero except for x in dA/dx and y in dA/dy, which
          * get 1/PointWidth (negated for y with a lower-left origin).
          */
         brw_MOV(p, c->m1Cx, brw_imm_f(0.0));
         brw_MOV(p, c->m2Cy, brw_imm_f(0.0));
         brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp);
         if (c->key.sprite_origin_lower_left) {
            brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp));
         } else {
            brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp);
         }

         /* attribute constant offset: w is 1, and y also starts at 1 with
          * a lower-left origin.
          */
         brw_MOV(p, c->m3C0, brw_imm_f(0.0));
         if (c->key.sprite_origin_lower_left) {
            brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0));
         } else {
            brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0));
         }

         brw_set_default_access_mode(p, BRW_ALIGN_1);
      }

      /* Enabled channels that are not replaced: zero deltas, value taken
       * from the vertex.
       */
      if (pc & ~pc_coord_replace) {
         set_predicate_control_flag_value(p, c, pc & ~pc_coord_replace);
         brw_MOV(p, c->m1Cx, brw_imm_ud(0));
         brw_MOV(p, c->m2Cy, brw_imm_ud(0));
         brw_MOV(p, c->m3C0, a0); /* constant value */
      }


      /* The URB write covers every enabled channel of this register. */
      set_predicate_control_flag_value(p, c, pc);
      /* Copy m0..m3 to URB. */
      brw_urb_WRITE(p,
                    brw_null_reg(),
                    0,
                    brw_vec8_grf(0, 0),
                    last ? BRW_URB_WRITE_EOT_COMPLETE
                         : BRW_URB_WRITE_NO_FLAGS,
                    4,   /* msg len */
                    0,   /* response len */
                    i*4, /* urb destination offset */
                    BRW_URB_SWIZZLE_TRANSPOSE);
   }

   /* Leave predication off for whatever is emitted next. */
   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
}
691
692 /* Points setup - several simplifications as all attributes are
693 * constant across the face of the point (point sprites excluded!)
694 */
brw_emit_point_setup(struct brw_sf_compile * c,bool allocate)695 static void brw_emit_point_setup(struct brw_sf_compile *c, bool allocate)
696 {
697 struct brw_codegen *p = &c->func;
698 GLuint i;
699
700 c->flag_value = 0xff;
701 c->nr_verts = 1;
702
703 if (allocate)
704 alloc_regs(c);
705
706 copy_z_inv_w(c);
707
708 brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */
709 brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */
710
711 for (i = 0; i < c->nr_setup_regs; i++)
712 {
713 struct brw_reg a0 = offset(c->vert[0], i);
714 GLushort pc, pc_persp, pc_linear;
715 bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
716
717 if (pc_persp)
718 {
719 /* This seems odd as the values are all constant, but the
720 * fragment shader will be expecting it:
721 */
722 set_predicate_control_flag_value(p, c, pc_persp);
723 brw_MUL(p, a0, a0, c->inv_w[0]);
724 }
725
726
727 /* The delta values are always zero, just send the starting
728 * coordinate. Again, this is to fit in with the interpolation
729 * code in the fragment shader.
730 */
731 {
732 set_predicate_control_flag_value(p, c, pc);
733
734 brw_MOV(p, c->m3C0, a0); /* constant value */
735
736 /* Copy m0..m3 to URB.
737 */
738 brw_urb_WRITE(p,
739 brw_null_reg(),
740 0,
741 brw_vec8_grf(0, 0),
742 last ? BRW_URB_WRITE_EOT_COMPLETE
743 : BRW_URB_WRITE_NO_FLAGS,
744 4, /* msg len */
745 0, /* response len */
746 i*4, /* urb destination offset */
747 BRW_URB_SWIZZLE_TRANSPOSE);
748 }
749 }
750
751 brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
752 }
753
/**
 * Emit a program that handles any primitive type, choosing the setup path
 * at run time from the primitive type found in the r1 payload.
 *
 * The triangle, line and point-sprite paths are emitted back to back, each
 * preceded by a test and a predicated forward jump that skips the path
 * when the test yields zero; the plain point path comes last and is
 * reached when none of the others was taken.  Registers are allocated once
 * here for three vertices, so the individual paths are called with
 * allocate = false.
 */
static void brw_emit_anyprim_setup( struct brw_sf_compile *c )
{
   struct brw_codegen *p = &c->func;
   /* Both payload views start at r1.0: the primitive type is read as a UW
    * and the attribute bits as a UD.
    */
   struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);
   struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0);
   struct brw_reg primmask;
   int jmp;
   struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));

   c->nr_verts = 3;
   alloc_regs(c);

   primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD);

   /* primmask = 1 << primitive type, so membership in a set of primitive
    * types can be tested with a single AND.
    */
   brw_MOV(p, primmask, brw_imm_ud(1));
   brw_SHL(p, primmask, primmask, payload_prim);

   /* Triangle-type primitives.  The Z conditional modifier on the AND sets
    * the flag when the result is zero, and the predicated JMPI then skips
    * the triangle path.  jmp is the index of that JMPI in the instruction
    * store.
    * NOTE(review): brw_land_fwd_jump() presumably patches the jump to
    * target the current end of the program - confirm.
    */
   brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) |
                                               (1<<_3DPRIM_TRISTRIP) |
                                               (1<<_3DPRIM_TRIFAN) |
                                               (1<<_3DPRIM_TRISTRIP_REVERSE) |
                                               (1<<_3DPRIM_POLYGON) |
                                               (1<<_3DPRIM_RECTLIST) |
                                               (1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
   brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
   jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
   brw_emit_tri_setup(c, false);
   brw_land_fwd_jump(p, jmp);

   /* Line-type primitives, same scheme. */
   brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) |
                                               (1<<_3DPRIM_LINESTRIP) |
                                               (1<<_3DPRIM_LINELOOP) |
                                               (1<<_3DPRIM_LINESTRIP_CONT) |
                                               (1<<_3DPRIM_LINESTRIP_BF) |
                                               (1<<_3DPRIM_LINESTRIP_CONT_BF)));
   brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
   jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
   brw_emit_line_setup(c, false);
   brw_land_fwd_jump(p, jmp);

   /* Point sprites: tested on the payload's sprite-point-enable bit rather
    * than on the primitive type.
    */
   brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
   brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
   jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
   brw_emit_point_sprite_setup(c, false);
   brw_land_fwd_jump(p, jmp);

   /* Everything else is set up as a plain point. */
   brw_emit_point_setup( c, false );
}
802
803 const unsigned *
brw_compile_sf(const struct brw_compiler * compiler,void * mem_ctx,const struct brw_sf_prog_key * key,struct brw_sf_prog_data * prog_data,struct brw_vue_map * vue_map,unsigned * final_assembly_size)804 brw_compile_sf(const struct brw_compiler *compiler,
805 void *mem_ctx,
806 const struct brw_sf_prog_key *key,
807 struct brw_sf_prog_data *prog_data,
808 struct brw_vue_map *vue_map,
809 unsigned *final_assembly_size)
810 {
811 struct brw_sf_compile c;
812 memset(&c, 0, sizeof(c));
813
814 /* Begin the compilation:
815 */
816 brw_init_codegen(&compiler->isa, &c.func, mem_ctx);
817
818 c.key = *key;
819 c.vue_map = *vue_map;
820 if (c.key.do_point_coord) {
821 /*
822 * gl_PointCoord is a FS instead of VS builtin variable, thus it's
823 * not included in c.vue_map generated in VS stage. Here we add
824 * it manually to let SF shader generate the needed interpolation
825 * coefficient for FS shader.
826 */
827 c.vue_map.varying_to_slot[BRW_VARYING_SLOT_PNTC] = c.vue_map.num_slots;
828 c.vue_map.slot_to_varying[c.vue_map.num_slots++] = BRW_VARYING_SLOT_PNTC;
829 }
830 c.urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
831 c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset;
832 c.nr_setup_regs = c.nr_attr_regs;
833
834 c.prog_data.urb_read_length = c.nr_attr_regs;
835 c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
836
837 /* Which primitive? Or all three?
838 */
839 switch (key->primitive) {
840 case BRW_SF_PRIM_TRIANGLES:
841 c.nr_verts = 3;
842 brw_emit_tri_setup( &c, true );
843 break;
844 case BRW_SF_PRIM_LINES:
845 c.nr_verts = 2;
846 brw_emit_line_setup( &c, true );
847 break;
848 case BRW_SF_PRIM_POINTS:
849 c.nr_verts = 1;
850 if (key->do_point_sprite)
851 brw_emit_point_sprite_setup( &c, true );
852 else
853 brw_emit_point_setup( &c, true );
854 break;
855 case BRW_SF_PRIM_UNFILLED_TRIS:
856 c.nr_verts = 3;
857 brw_emit_anyprim_setup( &c );
858 break;
859 default:
860 unreachable("not reached");
861 }
862
863 /* FINISHME: SF programs use calculated jumps (i.e., JMPI with a register
864 * source). Compacting would be difficult.
865 */
866 /* brw_compact_instructions(&c.func, 0, 0, NULL); */
867
868 *prog_data = c.prog_data;
869
870 const unsigned *program = brw_get_program(&c.func, final_assembly_size);
871
872 if (INTEL_DEBUG(DEBUG_SF)) {
873 fprintf(stderr, "sf:\n");
874 brw_disassemble_with_labels(&compiler->isa,
875 program, 0, *final_assembly_size, stderr);
876 fprintf(stderr, "\n");
877 }
878
879 return program;
880 }
881