1 /*
2 * Copyright © 2006 - 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_compiler.h"
25 #include "brw_eu.h"
26
27 #include "common/gen_debug.h"
28
29 struct brw_sf_compile {
30 struct brw_codegen func;
31 struct brw_sf_prog_key key;
32 struct brw_sf_prog_data prog_data;
33
34 struct brw_reg pv;
35 struct brw_reg det;
36 struct brw_reg dx0;
37 struct brw_reg dx2;
38 struct brw_reg dy0;
39 struct brw_reg dy2;
40
41 /* z and 1/w passed in seperately:
42 */
43 struct brw_reg z[3];
44 struct brw_reg inv_w[3];
45
46 /* The vertices:
47 */
48 struct brw_reg vert[3];
49
50 /* Temporaries, allocated after last vertex reg.
51 */
52 struct brw_reg inv_det;
53 struct brw_reg a1_sub_a0;
54 struct brw_reg a2_sub_a0;
55 struct brw_reg tmp;
56
57 struct brw_reg m1Cx;
58 struct brw_reg m2Cy;
59 struct brw_reg m3C0;
60
61 GLuint nr_verts;
62 GLuint nr_attr_regs;
63 GLuint nr_setup_regs;
64 int urb_entry_read_offset;
65
66 /** The last known value of the f0.0 flag register. */
67 unsigned flag_value;
68
69 struct brw_vue_map vue_map;
70 };
71
72 /**
73 * Determine the vue slot corresponding to the given half of the given register.
74 */
vert_reg_to_vue_slot(struct brw_sf_compile * c,GLuint reg,int half)75 static inline int vert_reg_to_vue_slot(struct brw_sf_compile *c, GLuint reg,
76 int half)
77 {
78 return (reg + c->urb_entry_read_offset) * 2 + half;
79 }
80
81 /**
82 * Determine the varying corresponding to the given half of the given
83 * register. half=0 means the first half of a register, half=1 means the
84 * second half.
85 */
vert_reg_to_varying(struct brw_sf_compile * c,GLuint reg,int half)86 static inline int vert_reg_to_varying(struct brw_sf_compile *c, GLuint reg,
87 int half)
88 {
89 int vue_slot = vert_reg_to_vue_slot(c, reg, half);
90 return c->vue_map.slot_to_varying[vue_slot];
91 }
92
93 /**
94 * Determine the register corresponding to the given vue slot
95 */
get_vue_slot(struct brw_sf_compile * c,struct brw_reg vert,int vue_slot)96 static struct brw_reg get_vue_slot(struct brw_sf_compile *c,
97 struct brw_reg vert,
98 int vue_slot)
99 {
100 GLuint off = vue_slot / 2 - c->urb_entry_read_offset;
101 GLuint sub = vue_slot % 2;
102
103 return brw_vec4_grf(vert.nr + off, sub * 4);
104 }
105
106 /**
107 * Determine the register corresponding to the given varying.
108 */
get_varying(struct brw_sf_compile * c,struct brw_reg vert,GLuint varying)109 static struct brw_reg get_varying(struct brw_sf_compile *c,
110 struct brw_reg vert,
111 GLuint varying)
112 {
113 int vue_slot = c->vue_map.varying_to_slot[varying];
114 assert (vue_slot >= c->urb_entry_read_offset);
115 return get_vue_slot(c, vert, vue_slot);
116 }
117
118 static bool
have_attr(struct brw_sf_compile * c,GLuint attr)119 have_attr(struct brw_sf_compile *c, GLuint attr)
120 {
121 return (c->key.attrs & BITFIELD64_BIT(attr)) ? 1 : 0;
122 }
123
124 /***********************************************************************
125 * Twoside lighting
126 */
copy_bfc(struct brw_sf_compile * c,struct brw_reg vert)127 static void copy_bfc( struct brw_sf_compile *c,
128 struct brw_reg vert )
129 {
130 struct brw_codegen *p = &c->func;
131 GLuint i;
132
133 for (i = 0; i < 2; i++) {
134 if (have_attr(c, VARYING_SLOT_COL0+i) &&
135 have_attr(c, VARYING_SLOT_BFC0+i))
136 brw_MOV(p,
137 get_varying(c, vert, VARYING_SLOT_COL0+i),
138 get_varying(c, vert, VARYING_SLOT_BFC0+i));
139 }
140 }
141
142
do_twoside_color(struct brw_sf_compile * c)143 static void do_twoside_color( struct brw_sf_compile *c )
144 {
145 struct brw_codegen *p = &c->func;
146 GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L;
147
148 /* Already done in clip program:
149 */
150 if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS)
151 return;
152
153 /* If the vertex shader provides backface color, do the selection. The VS
154 * promises to set up the front color if the backface color is provided, but
155 * it may contain junk if never written to.
156 */
157 if (!(have_attr(c, VARYING_SLOT_COL0) && have_attr(c, VARYING_SLOT_BFC0)) &&
158 !(have_attr(c, VARYING_SLOT_COL1) && have_attr(c, VARYING_SLOT_BFC1)))
159 return;
160
161 /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order
162 * to get all channels active inside the IF. In the clipping code
163 * we run with NoMask, so it's not an option and we can use
164 * BRW_EXECUTE_1 for all comparisions.
165 */
166 brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0));
167 brw_IF(p, BRW_EXECUTE_4);
168 {
169 switch (c->nr_verts) {
170 case 3: copy_bfc(c, c->vert[2]);
171 case 2: copy_bfc(c, c->vert[1]);
172 case 1: copy_bfc(c, c->vert[0]);
173 }
174 }
175 brw_ENDIF(p);
176 }
177
178
179
180 /***********************************************************************
181 * Flat shading
182 */
183
copy_flatshaded_attributes(struct brw_sf_compile * c,struct brw_reg dst,struct brw_reg src)184 static void copy_flatshaded_attributes(struct brw_sf_compile *c,
185 struct brw_reg dst,
186 struct brw_reg src)
187 {
188 struct brw_codegen *p = &c->func;
189 int i;
190
191 for (i = 0; i < c->vue_map.num_slots; i++) {
192 if (c->key.interp_mode[i] == INTERP_MODE_FLAT) {
193 brw_MOV(p,
194 get_vue_slot(c, dst, i),
195 get_vue_slot(c, src, i));
196 }
197 }
198 }
199
count_flatshaded_attributes(struct brw_sf_compile * c)200 static int count_flatshaded_attributes(struct brw_sf_compile *c)
201 {
202 int i;
203 int count = 0;
204
205 for (i = 0; i < c->vue_map.num_slots; i++)
206 if (c->key.interp_mode[i] == INTERP_MODE_FLAT)
207 count++;
208
209 return count;
210 }
211
212
213
214 /* Need to use a computed jump to copy flatshaded attributes as the
215 * vertices are ordered according to y-coordinate before reaching this
216 * point, so the PV could be anywhere.
217 */
do_flatshade_triangle(struct brw_sf_compile * c)218 static void do_flatshade_triangle( struct brw_sf_compile *c )
219 {
220 struct brw_codegen *p = &c->func;
221 GLuint nr;
222 GLuint jmpi = 1;
223
224 /* Already done in clip program:
225 */
226 if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS)
227 return;
228
229 if (p->devinfo->gen == 5)
230 jmpi = 2;
231
232 nr = count_flatshaded_attributes(c);
233
234 brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1)));
235 brw_JMPI(p, c->pv, BRW_PREDICATE_NONE);
236
237 copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
238 copy_flatshaded_attributes(c, c->vert[2], c->vert[0]);
239 brw_JMPI(p, brw_imm_d(jmpi*(nr*4+1)), BRW_PREDICATE_NONE);
240
241 copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
242 copy_flatshaded_attributes(c, c->vert[2], c->vert[1]);
243 brw_JMPI(p, brw_imm_d(jmpi*nr*2), BRW_PREDICATE_NONE);
244
245 copy_flatshaded_attributes(c, c->vert[0], c->vert[2]);
246 copy_flatshaded_attributes(c, c->vert[1], c->vert[2]);
247 }
248
249
do_flatshade_line(struct brw_sf_compile * c)250 static void do_flatshade_line( struct brw_sf_compile *c )
251 {
252 struct brw_codegen *p = &c->func;
253 GLuint nr;
254 GLuint jmpi = 1;
255
256 /* Already done in clip program:
257 */
258 if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS)
259 return;
260
261 if (p->devinfo->gen == 5)
262 jmpi = 2;
263
264 nr = count_flatshaded_attributes(c);
265
266 brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1)));
267 brw_JMPI(p, c->pv, BRW_PREDICATE_NONE);
268 copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
269
270 brw_JMPI(p, brw_imm_ud(jmpi*nr), BRW_PREDICATE_NONE);
271 copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
272 }
273
274
275 /***********************************************************************
276 * Triangle setup.
277 */
278
279
alloc_regs(struct brw_sf_compile * c)280 static void alloc_regs( struct brw_sf_compile *c )
281 {
282 GLuint reg, i;
283
284 /* Values computed by fixed function unit:
285 */
286 c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D);
287 c->det = brw_vec1_grf(1, 2);
288 c->dx0 = brw_vec1_grf(1, 3);
289 c->dx2 = brw_vec1_grf(1, 4);
290 c->dy0 = brw_vec1_grf(1, 5);
291 c->dy2 = brw_vec1_grf(1, 6);
292
293 /* z and 1/w passed in seperately:
294 */
295 c->z[0] = brw_vec1_grf(2, 0);
296 c->inv_w[0] = brw_vec1_grf(2, 1);
297 c->z[1] = brw_vec1_grf(2, 2);
298 c->inv_w[1] = brw_vec1_grf(2, 3);
299 c->z[2] = brw_vec1_grf(2, 4);
300 c->inv_w[2] = brw_vec1_grf(2, 5);
301
302 /* The vertices:
303 */
304 reg = 3;
305 for (i = 0; i < c->nr_verts; i++) {
306 c->vert[i] = brw_vec8_grf(reg, 0);
307 reg += c->nr_attr_regs;
308 }
309
310 /* Temporaries, allocated after last vertex reg.
311 */
312 c->inv_det = brw_vec1_grf(reg, 0); reg++;
313 c->a1_sub_a0 = brw_vec8_grf(reg, 0); reg++;
314 c->a2_sub_a0 = brw_vec8_grf(reg, 0); reg++;
315 c->tmp = brw_vec8_grf(reg, 0); reg++;
316
317 /* Note grf allocation:
318 */
319 c->prog_data.total_grf = reg;
320
321
322 /* Outputs of this program - interpolation coefficients for
323 * rasterization:
324 */
325 c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0);
326 c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0);
327 c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0);
328 }
329
330
copy_z_inv_w(struct brw_sf_compile * c)331 static void copy_z_inv_w( struct brw_sf_compile *c )
332 {
333 struct brw_codegen *p = &c->func;
334 GLuint i;
335
336 /* Copy both scalars with a single MOV:
337 */
338 for (i = 0; i < c->nr_verts; i++)
339 brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i]));
340 }
341
342
invert_det(struct brw_sf_compile * c)343 static void invert_det( struct brw_sf_compile *c)
344 {
345 /* Looks like we invert all 8 elements just to get 1/det in
346 * position 2 !?!
347 */
348 gen4_math(&c->func,
349 c->inv_det,
350 BRW_MATH_FUNCTION_INV,
351 0,
352 c->det,
353 BRW_MATH_PRECISION_FULL);
354
355 }
356
357
358 static bool
calculate_masks(struct brw_sf_compile * c,GLuint reg,GLushort * pc,GLushort * pc_persp,GLushort * pc_linear)359 calculate_masks(struct brw_sf_compile *c,
360 GLuint reg,
361 GLushort *pc,
362 GLushort *pc_persp,
363 GLushort *pc_linear)
364 {
365 bool is_last_attr = (reg == c->nr_setup_regs - 1);
366 enum glsl_interp_mode interp;
367
368 *pc_persp = 0;
369 *pc_linear = 0;
370 *pc = 0xf;
371
372 interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 0)];
373 if (interp == INTERP_MODE_SMOOTH) {
374 *pc_linear = 0xf;
375 *pc_persp = 0xf;
376 } else if (interp == INTERP_MODE_NOPERSPECTIVE)
377 *pc_linear = 0xf;
378
379 /* Maybe only processs one attribute on the final round:
380 */
381 if (vert_reg_to_varying(c, reg, 1) != BRW_VARYING_SLOT_COUNT) {
382 *pc |= 0xf0;
383
384 interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 1)];
385 if (interp == INTERP_MODE_SMOOTH) {
386 *pc_linear |= 0xf0;
387 *pc_persp |= 0xf0;
388 } else if (interp == INTERP_MODE_NOPERSPECTIVE)
389 *pc_linear |= 0xf0;
390 }
391
392 return is_last_attr;
393 }
394
395 /* Calculates the predicate control for which channels of a reg
396 * (containing 2 attrs) to do point sprite coordinate replacement on.
397 */
398 static uint16_t
calculate_point_sprite_mask(struct brw_sf_compile * c,GLuint reg)399 calculate_point_sprite_mask(struct brw_sf_compile *c, GLuint reg)
400 {
401 int varying1, varying2;
402 uint16_t pc = 0;
403
404 varying1 = vert_reg_to_varying(c, reg, 0);
405 if (varying1 >= VARYING_SLOT_TEX0 && varying1 <= VARYING_SLOT_TEX7) {
406 if (c->key.point_sprite_coord_replace & (1 << (varying1 - VARYING_SLOT_TEX0)))
407 pc |= 0x0f;
408 }
409 if (varying1 == BRW_VARYING_SLOT_PNTC)
410 pc |= 0x0f;
411
412 varying2 = vert_reg_to_varying(c, reg, 1);
413 if (varying2 >= VARYING_SLOT_TEX0 && varying2 <= VARYING_SLOT_TEX7) {
414 if (c->key.point_sprite_coord_replace & (1 << (varying2 -
415 VARYING_SLOT_TEX0)))
416 pc |= 0xf0;
417 }
418 if (varying2 == BRW_VARYING_SLOT_PNTC)
419 pc |= 0xf0;
420
421 return pc;
422 }
423
424 static void
set_predicate_control_flag_value(struct brw_codegen * p,struct brw_sf_compile * c,unsigned value)425 set_predicate_control_flag_value(struct brw_codegen *p,
426 struct brw_sf_compile *c,
427 unsigned value)
428 {
429 brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
430
431 if (value != 0xff) {
432 if (value != c->flag_value) {
433 brw_MOV(p, brw_flag_reg(0, 0), brw_imm_uw(value));
434 c->flag_value = value;
435 }
436
437 brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL);
438 }
439 }
440
/**
 * Emit the setup program for triangles: for every attribute register,
 * compute dA/dx, dA/dy and the start value, and write them to the URB.
 *
 * \param allocate  when true, run alloc_regs() first; callers that have
 *                  already allocated registers pass false
 */
static void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate)
{
   struct brw_codegen *p = &c->func;
   GLuint r;

   /* 0xff never gets loaded into the flag register, so starting from it
    * forces the first real mask to be loaded.
    */
   c->flag_value = 0xff;
   c->nr_verts = 3;

   if (allocate)
      alloc_regs(c);

   invert_det(c);
   copy_z_inv_w(c);

   if (c->key.do_twoside_color)
      do_twoside_color(c);

   if (c->key.contains_flat_varying)
      do_flatshade_triangle(c);

   for (r = 0; r < c->nr_setup_regs; r++) {
      /* Pair of incoming attributes, one register per vertex:
       */
      struct brw_reg a0 = offset(c->vert[0], r);
      struct brw_reg a1 = offset(c->vert[1], r);
      struct brw_reg a2 = offset(c->vert[2], r);
      GLushort pc, pc_persp, pc_linear;
      const bool last = calculate_masks(c, r, &pc, &pc_persp, &pc_linear);

      /* Perspective-correct channels are pre-multiplied by 1/w. */
      if (pc_persp) {
         set_predicate_control_flag_value(p, c, pc_persp);
         brw_MUL(p, a0, a0, c->inv_w[0]);
         brw_MUL(p, a1, a1, c->inv_w[1]);
         brw_MUL(p, a2, a2, c->inv_w[2]);
      }

      /* Calculate coefficients for interpolated values:
       */
      if (pc_linear) {
         set_predicate_control_flag_value(p, c, pc_linear);

         brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
         brw_ADD(p, c->a2_sub_a0, a2, negate(a0));

         /* calculate dA/dx: the MUL targets the null register and the MAC
          * that follows supplies the second product (presumably combined
          * through the accumulator).
          */
         brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2);
         brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
         brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);

         /* calculate dA/dy
          */
         brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0);
         brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));
         brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
      }

      set_predicate_control_flag_value(p, c, pc);

      /* start point for interpolation
       */
      brw_MOV(p, c->m3C0, a0);

      /* Copy m0..m3 to URB.  m0 is implicitly copied from r0 in
       * the send instruction:
       */
      brw_urb_WRITE(p,
                    brw_null_reg(),
                    0,
                    brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
                    last ? BRW_URB_WRITE_EOT_COMPLETE
                         : BRW_URB_WRITE_NO_FLAGS,
                    4,                  /* msg len */
                    0,                  /* response len */
                    r*4,                /* offset */
                    BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
   }

   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
}
527
528
529
/**
 * Emit the setup program for lines: for every attribute register, compute
 * the x and y gradients and the start value, and write them to the URB.
 *
 * \param allocate  when true, run alloc_regs() first; callers that have
 *                  already allocated registers pass false
 */
static void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate)
{
   struct brw_codegen *p = &c->func;
   GLuint r;

   /* 0xff never gets loaded into the flag register, so starting from it
    * forces the first real mask to be loaded.
    */
   c->flag_value = 0xff;
   c->nr_verts = 2;

   if (allocate)
      alloc_regs(c);

   invert_det(c);
   copy_z_inv_w(c);

   if (c->key.contains_flat_varying)
      do_flatshade_line(c);

   for (r = 0; r < c->nr_setup_regs; r++) {
      /* Pair of incoming attributes, one register per vertex:
       */
      struct brw_reg a0 = offset(c->vert[0], r);
      struct brw_reg a1 = offset(c->vert[1], r);
      GLushort pc, pc_persp, pc_linear;
      const bool last = calculate_masks(c, r, &pc, &pc_persp, &pc_linear);

      /* Perspective-correct channels are pre-multiplied by 1/w. */
      if (pc_persp) {
         set_predicate_control_flag_value(p, c, pc_persp);
         brw_MUL(p, a0, a0, c->inv_w[0]);
         brw_MUL(p, a1, a1, c->inv_w[1]);
      }

      /* Calculate coefficients for position, color:
       */
      if (pc_linear) {
         set_predicate_control_flag_value(p, c, pc_linear);

         brw_ADD(p, c->a1_sub_a0, a1, negate(a0));

         /* x gradient */
         brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);
         brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);

         /* y gradient */
         brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
         brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
      }

      set_predicate_control_flag_value(p, c, pc);

      /* start point for interpolation
       */
      brw_MOV(p, c->m3C0, a0);

      /* Copy m0..m3 to URB.
       */
      brw_urb_WRITE(p,
                    brw_null_reg(),
                    0,
                    brw_vec8_grf(0, 0),
                    last ? BRW_URB_WRITE_EOT_COMPLETE
                         : BRW_URB_WRITE_NO_FLAGS,
                    4,                  /* msg len */
                    0,                  /* response len */
                    r*4,                /* urb destination offset */
                    BRW_URB_SWIZZLE_TRANSPOSE);
   }

   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
}
601
/**
 * Emit the setup program for point sprites.
 *
 * Attributes selected for coordinate replacement get generated
 * coefficients; every other attribute is passed through as a constant
 * with zero gradients.
 *
 * \param allocate  when true, run alloc_regs() first; callers that have
 *                  already allocated registers pass false
 */
static void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate)
{
   struct brw_codegen *p = &c->func;
   GLuint r;

   /* 0xff never gets loaded into the flag register, so starting from it
    * forces the first real mask to be loaded.
    */
   c->flag_value = 0xff;
   c->nr_verts = 1;

   if (allocate)
      alloc_regs(c);

   copy_z_inv_w(c);

   for (r = 0; r < c->nr_setup_regs; r++) {
      struct brw_reg a0 = offset(c->vert[0], r);
      GLushort pc, pc_persp, pc_linear, pc_coord_replace;
      const bool last = calculate_masks(c, r, &pc, &pc_persp, &pc_linear);

      /* Replaced coordinates are generated below, so they must not be
       * scaled by 1/w.
       */
      pc_coord_replace = calculate_point_sprite_mask(c, r);
      pc_persp &= ~pc_coord_replace;

      if (pc_persp) {
         set_predicate_control_flag_value(p, c, pc_persp);
         brw_MUL(p, a0, a0, c->inv_w[0]);
      }

      /* Point sprite coordinate replacement: A texcoord with this
       * enabled gets replaced with the value (x, y, 0, 1) where x and
       * y vary from 0 to 1 across the horizontal and vertical of the
       * point.
       */
      if (pc_coord_replace) {
         const bool lower_left = c->key.sprite_origin_lower_left;

         set_predicate_control_flag_value(p, c, pc_coord_replace);

         /* Calculate 1.0/PointWidth */
         gen4_math(p,
                   c->tmp,
                   BRW_MATH_FUNCTION_INV,
                   0,
                   c->dx0,
                   BRW_MATH_PRECISION_FULL);

         brw_set_default_access_mode(p, BRW_ALIGN_16);

         /* dA/dx, dA/dy: x grows by 1/width per pixel; y does the same,
          * negated when the sprite origin is the lower left.
          */
         brw_MOV(p, c->m1Cx, brw_imm_f(0.0));
         brw_MOV(p, c->m2Cy, brw_imm_f(0.0));
         brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp);
         brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y),
                 lower_left ? negate(c->tmp) : c->tmp);

         /* attribute constant offset: w is always 1; with a lower-left
          * origin y starts at 1 as well.
          */
         brw_MOV(p, c->m3C0, brw_imm_f(0.0));
         brw_MOV(p,
                 brw_writemask(c->m3C0,
                               lower_left ? WRITEMASK_YW : WRITEMASK_W),
                 brw_imm_f(1.0));

         brw_set_default_access_mode(p, BRW_ALIGN_1);
      }

      /* Everything that is not replaced is constant across the point. */
      if (pc & ~pc_coord_replace) {
         set_predicate_control_flag_value(p, c, pc & ~pc_coord_replace);
         brw_MOV(p, c->m1Cx, brw_imm_ud(0));
         brw_MOV(p, c->m2Cy, brw_imm_ud(0));
         brw_MOV(p, c->m3C0, a0); /* constant value */
      }

      set_predicate_control_flag_value(p, c, pc);

      /* Copy m0..m3 to URB. */
      brw_urb_WRITE(p,
                    brw_null_reg(),
                    0,
                    brw_vec8_grf(0, 0),
                    last ? BRW_URB_WRITE_EOT_COMPLETE
                         : BRW_URB_WRITE_NO_FLAGS,
                    4,                  /* msg len */
                    0,                  /* response len */
                    r*4,                /* urb destination offset */
                    BRW_URB_SWIZZLE_TRANSPOSE);
   }

   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
}
690
691 /* Points setup - several simplifications as all attributes are
692 * constant across the face of the point (point sprites excluded!)
693 */
brw_emit_point_setup(struct brw_sf_compile * c,bool allocate)694 static void brw_emit_point_setup(struct brw_sf_compile *c, bool allocate)
695 {
696 struct brw_codegen *p = &c->func;
697 GLuint i;
698
699 c->flag_value = 0xff;
700 c->nr_verts = 1;
701
702 if (allocate)
703 alloc_regs(c);
704
705 copy_z_inv_w(c);
706
707 brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */
708 brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */
709
710 for (i = 0; i < c->nr_setup_regs; i++)
711 {
712 struct brw_reg a0 = offset(c->vert[0], i);
713 GLushort pc, pc_persp, pc_linear;
714 bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
715
716 if (pc_persp)
717 {
718 /* This seems odd as the values are all constant, but the
719 * fragment shader will be expecting it:
720 */
721 set_predicate_control_flag_value(p, c, pc_persp);
722 brw_MUL(p, a0, a0, c->inv_w[0]);
723 }
724
725
726 /* The delta values are always zero, just send the starting
727 * coordinate. Again, this is to fit in with the interpolation
728 * code in the fragment shader.
729 */
730 {
731 set_predicate_control_flag_value(p, c, pc);
732
733 brw_MOV(p, c->m3C0, a0); /* constant value */
734
735 /* Copy m0..m3 to URB.
736 */
737 brw_urb_WRITE(p,
738 brw_null_reg(),
739 0,
740 brw_vec8_grf(0, 0),
741 last ? BRW_URB_WRITE_EOT_COMPLETE
742 : BRW_URB_WRITE_NO_FLAGS,
743 4, /* msg len */
744 0, /* response len */
745 i*4, /* urb destination offset */
746 BRW_URB_SWIZZLE_TRANSPOSE);
747 }
748 }
749
750 brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
751 }
752
brw_emit_anyprim_setup(struct brw_sf_compile * c)753 static void brw_emit_anyprim_setup( struct brw_sf_compile *c )
754 {
755 struct brw_codegen *p = &c->func;
756 struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);
757 struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0);
758 struct brw_reg primmask;
759 int jmp;
760 struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
761
762 c->nr_verts = 3;
763 alloc_regs(c);
764
765 primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD);
766
767 brw_MOV(p, primmask, brw_imm_ud(1));
768 brw_SHL(p, primmask, primmask, payload_prim);
769
770 brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) |
771 (1<<_3DPRIM_TRISTRIP) |
772 (1<<_3DPRIM_TRIFAN) |
773 (1<<_3DPRIM_TRISTRIP_REVERSE) |
774 (1<<_3DPRIM_POLYGON) |
775 (1<<_3DPRIM_RECTLIST) |
776 (1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
777 brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
778 jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
779 brw_emit_tri_setup(c, false);
780 brw_land_fwd_jump(p, jmp);
781
782 brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) |
783 (1<<_3DPRIM_LINESTRIP) |
784 (1<<_3DPRIM_LINELOOP) |
785 (1<<_3DPRIM_LINESTRIP_CONT) |
786 (1<<_3DPRIM_LINESTRIP_BF) |
787 (1<<_3DPRIM_LINESTRIP_CONT_BF)));
788 brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
789 jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
790 brw_emit_line_setup(c, false);
791 brw_land_fwd_jump(p, jmp);
792
793 brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
794 brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
795 jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
796 brw_emit_point_sprite_setup(c, false);
797 brw_land_fwd_jump(p, jmp);
798
799 brw_emit_point_setup( c, false );
800 }
801
802 const unsigned *
brw_compile_sf(const struct brw_compiler * compiler,void * mem_ctx,const struct brw_sf_prog_key * key,struct brw_sf_prog_data * prog_data,struct brw_vue_map * vue_map,unsigned * final_assembly_size)803 brw_compile_sf(const struct brw_compiler *compiler,
804 void *mem_ctx,
805 const struct brw_sf_prog_key *key,
806 struct brw_sf_prog_data *prog_data,
807 struct brw_vue_map *vue_map,
808 unsigned *final_assembly_size)
809 {
810 struct brw_sf_compile c;
811 memset(&c, 0, sizeof(c));
812
813 /* Begin the compilation:
814 */
815 brw_init_codegen(compiler->devinfo, &c.func, mem_ctx);
816
817 c.key = *key;
818 c.vue_map = *vue_map;
819 if (c.key.do_point_coord) {
820 /*
821 * gl_PointCoord is a FS instead of VS builtin variable, thus it's
822 * not included in c.vue_map generated in VS stage. Here we add
823 * it manually to let SF shader generate the needed interpolation
824 * coefficient for FS shader.
825 */
826 c.vue_map.varying_to_slot[BRW_VARYING_SLOT_PNTC] = c.vue_map.num_slots;
827 c.vue_map.slot_to_varying[c.vue_map.num_slots++] = BRW_VARYING_SLOT_PNTC;
828 }
829 c.urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
830 c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset;
831 c.nr_setup_regs = c.nr_attr_regs;
832
833 c.prog_data.urb_read_length = c.nr_attr_regs;
834 c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
835
836 /* Which primitive? Or all three?
837 */
838 switch (key->primitive) {
839 case BRW_SF_PRIM_TRIANGLES:
840 c.nr_verts = 3;
841 brw_emit_tri_setup( &c, true );
842 break;
843 case BRW_SF_PRIM_LINES:
844 c.nr_verts = 2;
845 brw_emit_line_setup( &c, true );
846 break;
847 case BRW_SF_PRIM_POINTS:
848 c.nr_verts = 1;
849 if (key->do_point_sprite)
850 brw_emit_point_sprite_setup( &c, true );
851 else
852 brw_emit_point_setup( &c, true );
853 break;
854 case BRW_SF_PRIM_UNFILLED_TRIS:
855 c.nr_verts = 3;
856 brw_emit_anyprim_setup( &c );
857 break;
858 default:
859 unreachable("not reached");
860 }
861
862 /* FINISHME: SF programs use calculated jumps (i.e., JMPI with a register
863 * source). Compacting would be difficult.
864 */
865 /* brw_compact_instructions(&c.func, 0, 0, NULL); */
866
867 *prog_data = c.prog_data;
868
869 const unsigned *program = brw_get_program(&c.func, final_assembly_size);
870
871 if (unlikely(INTEL_DEBUG & DEBUG_SF)) {
872 fprintf(stderr, "sf:\n");
873 brw_disassemble(compiler->devinfo,
874 program, 0, *final_assembly_size, stderr);
875 fprintf(stderr, "\n");
876 }
877
878 return program;
879 }
880