• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2009 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27 
28 #include "brw_context.h"
29 #include "brw_state.h"
30 #include "brw_defines.h"
31 #include "brw_util.h"
32 #include "compiler/nir/nir.h"
33 #include "main/macros.h"
34 #include "main/fbobject.h"
35 #include "main/framebuffer.h"
36 #include "intel_batchbuffer.h"
37 
38 /**
39  * Determine the appropriate attribute override value to store into the
40  * 3DSTATE_SF structure for a given fragment shader attribute.  The attribute
41  * override value contains two pieces of information: the location of the
42  * attribute in the VUE (relative to urb_entry_read_offset, see below), and a
43  * flag indicating whether to "swizzle" the attribute based on the direction
44  * the triangle is facing.
45  *
46  * If an attribute is "swizzled", then the given VUE location is used for
47  * front-facing triangles, and the VUE location that immediately follows is
48  * used for back-facing triangles.  We use this to implement the mapping from
49  * gl_FrontColor/gl_BackColor to gl_Color.
50  *
51  * urb_entry_read_offset is the offset into the VUE at which the SF unit is
52  * being instructed to begin reading attribute data.  It can be set to a
53  * nonzero value to prevent the SF unit from wasting time reading elements of
54  * the VUE that are not needed by the fragment shader.  It is measured in
55  * 256-bit increments.
56  */
57 static uint32_t
get_attr_override(const struct brw_vue_map * vue_map,int urb_entry_read_offset,int fs_attr,bool two_side_color,uint32_t * max_source_attr)58 get_attr_override(const struct brw_vue_map *vue_map, int urb_entry_read_offset,
59                   int fs_attr, bool two_side_color, uint32_t *max_source_attr)
60 {
61    /* Find the VUE slot for this attribute. */
62    int slot = vue_map->varying_to_slot[fs_attr];
63 
64    /* Viewport and Layer are stored in the VUE header.  We need to override
65     * them to zero if earlier stages didn't write them, as GL requires that
66     * they read back as zero when not explicitly set.
67     */
68    if (fs_attr == VARYING_SLOT_VIEWPORT || fs_attr == VARYING_SLOT_LAYER) {
69       unsigned override =
70          ATTRIBUTE_0_OVERRIDE_X | ATTRIBUTE_0_OVERRIDE_W |
71          ATTRIBUTE_CONST_0000 << ATTRIBUTE_0_CONST_SOURCE_SHIFT;
72 
73       if (!(vue_map->slots_valid & VARYING_BIT_LAYER))
74          override |= ATTRIBUTE_0_OVERRIDE_Y;
75       if (!(vue_map->slots_valid & VARYING_BIT_VIEWPORT))
76          override |= ATTRIBUTE_0_OVERRIDE_Z;
77 
78       return override;
79    }
80 
81    /* If there was only a back color written but not front, use back
82     * as the color instead of undefined
83     */
84    if (slot == -1 && fs_attr == VARYING_SLOT_COL0)
85       slot = vue_map->varying_to_slot[VARYING_SLOT_BFC0];
86    if (slot == -1 && fs_attr == VARYING_SLOT_COL1)
87       slot = vue_map->varying_to_slot[VARYING_SLOT_BFC1];
88 
89    if (slot == -1) {
90       /* This attribute does not exist in the VUE--that means that the vertex
91        * shader did not write to it.  This means that either:
92        *
93        * (a) This attribute is a texture coordinate, and it is going to be
94        * replaced with point coordinates (as a consequence of a call to
95        * glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)), so the
96        * hardware will ignore whatever attribute override we supply.
97        *
98        * (b) This attribute is read by the fragment shader but not written by
99        * the vertex shader, so its value is undefined.  Therefore the
100        * attribute override we supply doesn't matter.
101        *
102        * (c) This attribute is gl_PrimitiveID, and it wasn't written by the
103        * previous shader stage.
104        *
105        * Note that we don't have to worry about the cases where the attribute
106        * is gl_PointCoord or is undergoing point sprite coordinate
107        * replacement, because in those cases, this function isn't called.
108        *
109        * In case (c), we need to program the attribute overrides so that the
110        * primitive ID will be stored in this slot.  In every other case, the
111        * attribute override we supply doesn't matter.  So just go ahead and
112        * program primitive ID in every case.
113        */
114       return (ATTRIBUTE_0_OVERRIDE_W |
115               ATTRIBUTE_0_OVERRIDE_Z |
116               ATTRIBUTE_0_OVERRIDE_Y |
117               ATTRIBUTE_0_OVERRIDE_X |
118               (ATTRIBUTE_CONST_PRIM_ID << ATTRIBUTE_0_CONST_SOURCE_SHIFT));
119    }
120 
121    /* Compute the location of the attribute relative to urb_entry_read_offset.
122     * Each increment of urb_entry_read_offset represents a 256-bit value, so
123     * it counts for two 128-bit VUE slots.
124     */
125    int source_attr = slot - 2 * urb_entry_read_offset;
126    assert(source_attr >= 0 && source_attr < 32);
127 
128    /* If we are doing two-sided color, and the VUE slot following this one
129     * represents a back-facing color, then we need to instruct the SF unit to
130     * do back-facing swizzling.
131     */
132    bool swizzling = two_side_color &&
133       ((vue_map->slot_to_varying[slot] == VARYING_SLOT_COL0 &&
134         vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC0) ||
135        (vue_map->slot_to_varying[slot] == VARYING_SLOT_COL1 &&
136         vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC1));
137 
138    /* Update max_source_attr.  If swizzling, the SF will read this slot + 1. */
139    if (*max_source_attr < source_attr + swizzling)
140       *max_source_attr = source_attr + swizzling;
141 
142    if (swizzling) {
143       return source_attr |
144          (ATTRIBUTE_SWIZZLE_INPUTATTR_FACING << ATTRIBUTE_SWIZZLE_SHIFT);
145    }
146 
147    return source_attr;
148 }
149 
150 
151 /**
152  * Create the mapping from the FS inputs we produce to the previous pipeline
153  * stage (GS or VS) outputs they source from.
154  */
155 void
calculate_attr_overrides(const struct brw_context * brw,uint16_t * attr_overrides,uint32_t * point_sprite_enables,uint32_t * urb_entry_read_length,uint32_t * urb_entry_read_offset)156 calculate_attr_overrides(const struct brw_context *brw,
157                          uint16_t *attr_overrides,
158                          uint32_t *point_sprite_enables,
159                          uint32_t *urb_entry_read_length,
160                          uint32_t *urb_entry_read_offset)
161 {
162    /* BRW_NEW_FS_PROG_DATA */
163    const struct brw_wm_prog_data *wm_prog_data =
164       brw_wm_prog_data(brw->wm.base.prog_data);
165    uint32_t max_source_attr = 0;
166 
167    *point_sprite_enables = 0;
168 
169    *urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
170 
171    /* BRW_NEW_FRAGMENT_PROGRAM
172     *
173     * If the fragment shader reads VARYING_SLOT_LAYER, then we need to pass in
174     * the full vertex header.  Otherwise, we can program the SF to start
175     * reading at an offset of 1 (2 varying slots) to skip unnecessary data:
176     * - VARYING_SLOT_PSIZ and BRW_VARYING_SLOT_NDC on gen4-5
177     * - VARYING_SLOT_{PSIZ,LAYER} and VARYING_SLOT_POS on gen6+
178     */
179 
180    bool fs_needs_vue_header = brw->fragment_program->info.inputs_read &
181       (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
182 
183    *urb_entry_read_offset = fs_needs_vue_header ? 0 : 1;
184 
185    /* From the Ivybridge PRM, Vol 2 Part 1, 3DSTATE_SBE,
186     * description of dw10 Point Sprite Texture Coordinate Enable:
187     *
188     * "This field must be programmed to zero when non-point primitives
189     * are rendered."
190     *
191     * The SandyBridge PRM doesn't explicitly say that point sprite enables
192     * must be programmed to zero when rendering non-point primitives, but
193     * the IvyBridge PRM does, and if we don't, we get garbage.
194     *
195     * This is not required on Haswell, as the hardware ignores this state
196     * when drawing non-points -- although we do still need to be careful to
197     * correctly set the attr overrides.
198     *
199     * _NEW_POLYGON
200     * BRW_NEW_PRIMITIVE | BRW_NEW_GS_PROG_DATA | BRW_NEW_TES_PROG_DATA
201     */
202    bool drawing_points = brw_is_drawing_points(brw);
203 
204    /* Initialize all the attr_overrides to 0.  In the loop below we'll modify
205     * just the ones that correspond to inputs used by the fs.
206     */
207    memset(attr_overrides, 0, 16*sizeof(*attr_overrides));
208 
209    for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) {
210       int input_index = wm_prog_data->urb_setup[attr];
211 
212       if (input_index < 0)
213 	 continue;
214 
215       /* _NEW_POINT */
216       bool point_sprite = false;
217       if (drawing_points) {
218          if (brw->ctx.Point.PointSprite &&
219              (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7) &&
220              (brw->ctx.Point.CoordReplace & (1u << (attr - VARYING_SLOT_TEX0)))) {
221             point_sprite = true;
222          }
223 
224          if (attr == VARYING_SLOT_PNTC)
225             point_sprite = true;
226 
227          if (point_sprite)
228             *point_sprite_enables |= (1 << input_index);
229       }
230 
231       /* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_LIGHT | _NEW_PROGRAM */
232       uint16_t attr_override = point_sprite ? 0 :
233          get_attr_override(&brw->vue_map_geom_out,
234 			   *urb_entry_read_offset, attr,
235                            brw->ctx.VertexProgram._TwoSideEnabled,
236                            &max_source_attr);
237 
238       /* The hardware can only do the overrides on 16 overrides at a
239        * time, and the other up to 16 have to be lined up so that the
240        * input index = the output index.  We'll need to do some
241        * tweaking to make sure that's the case.
242        */
243       if (input_index < 16)
244          attr_overrides[input_index] = attr_override;
245       else
246          assert(attr_override == input_index);
247    }
248 
249    /* From the Sandy Bridge PRM, Volume 2, Part 1, documentation for
250     * 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length":
251     *
252     * "This field should be set to the minimum length required to read the
253     *  maximum source attribute.  The maximum source attribute is indicated
254     *  by the maximum value of the enabled Attribute # Source Attribute if
255     *  Attribute Swizzle Enable is set, Number of Output Attributes-1 if
256     *  enable is not set.
257     *  read_length = ceiling((max_source_attr + 1) / 2)
258     *
259     *  [errata] Corruption/Hang possible if length programmed larger than
260     *  recommended"
261     *
262     * Similar text exists for Ivy Bridge.
263     */
264    *urb_entry_read_length = ALIGN(max_source_attr + 1, 2) / 2;
265 }
266 
267 
268 static void
upload_sf_state(struct brw_context * brw)269 upload_sf_state(struct brw_context *brw)
270 {
271    struct gl_context *ctx = &brw->ctx;
272    /* BRW_NEW_FS_PROG_DATA */
273    const struct brw_wm_prog_data *wm_prog_data =
274       brw_wm_prog_data(brw->wm.base.prog_data);
275    uint32_t num_outputs = wm_prog_data->num_varying_inputs;
276    uint32_t dw1, dw2, dw3, dw4;
277    uint32_t point_sprite_enables;
278    int i;
279    /* _NEW_BUFFER */
280    bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
281    const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
282 
283    float point_size;
284    uint16_t attr_overrides[16];
285    uint32_t point_sprite_origin;
286 
287    dw1 = GEN6_SF_SWIZZLE_ENABLE | num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT;
288    dw2 = GEN6_SF_STATISTICS_ENABLE;
289    dw3 = GEN6_SF_SCISSOR_ENABLE;
290    dw4 = 0;
291 
292    if (brw->sf.viewport_transform_enable)
293        dw2 |= GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
294 
295    /* _NEW_POLYGON */
296    if (ctx->Polygon._FrontBit == render_to_fbo)
297       dw2 |= GEN6_SF_WINDING_CCW;
298 
299    if (ctx->Polygon.OffsetFill)
300        dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;
301 
302    if (ctx->Polygon.OffsetLine)
303        dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;
304 
305    if (ctx->Polygon.OffsetPoint)
306        dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
307 
308    switch (ctx->Polygon.FrontMode) {
309    case GL_FILL:
310        dw2 |= GEN6_SF_FRONT_SOLID;
311        break;
312 
313    case GL_LINE:
314        dw2 |= GEN6_SF_FRONT_WIREFRAME;
315        break;
316 
317    case GL_POINT:
318        dw2 |= GEN6_SF_FRONT_POINT;
319        break;
320 
321    default:
322        unreachable("not reached");
323    }
324 
325    switch (ctx->Polygon.BackMode) {
326    case GL_FILL:
327        dw2 |= GEN6_SF_BACK_SOLID;
328        break;
329 
330    case GL_LINE:
331        dw2 |= GEN6_SF_BACK_WIREFRAME;
332        break;
333 
334    case GL_POINT:
335        dw2 |= GEN6_SF_BACK_POINT;
336        break;
337 
338    default:
339        unreachable("not reached");
340    }
341 
342    /* _NEW_POLYGON */
343    if (ctx->Polygon.CullFlag) {
344       switch (ctx->Polygon.CullFaceMode) {
345       case GL_FRONT:
346 	 dw3 |= GEN6_SF_CULL_FRONT;
347 	 break;
348       case GL_BACK:
349 	 dw3 |= GEN6_SF_CULL_BACK;
350 	 break;
351       case GL_FRONT_AND_BACK:
352 	 dw3 |= GEN6_SF_CULL_BOTH;
353 	 break;
354       default:
355 	 unreachable("not reached");
356       }
357    } else {
358       dw3 |= GEN6_SF_CULL_NONE;
359    }
360 
361    /* _NEW_LINE */
362    {
363       uint32_t line_width_u3_7 = brw_get_line_width(brw);
364       dw3 |= line_width_u3_7 << GEN6_SF_LINE_WIDTH_SHIFT;
365    }
366    if (ctx->Line.SmoothFlag) {
367       dw3 |= GEN6_SF_LINE_AA_ENABLE;
368       dw3 |= GEN6_SF_LINE_AA_MODE_TRUE;
369       dw3 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0;
370    }
371    /* _NEW_MULTISAMPLE */
372    if (multisampled_fbo && ctx->Multisample.Enabled)
373       dw3 |= GEN6_SF_MSRAST_ON_PATTERN;
374 
375    /* _NEW_PROGRAM | _NEW_POINT, BRW_NEW_VUE_MAP_GEOM_OUT */
376    if (use_state_point_size(brw))
377       dw4 |= GEN6_SF_USE_STATE_POINT_WIDTH;
378 
379    /* _NEW_POINT - Clamp to ARB_point_parameters user limits */
380    point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
381 
382    /* Clamp to the hardware limits and convert to fixed point */
383    dw4 |= U_FIXED(CLAMP(point_size, 0.125f, 255.875f), 3);
384 
385    /*
386     * Window coordinates in an FBO are inverted, which means point
387     * sprite origin must be inverted, too.
388     */
389    if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo) {
390       point_sprite_origin = GEN6_SF_POINT_SPRITE_LOWERLEFT;
391    } else {
392       point_sprite_origin = GEN6_SF_POINT_SPRITE_UPPERLEFT;
393    }
394    dw1 |= point_sprite_origin;
395 
396    /* _NEW_LIGHT */
397    if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
398       dw4 |=
399 	 (2 << GEN6_SF_TRI_PROVOKE_SHIFT) |
400 	 (2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT) |
401 	 (1 << GEN6_SF_LINE_PROVOKE_SHIFT);
402    } else {
403       dw4 |=
404 	 (1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT);
405    }
406 
407    /* BRW_NEW_VUE_MAP_GEOM_OUT | BRW_NEW_FRAGMENT_PROGRAM |
408     * _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM | BRW_NEW_FS_PROG_DATA
409     */
410    uint32_t urb_entry_read_length;
411    uint32_t urb_entry_read_offset;
412    calculate_attr_overrides(brw, attr_overrides, &point_sprite_enables,
413                             &urb_entry_read_length, &urb_entry_read_offset);
414    dw1 |= (urb_entry_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
415            urb_entry_read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT);
416 
417    BEGIN_BATCH(20);
418    OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2));
419    OUT_BATCH(dw1);
420    OUT_BATCH(dw2);
421    OUT_BATCH(dw3);
422    OUT_BATCH(dw4);
423    OUT_BATCH_F(ctx->Polygon.OffsetUnits * 2); /* constant.  copied from gen4 */
424    OUT_BATCH_F(ctx->Polygon.OffsetFactor); /* scale */
425    OUT_BATCH_F(ctx->Polygon.OffsetClamp); /* global depth offset clamp */
426    for (i = 0; i < 8; i++) {
427       OUT_BATCH(attr_overrides[i * 2] | attr_overrides[i * 2 + 1] << 16);
428    }
429    OUT_BATCH(point_sprite_enables); /* dw16 */
430    OUT_BATCH(wm_prog_data->flat_inputs);
431    OUT_BATCH(0); /* wrapshortest enables 0-7 */
432    OUT_BATCH(0); /* wrapshortest enables 8-15 */
433    ADVANCE_BATCH();
434 }
435 
436 const struct brw_tracked_state gen6_sf_state = {
437    .dirty = {
438       .mesa  = _NEW_BUFFERS |
439                _NEW_LIGHT |
440                _NEW_LINE |
441                _NEW_MULTISAMPLE |
442                _NEW_POINT |
443                _NEW_POLYGON |
444                _NEW_PROGRAM,
445       .brw   = BRW_NEW_BLORP |
446                BRW_NEW_CONTEXT |
447                BRW_NEW_FRAGMENT_PROGRAM |
448                BRW_NEW_FS_PROG_DATA |
449                BRW_NEW_GS_PROG_DATA |
450                BRW_NEW_PRIMITIVE |
451                BRW_NEW_TES_PROG_DATA |
452                BRW_NEW_VUE_MAP_GEOM_OUT,
453    },
454    .emit = upload_sf_state,
455 };
456