• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2013 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /**
25  * \file brw_binding_tables.c
26  *
27  * State atoms which upload the "binding table" for each shader stage.
28  *
29  * Binding tables map a numeric "surface index" to the SURFACE_STATE structure
30  * for a currently bound surface.  This allows SEND messages (such as sampler
31  * or data port messages) to refer to a particular surface by number, rather
32  * than by pointer.
33  *
34  * The binding table is stored as a (sparse) array of SURFACE_STATE entries;
35  * surface indexes are simply indexes into the array.  The ordering of the
36  * entries is entirely left up to software; see the SURF_INDEX_* macros in
37  * brw_context.h to see our current layout.
38  */
39 
40 #include "main/mtypes.h"
41 
42 #include "brw_context.h"
43 #include "brw_defines.h"
44 #include "brw_state.h"
45 #include "intel_batchbuffer.h"
46 
47 static const GLuint stage_to_bt_edit[] = {
48    [MESA_SHADER_VERTEX] = _3DSTATE_BINDING_TABLE_EDIT_VS,
49    [MESA_SHADER_GEOMETRY] = _3DSTATE_BINDING_TABLE_EDIT_GS,
50    [MESA_SHADER_FRAGMENT] = _3DSTATE_BINDING_TABLE_EDIT_PS,
51 };
52 
53 static uint32_t
reserve_hw_bt_space(struct brw_context * brw,unsigned bytes)54 reserve_hw_bt_space(struct brw_context *brw, unsigned bytes)
55 {
56    /* From the Broadwell PRM, Volume 16, "Workarounds",
57     * WaStateBindingTableOverfetch:
58     * "HW over-fetches two cache lines of binding table indices.  When
59     *  using the resource streamer, SW needs to pad binding table pointer
60     *  updates with an additional two cache lines."
61     *
62     * Cache lines are 64 bytes, so we subtract 128 bytes from the size of
63     * the binding table pool buffer.
64     */
65    if (brw->hw_bt_pool.next_offset + bytes >= brw->hw_bt_pool.bo->size - 128) {
66       gen7_reset_hw_bt_pool_offsets(brw);
67    }
68 
69    uint32_t offset = brw->hw_bt_pool.next_offset;
70 
71    /* From the Haswell PRM, Volume 2b: Command Reference: Instructions,
72     * 3DSTATE_BINDING_TABLE_POINTERS_xS:
73     *
74     * "If HW Binding Table is enabled, the offset is relative to the
75     *  Binding Table Pool Base Address and the alignment is 64 bytes."
76     */
77    brw->hw_bt_pool.next_offset += ALIGN(bytes, 64);
78 
79    return offset;
80 }
81 
82 /**
83  * Upload a shader stage's binding table as indirect state.
84  *
85  * This copies brw_stage_state::surf_offset[] into the indirect state section
86  * of the batchbuffer (allocated by brw_state_batch()).
87  */
88 void
brw_upload_binding_table(struct brw_context * brw,uint32_t packet_name,const struct brw_stage_prog_data * prog_data,struct brw_stage_state * stage_state)89 brw_upload_binding_table(struct brw_context *brw,
90                          uint32_t packet_name,
91                          const struct brw_stage_prog_data *prog_data,
92                          struct brw_stage_state *stage_state)
93 {
94    if (prog_data->binding_table.size_bytes == 0) {
95       /* There are no surfaces; skip making the binding table altogether. */
96       if (stage_state->bind_bo_offset == 0 && brw->gen < 9)
97          return;
98 
99       stage_state->bind_bo_offset = 0;
100    } else {
101       /* Upload a new binding table. */
102       if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
103          brw_emit_buffer_surface_state(
104             brw, &stage_state->surf_offset[
105                     prog_data->binding_table.shader_time_start],
106             brw->shader_time.bo, 0, BRW_SURFACEFORMAT_RAW,
107             brw->shader_time.bo->size, 1, true);
108       }
109       /* When RS is enabled use hw-binding table uploads, otherwise fallback to
110        * software-uploads.
111        */
112       if (brw->use_resource_streamer) {
113          gen7_update_binding_table_from_array(brw, stage_state->stage,
114                                               stage_state->surf_offset,
115                                               prog_data->binding_table
116                                               .size_bytes / 4);
117       } else {
118          uint32_t *bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
119                                           prog_data->binding_table.size_bytes,
120                                           32,
121                                           &stage_state->bind_bo_offset);
122 
123          /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */
124          memcpy(bind, stage_state->surf_offset,
125                 prog_data->binding_table.size_bytes);
126       }
127    }
128 
129    brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS;
130 
131    if (brw->gen >= 7) {
132       if (brw->use_resource_streamer) {
133          stage_state->bind_bo_offset =
134             reserve_hw_bt_space(brw, prog_data->binding_table.size_bytes);
135       }
136       BEGIN_BATCH(2);
137       OUT_BATCH(packet_name << 16 | (2 - 2));
138       /* Align SurfaceStateOffset[16:6] format to [15:5] PS Binding Table field
139        * when hw-generated binding table is enabled.
140        */
141       OUT_BATCH(brw->use_resource_streamer ?
142                 (stage_state->bind_bo_offset >> 1) :
143                 stage_state->bind_bo_offset);
144       ADVANCE_BATCH();
145    }
146 }
147 
148 /**
149  * State atoms which upload the binding table for a particular shader stage.
150  *  @{
151  */
152 
153 /** Upload the VS binding table. */
154 static void
brw_vs_upload_binding_table(struct brw_context * brw)155 brw_vs_upload_binding_table(struct brw_context *brw)
156 {
157    /* BRW_NEW_VS_PROG_DATA */
158    const struct brw_stage_prog_data *prog_data = brw->vs.base.prog_data;
159    brw_upload_binding_table(brw,
160                             _3DSTATE_BINDING_TABLE_POINTERS_VS,
161                             prog_data,
162                             &brw->vs.base);
163 }
164 
165 const struct brw_tracked_state brw_vs_binding_table = {
166    .dirty = {
167       .mesa = 0,
168       .brw = BRW_NEW_BATCH |
169              BRW_NEW_BLORP |
170              BRW_NEW_VS_CONSTBUF |
171              BRW_NEW_VS_PROG_DATA |
172              BRW_NEW_SURFACES,
173    },
174    .emit = brw_vs_upload_binding_table,
175 };
176 
177 
178 /** Upload the PS binding table. */
179 static void
brw_upload_wm_binding_table(struct brw_context * brw)180 brw_upload_wm_binding_table(struct brw_context *brw)
181 {
182    /* BRW_NEW_FS_PROG_DATA */
183    const struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;
184    brw_upload_binding_table(brw,
185                             _3DSTATE_BINDING_TABLE_POINTERS_PS,
186                             prog_data,
187                             &brw->wm.base);
188 }
189 
190 const struct brw_tracked_state brw_wm_binding_table = {
191    .dirty = {
192       .mesa = 0,
193       .brw = BRW_NEW_BATCH |
194              BRW_NEW_BLORP |
195              BRW_NEW_FS_PROG_DATA |
196              BRW_NEW_SURFACES,
197    },
198    .emit = brw_upload_wm_binding_table,
199 };
200 
201 /** Upload the TCS binding table (if tessellation stages are active). */
202 static void
brw_tcs_upload_binding_table(struct brw_context * brw)203 brw_tcs_upload_binding_table(struct brw_context *brw)
204 {
205    /* Skip if the tessellation stages are disabled. */
206    if (brw->tess_eval_program == NULL)
207       return;
208 
209    /* BRW_NEW_TCS_PROG_DATA */
210    const struct brw_stage_prog_data *prog_data = brw->tcs.base.prog_data;
211    brw_upload_binding_table(brw,
212                             _3DSTATE_BINDING_TABLE_POINTERS_HS,
213                             prog_data,
214                             &brw->tcs.base);
215 }
216 
217 const struct brw_tracked_state brw_tcs_binding_table = {
218    .dirty = {
219       .mesa = 0,
220       .brw = BRW_NEW_BATCH |
221              BRW_NEW_BLORP |
222              BRW_NEW_DEFAULT_TESS_LEVELS |
223              BRW_NEW_SURFACES |
224              BRW_NEW_TCS_CONSTBUF |
225              BRW_NEW_TCS_PROG_DATA,
226    },
227    .emit = brw_tcs_upload_binding_table,
228 };
229 
230 /** Upload the TES binding table (if TES is active). */
231 static void
brw_tes_upload_binding_table(struct brw_context * brw)232 brw_tes_upload_binding_table(struct brw_context *brw)
233 {
234    /* If there's no TES, skip changing anything. */
235    if (brw->tess_eval_program == NULL)
236       return;
237 
238    /* BRW_NEW_TES_PROG_DATA */
239    const struct brw_stage_prog_data *prog_data = brw->tes.base.prog_data;
240    brw_upload_binding_table(brw,
241                             _3DSTATE_BINDING_TABLE_POINTERS_DS,
242                             prog_data,
243                             &brw->tes.base);
244 }
245 
246 const struct brw_tracked_state brw_tes_binding_table = {
247    .dirty = {
248       .mesa = 0,
249       .brw = BRW_NEW_BATCH |
250              BRW_NEW_BLORP |
251              BRW_NEW_SURFACES |
252              BRW_NEW_TES_CONSTBUF |
253              BRW_NEW_TES_PROG_DATA,
254    },
255    .emit = brw_tes_upload_binding_table,
256 };
257 
258 /** Upload the GS binding table (if GS is active). */
259 static void
brw_gs_upload_binding_table(struct brw_context * brw)260 brw_gs_upload_binding_table(struct brw_context *brw)
261 {
262    /* If there's no GS, skip changing anything. */
263    if (brw->geometry_program == NULL)
264       return;
265 
266    /* BRW_NEW_GS_PROG_DATA */
267    const struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;
268    brw_upload_binding_table(brw,
269                             _3DSTATE_BINDING_TABLE_POINTERS_GS,
270                             prog_data,
271                             &brw->gs.base);
272 }
273 
274 const struct brw_tracked_state brw_gs_binding_table = {
275    .dirty = {
276       .mesa = 0,
277       .brw = BRW_NEW_BATCH |
278              BRW_NEW_BLORP |
279              BRW_NEW_GS_CONSTBUF |
280              BRW_NEW_GS_PROG_DATA |
281              BRW_NEW_SURFACES,
282    },
283    .emit = brw_gs_upload_binding_table,
284 };
285 
286 /**
287  * Edit a single entry in a hardware-generated binding table
288  */
289 void
gen7_edit_hw_binding_table_entry(struct brw_context * brw,gl_shader_stage stage,uint32_t index,uint32_t surf_offset)290 gen7_edit_hw_binding_table_entry(struct brw_context *brw,
291                                  gl_shader_stage stage,
292                                  uint32_t index,
293                                  uint32_t surf_offset)
294 {
295    assert(stage < ARRAY_SIZE(stage_to_bt_edit));
296    assert(stage_to_bt_edit[stage]);
297 
298    uint32_t dw2 = SET_FIELD(index, BRW_BINDING_TABLE_INDEX) |
299       (brw->gen >= 8 ? GEN8_SURFACE_STATE_EDIT(surf_offset) :
300        HSW_SURFACE_STATE_EDIT(surf_offset));
301 
302    BEGIN_BATCH(3);
303    OUT_BATCH(stage_to_bt_edit[stage] << 16 | (3 - 2));
304    OUT_BATCH(BRW_BINDING_TABLE_EDIT_TARGET_ALL);
305    OUT_BATCH(dw2);
306    ADVANCE_BATCH();
307 }
308 
309 /**
310  * Upload a whole hardware binding table for the given stage.
311  *
312  * Takes an array of surface offsets and the number of binding table
313  * entries.
314  */
315 void
gen7_update_binding_table_from_array(struct brw_context * brw,gl_shader_stage stage,const uint32_t * binding_table,int num_surfaces)316 gen7_update_binding_table_from_array(struct brw_context *brw,
317                                      gl_shader_stage stage,
318                                      const uint32_t* binding_table,
319                                      int num_surfaces)
320 {
321    uint32_t dw2 = 0;
322 
323    assert(stage < ARRAY_SIZE(stage_to_bt_edit));
324    assert(stage_to_bt_edit[stage]);
325 
326    BEGIN_BATCH(num_surfaces + 2);
327    OUT_BATCH(stage_to_bt_edit[stage] << 16 | num_surfaces);
328    OUT_BATCH(BRW_BINDING_TABLE_EDIT_TARGET_ALL);
329    for (int i = 0; i < num_surfaces; i++) {
330       dw2 = SET_FIELD(i, BRW_BINDING_TABLE_INDEX) |
331          (brw->gen >= 8 ? GEN8_SURFACE_STATE_EDIT(binding_table[i]) :
332           HSW_SURFACE_STATE_EDIT(binding_table[i]));
333       OUT_BATCH(dw2);
334    }
335    ADVANCE_BATCH();
336 }
337 
338 /**
339  * Disable hardware binding table support, falling back to the
340  * older software-generated binding table mechanism.
341  */
342 void
gen7_disable_hw_binding_tables(struct brw_context * brw)343 gen7_disable_hw_binding_tables(struct brw_context *brw)
344 {
345    if (!brw->use_resource_streamer)
346       return;
347    /* From the Haswell PRM, Volume 7: 3D Media GPGPU,
348     * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note:
349     *
350     * "When switching between HW and SW binding table generation, SW must
351     * issue a state cache invalidate."
352     */
353    brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
354 
355    int pkt_len = brw->gen >= 8 ? 4 : 3;
356 
357    BEGIN_BATCH(pkt_len);
358    OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (pkt_len - 2));
359    if (brw->gen >= 8) {
360       OUT_BATCH(0);
361       OUT_BATCH(0);
362       OUT_BATCH(0);
363    } else {
364       OUT_BATCH(HSW_BT_POOL_ALLOC_MUST_BE_ONE);
365       OUT_BATCH(0);
366    }
367    ADVANCE_BATCH();
368 }
369 
370 /**
371  * Enable hardware binding tables and set up the binding table pool.
372  */
373 void
gen7_enable_hw_binding_tables(struct brw_context * brw)374 gen7_enable_hw_binding_tables(struct brw_context *brw)
375 {
376    if (!brw->use_resource_streamer)
377       return;
378 
379    if (!brw->hw_bt_pool.bo) {
380       /* We use a single re-usable buffer object for the lifetime of the
381        * context and size it to maximum allowed binding tables that can be
382        * programmed per batch:
383        *
384        * From the Haswell PRM, Volume 7: 3D Media GPGPU,
385        * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note:
386        * "A maximum of 16,383 Binding tables are allowed in any batch buffer"
387        */
388       static const int max_size = 16383 * 4;
389       brw->hw_bt_pool.bo = drm_intel_bo_alloc(brw->bufmgr, "hw_bt",
390                                               max_size, 64);
391       brw->hw_bt_pool.next_offset = 0;
392    }
393 
394    /* From the Haswell PRM, Volume 7: 3D Media GPGPU,
395     * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note:
396     *
397     * "When switching between HW and SW binding table generation, SW must
398     * issue a state cache invalidate."
399     */
400    brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
401 
402    int pkt_len = brw->gen >= 8 ? 4 : 3;
403    uint32_t dw1 = BRW_HW_BINDING_TABLE_ENABLE;
404    if (brw->is_haswell) {
405       dw1 |= SET_FIELD(GEN7_MOCS_L3, GEN7_HW_BT_POOL_MOCS) |
406              HSW_BT_POOL_ALLOC_MUST_BE_ONE;
407    } else if (brw->gen >= 8) {
408       dw1 |= BDW_MOCS_WB;
409    }
410 
411    BEGIN_BATCH(pkt_len);
412    OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (pkt_len - 2));
413    if (brw->gen >= 8) {
414       OUT_RELOC64(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1);
415       OUT_BATCH(brw->hw_bt_pool.bo->size);
416    } else {
417       OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1);
418       OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0,
419              brw->hw_bt_pool.bo->size);
420    }
421    ADVANCE_BATCH();
422 }
423 
424 void
gen7_reset_hw_bt_pool_offsets(struct brw_context * brw)425 gen7_reset_hw_bt_pool_offsets(struct brw_context *brw)
426 {
427    brw->hw_bt_pool.next_offset = 0;
428 }
429 
430 const struct brw_tracked_state gen7_hw_binding_tables = {
431    .dirty = {
432       .mesa = 0,
433       .brw = BRW_NEW_BATCH |
434              BRW_NEW_BLORP,
435    },
436    .emit = gen7_enable_hw_binding_tables
437 };
438 
439 /** @} */
440 
441 /**
442  * State atoms which emit 3DSTATE packets to update the binding table pointers.
443  *  @{
444  */
445 
446 /**
447  * (Gen4-5) Upload the binding table pointers for all shader stages.
448  *
449  * The binding table pointers are relative to the surface state base address,
450  * which points at the batchbuffer containing the streamed batch state.
451  */
452 static void
gen4_upload_binding_table_pointers(struct brw_context * brw)453 gen4_upload_binding_table_pointers(struct brw_context *brw)
454 {
455    BEGIN_BATCH(6);
456    OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2));
457    OUT_BATCH(brw->vs.base.bind_bo_offset);
458    OUT_BATCH(0); /* gs */
459    OUT_BATCH(0); /* clip */
460    OUT_BATCH(0); /* sf */
461    OUT_BATCH(brw->wm.base.bind_bo_offset);
462    ADVANCE_BATCH();
463 }
464 
465 const struct brw_tracked_state brw_binding_table_pointers = {
466    .dirty = {
467       .mesa = 0,
468       .brw = BRW_NEW_BATCH |
469              BRW_NEW_BLORP |
470              BRW_NEW_BINDING_TABLE_POINTERS |
471              BRW_NEW_STATE_BASE_ADDRESS,
472    },
473    .emit = gen4_upload_binding_table_pointers,
474 };
475 
476 /**
477  * (Sandybridge Only) Upload the binding table pointers for all shader stages.
478  *
479  * The binding table pointers are relative to the surface state base address,
480  * which points at the batchbuffer containing the streamed batch state.
481  */
482 static void
gen6_upload_binding_table_pointers(struct brw_context * brw)483 gen6_upload_binding_table_pointers(struct brw_context *brw)
484 {
485    BEGIN_BATCH(4);
486    OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 |
487              GEN6_BINDING_TABLE_MODIFY_VS |
488              GEN6_BINDING_TABLE_MODIFY_GS |
489              GEN6_BINDING_TABLE_MODIFY_PS |
490              (4 - 2));
491    OUT_BATCH(brw->vs.base.bind_bo_offset); /* vs */
492    if (brw->ff_gs.prog_active)
493       OUT_BATCH(brw->ff_gs.bind_bo_offset); /* gs */
494    else
495       OUT_BATCH(brw->gs.base.bind_bo_offset); /* gs */
496    OUT_BATCH(brw->wm.base.bind_bo_offset); /* wm/ps */
497    ADVANCE_BATCH();
498 }
499 
500 const struct brw_tracked_state gen6_binding_table_pointers = {
501    .dirty = {
502       .mesa = 0,
503       .brw = BRW_NEW_BATCH |
504              BRW_NEW_BLORP |
505              BRW_NEW_BINDING_TABLE_POINTERS |
506              BRW_NEW_STATE_BASE_ADDRESS,
507    },
508    .emit = gen6_upload_binding_table_pointers,
509 };
510 
511 /** @} */
512