/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>

#include "anv_private.h"
#include "anv_measure.h"

/* These are defined in anv_private.h and blorp_genX_exec.h */
#undef __gen_address_type
#undef __gen_user_data
#undef __gen_combine_address

#include "common/intel_l3_config.h"
#include "blorp/blorp_genX_exec.h"

#include "ds/intel_tracepoints.h"

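/* Driver hooks required by blorp/blorp_genX_exec.h.  BLORP declares these
 * static functions and calls back into the driver through them to allocate
 * batch and state space, emit relocations, and record tracing/measurement
 * data; anv implements them here in terms of anv_cmd_buffer.
 */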
static void blorp_measure_start(struct blorp_batch *_batch,
                                const struct blorp_params *params)
{
   struct anv_cmd_buffer *cmd_buffer = _batch->driver_batch;
   trace_intel_begin_blorp(&cmd_buffer->trace);
   anv_measure_snapshot(cmd_buffer,
                        params->snapshot_type,
                        NULL, 0);
}

static void blorp_measure_end(struct blorp_batch *_batch,
                              const struct blorp_params *params)
{
   struct anv_cmd_buffer *cmd_buffer = _batch->driver_batch;
   trace_intel_end_blorp(&cmd_buffer->trace,
                         params->x1 - params->x0,
                         params->y1 - params->y0,
                         params->hiz_op,
                         params->fast_clear_op,
                         params->shader_type,
                         params->shader_pipeline);
}

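/* Reserve space for n dwords in the command buffer's batch; BLORP writes
 * its packets through the returned pointer.
 */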
static void *
blorp_emit_dwords(struct blorp_batch *batch, unsigned n)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   return anv_batch_emit_dwords(&cmd_buffer->batch, n);
}

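/* Record a relocation for an address BLORP is writing at the given batch
 * location, and return the presumed address for it to write there.
 */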
static uint64_t
blorp_emit_reloc(struct blorp_batch *batch,
                 void *location, struct blorp_address address, uint32_t delta)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   assert(cmd_buffer->batch.start <= location &&
          location < cmd_buffer->batch.end);
   return anv_batch_emit_reloc(&cmd_buffer->batch, location,
                               address.buffer, address.offset + delta);
}

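/* Track the BO referenced by a surface state BLORP filled out.  With
 * softpin we only need to record the BO so it gets pinned; otherwise we add
 * a relocation at ss_offset in the surface state pool and write the
 * presumed address ourselves.
 */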
static void
blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset,
                    struct blorp_address address, uint32_t delta)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   VkResult result;

   if (ANV_ALWAYS_SOFTPIN) {
      result = anv_reloc_list_add_bo(&cmd_buffer->surface_relocs,
                                     &cmd_buffer->vk.pool->alloc,
                                     address.buffer);
      if (unlikely(result != VK_SUCCESS))
         anv_batch_set_error(&cmd_buffer->batch, result);
      return;
   }

   uint64_t address_u64 = 0;
   result = anv_reloc_list_add(&cmd_buffer->surface_relocs,
                               &cmd_buffer->vk.pool->alloc,
                               ss_offset, address.buffer,
                               address.offset + delta,
                               &address_u64);
   if (result != VK_SUCCESS)
      anv_batch_set_error(&cmd_buffer->batch, result);

   void *dest = anv_block_pool_map(
      &cmd_buffer->device->surface_state_pool.block_pool, ss_offset, 8);
   write_reloc(cmd_buffer->device, dest, address_u64, false);
}

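/* Return the GPU address to bake into a surface state.  With softpin the
 * address is known up front; otherwise we return 0 and rely on
 * blorp_surface_reloc() to patch the real address in.
 */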
static uint64_t
blorp_get_surface_address(struct blorp_batch *blorp_batch,
                          struct blorp_address address)
{
   if (ANV_ALWAYS_SOFTPIN) {
      struct anv_address anv_addr = {
         .bo = address.buffer,
         .offset = address.offset,
      };
      return anv_address_physical(anv_addr);
   } else {
      /* We'll let blorp_surface_reloc write the address. */
      return 0;
   }
}

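/* On Gfx7-9, surface state offsets are relative to the surface state base
 * address, so BLORP also needs the base of the pool the states above come
 * from (e.g. to compute the full address of a surface state when copying in
 * indirect fast-clear colors).
 */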
#if GFX_VER >= 7 && GFX_VER < 10
static struct blorp_address
blorp_get_surface_base_address(struct blorp_batch *batch)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   return (struct blorp_address) {
      .buffer = cmd_buffer->device->surface_state_pool.block_pool.bo,
      .offset = 0,
   };
}
#endif

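/* State allocations go through the command buffer's state streams, so the
 * lifetime of these allocations automatically matches the lifetime of the
 * command buffer.
 */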
static void *
blorp_alloc_dynamic_state(struct blorp_batch *batch,
                          uint32_t size,
                          uint32_t alignment,
                          uint32_t *offset)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;

   struct anv_state state =
      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment);

   *offset = state.offset;
   return state.map;
}

UNUSED static void *
blorp_alloc_general_state(struct blorp_batch *batch,
                          uint32_t size,
                          uint32_t alignment,
                          uint32_t *offset)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;

   struct anv_state state =
      anv_state_stream_alloc(&cmd_buffer->general_state_stream, size,
                             alignment);

   *offset = state.offset;
   return state.map;
}

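/* Allocate a binding table plus one surface state per entry.  The binding
 * table entries are pointed at the freshly allocated surface states; BLORP
 * fills in the surface states themselves through surface_maps.
 */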
static void
blorp_alloc_binding_table(struct blorp_batch *batch, unsigned num_entries,
                          unsigned state_size, unsigned state_alignment,
                          uint32_t *bt_offset,
                          uint32_t *surface_offsets, void **surface_maps)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;

   uint32_t state_offset;
   struct anv_state bt_state;

   VkResult result =
      anv_cmd_buffer_alloc_blorp_binding_table(cmd_buffer, num_entries,
                                               &state_offset, &bt_state);
   if (result != VK_SUCCESS)
      return;

   uint32_t *bt_map = bt_state.map;
   *bt_offset = bt_state.offset;

   for (unsigned i = 0; i < num_entries; i++) {
      struct anv_state surface_state =
         anv_cmd_buffer_alloc_surface_state(cmd_buffer);
      bt_map[i] = surface_state.offset + state_offset;
      surface_offsets[i] = surface_state.offset;
      surface_maps[i] = surface_state.map;
   }
}

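/* anv binding table offsets are already in the form the hardware's binding
 * table pointer expects, so no conversion is needed.
 */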
static uint32_t
blorp_binding_table_offset_to_pointer(struct blorp_batch *batch,
                                      uint32_t offset)
{
   return offset;
}

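/* BLORP's vertex data is small and short-lived, so it is carved out of the
 * dynamic state pool rather than a dedicated vertex buffer BO.
 */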
static void *
blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size,
                          struct blorp_address *addr)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   struct anv_state vb_state =
      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 64);

   *addr = (struct blorp_address) {
      .buffer = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
      .offset = vb_state.offset,
      .mocs = isl_mocs(&cmd_buffer->device->isl_dev,
                       ISL_SURF_USAGE_VERTEX_BUFFER_BIT, false),
   };

   return vb_state.map;
}

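/* Gfx8+ VF cache workaround: the VF cache only considers the bottom 32 bits
 * of a vertex buffer address, so a binding that transitions across the 4GiB
 * boundary can hit stale cache entries.  Track BLORP's vertex buffers the
 * same way the normal draw path does and flush when required.
 */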
static void
blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *batch,
                                           const struct blorp_address *addrs,
                                           uint32_t *sizes,
                                           unsigned num_vbs)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;

   for (unsigned i = 0; i < num_vbs; i++) {
      struct anv_address anv_addr = {
         .bo = addrs[i].buffer,
         .offset = addrs[i].offset,
      };
      genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer,
                                                     i, anv_addr, sizes[i]);
   }

   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

   /* Technically, we should call this *after* 3DPRIMITIVE but it doesn't
    * really matter for blorp because we never call apply_pipe_flushes after
    * this point.
    */
   genX(cmd_buffer_update_dirty_vbs_for_gfx8_vb_flush)(cmd_buffer, SEQUENTIAL,
                                                       (1 << num_vbs) - 1);
}

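/* Scratch address BLORP can target for workaround writes, e.g. dummy
 * PIPE_CONTROL post-sync operations.
 */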
UNUSED static struct blorp_address
blorp_get_workaround_address(struct blorp_batch *batch)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;

   return (struct blorp_address) {
      .buffer = cmd_buffer->device->workaround_address.bo,
      .offset = cmd_buffer->device->workaround_address.offset,
   };
}

static void
blorp_flush_range(struct blorp_batch *batch, void *start, size_t size)
{
   /* We don't need to flush states anymore, since everything will be
    * snooped.
    */
}

static const struct intel_l3_config *
blorp_get_l3_config(struct blorp_batch *batch)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   return cmd_buffer->state.current_l3_config;
}

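/* Execute a BLORP operation on the 3D pipeline, then mark all the state
 * BLORP may have clobbered as dirty so the next real draw re-emits it.
 */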
static void
blorp_exec_on_render(struct blorp_batch *batch,
                     const struct blorp_params *params)
{
   assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);

   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   assert(cmd_buffer->queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT);

   const unsigned scale = params->fast_clear_op ? UINT_MAX : 1;
   genX(cmd_buffer_emit_hashing_mode)(cmd_buffer, params->x1 - params->x0,
                                      params->y1 - params->y0, scale);

#if GFX_VER >= 11
   /* The PIPE_CONTROL command description says:
    *
    *    "Whenever a Binding Table Index (BTI) used by a Render Target Message
    *     points to a different RENDER_SURFACE_STATE, SW must issue a Render
    *     Target Cache Flush by enabling this bit. When render target flush
    *     is set due to new association of BTI, PS Scoreboard Stall bit must
    *     be set in this packet."
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
                             ANV_PIPE_STALL_AT_SCOREBOARD_BIT,
                             "before blorp BTI change");
#endif

   if (params->depth.enabled &&
       !(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL))
      genX(cmd_buffer_emit_gfx12_depth_wa)(cmd_buffer, &params->depth.surf);

   genX(flush_pipeline_select_3d)(cmd_buffer);

   /* Apply any outstanding flushes in case the pipeline select hasn't. */
   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

   genX(cmd_buffer_emit_gfx7_depth_flush)(cmd_buffer);

   /* BLORP doesn't do anything fancy with depth such as discards, so we want
    * the PMA fix off.  Also, off is always the safe option.
    */
   genX(cmd_buffer_enable_pma_fix)(cmd_buffer, false);

   blorp_exec(batch, params);

#if GFX_VER >= 11
   /* The PIPE_CONTROL command description says:
    *
    *    "Whenever a Binding Table Index (BTI) used by a Render Target Message
    *     points to a different RENDER_SURFACE_STATE, SW must issue a Render
    *     Target Cache Flush by enabling this bit. When render target flush
    *     is set due to new association of BTI, PS Scoreboard Stall bit must
    *     be set in this packet."
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
                             ANV_PIPE_STALL_AT_SCOREBOARD_BIT,
                             "after blorp BTI change");
#endif

   /* Calculate the state that does not get touched by BLORP and flag
    * everything else as dirty.
    */
   anv_cmd_dirty_mask_t dirty = ~(ANV_CMD_DIRTY_INDEX_BUFFER |
                                  ANV_CMD_DIRTY_XFB_ENABLE);

   BITSET_DECLARE(dyn_dirty, MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX);
   BITSET_ONES(dyn_dirty);
   BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE);
   BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_VP_SCISSOR_COUNT);
   BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_VP_SCISSORS);
   BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE);
   BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_FSR);
   BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS);
   if (!params->wm_prog_data) {
      BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES);
      BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP);
   }

   cmd_buffer->state.gfx.vb_dirty = ~0;
   cmd_buffer->state.gfx.dirty |= dirty;
   BITSET_OR(cmd_buffer->vk.dynamic_graphics_state.dirty,
             cmd_buffer->vk.dynamic_graphics_state.dirty, dyn_dirty);
   cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS;
}

static void
blorp_exec_on_compute(struct blorp_batch *batch,
                      const struct blorp_params *params)
{
   assert(batch->flags & BLORP_BATCH_USE_COMPUTE);

   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   assert(cmd_buffer->queue_family->queueFlags & VK_QUEUE_COMPUTE_BIT);

   genX(flush_pipeline_select_gpgpu)(cmd_buffer);

   /* Apply any outstanding flushes in case the pipeline select hasn't. */
   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

   blorp_exec(batch, params);

   cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
}

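/* Main entry point for BLORP operations on an anv command buffer.  Makes
 * sure an L3 configuration is in place, applies the Gfx7 fast-clear stall
 * workaround, then dispatches to the render or compute path.
 */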
void
genX(blorp_exec)(struct blorp_batch *batch,
                 const struct blorp_params *params)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;

   if (!cmd_buffer->state.current_l3_config) {
      const struct intel_l3_config *cfg =
         intel_get_default_l3_config(&cmd_buffer->device->info);
      genX(cmd_buffer_config_l3)(cmd_buffer, cfg);
   }

#if GFX_VER == 7
   /* The MI_LOAD/STORE_REGISTER_MEM commands which BLORP uses to implement
    * indirect fast-clear colors can cause GPU hangs if we don't stall first.
    * See genX(cmd_buffer_mi_memcpy) for more details.
    */
   if (params->src.clear_color_addr.buffer ||
       params->dst.clear_color_addr.buffer) {
      anv_add_pending_pipe_bits(cmd_buffer,
                                ANV_PIPE_CS_STALL_BIT,
                                "before blorp prep fast clear");
   }
#endif

   if (batch->flags & BLORP_BATCH_USE_COMPUTE)
      blorp_exec_on_compute(batch, params);
   else
      blorp_exec_on_render(batch, params);
}