/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>

#include "anv_private.h"
#include "anv_measure.h"

/* These are defined in anv_private.h and blorp_genX_exec_elk.h */
#undef __gen_address_type
#undef __gen_user_data
#undef __gen_combine_address

#include "common/intel_l3_config.h"
#include "blorp/blorp_genX_exec_elk.h"

#include "ds/intel_tracepoints.h"

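/* What follows are the driver hooks consumed by blorp_genX_exec_elk.h,
 * implemented here in terms of the ANV command buffer stashed in the
 * batch's driver_batch pointer.
 *
 * The two measure hooks bracket each blorp operation with a u_trace
 * begin/end tracepoint and an INTEL_MEASURE snapshot.
 */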
static void blorp_measure_start(struct blorp_batch *_batch,
                                const struct blorp_params *params)
{
   struct anv_cmd_buffer *cmd_buffer = _batch->driver_batch;
   trace_intel_begin_blorp(&cmd_buffer->trace);
   anv_measure_snapshot(cmd_buffer,
                        blorp_op_to_intel_measure_snapshot(params->op),
                        NULL, 0);
}

static void blorp_measure_end(struct blorp_batch *_batch,
                              const struct blorp_params *params)
{
   struct anv_cmd_buffer *cmd_buffer = _batch->driver_batch;
   trace_intel_end_blorp(&cmd_buffer->trace,
                         params->op,
                         params->x1 - params->x0,
                         params->y1 - params->y0,
                         params->num_samples,
                         params->shader_pipeline,
                         params->dst.view.format,
                         params->src.view.format,
                         (_batch->flags & BLORP_BATCH_PREDICATE_ENABLE));
}

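/* Reserve space for n dwords in the command buffer's batch and hand the
 * mapped pointer back to blorp to write its commands into.
 */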
static void *
blorp_emit_dwords(struct blorp_batch *batch, unsigned n)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   return anv_batch_emit_dwords(&cmd_buffer->batch, n);
}

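/* Record a relocation for an address embedded in the batch itself.  The
 * location must lie inside the current batch; the return value is the
 * presumed address that gets written at that location.
 */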
static uint64_t
blorp_emit_reloc(struct blorp_batch *batch,
                 void *location, struct blorp_address address, uint32_t delta)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   assert(cmd_buffer->batch.start <= location &&
          location < cmd_buffer->batch.end);
   return anv_batch_emit_reloc(&cmd_buffer->batch, location,
                               address.buffer, address.offset + delta);
}

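/* Same idea for addresses embedded in surface states: add the relocation to
 * the command buffer's surface reloc list, then write the presumed address
 * into the mapped surface state ourselves.
 */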
static void
blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset,
                    struct blorp_address address, uint32_t delta)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   VkResult result;

   uint64_t address_u64 = 0;
   result = anv_reloc_list_add(&cmd_buffer->surface_relocs,
                               &cmd_buffer->vk.pool->alloc,
                               ss_offset, address.buffer,
                               address.offset + delta,
                               &address_u64);
   if (result != VK_SUCCESS)
      anv_batch_set_error(&cmd_buffer->batch, result);

   void *dest = anv_block_pool_map(
      &cmd_buffer->device->surface_state_pool.block_pool, ss_offset, 8);
   write_reloc(cmd_buffer->device, dest, address_u64, false);
}

static uint64_t
blorp_get_surface_address(struct blorp_batch *blorp_batch,
                          struct blorp_address address)
{
   /* We'll let blorp_surface_reloc write the address. */
   return 0;
}

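/* Surface state offsets are all relative to the surface state pool, so its
 * backing BO serves as the base address.
 */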
static struct blorp_address
blorp_get_surface_base_address(struct blorp_batch *batch)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   return (struct blorp_address) {
      .buffer = cmd_buffer->device->surface_state_pool.block_pool.bo,
      .offset = 0,
   };
}

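/* Transient state that blorp allocates during an operation is carved out of
 * the command buffer's dynamic and general state streams, respectively.
 */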
static void *
blorp_alloc_dynamic_state(struct blorp_batch *batch,
                          uint32_t size,
                          uint32_t alignment,
                          uint32_t *offset)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;

   struct anv_state state =
      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment);

   *offset = state.offset;
   return state.map;
}

UNUSED static void *
blorp_alloc_general_state(struct blorp_batch *batch,
                          uint32_t size,
                          uint32_t alignment,
                          uint32_t *offset)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;

   struct anv_state state =
      anv_state_stream_alloc(&cmd_buffer->general_state_stream, size,
                             alignment);

   *offset = state.offset;
   return state.map;
}

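/* Allocate a binding table plus one surface state per entry.  Binding table
 * entries hold offsets relative to the surface state base address, hence
 * the state_offset adjustment; surface_offsets/surface_maps let blorp fill
 * in the surface states afterwards.
 */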
static bool
blorp_alloc_binding_table(struct blorp_batch *batch, unsigned num_entries,
                          unsigned state_size, unsigned state_alignment,
                          uint32_t *bt_offset,
                          uint32_t *surface_offsets, void **surface_maps)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;

   uint32_t state_offset;
   struct anv_state bt_state;

   VkResult result =
      anv_cmd_buffer_alloc_blorp_binding_table(cmd_buffer, num_entries,
                                               &state_offset, &bt_state);
   if (result != VK_SUCCESS)
      return false;

   uint32_t *bt_map = bt_state.map;
   *bt_offset = bt_state.offset;

   for (unsigned i = 0; i < num_entries; i++) {
      struct anv_state surface_state =
         anv_cmd_buffer_alloc_surface_state(cmd_buffer);
      bt_map[i] = surface_state.offset + state_offset;
      surface_offsets[i] = surface_state.offset;
      surface_maps[i] = surface_state.map;
   }

   return true;
}

static uint32_t
blorp_binding_table_offset_to_pointer(struct blorp_batch *batch,
                                      uint32_t offset)
{
   return offset;
}

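/* Vertex data also lives in dynamic state.  The 64-byte alignment is
 * presumably a conservative choice that satisfies the hardware's vertex
 * buffer alignment requirements.
 */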
static void *
blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size,
                          struct blorp_address *addr)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   struct anv_state vb_state =
      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 64);

   *addr = (struct blorp_address) {
      .buffer = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
      .offset = vb_state.offset,
      .mocs = isl_mocs(&cmd_buffer->device->isl_dev,
                       ISL_SURF_USAGE_VERTEX_BUFFER_BIT, false),
   };

   return vb_state.map;
}

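/* Part of the gfx8 vertex-fetch cache workaround: record each vertex buffer
 * binding so that a pipe flush is queued whenever a binding's high address
 * bits change, then apply the queued flushes.
 */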
static void
blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *batch,
                                           const struct blorp_address *addrs,
                                           uint32_t *sizes,
                                           unsigned num_vbs)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;

   for (unsigned i = 0; i < num_vbs; i++) {
      struct anv_address anv_addr = {
         .bo = addrs[i].buffer,
         .offset = addrs[i].offset,
      };
      genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer,
                                                     i, anv_addr, sizes[i]);
   }

   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

   /* Technically, we should call this *after* 3DPRIMITIVE, but it doesn't
    * really matter for blorp because we never call apply_pipe_flushes after
    * this point.
    */
   genX(cmd_buffer_update_dirty_vbs_for_gfx8_vb_flush)(cmd_buffer, SEQUENTIAL,
                                                       (1 << num_vbs) - 1);
}

UNUSED static struct blorp_address
blorp_get_workaround_address(struct blorp_batch *batch)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;

   return (struct blorp_address) {
      .buffer = cmd_buffer->device->workaround_address.bo,
      .offset = cmd_buffer->device->workaround_address.offset,
   };
}

static void
blorp_flush_range(struct blorp_batch *batch, void *start, size_t size)
{
   /* We don't need to flush states anymore, since everything will be
    * snooped.
    */
}

static void
blorp_pre_emit_urb_config(struct blorp_batch *blorp_batch,
                          struct intel_urb_config *urb_cfg)
{
   /* Dummy. */
}

static const struct intel_l3_config *
blorp_get_l3_config(struct blorp_batch *batch)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   return cmd_buffer->state.current_l3_config;
}

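/* Execute a blorp operation on the 3D pipeline, then mark all the GPU state
 * the operation may have clobbered as dirty so the next real draw re-emits
 * it.
 */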
static void
blorp_exec_on_render(struct blorp_batch *batch,
                     const struct blorp_params *params)
{
   assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);

   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   assert(cmd_buffer->queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT);

   genX(flush_pipeline_select_3d)(cmd_buffer);

   /* Apply any outstanding flushes in case the pipeline select hasn't. */
   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

   genX(cmd_buffer_emit_gfx7_depth_flush)(cmd_buffer);

   /* BLORP doesn't do anything fancy with depth such as discards, so we want
    * the PMA fix off.  Also, off is always the safe option.
    */
   genX(cmd_buffer_enable_pma_fix)(cmd_buffer, false);

   blorp_exec(batch, params);

   /* Mark as dirty everything that blorp may have touched; only the index
    * buffer and XFB enable, which blorp never changes, keep their state.
    */
   anv_cmd_dirty_mask_t dirty = ~(ANV_CMD_DIRTY_INDEX_BUFFER |
                                  ANV_CMD_DIRTY_XFB_ENABLE);

   BITSET_DECLARE(dyn_dirty, MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX);
   BITSET_ONES(dyn_dirty);
   BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE);
   BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_VP_SCISSOR_COUNT);
   BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_VP_SCISSORS);
   BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE);
   BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_FSR);
   BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS);
   if (!params->wm_prog_data) {
      BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES);
      BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP);
   }

   cmd_buffer->state.gfx.vb_dirty = ~0;
   cmd_buffer->state.gfx.dirty |= dirty;
   BITSET_OR(cmd_buffer->vk.dynamic_graphics_state.dirty,
             cmd_buffer->vk.dynamic_graphics_state.dirty, dyn_dirty);
   cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS;
}

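/* Execute a blorp operation on the compute pipeline; afterwards only the
 * compute push constants are flagged for re-emission.
 */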
static void
blorp_exec_on_compute(struct blorp_batch *batch,
                      const struct blorp_params *params)
{
   assert(batch->flags & BLORP_BATCH_USE_COMPUTE);

   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   assert(cmd_buffer->queue_family->queueFlags & VK_QUEUE_COMPUTE_BIT);

   genX(flush_pipeline_select_gpgpu)(cmd_buffer);

   /* Apply any outstanding flushes in case the pipeline select hasn't. */
   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

   blorp_exec(batch, params);

   cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
}

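/* Entry point used by the rest of the driver.  Ensure an L3 configuration
 * has been programmed (blorp_get_l3_config() above reports it to blorp),
 * apply the gfx7 fast-clear stall workaround, and dispatch to the render or
 * compute path.
 */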
void
genX(blorp_exec)(struct blorp_batch *batch,
                 const struct blorp_params *params)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;

   if (!cmd_buffer->state.current_l3_config) {
      const struct intel_l3_config *cfg =
         intel_get_default_l3_config(cmd_buffer->device->info);
      genX(cmd_buffer_config_l3)(cmd_buffer, cfg);
   }

#if GFX_VER == 7
   /* The MI_LOAD/STORE_REGISTER_MEM commands which BLORP uses to implement
    * indirect fast-clear colors can cause GPU hangs if we don't stall first.
    * See genX(cmd_buffer_mi_memcpy) for more details.
    */
   if (params->src.clear_color_addr.buffer ||
       params->dst.clear_color_addr.buffer) {
      anv_add_pending_pipe_bits(cmd_buffer,
                                ANV_PIPE_CS_STALL_BIT,
                                "before blorp prep fast clear");
   }
#endif

   if (batch->flags & BLORP_BATCH_USE_COMPUTE)
      blorp_exec_on_compute(batch, params);
   else
      blorp_exec_on_render(batch, params);
}

static void
blorp_emit_pre_draw(struct blorp_batch *batch, const struct blorp_params *params)
{
   /* Not implemented. */
}

static void
blorp_emit_post_draw(struct blorp_batch *batch, const struct blorp_params *params)
{
   /* Not implemented. */
}