/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>

#include "anv_private.h"
#include "anv_measure.h"

/* These are defined in anv_private.h and blorp_genX_exec.h */
#undef __gen_address_type
#undef __gen_user_data
#undef __gen_combine_address

#include "common/intel_l3_config.h"
#include "blorp/blorp_genX_exec.h"

#include "ds/intel_tracepoints.h"

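/* Start/end hooks called by blorp around each operation.  They bracket the
 * work with begin/end tracepoints and record a snapshot for the
 * INTEL_MEASURE infrastructure (see anv_measure.h).
 */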
static void blorp_measure_start(struct blorp_batch *_batch,
                                const struct blorp_params *params)
{
   struct anv_cmd_buffer *cmd_buffer = _batch->driver_batch;
   trace_intel_begin_blorp(&cmd_buffer->trace);
   anv_measure_snapshot(cmd_buffer,
                        params->snapshot_type,
                        NULL, 0);
}

static void blorp_measure_end(struct blorp_batch *_batch,
                              const struct blorp_params *params)
{
   struct anv_cmd_buffer *cmd_buffer = _batch->driver_batch;
   trace_intel_end_blorp(&cmd_buffer->trace,
                         params->x1 - params->x0,
                         params->y1 - params->y0,
                         params->hiz_op,
                         params->fast_clear_op,
                         params->shader_type,
                         params->shader_pipeline);
}

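/* Batch-emission hook for blorp: commands are written straight into the
 * command buffer's own batch.
 */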
static void *
blorp_emit_dwords(struct blorp_batch *batch, unsigned n)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   return anv_batch_emit_dwords(&cmd_buffer->batch, n);
}

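/* Emit a relocation for an address within the batch.  The assert guards
 * against blorp handing us a location outside the command buffer's batch.
 */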
static uint64_t
blorp_emit_reloc(struct blorp_batch *batch,
                 void *location, struct blorp_address address, uint32_t delta)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   assert(cmd_buffer->batch.start <= location &&
          location < cmd_buffer->batch.end);
   return anv_batch_emit_reloc(&cmd_buffer->batch, location,
                               address.buffer, address.offset + delta);
}

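/* Record a relocation for a surface state entry.  With softpin we only need
 * to track the BO (for residency); otherwise we add a relocation and write
 * the presumed address into the surface state ourselves.
 */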
static void
blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset,
                    struct blorp_address address, uint32_t delta)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   VkResult result;

   if (ANV_ALWAYS_SOFTPIN) {
      result = anv_reloc_list_add_bo(&cmd_buffer->surface_relocs,
                                     &cmd_buffer->vk.pool->alloc,
                                     address.buffer);
      if (unlikely(result != VK_SUCCESS))
         anv_batch_set_error(&cmd_buffer->batch, result);
      return;
   }

   uint64_t address_u64 = 0;
   result = anv_reloc_list_add(&cmd_buffer->surface_relocs,
                               &cmd_buffer->vk.pool->alloc,
                               ss_offset, address.buffer,
                               address.offset + delta,
                               &address_u64);
   if (result != VK_SUCCESS)
      anv_batch_set_error(&cmd_buffer->batch, result);

   void *dest = anv_block_pool_map(
      &cmd_buffer->device->surface_state_pool.block_pool, ss_offset, 8);
   write_reloc(cmd_buffer->device, dest, address_u64, false);
}

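/* With softpin the surface address is known up front, so return it directly;
 * otherwise return 0 and let blorp_surface_reloc() patch it in.
 */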
static uint64_t
blorp_get_surface_address(struct blorp_batch *blorp_batch,
                          struct blorp_address address)
{
   if (ANV_ALWAYS_SOFTPIN) {
      struct anv_address anv_addr = {
         .bo = address.buffer,
         .offset = address.offset,
      };
      return anv_address_physical(anv_addr);
   } else {
      /* We'll let blorp_surface_reloc write the address. */
      return 0;
   }
}

#if GFX_VER >= 7 && GFX_VER < 10
static struct blorp_address
blorp_get_surface_base_address(struct blorp_batch *batch)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   return (struct blorp_address) {
      .buffer = cmd_buffer->device->surface_state_pool.block_pool.bo,
      .offset = 0,
   };
}
#endif

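/* State allocation callbacks.  Blorp's dynamic and general state comes out
 * of the same streams the rest of the command buffer uses.
 */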
static void *
blorp_alloc_dynamic_state(struct blorp_batch *batch,
                          uint32_t size,
                          uint32_t alignment,
                          uint32_t *offset)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;

   struct anv_state state =
      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment);

   *offset = state.offset;
   return state.map;
}

UNUSED static void *
blorp_alloc_general_state(struct blorp_batch *batch,
                          uint32_t size,
                          uint32_t alignment,
                          uint32_t *offset)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;

   struct anv_state state =
      anv_state_stream_alloc(&cmd_buffer->general_state_stream, size,
                             alignment);

   *offset = state.offset;
   return state.map;
}

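/* Allocate a binding table plus one surface state per entry.  On allocation
 * failure we simply return; blorp has no mechanism to report the error here,
 * so it is left recorded on the command buffer.
 */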
static void
blorp_alloc_binding_table(struct blorp_batch *batch, unsigned num_entries,
                          unsigned state_size, unsigned state_alignment,
                          uint32_t *bt_offset,
                          uint32_t *surface_offsets, void **surface_maps)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;

   uint32_t state_offset;
   struct anv_state bt_state;

   VkResult result =
      anv_cmd_buffer_alloc_blorp_binding_table(cmd_buffer, num_entries,
                                               &state_offset, &bt_state);
   if (result != VK_SUCCESS)
      return;

   uint32_t *bt_map = bt_state.map;
   *bt_offset = bt_state.offset;

   for (unsigned i = 0; i < num_entries; i++) {
      struct anv_state surface_state =
         anv_cmd_buffer_alloc_surface_state(cmd_buffer);
      bt_map[i] = surface_state.offset + state_offset;
      surface_offsets[i] = surface_state.offset;
      surface_maps[i] = surface_state.map;
   }
}

static uint32_t
blorp_binding_table_offset_to_pointer(struct blorp_batch *batch,
                                      uint32_t offset)
{
   return offset;
}

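/* Vertex data is allocated out of dynamic state, so the returned address
 * points into the dynamic state pool's backing bo.
 */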
static void *
blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size,
                          struct blorp_address *addr)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   struct anv_state vb_state =
      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 64);

   *addr = (struct blorp_address) {
      .buffer = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
      .offset = vb_state.offset,
      .mocs = isl_mocs(&cmd_buffer->device->isl_dev,
                       ISL_SURF_USAGE_VERTEX_BUFFER_BIT, false),
   };

   return vb_state.map;
}

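/* Gfx8+ workaround tracking: vertex buffers that transition across the
 * 32-bit/48-bit address boundary need a VF cache invalidate.  Record blorp's
 * vertex buffer bindings with the common tracking helpers so the right
 * flushes get applied.
 */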
static void
blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *batch,
                                           const struct blorp_address *addrs,
                                           uint32_t *sizes,
                                           unsigned num_vbs)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;

   for (unsigned i = 0; i < num_vbs; i++) {
      struct anv_address anv_addr = {
         .bo = addrs[i].buffer,
         .offset = addrs[i].offset,
      };
      genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer,
                                                     i, anv_addr, sizes[i]);
   }

   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

   /* Technically, we should call this *after* 3DPRIMITIVE but it doesn't
    * really matter for blorp because we never call apply_pipe_flushes after
    * this point.
    */
   genX(cmd_buffer_update_dirty_vbs_for_gfx8_vb_flush)(cmd_buffer, SEQUENTIAL,
                                                       (1 << num_vbs) - 1);
}

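/* Address of the device's dedicated workaround bo, which various hardware
 * workarounds use (typically as a dummy write target).
 */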
UNUSED static struct blorp_address
blorp_get_workaround_address(struct blorp_batch *batch)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;

   return (struct blorp_address) {
      .buffer = cmd_buffer->device->workaround_address.bo,
      .offset = cmd_buffer->device->workaround_address.offset,
   };
}

static void
blorp_flush_range(struct blorp_batch *batch, void *start, size_t size)
{
   /* We don't need to flush states anymore, since everything will be
    * snooped.
    */
}

static const struct intel_l3_config *
blorp_get_l3_config(struct blorp_batch *batch)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   return cmd_buffer->state.current_l3_config;
}

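/* Execute blorp on the render engine: select the 3D pipeline, apply the
 * hardware workarounds that bracket blorp's rendering, and afterwards dirty
 * any state blorp may have clobbered so the next draw re-emits it.
 */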
static void
blorp_exec_on_render(struct blorp_batch *batch,
                     const struct blorp_params *params)
{
   assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);

   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   assert(cmd_buffer->queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT);

   const unsigned scale = params->fast_clear_op ? UINT_MAX : 1;
   genX(cmd_buffer_emit_hashing_mode)(cmd_buffer, params->x1 - params->x0,
                                      params->y1 - params->y0, scale);

#if GFX_VER >= 11
   /* The PIPE_CONTROL command description says:
    *
    *    "Whenever a Binding Table Index (BTI) used by a Render Target
    *     Message points to a different RENDER_SURFACE_STATE, SW must issue a
    *     Render Target Cache Flush by enabling this bit. When render target
    *     flush is set due to new association of BTI, PS Scoreboard Stall bit
    *     must be set in this packet."
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
                             ANV_PIPE_STALL_AT_SCOREBOARD_BIT,
                             "before blorp BTI change");
#endif

   if (params->depth.enabled &&
       !(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL))
      genX(cmd_buffer_emit_gfx12_depth_wa)(cmd_buffer, &params->depth.surf);

   genX(flush_pipeline_select_3d)(cmd_buffer);

   /* Apply any outstanding flushes in case the pipeline select didn't. */
   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

   genX(cmd_buffer_emit_gfx7_depth_flush)(cmd_buffer);

   /* BLORP doesn't do anything fancy with depth such as discards, so we want
    * the PMA fix off.  Also, off is always the safe option.
    */
   genX(cmd_buffer_enable_pma_fix)(cmd_buffer, false);

   blorp_exec(batch, params);

#if GFX_VER >= 11
   /* The PIPE_CONTROL command description says:
    *
    *    "Whenever a Binding Table Index (BTI) used by a Render Target
    *     Message points to a different RENDER_SURFACE_STATE, SW must issue a
    *     Render Target Cache Flush by enabling this bit. When render target
    *     flush is set due to new association of BTI, PS Scoreboard Stall bit
    *     must be set in this packet."
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
                             ANV_PIPE_STALL_AT_SCOREBOARD_BIT,
                             "after blorp BTI change");
#endif

   /* Mark everything dirty except the state that blorp does not touch, so
    * that it all gets re-emitted on the next draw.
    */
   anv_cmd_dirty_mask_t dirty = ~(ANV_CMD_DIRTY_INDEX_BUFFER |
                                  ANV_CMD_DIRTY_XFB_ENABLE);

   BITSET_DECLARE(dyn_dirty, MESA_VK_DYNAMIC_GRAPHICS_STATE_ENUM_MAX);
   BITSET_ONES(dyn_dirty);
   BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE);
   BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_VP_SCISSOR_COUNT);
   BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_VP_SCISSORS);
   BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE);
   BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_FSR);
   BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS);
   if (!params->wm_prog_data) {
      BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES);
      BITSET_CLEAR(dyn_dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP);
   }

   cmd_buffer->state.gfx.vb_dirty = ~0;
   cmd_buffer->state.gfx.dirty |= dirty;
   BITSET_OR(cmd_buffer->vk.dynamic_graphics_state.dirty,
             cmd_buffer->vk.dynamic_graphics_state.dirty, dyn_dirty);
   cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS;
}

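/* Execute blorp on the compute engine: select the GPGPU pipeline and dirty
 * the compute push constants afterwards.
 */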
static void
blorp_exec_on_compute(struct blorp_batch *batch,
                      const struct blorp_params *params)
{
   assert(batch->flags & BLORP_BATCH_USE_COMPUTE);

   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
   assert(cmd_buffer->queue_family->queueFlags & VK_QUEUE_COMPUTE_BIT);

   genX(flush_pipeline_select_gpgpu)(cmd_buffer);

   /* Apply any outstanding flushes in case the pipeline select didn't. */
   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

   blorp_exec(batch, params);

   cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
}

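/* Main entry point, called through blorp's driver hooks.  Make sure an L3
 * configuration has been programmed before emitting anything, then dispatch
 * to the render or compute path.
 */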
void
genX(blorp_exec)(struct blorp_batch *batch,
                 const struct blorp_params *params)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;

   if (!cmd_buffer->state.current_l3_config) {
      const struct intel_l3_config *cfg =
         intel_get_default_l3_config(&cmd_buffer->device->info);
      genX(cmd_buffer_config_l3)(cmd_buffer, cfg);
   }

#if GFX_VER == 7
   /* The MI_LOAD/STORE_REGISTER_MEM commands which BLORP uses to implement
    * indirect fast-clear colors can cause GPU hangs if we don't stall first.
    * See genX(cmd_buffer_mi_memcpy) for more details.
    */
   if (params->src.clear_color_addr.buffer ||
       params->dst.clear_color_addr.buffer) {
      anv_add_pending_pipe_bits(cmd_buffer,
                                ANV_PIPE_CS_STALL_BIT,
                                "before blorp prep fast clear");
   }
#endif

   if (batch->flags & BLORP_BATCH_USE_COMPUTE)
      blorp_exec_on_compute(batch, params);
   else
      blorp_exec_on_render(batch, params);
}