/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/*
 * NOTE: This header can be included multiple times, even from the same file.
 */

/*
 * Gen-specific function declarations. This header must *not* be included
 * directly. Instead, it is included multiple times by anv_private.h.
 *
 * In this header file, the usual genX() macro is available.
 */

#ifndef ANV_PRIVATE_H
#error This file is included by means other than anv_private.h
#endif

struct intel_sample_positions;
struct intel_urb_config;
struct anv_async_submit;
struct anv_embedded_sampler;
struct anv_pipeline_embedded_sampler_binding;
struct anv_trtt_bind;

typedef struct nir_builder nir_builder;
typedef struct nir_shader nir_shader;

void genX(init_physical_device_state)(struct anv_physical_device *device);

VkResult genX(init_device_state)(struct anv_device *device);

void genX(init_cps_device_state)(struct anv_device *device);

const uint32_t *genX(libanv_spv)(uint32_t *out_size);

uint32_t genX(call_internal_shader)(nir_builder *b,
                                    enum anv_internal_kernel_name shader_name);

void
genX(set_fast_clear_state)(struct anv_cmd_buffer *cmd_buffer,
                           const struct anv_image *image,
                           const enum isl_format format,
                           const struct isl_swizzle swizzle,
                           union isl_color_value clear_color);

void
genX(cmd_buffer_load_clear_color)(struct anv_cmd_buffer *cmd_buffer,
                                  struct anv_state surface_state,
                                  const struct anv_image_view *iview);

void genX(cmd_buffer_emit_bt_pool_base_address)(struct anv_cmd_buffer *cmd_buffer);

void genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer);

void genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer);
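
/* Typical usage (a sketch, not the only valid pattern): callers first
 * accumulate ANV_PIPE_* bits on the command buffer and then apply them all
 * at once before emitting work that depends on the flush:
 *
 *    anv_add_pending_pipe_bits(cmd_buffer, ANV_PIPE_CS_STALL_BIT, "reason");
 *    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
 */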

void
genX(cmd_buffer_update_color_aux_op)(struct anv_cmd_buffer *cmd_buffer,
                                     enum isl_aux_op aux_op);

void genX(cmd_buffer_emit_gfx12_depth_wa)(struct anv_cmd_buffer *cmd_buffer,
                                          const struct isl_surf *surf);

void genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(struct anv_cmd_buffer *cmd_buffer,
                                                    int vb_index,
                                                    struct anv_address vb_address,
                                                    uint32_t vb_size);
void genX(cmd_buffer_update_dirty_vbs_for_gfx8_vb_flush)(struct anv_cmd_buffer *cmd_buffer,
                                                         uint32_t access_type,
                                                         uint64_t vb_used);

void genX(cmd_buffer_emit_hashing_mode)(struct anv_cmd_buffer *cmd_buffer,
                                        unsigned width, unsigned height,
                                        unsigned scale);

void genX(urb_workaround)(struct anv_cmd_buffer *cmd_buffer,
                          const struct intel_urb_config *urb_cfg);

void genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer);
void genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer);
void genX(emit_pipeline_select)(struct anv_batch *batch, uint32_t pipeline,
                                const struct anv_device *device);

void genX(apply_task_urb_workaround)(struct anv_cmd_buffer *cmd_buffer);

void genX(batch_emit_vertex_input)(struct anv_batch *batch,
                                   struct anv_device *device,
                                   struct anv_graphics_pipeline *pipeline,
                                   const struct vk_vertex_input_state *vi);

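/* A lower-level variant of the flush helper above: emits into `batch` the
 * PIPE_CONTROLs required by `bits`, reports the flush bits it actually
 * emitted through `emitted_flush_bits`, and returns the bits still left
 * pending. (Sketch of the contract; the authoritative behavior lives in the
 * genX_cmd_buffer.c implementation.)
 */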
enum anv_pipe_bits
genX(emit_apply_pipe_flushes)(struct anv_batch *batch,
                              struct anv_device *device,
                              uint32_t current_pipeline,
                              enum anv_pipe_bits bits,
                              enum anv_pipe_bits *emitted_flush_bits);
void
genX(invalidate_aux_map)(struct anv_batch *batch,
                         struct anv_device *device,
                         enum intel_engine_class engine_class,
                         enum anv_pipe_bits bits);

#if INTEL_WA_14018283232_GFX_VER
void genX(batch_emit_wa_14018283232)(struct anv_batch *batch);

static inline void
genX(cmd_buffer_ensure_wa_14018283232)(struct anv_cmd_buffer *cmd_buffer,
                                       bool toggle)
{
   struct anv_gfx_dynamic_state *hw_state =
      &cmd_buffer->state.gfx.dyn_state;
   if (intel_needs_workaround(cmd_buffer->device->info, 14018283232) &&
       hw_state->wa_14018283232_toggle != toggle) {
      hw_state->wa_14018283232_toggle = toggle;
      BITSET_SET(hw_state->dirty, ANV_GFX_STATE_WA_14018283232);
      genX(batch_emit_wa_14018283232)(&cmd_buffer->batch);
   }
}
#endif

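/* Returns true when the coarse-pixel state actually changed (and the
 * ANV_GFX_STATE_COARSE_STATE dirty bit was set). This is only relevant when
 * Wa_18038825448 applies; on other generations the function is a no-op that
 * returns false, as the body below shows.
 */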
static inline bool
genX(cmd_buffer_set_coarse_pixel_active)(struct anv_cmd_buffer *cmd_buffer,
                                         enum anv_coarse_pixel_state state)
{
#if INTEL_WA_18038825448_GFX_VER
   struct anv_cmd_graphics_state *gfx =
      &cmd_buffer->state.gfx;
   if (intel_needs_workaround(cmd_buffer->device->info, 18038825448) &&
       gfx->dyn_state.coarse_state != state) {
      gfx->dyn_state.coarse_state = state;
      BITSET_SET(gfx->dyn_state.dirty, ANV_GFX_STATE_COARSE_STATE);
      return true;
   }
   return false;
#else
   return false;
#endif
}

void genX(emit_so_memcpy_init)(struct anv_memcpy_state *state,
                               struct anv_device *device,
                               struct anv_cmd_buffer *cmd_buffer,
                               struct anv_batch *batch);

void genX(emit_so_memcpy_fini)(struct anv_memcpy_state *state);

void genX(emit_so_memcpy_end)(struct anv_memcpy_state *state);

void genX(emit_so_memcpy)(struct anv_memcpy_state *state,
                          struct anv_address dst, struct anv_address src,
                          uint32_t size);

void genX(emit_l3_config)(struct anv_batch *batch,
                          const struct anv_device *device,
                          const struct intel_l3_config *cfg);

void genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
                                const struct intel_l3_config *cfg);

void genX(flush_descriptor_buffers)(struct anv_cmd_buffer *cmd_buffer,
                                    struct anv_cmd_pipeline_state *pipe_state);

uint32_t
genX(cmd_buffer_flush_descriptor_sets)(struct anv_cmd_buffer *cmd_buffer,
                                       struct anv_cmd_pipeline_state *pipe_state,
                                       const VkShaderStageFlags dirty,
                                       struct anv_shader_bin **shaders,
                                       uint32_t num_shaders);

void genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer);

void genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer);

void genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer,
                                     bool enable);

void genX(cmd_buffer_mark_image_written)(struct anv_cmd_buffer *cmd_buffer,
                                         const struct anv_image *image,
                                         VkImageAspectFlagBits aspect,
                                         enum isl_aux_usage aux_usage,
                                         uint32_t level,
                                         uint32_t base_layer,
                                         uint32_t layer_count);

void genX(cmd_emit_conditional_render_predicate)(struct anv_cmd_buffer *cmd_buffer);

struct anv_address genX(cmd_buffer_ray_query_globals)(struct anv_cmd_buffer *cmd_buffer);

void genX(cmd_buffer_ensure_cfe_state)(struct anv_cmd_buffer *cmd_buffer,
                                       uint32_t total_scratch);

void
genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
                     const struct intel_l3_config *l3_config,
                     VkShaderStageFlags active_stages,
                     const struct intel_urb_config *urb_cfg_in,
                     struct intel_urb_config *urb_cfg_out,
                     enum intel_urb_deref_block_size *deref_block_size);

void genX(emit_sample_pattern)(struct anv_batch *batch,
                               const struct vk_sample_locations_state *sl);

void genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
                                struct anv_address dst, struct anv_address src,
                                uint32_t size);

void genX(cmd_buffer_dispatch_kernel)(struct anv_cmd_buffer *cmd_buffer,
                                      struct anv_kernel *kernel,
                                      const uint32_t *global_size, /* NULL for indirect */
                                      uint32_t arg_count,
                                      const struct anv_kernel_arg *args);

void genX(blorp_init_dynamic_states)(struct blorp_context *context);

void genX(blorp_exec)(struct blorp_batch *batch,
                      const struct blorp_params *params);

void genX(batch_emit_secondary_call)(struct anv_batch *batch,
                                     struct anv_device *device,
                                     struct anv_address secondary_addr,
                                     struct anv_address secondary_return_addr);

void *genX(batch_emit_return)(struct anv_batch *batch);

void genX(cmd_emit_timestamp)(struct anv_batch *batch,
                              struct anv_device *device,
                              struct anv_address addr,
                              enum anv_timestamp_capture_type type,
                              void *data);

void genX(cmd_capture_data)(struct anv_batch *batch,
                            struct anv_device *device,
                            struct anv_address dst_addr,
                            struct anv_address src_addr,
                            uint32_t size_B);

void
genX(batch_emit_post_3dprimitive_was)(struct anv_batch *batch,
                                      const struct anv_device *device,
                                      uint32_t primitive_topology,
                                      uint32_t vertex_count);

void genX(batch_emit_fast_color_dummy_blit)(struct anv_batch *batch,
                                            struct anv_device *device);

void
genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline,
                             const struct vk_graphics_pipeline_state *state);

void
genX(compute_pipeline_emit)(struct anv_compute_pipeline *pipeline);

void
genX(ray_tracing_pipeline_emit)(struct anv_ray_tracing_pipeline *pipeline);

#if GFX_VERx10 >= 300
#define anv_shader_bin_get_bsr(bin, local_arg_offset) ({                  \
   assert((local_arg_offset) % 8 == 0);                                   \
   const struct brw_bs_prog_data *prog_data =                             \
      brw_bs_prog_data_const(bin->prog_data);                             \
   assert(prog_data->simd_size == 16);                                    \
                                                                          \
   (struct GENX(BINDLESS_SHADER_RECORD)) {                                \
      .OffsetToLocalArguments = (local_arg_offset) / 8,                   \
      .BindlessShaderDispatchMode = RT_SIMD16,                            \
      .KernelStartPointer = bin->kernel.offset,                           \
      .RegistersPerThread = ptl_register_blocks(prog_data->base.grf_used), \
   };                                                                     \
})
#else
#define anv_shader_bin_get_bsr(bin, local_arg_offset) ({                  \
   assert((local_arg_offset) % 8 == 0);                                   \
   const struct brw_bs_prog_data *prog_data =                             \
      brw_bs_prog_data_const(bin->prog_data);                             \
   assert(prog_data->simd_size == 8 || prog_data->simd_size == 16);       \
                                                                          \
   (struct GENX(BINDLESS_SHADER_RECORD)) {                                \
      .OffsetToLocalArguments = (local_arg_offset) / 8,                   \
      .BindlessShaderDispatchMode =                                       \
         prog_data->simd_size == 16 ? RT_SIMD16 : RT_SIMD8,               \
      .KernelStartPointer = bin->kernel.offset,                           \
   };                                                                     \
})
#endif
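
/* Usage sketch: the record is typically packed into a shader binding table
 * entry with the genxml-generated pack helper (`dst` below is a hypothetical
 * destination pointer):
 *
 *    struct GENX(BINDLESS_SHADER_RECORD) bsr =
 *       anv_shader_bin_get_bsr(bin, local_arg_offset);
 *    GENX(BINDLESS_SHADER_RECORD_pack)(NULL, dst, &bsr);
 */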

void
genX(batch_set_preemption)(struct anv_batch *batch,
                           struct anv_device *device,
                           uint32_t current_pipeline,
                           bool value);

void
genX(cmd_buffer_set_preemption)(struct anv_cmd_buffer *cmd_buffer, bool value);

void
genX(batch_emit_pipe_control)(struct anv_batch *batch,
                              const struct intel_device_info *devinfo,
                              uint32_t current_pipeline,
                              enum anv_pipe_bits bits,
                              const char *reason);

void
genX(batch_emit_pipe_control_write)(struct anv_batch *batch,
                                    const struct intel_device_info *devinfo,
                                    uint32_t current_pipeline,
                                    uint32_t post_sync_op,
                                    struct anv_address address,
                                    uint32_t imm_data,
                                    enum anv_pipe_bits bits,
                                    const char *reason);

#define genx_batch_emit_pipe_control(a, b, c, d) \
genX(batch_emit_pipe_control) (a, b, c, d, __func__)

#define genx_batch_emit_pipe_control_write(a, b, c, d, e, f, g) \
genX(batch_emit_pipe_control_write) (a, b, c, d, e, f, g, __func__)
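
/* The two wrappers above pass __func__ as the trailing `reason` argument, so
 * a call such as
 *
 *    genx_batch_emit_pipe_control(batch, devinfo, current_pipeline,
 *                                 ANV_PIPE_CS_STALL_BIT);
 *
 * gets attributed to the calling function in debug output.
 */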

void genX(batch_emit_breakpoint)(struct anv_batch *batch,
                                 struct anv_device *device,
                                 bool emit_before_draw);

static inline void
genX(emit_breakpoint)(struct anv_batch *batch,
                      struct anv_device *device,
                      bool emit_before_draw)
{
   if (INTEL_DEBUG(DEBUG_DRAW_BKP))
      genX(batch_emit_breakpoint)(batch, device, emit_before_draw);
}

void
genX(cmd_buffer_begin_companion)(struct anv_cmd_buffer *buffer,
                                 VkCommandBufferLevel level);

struct anv_state
genX(cmd_buffer_begin_companion_rcs_syncpoint)(struct anv_cmd_buffer *cmd_buffer);

void
genX(cmd_buffer_end_companion_rcs_syncpoint)(struct anv_cmd_buffer *cmd_buffer,
                                             struct anv_state syncpoint);
void
genX(cmd_write_buffer_cp)(struct anv_cmd_buffer *cmd_buffer,
                          VkDeviceAddress dstAddr,
                          void *data, uint32_t size);

void
genX(emit_simple_shader_init)(struct anv_simple_shader *state);

void
genX(emit_simple_shader_dispatch)(struct anv_simple_shader *state,
                                  uint32_t num_threads,
                                  struct anv_state push_state);

struct anv_state
genX(simple_shader_alloc_push)(struct anv_simple_shader *state, uint32_t size);

struct anv_address
genX(simple_shader_push_state_address)(struct anv_simple_shader *state,
                                       struct anv_state push_state);

void
genX(emit_simple_shader_end)(struct anv_simple_shader *state);

VkResult genX(init_trtt_context_state)(struct anv_async_submit *submit);

void genX(write_trtt_entries)(struct anv_async_submit *submit,
                              struct anv_trtt_bind *l3l2_binds,
                              uint32_t n_l3l2_binds,
                              struct anv_trtt_bind *l1_binds,
                              uint32_t n_l1_binds);

void genX(async_submit_end)(struct anv_async_submit *submit);

void
genX(cmd_buffer_emit_push_descriptor_buffer_surface)(struct anv_cmd_buffer *cmd_buffer,
                                                     struct anv_descriptor_set *set);

void
genX(cmd_buffer_emit_push_descriptor_surfaces)(struct anv_cmd_buffer *cmd_buffer,
                                               struct anv_descriptor_set *set);

static inline VkShaderStageFlags
genX(cmd_buffer_flush_push_descriptors)(struct anv_cmd_buffer *cmd_buffer,
                                        struct anv_cmd_pipeline_state *state,
                                        struct anv_pipeline *pipeline)
{
   if (!pipeline->use_push_descriptor && !pipeline->use_push_descriptor_buffer)
      return 0;

   assert(pipeline->layout.push_descriptor_set_index != -1);
   struct anv_descriptor_set *set =
      state->descriptors[pipeline->layout.push_descriptor_set_index];
   assert(set->is_push);

   const VkShaderStageFlags push_buffer_dirty =
      cmd_buffer->state.push_descriptors_dirty &
      pipeline->use_push_descriptor_buffer;
   if (push_buffer_dirty) {
      if (set->desc_surface_state.map == NULL)
         genX(cmd_buffer_emit_push_descriptor_buffer_surface)(cmd_buffer, set);

      /* Force the next push descriptor update to allocate a new descriptor
       * set.
       */
      state->push_descriptor.set_used_on_gpu = true;
   }

   const VkShaderStageFlags push_descriptor_dirty =
      cmd_buffer->state.push_descriptors_dirty & pipeline->use_push_descriptor;
   if (push_descriptor_dirty) {
      genX(cmd_buffer_emit_push_descriptor_surfaces)(cmd_buffer, set);

      /* Force the next push descriptor update to allocate a new descriptor
       * set.
       */
      state->push_descriptor.set_used_on_gpu = true;
   }

   /* Clear the dirty stages now that we've generated the surface states for
    * them.
    */
   cmd_buffer->state.push_descriptors_dirty &=
      ~(push_descriptor_dirty | push_buffer_dirty);

   /* Return the binding table stages that need to be updated */
   return push_buffer_dirty | push_descriptor_dirty;
}
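
/* Callers are expected to OR the returned stage mask back into their dirty
 * descriptor tracking so the binding tables for those stages get re-emitted,
 * e.g. (sketch of typical caller behavior):
 *
 *    dirty |= genX(cmd_buffer_flush_push_descriptors)(cmd_buffer,
 *                                                     state, pipeline);
 */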

void genX(emit_embedded_sampler)(struct anv_device *device,
                                 struct anv_embedded_sampler *sampler,
                                 struct anv_pipeline_embedded_sampler_binding *binding);

void
genX(cmd_buffer_dispatch_indirect)(struct anv_cmd_buffer *cmd_buffer,
                                   struct anv_address indirect_addr,
                                   bool is_unaligned_size_x);

void
genX(cmd_dispatch_unaligned)(VkCommandBuffer commandBuffer,
                             uint32_t invocations_x,
                             uint32_t invocations_y,
                             uint32_t invocations_z);