/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef IRIS_BATCH_DOT_H
#define IRIS_BATCH_DOT_H

#include <stdint.h>
#include <stdbool.h>
#include <string.h>

#include "util/u_dynarray.h"
#include "util/perf/u_trace.h"

#include "decoder/intel_decoder.h"
#include "ds/intel_driver_ds.h"
#include "ds/intel_tracepoints.h"

#include "iris_fence.h"
#include "iris_fine_fence.h"

struct iris_context;

/* The kernel assumes batchbuffers are smaller than 256kB. */
#define MAX_BATCH_SIZE (256 * 1024)

/* Terminating the batch takes either 4 bytes for MI_BATCH_BUFFER_END or 12
 * bytes for MI_BATCH_BUFFER_START (when chaining).  Plus another 24 bytes for
 * the seqno write (using PIPE_CONTROL), and another 24 bytes for the ISP
 * invalidation pipe control, for a worst case of 12 + 24 + 24 = 60 bytes.
 */
#define BATCH_RESERVED 60

/* Our target batch size - flush approximately at this point. */
#define BATCH_SZ (64 * 1024 - BATCH_RESERVED)

enum iris_batch_name {
   IRIS_BATCH_RENDER,
   IRIS_BATCH_COMPUTE,
   IRIS_BATCH_BLITTER,
};

/* Same definition as drm_i915_gem_exec_fence so drm_i915_gem_execbuffer2
 * can directly use exec_fences without extra memory allocation
 */
struct iris_batch_fence {
   uint32_t handle;

#define IRIS_BATCH_FENCE_WAIT (1 << 0)
#define IRIS_BATCH_FENCE_SIGNAL (1 << 1)
   uint32_t flags;
};
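
/* The layout match is what lets the exec_fences dynarray below be handed to
 * the kernel directly.  A minimal sketch of how that could be sanity-checked
 * (assuming drm-uapi/i915_drm.h were included here, which it is not):
 *
 *    STATIC_ASSERT(sizeof(struct iris_batch_fence) ==
 *                  sizeof(struct drm_i915_gem_exec_fence));
 *    STATIC_ASSERT(IRIS_BATCH_FENCE_WAIT == I915_EXEC_FENCE_WAIT);
 *    STATIC_ASSERT(IRIS_BATCH_FENCE_SIGNAL == I915_EXEC_FENCE_SIGNAL);
 */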

struct iris_batch {
   struct iris_context *ice;
   struct iris_screen *screen;
   struct util_debug_callback *dbg;
   struct pipe_device_reset_callback *reset;

   /** What batch is this? (e.g. IRIS_BATCH_RENDER/COMPUTE) */
   enum iris_batch_name name;

   /** Current batchbuffer being queued up. */
   struct iris_bo *bo;
   void *map;
   void *map_next;

   /** Size of the primary batch being submitted to execbuf (in bytes). */
   unsigned primary_batch_size;

   /** Total size of all chained batches (in bytes). */
   unsigned total_chained_batch_size;

   /** Last binder address set in this hardware context. */
   uint64_t last_binder_address;

   union {
      struct {
         uint32_t ctx_id;
         uint32_t exec_flags;
      } i915;
      struct {
         uint32_t exec_queue_id;
      } xe;
   };

   /** A list of all BOs referenced by this batch */
   struct iris_bo **exec_bos;
   int exec_count;
   int exec_array_size;
   /** Bitset of whether this batch writes to BO `i'. */
   BITSET_WORD *bos_written;
   uint32_t max_gem_handle;

   /** Whether INTEL_BLACKHOLE_RENDER is enabled in the batch (i.e., the
    * first instruction is an MI_BATCH_BUFFER_END).
    */
   bool noop_enabled;

   /** Whether the first utrace point has been recorded. */
   bool begin_trace_recorded;

   /**
    * A list of iris_syncobjs associated with this batch.
    *
    * The first list entry will always be a signalling sync-point, indicating
    * that this batch has completed.  The others are likely to be sync-points
    * to wait on before executing the batch.
    */
   struct util_dynarray syncobjs;

   /** A list of iris_batch_fences to have execbuf signal or wait on */
   struct util_dynarray exec_fences;

   /** The amount of aperture space (in bytes) used by all exec_bos */
   int aperture_space;

   struct {
      /** Uploader to use for sequence numbers */
      struct u_upload_mgr *uploader;

      /** GPU buffer and CPU map where our seqno's will be written. */
      struct iris_state_ref ref;
      uint32_t *map;

      /** The sequence number to write the next time we add a fence. */
      uint32_t next;
   } fine_fences;

   /** A seqno (and syncobj) for the last batch that was submitted. */
   struct iris_fine_fence *last_fence;

   /** List of other batches which we might need to flush to use a BO */
   struct iris_batch *other_batches[IRIS_BATCH_COUNT - 1];
   unsigned num_other_batches;

   /**
    * Table containing struct iris_bo * that have been accessed within this
    * batchbuffer and would need flushing before being used with a different
    * aux mode.
    */
   struct hash_table *bo_aux_modes;

   struct intel_batch_decode_ctx decoder;
   struct hash_table_u64 *state_sizes;

   /**
    * Matrix representation of the cache coherency status of the GPU at the
    * current end point of the batch.  For every i and j,
    * coherent_seqnos[i][j] denotes the seqno of the most recent flush of
    * cache domain j visible to cache domain i (which obviously implies that
    * coherent_seqnos[i][i] is the most recent flush of cache domain i).  This
    * can be used to efficiently determine whether synchronization is
    * necessary before accessing data from cache domain i if it was previously
    * accessed from another cache domain j.
    */
   uint64_t coherent_seqnos[NUM_IRIS_DOMAINS][NUM_IRIS_DOMAINS];
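
   /* A sketch of how this matrix is typically consulted (the real logic
    * lives in the barrier/resolve code; the bo_last_write_seqno name below
    * is purely illustrative): data last written via domain j needs
    * synchronization before being accessed via domain i whenever the write
    * happened after the last flush of j that i can see, i.e. roughly:
    *
    *    if (bo_last_write_seqno[j] > batch->coherent_seqnos[i][j])
    *       emit the appropriate flush/invalidate for the (i, j) pair;
    */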

   /**
    * A vector representing the cache coherency status of the L3.  For each
    * cache domain i, l3_coherent_seqnos[i] denotes the seqno of the most
    * recent flush of that domain which is visible to L3 clients.
    */
   uint64_t l3_coherent_seqnos[NUM_IRIS_DOMAINS];

   /**
    * Sequence number used to track the completion of any subsequent memory
    * operations in the batch until the next sync boundary.
    */
   uint64_t next_seqno;

   /** Have we emitted any draw calls to this batch? */
   bool contains_draw;

   /** Have we emitted any draw calls with next_seqno? */
   bool contains_draw_with_next_seqno;

   /** Batch contains fence signal operation. */
   bool contains_fence_signal;

   /**
    * Number of times iris_batch_sync_region_start() has been called without a
    * matching iris_batch_sync_region_end() on this batch.
    */
   uint32_t sync_region_depth;

   uint32_t last_aux_map_state;
   struct iris_measure_batch *measure;

   /** Where tracepoints are recorded */
   struct u_trace trace;

   /** Batch wrapper structure for perfetto */
   struct intel_ds_queue ds;

   uint8_t num_3d_primitives_emitted;
};

void iris_init_batches(struct iris_context *ice);
void iris_chain_to_new_batch(struct iris_batch *batch);
void iris_destroy_batches(struct iris_context *ice);
void iris_batch_maybe_flush(struct iris_batch *batch, unsigned estimate);

void iris_batch_maybe_begin_frame(struct iris_batch *batch);

void _iris_batch_flush(struct iris_batch *batch, const char *file, int line);
#define iris_batch_flush(batch) _iris_batch_flush((batch), __FILE__, __LINE__)

bool iris_batch_references(struct iris_batch *batch, struct iris_bo *bo);

bool iris_batch_prepare_noop(struct iris_batch *batch, bool noop_enable);

void iris_use_pinned_bo(struct iris_batch *batch, struct iris_bo *bo,
                        bool writable, enum iris_domain access);

enum pipe_reset_status iris_batch_check_for_reset(struct iris_batch *batch);

bool iris_batch_syncobj_to_sync_file_fd(struct iris_batch *batch, int *out_fd);

static inline unsigned
iris_batch_bytes_used(struct iris_batch *batch)
{
   return batch->map_next - batch->map;
}

static inline uint64_t
iris_batch_current_address_u64(struct iris_batch *batch)
{
   return batch->bo->address + (batch->map_next - batch->map);
}

/**
 * Ensure the current command buffer has \param size bytes of space
 * remaining.  If not, this creates a secondary batch buffer and emits
 * a jump from the primary batch to the start of the secondary.
 *
 * Most callers want iris_get_command_space() instead.
 */
static inline void
iris_require_command_space(struct iris_batch *batch, unsigned size)
{
   const unsigned required_bytes = iris_batch_bytes_used(batch) + size;

   if (required_bytes >= BATCH_SZ) {
      iris_chain_to_new_batch(batch);
   }
}

/**
 * Allocate space in the current command buffer, and return a pointer
 * to the mapped area so the caller can write commands there.
 *
 * This should be called whenever emitting commands.
 */
static inline void *
iris_get_command_space(struct iris_batch *batch, unsigned bytes)
{
   if (!batch->begin_trace_recorded) {
      batch->begin_trace_recorded = true;
      iris_batch_maybe_begin_frame(batch);
      trace_intel_begin_batch(&batch->trace);
   }
   iris_require_command_space(batch, bytes);
   void *map = batch->map_next;
   batch->map_next += bytes;
   return map;
}
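
/* Minimal usage sketch (illustrative only; real callers are generated by the
 * genxml pack macros): reserve room for two dwords and write them in place
 * through the returned pointer, e.g. a pair of MI_NOOPs (dword 0x0):
 *
 *    uint32_t *dw = iris_get_command_space(batch, 2 * sizeof(uint32_t));
 *    dw[0] = 0;  // MI_NOOP
 *    dw[1] = 0;  // MI_NOOP
 */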

/**
 * Helper to emit GPU commands - allocates space, copies them there.
 */
static inline void
iris_batch_emit(struct iris_batch *batch, const void *data, unsigned size)
{
   void *map = iris_get_command_space(batch, size);
   memcpy(map, data, size);
}
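
/* Usage sketch (hypothetical values): copy an already-packed command from
 * CPU memory into the batch in one call.
 *
 *    const uint32_t noops[2] = { 0, 0 };   // two MI_NOOP dwords
 *    iris_batch_emit(batch, noops, sizeof(noops));
 */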

/**
 * Get a pointer to the batch's signalling syncobj.  Does not refcount.
 */
static inline struct iris_syncobj *
iris_batch_get_signal_syncobj(struct iris_batch *batch)
{
   /* The signalling syncobj is the first one in the list. */
   struct iris_syncobj *syncobj =
      ((struct iris_syncobj **) util_dynarray_begin(&batch->syncobjs))[0];
   return syncobj;
}

/**
 * Take a reference to the batch's signalling syncobj.
 *
 * Callers can use this to wait for the current batch under construction
 * to complete (after flushing it).
 */
static inline void
iris_batch_reference_signal_syncobj(struct iris_batch *batch,
                                    struct iris_syncobj **out_syncobj)
{
   struct iris_syncobj *syncobj = iris_batch_get_signal_syncobj(batch);
   iris_syncobj_reference(batch->screen->bufmgr, out_syncobj, syncobj);
}
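
/* A plausible wait-for-completion pattern built on the helper above (the
 * wait step is only sketched; see iris_fence.h for the actual syncobj wait
 * API):
 *
 *    struct iris_syncobj *syncobj = NULL;
 *    iris_batch_reference_signal_syncobj(batch, &syncobj);
 *    iris_batch_flush(batch);
 *    ... wait on syncobj, then drop the reference via
 *        iris_syncobj_reference(bufmgr, &syncobj, NULL) ...
 */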

/**
 * Record the size of a piece of state for use in INTEL_DEBUG=bat printing.
 */
static inline void
iris_record_state_size(struct hash_table_u64 *ht,
                       uint32_t offset_from_base,
                       uint32_t size)
{
   if (ht) {
      _mesa_hash_table_u64_insert(ht, offset_from_base,
                                  (void *)(uintptr_t) size);
   }
}
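
/* Usage sketch (names are illustrative): after uploading a piece of dynamic
 * state, record its size keyed by its offset from the state base address so
 * the batch decoder can label it in INTEL_DEBUG=bat dumps.
 *
 *    iris_record_state_size(batch->state_sizes, state_offset, state_size);
 */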

/**
 * Mark the start of a region in the batch with stable synchronization
 * sequence number.  Any buffer object accessed by the batch buffer only needs
 * to be marked once (e.g. via iris_bo_bump_seqno()) within a region delimited
 * by iris_batch_sync_region_start() and iris_batch_sync_region_end().
 */
static inline void
iris_batch_sync_region_start(struct iris_batch *batch)
{
   batch->sync_region_depth++;
}

/**
 * Mark the end of a region in the batch with stable synchronization sequence
 * number.  Should be called once after each call to
 * iris_batch_sync_region_start().
 */
static inline void
iris_batch_sync_region_end(struct iris_batch *batch)
{
   assert(batch->sync_region_depth);
   batch->sync_region_depth--;
}
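
/* Typical pattern (sketch; some_bo and the chosen domain are placeholders):
 * bracket the emission of a draw or blit so every BO touched in between only
 * needs its seqno bumped once.
 *
 *    iris_batch_sync_region_start(batch);
 *    iris_use_pinned_bo(batch, some_bo, true, IRIS_DOMAIN_RENDER_WRITE);
 *    ... emit commands referencing some_bo ...
 *    iris_batch_sync_region_end(batch);
 */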

/**
 * Start a new synchronization section at the current point of the batch,
 * unless disallowed by a previous iris_batch_sync_region_start().
 */
static inline void
iris_batch_sync_boundary(struct iris_batch *batch)
{
   if (!batch->sync_region_depth) {
      batch->contains_draw_with_next_seqno = false;
      batch->next_seqno = p_atomic_inc_return(&batch->screen->last_seqno);
      assert(batch->next_seqno > 0);
   }
}

/**
 * Update the cache coherency status of the batch to reflect a flush of the
 * specified caching domain.
 */
static inline void
iris_batch_mark_flush_sync(struct iris_batch *batch,
                           enum iris_domain access)
{
   const struct intel_device_info *devinfo = batch->screen->devinfo;

   if (iris_domain_is_l3_coherent(devinfo, access))
      batch->l3_coherent_seqnos[access] = batch->next_seqno - 1;
   else
      batch->coherent_seqnos[access][access] = batch->next_seqno - 1;
}

/**
 * Update the cache coherency status of the batch to reflect an invalidation
 * of the specified caching domain.  All prior flushes of other caches will be
 * considered visible to the specified caching domain.
 */
static inline void
iris_batch_mark_invalidate_sync(struct iris_batch *batch,
                                enum iris_domain access)
{
   const struct intel_device_info *devinfo = batch->screen->devinfo;

   for (unsigned i = 0; i < NUM_IRIS_DOMAINS; i++) {
      if (i == access)
         continue;

      if (iris_domain_is_l3_coherent(devinfo, access)) {
         if (iris_domain_is_read_only(access)) {
            /* Invalidating an L3-coherent read-only domain "access" also
             * triggers an invalidation of any matching L3 cachelines as well.
             *
             * If domain 'i' is L3-coherent, it sees the latest data in L3,
             * otherwise it sees the latest globally-observable data.
             */
            batch->coherent_seqnos[access][i] =
               iris_domain_is_l3_coherent(devinfo, i) ?
               batch->l3_coherent_seqnos[i] : batch->coherent_seqnos[i][i];
         } else {
            /* Invalidating L3-coherent write domains does not trigger
             * an invalidation of any matching L3 cachelines, however.
             *
             * It sees the latest data from domain i visible to L3 clients.
             */
            batch->coherent_seqnos[access][i] = batch->l3_coherent_seqnos[i];
         }
      } else {
         /* "access" isn't L3-coherent, so invalidating it means it sees the
          * most recent globally-observable data from domain i.
          */
         batch->coherent_seqnos[access][i] = batch->coherent_seqnos[i][i];
      }
   }
}
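
/* Rough illustration (using the render and sampler domains as an example;
 * the exact seqnos copied depend on which domains are L3-coherent on the
 * device): after a render-cache flush,
 *
 *    iris_batch_mark_flush_sync(batch, IRIS_DOMAIN_RENDER_WRITE);
 *
 * followed by a sampler-cache invalidation,
 *
 *    iris_batch_mark_invalidate_sync(batch, IRIS_DOMAIN_SAMPLER_READ);
 *
 * the matrix records that the sampler domain can see everything flushed out
 * of the render cache up to that point, so reading that data through the
 * sampler needs no further synchronization within this batch.
 */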

/**
 * Update the cache coherency status of the batch to reflect a reset.  All
 * previously accessed data can be considered visible to every caching domain
 * thanks to the kernel's heavyweight flushing at batch buffer boundaries.
 */
static inline void
iris_batch_mark_reset_sync(struct iris_batch *batch)
{
   for (unsigned i = 0; i < NUM_IRIS_DOMAINS; i++) {
      batch->l3_coherent_seqnos[i] = batch->next_seqno - 1;
      for (unsigned j = 0; j < NUM_IRIS_DOMAINS; j++)
         batch->coherent_seqnos[i][j] = batch->next_seqno - 1;
   }
}

const char *
iris_batch_name_to_string(enum iris_batch_name name);

bool
iris_batch_is_banned(struct iris_bufmgr *bufmgr, int ret);

#define iris_foreach_batch(ice, batch) \
   for (struct iris_batch *batch = &ice->batches[0]; \
        batch <= &ice->batches[((struct iris_screen *)ice->ctx.screen)->devinfo->ver >= 12 ? IRIS_BATCH_BLITTER : IRIS_BATCH_COMPUTE]; \
        ++batch)
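
/* Usage sketch: iterate every batch owned by the context, e.g. to flush them
 * all (the devinfo check in the macro skips the blitter batch on hardware
 * without one):
 *
 *    iris_foreach_batch(ice, batch)
 *       iris_batch_flush(batch);
 */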

void iris_batch_update_syncobjs(struct iris_batch *batch);
unsigned iris_batch_num_fences(struct iris_batch *batch);

void iris_dump_fence_list(struct iris_batch *batch);
void iris_dump_bo_list(struct iris_batch *batch);
void iris_batch_decode_batch(struct iris_batch *batch);

#endif