/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef IRIS_BATCH_DOT_H
#define IRIS_BATCH_DOT_H

#include <stdint.h>
#include <stdbool.h>
#include <string.h>

#include "util/u_dynarray.h"
#include "util/perf/u_trace.h"

#include "decoder/intel_decoder.h"
#include "ds/intel_driver_ds.h"
#include "ds/intel_tracepoints.h"

#include "iris_fence.h"
#include "iris_fine_fence.h"

struct iris_context;

/* The kernel assumes batchbuffers are smaller than 256kB. */
#define MAX_BATCH_SIZE (256 * 1024)

/* Terminating the batch takes either 4 bytes for MI_BATCH_BUFFER_END or 12
 * bytes for MI_BATCH_BUFFER_START (when chaining).  Plus another 24 bytes for
 * the seqno write (using PIPE_CONTROL), and another 24 bytes for the ISP
 * invalidation pipe control.
 */
#define BATCH_RESERVED 60
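
/* Worst case for the reservation, per the comment above: 12 bytes
 * (MI_BATCH_BUFFER_START when chaining) + 24 bytes (seqno PIPE_CONTROL)
 * + 24 bytes (ISP invalidation PIPE_CONTROL) = 60 bytes.
 */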

/* Our target batch size - flush approximately at this point. */
#define BATCH_SZ (64 * 1024 - BATCH_RESERVED)

enum iris_batch_name {
   IRIS_BATCH_RENDER,
   IRIS_BATCH_COMPUTE,
   IRIS_BATCH_BLITTER,
};

/* Same definition as drm_i915_gem_exec_fence so drm_i915_gem_execbuffer2
 * can directly use exec_fences without extra memory allocation
 */
struct iris_batch_fence {
   uint32_t handle;

#define IRIS_BATCH_FENCE_WAIT (1 << 0)
#define IRIS_BATCH_FENCE_SIGNAL (1 << 1)
   uint32_t flags;
};

struct iris_batch {
   struct iris_context *ice;
   struct iris_screen *screen;
   struct util_debug_callback *dbg;
   struct pipe_device_reset_callback *reset;

   /** What batch is this? (e.g. IRIS_BATCH_RENDER/COMPUTE) */
   enum iris_batch_name name;

   /** Current batchbuffer being queued up. */
   struct iris_bo *bo;
   void *map;
   void *map_next;

   /** Size of the primary batch being submitted to execbuf (in bytes). */
   unsigned primary_batch_size;

   /** Total size of all chained batches (in bytes). */
   unsigned total_chained_batch_size;

   /** Last binder address set in this hardware context. */
   uint64_t last_binder_address;

   union {
      struct {
         uint32_t ctx_id;
         uint32_t exec_flags;
      } i915;
      struct {
         uint32_t exec_queue_id;
      } xe;
   };

   /** A list of all BOs referenced by this batch */
   struct iris_bo **exec_bos;
   int exec_count;
   int exec_array_size;
   /** Bitset of whether this batch writes to BO `i'. */
   BITSET_WORD *bos_written;
   uint32_t max_gem_handle;

   /** Whether INTEL_BLACKHOLE_RENDER is enabled in the batch (i.e. the first
    * instruction is an MI_BATCH_BUFFER_END).
    */
   bool noop_enabled;

   /** Whether the first utrace point has been recorded. */
   bool begin_trace_recorded;

   /**
    * A list of iris_syncobjs associated with this batch.
    *
    * The first list entry will always be a signalling sync-point, indicating
    * that this batch has completed.  The others are likely to be sync-points
    * to wait on before executing the batch.
    */
   struct util_dynarray syncobjs;

   /** A list of iris_batch_fences to have execbuf signal or wait on */
   struct util_dynarray exec_fences;

   /** The amount of aperture space (in bytes) used by all exec_bos */
   int aperture_space;

   struct {
      /** Uploader to use for sequence numbers */
      struct u_upload_mgr *uploader;

      /** GPU buffer and CPU map where our seqnos will be written. */
      struct iris_state_ref ref;
      uint32_t *map;

      /** The sequence number to write the next time we add a fence. */
      uint32_t next;
   } fine_fences;

   /** A seqno (and syncobj) for the last batch that was submitted. */
   struct iris_fine_fence *last_fence;

   /** List of other batches which we might need to flush to use a BO */
   struct iris_batch *other_batches[IRIS_BATCH_COUNT - 1];
   unsigned num_other_batches;

   /**
    * Table containing struct iris_bo * that have been accessed within this
    * batchbuffer and would need flushing before being used with a different
    * aux mode.
    */
   struct hash_table *bo_aux_modes;

   struct intel_batch_decode_ctx decoder;
   struct hash_table_u64 *state_sizes;

   /**
    * Matrix representation of the cache coherency status of the GPU at the
    * current end point of the batch.  For every i and j,
    * coherent_seqnos[i][j] denotes the seqno of the most recent flush of
    * cache domain j visible to cache domain i (which obviously implies that
    * coherent_seqnos[i][i] is the most recent flush of cache domain i).  This
    * can be used to efficiently determine whether synchronization is
    * necessary before accessing data from cache domain i if it was previously
    * accessed from another cache domain j.
    */
   uint64_t coherent_seqnos[NUM_IRIS_DOMAINS][NUM_IRIS_DOMAINS];
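
   /* For illustration (not part of the upstream comment): if a buffer was
    * last touched from domain j at some seqno N recorded for that BO (e.g.
    * via iris_bo_bump_seqno(), see below), then a later access from domain i
    * can roughly skip synchronization when coherent_seqnos[i][j] >= N, since
    * a flush of domain j at least as recent as that access is already
    * visible to domain i.  The actual per-BO bookkeeping lives outside this
    * header.
    */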

   /**
    * A vector representing the cache coherency status of the L3.  For each
    * cache domain i, l3_coherent_seqnos[i] denotes the seqno of the most
    * recent flush of that domain which is visible to L3 clients.
    */
   uint64_t l3_coherent_seqnos[NUM_IRIS_DOMAINS];

   /**
    * Sequence number used to track the completion of any subsequent memory
    * operations in the batch until the next sync boundary.
    */
   uint64_t next_seqno;

   /** Have we emitted any draw calls to this batch? */
   bool contains_draw;

   /** Have we emitted any draw calls with next_seqno? */
   bool contains_draw_with_next_seqno;

   /** Batch contains fence signal operation. */
   bool contains_fence_signal;

   /**
    * Number of times iris_batch_sync_region_start() has been called without a
    * matching iris_batch_sync_region_end() on this batch.
    */
   uint32_t sync_region_depth;

   uint32_t last_aux_map_state;
   struct iris_measure_batch *measure;

   /** Where tracepoints are recorded */
   struct u_trace trace;

   /** Batch wrapper structure for perfetto */
   struct intel_ds_queue ds;

   uint8_t num_3d_primitives_emitted;
};

void iris_init_batches(struct iris_context *ice);
void iris_chain_to_new_batch(struct iris_batch *batch);
void iris_destroy_batches(struct iris_context *ice);
void iris_batch_maybe_flush(struct iris_batch *batch, unsigned estimate);

void iris_batch_maybe_begin_frame(struct iris_batch *batch);

void _iris_batch_flush(struct iris_batch *batch, const char *file, int line);
#define iris_batch_flush(batch) _iris_batch_flush((batch), __FILE__, __LINE__)

bool iris_batch_references(struct iris_batch *batch, struct iris_bo *bo);

bool iris_batch_prepare_noop(struct iris_batch *batch, bool noop_enable);

void iris_use_pinned_bo(struct iris_batch *batch, struct iris_bo *bo,
                        bool writable, enum iris_domain access);

enum pipe_reset_status iris_batch_check_for_reset(struct iris_batch *batch);

bool iris_batch_syncobj_to_sync_file_fd(struct iris_batch *batch, int *out_fd);

static inline unsigned
iris_batch_bytes_used(struct iris_batch *batch)
{
   return batch->map_next - batch->map;
}

static inline uint64_t
iris_batch_current_address_u64(struct iris_batch *batch)
{
   return batch->bo->address + (batch->map_next - batch->map);
}

/**
 * Ensure the current command buffer has \param size bytes of space
 * remaining.  If not, this creates a secondary batch buffer and emits
 * a jump from the primary batch to the start of the secondary.
 *
 * Most callers want iris_get_command_space() instead.
 */
static inline void
iris_require_command_space(struct iris_batch *batch, unsigned size)
{
   const unsigned required_bytes = iris_batch_bytes_used(batch) + size;

   if (required_bytes >= BATCH_SZ) {
      iris_chain_to_new_batch(batch);
   }
}

/**
 * Allocate space in the current command buffer, and return a pointer
 * to the mapped area so the caller can write commands there.
 *
 * This should be called whenever emitting commands.
 */
static inline void *
iris_get_command_space(struct iris_batch *batch, unsigned bytes)
{
   if (!batch->begin_trace_recorded) {
      batch->begin_trace_recorded = true;
      iris_batch_maybe_begin_frame(batch);
      trace_intel_begin_batch(&batch->trace);
   }
   iris_require_command_space(batch, bytes);
   void *map = batch->map_next;
   batch->map_next += bytes;
   return map;
}

/**
 * Helper to emit GPU commands - allocates space, copies them there.
 */
static inline void
iris_batch_emit(struct iris_batch *batch, const void *data, unsigned size)
{
   void *map = iris_get_command_space(batch, size);
   memcpy(map, data, size);
}
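
/* Illustrative only (not part of the upstream header): a minimal sketch of
 * how a caller might use iris_batch_emit() to copy a pre-packed command into
 * the batch.  The helper name and the raw dword array are hypothetical; real
 * callers pack commands with the genxml helpers.
 */
static inline void
iris_batch_emit_dwords_example(struct iris_batch *batch,
                               const uint32_t *dwords, unsigned num_dwords)
{
   /* Reserves space (chaining to a new batch if necessary) and copies the
    * packed dwords at the current write pointer.
    */
   iris_batch_emit(batch, dwords, num_dwords * sizeof(uint32_t));
}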

/**
 * Get a pointer to the batch's signalling syncobj.  Does not refcount.
 */
static inline struct iris_syncobj *
iris_batch_get_signal_syncobj(struct iris_batch *batch)
{
   /* The signalling syncobj is the first one in the list. */
   struct iris_syncobj *syncobj =
      ((struct iris_syncobj **) util_dynarray_begin(&batch->syncobjs))[0];
   return syncobj;
}


/**
 * Take a reference to the batch's signalling syncobj.
 *
 * Callers can use this to wait for the current batch under construction
 * to complete (after flushing it).
 */
static inline void
iris_batch_reference_signal_syncobj(struct iris_batch *batch,
                                    struct iris_syncobj **out_syncobj)
{
   struct iris_syncobj *syncobj = iris_batch_get_signal_syncobj(batch);
   iris_syncobj_reference(batch->screen->bufmgr, out_syncobj, syncobj);
}
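
/* For illustration (not in the upstream header): a typical way to wait for
 * the work queued so far is to flush the batch, take a reference to its
 * signalling syncobj via iris_batch_reference_signal_syncobj(), and then
 * wait on that syncobj through the iris_fence/iris_bufmgr syncobj helpers.
 * The exact wait entry point is not declared in this header.
 */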

/**
 * Record the size of a piece of state for use in INTEL_DEBUG=bat printing.
 */
static inline void
iris_record_state_size(struct hash_table_u64 *ht,
                       uint32_t offset_from_base,
                       uint32_t size)
{
   if (ht) {
      _mesa_hash_table_u64_insert(ht, offset_from_base,
                                  (void *)(uintptr_t) size);
   }
}

/**
 * Mark the start of a region in the batch with stable synchronization
 * sequence number.  Any buffer object accessed by the batch buffer only needs
 * to be marked once (e.g. via iris_bo_bump_seqno()) within a region delimited
 * by iris_batch_sync_region_start() and iris_batch_sync_region_end().
 */
static inline void
iris_batch_sync_region_start(struct iris_batch *batch)
{
   batch->sync_region_depth++;
}

/**
 * Mark the end of a region in the batch with stable synchronization sequence
 * number.  Should be called once after each call to
 * iris_batch_sync_region_start().
 */
static inline void
iris_batch_sync_region_end(struct iris_batch *batch)
{
   assert(batch->sync_region_depth);
   batch->sync_region_depth--;
}
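
/* Illustrative only (not part of the upstream header): a minimal sketch of
 * the intended pairing, marking a single BO access within one sync region so
 * its seqno bookkeeping happens once for that region.  The helper name and
 * the choice to pin exactly one BO are hypothetical.
 */
static inline void
iris_batch_access_bo_in_sync_region_example(struct iris_batch *batch,
                                            struct iris_bo *bo,
                                            bool writable,
                                            enum iris_domain access)
{
   iris_batch_sync_region_start(batch);
   /* Adds the BO to the batch's validation list and records the access. */
   iris_use_pinned_bo(batch, bo, writable, access);
   iris_batch_sync_region_end(batch);
}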

/**
 * Start a new synchronization section at the current point of the batch,
 * unless disallowed by a previous iris_batch_sync_region_start().
 */
static inline void
iris_batch_sync_boundary(struct iris_batch *batch)
{
   if (!batch->sync_region_depth) {
      batch->contains_draw_with_next_seqno = false;
      batch->next_seqno = p_atomic_inc_return(&batch->screen->last_seqno);
      assert(batch->next_seqno > 0);
   }
}

/**
 * Update the cache coherency status of the batch to reflect a flush of the
 * specified caching domain.
 */
static inline void
iris_batch_mark_flush_sync(struct iris_batch *batch,
                           enum iris_domain access)
{
   const struct intel_device_info *devinfo = batch->screen->devinfo;

   if (iris_domain_is_l3_coherent(devinfo, access))
      batch->l3_coherent_seqnos[access] = batch->next_seqno - 1;
   else
      batch->coherent_seqnos[access][access] = batch->next_seqno - 1;
}

/**
 * Update the cache coherency status of the batch to reflect an invalidation
 * of the specified caching domain.  All prior flushes of other caches will be
 * considered visible to the specified caching domain.
 */
static inline void
iris_batch_mark_invalidate_sync(struct iris_batch *batch,
                                enum iris_domain access)
{
   const struct intel_device_info *devinfo = batch->screen->devinfo;

   for (unsigned i = 0; i < NUM_IRIS_DOMAINS; i++) {
      if (i == access)
         continue;

      if (iris_domain_is_l3_coherent(devinfo, access)) {
         if (iris_domain_is_read_only(access)) {
            /* Invalidating an L3-coherent read-only domain "access" also
             * triggers an invalidation of any matching L3 cachelines as well.
             *
             * If domain 'i' is L3-coherent, it sees the latest data in L3,
             * otherwise it sees the latest globally-observable data.
             */
            batch->coherent_seqnos[access][i] =
               iris_domain_is_l3_coherent(devinfo, i) ?
               batch->l3_coherent_seqnos[i] : batch->coherent_seqnos[i][i];
         } else {
            /* Invalidating L3-coherent write domains does not trigger
             * an invalidation of any matching L3 cachelines, however.
             *
             * It sees the latest data from domain i visible to L3 clients.
             */
            batch->coherent_seqnos[access][i] = batch->l3_coherent_seqnos[i];
         }
      } else {
         /* "access" isn't L3-coherent, so invalidating it means it sees the
          * most recent globally-observable data from domain i.
          */
         batch->coherent_seqnos[access][i] = batch->coherent_seqnos[i][i];
      }
   }
}

/**
 * Update the cache coherency status of the batch to reflect a reset.  All
 * previously accessed data can be considered visible to every caching domain
 * thanks to the kernel's heavyweight flushing at batch buffer boundaries.
 */
static inline void
iris_batch_mark_reset_sync(struct iris_batch *batch)
{
   for (unsigned i = 0; i < NUM_IRIS_DOMAINS; i++) {
      batch->l3_coherent_seqnos[i] = batch->next_seqno - 1;
      for (unsigned j = 0; j < NUM_IRIS_DOMAINS; j++)
         batch->coherent_seqnos[i][j] = batch->next_seqno - 1;
   }
}

const char *
iris_batch_name_to_string(enum iris_batch_name name);

bool
iris_batch_is_banned(struct iris_bufmgr *bufmgr, int ret);

#define iris_foreach_batch(ice, batch)                \
   for (struct iris_batch *batch = &ice->batches[0];  \
        batch <= &ice->batches[((struct iris_screen *)ice->ctx.screen)->devinfo->ver >= 12 ? IRIS_BATCH_BLITTER : IRIS_BATCH_COMPUTE]; \
        ++batch)
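
/* For illustration (not in the upstream header): iris_foreach_batch expands
 * to a for-loop over the context's batches, so a .c file that can see the
 * full struct iris_context definition might use it as:
 *
 *    iris_foreach_batch(ice, batch)
 *       iris_batch_flush(batch);
 *
 * The blitter batch is only included on Ver12+ hardware, matching the upper
 * bound computed in the macro above.
 */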

void iris_batch_update_syncobjs(struct iris_batch *batch);
unsigned iris_batch_num_fences(struct iris_batch *batch);

void iris_dump_fence_list(struct iris_batch *batch);
void iris_dump_bo_list(struct iris_batch *batch);
void iris_batch_decode_batch(struct iris_batch *batch);

#endif