/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef IRIS_BATCH_DOT_H
#define IRIS_BATCH_DOT_H

#include <stdint.h>
#include <stdbool.h>
#include <string.h>

#include "util/u_dynarray.h"
#include "util/perf/u_trace.h"

#include "drm-uapi/i915_drm.h"
#include "common/intel_decoder.h"
#include "ds/intel_driver_ds.h"
#include "ds/intel_tracepoints.h"

#include "iris_fence.h"
#include "iris_fine_fence.h"

struct iris_context;

/* The kernel assumes batchbuffers are smaller than 256kB. */
#define MAX_BATCH_SIZE (256 * 1024)

/* Terminating the batch takes either 4 bytes for MI_BATCH_BUFFER_END or 12
 * bytes for MI_BATCH_BUFFER_START (when chaining).  Plus another 24 bytes for
 * the seqno write (using PIPE_CONTROL), and another 24 bytes for the ISP
 * invalidation pipe control.
 */
#define BATCH_RESERVED 60
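/* (Worst case from the comment above: 12 + 24 + 24 = 60 bytes, hence the
 * value of BATCH_RESERVED.)
 */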

/* Our target batch size - flush approximately at this point. */
#define BATCH_SZ (64 * 1024 - BATCH_RESERVED)

enum iris_batch_name {
   IRIS_BATCH_RENDER,
   IRIS_BATCH_COMPUTE,
   IRIS_BATCH_BLITTER,
};

struct iris_batch {
   struct iris_context *ice;
   struct iris_screen *screen;
   struct util_debug_callback *dbg;
   struct pipe_device_reset_callback *reset;

   /** What batch is this? (e.g. IRIS_BATCH_RENDER/COMPUTE) */
   enum iris_batch_name name;

   /** Current batchbuffer being queued up. */
   struct iris_bo *bo;
   void *map;
   void *map_next;

   /** Size of the primary batch being submitted to execbuf (in bytes). */
   unsigned primary_batch_size;

   /** Total size of all chained batches (in bytes). */
   unsigned total_chained_batch_size;

   /** Last binder address set in this hardware context. */
   uint64_t last_binder_address;

   uint32_t ctx_id;
   uint32_t exec_flags;
   bool has_engines_context;

   /** A list of all BOs referenced by this batch */
   struct iris_bo **exec_bos;
   int exec_count;
   int exec_array_size;
   /** Bitset of whether this batch writes to BO `i'. */
   BITSET_WORD *bos_written;
   uint32_t max_gem_handle;

   /** Whether INTEL_BLACKHOLE_RENDER is enabled in the batch (aka first
    * instruction is a MI_BATCH_BUFFER_END).
    */
   bool noop_enabled;

   /** Whether the first utrace point has been recorded.
    */
   bool begin_trace_recorded;

   /**
    * A list of iris_syncobjs associated with this batch.
    *
    * The first list entry will always be a signalling sync-point, indicating
    * that this batch has completed.  The others are likely to be sync-points
    * to wait on before executing the batch.
    */
   struct util_dynarray syncobjs;

   /** A list of drm_i915_exec_fences to have execbuf signal or wait on */
   struct util_dynarray exec_fences;

   /** The amount of aperture space (in bytes) used by all exec_bos */
   int aperture_space;

   struct {
      /** Uploader to use for sequence numbers */
      struct u_upload_mgr *uploader;

      /** GPU buffer and CPU map where our seqnos will be written. */
      struct iris_state_ref ref;
      uint32_t *map;

      /** The sequence number to write the next time we add a fence. */
      uint32_t next;
   } fine_fences;

   /** A seqno (and syncobj) for the last batch that was submitted. */
   struct iris_fine_fence *last_fence;

   /** List of other batches which we might need to flush to use a BO */
   struct iris_batch *other_batches[IRIS_BATCH_COUNT - 1];
   unsigned num_other_batches;

   struct {
      /**
       * Set of struct iris_bo * that have been rendered to within this
       * batchbuffer and would need flushing before being used from another
       * cache domain that isn't coherent with it (i.e. the sampler).
       */
      struct hash_table *render;
   } cache;

   struct intel_batch_decode_ctx decoder;
   struct hash_table_u64 *state_sizes;

   /**
    * Matrix representation of the cache coherency status of the GPU at the
    * current end point of the batch.  For every i and j,
    * coherent_seqnos[i][j] denotes the seqno of the most recent flush of
    * cache domain j visible to cache domain i (which obviously implies that
    * coherent_seqnos[i][i] is the most recent flush of cache domain i).  This
    * can be used to efficiently determine whether synchronization is
    * necessary before accessing data from cache domain i if it was previously
    * accessed from another cache domain j.
    */
   uint64_t coherent_seqnos[NUM_IRIS_DOMAINS][NUM_IRIS_DOMAINS];

   /**
    * A vector representing the cache coherency status of the L3.  For each
    * cache domain i, l3_coherent_seqnos[i] denotes the seqno of the most
    * recent flush of that domain which is visible to L3 clients.
    */
   uint64_t l3_coherent_seqnos[NUM_IRIS_DOMAINS];

   /**
    * Sequence number used to track the completion of any subsequent memory
    * operations in the batch until the next sync boundary.
    */
   uint64_t next_seqno;

   /** Have we emitted any draw calls to this batch? */
   bool contains_draw;

   /** Have we emitted any draw calls with next_seqno? */
   bool contains_draw_with_next_seqno;

   /** Batch contains fence signal operation. */
   bool contains_fence_signal;

   /**
    * Number of times iris_batch_sync_region_start() has been called without a
    * matching iris_batch_sync_region_end() on this batch.
    */
   uint32_t sync_region_depth;

   uint32_t last_aux_map_state;
   struct iris_measure_batch *measure;

   /** Where tracepoints are recorded */
   struct u_trace trace;

   /** Batch wrapper structure for perfetto */
   struct intel_ds_queue *ds;
};

void iris_init_batches(struct iris_context *ice, int priority);
void iris_chain_to_new_batch(struct iris_batch *batch);
void iris_destroy_batches(struct iris_context *ice);
void iris_batch_maybe_flush(struct iris_batch *batch, unsigned estimate);

void _iris_batch_flush(struct iris_batch *batch, const char *file, int line);
#define iris_batch_flush(batch) _iris_batch_flush((batch), __FILE__, __LINE__)

bool iris_batch_references(struct iris_batch *batch, struct iris_bo *bo);

bool iris_batch_prepare_noop(struct iris_batch *batch, bool noop_enable);

#define RELOC_WRITE EXEC_OBJECT_WRITE

void iris_use_pinned_bo(struct iris_batch *batch, struct iris_bo *bo,
                        bool writable, enum iris_domain access);

enum pipe_reset_status iris_batch_check_for_reset(struct iris_batch *batch);

static inline unsigned
iris_batch_bytes_used(struct iris_batch *batch)
{
   return batch->map_next - batch->map;
}

/**
 * Ensure the current command buffer has \param size bytes of space
 * remaining.  If not, this creates a secondary batch buffer and emits
 * a jump from the primary batch to the start of the secondary.
 *
 * Most callers want iris_get_command_space() instead.
 */
static inline void
iris_require_command_space(struct iris_batch *batch, unsigned size)
{
   const unsigned required_bytes = iris_batch_bytes_used(batch) + size;

   if (required_bytes >= BATCH_SZ) {
      iris_chain_to_new_batch(batch);
   }
}

/**
 * Allocate space in the current command buffer, and return a pointer
 * to the mapped area so the caller can write commands there.
 *
 * This should be called whenever emitting commands.
 */
static inline void *
iris_get_command_space(struct iris_batch *batch, unsigned bytes)
{
   if (!batch->begin_trace_recorded) {
      batch->begin_trace_recorded = true;
      trace_intel_begin_batch(&batch->trace);
   }
   iris_require_command_space(batch, bytes);
   void *map = batch->map_next;
   batch->map_next += bytes;
   return map;
}
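
/* Usage sketch (illustrative only, not part of the driver): reserve room for
 * two dwords and write them in place.  "header_dword" and "payload_dword"
 * are hypothetical values standing in for a real packed command.
 *
 *    uint32_t *dw = iris_get_command_space(batch, 2 * sizeof(uint32_t));
 *    dw[0] = header_dword;
 *    dw[1] = payload_dword;
 */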

/**
 * Helper to emit GPU commands - allocates space, copies them there.
 */
static inline void
iris_batch_emit(struct iris_batch *batch, const void *data, unsigned size)
{
   void *map = iris_get_command_space(batch, size);
   memcpy(map, data, size);
}
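
/* Usage sketch (illustrative): copy an already-assembled packet into the
 * batch.  "cmd" here is a hypothetical pre-packed command.
 *
 *    uint32_t cmd[2] = { header_dword, payload_dword };
 *    iris_batch_emit(batch, cmd, sizeof(cmd));
 */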

/**
 * Get a pointer to the batch's signalling syncobj.  Does not refcount.
 */
static inline struct iris_syncobj *
iris_batch_get_signal_syncobj(struct iris_batch *batch)
{
   /* The signalling syncobj is the first one in the list. */
   struct iris_syncobj *syncobj =
      ((struct iris_syncobj **) util_dynarray_begin(&batch->syncobjs))[0];
   return syncobj;
}

/**
 * Take a reference to the batch's signalling syncobj.
 *
 * Callers can use this to wait for the current batch under construction
 * to complete (after flushing it).
 */
static inline void
iris_batch_reference_signal_syncobj(struct iris_batch *batch,
                                    struct iris_syncobj **out_syncobj)
{
   struct iris_syncobj *syncobj = iris_batch_get_signal_syncobj(batch);
   iris_syncobj_reference(batch->screen->bufmgr, out_syncobj, syncobj);
}
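
/* Usage sketch (illustrative): take a reference, flush, then wait on the
 * syncobj before releasing it.  The actual wait and unreference calls live
 * in the fence/syncobj code and are not named here.
 *
 *    struct iris_syncobj *syncobj = NULL;
 *    iris_batch_reference_signal_syncobj(batch, &syncobj);
 *    iris_batch_flush(batch);
 *    // ... wait on syncobj via the iris fence API, then drop the reference.
 */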

/**
 * Record the size of a piece of state for use in INTEL_DEBUG=bat printing.
 */
static inline void
iris_record_state_size(struct hash_table_u64 *ht,
                       uint32_t offset_from_base,
                       uint32_t size)
{
   if (ht) {
      _mesa_hash_table_u64_insert(ht, offset_from_base,
                                  (void *)(uintptr_t) size);
   }
}

/**
 * Mark the start of a region in the batch with stable synchronization
 * sequence number.  Any buffer object accessed by the batch buffer only needs
 * to be marked once (e.g. via iris_bo_bump_seqno()) within a region delimited
 * by iris_batch_sync_region_start() and iris_batch_sync_region_end().
 */
static inline void
iris_batch_sync_region_start(struct iris_batch *batch)
{
   batch->sync_region_depth++;
}

/**
 * Mark the end of a region in the batch with stable synchronization sequence
 * number.  Should be called once after each call to
 * iris_batch_sync_region_start().
 */
static inline void
iris_batch_sync_region_end(struct iris_batch *batch)
{
   assert(batch->sync_region_depth);
   batch->sync_region_depth--;
}
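
/* Usage sketch (illustrative): bracket a stretch of command emission so every
 * BO referenced inside only needs its seqno bumped once.  IRIS_DOMAIN_OTHER_READ
 * is used purely as an example access domain from enum iris_domain.
 *
 *    iris_batch_sync_region_start(batch);
 *    iris_use_pinned_bo(batch, bo, false, IRIS_DOMAIN_OTHER_READ);
 *    // ... emit commands that read from bo ...
 *    iris_batch_sync_region_end(batch);
 */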

/**
 * Start a new synchronization section at the current point of the batch,
 * unless disallowed by a previous iris_batch_sync_region_start().
 */
static inline void
iris_batch_sync_boundary(struct iris_batch *batch)
{
   if (!batch->sync_region_depth) {
      batch->contains_draw_with_next_seqno = false;
      batch->next_seqno = p_atomic_inc_return(&batch->screen->last_seqno);
      assert(batch->next_seqno > 0);
   }
}

/**
 * Update the cache coherency status of the batch to reflect a flush of the
 * specified caching domain.
 */
static inline void
iris_batch_mark_flush_sync(struct iris_batch *batch,
                           enum iris_domain access)
{
   const struct intel_device_info *devinfo = &batch->screen->devinfo;

   if (iris_domain_is_l3_coherent(devinfo, access))
      batch->l3_coherent_seqnos[access] = batch->next_seqno - 1;
   else
      batch->coherent_seqnos[access][access] = batch->next_seqno - 1;
}

/**
 * Update the cache coherency status of the batch to reflect an invalidation
 * of the specified caching domain.  All prior flushes of other caches will be
 * considered visible to the specified caching domain.
 */
static inline void
iris_batch_mark_invalidate_sync(struct iris_batch *batch,
                                enum iris_domain access)
{
   const struct intel_device_info *devinfo = &batch->screen->devinfo;

   for (unsigned i = 0; i < NUM_IRIS_DOMAINS; i++) {
      if (i == access)
         continue;

      if (iris_domain_is_l3_coherent(devinfo, access)) {
         if (iris_domain_is_read_only(access)) {
            /* Invalidating an L3-coherent read-only domain "access" also
             * triggers an invalidation of any matching L3 cachelines.
             *
             * If domain 'i' is L3-coherent, it sees the latest data in L3,
             * otherwise it sees the latest globally-observable data.
             */
            batch->coherent_seqnos[access][i] =
               iris_domain_is_l3_coherent(devinfo, i) ?
               batch->l3_coherent_seqnos[i] : batch->coherent_seqnos[i][i];
         } else {
            /* Invalidating L3-coherent write domains does not trigger
             * an invalidation of any matching L3 cachelines, however.
             *
             * It sees the latest data from domain i visible to L3 clients.
             */
            batch->coherent_seqnos[access][i] = batch->l3_coherent_seqnos[i];
         }
      } else {
         /* "access" isn't L3-coherent, so invalidating it means it sees the
          * most recent globally-observable data from domain i.
          */
         batch->coherent_seqnos[access][i] = batch->coherent_seqnos[i][i];
      }
   }
}

/**
 * Update the cache coherency status of the batch to reflect a reset.  All
 * previously accessed data can be considered visible to every caching domain
 * thanks to the kernel's heavyweight flushing at batch buffer boundaries.
 */
static inline void
iris_batch_mark_reset_sync(struct iris_batch *batch)
{
   for (unsigned i = 0; i < NUM_IRIS_DOMAINS; i++) {
      batch->l3_coherent_seqnos[i] = batch->next_seqno - 1;
      for (unsigned j = 0; j < NUM_IRIS_DOMAINS; j++)
         batch->coherent_seqnos[i][j] = batch->next_seqno - 1;
   }
}

const char *
iris_batch_name_to_string(enum iris_batch_name name);

#define iris_foreach_batch(ice, batch)                \
   for (struct iris_batch *batch = &ice->batches[0];  \
        batch <= &ice->batches[((struct iris_screen *)ice->ctx.screen)->devinfo.ver >= 12 ? IRIS_BATCH_BLITTER : IRIS_BATCH_COMPUTE]; \
        ++batch)
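
/* Usage sketch (illustrative): walk every batch owned by the context, e.g. to
 * flush any that have queued commands.  The upper bound above includes the
 * blitter batch only on Gfx12+ devices.
 *
 *    iris_foreach_batch(ice, batch) {
 *       if (iris_batch_bytes_used(batch) > 0)
 *          iris_batch_flush(batch);
 *    }
 */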

#endif