1 /**************************************************************************
2  *
3  * Copyright 2017 Advanced Micro Devices, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * on the rights to use, copy, modify, merge, publish, distribute, sub
10  * license, and/or sell copies of the Software, and to permit persons to whom
11  * the Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23  * USE OR OTHER DEALINGS IN THE SOFTWARE.
24  *
25  **************************************************************************/
26 
27 /* This is a wrapper for pipe_context that executes all pipe_context calls
28  * in another thread.
29  *
30  *
31  * Guidelines for adopters and deviations from Gallium
32  * ---------------------------------------------------
33  *
34  * 1) pipe_context is wrapped. pipe_screen isn't wrapped. All pipe_screen
35  *    driver functions that take a context (fence_finish, texture_get_handle)
36  *    should manually unwrap pipe_context by doing:
37  *      pipe = threaded_context_unwrap_sync(pipe);
38  *
39  *    pipe_context::priv is used to unwrap the context, so drivers and state
40  *    trackers shouldn't use it.
41  *
42  *    No other objects are wrapped. (See the sketch after this list.)
43  *
44  * 2) Drivers must subclass and initialize these structures:
45  *    - threaded_resource for pipe_resource (use threaded_resource_init/deinit)
46  *    - threaded_query for pipe_query (zero memory)
47  *    - threaded_transfer for pipe_transfer (zero memory)
48  *
49  * 3) The threaded context must not be enabled for contexts that can use video
50  *    codecs.
51  *
52  * 4) Changes in driver behavior:
53  *    - begin_query and end_query always return true; return values from
54  *      the driver are ignored.
55  *    - generate_mipmap uses is_format_supported to determine success;
56  *      the return value from the driver is ignored.
57  *    - resource_commit always returns true; failures are ignored.
58  *    - set_debug_callback is skipped if the callback is synchronous.
59  *
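 *    A sketch for (1), e.g. a fence_finish implementation. The "foo_" names
 *    and the fence-wait helper are hypothetical; threaded_context_unwrap_sync
 *    and the standard fence_finish prototype are real:
 *
 *      static bool foo_fence_finish(struct pipe_screen *screen,
 *                                   struct pipe_context *ctx,
 *                                   struct pipe_fence_handle *fence,
 *                                   uint64_t timeout)
 *      {
 *         if (ctx)
 *            ctx = threaded_context_unwrap_sync(ctx);
 *
 *         return foo_fence_wait(screen, ctx, fence, timeout);
 *      }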
60  *
61  * Thread-safety requirements on context functions
62  * -----------------------------------------------
63  *
64  * These pipe_context functions are executed directly, so they shouldn't use
65  * pipe_context in an unsafe way. They are de-facto screen functions now:
66  * - create_query
67  * - create_batch_query
68  * - create_*_state (all CSOs and shaders)
69  *     - Make sure the shader compiler doesn't use any per-context stuff.
70  *       (e.g. LLVM target machine)
71  *     - Only pipe_context's debug callback for shader dumps is guaranteed to
72  *       be up to date, because set_debug_callback synchronizes execution.
73  * - create_surface
74  * - surface_destroy
75  * - create_sampler_view
76  * - sampler_view_destroy
77  * - stream_output_target_destroy
78  * - transfer_map (only unsynchronized buffer mappings)
79  * - get_query_result (when threaded_query::flushed == true)
80  * - create_stream_output_target
81  *
82  *
83  * Transfer_map rules for buffer mappings
84  * --------------------------------------
85  *
86  * 1) If transfer_map has PIPE_MAP_UNSYNCHRONIZED, the call is made
87  *    in the non-driver thread without flushing the queue. The driver will
88  *    receive TC_TRANSFER_MAP_THREADED_UNSYNC in addition to PIPE_MAP_-
89  *    UNSYNCHRONIZED to indicate this.
90  *    Note that transfer_unmap is always enqueued and called from the driver
91  *    thread. (See the sketch after these rules.)
92  *
93  * 2) The driver isn't allowed to infer unsynchronized mappings by tracking
94  *    the valid buffer range. The threaded context always sends TC_TRANSFER_-
95  *    MAP_NO_INFER_UNSYNCHRONIZED to indicate this. Ignoring the flag will lead
96  *    to failures.
97  *    The threaded context does its own detection of unsynchronized mappings.
98  *
99  * 3) The driver isn't allowed to do buffer invalidations by itself under any
100  *    circumstances. This is necessary for unsynchronized maps to map the
101  *    latest version of the buffer (invalidations can be queued, while
102  *    unsynchronized maps are not, so they must return the latest storage
103  *    after invalidation). The threaded context always sends
104  *    TC_TRANSFER_MAP_NO_INVALIDATE into transfer_map and buffer_subdata to
105  *    indicate this. Ignoring the flag will lead to failures.
106  *    The threaded context uses its own buffer invalidation mechanism.
107  *    Do NOT use pipe_buffer_write, as this may trigger invalidation;
108  *    use tc_buffer_write instead.
109  *
110  * 4) PIPE_MAP_ONCE can no longer be used to infer that a buffer will not be mapped
111  *    a second time before it is unmapped.
112  *
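 * A sketch of a driver transfer_map honoring rules 1-3. The "foo_" names and
 * the combined transfer_map prototype are assumptions; the TC_TRANSFER_MAP_*
 * flags are the real ones defined in this header:
 *
 *      static void *foo_transfer_map(struct pipe_context *ctx,
 *                                    struct pipe_resource *res,
 *                                    unsigned level, unsigned usage,
 *                                    const struct pipe_box *box,
 *                                    struct pipe_transfer **out)
 *      {
 *         if (usage & TC_TRANSFER_MAP_THREADED_UNSYNC) {
 *            assert(usage & PIPE_MAP_UNSYNCHRONIZED);
 *            (this call is running on the non-driver thread)
 *         }
 *         (TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED is always set: never
 *          promote the mapping to unsynchronized on your own)
 *         (TC_TRANSFER_MAP_NO_INVALIDATE is always set: never reallocate or
 *          invalidate the buffer storage here)
 *         ...
 *      }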
113  *
114  * Rules for fences
115  * ----------------
116  *
117  * Flushes will be executed asynchronously in the driver thread if a
118  * create_fence callback is provided. This affects fence semantics as follows.
119  *
120  * When the threaded context wants to perform an asynchronous flush, it will
121  * use the create_fence callback to pre-create the fence from the calling
122  * thread. This pre-created fence will be passed to pipe_context::flush
123  * together with the TC_FLUSH_ASYNC flag.
124  *
125  * The callback receives the unwrapped context as a parameter, but must use it
126  * in a thread-safe way because it is called from a non-driver thread.
127  *
128  * If the threaded_context does not immediately flush the current batch, the
129  * callback also receives a tc_unflushed_batch_token. If fence_finish is called
130  * on the returned fence in the context that created the fence,
131  * threaded_context_flush must be called.
132  *
133  * The driver must implement pipe_context::fence_server_sync properly, since
134  * the threaded context handles PIPE_FLUSH_ASYNC.
135  *
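 * A sketch of the fence pieces, with hypothetical "foo_" names and a
 * hypothetical "tc_token" fence field; tc_create_fence_func, TC_FLUSH_ASYNC
 * and threaded_context_flush are the real interfaces in this header:
 *
 *      static struct pipe_fence_handle *
 *      foo_create_fence(struct pipe_context *ctx,
 *                       struct tc_unflushed_batch_token *token)
 *      {
 *         (called from the frontend thread: allocate the fence in a
 *          thread-safe way and, if token != NULL, keep a reference to it)
 *      }
 *
 *      In the driver's pipe_context::flush:
 *         if (flags & TC_FLUSH_ASYNC) {
 *            (*fence was pre-created by foo_create_fence: submit/signal
 *             that object instead of allocating a new one)
 *         }
 *
 *      In fence_finish, if the fence still holds an unflushed-batch token
 *      from the calling context:
 *         threaded_context_flush(ctx, fence->tc_token, timeout == 0);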
136  *
137  * Additional requirements
138  * -----------------------
139  *
140  * get_query_result:
141  *    If threaded_query::flushed == true, get_query_result should assume that
142  *    it's called from a non-driver thread, in which case the driver shouldn't
143  *    use the context in an unsafe way.
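 *
 *    A sketch (hypothetical "foo_" driver; threaded_query() is the real
 *    cast helper defined below):
 *
 *      static bool foo_get_query_result(struct pipe_context *ctx,
 *                                       struct pipe_query *query, bool wait,
 *                                       union pipe_query_result *result)
 *      {
 *         if (threaded_query(query)->flushed) {
 *            (possibly on the non-driver thread: only read back the
 *             already-flushed query, don't touch mutable context state)
 *         }
 *         ...
 *      }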
144  *
145  * replace_buffer_storage:
146  *    The driver has to implement this callback, which will be called when
147  *    the threaded context wants to replace a resource's backing storage with
148  *    another resource's backing storage. The threaded context uses it to
149  *    implement buffer invalidation. This call is always queued.
150  *    Note that 'minimum_num_rebinds' specifies only the minimum number of rebinds
151  *    which must be managed by the driver; if a buffer is bound multiple times in
152  *    the same binding point (e.g., vertex buffer slots 0,1,2), this will be counted
153  *    as a single rebind.
154  *
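 *    A sketch with hypothetical "foo_" internals; the parameters are the
 *    real ones from tc_replace_buffer_storage_func below:
 *
 *      static void foo_replace_buffer_storage(struct pipe_context *ctx,
 *                                             struct pipe_resource *dst,
 *                                             struct pipe_resource *src,
 *                                             unsigned minimum_num_rebinds,
 *                                             uint32_t rebind_mask,
 *                                             uint32_t delete_buffer_id)
 *      {
 *         (make dst alias src's backing storage, e.g. by referencing the
 *          underlying buffer object, then rebind dst according to
 *          rebind_mask, handling at least minimum_num_rebinds rebinds)
 *         (if buffer IDs are used, free delete_buffer_id here)
 *      }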
155  *
156  * Optional resource busy callbacks for better performance
157  * -------------------------------------------------------
158  *
159  * This adds checking whether a resource is used by the GPU and whether
160  * a resource is referenced by an unflushed command buffer. If neither is true,
161  * the threaded context will map the buffer as UNSYNCHRONIZED without flushing
162  * or synchronizing the thread and will skip any buffer invalidations
163  * (reallocations) because invalidating an idle buffer has no benefit.
164  *
165  * There is one driver callback and one TC callback:
166  *
167  * 1) is_resource_busy: It returns true when a resource is busy. If this is NULL,
168  *    the resource is considered always busy.
169  *
170  * 2) tc_driver_internal_flush_notify: If the driver sets
171  *    driver_calls_flush_notify = true in threaded_context_create, it should
172  *    call this after every internal driver flush. The threaded context uses it
173  *    to track internal driver flushes for the purpose of tracking which
174  *    buffers are referenced by an unflushed command buffer.
175  *
176  * If is_resource_busy is set, threaded_resource::buffer_id_unique must be
177  * generated by the driver, and the replace_buffer_storage callback should
178  * delete the buffer ID passed to it. The driver should use
179  * util_idalloc_mt_init_tc.
180  *
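 * Putting the busy-tracking pieces together (a sketch; "foo_" names and the
 * exact util_idalloc_mt calls are assumptions, while the option fields and
 * tc_driver_internal_flush_notify are real):
 *
 *      options.is_resource_busy = foo_is_resource_busy;
 *      options.driver_calls_flush_notify = true;
 *
 *      After every internal command-buffer flush inside the driver:
 *         tc_driver_internal_flush_notify(tc);
 *
 *      For every buffer the driver creates:
 *         tres->buffer_id_unique = util_idalloc_mt_alloc(&screen->buffer_ids);
 *      and the replace_buffer_storage callback frees delete_buffer_id again.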
181  *
182  * How it works (queue architecture)
183  * ---------------------------------
184  *
185  * There is a multithreaded queue consisting of batches, each batch containing
186  * 8-byte slots. Calls can occupy 1 or more slots.
187  *
188  * Once a batch is full and there is no space for the next call, it's flushed,
189  * meaning that it's added to the queue for execution in the other thread.
190  * The batches are ordered in a ring and reused once they are idle again.
191  * The batching is necessary for low queue/mutex overhead.
192  */
193 
194 #ifndef U_THREADED_CONTEXT_H
195 #define U_THREADED_CONTEXT_H
196 
197 #include "c11/threads.h"
198 #include "pipe/p_context.h"
199 #include "pipe/p_state.h"
200 #include "util/bitset.h"
201 #include "util/u_inlines.h"
202 #include "util/u_memory.h"
203 #include "util/u_queue.h"
204 #include "util/u_range.h"
205 #include "util/u_thread.h"
206 #include "util/slab.h"
207 
208 #ifdef __cplusplus
209 extern "C" {
210 #endif
211 
212 struct threaded_context;
213 struct tc_unflushed_batch_token;
214 
215 /* 0 = disabled, 1 = assertions, 2 = printfs, 3 = logging */
216 #define TC_DEBUG 0
217 
218 /* This is an internal flag not sent to the driver. */
219 #define TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE   (1u << 28)
220 /* These are map flags sent to drivers. */
221 /* Never infer whether it's safe to use unsynchronized mappings: */
222 #define TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED (1u << 29)
223 /* Don't invalidate buffers: */
224 #define TC_TRANSFER_MAP_NO_INVALIDATE        (1u << 30)
225 /* transfer_map is called from a non-driver thread: */
226 #define TC_TRANSFER_MAP_THREADED_UNSYNC      (1u << 31)
227 
228 /* Custom flush flags sent to drivers. */
229 /* fence is pre-populated with a fence created by the create_fence callback */
230 #define TC_FLUSH_ASYNC        (1u << 31)
231 
232 /* Size of the queue = number of batch slots in memory.
233  * - 1 batch is always idle and records new commands
234  * - 1 batch is being executed
235  * so the queue size is TC_MAX_BATCHES - 2 = number of waiting batches.
236  *
237  * Use a size as small as possible for low CPU L2 cache usage but large enough
238  * so that the queue isn't stalled too often for not having enough idle batch
239  * slots.
240  */
241 #define TC_MAX_BATCHES        10
242 
243 /* The size of one batch. Non-trivial calls (i.e. not setting a CSO pointer)
244  * can occupy multiple call slots.
245  *
246  * The idea is to have batches as small as possible but large enough so that
247  * the queuing and mutex overhead is negligible.
248  */
249 #define TC_SLOTS_PER_BATCH    1536
250 
251 /* The buffer list queue is much deeper than the batch queue because buffer
252  * lists need to stay around until the driver internally flushes its command
253  * buffer.
254  */
255 #define TC_MAX_BUFFER_LISTS   (TC_MAX_BATCHES * 4)
256 
257 /* This mask is used to get a hash of a buffer ID. It's also the buffer
258  * list bit size minus 1. It must be 2^n - 1. The size should be as low as
259  * possible to minimize memory usage, but high enough to minimize hash
260  * collisions.
261  */
262 #define TC_BUFFER_ID_MASK      BITFIELD_MASK(14)
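
/* For illustration (a sketch of the internal use): the presence bit for a
 * buffer in a tc_buffer_list is
 *
 *    BITSET_TEST(buf_list->buffer_list, buffer_id & TC_BUFFER_ID_MASK)
 *
 * so two buffer IDs may collide on the same bit, which only makes the
 * tracking conservative (a false "referenced"), never incorrect.
 */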
263 
264 /* Threshold for when to use the queue or sync. */
265 #define TC_MAX_STRING_MARKER_BYTES  512
266 
267 /* Threshold for when to enqueue buffer/texture_subdata as-is.
268  * If the upload size is greater than this, the following is done instead:
269  * - for buffers: DISCARD_RANGE is done by the threaded context
270  * - for textures: sync and call the driver directly
271  */
272 #define TC_MAX_SUBDATA_BYTES        320
273 
274 enum tc_binding_type {
275    TC_BINDING_VERTEX_BUFFER,
276    TC_BINDING_STREAMOUT_BUFFER,
277    TC_BINDING_UBO_VS,
278    TC_BINDING_UBO_FS,
279    TC_BINDING_UBO_GS,
280    TC_BINDING_UBO_TCS,
281    TC_BINDING_UBO_TES,
282    TC_BINDING_UBO_CS,
283    TC_BINDING_SAMPLERVIEW_VS,
284    TC_BINDING_SAMPLERVIEW_FS,
285    TC_BINDING_SAMPLERVIEW_GS,
286    TC_BINDING_SAMPLERVIEW_TCS,
287    TC_BINDING_SAMPLERVIEW_TES,
288    TC_BINDING_SAMPLERVIEW_CS,
289    TC_BINDING_SSBO_VS,
290    TC_BINDING_SSBO_FS,
291    TC_BINDING_SSBO_GS,
292    TC_BINDING_SSBO_TCS,
293    TC_BINDING_SSBO_TES,
294    TC_BINDING_SSBO_CS,
295    TC_BINDING_IMAGE_VS,
296    TC_BINDING_IMAGE_FS,
297    TC_BINDING_IMAGE_GS,
298    TC_BINDING_IMAGE_TCS,
299    TC_BINDING_IMAGE_TES,
300    TC_BINDING_IMAGE_CS,
301 };
302 
303 typedef void (*tc_replace_buffer_storage_func)(struct pipe_context *ctx,
304                                                struct pipe_resource *dst,
305                                                struct pipe_resource *src,
306                                                unsigned minimum_num_rebinds,
307                                                uint32_t rebind_mask,
308                                                uint32_t delete_buffer_id);
309 typedef struct pipe_fence_handle *(*tc_create_fence_func)(struct pipe_context *ctx,
310                                                           struct tc_unflushed_batch_token *token);
311 typedef bool (*tc_is_resource_busy)(struct pipe_screen *screen,
312                                     struct pipe_resource *resource,
313                                     unsigned usage);
314 
315 struct threaded_resource {
316    struct pipe_resource b;
317 
318    /* Pointer to the TC that first used this threaded_resource (buffer). This is used to
319     * allow TCs to determine whether they have been given a buffer that was created by a
320     * different TC, in which case all TCs have to disable busyness tracking and buffer
321     * replacement for that particular buffer.
322     * DO NOT DEREFERENCE. The only operation allowed on this pointer is equality-checking
323     * since it might be dangling if a buffer has been shared and its first_user has
324     * already been destroyed. The pointer is const void to discourage such disallowed usage.
325     * This is NULL if no TC has used this buffer yet.
326     */
327    const void *first_user;
328 
329    /* Since buffer invalidations are queued, we can't use the base resource
330     * for unsynchronized mappings. This points to the latest version of
331     * the buffer after the latest invalidation. It's only used for unsynchro-
332     * nized mappings in the non-driver thread. Initially it's set to &b.
333     */
334    struct pipe_resource *latest;
335 
336    /* Optional CPU storage of the buffer. When we get partial glBufferSubData (implemented by
337     * copy_buffer) + glDrawElements, we don't want to drain the gfx pipeline before executing
338     * the copy. For ideal pipelining, we upload to this CPU storage and then reallocate
339     * the GPU storage completely and reupload everything without copy_buffer.
340     */
341    void *cpu_storage;
342 
343    /* The buffer range which is initialized (with a write transfer, streamout,
344     * or writable shader resources). The remainder of the buffer is considered
345     * invalid and can be mapped unsynchronized.
346     *
347     * This allows unsynchronized mapping of a buffer range which hasn't been
348     * used yet. It's for applications which forget to use the unsynchronized
349     * map flag and expect the driver to figure it out.
350     *
351     * Drivers should set this to the full range for buffers backed by user
352     * memory.
353     */
354    struct util_range valid_buffer_range;
355 
356    /* True if multiple threaded contexts have accessed this buffer.
357     * Disables non-multicontext-safe optimizations in TC.
358     * We can't just re-use is_shared for that purpose as that would confuse drivers.
359     */
360    bool used_by_multiple_contexts;
361 
362    /* Drivers are required to update this for shared resources and user
363     * pointers. */
364    bool is_shared;
365    bool is_user_ptr;
366    bool allow_cpu_storage;
367 
368    /* Unique buffer ID. Drivers must set it to non-zero for buffers and it must
369     * be unique. Textures must set it to 0. Low bits are used as a hash of the ID.
370     * Use util_idalloc_mt to generate these IDs.
371     */
372    uint32_t buffer_id_unique;
373 
374    /* If positive, then a staging transfer is in progress.
375     */
376    int pending_staging_uploads;
377 
378    /* If staging uploads are pending, this will hold the union of the mapped
379     * ranges.
380     */
381    struct util_range pending_staging_uploads_range;
382 };
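
/* Example (non-normative): a driver buffer subclass. The "foo_*" names are
 * hypothetical; threaded_resource_init/deinit are the real entry points
 * declared near the end of this header.
 *
 *    struct foo_resource {
 *       struct threaded_resource b;
 *       (driver-specific storage: BO handle, flags, ...)
 *    };
 *
 *    In the driver's resource_create, for buffers:
 *       struct foo_resource *res = CALLOC_STRUCT(foo_resource);
 *       (fill in res->b.b, the pipe_resource)
 *       threaded_resource_init(&res->b.b, allow_cpu_storage);
 *
 *    In resource_destroy:
 *       threaded_resource_deinit(&res->b.b);
 */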
383 
384 struct threaded_transfer {
385    struct pipe_transfer b;
386 
387    /* Staging buffer for DISCARD_RANGE transfers. */
388    struct pipe_resource *staging;
389 
390    /* If b.resource is not the base instance of the buffer, but it's one of its
391     * reallocations (set in "latest" of the base instance), this points to
392     * the valid range of the base instance. It's used for transfers after
393     * a buffer invalidation, because such transfers operate on "latest", not
394     * the base instance. Initially it's set to &b.resource->valid_buffer_range.
395     */
396    struct util_range *valid_buffer_range;
397 
398    bool cpu_storage_mapped;
399 };
400 
401 struct threaded_query {
402    /* The query is added to the list in end_query and removed in flush. */
403    struct list_head head_unflushed;
404 
405    /* Whether pipe->flush has been called in non-deferred mode after end_query. */
406    bool flushed;
407 };
408 
409 struct tc_call_base {
410 #if !defined(NDEBUG) && TC_DEBUG >= 1
411    uint32_t sentinel;
412 #endif
413    ushort num_slots;
414    ushort call_id;
415 };
416 
417 /**
418  * A token representing an unflushed batch.
419  *
420  * See the general rules for fences for an explanation.
421  */
422 struct tc_unflushed_batch_token {
423    struct pipe_reference ref;
424    struct threaded_context *tc;
425 };
426 
427 struct tc_batch {
428    struct threaded_context *tc;
429 #if !defined(NDEBUG) && TC_DEBUG >= 1
430    unsigned sentinel;
431 #endif
432    uint16_t num_total_slots;
433    uint16_t buffer_list_index;
434    struct util_queue_fence fence;
435    struct tc_unflushed_batch_token *token;
436    uint64_t slots[TC_SLOTS_PER_BATCH];
437 };
438 
439 struct tc_buffer_list {
440    /* Signalled by the driver after it flushes its internal command buffer. */
441    struct util_queue_fence driver_flushed_fence;
442 
443    /* Buffer list where bit N means whether ID hash N is in the list. */
444    BITSET_DECLARE(buffer_list, TC_BUFFER_ID_MASK + 1);
445 };
446 
447 /**
448  * Optional TC parameters/callbacks.
449  */
450 struct threaded_context_options {
451    tc_create_fence_func create_fence;
452    tc_is_resource_busy is_resource_busy;
453    bool driver_calls_flush_notify;
454 
455    /**
456     * If true, ctx->get_device_reset_status() will be called without
457     * synchronizing with the driver thread.  Drivers can enable this to avoid
458     * TC syncs if their implementation of get_device_reset_status() is
459     * safe to call without synchronizing with the driver thread.
460     */
461    bool unsynchronized_get_device_reset_status;
462 };
463 
464 struct threaded_context {
465    struct pipe_context base;
466    struct pipe_context *pipe;
467    struct slab_child_pool pool_transfers;
468    tc_replace_buffer_storage_func replace_buffer_storage;
469    struct threaded_context_options options;
470    unsigned map_buffer_alignment;
471    unsigned ubo_alignment;
472 
473    struct list_head unflushed_queries;
474 
475    /* Counters for the HUD. */
476    unsigned num_offloaded_slots;
477    unsigned num_direct_slots;
478    unsigned num_syncs;
479 
480    bool use_forced_staging_uploads;
481    bool add_all_gfx_bindings_to_buffer_list;
482    bool add_all_compute_bindings_to_buffer_list;
483 
484    /* Estimate of how many vram/gtt bytes are mmap'd in
485     * the current tc_batch.
486     */
487    uint64_t bytes_mapped_estimate;
488    uint64_t bytes_mapped_limit;
489 
490    struct util_queue queue;
491    struct util_queue_fence *fence;
492 
493 #ifndef NDEBUG
494    /**
495     * The driver thread is normally the queue thread, but
496     * there are cases where the queue is flushed directly
497     * from the frontend thread.
498     */
499    thread_id driver_thread;
500 #endif
501 
502    bool seen_tcs;
503    bool seen_tes;
504    bool seen_gs;
505 
506    bool seen_streamout_buffers;
507    bool seen_shader_buffers[PIPE_SHADER_TYPES];
508    bool seen_image_buffers[PIPE_SHADER_TYPES];
509    bool seen_sampler_buffers[PIPE_SHADER_TYPES];
510 
511    unsigned max_vertex_buffers;
512    unsigned max_const_buffers;
513    unsigned max_shader_buffers;
514    unsigned max_images;
515    unsigned max_samplers;
516 
517    unsigned last, next, next_buf_list;
518 
519    /* The list of fences that the driver should signal after the next flush.
520     * If this is empty, all driver command buffers have been flushed.
521     */
522    struct util_queue_fence *signal_fences_next_flush[TC_MAX_BUFFER_LISTS];
523    unsigned num_signal_fences_next_flush;
524 
525    /* Bound buffers are tracked here using a hash of threaded_resource::buffer_id_unique.
526     * 0 means unbound.
527     */
528    uint32_t vertex_buffers[PIPE_MAX_ATTRIBS];
529    uint32_t streamout_buffers[PIPE_MAX_SO_BUFFERS];
530    uint32_t const_buffers[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS];
531    uint32_t shader_buffers[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_BUFFERS];
532    uint32_t image_buffers[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES];
533    uint32_t shader_buffers_writeable_mask[PIPE_SHADER_TYPES];
534    uint64_t image_buffers_writeable_mask[PIPE_SHADER_TYPES];
535    uint32_t sampler_buffers[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS];
536 
537    struct tc_batch batch_slots[TC_MAX_BATCHES];
538    struct tc_buffer_list buffer_lists[TC_MAX_BUFFER_LISTS];
539 };
540 
541 void threaded_resource_init(struct pipe_resource *res, bool allow_cpu_storage);
542 void threaded_resource_deinit(struct pipe_resource *res);
543 struct pipe_context *threaded_context_unwrap_sync(struct pipe_context *pipe);
544 void tc_driver_internal_flush_notify(struct threaded_context *tc);
545 
546 struct pipe_context *
547 threaded_context_create(struct pipe_context *pipe,
548                         struct slab_parent_pool *parent_transfer_pool,
549                         tc_replace_buffer_storage_func replace_buffer,
550                         const struct threaded_context_options *options,
551                         struct threaded_context **out);
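
/* Example (non-normative) of wrapping a freshly created driver context.
 * The "foo_*" names are hypothetical; the option fields and the
 * threaded_context_create signature are the real ones in this header.
 *
 *    struct threaded_context_options options = {
 *       .create_fence = foo_create_fence,
 *       .is_resource_busy = foo_is_resource_busy,
 *       .driver_calls_flush_notify = true,
 *    };
 *    struct threaded_context *tc;
 *    struct pipe_context *ctx =
 *       threaded_context_create(foo_ctx, &foo_screen->pool_transfers,
 *                               foo_replace_buffer_storage, &options, &tc);
 *
 *    "ctx" is what the frontend should use. It may be the original
 *    "foo_ctx" if threading could not be enabled.
 */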
552 
553 void
554 threaded_context_init_bytes_mapped_limit(struct threaded_context *tc, unsigned divisor);
555 
556 void
557 threaded_context_flush(struct pipe_context *_pipe,
558                        struct tc_unflushed_batch_token *token,
559                        bool prefer_async);
560 
561 void
562 tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info,
563             unsigned drawid_offset,
564             const struct pipe_draw_indirect_info *indirect,
565             const struct pipe_draw_start_count_bias *draws,
566             unsigned num_draws);
567 
568 static inline struct threaded_context *
569 threaded_context(struct pipe_context *pipe)
570 {
571    return (struct threaded_context*)pipe;
572 }
573 
574 static inline struct threaded_resource *
575 threaded_resource(struct pipe_resource *res)
576 {
577    return (struct threaded_resource*)res;
578 }
579 
580 static inline struct threaded_query *
581 threaded_query(struct pipe_query *q)
582 {
583    return (struct threaded_query*)q;
584 }
585 
586 static inline struct threaded_transfer *
587 threaded_transfer(struct pipe_transfer *transfer)
588 {
589    return (struct threaded_transfer*)transfer;
590 }
591 
592 static inline void
593 tc_unflushed_batch_token_reference(struct tc_unflushed_batch_token **dst,
594                                    struct tc_unflushed_batch_token *src)
595 {
596    if (pipe_reference((struct pipe_reference *)*dst, (struct pipe_reference *)src))
597       free(*dst);
598    *dst = src;
599 }
600 
601 /**
602  * Helper for !NDEBUG builds to assert that it is called from the driver
603  * thread.  This is to help drivers ensure that various code-paths
604  * are not hit indirectly from pipe entry points that are called from
605  * the front-end/state-tracker thread.
606  */
607 static inline void
608 tc_assert_driver_thread(struct threaded_context *tc)
609 {
610    if (!tc)
611       return;
612 #ifndef NDEBUG
613    assert(util_thread_id_equal(tc->driver_thread, util_get_thread_id()));
614 #endif
615 }
616 
617 /**
618  * This is called before GPU stores to disable the CPU storage because
619  * the CPU storage doesn't mirror the GPU storage.
620  *
621  * Drivers should also call it before exporting a DMABUF of a buffer.
622  */
623 static inline void
624 tc_buffer_disable_cpu_storage(struct pipe_resource *buf)
625 {
626    struct threaded_resource *tres = threaded_resource(buf);
627 
628    if (tres->cpu_storage) {
629       align_free(tres->cpu_storage);
630       tres->cpu_storage = NULL;
631    }
632    tres->allow_cpu_storage = false;
633 }
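
/* Typical call sites (a sketch, not exhaustive): the driver's
 * resource_get_handle path before a DMABUF export, and any path that is
 * about to store to the buffer on the GPU, e.g.:
 *
 *    tc_buffer_disable_cpu_storage(resource);
 */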
634 
635 static inline void
636 tc_buffer_write(struct pipe_context *pipe,
637                 struct pipe_resource *buf,
638                 unsigned offset,
639                 unsigned size,
640                 const void *data)
641 {
642    pipe->buffer_subdata(pipe, buf, PIPE_MAP_WRITE | TC_TRANSFER_MAP_NO_INVALIDATE, offset, size, data);
643 }
644 
645 #ifdef __cplusplus
646 }
647 #endif
648 
649 #endif
650