1 /**************************************************************************
2  *
3  * Copyright 2017 Advanced Micro Devices, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * on the rights to use, copy, modify, merge, publish, distribute, sub
10  * license, and/or sell copies of the Software, and to permit persons to whom
11  * the Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23  * USE OR OTHER DEALINGS IN THE SOFTWARE.
24  *
25  **************************************************************************/
26 
27 #include "util/u_threaded_context.h"
28 #include "util/u_cpu_detect.h"
29 #include "util/format/u_format.h"
30 #include "util/u_inlines.h"
31 #include "util/u_memory.h"
32 #include "util/u_upload_mgr.h"
33 #include "driver_trace/tr_context.h"
34 #include "util/log.h"
35 #include "compiler/shader_info.h"
36 
37 #if TC_DEBUG >= 1
38 #define tc_assert assert
39 #else
40 #define tc_assert(x)
41 #endif
42 
43 #if TC_DEBUG >= 2
44 #define tc_printf mesa_logi
45 #define tc_asprintf asprintf
46 #define tc_strcmp strcmp
47 #else
48 #define tc_printf(...)
49 #define tc_asprintf(...) 0
50 #define tc_strcmp(...) 0
51 #endif
52 
53 #define TC_SENTINEL 0x5ca1ab1e
54 
55 enum tc_call_id {
56 #define CALL(name) TC_CALL_##name,
57 #include "u_threaded_context_calls.h"
58 #undef CALL
59    TC_NUM_CALLS,
60 };
61 
62 #if TC_DEBUG >= 3
63 static const char *tc_call_names[] = {
64 #define CALL(name) #name,
65 #include "u_threaded_context_calls.h"
66 #undef CALL
67 };
68 #endif
69 
70 typedef uint16_t (*tc_execute)(struct pipe_context *pipe, void *call, uint64_t *last);
71 
72 static const tc_execute execute_func[TC_NUM_CALLS];
73 
74 static void
75 tc_buffer_subdata(struct pipe_context *_pipe,
76                   struct pipe_resource *resource,
77                   unsigned usage, unsigned offset,
78                   unsigned size, const void *data);
79 
80 static void
81 tc_batch_check(UNUSED struct tc_batch *batch)
82 {
83    tc_assert(batch->sentinel == TC_SENTINEL);
84    tc_assert(batch->num_total_slots <= TC_SLOTS_PER_BATCH);
85 }
86 
87 static void
88 tc_debug_check(struct threaded_context *tc)
89 {
90    for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
91       tc_batch_check(&tc->batch_slots[i]);
92       tc_assert(tc->batch_slots[i].tc == tc);
93    }
94 }
95 
96 static void
97 tc_set_driver_thread(struct threaded_context *tc)
98 {
99 #ifndef NDEBUG
100    tc->driver_thread = util_get_thread_id();
101 #endif
102 }
103 
104 static void
105 tc_clear_driver_thread(struct threaded_context *tc)
106 {
107 #ifndef NDEBUG
108    memset(&tc->driver_thread, 0, sizeof(tc->driver_thread));
109 #endif
110 }
111 
112 static void *
113 to_call_check(void *ptr, unsigned num_slots)
114 {
115 #if TC_DEBUG >= 1
116    struct tc_call_base *call = ptr;
117    tc_assert(call->num_slots == num_slots);
118 #endif
119    return ptr;
120 }
121 #define to_call(ptr, type) ((struct type *)to_call_check((void *)(ptr), call_size(type)))
122 
123 #define size_to_slots(size)      DIV_ROUND_UP(size, 8)
124 #define call_size(type)          size_to_slots(sizeof(struct type))
125 #define call_size_with_slots(type, num_slots) size_to_slots( \
126    sizeof(struct type) + sizeof(((struct type*)NULL)->slot[0]) * (num_slots))
127 #define get_next_call(ptr, type) ((struct type*)((uint64_t*)ptr + call_size(type)))
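/* A minimal sketch of the slot math above (tc_call_example is a hypothetical
 * call type, not one defined in this file). Each slot is 8 bytes, so a call
 * structure is padded up to a whole number of slots:
 *
 *    struct tc_call_example {
 *       struct tc_call_base base;   // 4 bytes without the debug sentinel
 *       uint32_t count;             // 4 bytes
 *       uint64_t slot[0];           // optional variable-length payload
 *    };
 *
 *    call_size(tc_call_example)               == size_to_slots(8)      == 1
 *    call_size_with_slots(tc_call_example, 3) == size_to_slots(8 + 24) == 4
 */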
128 
129 /* Assign src to dst while dst is uninitialized. */
130 static inline void
131 tc_set_resource_reference(struct pipe_resource **dst, struct pipe_resource *src)
132 {
133    *dst = src;
134    pipe_reference(NULL, &src->reference); /* only increment refcount */
135 }
136 
137 /* Assign src to dst while dst is uninitialized. */
138 static inline void
139 tc_set_vertex_state_reference(struct pipe_vertex_state **dst,
140                               struct pipe_vertex_state *src)
141 {
142    *dst = src;
143    pipe_reference(NULL, &src->reference); /* only increment refcount */
144 }
145 
146 /* Unreference dst but don't touch the dst pointer. */
147 static inline void
148 tc_drop_resource_reference(struct pipe_resource *dst)
149 {
150    if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
151       pipe_resource_destroy(dst);
152 }
153 
154 /* Unreference dst but don't touch the dst pointer. */
155 static inline void
156 tc_drop_surface_reference(struct pipe_surface *dst)
157 {
158    if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
159       dst->context->surface_destroy(dst->context, dst);
160 }
161 
162 /* Unreference dst but don't touch the dst pointer. */
163 static inline void
164 tc_drop_so_target_reference(struct pipe_stream_output_target *dst)
165 {
166    if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
167       dst->context->stream_output_target_destroy(dst->context, dst);
168 }
169 
170 /**
171  * Subtract the given number of references.
172  */
173 static inline void
174 tc_drop_vertex_state_references(struct pipe_vertex_state *dst, int num_refs)
175 {
176    int count = p_atomic_add_return(&dst->reference.count, -num_refs);
177 
178    assert(count >= 0);
179    /* Underflows shouldn't happen, but let's be safe. */
180    if (count <= 0)
181       dst->screen->vertex_state_destroy(dst->screen, dst);
182 }
183 
184 /* We don't want to read or write min_index and max_index, because
185  * they shouldn't be needed by drivers at this point.
186  */
187 #define DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX \
188    offsetof(struct pipe_draw_info, min_index)
189 
190 static void
191 tc_batch_execute(void *job, UNUSED void *gdata, int thread_index)
192 {
193    struct tc_batch *batch = job;
194    struct pipe_context *pipe = batch->tc->pipe;
195    uint64_t *last = &batch->slots[batch->num_total_slots];
196 
197    tc_batch_check(batch);
198    tc_set_driver_thread(batch->tc);
199 
200    assert(!batch->token);
201 
202    for (uint64_t *iter = batch->slots; iter != last;) {
203       struct tc_call_base *call = (struct tc_call_base *)iter;
204 
205       tc_assert(call->sentinel == TC_SENTINEL);
206 
207 #if TC_DEBUG >= 3
208       tc_printf("CALL: %s", tc_call_names[call->call_id]);
209 #endif
210 
211       iter += execute_func[call->call_id](pipe, call, last);
212    }
213 
214    /* Add the fence to the list of fences for the driver to signal at the next
215     * flush, which we use for tracking which buffers are referenced by
216     * an unflushed command buffer.
217     */
218    struct threaded_context *tc = batch->tc;
219    struct util_queue_fence *fence =
220       &tc->buffer_lists[batch->buffer_list_index].driver_flushed_fence;
221 
222    if (tc->options.driver_calls_flush_notify) {
223       tc->signal_fences_next_flush[tc->num_signal_fences_next_flush++] = fence;
224 
225       /* Since our buffer lists are chained as a ring, we need to flush
226        * the context twice as we go around the ring to make the driver signal
227        * the buffer list fences, so that the producer thread can reuse the buffer
228        * list structures for the next batches without waiting.
229        */
230       unsigned half_ring = TC_MAX_BUFFER_LISTS / 2;
231       if (batch->buffer_list_index % half_ring == half_ring - 1)
232          pipe->flush(pipe, NULL, PIPE_FLUSH_ASYNC);
233    } else {
234       util_queue_fence_signal(fence);
235    }
236 
237    tc_clear_driver_thread(batch->tc);
238    tc_batch_check(batch);
239    batch->num_total_slots = 0;
240 }
241 
242 static void
243 tc_begin_next_buffer_list(struct threaded_context *tc)
244 {
245    tc->next_buf_list = (tc->next_buf_list + 1) % TC_MAX_BUFFER_LISTS;
246 
247    tc->batch_slots[tc->next].buffer_list_index = tc->next_buf_list;
248 
249    /* Clear the buffer list in the new empty batch. */
250    struct tc_buffer_list *buf_list = &tc->buffer_lists[tc->next_buf_list];
251    assert(util_queue_fence_is_signalled(&buf_list->driver_flushed_fence));
252    util_queue_fence_reset(&buf_list->driver_flushed_fence); /* set to unsignalled */
253    BITSET_ZERO(buf_list->buffer_list);
254 
255    tc->add_all_gfx_bindings_to_buffer_list = true;
256    tc->add_all_compute_bindings_to_buffer_list = true;
257 }
258 
259 static void
260 tc_batch_flush(struct threaded_context *tc)
261 {
262    struct tc_batch *next = &tc->batch_slots[tc->next];
263 
264    tc_assert(next->num_total_slots != 0);
265    tc_batch_check(next);
266    tc_debug_check(tc);
267    tc->bytes_mapped_estimate = 0;
268    p_atomic_add(&tc->num_offloaded_slots, next->num_total_slots);
269 
270    if (next->token) {
271       next->token->tc = NULL;
272       tc_unflushed_batch_token_reference(&next->token, NULL);
273    }
274 
275    util_queue_add_job(&tc->queue, next, &next->fence, tc_batch_execute,
276                       NULL, 0);
277    tc->last = tc->next;
278    tc->next = (tc->next + 1) % TC_MAX_BATCHES;
279    tc_begin_next_buffer_list(tc);
280 }
281 
282 /* This is the function that adds variable-sized calls into the current
283  * batch. It also flushes the batch if there is not enough space there.
284  * All other higher-level "add" functions use it.
285  */
286 static void *
287 tc_add_sized_call(struct threaded_context *tc, enum tc_call_id id,
288                   unsigned num_slots)
289 {
290    struct tc_batch *next = &tc->batch_slots[tc->next];
291    assert(num_slots <= TC_SLOTS_PER_BATCH);
292    tc_debug_check(tc);
293 
294    if (unlikely(next->num_total_slots + num_slots > TC_SLOTS_PER_BATCH)) {
295       tc_batch_flush(tc);
296       next = &tc->batch_slots[tc->next];
297       tc_assert(next->num_total_slots == 0);
298    }
299 
300    tc_assert(util_queue_fence_is_signalled(&next->fence));
301 
302    struct tc_call_base *call = (struct tc_call_base*)&next->slots[next->num_total_slots];
303    next->num_total_slots += num_slots;
304 
305 #if !defined(NDEBUG) && TC_DEBUG >= 1
306    call->sentinel = TC_SENTINEL;
307 #endif
308    call->call_id = id;
309    call->num_slots = num_slots;
310 
311 #if TC_DEBUG >= 3
312    tc_printf("ENQUEUE: %s", tc_call_names[id]);
313 #endif
314 
315    tc_debug_check(tc);
316    return call;
317 }
318 
319 #define tc_add_call(tc, execute, type) \
320    ((struct type*)tc_add_sized_call(tc, execute, call_size(type)))
321 
322 #define tc_add_slot_based_call(tc, execute, type, num_slots) \
323    ((struct type*)tc_add_sized_call(tc, execute, \
324                                     call_size_with_slots(type, num_slots)))
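/* Typical usage of tc_add_call (see e.g. tc_set_patch_vertices below): a tc_*
 * entry point records its parameters into a call structure instead of calling
 * the driver, and the matching tc_call_* handler replays it in the driver
 * thread. A minimal sketch with hypothetical names:
 *
 *    struct tc_example_call {
 *       struct tc_call_base base;
 *       unsigned value;
 *    };
 *
 *    static void
 *    tc_set_example(struct pipe_context *_pipe, unsigned value)
 *    {
 *       struct threaded_context *tc = threaded_context(_pipe);
 *       tc_add_call(tc, TC_CALL_set_example, tc_example_call)->value = value;
 *    }
 */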
325 
326 static bool
327 tc_is_sync(struct threaded_context *tc)
328 {
329    struct tc_batch *last = &tc->batch_slots[tc->last];
330    struct tc_batch *next = &tc->batch_slots[tc->next];
331 
332    return util_queue_fence_is_signalled(&last->fence) &&
333           !next->num_total_slots;
334 }
335 
336 static void
337 _tc_sync(struct threaded_context *tc, UNUSED const char *info, UNUSED const char *func)
338 {
339    struct tc_batch *last = &tc->batch_slots[tc->last];
340    struct tc_batch *next = &tc->batch_slots[tc->next];
341    bool synced = false;
342 
343    tc_debug_check(tc);
344 
345    /* Only wait for queued calls... */
346    if (!util_queue_fence_is_signalled(&last->fence)) {
347       util_queue_fence_wait(&last->fence);
348       synced = true;
349    }
350 
351    tc_debug_check(tc);
352 
353    if (next->token) {
354       next->token->tc = NULL;
355       tc_unflushed_batch_token_reference(&next->token, NULL);
356    }
357 
358    /* .. and execute unflushed calls directly. */
359    if (next->num_total_slots) {
360       p_atomic_add(&tc->num_direct_slots, next->num_total_slots);
361       tc->bytes_mapped_estimate = 0;
362       tc_batch_execute(next, NULL, 0);
363       tc_begin_next_buffer_list(tc);
364       synced = true;
365    }
366 
367    if (synced) {
368       p_atomic_inc(&tc->num_syncs);
369 
370       if (tc_strcmp(func, "tc_destroy") != 0) {
371          tc_printf("sync %s %s", func, info);
372       }
373    }
374 
375    tc_debug_check(tc);
376 }
377 
378 #define tc_sync(tc) _tc_sync(tc, "", __func__)
379 #define tc_sync_msg(tc, info) _tc_sync(tc, info, __func__)
380 
381 /**
382  * Call this from fence_finish for same-context fence waits of deferred fences
383  * that haven't been flushed yet.
384  *
385  * The passed pipe_context must be the one passed to pipe_screen::fence_finish,
386  * i.e., the wrapped one.
387  */
388 void
389 threaded_context_flush(struct pipe_context *_pipe,
390                        struct tc_unflushed_batch_token *token,
391                        bool prefer_async)
392 {
393    struct threaded_context *tc = threaded_context(_pipe);
394 
395    /* This is called from the gallium frontend / application thread. */
396    if (token->tc && token->tc == tc) {
397       struct tc_batch *last = &tc->batch_slots[tc->last];
398 
399       /* Prefer to do the flush in the driver thread if it is already
400        * running. That should be better for cache locality.
401        */
402       if (prefer_async || !util_queue_fence_is_signalled(&last->fence))
403          tc_batch_flush(tc);
404       else
405          tc_sync(token->tc);
406    }
407 }
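/* A minimal sketch of the caller described above, with hypothetical xyz_*
 * driver names: a driver's pipe_screen::fence_finish flushes the threaded
 * context before waiting on a deferred fence that hasn't been flushed yet.
 *
 *    static bool
 *    xyz_fence_finish(struct pipe_screen *screen, struct pipe_context *ctx,
 *                     struct pipe_fence_handle *fence, uint64_t timeout)
 *    {
 *       struct xyz_fence *f = xyz_fence(fence);
 *
 *       if (ctx && f->tc_token)   // fence created by an unflushed tc batch
 *          threaded_context_flush(ctx, f->tc_token, timeout == 0);
 *
 *       // ...then wait on the fence as usual...
 *    }
 */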
408 
409 /* Must be called before TC binds, maps, invalidates, or adds a buffer to a buffer list. */
410 static void tc_touch_buffer(struct threaded_context *tc, struct threaded_resource *buf)
411 {
412    const struct threaded_context *first_user = buf->first_user;
413 
414    /* Fast path exit to avoid additional branches */
415    if (likely(first_user == tc))
416       return;
417 
418    if (!first_user)
419       first_user = p_atomic_cmpxchg_ptr(&buf->first_user, NULL, tc);
420 
421    /* The NULL check might seem unnecessary here but it's actually critical:
422     * p_atomic_cmpxchg will return NULL if it succeeds, meaning that NULL is
423     * equivalent to "we're the first user" here. (It's equally important not
424     * to ignore the result of the cmpxchg above, since it might fail.)
425     * Without the NULL check, we'd set the flag unconditionally, which is bad.
426     */
427    if (first_user && first_user != tc && !buf->used_by_multiple_contexts)
428       buf->used_by_multiple_contexts = true;
429 }
430 
431 static bool tc_is_buffer_shared(struct threaded_resource *buf)
432 {
433    return buf->is_shared || buf->used_by_multiple_contexts;
434 }
435 
436 static void
437 tc_add_to_buffer_list(struct threaded_context *tc, struct tc_buffer_list *next, struct pipe_resource *buf)
438 {
439    struct threaded_resource *tbuf = threaded_resource(buf);
440    tc_touch_buffer(tc, tbuf);
441 
442    uint32_t id = tbuf->buffer_id_unique;
443    BITSET_SET(next->buffer_list, id & TC_BUFFER_ID_MASK);
444 }
445 
446 /* Set a buffer binding and add it to the buffer list. */
447 static void
448 tc_bind_buffer(struct threaded_context *tc, uint32_t *binding, struct tc_buffer_list *next, struct pipe_resource *buf)
449 {
450    struct threaded_resource *tbuf = threaded_resource(buf);
451    tc_touch_buffer(tc, tbuf);
452 
453    uint32_t id = tbuf->buffer_id_unique;
454    *binding = id;
455    BITSET_SET(next->buffer_list, id & TC_BUFFER_ID_MASK);
456 }
457 
458 /* Reset a buffer binding. */
459 static void
460 tc_unbind_buffer(uint32_t *binding)
461 {
462    *binding = 0;
463 }
464 
465 /* Reset a range of buffer binding slots. */
466 static void
467 tc_unbind_buffers(uint32_t *binding, unsigned count)
468 {
469    if (count)
470       memset(binding, 0, sizeof(*binding) * count);
471 }
472 
473 static void
474 tc_add_bindings_to_buffer_list(BITSET_WORD *buffer_list, const uint32_t *bindings,
475                                unsigned count)
476 {
477    for (unsigned i = 0; i < count; i++) {
478       if (bindings[i])
479          BITSET_SET(buffer_list, bindings[i] & TC_BUFFER_ID_MASK);
480    }
481 }
482 
483 static unsigned
484 tc_rebind_bindings(uint32_t old_id, uint32_t new_id, uint32_t *bindings,
485                    unsigned count)
486 {
487    unsigned rebind_count = 0;
488 
489    for (unsigned i = 0; i < count; i++) {
490       if (bindings[i] == old_id) {
491          bindings[i] = new_id;
492          rebind_count++;
493       }
494    }
495    return rebind_count;
496 }
497 
498 static void
499 tc_add_shader_bindings_to_buffer_list(struct threaded_context *tc,
500                                       BITSET_WORD *buffer_list,
501                                       enum pipe_shader_type shader)
502 {
503    tc_add_bindings_to_buffer_list(buffer_list, tc->const_buffers[shader],
504                                   tc->max_const_buffers);
505    if (tc->seen_shader_buffers[shader]) {
506       tc_add_bindings_to_buffer_list(buffer_list, tc->shader_buffers[shader],
507                                      tc->max_shader_buffers);
508    }
509    if (tc->seen_image_buffers[shader]) {
510       tc_add_bindings_to_buffer_list(buffer_list, tc->image_buffers[shader],
511                                      tc->max_images);
512    }
513    if (tc->seen_sampler_buffers[shader]) {
514       tc_add_bindings_to_buffer_list(buffer_list, tc->sampler_buffers[shader],
515                                      tc->max_samplers);
516    }
517 }
518 
519 static unsigned
520 tc_rebind_shader_bindings(struct threaded_context *tc, uint32_t old_id,
521                           uint32_t new_id, enum pipe_shader_type shader, uint32_t *rebind_mask)
522 {
523    unsigned ubo = 0, ssbo = 0, img = 0, sampler = 0;
524 
525    ubo = tc_rebind_bindings(old_id, new_id, tc->const_buffers[shader],
526                             tc->max_const_buffers);
527    if (ubo)
528       *rebind_mask |= BITFIELD_BIT(TC_BINDING_UBO_VS) << shader;
529    if (tc->seen_shader_buffers[shader]) {
530       ssbo = tc_rebind_bindings(old_id, new_id, tc->shader_buffers[shader],
531                                 tc->max_shader_buffers);
532       if (ssbo)
533          *rebind_mask |= BITFIELD_BIT(TC_BINDING_SSBO_VS) << shader;
534    }
535    if (tc->seen_image_buffers[shader]) {
536       img = tc_rebind_bindings(old_id, new_id, tc->image_buffers[shader],
537                                tc->max_images);
538       if (img)
539          *rebind_mask |= BITFIELD_BIT(TC_BINDING_IMAGE_VS) << shader;
540    }
541    if (tc->seen_sampler_buffers[shader]) {
542       sampler = tc_rebind_bindings(old_id, new_id, tc->sampler_buffers[shader],
543                                    tc->max_samplers);
544       if (sampler)
545          *rebind_mask |= BITFIELD_BIT(TC_BINDING_SAMPLERVIEW_VS) << shader;
546    }
547    return ubo + ssbo + img + sampler;
548 }
549 
550 /* Add all bound buffers used by VS/TCS/TES/GS/FS to the buffer list.
551  * This is called by the first draw call in a batch when we want to inherit
552  * all bindings set by the previous batch.
553  */
554 static void
555 tc_add_all_gfx_bindings_to_buffer_list(struct threaded_context *tc)
556 {
557    BITSET_WORD *buffer_list = tc->buffer_lists[tc->next_buf_list].buffer_list;
558 
559    tc_add_bindings_to_buffer_list(buffer_list, tc->vertex_buffers, tc->max_vertex_buffers);
560    if (tc->seen_streamout_buffers)
561       tc_add_bindings_to_buffer_list(buffer_list, tc->streamout_buffers, PIPE_MAX_SO_BUFFERS);
562 
563    tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_VERTEX);
564    tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_FRAGMENT);
565 
566    if (tc->seen_tcs)
567       tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_TESS_CTRL);
568    if (tc->seen_tes)
569       tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_TESS_EVAL);
570    if (tc->seen_gs)
571       tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_GEOMETRY);
572 
573    tc->add_all_gfx_bindings_to_buffer_list = false;
574 }
575 
576 /* Add all bound buffers used by compute to the buffer list.
577  * This is called by the first compute call in a batch when we want to inherit
578  * all bindings set by the previous batch.
579  */
580 static void
581 tc_add_all_compute_bindings_to_buffer_list(struct threaded_context *tc)
582 {
583    BITSET_WORD *buffer_list = tc->buffer_lists[tc->next_buf_list].buffer_list;
584 
585    tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_COMPUTE);
586    tc->add_all_compute_bindings_to_buffer_list = false;
587 }
588 
589 static unsigned
590 tc_rebind_buffer(struct threaded_context *tc, uint32_t old_id, uint32_t new_id, uint32_t *rebind_mask)
591 {
592    unsigned vbo = 0, so = 0;
593 
594    vbo = tc_rebind_bindings(old_id, new_id, tc->vertex_buffers,
595                             tc->max_vertex_buffers);
596    if (vbo)
597       *rebind_mask |= BITFIELD_BIT(TC_BINDING_VERTEX_BUFFER);
598 
599    if (tc->seen_streamout_buffers) {
600       so = tc_rebind_bindings(old_id, new_id, tc->streamout_buffers,
601                               PIPE_MAX_SO_BUFFERS);
602       if (so)
603          *rebind_mask |= BITFIELD_BIT(TC_BINDING_STREAMOUT_BUFFER);
604    }
605    unsigned rebound = vbo + so;
606 
607    rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_VERTEX, rebind_mask);
608    rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_FRAGMENT, rebind_mask);
609 
610    if (tc->seen_tcs)
611       rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_TESS_CTRL, rebind_mask);
612    if (tc->seen_tes)
613       rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_TESS_EVAL, rebind_mask);
614    if (tc->seen_gs)
615       rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_GEOMETRY, rebind_mask);
616 
617    rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_COMPUTE, rebind_mask);
618 
619    if (rebound)
620       BITSET_SET(tc->buffer_lists[tc->next_buf_list].buffer_list, new_id & TC_BUFFER_ID_MASK);
621    return rebound;
622 }
623 
624 static bool
625 tc_is_buffer_bound_with_mask(uint32_t id, uint32_t *bindings, unsigned binding_mask)
626 {
627    while (binding_mask) {
628       if (bindings[u_bit_scan(&binding_mask)] == id)
629          return true;
630    }
631    return false;
632 }
633 
634 static bool
635 tc_is_buffer_shader_bound_for_write(struct threaded_context *tc, uint32_t id,
636                                     enum pipe_shader_type shader)
637 {
638    if (tc->seen_shader_buffers[shader] &&
639        tc_is_buffer_bound_with_mask(id, tc->shader_buffers[shader],
640                                     tc->shader_buffers_writeable_mask[shader]))
641       return true;
642 
643    if (tc->seen_image_buffers[shader] &&
644        tc_is_buffer_bound_with_mask(id, tc->image_buffers[shader],
645                                     tc->image_buffers_writeable_mask[shader]))
646       return true;
647 
648    return false;
649 }
650 
651 static bool
652 tc_is_buffer_bound_for_write(struct threaded_context *tc, uint32_t id)
653 {
654    if (tc->seen_streamout_buffers &&
655        tc_is_buffer_bound_with_mask(id, tc->streamout_buffers,
656                                     BITFIELD_MASK(PIPE_MAX_SO_BUFFERS)))
657       return true;
658 
659    if (tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_VERTEX) ||
660        tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_FRAGMENT) ||
661        tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_COMPUTE))
662       return true;
663 
664    if (tc->seen_tcs &&
665        tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_TESS_CTRL))
666       return true;
667 
668    if (tc->seen_tes &&
669        tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_TESS_EVAL))
670       return true;
671 
672    if (tc->seen_gs &&
673        tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_GEOMETRY))
674       return true;
675 
676    return false;
677 }
678 
679 static bool
680 tc_is_buffer_busy(struct threaded_context *tc, struct threaded_resource *tbuf,
681                   unsigned map_usage)
682 {
683    if (!tc->options.is_resource_busy)
684       return true;
685 
686    uint32_t id_hash = tbuf->buffer_id_unique & TC_BUFFER_ID_MASK;
687 
688    for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) {
689       struct tc_buffer_list *buf_list = &tc->buffer_lists[i];
690 
691       /* If the buffer is referenced by a batch that hasn't been flushed (by tc or the driver),
692        * then the buffer is considered busy. */
693       if (!util_queue_fence_is_signalled(&buf_list->driver_flushed_fence) &&
694           BITSET_TEST(buf_list->buffer_list, id_hash))
695          return true;
696    }
697 
698    /* The buffer isn't referenced by any unflushed batch: we can safely ask the driver
699     * whether this buffer is busy or not. */
700    return tc->options.is_resource_busy(tc->pipe->screen, tbuf->latest, map_usage);
701 }
702 
703 /**
704  * allow_cpu_storage should be false for user memory and imported buffers.
705  */
706 void
707 threaded_resource_init(struct pipe_resource *res, bool allow_cpu_storage)
708 {
709    struct threaded_resource *tres = threaded_resource(res);
710 
711    tres->first_user = NULL;
712    tres->used_by_multiple_contexts = false;
713    tres->latest = &tres->b;
714    tres->cpu_storage = NULL;
715    util_range_init(&tres->valid_buffer_range);
716    tres->is_shared = false;
717    tres->is_user_ptr = false;
718    tres->buffer_id_unique = 0;
719    tres->pending_staging_uploads = 0;
720    util_range_init(&tres->pending_staging_uploads_range);
721 
722    if (allow_cpu_storage &&
723        !(res->flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
724                        PIPE_RESOURCE_FLAG_SPARSE |
725                        PIPE_RESOURCE_FLAG_ENCRYPTED)) &&
726        /* We need buffer invalidation and buffer busyness tracking for the CPU
727         * storage, which aren't supported with pipe_vertex_state. */
728        !(res->bind & PIPE_BIND_VERTEX_STATE))
729       tres->allow_cpu_storage = true;
730    else
731       tres->allow_cpu_storage = false;
732 }
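/* A minimal sketch of how a driver uses the helper above, with hypothetical
 * xyz_* names: threaded_resource_init() is called when a buffer is created
 * and threaded_resource_deinit() when it is destroyed.
 *
 *    static struct pipe_resource *
 *    xyz_resource_create(struct pipe_screen *screen,
 *                        const struct pipe_resource *templ)
 *    {
 *       struct xyz_resource *res = xyz_alloc_resource(screen, templ);
 *       // allow_cpu_storage must be false for user memory and imported buffers
 *       threaded_resource_init(&res->b, !res->is_user_ptr && !res->is_imported);
 *       return &res->b;
 *    }
 */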
733 
734 void
735 threaded_resource_deinit(struct pipe_resource *res)
736 {
737    struct threaded_resource *tres = threaded_resource(res);
738 
739    if (tres->latest != &tres->b)
740       pipe_resource_reference(&tres->latest, NULL);
741    util_range_destroy(&tres->valid_buffer_range);
742    util_range_destroy(&tres->pending_staging_uploads_range);
743    align_free(tres->cpu_storage);
744 }
745 
746 struct pipe_context *
747 threaded_context_unwrap_sync(struct pipe_context *pipe)
748 {
749    if (!pipe || !pipe->priv)
750       return pipe;
751 
752    tc_sync(threaded_context(pipe));
753    return (struct pipe_context*)pipe->priv;
754 }
755 
756 
757 /********************************************************************
758  * simple functions
759  */
760 
761 #define TC_FUNC1(func, qualifier, type, deref, addr, ...) \
762    struct tc_call_##func { \
763       struct tc_call_base base; \
764       type state; \
765    }; \
766    \
767    static uint16_t \
768    tc_call_##func(struct pipe_context *pipe, void *call, uint64_t *last) \
769    { \
770       pipe->func(pipe, addr(to_call(call, tc_call_##func)->state)); \
771       return call_size(tc_call_##func); \
772    } \
773    \
774    static void \
775    tc_##func(struct pipe_context *_pipe, qualifier type deref param) \
776    { \
777       struct threaded_context *tc = threaded_context(_pipe); \
778       struct tc_call_##func *p = (struct tc_call_##func*) \
779                      tc_add_call(tc, TC_CALL_##func, tc_call_##func); \
780       p->state = deref(param); \
781       __VA_ARGS__; \
782    }
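/* For illustration, TC_FUNC1(set_sample_mask, , unsigned, , ) below roughly
 * expands to:
 *
 *    struct tc_call_set_sample_mask {
 *       struct tc_call_base base;
 *       unsigned state;
 *    };
 *
 *    static uint16_t
 *    tc_call_set_sample_mask(struct pipe_context *pipe, void *call, uint64_t *last)
 *    {
 *       pipe->set_sample_mask(pipe, to_call(call, tc_call_set_sample_mask)->state);
 *       return call_size(tc_call_set_sample_mask);
 *    }
 *
 *    static void
 *    tc_set_sample_mask(struct pipe_context *_pipe, unsigned param)
 *    {
 *       struct threaded_context *tc = threaded_context(_pipe);
 *       struct tc_call_set_sample_mask *p = (struct tc_call_set_sample_mask*)
 *                      tc_add_call(tc, TC_CALL_set_sample_mask, tc_call_set_sample_mask);
 *       p->state = param;
 *    }
 */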
783 
784 TC_FUNC1(set_active_query_state, , bool, , )
785 
786 TC_FUNC1(set_blend_color, const, struct pipe_blend_color, *, &)
787 TC_FUNC1(set_stencil_ref, const, struct pipe_stencil_ref, , )
788 TC_FUNC1(set_clip_state, const, struct pipe_clip_state, *, &)
789 TC_FUNC1(set_sample_mask, , unsigned, , )
790 TC_FUNC1(set_min_samples, , unsigned, , )
791 TC_FUNC1(set_polygon_stipple, const, struct pipe_poly_stipple, *, &)
792 
793 TC_FUNC1(texture_barrier, , unsigned, , )
794 TC_FUNC1(memory_barrier, , unsigned, , )
795 TC_FUNC1(delete_texture_handle, , uint64_t, , )
796 TC_FUNC1(delete_image_handle, , uint64_t, , )
797 TC_FUNC1(set_frontend_noop, , bool, , )
798 
799 
800 /********************************************************************
801  * queries
802  */
803 
804 static struct pipe_query *
805 tc_create_query(struct pipe_context *_pipe, unsigned query_type,
806                 unsigned index)
807 {
808    struct threaded_context *tc = threaded_context(_pipe);
809    struct pipe_context *pipe = tc->pipe;
810 
811    return pipe->create_query(pipe, query_type, index);
812 }
813 
814 static struct pipe_query *
815 tc_create_batch_query(struct pipe_context *_pipe, unsigned num_queries,
816                       unsigned *query_types)
817 {
818    struct threaded_context *tc = threaded_context(_pipe);
819    struct pipe_context *pipe = tc->pipe;
820 
821    return pipe->create_batch_query(pipe, num_queries, query_types);
822 }
823 
824 struct tc_query_call {
825    struct tc_call_base base;
826    struct pipe_query *query;
827 };
828 
829 static uint16_t
830 tc_call_destroy_query(struct pipe_context *pipe, void *call, uint64_t *last)
831 {
832    struct pipe_query *query = to_call(call, tc_query_call)->query;
833    struct threaded_query *tq = threaded_query(query);
834 
835    if (list_is_linked(&tq->head_unflushed))
836       list_del(&tq->head_unflushed);
837 
838    pipe->destroy_query(pipe, query);
839    return call_size(tc_query_call);
840 }
841 
842 static void
843 tc_destroy_query(struct pipe_context *_pipe, struct pipe_query *query)
844 {
845    struct threaded_context *tc = threaded_context(_pipe);
846 
847    tc_add_call(tc, TC_CALL_destroy_query, tc_query_call)->query = query;
848 }
849 
850 static uint16_t
851 tc_call_begin_query(struct pipe_context *pipe, void *call, uint64_t *last)
852 {
853    pipe->begin_query(pipe, to_call(call, tc_query_call)->query);
854    return call_size(tc_query_call);
855 }
856 
857 static bool
858 tc_begin_query(struct pipe_context *_pipe, struct pipe_query *query)
859 {
860    struct threaded_context *tc = threaded_context(_pipe);
861 
862    tc_add_call(tc, TC_CALL_begin_query, tc_query_call)->query = query;
863    return true; /* we don't care about the return value for this call */
864 }
865 
866 struct tc_end_query_call {
867    struct tc_call_base base;
868    struct threaded_context *tc;
869    struct pipe_query *query;
870 };
871 
872 static uint16_t
873 tc_call_end_query(struct pipe_context *pipe, void *call, uint64_t *last)
874 {
875    struct tc_end_query_call *p = to_call(call, tc_end_query_call);
876    struct threaded_query *tq = threaded_query(p->query);
877 
878    if (!list_is_linked(&tq->head_unflushed))
879       list_add(&tq->head_unflushed, &p->tc->unflushed_queries);
880 
881    pipe->end_query(pipe, p->query);
882    return call_size(tc_end_query_call);
883 }
884 
885 static bool
886 tc_end_query(struct pipe_context *_pipe, struct pipe_query *query)
887 {
888    struct threaded_context *tc = threaded_context(_pipe);
889    struct threaded_query *tq = threaded_query(query);
890    struct tc_end_query_call *call =
891       tc_add_call(tc, TC_CALL_end_query, tc_end_query_call);
892 
893    call->tc = tc;
894    call->query = query;
895 
896    tq->flushed = false;
897 
898    return true; /* we don't care about the return value for this call */
899 }
900 
901 static bool
902 tc_get_query_result(struct pipe_context *_pipe,
903                     struct pipe_query *query, bool wait,
904                     union pipe_query_result *result)
905 {
906    struct threaded_context *tc = threaded_context(_pipe);
907    struct threaded_query *tq = threaded_query(query);
908    struct pipe_context *pipe = tc->pipe;
909    bool flushed = tq->flushed;
910 
911    if (!flushed) {
912       tc_sync_msg(tc, wait ? "wait" : "nowait");
913       tc_set_driver_thread(tc);
914    }
915 
916    bool success = pipe->get_query_result(pipe, query, wait, result);
917 
918    if (!flushed)
919       tc_clear_driver_thread(tc);
920 
921    if (success) {
922       tq->flushed = true;
923       if (list_is_linked(&tq->head_unflushed)) {
924          /* This is safe because it can only happen after we sync'd. */
925          list_del(&tq->head_unflushed);
926       }
927    }
928    return success;
929 }
930 
931 struct tc_query_result_resource {
932    struct tc_call_base base;
933    enum pipe_query_flags flags:8;
934    enum pipe_query_value_type result_type:8;
935    int8_t index; /* it can be -1 */
936    unsigned offset;
937    struct pipe_query *query;
938    struct pipe_resource *resource;
939 };
940 
941 static uint16_t
942 tc_call_get_query_result_resource(struct pipe_context *pipe, void *call, uint64_t *last)
943 {
944    struct tc_query_result_resource *p = to_call(call, tc_query_result_resource);
945 
946    pipe->get_query_result_resource(pipe, p->query, p->flags, p->result_type,
947                                    p->index, p->resource, p->offset);
948    tc_drop_resource_reference(p->resource);
949    return call_size(tc_query_result_resource);
950 }
951 
952 static void
953 tc_get_query_result_resource(struct pipe_context *_pipe,
954                              struct pipe_query *query,
955                              enum pipe_query_flags flags,
956                              enum pipe_query_value_type result_type, int index,
957                              struct pipe_resource *resource, unsigned offset)
958 {
959    struct threaded_context *tc = threaded_context(_pipe);
960 
961    tc_buffer_disable_cpu_storage(resource);
962 
963    struct tc_query_result_resource *p =
964       tc_add_call(tc, TC_CALL_get_query_result_resource,
965                   tc_query_result_resource);
966    p->query = query;
967    p->flags = flags;
968    p->result_type = result_type;
969    p->index = index;
970    tc_set_resource_reference(&p->resource, resource);
971    tc_add_to_buffer_list(tc, &tc->buffer_lists[tc->next_buf_list], resource);
972    p->offset = offset;
973 }
974 
975 struct tc_render_condition {
976    struct tc_call_base base;
977    bool condition;
978    unsigned mode;
979    struct pipe_query *query;
980 };
981 
982 static uint16_t
983 tc_call_render_condition(struct pipe_context *pipe, void *call, uint64_t *last)
984 {
985    struct tc_render_condition *p = to_call(call, tc_render_condition);
986    pipe->render_condition(pipe, p->query, p->condition, p->mode);
987    return call_size(tc_render_condition);
988 }
989 
990 static void
991 tc_render_condition(struct pipe_context *_pipe,
992                     struct pipe_query *query, bool condition,
993                     enum pipe_render_cond_flag mode)
994 {
995    struct threaded_context *tc = threaded_context(_pipe);
996    struct tc_render_condition *p =
997       tc_add_call(tc, TC_CALL_render_condition, tc_render_condition);
998 
999    p->query = query;
1000    p->condition = condition;
1001    p->mode = mode;
1002 }
1003 
1004 
1005 /********************************************************************
1006  * constant (immutable) states
1007  */
1008 
1009 #define TC_CSO_CREATE(name, sname) \
1010    static void * \
1011    tc_create_##name##_state(struct pipe_context *_pipe, \
1012                             const struct pipe_##sname##_state *state) \
1013    { \
1014       struct pipe_context *pipe = threaded_context(_pipe)->pipe; \
1015       return pipe->create_##name##_state(pipe, state); \
1016    }
1017 
1018 #define TC_CSO_BIND(name, ...) TC_FUNC1(bind_##name##_state, , void *, , , ##__VA_ARGS__)
1019 #define TC_CSO_DELETE(name) TC_FUNC1(delete_##name##_state, , void *, , )
1020 
1021 #define TC_CSO(name, sname, ...) \
1022    TC_CSO_CREATE(name, sname) \
1023    TC_CSO_BIND(name, ##__VA_ARGS__) \
1024    TC_CSO_DELETE(name)
1025 
1026 #define TC_CSO_WHOLE(name) TC_CSO(name, name)
1027 #define TC_CSO_SHADER(name) TC_CSO(name, shader)
1028 #define TC_CSO_SHADER_TRACK(name) TC_CSO(name, shader, tc->seen_##name = true;)
1029 
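/* For illustration, TC_CSO_WHOLE(blend) below generates three functions from
 * the macros above:
 *
 *    tc_create_blend_state()  - calls pipe->create_blend_state() directly in
 *                               the application thread (creation is not queued)
 *    tc_bind_blend_state()    - queued via TC_FUNC1 as a bind_blend_state call
 *    tc_delete_blend_state()  - queued via TC_FUNC1 as a delete_blend_state call
 *
 * TC_CSO_SHADER_TRACK(gs) additionally records tc->seen_gs = true when a
 * geometry shader is bound, so functions like
 * tc_add_all_gfx_bindings_to_buffer_list() can skip stages that were never used.
 */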
1030 TC_CSO_WHOLE(blend)
1031 TC_CSO_WHOLE(rasterizer)
1032 TC_CSO_WHOLE(depth_stencil_alpha)
1033 TC_CSO_WHOLE(compute)
1034 TC_CSO_SHADER(fs)
1035 TC_CSO_SHADER(vs)
1036 TC_CSO_SHADER_TRACK(gs)
1037 TC_CSO_SHADER_TRACK(tcs)
1038 TC_CSO_SHADER_TRACK(tes)
1039 TC_CSO_CREATE(sampler, sampler)
1040 TC_CSO_DELETE(sampler)
1041 TC_CSO_BIND(vertex_elements)
1042 TC_CSO_DELETE(vertex_elements)
1043 
1044 static void *
1045 tc_create_vertex_elements_state(struct pipe_context *_pipe, unsigned count,
1046                                 const struct pipe_vertex_element *elems)
1047 {
1048    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1049 
1050    return pipe->create_vertex_elements_state(pipe, count, elems);
1051 }
1052 
1053 struct tc_sampler_states {
1054    struct tc_call_base base;
1055    ubyte shader, start, count;
1056    void *slot[0]; /* more will be allocated if needed */
1057 };
1058 
1059 static uint16_t
1060 tc_call_bind_sampler_states(struct pipe_context *pipe, void *call, uint64_t *last)
1061 {
1062    struct tc_sampler_states *p = (struct tc_sampler_states *)call;
1063 
1064    pipe->bind_sampler_states(pipe, p->shader, p->start, p->count, p->slot);
1065    return p->base.num_slots;
1066 }
1067 
1068 static void
1069 tc_bind_sampler_states(struct pipe_context *_pipe,
1070                        enum pipe_shader_type shader,
1071                        unsigned start, unsigned count, void **states)
1072 {
1073    if (!count)
1074       return;
1075 
1076    struct threaded_context *tc = threaded_context(_pipe);
1077    struct tc_sampler_states *p =
1078       tc_add_slot_based_call(tc, TC_CALL_bind_sampler_states, tc_sampler_states, count);
1079 
1080    p->shader = shader;
1081    p->start = start;
1082    p->count = count;
1083    memcpy(p->slot, states, count * sizeof(states[0]));
1084 }
1085 
1086 static void
1087 tc_link_shader(struct pipe_context *_pipe, void **shaders)
1088 {
1089    struct threaded_context *tc = threaded_context(_pipe);
1090    tc->pipe->link_shader(tc->pipe, shaders);
1091 }
1092 /********************************************************************
1093  * immediate states
1094  */
1095 
1096 struct tc_framebuffer {
1097    struct tc_call_base base;
1098    struct pipe_framebuffer_state state;
1099 };
1100 
1101 static uint16_t
1102 tc_call_set_framebuffer_state(struct pipe_context *pipe, void *call, uint64_t *last)
1103 {
1104    struct pipe_framebuffer_state *p = &to_call(call, tc_framebuffer)->state;
1105 
1106    pipe->set_framebuffer_state(pipe, p);
1107 
1108    unsigned nr_cbufs = p->nr_cbufs;
1109    for (unsigned i = 0; i < nr_cbufs; i++)
1110       tc_drop_surface_reference(p->cbufs[i]);
1111    tc_drop_surface_reference(p->zsbuf);
1112    return call_size(tc_framebuffer);
1113 }
1114 
1115 static void
1116 tc_set_framebuffer_state(struct pipe_context *_pipe,
1117                          const struct pipe_framebuffer_state *fb)
1118 {
1119    struct threaded_context *tc = threaded_context(_pipe);
1120    struct tc_framebuffer *p =
1121       tc_add_call(tc, TC_CALL_set_framebuffer_state, tc_framebuffer);
1122    unsigned nr_cbufs = fb->nr_cbufs;
1123 
1124    p->state.width = fb->width;
1125    p->state.height = fb->height;
1126    p->state.samples = fb->samples;
1127    p->state.layers = fb->layers;
1128    p->state.nr_cbufs = nr_cbufs;
1129 
1130    for (unsigned i = 0; i < nr_cbufs; i++) {
1131       p->state.cbufs[i] = NULL;
1132       pipe_surface_reference(&p->state.cbufs[i], fb->cbufs[i]);
1133    }
1134    p->state.zsbuf = NULL;
1135    pipe_surface_reference(&p->state.zsbuf, fb->zsbuf);
1136 }
1137 
1138 struct tc_tess_state {
1139    struct tc_call_base base;
1140    float state[6];
1141 };
1142 
1143 static uint16_t
1144 tc_call_set_tess_state(struct pipe_context *pipe, void *call, uint64_t *last)
1145 {
1146    float *p = to_call(call, tc_tess_state)->state;
1147 
1148    pipe->set_tess_state(pipe, p, p + 4);
1149    return call_size(tc_tess_state);
1150 }
1151 
1152 static void
1153 tc_set_tess_state(struct pipe_context *_pipe,
1154                   const float default_outer_level[4],
1155                   const float default_inner_level[2])
1156 {
1157    struct threaded_context *tc = threaded_context(_pipe);
1158    float *p = tc_add_call(tc, TC_CALL_set_tess_state, tc_tess_state)->state;
1159 
1160    memcpy(p, default_outer_level, 4 * sizeof(float));
1161    memcpy(p + 4, default_inner_level, 2 * sizeof(float));
1162 }
1163 
1164 struct tc_patch_vertices {
1165    struct tc_call_base base;
1166    ubyte patch_vertices;
1167 };
1168 
1169 static uint16_t
1170 tc_call_set_patch_vertices(struct pipe_context *pipe, void *call, uint64_t *last)
1171 {
1172    uint8_t patch_vertices = to_call(call, tc_patch_vertices)->patch_vertices;
1173 
1174    pipe->set_patch_vertices(pipe, patch_vertices);
1175    return call_size(tc_patch_vertices);
1176 }
1177 
1178 static void
1179 tc_set_patch_vertices(struct pipe_context *_pipe, uint8_t patch_vertices)
1180 {
1181    struct threaded_context *tc = threaded_context(_pipe);
1182 
1183    tc_add_call(tc, TC_CALL_set_patch_vertices,
1184                tc_patch_vertices)->patch_vertices = patch_vertices;
1185 }
1186 
1187 struct tc_constant_buffer_base {
1188    struct tc_call_base base;
1189    ubyte shader, index;
1190    bool is_null;
1191 };
1192 
1193 struct tc_constant_buffer {
1194    struct tc_constant_buffer_base base;
1195    struct pipe_constant_buffer cb;
1196 };
1197 
1198 static uint16_t
1199 tc_call_set_constant_buffer(struct pipe_context *pipe, void *call, uint64_t *last)
1200 {
1201    struct tc_constant_buffer *p = (struct tc_constant_buffer *)call;
1202 
1203    if (unlikely(p->base.is_null)) {
1204       pipe->set_constant_buffer(pipe, p->base.shader, p->base.index, false, NULL);
1205       return call_size(tc_constant_buffer_base);
1206    }
1207 
1208    pipe->set_constant_buffer(pipe, p->base.shader, p->base.index, true, &p->cb);
1209    return call_size(tc_constant_buffer);
1210 }
1211 
1212 static void
1213 tc_set_constant_buffer(struct pipe_context *_pipe,
1214                        enum pipe_shader_type shader, uint index,
1215                        bool take_ownership,
1216                        const struct pipe_constant_buffer *cb)
1217 {
1218    struct threaded_context *tc = threaded_context(_pipe);
1219 
1220    if (unlikely(!cb || (!cb->buffer && !cb->user_buffer))) {
1221       struct tc_constant_buffer_base *p =
1222          tc_add_call(tc, TC_CALL_set_constant_buffer, tc_constant_buffer_base);
1223       p->shader = shader;
1224       p->index = index;
1225       p->is_null = true;
1226       tc_unbind_buffer(&tc->const_buffers[shader][index]);
1227       return;
1228    }
1229 
1230    struct pipe_resource *buffer;
1231    unsigned offset;
1232 
1233    if (cb->user_buffer) {
1234       /* This must be done before adding set_constant_buffer, because it could
1235        * generate e.g. transfer_unmap and flush partially-uninitialized
1236        * set_constant_buffer to the driver if it was done afterwards.
1237        */
1238       buffer = NULL;
1239       u_upload_data(tc->base.const_uploader, 0, cb->buffer_size,
1240                     tc->ubo_alignment, cb->user_buffer, &offset, &buffer);
1241       u_upload_unmap(tc->base.const_uploader);
1242       take_ownership = true;
1243    } else {
1244       buffer = cb->buffer;
1245       offset = cb->buffer_offset;
1246    }
1247 
1248    struct tc_constant_buffer *p =
1249       tc_add_call(tc, TC_CALL_set_constant_buffer, tc_constant_buffer);
1250    p->base.shader = shader;
1251    p->base.index = index;
1252    p->base.is_null = false;
1253    p->cb.user_buffer = NULL;
1254    p->cb.buffer_offset = offset;
1255    p->cb.buffer_size = cb->buffer_size;
1256 
1257    if (take_ownership)
1258       p->cb.buffer = buffer;
1259    else
1260       tc_set_resource_reference(&p->cb.buffer, buffer);
1261 
1262    if (buffer) {
1263       tc_bind_buffer(tc, &tc->const_buffers[shader][index],
1264                      &tc->buffer_lists[tc->next_buf_list], buffer);
1265    } else {
1266       tc_unbind_buffer(&tc->const_buffers[shader][index]);
1267    }
1268 }
1269 
1270 struct tc_inlinable_constants {
1271    struct tc_call_base base;
1272    ubyte shader;
1273    ubyte num_values;
1274    uint32_t values[MAX_INLINABLE_UNIFORMS];
1275 };
1276 
1277 static uint16_t
1278 tc_call_set_inlinable_constants(struct pipe_context *pipe, void *call, uint64_t *last)
1279 {
1280    struct tc_inlinable_constants *p = to_call(call, tc_inlinable_constants);
1281 
1282    pipe->set_inlinable_constants(pipe, p->shader, p->num_values, p->values);
1283    return call_size(tc_inlinable_constants);
1284 }
1285 
1286 static void
1287 tc_set_inlinable_constants(struct pipe_context *_pipe,
1288                            enum pipe_shader_type shader,
1289                            uint num_values, uint32_t *values)
1290 {
1291    struct threaded_context *tc = threaded_context(_pipe);
1292    struct tc_inlinable_constants *p =
1293       tc_add_call(tc, TC_CALL_set_inlinable_constants, tc_inlinable_constants);
1294    p->shader = shader;
1295    p->num_values = num_values;
1296    memcpy(p->values, values, num_values * 4);
1297 }
1298 
1299 struct tc_sample_locations {
1300    struct tc_call_base base;
1301    uint16_t size;
1302    uint8_t slot[0];
1303 };
1304 
1305 
1306 static uint16_t
1307 tc_call_set_sample_locations(struct pipe_context *pipe, void *call, uint64_t *last)
1308 {
1309    struct tc_sample_locations *p = (struct tc_sample_locations *)call;
1310 
1311    pipe->set_sample_locations(pipe, p->size, p->slot);
1312    return p->base.num_slots;
1313 }
1314 
1315 static void
1316 tc_set_sample_locations(struct pipe_context *_pipe, size_t size, const uint8_t *locations)
1317 {
1318    struct threaded_context *tc = threaded_context(_pipe);
1319    struct tc_sample_locations *p =
1320       tc_add_slot_based_call(tc, TC_CALL_set_sample_locations,
1321                              tc_sample_locations, size);
1322 
1323    p->size = size;
1324    memcpy(p->slot, locations, size);
1325 }
1326 
1327 struct tc_scissors {
1328    struct tc_call_base base;
1329    ubyte start, count;
1330    struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
1331 };
1332 
1333 static uint16_t
1334 tc_call_set_scissor_states(struct pipe_context *pipe, void *call, uint64_t *last)
1335 {
1336    struct tc_scissors *p = (struct tc_scissors *)call;
1337 
1338    pipe->set_scissor_states(pipe, p->start, p->count, p->slot);
1339    return p->base.num_slots;
1340 }
1341 
1342 static void
1343 tc_set_scissor_states(struct pipe_context *_pipe,
1344                       unsigned start, unsigned count,
1345                       const struct pipe_scissor_state *states)
1346 {
1347    struct threaded_context *tc = threaded_context(_pipe);
1348    struct tc_scissors *p =
1349       tc_add_slot_based_call(tc, TC_CALL_set_scissor_states, tc_scissors, count);
1350 
1351    p->start = start;
1352    p->count = count;
1353    memcpy(&p->slot, states, count * sizeof(states[0]));
1354 }
1355 
1356 struct tc_viewports {
1357    struct tc_call_base base;
1358    ubyte start, count;
1359    struct pipe_viewport_state slot[0]; /* more will be allocated if needed */
1360 };
1361 
1362 static uint16_t
1363 tc_call_set_viewport_states(struct pipe_context *pipe, void *call, uint64_t *last)
1364 {
1365    struct tc_viewports *p = (struct tc_viewports *)call;
1366 
1367    pipe->set_viewport_states(pipe, p->start, p->count, p->slot);
1368    return p->base.num_slots;
1369 }
1370 
1371 static void
1372 tc_set_viewport_states(struct pipe_context *_pipe,
1373                        unsigned start, unsigned count,
1374                        const struct pipe_viewport_state *states)
1375 {
1376    if (!count)
1377       return;
1378 
1379    struct threaded_context *tc = threaded_context(_pipe);
1380    struct tc_viewports *p =
1381       tc_add_slot_based_call(tc, TC_CALL_set_viewport_states, tc_viewports, count);
1382 
1383    p->start = start;
1384    p->count = count;
1385    memcpy(&p->slot, states, count * sizeof(states[0]));
1386 }
1387 
1388 struct tc_window_rects {
1389    struct tc_call_base base;
1390    bool include;
1391    ubyte count;
1392    struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
1393 };
1394 
1395 static uint16_t
1396 tc_call_set_window_rectangles(struct pipe_context *pipe, void *call, uint64_t *last)
1397 {
1398    struct tc_window_rects *p = (struct tc_window_rects *)call;
1399 
1400    pipe->set_window_rectangles(pipe, p->include, p->count, p->slot);
1401    return p->base.num_slots;
1402 }
1403 
1404 static void
1405 tc_set_window_rectangles(struct pipe_context *_pipe, bool include,
1406                          unsigned count,
1407                          const struct pipe_scissor_state *rects)
1408 {
1409    struct threaded_context *tc = threaded_context(_pipe);
1410    struct tc_window_rects *p =
1411       tc_add_slot_based_call(tc, TC_CALL_set_window_rectangles, tc_window_rects, count);
1412 
1413    p->include = include;
1414    p->count = count;
1415    memcpy(p->slot, rects, count * sizeof(rects[0]));
1416 }
1417 
1418 struct tc_sampler_views {
1419    struct tc_call_base base;
1420    ubyte shader, start, count, unbind_num_trailing_slots;
1421    struct pipe_sampler_view *slot[0]; /* more will be allocated if needed */
1422 };
1423 
1424 static uint16_t
1425 tc_call_set_sampler_views(struct pipe_context *pipe, void *call, uint64_t *last)
1426 {
1427    struct tc_sampler_views *p = (struct tc_sampler_views *)call;
1428 
1429    pipe->set_sampler_views(pipe, p->shader, p->start, p->count,
1430                            p->unbind_num_trailing_slots, true, p->slot);
1431    return p->base.num_slots;
1432 }
1433 
1434 static void
1435 tc_set_sampler_views(struct pipe_context *_pipe,
1436                      enum pipe_shader_type shader,
1437                      unsigned start, unsigned count,
1438                      unsigned unbind_num_trailing_slots, bool take_ownership,
1439                      struct pipe_sampler_view **views)
1440 {
1441    if (!count && !unbind_num_trailing_slots)
1442       return;
1443 
1444    struct threaded_context *tc = threaded_context(_pipe);
1445    struct tc_sampler_views *p =
1446       tc_add_slot_based_call(tc, TC_CALL_set_sampler_views, tc_sampler_views,
1447                              views ? count : 0);
1448 
1449    p->shader = shader;
1450    p->start = start;
1451 
1452    if (views) {
1453       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1454 
1455       p->count = count;
1456       p->unbind_num_trailing_slots = unbind_num_trailing_slots;
1457 
1458       if (take_ownership) {
1459          memcpy(p->slot, views, sizeof(*views) * count);
1460 
1461          for (unsigned i = 0; i < count; i++) {
1462             if (views[i] && views[i]->target == PIPE_BUFFER) {
1463                tc_bind_buffer(tc, &tc->sampler_buffers[shader][start + i], next,
1464                               views[i]->texture);
1465             } else {
1466                tc_unbind_buffer(&tc->sampler_buffers[shader][start + i]);
1467             }
1468          }
1469       } else {
1470          for (unsigned i = 0; i < count; i++) {
1471             p->slot[i] = NULL;
1472             pipe_sampler_view_reference(&p->slot[i], views[i]);
1473 
1474             if (views[i] && views[i]->target == PIPE_BUFFER) {
1475                tc_bind_buffer(tc, &tc->sampler_buffers[shader][start + i], next,
1476                               views[i]->texture);
1477             } else {
1478                tc_unbind_buffer(&tc->sampler_buffers[shader][start + i]);
1479             }
1480          }
1481       }
1482 
1483       tc_unbind_buffers(&tc->sampler_buffers[shader][start + count],
1484                         unbind_num_trailing_slots);
1485       tc->seen_sampler_buffers[shader] = true;
1486    } else {
1487       p->count = 0;
1488       p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;
1489 
1490       tc_unbind_buffers(&tc->sampler_buffers[shader][start],
1491                         count + unbind_num_trailing_slots);
1492    }
1493 }
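
/* Note on the two paths above: with take_ownership the incoming view pointers
 * are moved into the call payload with a plain memcpy, otherwise each view
 * gets its own reference via pipe_sampler_view_reference. Either way,
 * buffer-backed views are recorded in tc->sampler_buffers so the buffer
 * rebind/invalidation logic knows they are bound, and any trailing slots are
 * explicitly unbound.
 */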
1494 
1495 struct tc_shader_images {
1496    struct tc_call_base base;
1497    ubyte shader, start, count;
1498    ubyte unbind_num_trailing_slots;
1499    struct pipe_image_view slot[0]; /* more will be allocated if needed */
1500 };
1501 
1502 static uint16_t
1503 tc_call_set_shader_images(struct pipe_context *pipe, void *call, uint64_t *last)
1504 {
1505    struct tc_shader_images *p = (struct tc_shader_images *)call;
1506    unsigned count = p->count;
1507 
1508    if (!p->count) {
1509       pipe->set_shader_images(pipe, p->shader, p->start, 0,
1510                               p->unbind_num_trailing_slots, NULL);
1511       return call_size(tc_shader_images);
1512    }
1513 
1514    pipe->set_shader_images(pipe, p->shader, p->start, p->count,
1515                            p->unbind_num_trailing_slots, p->slot);
1516 
1517    for (unsigned i = 0; i < count; i++)
1518       tc_drop_resource_reference(p->slot[i].resource);
1519 
1520    return p->base.num_slots;
1521 }
1522 
1523 static void
1524 tc_set_shader_images(struct pipe_context *_pipe,
1525                      enum pipe_shader_type shader,
1526                      unsigned start, unsigned count,
1527                      unsigned unbind_num_trailing_slots,
1528                      const struct pipe_image_view *images)
1529 {
1530    if (!count && !unbind_num_trailing_slots)
1531       return;
1532 
1533    struct threaded_context *tc = threaded_context(_pipe);
1534    struct tc_shader_images *p =
1535       tc_add_slot_based_call(tc, TC_CALL_set_shader_images, tc_shader_images,
1536                              images ? count : 0);
1537    unsigned writable_buffers = 0;
1538 
1539    p->shader = shader;
1540    p->start = start;
1541 
1542    if (images) {
1543       p->count = count;
1544       p->unbind_num_trailing_slots = unbind_num_trailing_slots;
1545 
1546       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1547 
1548       for (unsigned i = 0; i < count; i++) {
1549          struct pipe_resource *resource = images[i].resource;
1550 
1551          tc_set_resource_reference(&p->slot[i].resource, resource);
1552 
1553          if (resource && resource->target == PIPE_BUFFER) {
1554             tc_bind_buffer(tc, &tc->image_buffers[shader][start + i], next, resource);
1555 
1556             if (images[i].access & PIPE_IMAGE_ACCESS_WRITE) {
1557                struct threaded_resource *tres = threaded_resource(resource);
1558 
1559                tc_buffer_disable_cpu_storage(resource);
1560                util_range_add(&tres->b, &tres->valid_buffer_range,
1561                               images[i].u.buf.offset,
1562                               images[i].u.buf.offset + images[i].u.buf.size);
1563                writable_buffers |= BITFIELD_BIT(start + i);
1564             }
1565          } else {
1566             tc_unbind_buffer(&tc->image_buffers[shader][start + i]);
1567          }
1568       }
1569       memcpy(p->slot, images, count * sizeof(images[0]));
1570 
1571       tc_unbind_buffers(&tc->image_buffers[shader][start + count],
1572                         unbind_num_trailing_slots);
1573       tc->seen_image_buffers[shader] = true;
1574    } else {
1575       p->count = 0;
1576       p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;
1577 
1578       tc_unbind_buffers(&tc->image_buffers[shader][start],
1579                         count + unbind_num_trailing_slots);
1580    }
1581 
1582    tc->image_buffers_writeable_mask[shader] &= ~BITFIELD_RANGE(start, count);
1583    tc->image_buffers_writeable_mask[shader] |= writable_buffers;
1584 }
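
/* Writable buffer images additionally disable the CPU storage fast path and
 * extend the resource's valid_buffer_range, since the GPU may write the bound
 * range. The per-shader image_buffers_writeable_mask is then rebuilt for the
 * [start, start + count) range from the freshly computed writable_buffers
 * bits.
 */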
1585 
1586 struct tc_shader_buffers {
1587    struct tc_call_base base;
1588    ubyte shader, start, count;
1589    bool unbind;
1590    unsigned writable_bitmask;
1591    struct pipe_shader_buffer slot[0]; /* more will be allocated if needed */
1592 };
1593 
1594 static uint16_t
1595 tc_call_set_shader_buffers(struct pipe_context *pipe, void *call, uint64_t *last)
1596 {
1597    struct tc_shader_buffers *p = (struct tc_shader_buffers *)call;
1598    unsigned count = p->count;
1599 
1600    if (p->unbind) {
1601       pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, NULL, 0);
1602       return call_size(tc_shader_buffers);
1603    }
1604 
1605    pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, p->slot,
1606                             p->writable_bitmask);
1607 
1608    for (unsigned i = 0; i < count; i++)
1609       tc_drop_resource_reference(p->slot[i].buffer);
1610 
1611    return p->base.num_slots;
1612 }
1613 
1614 static void
1615 tc_set_shader_buffers(struct pipe_context *_pipe,
1616                       enum pipe_shader_type shader,
1617                       unsigned start, unsigned count,
1618                       const struct pipe_shader_buffer *buffers,
1619                       unsigned writable_bitmask)
1620 {
1621    if (!count)
1622       return;
1623 
1624    struct threaded_context *tc = threaded_context(_pipe);
1625    struct tc_shader_buffers *p =
1626       tc_add_slot_based_call(tc, TC_CALL_set_shader_buffers, tc_shader_buffers,
1627                              buffers ? count : 0);
1628 
1629    p->shader = shader;
1630    p->start = start;
1631    p->count = count;
1632    p->unbind = buffers == NULL;
1633    p->writable_bitmask = writable_bitmask;
1634 
1635    if (buffers) {
1636       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1637 
1638       for (unsigned i = 0; i < count; i++) {
1639          struct pipe_shader_buffer *dst = &p->slot[i];
1640          const struct pipe_shader_buffer *src = buffers + i;
1641 
1642          tc_set_resource_reference(&dst->buffer, src->buffer);
1643          dst->buffer_offset = src->buffer_offset;
1644          dst->buffer_size = src->buffer_size;
1645 
1646          if (src->buffer) {
1647             struct threaded_resource *tres = threaded_resource(src->buffer);
1648 
1649             tc_bind_buffer(tc, &tc->shader_buffers[shader][start + i], next, &tres->b);
1650 
1651             if (writable_bitmask & BITFIELD_BIT(i)) {
1652                tc_buffer_disable_cpu_storage(src->buffer);
1653                util_range_add(&tres->b, &tres->valid_buffer_range,
1654                               src->buffer_offset,
1655                               src->buffer_offset + src->buffer_size);
1656             }
1657          } else {
1658             tc_unbind_buffer(&tc->shader_buffers[shader][start + i]);
1659          }
1660       }
1661       tc->seen_shader_buffers[shader] = true;
1662    } else {
1663       tc_unbind_buffers(&tc->shader_buffers[shader][start], count);
1664    }
1665 
1666    tc->shader_buffers_writeable_mask[shader] &= ~BITFIELD_RANGE(start, count);
1667    tc->shader_buffers_writeable_mask[shader] |= writable_bitmask << start;
1668 }
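
/* Unlike the image path, the writable bits here come straight from the
 * caller's writable_bitmask (relative to 'start'), so
 * shader_buffers_writeable_mask is updated by clearing the
 * [start, start + count) range and OR'ing in writable_bitmask << start.
 */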
1669 
1670 struct tc_vertex_buffers {
1671    struct tc_call_base base;
1672    ubyte start, count;
1673    ubyte unbind_num_trailing_slots;
1674    struct pipe_vertex_buffer slot[0]; /* more will be allocated if needed */
1675 };
1676 
1677 static uint16_t
1678 tc_call_set_vertex_buffers(struct pipe_context *pipe, void *call, uint64_t *last)
1679 {
1680    struct tc_vertex_buffers *p = (struct tc_vertex_buffers *)call;
1681    unsigned count = p->count;
1682 
1683    if (!count) {
1684       pipe->set_vertex_buffers(pipe, p->start, 0,
1685                                p->unbind_num_trailing_slots, false, NULL);
1686       return call_size(tc_vertex_buffers);
1687    }
1688 
1689    for (unsigned i = 0; i < count; i++)
1690       tc_assert(!p->slot[i].is_user_buffer);
1691 
1692    pipe->set_vertex_buffers(pipe, p->start, count,
1693                             p->unbind_num_trailing_slots, true, p->slot);
1694    return p->base.num_slots;
1695 }
1696 
1697 static void
1698 tc_set_vertex_buffers(struct pipe_context *_pipe,
1699                       unsigned start, unsigned count,
1700                       unsigned unbind_num_trailing_slots,
1701                       bool take_ownership,
1702                       const struct pipe_vertex_buffer *buffers)
1703 {
1704    struct threaded_context *tc = threaded_context(_pipe);
1705 
1706    if (!count && !unbind_num_trailing_slots)
1707       return;
1708 
1709    if (count && buffers) {
1710       struct tc_vertex_buffers *p =
1711          tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, count);
1712       p->start = start;
1713       p->count = count;
1714       p->unbind_num_trailing_slots = unbind_num_trailing_slots;
1715 
1716       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1717 
1718       if (take_ownership) {
1719          memcpy(p->slot, buffers, count * sizeof(struct pipe_vertex_buffer));
1720 
1721          for (unsigned i = 0; i < count; i++) {
1722             struct pipe_resource *buf = buffers[i].buffer.resource;
1723 
1724             if (buf) {
1725                tc_bind_buffer(tc, &tc->vertex_buffers[start + i], next, buf);
1726             } else {
1727                tc_unbind_buffer(&tc->vertex_buffers[start + i]);
1728             }
1729          }
1730       } else {
1731          for (unsigned i = 0; i < count; i++) {
1732             struct pipe_vertex_buffer *dst = &p->slot[i];
1733             const struct pipe_vertex_buffer *src = buffers + i;
1734             struct pipe_resource *buf = src->buffer.resource;
1735 
1736             tc_assert(!src->is_user_buffer);
1737             dst->stride = src->stride;
1738             dst->is_user_buffer = false;
1739             tc_set_resource_reference(&dst->buffer.resource, buf);
1740             dst->buffer_offset = src->buffer_offset;
1741 
1742             if (buf) {
1743                tc_bind_buffer(tc, &tc->vertex_buffers[start + i], next, buf);
1744             } else {
1745                tc_unbind_buffer(&tc->vertex_buffers[start + i]);
1746             }
1747          }
1748       }
1749 
1750       tc_unbind_buffers(&tc->vertex_buffers[start + count],
1751                         unbind_num_trailing_slots);
1752    } else {
1753       struct tc_vertex_buffers *p =
1754          tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, 0);
1755       p->start = start;
1756       p->count = 0;
1757       p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;
1758 
1759       tc_unbind_buffers(&tc->vertex_buffers[start],
1760                         count + unbind_num_trailing_slots);
1761    }
1762 }
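
/* Vertex buffers must not be user pointers by the time they reach the
 * threaded context (see the tc_assert above), so the non-ownership path can
 * simply copy stride/offset and take a resource reference instead of handling
 * user-buffer uploads here.
 */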
1763 
1764 struct tc_stream_outputs {
1765    struct tc_call_base base;
1766    unsigned count;
1767    struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
1768    unsigned offsets[PIPE_MAX_SO_BUFFERS];
1769 };
1770 
1771 static uint16_t
1772 tc_call_set_stream_output_targets(struct pipe_context *pipe, void *call, uint64_t *last)
1773 {
1774    struct tc_stream_outputs *p = to_call(call, tc_stream_outputs);
1775    unsigned count = p->count;
1776 
1777    pipe->set_stream_output_targets(pipe, count, p->targets, p->offsets);
1778    for (unsigned i = 0; i < count; i++)
1779       tc_drop_so_target_reference(p->targets[i]);
1780 
1781    return call_size(tc_stream_outputs);
1782 }
1783 
1784 static void
1785 tc_set_stream_output_targets(struct pipe_context *_pipe,
1786                              unsigned count,
1787                              struct pipe_stream_output_target **tgs,
1788                              const unsigned *offsets)
1789 {
1790    struct threaded_context *tc = threaded_context(_pipe);
1791    struct tc_stream_outputs *p =
1792       tc_add_call(tc, TC_CALL_set_stream_output_targets, tc_stream_outputs);
1793    struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1794 
1795    for (unsigned i = 0; i < count; i++) {
1796       p->targets[i] = NULL;
1797       pipe_so_target_reference(&p->targets[i], tgs[i]);
1798       if (tgs[i]) {
1799          tc_buffer_disable_cpu_storage(tgs[i]->buffer);
1800          tc_bind_buffer(tc, &tc->streamout_buffers[i], next, tgs[i]->buffer);
1801       } else {
1802          tc_unbind_buffer(&tc->streamout_buffers[i]);
1803       }
1804    }
1805    p->count = count;
1806    memcpy(p->offsets, offsets, count * sizeof(unsigned));
1807 
1808    tc_unbind_buffers(&tc->streamout_buffers[count], PIPE_MAX_SO_BUFFERS - count);
1809    if (count)
1810       tc->seen_streamout_buffers = true;
1811 }
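
/* Stream-output buffers are written by the GPU, so CPU storage is disabled
 * for each bound buffer, and all PIPE_MAX_SO_BUFFERS slots past 'count' are
 * unbound to keep the buffer-list tracking consistent.
 */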
1812 
1813 static void
1814 tc_set_compute_resources(struct pipe_context *_pipe, unsigned start,
1815                          unsigned count, struct pipe_surface **resources)
1816 {
1817    struct threaded_context *tc = threaded_context(_pipe);
1818    struct pipe_context *pipe = tc->pipe;
1819 
1820    tc_sync(tc);
1821    pipe->set_compute_resources(pipe, start, count, resources);
1822 }
1823 
1824 static void
1825 tc_set_global_binding(struct pipe_context *_pipe, unsigned first,
1826                       unsigned count, struct pipe_resource **resources,
1827                       uint32_t **handles)
1828 {
1829    struct threaded_context *tc = threaded_context(_pipe);
1830    struct pipe_context *pipe = tc->pipe;
1831 
1832    tc_sync(tc);
1833    pipe->set_global_binding(pipe, first, count, resources, handles);
1834 }
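
/* set_compute_resources and set_global_binding take the simple route:
 * synchronize with the driver thread and call straight into the driver
 * instead of being queued.
 */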
1835 
1836 
1837 /********************************************************************
1838  * views
1839  */
1840 
1841 static struct pipe_surface *
1842 tc_create_surface(struct pipe_context *_pipe,
1843                   struct pipe_resource *resource,
1844                   const struct pipe_surface *surf_tmpl)
1845 {
1846    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1847    struct pipe_surface *view =
1848          pipe->create_surface(pipe, resource, surf_tmpl);
1849 
1850    if (view)
1851       view->context = _pipe;
1852    return view;
1853 }
1854 
1855 static void
1856 tc_surface_destroy(struct pipe_context *_pipe,
1857                    struct pipe_surface *surf)
1858 {
1859    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1860 
1861    pipe->surface_destroy(pipe, surf);
1862 }
1863 
1864 static struct pipe_sampler_view *
1865 tc_create_sampler_view(struct pipe_context *_pipe,
1866                        struct pipe_resource *resource,
1867                        const struct pipe_sampler_view *templ)
1868 {
1869    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1870    struct pipe_sampler_view *view =
1871          pipe->create_sampler_view(pipe, resource, templ);
1872 
1873    if (view)
1874       view->context = _pipe;
1875    return view;
1876 }
1877 
1878 static void
1879 tc_sampler_view_destroy(struct pipe_context *_pipe,
1880                         struct pipe_sampler_view *view)
1881 {
1882    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1883 
1884    pipe->sampler_view_destroy(pipe, view);
1885 }
1886 
1887 static struct pipe_stream_output_target *
1888 tc_create_stream_output_target(struct pipe_context *_pipe,
1889                                struct pipe_resource *res,
1890                                unsigned buffer_offset,
1891                                unsigned buffer_size)
1892 {
1893    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1894    struct threaded_resource *tres = threaded_resource(res);
1895    struct pipe_stream_output_target *view;
1896 
1897    util_range_add(&tres->b, &tres->valid_buffer_range, buffer_offset,
1898                   buffer_offset + buffer_size);
1899 
1900    view = pipe->create_stream_output_target(pipe, res, buffer_offset,
1901                                             buffer_size);
1902    if (view)
1903       view->context = _pipe;
1904    return view;
1905 }
1906 
1907 static void
1908 tc_stream_output_target_destroy(struct pipe_context *_pipe,
1909                                 struct pipe_stream_output_target *target)
1910 {
1911    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1912 
1913    pipe->stream_output_target_destroy(pipe, target);
1914 }
1915 
1916 
1917 /********************************************************************
1918  * bindless
1919  */
1920 
1921 static uint64_t
1922 tc_create_texture_handle(struct pipe_context *_pipe,
1923                          struct pipe_sampler_view *view,
1924                          const struct pipe_sampler_state *state)
1925 {
1926    struct threaded_context *tc = threaded_context(_pipe);
1927    struct pipe_context *pipe = tc->pipe;
1928 
1929    tc_sync(tc);
1930    return pipe->create_texture_handle(pipe, view, state);
1931 }
1932 
1933 struct tc_make_texture_handle_resident {
1934    struct tc_call_base base;
1935    bool resident;
1936    uint64_t handle;
1937 };
1938 
1939 static uint16_t
1940 tc_call_make_texture_handle_resident(struct pipe_context *pipe, void *call, uint64_t *last)
1941 {
1942    struct tc_make_texture_handle_resident *p =
1943       to_call(call, tc_make_texture_handle_resident);
1944 
1945    pipe->make_texture_handle_resident(pipe, p->handle, p->resident);
1946    return call_size(tc_make_texture_handle_resident);
1947 }
1948 
1949 static void
1950 tc_make_texture_handle_resident(struct pipe_context *_pipe, uint64_t handle,
1951                                 bool resident)
1952 {
1953    struct threaded_context *tc = threaded_context(_pipe);
1954    struct tc_make_texture_handle_resident *p =
1955       tc_add_call(tc, TC_CALL_make_texture_handle_resident,
1956                   tc_make_texture_handle_resident);
1957 
1958    p->handle = handle;
1959    p->resident = resident;
1960 }
1961 
1962 static uint64_t
1963 tc_create_image_handle(struct pipe_context *_pipe,
1964                        const struct pipe_image_view *image)
1965 {
1966    struct threaded_context *tc = threaded_context(_pipe);
1967    struct pipe_context *pipe = tc->pipe;
1968 
1969    if (image->resource->target == PIPE_BUFFER)
1970       tc_buffer_disable_cpu_storage(image->resource);
1971 
1972    tc_sync(tc);
1973    return pipe->create_image_handle(pipe, image);
1974 }
1975 
1976 struct tc_make_image_handle_resident {
1977    struct tc_call_base base;
1978    bool resident;
1979    unsigned access;
1980    uint64_t handle;
1981 };
1982 
1983 static uint16_t
1984 tc_call_make_image_handle_resident(struct pipe_context *pipe, void *call, uint64_t *last)
1985 {
1986    struct tc_make_image_handle_resident *p =
1987       to_call(call, tc_make_image_handle_resident);
1988 
1989    pipe->make_image_handle_resident(pipe, p->handle, p->access, p->resident);
1990    return call_size(tc_make_image_handle_resident);
1991 }
1992 
1993 static void
1994 tc_make_image_handle_resident(struct pipe_context *_pipe, uint64_t handle,
1995                               unsigned access, bool resident)
1996 {
1997    struct threaded_context *tc = threaded_context(_pipe);
1998    struct tc_make_image_handle_resident *p =
1999       tc_add_call(tc, TC_CALL_make_image_handle_resident,
2000                   tc_make_image_handle_resident);
2001 
2002    p->handle = handle;
2003    p->access = access;
2004    p->resident = resident;
2005 }
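
/* Bindless handle creation has to synchronize because it returns a value to
 * the caller, while make_texture/image_handle_resident returns nothing and
 * can be queued like any other call.
 */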
2006 
2007 
2008 /********************************************************************
2009  * transfer
2010  */
2011 
2012 struct tc_replace_buffer_storage {
2013    struct tc_call_base base;
2014    uint16_t num_rebinds;
2015    uint32_t rebind_mask;
2016    uint32_t delete_buffer_id;
2017    struct pipe_resource *dst;
2018    struct pipe_resource *src;
2019    tc_replace_buffer_storage_func func;
2020 };
2021 
2022 static uint16_t
2023 tc_call_replace_buffer_storage(struct pipe_context *pipe, void *call, uint64_t *last)
2024 {
2025    struct tc_replace_buffer_storage *p = to_call(call, tc_replace_buffer_storage);
2026 
2027    p->func(pipe, p->dst, p->src, p->num_rebinds, p->rebind_mask, p->delete_buffer_id);
2028 
2029    tc_drop_resource_reference(p->dst);
2030    tc_drop_resource_reference(p->src);
2031    return call_size(tc_replace_buffer_storage);
2032 }
2033 
2034 /* Return true if the buffer has been invalidated or is idle.
2035  * Note that callers must've called tc_touch_buffer before calling
2036  * this function. */
2037 static bool
2038 tc_invalidate_buffer(struct threaded_context *tc,
2039                      struct threaded_resource *tbuf)
2040 {
2041    if (!tc_is_buffer_busy(tc, tbuf, PIPE_MAP_READ_WRITE)) {
2042       /* It's idle, so invalidation would be a no-op, but we can still clear
2043        * the valid range because we are technically doing an invalidation;
2044        * we just skip the actual work because it would be useless.
2045        *
2046        * If the buffer is bound for write, we can't invalidate the range.
2047        */
2048       if (!tc_is_buffer_bound_for_write(tc, tbuf->buffer_id_unique))
2049          util_range_set_empty(&tbuf->valid_buffer_range);
2050       return true;
2051    }
2052 
2053    struct pipe_screen *screen = tc->base.screen;
2054    struct pipe_resource *new_buf;
2055 
2056    /* Shared, pinned, and sparse buffers can't be reallocated. */
2057    if (tc_is_buffer_shared(tbuf) ||
2058        tbuf->is_user_ptr ||
2059        tbuf->b.flags & (PIPE_RESOURCE_FLAG_SPARSE | PIPE_RESOURCE_FLAG_UNMAPPABLE))
2060       return false;
2061 
2062    /* Allocate a new one. */
2063    new_buf = screen->resource_create(screen, &tbuf->b);
2064    if (!new_buf)
2065       return false;
2066 
2067    /* Replace the "latest" pointer. */
2068    if (tbuf->latest != &tbuf->b)
2069       pipe_resource_reference(&tbuf->latest, NULL);
2070 
2071    tbuf->latest = new_buf;
2072 
2073    uint32_t delete_buffer_id = tbuf->buffer_id_unique;
2074 
2075    /* Enqueue storage replacement of the original buffer. */
2076    struct tc_replace_buffer_storage *p =
2077       tc_add_call(tc, TC_CALL_replace_buffer_storage,
2078                   tc_replace_buffer_storage);
2079 
2080    p->func = tc->replace_buffer_storage;
2081    tc_set_resource_reference(&p->dst, &tbuf->b);
2082    tc_set_resource_reference(&p->src, new_buf);
2083    p->delete_buffer_id = delete_buffer_id;
2084    p->rebind_mask = 0;
2085 
2086    /* Treat the current buffer as the new buffer. */
2087    bool bound_for_write = tc_is_buffer_bound_for_write(tc, tbuf->buffer_id_unique);
2088    p->num_rebinds = tc_rebind_buffer(tc, tbuf->buffer_id_unique,
2089                                      threaded_resource(new_buf)->buffer_id_unique,
2090                                      &p->rebind_mask);
2091 
2092    /* If the buffer is not bound for write, clear the valid range. */
2093    if (!bound_for_write)
2094       util_range_set_empty(&tbuf->valid_buffer_range);
2095 
2096    tbuf->buffer_id_unique = threaded_resource(new_buf)->buffer_id_unique;
2097    threaded_resource(new_buf)->buffer_id_unique = 0;
2098 
2099    return true;
2100 }
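
/* Summary of the reallocation path above: when the buffer is busy but
 * reallocatable, the new resource becomes tres->latest (tc_buffer_map maps
 * the latest storage), the driver thread later runs
 * tc->replace_buffer_storage() to swap the storage of the original resource,
 * and the bindings tracked by this context are re-pointed to the new unique
 * buffer id via tc_rebind_buffer.
 */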
2101 
2102 /* Note that callers must've called tc_touch_buffer before calling
2103  * tc_improve_map_buffer_flags. */
2104 static unsigned
2105 tc_improve_map_buffer_flags(struct threaded_context *tc,
2106                             struct threaded_resource *tres, unsigned usage,
2107                             unsigned offset, unsigned size)
2108 {
2109    /* Never invalidate inside the driver and never infer "unsynchronized". */
2110    unsigned tc_flags = TC_TRANSFER_MAP_NO_INVALIDATE |
2111                        TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED;
2112 
2113    /* Prevent a reentry. */
2114    if (usage & tc_flags)
2115       return usage;
2116 
2117    /* Use the staging upload if it's preferred. */
2118    if (usage & (PIPE_MAP_DISCARD_RANGE |
2119                 PIPE_MAP_DISCARD_WHOLE_RESOURCE) &&
2120        !(usage & PIPE_MAP_PERSISTENT) &&
2121        tres->b.flags & PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY &&
2122        tc->use_forced_staging_uploads) {
2123       usage &= ~(PIPE_MAP_DISCARD_WHOLE_RESOURCE |
2124                  PIPE_MAP_UNSYNCHRONIZED);
2125 
2126       return usage | tc_flags | PIPE_MAP_DISCARD_RANGE;
2127    }
2128 
2129    /* Sparse buffers can't be mapped directly and can't be reallocated
2130     * (fully invalidated). That may just be a radeonsi limitation, but
2131     * the threaded context must obey it with radeonsi.
2132     */
2133    if (tres->b.flags & (PIPE_RESOURCE_FLAG_SPARSE | PIPE_RESOURCE_FLAG_UNMAPPABLE)) {
2134       /* We can use DISCARD_RANGE instead of full discard. This is the only
2135        * fast path for sparse buffers that doesn't need thread synchronization.
2136        */
2137       if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE)
2138          usage |= PIPE_MAP_DISCARD_RANGE;
2139 
2140       /* Allow DISCARD_WHOLE_RESOURCE and inferring UNSYNCHRONIZED in drivers.
2141        * The threaded context doesn't do unsynchronized mappings and
2142        * invalidations of sparse buffers, therefore correct driver behavior
2143        * won't result in incorrect behavior with the threaded context.
2144        */
2145       return usage;
2146    }
2147 
2148    usage |= tc_flags;
2149 
2150    /* Handle CPU reads trivially. */
2151    if (usage & PIPE_MAP_READ) {
2152       if (usage & PIPE_MAP_UNSYNCHRONIZED)
2153          usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* don't sync */
2154 
2155       /* Drivers aren't allowed to do buffer invalidations. */
2156       return usage & ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2157    }
2158 
2159    /* See if the buffer range being mapped has never been initialized or
2160     * the buffer is idle, in which case it can be mapped unsynchronized. */
2161    if (!(usage & PIPE_MAP_UNSYNCHRONIZED) &&
2162        ((!tres->is_shared &&
2163          !util_ranges_intersect(&tres->valid_buffer_range, offset, offset + size)) ||
2164         !tc_is_buffer_busy(tc, tres, usage)))
2165       usage |= PIPE_MAP_UNSYNCHRONIZED;
2166 
2167    if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) {
2168       /* If discarding the entire range, discard the whole resource instead. */
2169       if (usage & PIPE_MAP_DISCARD_RANGE &&
2170           offset == 0 && size == tres->b.width0)
2171          usage |= PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2172 
2173       /* Discard the whole resource if needed. */
2174       if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) {
2175          if (tc_invalidate_buffer(tc, tres))
2176             usage |= PIPE_MAP_UNSYNCHRONIZED;
2177          else
2178             usage |= PIPE_MAP_DISCARD_RANGE; /* fallback */
2179       }
2180    }
2181 
2182    /* We won't need this flag anymore. */
2183    /* TODO: We might not need TC_TRANSFER_MAP_NO_INVALIDATE with this. */
2184    usage &= ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2185 
2186    /* GL_AMD_pinned_memory and persistent mappings can't use staging
2187     * buffers. */
2188    if (usage & (PIPE_MAP_UNSYNCHRONIZED |
2189                 PIPE_MAP_PERSISTENT) ||
2190        tres->is_user_ptr)
2191       usage &= ~PIPE_MAP_DISCARD_RANGE;
2192 
2193    /* Unsynchronized buffer mappings don't have to synchronize the thread. */
2194    if (usage & PIPE_MAP_UNSYNCHRONIZED) {
2195       usage &= ~PIPE_MAP_DISCARD_RANGE;
2196       usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* notify the driver */
2197    }
2198 
2199    return usage;
2200 }
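
/* Rough decision order implemented above (descriptive summary only): forced
 * staging uploads are handled first, sparse/unmappable buffers bypass the
 * remaining tricks, plain reads only get THREADED_UNSYNC if the caller
 * already asked for UNSYNCHRONIZED, and writes are promoted to UNSYNCHRONIZED
 * when the mapped range is uninitialized or the buffer is idle.
 * Whole-resource discards become either an actual invalidation
 * (tc_invalidate_buffer) or a DISCARD_RANGE fallback, and DISCARD_RANGE is
 * dropped for mappings that can't use a staging buffer (unsynchronized,
 * persistent, or user-pointer buffers).
 */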
2201 
2202 static void *
2203 tc_buffer_map(struct pipe_context *_pipe,
2204               struct pipe_resource *resource, unsigned level,
2205               unsigned usage, const struct pipe_box *box,
2206               struct pipe_transfer **transfer)
2207 {
2208    struct threaded_context *tc = threaded_context(_pipe);
2209    struct threaded_resource *tres = threaded_resource(resource);
2210    struct pipe_context *pipe = tc->pipe;
2211 
2212    /* PIPE_MAP_THREAD_SAFE is for glthread, which shouldn't use the CPU storage and
2213     * this shouldn't normally be necessary because glthread only uses large buffers.
2214     */
2215    if (usage & PIPE_MAP_THREAD_SAFE)
2216       tc_buffer_disable_cpu_storage(resource);
2217 
2218    tc_touch_buffer(tc, tres);
2219 
2220    /* CPU storage relies on buffer invalidation never failing. With shared buffers,
2221     * invalidation might not always be possible, so CPU storage can't be used.
2222     */
2223    if (tc_is_buffer_shared(tres))
2224       tc_buffer_disable_cpu_storage(resource);
2225 
2226    usage = tc_improve_map_buffer_flags(tc, tres, usage, box->x, box->width);
2227 
2228    /* If the CPU storage is enabled, return it directly. */
2229    if (tres->allow_cpu_storage && !(usage & TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE)) {
2230       /* We can't let resource_copy_region disable the CPU storage. */
2231       assert(!(tres->b.flags & PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY));
2232 
2233       if (!tres->cpu_storage) {
2234          tres->cpu_storage = align_malloc(resource->width0, tc->map_buffer_alignment);
2235 
2236          if (tres->cpu_storage && tres->valid_buffer_range.end) {
2237             /* The GPU buffer contains valid data. Copy it to the CPU storage. */
2238             struct pipe_box box2;
2239             struct pipe_transfer *transfer2;
2240 
2241             unsigned valid_range_len = tres->valid_buffer_range.end - tres->valid_buffer_range.start;
2242             u_box_1d(tres->valid_buffer_range.start, valid_range_len, &box2);
2243 
2244             tc_sync_msg(tc, "cpu storage GPU -> CPU copy");
2245             tc_set_driver_thread(tc);
2246 
2247             void *ret = pipe->buffer_map(pipe, tres->latest ? tres->latest : resource,
2248                                          0, PIPE_MAP_READ, &box2, &transfer2);
2249             memcpy(&((uint8_t*)tres->cpu_storage)[tres->valid_buffer_range.start],
2250                    ret,
2251                    valid_range_len);
2252             pipe->buffer_unmap(pipe, transfer2);
2253 
2254             tc_clear_driver_thread(tc);
2255          }
2256       }
2257 
2258       if (tres->cpu_storage) {
2259          struct threaded_transfer *ttrans = slab_zalloc(&tc->pool_transfers);
2260          ttrans->b.resource = resource;
2261          ttrans->b.usage = usage;
2262          ttrans->b.box = *box;
2263          ttrans->valid_buffer_range = &tres->valid_buffer_range;
2264          ttrans->cpu_storage_mapped = true;
2265          *transfer = &ttrans->b;
2266 
2267          return (uint8_t*)tres->cpu_storage + box->x;
2268       } else {
2269          tres->allow_cpu_storage = false;
2270       }
2271    }
2272 
2273    /* Do a staging transfer within the threaded context. The driver should
2274     * only get resource_copy_region.
2275     */
2276    if (usage & PIPE_MAP_DISCARD_RANGE) {
2277       struct threaded_transfer *ttrans = slab_zalloc(&tc->pool_transfers);
2278       uint8_t *map;
2279 
2280       u_upload_alloc(tc->base.stream_uploader, 0,
2281                      box->width + (box->x % tc->map_buffer_alignment),
2282                      tc->map_buffer_alignment, &ttrans->b.offset,
2283                      &ttrans->staging, (void**)&map);
2284       if (!map) {
2285          slab_free(&tc->pool_transfers, ttrans);
2286          return NULL;
2287       }
2288 
2289       ttrans->b.resource = resource;
2290       ttrans->b.level = 0;
2291       ttrans->b.usage = usage;
2292       ttrans->b.box = *box;
2293       ttrans->b.stride = 0;
2294       ttrans->b.layer_stride = 0;
2295       ttrans->valid_buffer_range = &tres->valid_buffer_range;
2296       ttrans->cpu_storage_mapped = false;
2297       *transfer = &ttrans->b;
2298 
2299       p_atomic_inc(&tres->pending_staging_uploads);
2300       util_range_add(resource, &tres->pending_staging_uploads_range,
2301                      box->x, box->x + box->width);
2302 
2303       return map + (box->x % tc->map_buffer_alignment);
2304    }
2305 
2306    if (usage & PIPE_MAP_UNSYNCHRONIZED &&
2307        p_atomic_read(&tres->pending_staging_uploads) &&
2308        util_ranges_intersect(&tres->pending_staging_uploads_range, box->x, box->x + box->width)) {
2309       /* Write conflict detected between a staging transfer and the direct mapping we're
2310        * going to do. Resolve the conflict by ignoring UNSYNCHRONIZED so the direct mapping
2311        * will have to wait for the staging transfer completion.
2312        * Note: The conflict detection is only based on the mapped range, not on the actual
2313        * written range(s).
2314        */
2315       usage &= ~PIPE_MAP_UNSYNCHRONIZED & ~TC_TRANSFER_MAP_THREADED_UNSYNC;
2316       tc->use_forced_staging_uploads = false;
2317    }
2318 
2319    /* Unsynchronized buffer mappings don't have to synchronize the thread. */
2320    if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)) {
2321       tc_sync_msg(tc, usage & PIPE_MAP_DISCARD_RANGE ? "  discard_range" :
2322                       usage & PIPE_MAP_READ ? "  read" : "  staging conflict");
2323       tc_set_driver_thread(tc);
2324    }
2325 
2326    tc->bytes_mapped_estimate += box->width;
2327 
2328    void *ret = pipe->buffer_map(pipe, tres->latest ? tres->latest : resource,
2329                                 level, usage, box, transfer);
2330    threaded_transfer(*transfer)->valid_buffer_range = &tres->valid_buffer_range;
2331    threaded_transfer(*transfer)->cpu_storage_mapped = false;
2332 
2333    if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC))
2334       tc_clear_driver_thread(tc);
2335 
2336    return ret;
2337 }
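
/* tc_buffer_map above has three outcomes that matter for the flush/unmap code
 * below:
 *  1) CPU-storage mapping: returns a malloc'ed shadow copy; nothing reaches
 *     the driver until unmap re-uploads it via tc_buffer_subdata.
 *  2) DISCARD_RANGE staging: returns memory from stream_uploader; the data is
 *     copied into the real buffer with resource_copy_region on flush/unmap.
 *  3) Direct driver map: may synchronize with the driver thread unless the
 *     mapping was proven unsynchronized.
 */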
2338 
2339 static void *
2340 tc_texture_map(struct pipe_context *_pipe,
2341                struct pipe_resource *resource, unsigned level,
2342                unsigned usage, const struct pipe_box *box,
2343                struct pipe_transfer **transfer)
2344 {
2345    struct threaded_context *tc = threaded_context(_pipe);
2346    struct threaded_resource *tres = threaded_resource(resource);
2347    struct pipe_context *pipe = tc->pipe;
2348 
2349    tc_sync_msg(tc, "texture");
2350    tc_set_driver_thread(tc);
2351 
2352    tc->bytes_mapped_estimate += box->width;
2353 
2354    void *ret = pipe->texture_map(pipe, tres->latest ? tres->latest : resource,
2355                                  level, usage, box, transfer);
2356 
2357    if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC))
2358       tc_clear_driver_thread(tc);
2359 
2360    return ret;
2361 }
2362 
2363 struct tc_transfer_flush_region {
2364    struct tc_call_base base;
2365    struct pipe_box box;
2366    struct pipe_transfer *transfer;
2367 };
2368 
2369 static uint16_t
2370 tc_call_transfer_flush_region(struct pipe_context *pipe, void *call, uint64_t *last)
2371 {
2372    struct tc_transfer_flush_region *p = to_call(call, tc_transfer_flush_region);
2373 
2374    pipe->transfer_flush_region(pipe, p->transfer, &p->box);
2375    return call_size(tc_transfer_flush_region);
2376 }
2377 
2378 struct tc_resource_copy_region {
2379    struct tc_call_base base;
2380    unsigned dst_level;
2381    unsigned dstx, dsty, dstz;
2382    unsigned src_level;
2383    struct pipe_box src_box;
2384    struct pipe_resource *dst;
2385    struct pipe_resource *src;
2386 };
2387 
2388 static void
2389 tc_resource_copy_region(struct pipe_context *_pipe,
2390                         struct pipe_resource *dst, unsigned dst_level,
2391                         unsigned dstx, unsigned dsty, unsigned dstz,
2392                         struct pipe_resource *src, unsigned src_level,
2393                         const struct pipe_box *src_box);
2394 
2395 static void
2396 tc_buffer_do_flush_region(struct threaded_context *tc,
2397                           struct threaded_transfer *ttrans,
2398                           const struct pipe_box *box)
2399 {
2400    struct threaded_resource *tres = threaded_resource(ttrans->b.resource);
2401 
2402    if (ttrans->staging) {
2403       struct pipe_box src_box;
2404 
2405       u_box_1d(ttrans->b.offset + ttrans->b.box.x % tc->map_buffer_alignment +
2406                (box->x - ttrans->b.box.x),
2407                box->width, &src_box);
2408 
2409       /* Copy the staging buffer into the original one. */
2410       tc_resource_copy_region(&tc->base, ttrans->b.resource, 0, box->x, 0, 0,
2411                               ttrans->staging, 0, &src_box);
2412    }
2413 
2414    /* Don't update the valid range when we're uploading the CPU storage
2415     * because it includes the uninitialized range too.
2416     */
2417    if (!(ttrans->b.usage & TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE)) {
2418       util_range_add(&tres->b, ttrans->valid_buffer_range,
2419                      box->x, box->x + box->width);
2420    }
2421 }
2422 
2423 static void
2424 tc_transfer_flush_region(struct pipe_context *_pipe,
2425                          struct pipe_transfer *transfer,
2426                          const struct pipe_box *rel_box)
2427 {
2428    struct threaded_context *tc = threaded_context(_pipe);
2429    struct threaded_transfer *ttrans = threaded_transfer(transfer);
2430    struct threaded_resource *tres = threaded_resource(transfer->resource);
2431    unsigned required_usage = PIPE_MAP_WRITE |
2432                              PIPE_MAP_FLUSH_EXPLICIT;
2433 
2434    if (tres->b.target == PIPE_BUFFER) {
2435       if ((transfer->usage & required_usage) == required_usage) {
2436          struct pipe_box box;
2437 
2438          u_box_1d(transfer->box.x + rel_box->x, rel_box->width, &box);
2439          tc_buffer_do_flush_region(tc, ttrans, &box);
2440       }
2441 
2442       /* Staging transfers don't send the call to the driver.
2443        *
2444        * Transfers using the CPU storage shouldn't call transfer_flush_region
2445        * in the driver because the buffer is not really mapped on the driver
2446        * side and the CPU storage always re-uploads everything (flush_region
2447        * makes no difference).
2448        */
2449       if (ttrans->staging || ttrans->cpu_storage_mapped)
2450          return;
2451    }
2452 
2453    struct tc_transfer_flush_region *p =
2454       tc_add_call(tc, TC_CALL_transfer_flush_region, tc_transfer_flush_region);
2455    p->transfer = transfer;
2456    p->box = *rel_box;
2457 }
2458 
2459 static void
2460 tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
2461          unsigned flags);
2462 
2463 struct tc_buffer_unmap {
2464    struct tc_call_base base;
2465    bool was_staging_transfer;
2466    union {
2467       struct pipe_transfer *transfer;
2468       struct pipe_resource *resource;
2469    };
2470 };
2471 
2472 static uint16_t
2473 tc_call_buffer_unmap(struct pipe_context *pipe, void *call, uint64_t *last)
2474 {
2475    struct tc_buffer_unmap *p = to_call(call, tc_buffer_unmap);
2476 
2477    if (p->was_staging_transfer) {
2478       struct threaded_resource *tres = threaded_resource(p->resource);
2479       /* Nothing to do except keeping track of staging uploads */
2480       assert(tres->pending_staging_uploads > 0);
2481       p_atomic_dec(&tres->pending_staging_uploads);
2482       tc_drop_resource_reference(p->resource);
2483    } else {
2484       pipe->buffer_unmap(pipe, p->transfer);
2485    }
2486 
2487    return call_size(tc_buffer_unmap);
2488 }
2489 
2490 static void
2491 tc_buffer_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
2492 {
2493    struct threaded_context *tc = threaded_context(_pipe);
2494    struct threaded_transfer *ttrans = threaded_transfer(transfer);
2495    struct threaded_resource *tres = threaded_resource(transfer->resource);
2496 
2497    /* PIPE_MAP_THREAD_SAFE is only valid with UNSYNCHRONIZED. It can be
2498     * called from any thread and bypasses all multithreaded queues.
2499     */
2500    if (transfer->usage & PIPE_MAP_THREAD_SAFE) {
2501       assert(transfer->usage & PIPE_MAP_UNSYNCHRONIZED);
2502       assert(!(transfer->usage & (PIPE_MAP_FLUSH_EXPLICIT |
2503                                   PIPE_MAP_DISCARD_RANGE)));
2504 
2505       struct pipe_context *pipe = tc->pipe;
2506       util_range_add(&tres->b, ttrans->valid_buffer_range,
2507                       transfer->box.x, transfer->box.x + transfer->box.width);
2508 
2509       pipe->buffer_unmap(pipe, transfer);
2510       return;
2511    }
2512 
2513    if (transfer->usage & PIPE_MAP_WRITE &&
2514        !(transfer->usage & PIPE_MAP_FLUSH_EXPLICIT))
2515       tc_buffer_do_flush_region(tc, ttrans, &transfer->box);
2516 
2517    if (ttrans->cpu_storage_mapped) {
2518       /* GL allows simultaneous GPU stores with mapped buffers as long as GPU stores don't
2519        * touch the mapped range. That's a problem because GPU stores free the CPU storage.
2520        * If that happens, we just ignore the unmap call and don't upload anything to prevent
2521        * a crash.
2522        *
2523        * Disallow the CPU storage in the driver to work around this.
2524        */
2525       assert(tres->cpu_storage);
2526 
2527       if (tres->cpu_storage) {
2528          /* Invalidations shouldn't fail as long as CPU storage is allowed. */
2529          ASSERTED bool invalidated = tc_invalidate_buffer(tc, tres);
2530          assert(invalidated);
2531 
2532          tc_buffer_subdata(&tc->base, &tres->b,
2533                            PIPE_MAP_UNSYNCHRONIZED |
2534                            TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE,
2535                            0, tres->b.width0, tres->cpu_storage);
2536          /* This shouldn't have been freed by buffer_subdata. */
2537          assert(tres->cpu_storage);
2538       } else {
2539          static bool warned_once = false;
2540          if (!warned_once) {
2541             fprintf(stderr, "This application is incompatible with cpu_storage.\n");
2542             fprintf(stderr, "Use tc_max_cpu_storage_size=0 to disable it and report this issue to Mesa.\n");
2543             warned_once = true;
2544          }
2545       }
2546 
2547       tc_drop_resource_reference(ttrans->staging);
2548       slab_free(&tc->pool_transfers, ttrans);
2549       return;
2550    }
2551 
2552    bool was_staging_transfer = false;
2553 
2554    if (ttrans->staging) {
2555       was_staging_transfer = true;
2556 
2557       tc_drop_resource_reference(ttrans->staging);
2558       slab_free(&tc->pool_transfers, ttrans);
2559    }
2560 
2561    struct tc_buffer_unmap *p = tc_add_call(tc, TC_CALL_buffer_unmap,
2562                                            tc_buffer_unmap);
2563    if (was_staging_transfer) {
2564       tc_set_resource_reference(&p->resource, &tres->b);
2565       p->was_staging_transfer = true;
2566    } else {
2567       p->transfer = transfer;
2568       p->was_staging_transfer = false;
2569    }
2570 
2571    /* tc_buffer_map directly maps the buffers, but tc_buffer_unmap
2572     * defers the unmap operation to the batch execution.
2573     * bytes_mapped_estimate is an estimation of the map/unmap bytes delta
2574     * and if it goes over an optional limit the current batch is flushed,
2575     * to reclaim some RAM. */
2576    if (!ttrans->staging && tc->bytes_mapped_limit &&
2577        tc->bytes_mapped_estimate > tc->bytes_mapped_limit) {
2578       tc_flush(_pipe, NULL, PIPE_FLUSH_ASYNC);
2579    }
2580 }
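
/* Note on the CPU-storage branch above: the re-upload uses
 * TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE so tc_buffer_do_flush_region doesn't
 * grow valid_buffer_range from the shadow copy (which also covers
 * uninitialized bytes), and the shadow copy is expected to survive the
 * buffer_subdata call.
 */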
2581 
2582 struct tc_texture_unmap {
2583    struct tc_call_base base;
2584    struct pipe_transfer *transfer;
2585 };
2586 
2587 static uint16_t
2588 tc_call_texture_unmap(struct pipe_context *pipe, void *call, uint64_t *last)
2589 {
2590    struct tc_texture_unmap *p = (struct tc_texture_unmap *) call;
2591 
2592    pipe->texture_unmap(pipe, p->transfer);
2593    return call_size(tc_texture_unmap);
2594 }
2595 
2596 static void
2597 tc_texture_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
2598 {
2599    struct threaded_context *tc = threaded_context(_pipe);
2600    struct threaded_transfer *ttrans = threaded_transfer(transfer);
2601 
2602    tc_add_call(tc, TC_CALL_texture_unmap, tc_texture_unmap)->transfer = transfer;
2603 
2604    /* tc_texture_map directly maps the textures, but tc_texture_unmap
2605     * defers the unmap operation to the batch execution.
2606     * bytes_mapped_estimate is an estimation of the map/unmap bytes delta
2607     * and if it goes over an optional limit the current batch is flushed,
2608     * to reclaim some RAM. */
2609    if (!ttrans->staging && tc->bytes_mapped_limit &&
2610        tc->bytes_mapped_estimate > tc->bytes_mapped_limit) {
2611       tc_flush(_pipe, NULL, PIPE_FLUSH_ASYNC);
2612    }
2613 }
2614 
2615 struct tc_buffer_subdata {
2616    struct tc_call_base base;
2617    unsigned usage, offset, size;
2618    struct pipe_resource *resource;
2619    char slot[0]; /* more will be allocated if needed */
2620 };
2621 
2622 static uint16_t
2623 tc_call_buffer_subdata(struct pipe_context *pipe, void *call, uint64_t *last)
2624 {
2625    struct tc_buffer_subdata *p = (struct tc_buffer_subdata *)call;
2626 
2627    pipe->buffer_subdata(pipe, p->resource, p->usage, p->offset, p->size,
2628                         p->slot);
2629    tc_drop_resource_reference(p->resource);
2630    return p->base.num_slots;
2631 }
2632 
2633 static void
2634 tc_buffer_subdata(struct pipe_context *_pipe,
2635                   struct pipe_resource *resource,
2636                   unsigned usage, unsigned offset,
2637                   unsigned size, const void *data)
2638 {
2639    struct threaded_context *tc = threaded_context(_pipe);
2640    struct threaded_resource *tres = threaded_resource(resource);
2641 
2642    if (!size)
2643       return;
2644 
2645    tc_touch_buffer(tc, tres);
2646 
2647    usage |= PIPE_MAP_WRITE;
2648 
2649    /* PIPE_MAP_DIRECTLY suppresses implicit DISCARD_RANGE. */
2650    if (!(usage & PIPE_MAP_DIRECTLY))
2651       usage |= PIPE_MAP_DISCARD_RANGE;
2652 
2653    usage = tc_improve_map_buffer_flags(tc, tres, usage, offset, size);
2654 
2655    /* Unsynchronized and big transfers should use transfer_map. Also handle
2656     * full invalidations, because drivers aren't allowed to do them.
2657     */
2658    if (usage & (PIPE_MAP_UNSYNCHRONIZED |
2659                 PIPE_MAP_DISCARD_WHOLE_RESOURCE) ||
2660        size > TC_MAX_SUBDATA_BYTES ||
2661        tres->cpu_storage) {
2662       struct pipe_transfer *transfer;
2663       struct pipe_box box;
2664       uint8_t *map = NULL;
2665 
2666       u_box_1d(offset, size, &box);
2667 
2668       /* CPU storage is only useful for partial updates. It can add overhead
2669        * on glBufferData calls so avoid using it.
2670        */
2671       if (!tres->cpu_storage && offset == 0 && size == resource->width0)
2672          usage |= TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE;
2673 
2674       map = tc_buffer_map(_pipe, resource, 0, usage, &box, &transfer);
2675       if (map) {
2676          memcpy(map, data, size);
2677          tc_buffer_unmap(_pipe, transfer);
2678       }
2679       return;
2680    }
2681 
2682    util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size);
2683 
2684    /* The upload is small. Enqueue it. */
2685    struct tc_buffer_subdata *p =
2686       tc_add_slot_based_call(tc, TC_CALL_buffer_subdata, tc_buffer_subdata, size);
2687 
2688    tc_set_resource_reference(&p->resource, resource);
2689    /* This will always be busy because if it wasn't, tc_improve_map_buffer_flags
2690     * would set UNSYNCHRONIZED and we wouldn't get here.
2691     */
2692    tc_add_to_buffer_list(tc, &tc->buffer_lists[tc->next_buf_list], resource);
2693    p->usage = usage;
2694    p->offset = offset;
2695    p->size = size;
2696    memcpy(p->slot, data, size);
2697 }
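
/* Small writes are copied twice (once into the batch here, once by the driver
 * thread via pipe->buffer_subdata), which keeps the application thread from
 * synchronizing for uploads up to TC_MAX_SUBDATA_BYTES. Larger,
 * unsynchronized, or fully invalidating updates reuse the
 * tc_buffer_map/tc_buffer_unmap paths above instead.
 */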
2698 
2699 struct tc_texture_subdata {
2700    struct tc_call_base base;
2701    unsigned level, usage, stride, layer_stride;
2702    struct pipe_box box;
2703    struct pipe_resource *resource;
2704    char slot[0]; /* more will be allocated if needed */
2705 };
2706 
2707 static uint16_t
2708 tc_call_texture_subdata(struct pipe_context *pipe, void *call, uint64_t *last)
2709 {
2710    struct tc_texture_subdata *p = (struct tc_texture_subdata *)call;
2711 
2712    pipe->texture_subdata(pipe, p->resource, p->level, p->usage, &p->box,
2713                          p->slot, p->stride, p->layer_stride);
2714    tc_drop_resource_reference(p->resource);
2715    return p->base.num_slots;
2716 }
2717 
2718 static void
2719 tc_texture_subdata(struct pipe_context *_pipe,
2720                    struct pipe_resource *resource,
2721                    unsigned level, unsigned usage,
2722                    const struct pipe_box *box,
2723                    const void *data, unsigned stride,
2724                    unsigned layer_stride)
2725 {
2726    struct threaded_context *tc = threaded_context(_pipe);
2727    unsigned size;
2728 
2729    assert(box->height >= 1);
2730    assert(box->depth >= 1);
2731 
2732    size = (box->depth - 1) * layer_stride +
2733           (box->height - 1) * stride +
2734           box->width * util_format_get_blocksize(resource->format);
2735    if (!size)
2736       return;
2737 
2738    /* Small uploads can be enqueued, big uploads must sync. */
2739    if (size <= TC_MAX_SUBDATA_BYTES) {
2740       struct tc_texture_subdata *p =
2741          tc_add_slot_based_call(tc, TC_CALL_texture_subdata, tc_texture_subdata, size);
2742 
2743       tc_set_resource_reference(&p->resource, resource);
2744       p->level = level;
2745       p->usage = usage;
2746       p->box = *box;
2747       p->stride = stride;
2748       p->layer_stride = layer_stride;
2749       memcpy(p->slot, data, size);
2750    } else {
2751       struct pipe_context *pipe = tc->pipe;
2752 
2753       tc_sync(tc);
2754       tc_set_driver_thread(tc);
2755       pipe->texture_subdata(pipe, resource, level, usage, box, data,
2756                             stride, layer_stride);
2757       tc_clear_driver_thread(tc);
2758    }
2759 }
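
/* In both cases above the caller's 'data' doesn't need to outlive the call:
 * the small path copies it into the batch, and the large path is fully
 * synchronous.
 */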
2760 
2761 
2762 /********************************************************************
2763  * miscellaneous
2764  */
2765 
2766 #define TC_FUNC_SYNC_RET0(ret_type, func) \
2767    static ret_type \
2768    tc_##func(struct pipe_context *_pipe) \
2769    { \
2770       struct threaded_context *tc = threaded_context(_pipe); \
2771       struct pipe_context *pipe = tc->pipe; \
2772       tc_sync(tc); \
2773       return pipe->func(pipe); \
2774    }
2775 
2776 TC_FUNC_SYNC_RET0(uint64_t, get_timestamp)
2777 
2778 static void
2779 tc_get_sample_position(struct pipe_context *_pipe,
2780                        unsigned sample_count, unsigned sample_index,
2781                        float *out_value)
2782 {
2783    struct threaded_context *tc = threaded_context(_pipe);
2784    struct pipe_context *pipe = tc->pipe;
2785 
2786    tc_sync(tc);
2787    pipe->get_sample_position(pipe, sample_count, sample_index,
2788                              out_value);
2789 }
2790 
2791 static enum pipe_reset_status
2792 tc_get_device_reset_status(struct pipe_context *_pipe)
2793 {
2794    struct threaded_context *tc = threaded_context(_pipe);
2795    struct pipe_context *pipe = tc->pipe;
2796 
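        /* Drivers that can report the reset status from any thread let us skip
         * the sync here.
         */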
2797    if (!tc->options.unsynchronized_get_device_reset_status)
2798       tc_sync(tc);
2799 
2800    return pipe->get_device_reset_status(pipe);
2801 }
2802 
2803 static void
2804 tc_set_device_reset_callback(struct pipe_context *_pipe,
2805                              const struct pipe_device_reset_callback *cb)
2806 {
2807    struct threaded_context *tc = threaded_context(_pipe);
2808    struct pipe_context *pipe = tc->pipe;
2809 
2810    tc_sync(tc);
2811    pipe->set_device_reset_callback(pipe, cb);
2812 }
2813 
2814 struct tc_string_marker {
2815    struct tc_call_base base;
2816    int len;
2817    char slot[0]; /* more will be allocated if needed */
2818 };
2819 
2820 static uint16_t
2821 tc_call_emit_string_marker(struct pipe_context *pipe, void *call, uint64_t *last)
2822 {
2823    struct tc_string_marker *p = (struct tc_string_marker *)call;
2824    pipe->emit_string_marker(pipe, p->slot, p->len);
2825    return p->base.num_slots;
2826 }
2827 
2828 static void
2829 tc_emit_string_marker(struct pipe_context *_pipe,
2830                       const char *string, int len)
2831 {
2832    struct threaded_context *tc = threaded_context(_pipe);
2833 
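        /* Like buffer_subdata: short strings are copied into the batch and
         * executed asynchronously, longer ones fall back to a sync and a
         * direct driver call.
         */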
2834    if (len <= TC_MAX_STRING_MARKER_BYTES) {
2835       struct tc_string_marker *p =
2836          tc_add_slot_based_call(tc, TC_CALL_emit_string_marker, tc_string_marker, len);
2837 
2838       memcpy(p->slot, string, len);
2839       p->len = len;
2840    } else {
2841       struct pipe_context *pipe = tc->pipe;
2842 
2843       tc_sync(tc);
2844       tc_set_driver_thread(tc);
2845       pipe->emit_string_marker(pipe, string, len);
2846       tc_clear_driver_thread(tc);
2847    }
2848 }
2849 
2850 static void
2851 tc_dump_debug_state(struct pipe_context *_pipe, FILE *stream,
2852                     unsigned flags)
2853 {
2854    struct threaded_context *tc = threaded_context(_pipe);
2855    struct pipe_context *pipe = tc->pipe;
2856 
2857    tc_sync(tc);
2858    pipe->dump_debug_state(pipe, stream, flags);
2859 }
2860 
2861 static void
2862 tc_set_debug_callback(struct pipe_context *_pipe,
2863                       const struct util_debug_callback *cb)
2864 {
2865    struct threaded_context *tc = threaded_context(_pipe);
2866    struct pipe_context *pipe = tc->pipe;
2867 
2868    tc_sync(tc);
2869 
2870    /* Drop all synchronous debug callbacks. Drivers are expected to be OK
2871     * with this. shader-db will use an environment variable to disable
2872     * the threaded context.
2873     */
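        /* A synchronous (non-async) callback would end up being invoked from the
         * driver thread rather than the application thread, which is presumably
         * why it isn't forwarded.
         */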
2874    if (cb && !cb->async)
2875       pipe->set_debug_callback(pipe, NULL);
2876    else
2877       pipe->set_debug_callback(pipe, cb);
2878 }
2879 
2880 static void
2881 tc_set_log_context(struct pipe_context *_pipe, struct u_log_context *log)
2882 {
2883    struct threaded_context *tc = threaded_context(_pipe);
2884    struct pipe_context *pipe = tc->pipe;
2885 
2886    tc_sync(tc);
2887    pipe->set_log_context(pipe, log);
2888 }
2889 
2890 static void
2891 tc_create_fence_fd(struct pipe_context *_pipe,
2892                    struct pipe_fence_handle **fence, int fd,
2893                    enum pipe_fd_type type)
2894 {
2895    struct threaded_context *tc = threaded_context(_pipe);
2896    struct pipe_context *pipe = tc->pipe;
2897 
2898    tc_sync(tc);
2899    pipe->create_fence_fd(pipe, fence, fd, type);
2900 }
2901 
2902 struct tc_fence_call {
2903    struct tc_call_base base;
2904    struct pipe_fence_handle *fence;
2905 };
2906 
2907 static uint16_t
2908 tc_call_fence_server_sync(struct pipe_context *pipe, void *call, uint64_t *last)
2909 {
2910    struct pipe_fence_handle *fence = to_call(call, tc_fence_call)->fence;
2911 
2912    pipe->fence_server_sync(pipe, fence);
2913    pipe->screen->fence_reference(pipe->screen, &fence, NULL);
2914    return call_size(tc_fence_call);
2915 }
2916 
2917 static void
2918 tc_fence_server_sync(struct pipe_context *_pipe,
2919                      struct pipe_fence_handle *fence)
2920 {
2921    struct threaded_context *tc = threaded_context(_pipe);
2922    struct pipe_screen *screen = tc->pipe->screen;
2923    struct tc_fence_call *call = tc_add_call(tc, TC_CALL_fence_server_sync,
2924                                             tc_fence_call);
2925 
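        /* Hold a reference for the queued call; tc_call_fence_server_sync drops
         * it once the driver has consumed the fence.
         */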
2926    call->fence = NULL;
2927    screen->fence_reference(screen, &call->fence, fence);
2928 }
2929 
2930 static void
2931 tc_fence_server_signal(struct pipe_context *_pipe,
2932                            struct pipe_fence_handle *fence)
2933 {
2934    struct threaded_context *tc = threaded_context(_pipe);
2935    struct pipe_context *pipe = tc->pipe;
2936    tc_sync(tc);
2937    pipe->fence_server_signal(pipe, fence);
2938 }
2939 
2940 static struct pipe_video_codec *
2941 tc_create_video_codec(UNUSED struct pipe_context *_pipe,
2942                       UNUSED const struct pipe_video_codec *templ)
2943 {
2944    unreachable("Threaded context should not be enabled for video APIs");
2945    return NULL;
2946 }
2947 
2948 static struct pipe_video_buffer *
2949 tc_create_video_buffer(UNUSED struct pipe_context *_pipe,
2950                        UNUSED const struct pipe_video_buffer *templ)
2951 {
2952    unreachable("Threaded context should not be enabled for video APIs");
2953    return NULL;
2954 }
2955 
2956 struct tc_context_param {
2957    struct tc_call_base base;
2958    enum pipe_context_param param;
2959    unsigned value;
2960 };
2961 
2962 static uint16_t
2963 tc_call_set_context_param(struct pipe_context *pipe, void *call, uint64_t *last)
2964 {
2965    struct tc_context_param *p = to_call(call, tc_context_param);
2966 
2967    if (pipe->set_context_param)
2968       pipe->set_context_param(pipe, p->param, p->value);
2969 
2970    return call_size(tc_context_param);
2971 }
2972 
2973 static void
2974 tc_set_context_param(struct pipe_context *_pipe,
2975                            enum pipe_context_param param,
2976                            unsigned value)
2977 {
2978    struct threaded_context *tc = threaded_context(_pipe);
2979 
2980    if (param == PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE) {
2981       /* Pin the gallium thread as requested. */
2982       util_set_thread_affinity(tc->queue.threads[0],
2983                                util_get_cpu_caps()->L3_affinity_mask[value],
2984                                NULL, util_get_cpu_caps()->num_cpu_mask_bits);
2985 
2986       /* Execute this immediately (without enqueuing); the driver hook
2987        * is required to be thread-safe for this parameter.
2988        */
2989       struct pipe_context *pipe = tc->pipe;
2990       if (pipe->set_context_param)
2991          pipe->set_context_param(pipe, param, value);
2992       return;
2993    }
2994 
2995    if (tc->pipe->set_context_param) {
2996       struct tc_context_param *call =
2997          tc_add_call(tc, TC_CALL_set_context_param, tc_context_param);
2998 
2999       call->param = param;
3000       call->value = value;
3001    }
3002 }
3003 
3004 
3005 /********************************************************************
3006  * draw, launch, clear, blit, copy, flush
3007  */
3008 
3009 struct tc_flush_call {
3010    struct tc_call_base base;
3011    unsigned flags;
3012    struct threaded_context *tc;
3013    struct pipe_fence_handle *fence;
3014 };
3015 
3016 static void
3017 tc_flush_queries(struct threaded_context *tc)
3018 {
3019    struct threaded_query *tq, *tmp;
3020    LIST_FOR_EACH_ENTRY_SAFE(tq, tmp, &tc->unflushed_queries, head_unflushed) {
3021       list_del(&tq->head_unflushed);
3022 
3023       /* Memory release semantics: due to a possible race with
3024        * tc_get_query_result, we must ensure that the linked list changes
3025        * are visible before setting tq->flushed.
3026        */
3027       p_atomic_set(&tq->flushed, true);
3028    }
3029 }
3030 
3031 static uint16_t
3032 tc_call_flush(struct pipe_context *pipe, void *call, uint64_t *last)
3033 {
3034    struct tc_flush_call *p = to_call(call, tc_flush_call);
3035    struct pipe_screen *screen = pipe->screen;
3036 
3037    pipe->flush(pipe, p->fence ? &p->fence : NULL, p->flags);
3038    screen->fence_reference(screen, &p->fence, NULL);
3039 
3040    if (!(p->flags & PIPE_FLUSH_DEFERRED))
3041       tc_flush_queries(p->tc);
3042 
3043    return call_size(tc_flush_call);
3044 }
3045 
3046 static void
3047 tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
3048          unsigned flags)
3049 {
3050    struct threaded_context *tc = threaded_context(_pipe);
3051    struct pipe_context *pipe = tc->pipe;
3052    struct pipe_screen *screen = pipe->screen;
3053    bool async = flags & (PIPE_FLUSH_DEFERRED | PIPE_FLUSH_ASYNC);
3054 
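        /* Fast path: async/deferred flushes with driver fence support can be
         * enqueued without syncing.  The fence is created immediately from a
         * per-batch token so the caller gets a handle right away.
         */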
3055    if (async && tc->options.create_fence) {
3056       if (fence) {
3057          struct tc_batch *next = &tc->batch_slots[tc->next];
3058 
3059          if (!next->token) {
3060             next->token = malloc(sizeof(*next->token));
3061             if (!next->token)
3062                goto out_of_memory;
3063 
3064             pipe_reference_init(&next->token->ref, 1);
3065             next->token->tc = tc;
3066          }
3067 
3068          screen->fence_reference(screen, fence,
3069                                  tc->options.create_fence(pipe, next->token));
3070          if (!*fence)
3071             goto out_of_memory;
3072       }
3073 
3074       struct tc_flush_call *p = tc_add_call(tc, TC_CALL_flush, tc_flush_call);
3075       p->tc = tc;
3076       p->fence = fence ? *fence : NULL;
3077       p->flags = flags | TC_FLUSH_ASYNC;
3078 
3079       if (!(flags & PIPE_FLUSH_DEFERRED))
3080          tc_batch_flush(tc);
3081       return;
3082    }
3083 
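        /* Slow, synchronizing path: used when the flush can't be asynchronous
         * and when token or fence allocation fails.
         */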
3084 out_of_memory:
3085    tc_sync_msg(tc, flags & PIPE_FLUSH_END_OF_FRAME ? "end of frame" :
3086                    flags & PIPE_FLUSH_DEFERRED ? "deferred fence" : "normal");
3087 
3088    if (!(flags & PIPE_FLUSH_DEFERRED))
3089       tc_flush_queries(tc);
3090    tc_set_driver_thread(tc);
3091    pipe->flush(pipe, fence, flags);
3092    tc_clear_driver_thread(tc);
3093 }
3094 
3095 struct tc_draw_single {
3096    struct tc_call_base base;
3097    unsigned index_bias;
3098    struct pipe_draw_info info;
3099 };
3100 
3101 struct tc_draw_single_drawid {
3102    struct tc_draw_single base;
3103    unsigned drawid_offset;
3104 };
3105 
3106 static uint16_t
3107 tc_call_draw_single_drawid(struct pipe_context *pipe, void *call, uint64_t *last)
3108 {
3109    struct tc_draw_single_drawid *info_drawid = to_call(call, tc_draw_single_drawid);
3110    struct tc_draw_single *info = &info_drawid->base;
3111 
3112    /* u_threaded_context stores start/count in min/max_index for single draws. */
3113    /* Drivers using u_threaded_context shouldn't use min/max_index. */
3114    struct pipe_draw_start_count_bias draw;
3115 
3116    draw.start = info->info.min_index;
3117    draw.count = info->info.max_index;
3118    draw.index_bias = info->index_bias;
3119 
3120    info->info.index_bounds_valid = false;
3121    info->info.has_user_indices = false;
3122    info->info.take_index_buffer_ownership = false;
3123 
3124    pipe->draw_vbo(pipe, &info->info, info_drawid->drawid_offset, NULL, &draw, 1);
3125    if (info->info.index_size)
3126       tc_drop_resource_reference(info->info.index.resource);
3127 
3128    return call_size(tc_draw_single_drawid);
3129 }
3130 
3131 static void
3132 simplify_draw_info(struct pipe_draw_info *info)
3133 {
3134    /* Clear these fields to facilitate draw merging.
3135     * Drivers shouldn't use them.
3136     */
3137    info->has_user_indices = false;
3138    info->index_bounds_valid = false;
3139    info->take_index_buffer_ownership = false;
3140    info->index_bias_varies = false;
3141    info->_pad = 0;
3142 
3143    /* This shouldn't be set when merging single draws. */
3144    info->increment_draw_id = false;
3145 
3146    if (info->index_size) {
3147       if (!info->primitive_restart)
3148          info->restart_index = 0;
3149    } else {
3150       assert(!info->primitive_restart);
3151       info->primitive_restart = false;
3152       info->restart_index = 0;
3153       info->index.resource = NULL;
3154    }
3155 }
3156 
3157 static bool
3158 is_next_call_a_mergeable_draw(struct tc_draw_single *first,
3159                               struct tc_draw_single *next)
3160 {
3161    if (next->base.call_id != TC_CALL_draw_single)
3162       return false;
3163 
3164    simplify_draw_info(&next->info);
3165 
3166    STATIC_ASSERT(offsetof(struct pipe_draw_info, min_index) ==
3167                  sizeof(struct pipe_draw_info) - 8);
3168    STATIC_ASSERT(offsetof(struct pipe_draw_info, max_index) ==
3169                  sizeof(struct pipe_draw_info) - 4);
3170    /* All fields must be the same except start and count. */
3171    /* u_threaded_context stores start/count in min/max_index for single draws. */
3172    return memcmp((uint32_t*)&first->info, (uint32_t*)&next->info,
3173                  DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX) == 0;
3174 }
3175 
3176 static uint16_t
3177 tc_call_draw_single(struct pipe_context *pipe, void *call, uint64_t *last_ptr)
3178 {
3179    /* Draw call merging. */
3180    struct tc_draw_single *first = to_call(call, tc_draw_single);
3181    struct tc_draw_single *last = (struct tc_draw_single *)last_ptr;
3182    struct tc_draw_single *next = get_next_call(first, tc_draw_single);
3183 
3184    /* If at least 2 consecutive draw calls can be merged... */
3185    if (next != last &&
3186        next->base.call_id == TC_CALL_draw_single) {
3187       simplify_draw_info(&first->info);
3188 
3189       if (is_next_call_a_mergeable_draw(first, next)) {
3190          /* The maximum number of merged draws is given by the batch size. */
3191          struct pipe_draw_start_count_bias multi[TC_SLOTS_PER_BATCH / call_size(tc_draw_single)];
3192          unsigned num_draws = 2;
3193          bool index_bias_varies = first->index_bias != next->index_bias;
3194 
3195          /* u_threaded_context stores start/count in min/max_index for single draws. */
3196          multi[0].start = first->info.min_index;
3197          multi[0].count = first->info.max_index;
3198          multi[0].index_bias = first->index_bias;
3199          multi[1].start = next->info.min_index;
3200          multi[1].count = next->info.max_index;
3201          multi[1].index_bias = next->index_bias;
3202 
3203          /* Find how many other draws can be merged. */
3204          next = get_next_call(next, tc_draw_single);
3205          for (; next != last && is_next_call_a_mergeable_draw(first, next);
3206               next = get_next_call(next, tc_draw_single), num_draws++) {
3207             /* u_threaded_context stores start/count in min/max_index for single draws. */
3208             multi[num_draws].start = next->info.min_index;
3209             multi[num_draws].count = next->info.max_index;
3210             multi[num_draws].index_bias = next->index_bias;
3211             index_bias_varies |= first->index_bias != next->index_bias;
3212          }
3213 
3214          first->info.index_bias_varies = index_bias_varies;
3215          pipe->draw_vbo(pipe, &first->info, 0, NULL, multi, num_draws);
3216 
3217          /* Since all draws use the same index buffer, drop all references at once. */
3218          if (first->info.index_size)
3219             pipe_drop_resource_references(first->info.index.resource, num_draws);
3220 
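              /* Returning the total slot count makes the batch executor skip
               * all of the draws that were merged into this single call.
               */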
3221          return call_size(tc_draw_single) * num_draws;
3222       }
3223    }
3224 
3225    /* u_threaded_context stores start/count in min/max_index for single draws. */
3226    /* Drivers using u_threaded_context shouldn't use min/max_index. */
3227    struct pipe_draw_start_count_bias draw;
3228 
3229    draw.start = first->info.min_index;
3230    draw.count = first->info.max_index;
3231    draw.index_bias = first->index_bias;
3232 
3233    first->info.index_bounds_valid = false;
3234    first->info.has_user_indices = false;
3235    first->info.take_index_buffer_ownership = false;
3236 
3237    pipe->draw_vbo(pipe, &first->info, 0, NULL, &draw, 1);
3238    if (first->info.index_size)
3239       tc_drop_resource_reference(first->info.index.resource);
3240 
3241    return call_size(tc_draw_single);
3242 }
3243 
3244 struct tc_draw_indirect {
3245    struct tc_call_base base;
3246    struct pipe_draw_start_count_bias draw;
3247    struct pipe_draw_info info;
3248    struct pipe_draw_indirect_info indirect;
3249 };
3250 
3251 static uint16_t
3252 tc_call_draw_indirect(struct pipe_context *pipe, void *call, uint64_t *last)
3253 {
3254    struct tc_draw_indirect *info = to_call(call, tc_draw_indirect);
3255 
3256    info->info.index_bounds_valid = false;
3257    info->info.take_index_buffer_ownership = false;
3258 
3259    pipe->draw_vbo(pipe, &info->info, 0, &info->indirect, &info->draw, 1);
3260    if (info->info.index_size)
3261       tc_drop_resource_reference(info->info.index.resource);
3262 
3263    tc_drop_resource_reference(info->indirect.buffer);
3264    tc_drop_resource_reference(info->indirect.indirect_draw_count);
3265    tc_drop_so_target_reference(info->indirect.count_from_stream_output);
3266    return call_size(tc_draw_indirect);
3267 }
3268 
3269 struct tc_draw_multi {
3270    struct tc_call_base base;
3271    unsigned num_draws;
3272    struct pipe_draw_info info;
3273    struct pipe_draw_start_count_bias slot[]; /* variable-sized array */
3274 };
3275 
3276 static uint16_t
3277 tc_call_draw_multi(struct pipe_context *pipe, void *call, uint64_t *last)
3278 {
3279    struct tc_draw_multi *info = (struct tc_draw_multi*)call;
3280 
3281    info->info.has_user_indices = false;
3282    info->info.index_bounds_valid = false;
3283    info->info.take_index_buffer_ownership = false;
3284 
3285    pipe->draw_vbo(pipe, &info->info, 0, NULL, info->slot, info->num_draws);
3286    if (info->info.index_size)
3287       tc_drop_resource_reference(info->info.index.resource);
3288 
3289    return info->base.num_slots;
3290 }
3291 
3292 #define DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX \
3293    offsetof(struct pipe_draw_info, index)
3294 
3295 void
3296 tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info,
3297             unsigned drawid_offset,
3298             const struct pipe_draw_indirect_info *indirect,
3299             const struct pipe_draw_start_count_bias *draws,
3300             unsigned num_draws)
3301 {
3302    STATIC_ASSERT(DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX +
3303                  sizeof(intptr_t) == offsetof(struct pipe_draw_info, min_index));
3304 
3305    struct threaded_context *tc = threaded_context(_pipe);
3306    unsigned index_size = info->index_size;
3307    bool has_user_indices = info->has_user_indices;
3308 
3309    if (unlikely(indirect)) {
3310       assert(!has_user_indices);
3311       assert(num_draws == 1);
3312 
3313       struct tc_draw_indirect *p =
3314          tc_add_call(tc, TC_CALL_draw_indirect, tc_draw_indirect);
3315       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
3316 
3317       if (index_size) {
3318          if (!info->take_index_buffer_ownership) {
3319             tc_set_resource_reference(&p->info.index.resource,
3320                                       info->index.resource);
3321          }
3322          tc_add_to_buffer_list(tc, next, info->index.resource);
3323       }
3324       memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
3325 
3326       tc_set_resource_reference(&p->indirect.buffer, indirect->buffer);
3327       tc_set_resource_reference(&p->indirect.indirect_draw_count,
3328                                 indirect->indirect_draw_count);
3329       p->indirect.count_from_stream_output = NULL;
3330       pipe_so_target_reference(&p->indirect.count_from_stream_output,
3331                                indirect->count_from_stream_output);
3332 
3333       if (indirect->buffer)
3334          tc_add_to_buffer_list(tc, next, indirect->buffer);
3335       if (indirect->indirect_draw_count)
3336          tc_add_to_buffer_list(tc, next, indirect->indirect_draw_count);
3337       if (indirect->count_from_stream_output)
3338          tc_add_to_buffer_list(tc, next, indirect->count_from_stream_output->buffer);
3339 
3340       memcpy(&p->indirect, indirect, sizeof(*indirect));
3341       p->draw.start = draws[0].start;
3342 
3343       /* This must be after tc_add_call, which can flush the batch. */
3344       if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
3345          tc_add_all_gfx_bindings_to_buffer_list(tc);
3346       return;
3347    }
3348 
3349    if (num_draws == 1) {
3350       /* Single draw. */
3351       if (index_size && has_user_indices) {
3352          unsigned size = draws[0].count * index_size;
3353          struct pipe_resource *buffer = NULL;
3354          unsigned offset;
3355 
3356          if (!size)
3357             return;
3358 
3359          /* This must be done before adding draw_vbo, because the upload can
3360           * trigger e.g. transfer_unmap and flush a partially-uninitialized
3361           * draw_vbo call to the driver if it were done afterwards.
3362           */
3363          u_upload_data(tc->base.stream_uploader, 0, size, 4,
3364                        (uint8_t*)info->index.user + draws[0].start * index_size,
3365                        &offset, &buffer);
3366          if (unlikely(!buffer))
3367             return;
3368 
3369          struct tc_draw_single *p = drawid_offset > 0 ?
3370             &tc_add_call(tc, TC_CALL_draw_single_drawid, tc_draw_single_drawid)->base :
3371             tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
3372          memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX);
3373          p->info.index.resource = buffer;
3374          if (drawid_offset > 0)
3375             ((struct tc_draw_single_drawid*)p)->drawid_offset = drawid_offset;
3376          /* u_threaded_context stores start/count in min/max_index for single draws. */
3377          p->info.min_index = offset >> util_logbase2(index_size);
3378          p->info.max_index = draws[0].count;
3379          p->index_bias = draws[0].index_bias;
3380       } else {
3381          /* Non-indexed call or indexed with a real index buffer. */
3382          struct tc_draw_single *p = drawid_offset > 0 ?
3383             &tc_add_call(tc, TC_CALL_draw_single_drawid, tc_draw_single_drawid)->base :
3384             tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
3385          if (index_size) {
3386             if (!info->take_index_buffer_ownership) {
3387                tc_set_resource_reference(&p->info.index.resource,
3388                                          info->index.resource);
3389             }
3390             tc_add_to_buffer_list(tc, &tc->buffer_lists[tc->next_buf_list], info->index.resource);
3391          }
3392          if (drawid_offset > 0)
3393             ((struct tc_draw_single_drawid*)p)->drawid_offset = drawid_offset;
3394          memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
3395          /* u_threaded_context stores start/count in min/max_index for single draws. */
3396          p->info.min_index = draws[0].start;
3397          p->info.max_index = draws[0].count;
3398          p->index_bias = draws[0].index_bias;
3399       }
3400 
3401       /* This must be after tc_add_call, which can flush the batch. */
3402       if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
3403          tc_add_all_gfx_bindings_to_buffer_list(tc);
3404       return;
3405    }
3406 
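        /* Work out how many call slots one tc_draw_multi header plus one
         * pipe_draw_start_count_bias element needs, so that multi draws can be
         * split across batches when the current batch runs out of slots.
         */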
3407    const int draw_overhead_bytes = sizeof(struct tc_draw_multi);
3408    const int one_draw_slot_bytes = sizeof(((struct tc_draw_multi*)NULL)->slot[0]);
3409    const int slots_for_one_draw = DIV_ROUND_UP(draw_overhead_bytes + one_draw_slot_bytes,
3410                                                sizeof(struct tc_call_base));
3411    /* Multi draw. */
3412    if (index_size && has_user_indices) {
3413       struct pipe_resource *buffer = NULL;
3414       unsigned buffer_offset, total_count = 0;
3415       unsigned index_size_shift = util_logbase2(index_size);
3416       uint8_t *ptr = NULL;
3417 
3418       /* Get the total count. */
3419       for (unsigned i = 0; i < num_draws; i++)
3420          total_count += draws[i].count;
3421 
3422       if (!total_count)
3423          return;
3424 
3425       /* Allocate space for all index buffers.
3426        *
3427        * This must be done before adding draw_vbo, because the upload can
3428        * trigger e.g. transfer_unmap and flush a partially-uninitialized
3429        * draw_vbo call to the driver if it were done afterwards.
3430        */
3431       u_upload_alloc(tc->base.stream_uploader, 0,
3432                      total_count << index_size_shift, 4,
3433                      &buffer_offset, &buffer, (void**)&ptr);
3434       if (unlikely(!buffer))
3435          return;
3436 
3437       int total_offset = 0;
3438       unsigned offset = 0;
3439       while (num_draws) {
3440          struct tc_batch *next = &tc->batch_slots[tc->next];
3441 
3442          int nb_slots_left = TC_SLOTS_PER_BATCH - next->num_total_slots;
3443          /* If there isn't enough room for even one draw, fill the next batch instead. */
3444          if (nb_slots_left < slots_for_one_draw)
3445             nb_slots_left = TC_SLOTS_PER_BATCH;
3446          const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
3447 
3448          /* How many draws can we fit in the current batch? */
3449          const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_slot_bytes);
3450 
3451          struct tc_draw_multi *p =
3452             tc_add_slot_based_call(tc, TC_CALL_draw_multi, tc_draw_multi,
3453                                    dr);
3454          memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX);
3455 
3456          if (total_offset == 0)
3457             /* the first slot inherits the reference from u_upload_alloc() */
3458             p->info.index.resource = buffer;
3459          else
3460             /* all following slots need a new reference */
3461             tc_set_resource_reference(&p->info.index.resource, buffer);
3462 
3463          p->num_draws = dr;
3464 
3465          /* Upload index buffers. */
3466          for (unsigned i = 0; i < dr; i++) {
3467             unsigned count = draws[i + total_offset].count;
3468 
3469             if (!count) {
3470                p->slot[i].start = 0;
3471                p->slot[i].count = 0;
3472                p->slot[i].index_bias = 0;
3473                continue;
3474             }
3475 
3476             unsigned size = count << index_size_shift;
3477             memcpy(ptr + offset,
3478                    (uint8_t*)info->index.user +
3479                    (draws[i + total_offset].start << index_size_shift), size);
3480             p->slot[i].start = (buffer_offset + offset) >> index_size_shift;
3481             p->slot[i].count = count;
3482             p->slot[i].index_bias = draws[i + total_offset].index_bias;
3483             offset += size;
3484          }
3485 
3486          total_offset += dr;
3487          num_draws -= dr;
3488       }
3489    } else {
3490       int total_offset = 0;
3491       bool take_index_buffer_ownership = info->take_index_buffer_ownership;
3492       while (num_draws) {
3493          struct tc_batch *next = &tc->batch_slots[tc->next];
3494 
3495          int nb_slots_left = TC_SLOTS_PER_BATCH - next->num_total_slots;
3496          /* If there isn't enough room for even one draw, fill the next batch instead. */
3497          if (nb_slots_left < slots_for_one_draw)
3498             nb_slots_left = TC_SLOTS_PER_BATCH;
3499          const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
3500 
3501          /* How many draws can we fit in the current batch? */
3502          const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_slot_bytes);
3503 
3504          /* Non-indexed call or indexed with a real index buffer. */
3505          struct tc_draw_multi *p =
3506             tc_add_slot_based_call(tc, TC_CALL_draw_multi, tc_draw_multi,
3507                                    dr);
3508          if (index_size) {
3509             if (!take_index_buffer_ownership) {
3510                tc_set_resource_reference(&p->info.index.resource,
3511                                          info->index.resource);
3512             }
3513             tc_add_to_buffer_list(tc, &tc->buffer_lists[tc->next_buf_list], info->index.resource);
3514          }
3515          take_index_buffer_ownership = false;
3516          memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
3517          p->num_draws = dr;
3518          memcpy(p->slot, &draws[total_offset], sizeof(draws[0]) * dr);
3519          num_draws -= dr;
3520 
3521          total_offset += dr;
3522       }
3523    }
3524 
3525    /* This must be after tc_add_*call, which can flush the batch. */
3526    if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
3527       tc_add_all_gfx_bindings_to_buffer_list(tc);
3528 }
3529 
3530 struct tc_draw_vstate_single {
3531    struct tc_call_base base;
3532    struct pipe_draw_start_count_bias draw;
3533 
3534    /* The following states must be together without holes because they are
3535     * compared by draw merging.
3536     */
3537    struct pipe_vertex_state *state;
3538    uint32_t partial_velem_mask;
3539    struct pipe_draw_vertex_state_info info;
3540 };
3541 
3542 static bool
3543 is_next_call_a_mergeable_draw_vstate(struct tc_draw_vstate_single *first,
3544                                      struct tc_draw_vstate_single *next)
3545 {
3546    if (next->base.call_id != TC_CALL_draw_vstate_single)
3547       return false;
3548 
3549    return !memcmp(&first->state, &next->state,
3550                   offsetof(struct tc_draw_vstate_single, info) +
3551                   sizeof(struct pipe_draw_vertex_state_info) -
3552                   offsetof(struct tc_draw_vstate_single, state));
3553 }
3554 
3555 static uint16_t
3556 tc_call_draw_vstate_single(struct pipe_context *pipe, void *call, uint64_t *last_ptr)
3557 {
3558    /* Draw call merging. */
3559    struct tc_draw_vstate_single *first = to_call(call, tc_draw_vstate_single);
3560    struct tc_draw_vstate_single *last = (struct tc_draw_vstate_single *)last_ptr;
3561    struct tc_draw_vstate_single *next = get_next_call(first, tc_draw_vstate_single);
3562 
3563    /* If at least 2 consecutive draw calls can be merged... */
3564    if (next != last &&
3565        is_next_call_a_mergeable_draw_vstate(first, next)) {
3566       /* The maximum number of merged draws is given by the batch size. */
3567       struct pipe_draw_start_count_bias draws[TC_SLOTS_PER_BATCH /
3568                                               call_size(tc_draw_vstate_single)];
3569       unsigned num_draws = 2;
3570 
3571       draws[0] = first->draw;
3572       draws[1] = next->draw;
3573 
3574       /* Find how many other draws can be merged. */
3575       next = get_next_call(next, tc_draw_vstate_single);
3576       for (; next != last &&
3577            is_next_call_a_mergeable_draw_vstate(first, next);
3578            next = get_next_call(next, tc_draw_vstate_single),
3579            num_draws++)
3580          draws[num_draws] = next->draw;
3581 
3582       pipe->draw_vertex_state(pipe, first->state, first->partial_velem_mask,
3583                               first->info, draws, num_draws);
3584       /* Since all draws use the same state, drop all references at once. */
3585       tc_drop_vertex_state_references(first->state, num_draws);
3586 
3587       return call_size(tc_draw_vstate_single) * num_draws;
3588    }
3589 
3590    pipe->draw_vertex_state(pipe, first->state, first->partial_velem_mask,
3591                            first->info, &first->draw, 1);
3592    tc_drop_vertex_state_references(first->state, 1);
3593    return call_size(tc_draw_vstate_single);
3594 }
3595 
3596 struct tc_draw_vstate_multi {
3597    struct tc_call_base base;
3598    uint32_t partial_velem_mask;
3599    struct pipe_draw_vertex_state_info info;
3600    unsigned num_draws;
3601    struct pipe_vertex_state *state;
3602    struct pipe_draw_start_count_bias slot[0];
3603 };
3604 
3605 static uint16_t
3606 tc_call_draw_vstate_multi(struct pipe_context *pipe, void *call, uint64_t *last)
3607 {
3608    struct tc_draw_vstate_multi *info = (struct tc_draw_vstate_multi*)call;
3609 
3610    pipe->draw_vertex_state(pipe, info->state, info->partial_velem_mask,
3611                            info->info, info->slot, info->num_draws);
3612    tc_drop_vertex_state_references(info->state, 1);
3613    return info->base.num_slots;
3614 }
3615 
3616 static void
3617 tc_draw_vertex_state(struct pipe_context *_pipe,
3618                      struct pipe_vertex_state *state,
3619                      uint32_t partial_velem_mask,
3620                      struct pipe_draw_vertex_state_info info,
3621                      const struct pipe_draw_start_count_bias *draws,
3622                      unsigned num_draws)
3623 {
3624    struct threaded_context *tc = threaded_context(_pipe);
3625 
3626    if (num_draws == 1) {
3627       /* Single draw. */
3628       struct tc_draw_vstate_single *p =
3629          tc_add_call(tc, TC_CALL_draw_vstate_single, tc_draw_vstate_single);
3630       p->partial_velem_mask = partial_velem_mask;
3631       p->draw = draws[0];
3632       p->info.mode = info.mode;
3633       p->info.take_vertex_state_ownership = false;
3634 
3635       /* This should always be 0 for simplicity because we assume that
3636        * index_bias doesn't vary.
3637        */
3638       assert(draws[0].index_bias == 0);
3639 
3640       if (!info.take_vertex_state_ownership)
3641          tc_set_vertex_state_reference(&p->state, state);
3642       else
3643          p->state = state;
3644 
3645 
3646       /* This must be after tc_add_*call, which can flush the batch. */
3647       if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
3648          tc_add_all_gfx_bindings_to_buffer_list(tc);
3649       return;
3650    }
3651 
3652    const int draw_overhead_bytes = sizeof(struct tc_draw_vstate_multi);
3653    const int one_draw_slot_bytes = sizeof(((struct tc_draw_vstate_multi*)NULL)->slot[0]);
3654    const int slots_for_one_draw = DIV_ROUND_UP(draw_overhead_bytes + one_draw_slot_bytes,
3655                                                sizeof(struct tc_call_base));
3656    /* Multi draw. */
3657    int total_offset = 0;
3658    bool take_vertex_state_ownership = info.take_vertex_state_ownership;
3659    while (num_draws) {
3660       struct tc_batch *next = &tc->batch_slots[tc->next];
3661 
3662       int nb_slots_left = TC_SLOTS_PER_BATCH - next->num_total_slots;
3663       /* If there isn't enough room for even one draw, fill the next batch instead. */
3664       if (nb_slots_left < slots_for_one_draw)
3665          nb_slots_left = TC_SLOTS_PER_BATCH;
3666       const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
3667 
3668       /* How many draws can we fit in the current batch? */
3669       const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_slot_bytes);
3670 
3671       /* Non-indexed call or indexed with a real index buffer. */
3672       struct tc_draw_vstate_multi *p =
3673          tc_add_slot_based_call(tc, TC_CALL_draw_vstate_multi, tc_draw_vstate_multi, dr);
3674 
3675       if (!take_vertex_state_ownership)
3676          tc_set_vertex_state_reference(&p->state, state);
3677       else
3678          p->state = state;
3679 
3680       take_vertex_state_ownership = false;
3681       p->partial_velem_mask = partial_velem_mask;
3682       p->info.mode = info.mode;
3683       p->info.take_vertex_state_ownership = false;
3684       p->num_draws = dr;
3685       memcpy(p->slot, &draws[total_offset], sizeof(draws[0]) * dr);
3686       num_draws -= dr;
3687 
3688       total_offset += dr;
3689    }
3690 
3691 
3692    /* This must be after tc_add_*call, which can flush the batch. */
3693    if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
3694       tc_add_all_gfx_bindings_to_buffer_list(tc);
3695 }
3696 
3697 struct tc_launch_grid_call {
3698    struct tc_call_base base;
3699    struct pipe_grid_info info;
3700 };
3701 
3702 static uint16_t
3703 tc_call_launch_grid(struct pipe_context *pipe, void *call, uint64_t *last)
3704 {
3705    struct pipe_grid_info *p = &to_call(call, tc_launch_grid_call)->info;
3706 
3707    pipe->launch_grid(pipe, p);
3708    tc_drop_resource_reference(p->indirect);
3709    return call_size(tc_launch_grid_call);
3710 }
3711 
3712 static void
3713 tc_launch_grid(struct pipe_context *_pipe,
3714                const struct pipe_grid_info *info)
3715 {
3716    struct threaded_context *tc = threaded_context(_pipe);
3717    struct tc_launch_grid_call *p = tc_add_call(tc, TC_CALL_launch_grid,
3718                                                tc_launch_grid_call);
3719    assert(info->input == NULL);
3720 
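        /* Take the indirect buffer reference first; the memcpy below stores the
         * same pointer value, so only the reference count changes.
         */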
3721    tc_set_resource_reference(&p->info.indirect, info->indirect);
3722    memcpy(&p->info, info, sizeof(*info));
3723 
3724    if (info->indirect)
3725       tc_add_to_buffer_list(tc, &tc->buffer_lists[tc->next_buf_list], info->indirect);
3726 
3727    /* This must be after tc_add_*call, which can flush the batch. */
3728    if (unlikely(tc->add_all_compute_bindings_to_buffer_list))
3729       tc_add_all_compute_bindings_to_buffer_list(tc);
3730 }
3731 
3732 static uint16_t
3733 tc_call_resource_copy_region(struct pipe_context *pipe, void *call, uint64_t *last)
3734 {
3735    struct tc_resource_copy_region *p = to_call(call, tc_resource_copy_region);
3736 
3737    pipe->resource_copy_region(pipe, p->dst, p->dst_level, p->dstx, p->dsty,
3738                               p->dstz, p->src, p->src_level, &p->src_box);
3739    tc_drop_resource_reference(p->dst);
3740    tc_drop_resource_reference(p->src);
3741    return call_size(tc_resource_copy_region);
3742 }
3743 
3744 static void
3745 tc_resource_copy_region(struct pipe_context *_pipe,
3746                         struct pipe_resource *dst, unsigned dst_level,
3747                         unsigned dstx, unsigned dsty, unsigned dstz,
3748                         struct pipe_resource *src, unsigned src_level,
3749                         const struct pipe_box *src_box)
3750 {
3751    struct threaded_context *tc = threaded_context(_pipe);
3752    struct threaded_resource *tdst = threaded_resource(dst);
3753    struct tc_resource_copy_region *p =
3754       tc_add_call(tc, TC_CALL_resource_copy_region,
3755                   tc_resource_copy_region);
3756 
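        /* The GPU will write the destination buffer, which a CPU shadow copy
         * can't track, so CPU storage is disabled for buffer destinations.
         */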
3757    if (dst->target == PIPE_BUFFER)
3758       tc_buffer_disable_cpu_storage(dst);
3759 
3760    tc_set_resource_reference(&p->dst, dst);
3761    p->dst_level = dst_level;
3762    p->dstx = dstx;
3763    p->dsty = dsty;
3764    p->dstz = dstz;
3765    tc_set_resource_reference(&p->src, src);
3766    p->src_level = src_level;
3767    p->src_box = *src_box;
3768 
3769    if (dst->target == PIPE_BUFFER) {
3770       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
3771 
3772       tc_add_to_buffer_list(tc, next, src);
3773       tc_add_to_buffer_list(tc, next, dst);
3774 
3775       util_range_add(&tdst->b, &tdst->valid_buffer_range,
3776                      dstx, dstx + src_box->width);
3777    }
3778 }
3779 
3780 struct tc_blit_call {
3781    struct tc_call_base base;
3782    struct pipe_blit_info info;
3783 };
3784 
3785 static uint16_t
3786 tc_call_blit(struct pipe_context *pipe, void *call, uint64_t *last)
3787 {
3788    struct pipe_blit_info *blit = &to_call(call, tc_blit_call)->info;
3789 
3790    pipe->blit(pipe, blit);
3791    tc_drop_resource_reference(blit->dst.resource);
3792    tc_drop_resource_reference(blit->src.resource);
3793    return call_size(tc_blit_call);
3794 }
3795 
3796 static void
3797 tc_blit(struct pipe_context *_pipe, const struct pipe_blit_info *info)
3798 {
3799    struct threaded_context *tc = threaded_context(_pipe);
3800    struct tc_blit_call *blit = tc_add_call(tc, TC_CALL_blit, tc_blit_call);
3801 
3802    tc_set_resource_reference(&blit->info.dst.resource, info->dst.resource);
3803    tc_set_resource_reference(&blit->info.src.resource, info->src.resource);
3804    memcpy(&blit->info, info, sizeof(*info));
3805 }
3806 
3807 struct tc_generate_mipmap {
3808    struct tc_call_base base;
3809    enum pipe_format format;
3810    unsigned base_level;
3811    unsigned last_level;
3812    unsigned first_layer;
3813    unsigned last_layer;
3814    struct pipe_resource *res;
3815 };
3816 
3817 static uint16_t
3818 tc_call_generate_mipmap(struct pipe_context *pipe, void *call, uint64_t *last)
3819 {
3820    struct tc_generate_mipmap *p = to_call(call, tc_generate_mipmap);
3821    ASSERTED bool result = pipe->generate_mipmap(pipe, p->res, p->format,
3822                                                     p->base_level,
3823                                                     p->last_level,
3824                                                     p->first_layer,
3825                                                     p->last_layer);
3826    assert(result);
3827    tc_drop_resource_reference(p->res);
3828    return call_size(tc_generate_mipmap);
3829 }
3830 
3831 static bool
3832 tc_generate_mipmap(struct pipe_context *_pipe,
3833                    struct pipe_resource *res,
3834                    enum pipe_format format,
3835                    unsigned base_level,
3836                    unsigned last_level,
3837                    unsigned first_layer,
3838                    unsigned last_layer)
3839 {
3840    struct threaded_context *tc = threaded_context(_pipe);
3841    struct pipe_context *pipe = tc->pipe;
3842    struct pipe_screen *screen = pipe->screen;
3843    unsigned bind = PIPE_BIND_SAMPLER_VIEW;
3844 
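        /* The format check has to happen synchronously since this function
         * returns a result; only the actual mipmap generation is enqueued.
         */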
3845    if (util_format_is_depth_or_stencil(format))
3846       bind = PIPE_BIND_DEPTH_STENCIL;
3847    else
3848       bind = PIPE_BIND_RENDER_TARGET;
3849 
3850    if (!screen->is_format_supported(screen, format, res->target,
3851                                     res->nr_samples, res->nr_storage_samples,
3852                                     bind))
3853       return false;
3854 
3855    struct tc_generate_mipmap *p =
3856       tc_add_call(tc, TC_CALL_generate_mipmap, tc_generate_mipmap);
3857 
3858    tc_set_resource_reference(&p->res, res);
3859    p->format = format;
3860    p->base_level = base_level;
3861    p->last_level = last_level;
3862    p->first_layer = first_layer;
3863    p->last_layer = last_layer;
3864    return true;
3865 }
3866 
3867 struct tc_resource_call {
3868    struct tc_call_base base;
3869    struct pipe_resource *resource;
3870 };
3871 
3872 static uint16_t
3873 tc_call_flush_resource(struct pipe_context *pipe, void *call, uint64_t *last)
3874 {
3875    struct pipe_resource *resource = to_call(call, tc_resource_call)->resource;
3876 
3877    pipe->flush_resource(pipe, resource);
3878    tc_drop_resource_reference(resource);
3879    return call_size(tc_resource_call);
3880 }
3881 
3882 static void
3883 tc_flush_resource(struct pipe_context *_pipe, struct pipe_resource *resource)
3884 {
3885    struct threaded_context *tc = threaded_context(_pipe);
3886    struct tc_resource_call *call = tc_add_call(tc, TC_CALL_flush_resource,
3887                                                tc_resource_call);
3888 
3889    tc_set_resource_reference(&call->resource, resource);
3890 }
3891 
3892 static uint16_t
3893 tc_call_invalidate_resource(struct pipe_context *pipe, void *call, uint64_t *last)
3894 {
3895    struct pipe_resource *resource = to_call(call, tc_resource_call)->resource;
3896 
3897    pipe->invalidate_resource(pipe, resource);
3898    tc_drop_resource_reference(resource);
3899    return call_size(tc_resource_call);
3900 }
3901 
3902 static void
3903 tc_invalidate_resource(struct pipe_context *_pipe,
3904                        struct pipe_resource *resource)
3905 {
3906    struct threaded_context *tc = threaded_context(_pipe);
3907 
3908    if (resource->target == PIPE_BUFFER) {
3909       /* This can fail, in which case we simply ignore the invalidation request. */
3910       struct threaded_resource *tbuf = threaded_resource(resource);
3911       tc_touch_buffer(tc, tbuf);
3912       tc_invalidate_buffer(tc, tbuf);
3913       return;
3914    }
3915 
3916    struct tc_resource_call *call = tc_add_call(tc, TC_CALL_invalidate_resource,
3917                                                tc_resource_call);
3918    tc_set_resource_reference(&call->resource, resource);
3919 }
3920 
3921 struct tc_clear {
3922    struct tc_call_base base;
3923    bool scissor_state_set;
3924    uint8_t stencil;
3925    uint16_t buffers;
3926    float depth;
3927    struct pipe_scissor_state scissor_state;
3928    union pipe_color_union color;
3929 };
3930 
3931 static uint16_t
3932 tc_call_clear(struct pipe_context *pipe, void *call, uint64_t *last)
3933 {
3934    struct tc_clear *p = to_call(call, tc_clear);
3935 
3936    pipe->clear(pipe, p->buffers, p->scissor_state_set ? &p->scissor_state : NULL, &p->color, p->depth, p->stencil);
3937    return call_size(tc_clear);
3938 }
3939 
3940 static void
3941 tc_clear(struct pipe_context *_pipe, unsigned buffers, const struct pipe_scissor_state *scissor_state,
3942          const union pipe_color_union *color, double depth,
3943          unsigned stencil)
3944 {
3945    struct threaded_context *tc = threaded_context(_pipe);
3946    struct tc_clear *p = tc_add_call(tc, TC_CALL_clear, tc_clear);
3947 
3948    p->buffers = buffers;
3949    if (scissor_state)
3950       p->scissor_state = *scissor_state;
3951    p->scissor_state_set = !!scissor_state;
3952    p->color = *color;
3953    p->depth = depth;
3954    p->stencil = stencil;
3955 }
3956 
3957 struct tc_clear_render_target {
3958    struct tc_call_base base;
3959    bool render_condition_enabled;
3960    unsigned dstx;
3961    unsigned dsty;
3962    unsigned width;
3963    unsigned height;
3964    union pipe_color_union color;
3965    struct pipe_surface *dst;
3966 };
3967 
3968 static uint16_t
3969 tc_call_clear_render_target(struct pipe_context *pipe, void *call, uint64_t *last)
3970 {
3971    struct tc_clear_render_target *p = to_call(call, tc_clear_render_target);
3972 
3973    pipe->clear_render_target(pipe, p->dst, &p->color, p->dstx, p->dsty, p->width, p->height,
3974                              p->render_condition_enabled);
3975    tc_drop_surface_reference(p->dst);
3976    return call_size(tc_clear_render_target);
3977 }
3978 
3979 static void
3980 tc_clear_render_target(struct pipe_context *_pipe,
3981                        struct pipe_surface *dst,
3982                        const union pipe_color_union *color,
3983                        unsigned dstx, unsigned dsty,
3984                        unsigned width, unsigned height,
3985                        bool render_condition_enabled)
3986 {
3987    struct threaded_context *tc = threaded_context(_pipe);
3988    struct tc_clear_render_target *p = tc_add_call(tc, TC_CALL_clear_render_target, tc_clear_render_target);
3989    p->dst = NULL;
3990    pipe_surface_reference(&p->dst, dst);
3991    p->color = *color;
3992    p->dstx = dstx;
3993    p->dsty = dsty;
3994    p->width = width;
3995    p->height = height;
3996    p->render_condition_enabled = render_condition_enabled;
3997 }
3998 
3999 
4000 struct tc_clear_depth_stencil {
4001    struct tc_call_base base;
4002    bool render_condition_enabled;
4003    float depth;
4004    unsigned clear_flags;
4005    unsigned stencil;
4006    unsigned dstx;
4007    unsigned dsty;
4008    unsigned width;
4009    unsigned height;
4010    struct pipe_surface *dst;
4011 };
4012 
4013 
4014 static uint16_t
4015 tc_call_clear_depth_stencil(struct pipe_context *pipe, void *call, uint64_t *last)
4016 {
4017    struct tc_clear_depth_stencil *p = to_call(call, tc_clear_depth_stencil);
4018 
4019    pipe->clear_depth_stencil(pipe, p->dst, p->clear_flags, p->depth, p->stencil,
4020                              p->dstx, p->dsty, p->width, p->height,
4021                              p->render_condition_enabled);
4022    tc_drop_surface_reference(p->dst);
4023    return call_size(tc_clear_depth_stencil);
4024 }
4025 
4026 static void
4027 tc_clear_depth_stencil(struct pipe_context *_pipe,
4028                        struct pipe_surface *dst, unsigned clear_flags,
4029                        double depth, unsigned stencil, unsigned dstx,
4030                        unsigned dsty, unsigned width, unsigned height,
4031                        bool render_condition_enabled)
4032 {
4033    struct threaded_context *tc = threaded_context(_pipe);
4034    struct tc_clear_depth_stencil *p = tc_add_call(tc, TC_CALL_clear_depth_stencil, tc_clear_depth_stencil);
4035    p->dst = NULL;
4036    pipe_surface_reference(&p->dst, dst);
4037    p->clear_flags = clear_flags;
4038    p->depth = depth;
4039    p->stencil = stencil;
4040    p->dstx = dstx;
4041    p->dsty = dsty;
4042    p->width = width;
4043    p->height = height;
4044    p->render_condition_enabled = render_condition_enabled;
4045 }
4046 
4047 struct tc_clear_buffer {
4048    struct tc_call_base base;
4049    uint8_t clear_value_size;
4050    unsigned offset;
4051    unsigned size;
4052    char clear_value[16];
4053    struct pipe_resource *res;
4054 };
4055 
4056 static uint16_t
4057 tc_call_clear_buffer(struct pipe_context *pipe, void *call, uint64_t *last)
4058 {
4059    struct tc_clear_buffer *p = to_call(call, tc_clear_buffer);
4060 
4061    pipe->clear_buffer(pipe, p->res, p->offset, p->size, p->clear_value,
4062                       p->clear_value_size);
4063    tc_drop_resource_reference(p->res);
4064    return call_size(tc_clear_buffer);
4065 }
4066 
4067 static void
4068 tc_clear_buffer(struct pipe_context *_pipe, struct pipe_resource *res,
4069                 unsigned offset, unsigned size,
4070                 const void *clear_value, int clear_value_size)
4071 {
4072    struct threaded_context *tc = threaded_context(_pipe);
4073    struct threaded_resource *tres = threaded_resource(res);
4074    struct tc_clear_buffer *p =
4075       tc_add_call(tc, TC_CALL_clear_buffer, tc_clear_buffer);
4076 
4077    tc_buffer_disable_cpu_storage(res);
4078 
4079    tc_set_resource_reference(&p->res, res);
4080    tc_add_to_buffer_list(tc, &tc->buffer_lists[tc->next_buf_list], res);
4081    p->offset = offset;
4082    p->size = size;
4083    memcpy(p->clear_value, clear_value, clear_value_size);
4084    p->clear_value_size = clear_value_size;
4085 
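        /* Mark the cleared range as containing valid data, so that later maps
         * outside the valid range can stay unsynchronized.
         */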
4086    util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size);
4087 }
4088 
4089 struct tc_clear_texture {
4090    struct tc_call_base base;
4091    unsigned level;
4092    struct pipe_box box;
4093    char data[16];
4094    struct pipe_resource *res;
4095 };
4096 
4097 static uint16_t
4098 tc_call_clear_texture(struct pipe_context *pipe, void *call, uint64_t *last)
4099 {
4100    struct tc_clear_texture *p = to_call(call, tc_clear_texture);
4101 
4102    pipe->clear_texture(pipe, p->res, p->level, &p->box, p->data);
4103    tc_drop_resource_reference(p->res);
4104    return call_size(tc_clear_texture);
4105 }
4106 
4107 static void
4108 tc_clear_texture(struct pipe_context *_pipe, struct pipe_resource *res,
4109                  unsigned level, const struct pipe_box *box, const void *data)
4110 {
4111    struct threaded_context *tc = threaded_context(_pipe);
4112    struct tc_clear_texture *p =
4113       tc_add_call(tc, TC_CALL_clear_texture, tc_clear_texture);
4114 
4115    tc_set_resource_reference(&p->res, res);
4116    p->level = level;
4117    p->box = *box;
4118    memcpy(p->data, data,
4119           util_format_get_blocksize(res->format));
4120 }
4121 
4122 struct tc_resource_commit {
4123    struct tc_call_base base;
4124    bool commit;
4125    unsigned level;
4126    struct pipe_box box;
4127    struct pipe_resource *res;
4128 };
4129 
4130 static uint16_t
4131 tc_call_resource_commit(struct pipe_context *pipe, void *call, uint64_t *last)
4132 {
4133    struct tc_resource_commit *p = to_call(call, tc_resource_commit);
4134 
4135    pipe->resource_commit(pipe, p->res, p->level, &p->box, p->commit);
4136    tc_drop_resource_reference(p->res);
4137    return call_size(tc_resource_commit);
4138 }
4139 
4140 static bool
4141 tc_resource_commit(struct pipe_context *_pipe, struct pipe_resource *res,
4142                    unsigned level, struct pipe_box *box, bool commit)
4143 {
4144    struct threaded_context *tc = threaded_context(_pipe);
4145    struct tc_resource_commit *p =
4146       tc_add_call(tc, TC_CALL_resource_commit, tc_resource_commit);
4147 
4148    tc_set_resource_reference(&p->res, res);
4149    p->level = level;
4150    p->box = *box;
4151    p->commit = commit;
4152    return true; /* we don't care about the return value for this call */
4153 }
4154 
4155 static unsigned
4156 tc_init_intel_perf_query_info(struct pipe_context *_pipe)
4157 {
4158    struct threaded_context *tc = threaded_context(_pipe);
4159    struct pipe_context *pipe = tc->pipe;
4160 
4161    return pipe->init_intel_perf_query_info(pipe);
4162 }
4163 
4164 static void
4165 tc_get_intel_perf_query_info(struct pipe_context *_pipe,
4166                              unsigned query_index,
4167                              const char **name,
4168                              uint32_t *data_size,
4169                              uint32_t *n_counters,
4170                              uint32_t *n_active)
4171 {
4172    struct threaded_context *tc = threaded_context(_pipe);
4173    struct pipe_context *pipe = tc->pipe;
4174 
4175    tc_sync(tc); /* n_active vs begin/end_intel_perf_query */
4176    pipe->get_intel_perf_query_info(pipe, query_index, name, data_size,
4177          n_counters, n_active);
4178 }
4179 
4180 static void
4181 tc_get_intel_perf_query_counter_info(struct pipe_context *_pipe,
4182                                      unsigned query_index,
4183                                      unsigned counter_index,
4184                                      const char **name,
4185                                      const char **desc,
4186                                      uint32_t *offset,
4187                                      uint32_t *data_size,
4188                                      uint32_t *type_enum,
4189                                      uint32_t *data_type_enum,
4190                                      uint64_t *raw_max)
4191 {
4192    struct threaded_context *tc = threaded_context(_pipe);
4193    struct pipe_context *pipe = tc->pipe;
4194 
4195    pipe->get_intel_perf_query_counter_info(pipe, query_index, counter_index,
4196          name, desc, offset, data_size, type_enum, data_type_enum, raw_max);
4197 }
4198 
4199 static struct pipe_query *
4200 tc_new_intel_perf_query_obj(struct pipe_context *_pipe, unsigned query_index)
4201 {
4202    struct threaded_context *tc = threaded_context(_pipe);
4203    struct pipe_context *pipe = tc->pipe;
4204 
4205    return pipe->new_intel_perf_query_obj(pipe, query_index);
4206 }
4207 
4208 static uint16_t
4209 tc_call_begin_intel_perf_query(struct pipe_context *pipe, void *call, uint64_t *last)
4210 {
4211    (void)pipe->begin_intel_perf_query(pipe, to_call(call, tc_query_call)->query);
4212    return call_size(tc_query_call);
4213 }
4214 
4215 static bool
4216 tc_begin_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
4217 {
4218    struct threaded_context *tc = threaded_context(_pipe);
4219 
4220    tc_add_call(tc, TC_CALL_begin_intel_perf_query, tc_query_call)->query = q;
4221 
4222    /* assume success, begin failure can be signaled from get_intel_perf_query_data */
4223    return true;
4224 }
4225 
4226 static uint16_t
4227 tc_call_end_intel_perf_query(struct pipe_context *pipe, void *call, uint64_t *last)
4228 {
4229    pipe->end_intel_perf_query(pipe, to_call(call, tc_query_call)->query);
4230    return call_size(tc_query_call);
4231 }
4232 
4233 static void
4234 tc_end_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
4235 {
4236    struct threaded_context *tc = threaded_context(_pipe);
4237 
4238    tc_add_call(tc, TC_CALL_end_intel_perf_query, tc_query_call)->query = q;
4239 }
4240 
4241 static void
4242 tc_delete_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
4243 {
4244    struct threaded_context *tc = threaded_context(_pipe);
4245    struct pipe_context *pipe = tc->pipe;
4246 
4247    tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
4248    pipe->delete_intel_perf_query(pipe, q);
4249 }
4250 
4251 static void
4252 tc_wait_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
4253 {
4254    struct threaded_context *tc = threaded_context(_pipe);
4255    struct pipe_context *pipe = tc->pipe;
4256 
4257    tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
4258    pipe->wait_intel_perf_query(pipe, q);
4259 }
4260 
4261 static bool
4262 tc_is_intel_perf_query_ready(struct pipe_context *_pipe, struct pipe_query *q)
4263 {
4264    struct threaded_context *tc = threaded_context(_pipe);
4265    struct pipe_context *pipe = tc->pipe;
4266 
4267    tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
4268    return pipe->is_intel_perf_query_ready(pipe, q);
4269 }
4270 
4271 static bool
4272 tc_get_intel_perf_query_data(struct pipe_context *_pipe,
4273                              struct pipe_query *q,
4274                              size_t data_size,
4275                              uint32_t *data,
4276                              uint32_t *bytes_written)
4277 {
4278    struct threaded_context *tc = threaded_context(_pipe);
4279    struct pipe_context *pipe = tc->pipe;
4280 
4281    tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
4282    return pipe->get_intel_perf_query_data(pipe, q, data_size, data, bytes_written);
4283 }
4284 
4285 /********************************************************************
4286  * callback
4287  */
4288 
4289 struct tc_callback_call {
4290    struct tc_call_base base;
4291    void (*fn)(void *data);
4292    void *data;
4293 };
4294 
4295 static uint16_t
4296 tc_call_callback(UNUSED struct pipe_context *pipe, void *call, uint64_t *last)
4297 {
4298    struct tc_callback_call *p = to_call(call, tc_callback_call);
4299 
4300    p->fn(p->data);
4301    return call_size(tc_callback_call);
4302 }
4303 
4304 static void
4305 tc_callback(struct pipe_context *_pipe, void (*fn)(void *), void *data,
4306             bool asap)
4307 {
4308    struct threaded_context *tc = threaded_context(_pipe);
4309 
4310    if (asap && tc_is_sync(tc)) {
4311       fn(data);
4312       return;
4313    }
4314 
4315    struct tc_callback_call *p =
4316       tc_add_call(tc, TC_CALL_callback, tc_callback_call);
4317    p->fn = fn;
4318    p->data = data;
4319 }
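/* A hedged usage sketch (not part of this file; the mydrv_* names are
 * hypothetical): a driver or frontend can queue work that must run on the
 * driver thread through the wrapped context's callback hook. With asap=true
 * the function runs immediately when tc is already synchronized, otherwise
 * it is recorded into the current batch:
 *
 *    static void mydrv_release_slab(void *data)
 *    {
 *       FREE(data);
 *    }
 *
 *    ctx->callback(ctx, mydrv_release_slab, slab, true);
 */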
4320 
4321 
4322 /********************************************************************
4323  * create & destroy
4324  */
4325 
4326 static void
4327 tc_destroy(struct pipe_context *_pipe)
4328 {
4329    struct threaded_context *tc = threaded_context(_pipe);
4330    struct pipe_context *pipe = tc->pipe;
4331 
4332    if (tc->base.const_uploader &&
4333        tc->base.stream_uploader != tc->base.const_uploader)
4334       u_upload_destroy(tc->base.const_uploader);
4335 
4336    if (tc->base.stream_uploader)
4337       u_upload_destroy(tc->base.stream_uploader);
4338 
4339    tc_sync(tc);
4340 
4341    if (util_queue_is_initialized(&tc->queue)) {
4342       util_queue_destroy(&tc->queue);
4343 
4344       for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
4345          util_queue_fence_destroy(&tc->batch_slots[i].fence);
4346          assert(!tc->batch_slots[i].token);
4347       }
4348    }
4349 
4350    slab_destroy_child(&tc->pool_transfers);
4351    assert(tc->batch_slots[tc->next].num_total_slots == 0);
4352    pipe->destroy(pipe);
4353 
4354    for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) {
4355       if (!util_queue_fence_is_signalled(&tc->buffer_lists[i].driver_flushed_fence))
4356          util_queue_fence_signal(&tc->buffer_lists[i].driver_flushed_fence);
4357       util_queue_fence_destroy(&tc->buffer_lists[i].driver_flushed_fence);
4358    }
4359 
4360    FREE(tc);
4361 }
4362 
4363 static const tc_execute execute_func[TC_NUM_CALLS] = {
4364 #define CALL(name) tc_call_##name,
4365 #include "u_threaded_context_calls.h"
4366 #undef CALL
4367 };
4368 
4369 void tc_driver_internal_flush_notify(struct threaded_context *tc)
4370 {
4371    /* Allow drivers to call this function even for internal contexts that
4372     * don't have tc. It simplifies drivers.
4373     */
4374    if (!tc)
4375       return;
4376 
4377    /* Signal fences set by tc_batch_execute. */
4378    for (unsigned i = 0; i < tc->num_signal_fences_next_flush; i++)
4379       util_queue_fence_signal(tc->signal_fences_next_flush[i]);
4380 
4381    tc->num_signal_fences_next_flush = 0;
4382 }
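/* A hedged sketch (hypothetical mydrv_* names) of where a driver would call
 * this: from its internal command-buffer flush path, so that fences queued
 * by tc_batch_execute for the next flush get signaled even when the flush
 * was not initiated through tc. The exact placement relative to submission
 * is driver-specific:
 *
 *    static void mydrv_flush_internal(struct mydrv_context *mctx)
 *    {
 *       tc_driver_internal_flush_notify(mctx->tc);
 *       mydrv_submit_cmdbuf(mctx);
 *    }
 */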
4383 
4384 /**
4385  * Wrap an existing pipe_context into a threaded_context.
4386  *
4387  * \param pipe                 pipe_context to wrap
4388  * \param parent_transfer_pool parent slab pool set up for creating pipe_-
4389  *                             transfer objects; the driver should have one
4390  *                             in pipe_screen.
4391  * \param replace_buffer  callback for replacing a pipe_resource's storage
4392  *                        with another pipe_resource's storage.
4393  * \param options         optional TC options/callbacks
4394  * \param out  if successful, the threaded_context will be returned here in
4395  *             addition to the return value if "out" != NULL
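 *
 * A minimal creation sketch (hypothetical mydrv_* names; the transfer-pool
 * field and the replace-storage helper are assumptions, not defined here).
 * Passing NULL for "options" and "out" is allowed:
 *
 *    struct pipe_context *
 *    mydrv_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
 *    {
 *       struct pipe_context *ctx = mydrv_context_create(screen, priv, flags);
 *       if (!ctx)
 *          return NULL;
 *
 *       return threaded_context_create(ctx,
 *                                      &mydrv_screen(screen)->pool_transfers,
 *                                      mydrv_replace_buffer_storage,
 *                                      NULL, NULL);
 *    }
 *
 * If threading is disabled (e.g. GALLIUM_THREAD=0 or a single-CPU system),
 * the original pipe is returned unchanged, so the caller can use the return
 * value unconditionally.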
4396  */
4397 struct pipe_context *
4398 threaded_context_create(struct pipe_context *pipe,
4399                         struct slab_parent_pool *parent_transfer_pool,
4400                         tc_replace_buffer_storage_func replace_buffer,
4401                         const struct threaded_context_options *options,
4402                         struct threaded_context **out)
4403 {
4404    struct threaded_context *tc;
4405 
4406    if (!pipe)
4407       return NULL;
4408 
4409    if (!debug_get_bool_option("GALLIUM_THREAD", util_get_cpu_caps()->nr_cpus > 1))
4410       return pipe;
4411 
4412    tc = CALLOC_STRUCT(threaded_context);
4413    if (!tc) {
4414       pipe->destroy(pipe);
4415       return NULL;
4416    }
4417 
4418    if (options)
4419       tc->options = *options;
4420 
4421    pipe = trace_context_create_threaded(pipe->screen, pipe, &replace_buffer, &tc->options);
4422 
4423    /* The driver context isn't wrapped, so set its "priv" to NULL. */
4424    pipe->priv = NULL;
4425 
4426    tc->pipe = pipe;
4427    tc->replace_buffer_storage = replace_buffer;
4428    tc->map_buffer_alignment =
4429       pipe->screen->get_param(pipe->screen, PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT);
4430    tc->ubo_alignment =
4431       MAX2(pipe->screen->get_param(pipe->screen, PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT), 64);
4432    tc->base.priv = pipe; /* priv points to the wrapped driver context */
4433    tc->base.screen = pipe->screen;
4434    tc->base.destroy = tc_destroy;
4435    tc->base.callback = tc_callback;
4436 
4437    tc->base.stream_uploader = u_upload_clone(&tc->base, pipe->stream_uploader);
4438    if (pipe->stream_uploader == pipe->const_uploader)
4439       tc->base.const_uploader = tc->base.stream_uploader;
4440    else
4441       tc->base.const_uploader = u_upload_clone(&tc->base, pipe->const_uploader);
4442 
4443    if (!tc->base.stream_uploader || !tc->base.const_uploader)
4444       goto fail;
4445 
4446    tc->use_forced_staging_uploads = true;
4447 
4448    /* The queue size is the number of batches "waiting". Batches are removed
4449     * from the queue before being executed, so keep one tc_batch slot for that
4450     * execution. Also, keep one unused slot for an unflushed batch.
4451     */
4452    if (!util_queue_init(&tc->queue, "gdrv", TC_MAX_BATCHES - 2, 1, 0, NULL))
4453       goto fail;
4454 
4455    for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
4456 #if !defined(NDEBUG) && TC_DEBUG >= 1
4457       tc->batch_slots[i].sentinel = TC_SENTINEL;
4458 #endif
4459       tc->batch_slots[i].tc = tc;
4460       util_queue_fence_init(&tc->batch_slots[i].fence);
4461    }
4462    for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++)
4463       util_queue_fence_init(&tc->buffer_lists[i].driver_flushed_fence);
4464 
4465    list_inithead(&tc->unflushed_queries);
4466 
4467    slab_create_child(&tc->pool_transfers, parent_transfer_pool);
4468 
4469    /* If you have different limits in each shader stage, set the maximum. */
4470    struct pipe_screen *screen = pipe->screen;
4471    tc->max_vertex_buffers =
4472       screen->get_param(screen, PIPE_CAP_MAX_VERTEX_BUFFERS);
4473    tc->max_const_buffers =
4474       screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
4475                                PIPE_SHADER_CAP_MAX_CONST_BUFFERS);
4476    tc->max_shader_buffers =
4477       screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
4478                                PIPE_SHADER_CAP_MAX_SHADER_BUFFERS);
4479    tc->max_images =
4480       screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
4481                                PIPE_SHADER_CAP_MAX_SHADER_IMAGES);
4482    tc->max_samplers =
4483       screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
4484                                PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS);
4485 
4486    tc->base.set_context_param = tc_set_context_param; /* always set this */
4487 
4488 #define CTX_INIT(_member) \
4489    tc->base._member = tc->pipe->_member ? tc_##_member : NULL
4490 
4491    CTX_INIT(flush);
4492    CTX_INIT(draw_vbo);
4493    CTX_INIT(draw_vertex_state);
4494    CTX_INIT(launch_grid);
4495    CTX_INIT(resource_copy_region);
4496    CTX_INIT(blit);
4497    CTX_INIT(clear);
4498    CTX_INIT(clear_render_target);
4499    CTX_INIT(clear_depth_stencil);
4500    CTX_INIT(clear_buffer);
4501    CTX_INIT(clear_texture);
4502    CTX_INIT(flush_resource);
4503    CTX_INIT(generate_mipmap);
4504    CTX_INIT(render_condition);
4505    CTX_INIT(create_query);
4506    CTX_INIT(create_batch_query);
4507    CTX_INIT(destroy_query);
4508    CTX_INIT(begin_query);
4509    CTX_INIT(end_query);
4510    CTX_INIT(get_query_result);
4511    CTX_INIT(get_query_result_resource);
4512    CTX_INIT(set_active_query_state);
4513    CTX_INIT(create_blend_state);
4514    CTX_INIT(bind_blend_state);
4515    CTX_INIT(delete_blend_state);
4516    CTX_INIT(create_sampler_state);
4517    CTX_INIT(bind_sampler_states);
4518    CTX_INIT(delete_sampler_state);
4519    CTX_INIT(create_rasterizer_state);
4520    CTX_INIT(bind_rasterizer_state);
4521    CTX_INIT(delete_rasterizer_state);
4522    CTX_INIT(create_depth_stencil_alpha_state);
4523    CTX_INIT(bind_depth_stencil_alpha_state);
4524    CTX_INIT(delete_depth_stencil_alpha_state);
4525    CTX_INIT(link_shader);
4526    CTX_INIT(create_fs_state);
4527    CTX_INIT(bind_fs_state);
4528    CTX_INIT(delete_fs_state);
4529    CTX_INIT(create_vs_state);
4530    CTX_INIT(bind_vs_state);
4531    CTX_INIT(delete_vs_state);
4532    CTX_INIT(create_gs_state);
4533    CTX_INIT(bind_gs_state);
4534    CTX_INIT(delete_gs_state);
4535    CTX_INIT(create_tcs_state);
4536    CTX_INIT(bind_tcs_state);
4537    CTX_INIT(delete_tcs_state);
4538    CTX_INIT(create_tes_state);
4539    CTX_INIT(bind_tes_state);
4540    CTX_INIT(delete_tes_state);
4541    CTX_INIT(create_compute_state);
4542    CTX_INIT(bind_compute_state);
4543    CTX_INIT(delete_compute_state);
4544    CTX_INIT(create_vertex_elements_state);
4545    CTX_INIT(bind_vertex_elements_state);
4546    CTX_INIT(delete_vertex_elements_state);
4547    CTX_INIT(set_blend_color);
4548    CTX_INIT(set_stencil_ref);
4549    CTX_INIT(set_sample_mask);
4550    CTX_INIT(set_min_samples);
4551    CTX_INIT(set_clip_state);
4552    CTX_INIT(set_constant_buffer);
4553    CTX_INIT(set_inlinable_constants);
4554    CTX_INIT(set_framebuffer_state);
4555    CTX_INIT(set_polygon_stipple);
4556    CTX_INIT(set_sample_locations);
4557    CTX_INIT(set_scissor_states);
4558    CTX_INIT(set_viewport_states);
4559    CTX_INIT(set_window_rectangles);
4560    CTX_INIT(set_sampler_views);
4561    CTX_INIT(set_tess_state);
4562    CTX_INIT(set_patch_vertices);
4563    CTX_INIT(set_shader_buffers);
4564    CTX_INIT(set_shader_images);
4565    CTX_INIT(set_vertex_buffers);
4566    CTX_INIT(create_stream_output_target);
4567    CTX_INIT(stream_output_target_destroy);
4568    CTX_INIT(set_stream_output_targets);
4569    CTX_INIT(create_sampler_view);
4570    CTX_INIT(sampler_view_destroy);
4571    CTX_INIT(create_surface);
4572    CTX_INIT(surface_destroy);
4573    CTX_INIT(buffer_map);
4574    CTX_INIT(texture_map);
4575    CTX_INIT(transfer_flush_region);
4576    CTX_INIT(buffer_unmap);
4577    CTX_INIT(texture_unmap);
4578    CTX_INIT(buffer_subdata);
4579    CTX_INIT(texture_subdata);
4580    CTX_INIT(texture_barrier);
4581    CTX_INIT(memory_barrier);
4582    CTX_INIT(resource_commit);
4583    CTX_INIT(create_video_codec);
4584    CTX_INIT(create_video_buffer);
4585    CTX_INIT(set_compute_resources);
4586    CTX_INIT(set_global_binding);
4587    CTX_INIT(get_sample_position);
4588    CTX_INIT(invalidate_resource);
4589    CTX_INIT(get_device_reset_status);
4590    CTX_INIT(set_device_reset_callback);
4591    CTX_INIT(dump_debug_state);
4592    CTX_INIT(set_log_context);
4593    CTX_INIT(emit_string_marker);
4594    CTX_INIT(set_debug_callback);
4595    CTX_INIT(create_fence_fd);
4596    CTX_INIT(fence_server_sync);
4597    CTX_INIT(fence_server_signal);
4598    CTX_INIT(get_timestamp);
4599    CTX_INIT(create_texture_handle);
4600    CTX_INIT(delete_texture_handle);
4601    CTX_INIT(make_texture_handle_resident);
4602    CTX_INIT(create_image_handle);
4603    CTX_INIT(delete_image_handle);
4604    CTX_INIT(make_image_handle_resident);
4605    CTX_INIT(set_frontend_noop);
4606    CTX_INIT(init_intel_perf_query_info);
4607    CTX_INIT(get_intel_perf_query_info);
4608    CTX_INIT(get_intel_perf_query_counter_info);
4609    CTX_INIT(new_intel_perf_query_obj);
4610    CTX_INIT(begin_intel_perf_query);
4611    CTX_INIT(end_intel_perf_query);
4612    CTX_INIT(delete_intel_perf_query);
4613    CTX_INIT(wait_intel_perf_query);
4614    CTX_INIT(is_intel_perf_query_ready);
4615    CTX_INIT(get_intel_perf_query_data);
4616 #undef CTX_INIT
4617 
4618    if (out)
4619       *out = tc;
4620 
4621    tc_begin_next_buffer_list(tc);
4622    return &tc->base;
4623 
4624 fail:
4625    tc_destroy(&tc->base);
4626    return NULL;
4627 }
4628 
4629 void
4630 threaded_context_init_bytes_mapped_limit(struct threaded_context *tc, unsigned divisor)
4631 {
4632    uint64_t total_ram;
4633    if (os_get_total_physical_memory(&total_ram)) {
4634       tc->bytes_mapped_limit = total_ram / divisor;
4635       if (sizeof(void*) == 4)
4636          tc->bytes_mapped_limit = MIN2(tc->bytes_mapped_limit, 512*1024*1024UL);
4637    }
4638 }
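/* A hedged usage sketch (hypothetical mydrv_ and screen field names): this
 * sets tc->bytes_mapped_limit to total system RAM divided by the given
 * divisor, capped at 512 MB on 32-bit builds. The divisor of 4 is just an
 * example value:
 *
 *    struct threaded_context *tc = NULL;
 *    struct pipe_context *ctx =
 *       threaded_context_create(pipe, &screen->pool_transfers,
 *                               mydrv_replace_buffer_storage, NULL, &tc);
 *    if (tc)
 *       threaded_context_init_bytes_mapped_limit(tc, 4);
 */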
4639