1 /**************************************************************************
2  *
3  * Copyright 2017 Advanced Micro Devices, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * on the rights to use, copy, modify, merge, publish, distribute, sub
10  * license, and/or sell copies of the Software, and to permit persons to whom
11  * the Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23  * USE OR OTHER DEALINGS IN THE SOFTWARE.
24  *
25  **************************************************************************/
26 
27 #include "util/u_threaded_context.h"
28 #include "util/u_cpu_detect.h"
29 #include "util/format/u_format.h"
30 #include "util/u_inlines.h"
31 #include "util/u_memory.h"
32 #include "util/u_upload_mgr.h"
33 #include "driver_trace/tr_context.h"
34 #include "util/log.h"
35 #include "util/perf/cpu_trace.h"
36 #include "util/thread_sched.h"
37 #include "compiler/shader_info.h"
38 
39 #if TC_DEBUG >= 1
40 #define tc_assert assert
41 #else
42 #define tc_assert(x)
43 #endif
44 
45 #if TC_DEBUG >= 2
46 #define tc_printf mesa_logi
47 #define tc_asprintf asprintf
48 #define tc_strcmp strcmp
49 #else
50 #define tc_printf(...)
51 #define tc_asprintf(...) 0
52 #define tc_strcmp(...) 0
53 #endif
54 
55 #define TC_SENTINEL 0x5ca1ab1e
56 
57 #if TC_DEBUG >= 3 || defined(TC_TRACE)
58 static const char *tc_call_names[] = {
59 #define CALL(name) #name,
60 #include "u_threaded_context_calls.h"
61 #undef CALL
62 };
63 #endif
64 
65 #ifdef TC_TRACE
66 #  define TC_TRACE_SCOPE(call_id) MESA_TRACE_SCOPE(tc_call_names[call_id])
67 #else
68 #  define TC_TRACE_SCOPE(call_id)
69 #endif
70 
71 static void
72 tc_buffer_subdata(struct pipe_context *_pipe,
73                   struct pipe_resource *resource,
74                   unsigned usage, unsigned offset,
75                   unsigned size, const void *data);
76 
77 static void
78 tc_batch_check(UNUSED struct tc_batch *batch)
79 {
80    tc_assert(batch->sentinel == TC_SENTINEL);
81    tc_assert(batch->num_total_slots <= TC_SLOTS_PER_BATCH);
82 }
83 
84 static void
85 tc_debug_check(struct threaded_context *tc)
86 {
87    for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
88       tc_batch_check(&tc->batch_slots[i]);
89       tc_assert(tc->batch_slots[i].tc == tc);
90    }
91 }
92 
93 static void
94 tc_set_driver_thread(struct threaded_context *tc)
95 {
96 #ifndef NDEBUG
97    tc->driver_thread = thrd_current();
98 #endif
99 }
100 
101 static void
102 tc_clear_driver_thread(struct threaded_context *tc)
103 {
104 #ifndef NDEBUG
105    memset(&tc->driver_thread, 0, sizeof(tc->driver_thread));
106 #endif
107 }
108 
109 struct tc_batch_rp_info {
110    /* this is what drivers can see */
111    struct tc_renderpass_info info;
112    /* determines whether the info can be "safely" read by drivers or if it may still be in use */
113    struct util_queue_fence ready;
114    /* when a batch is full, the rp info rolls over onto 'next' */
115    struct tc_batch_rp_info *next;
116    /* when rp info has rolled over onto this struct, 'prev' is used to update pointers for realloc */
117    struct tc_batch_rp_info *prev;
118 };
119 
120 static struct tc_batch_rp_info *
121 tc_batch_rp_info(struct tc_renderpass_info *info)
122 {
123    return (struct tc_batch_rp_info *)info;
124 }
125 
126 static void
127 tc_sanitize_renderpass_info(struct threaded_context *tc)
128 {
129    tc->renderpass_info_recording->cbuf_invalidate = 0;
130    tc->renderpass_info_recording->zsbuf_invalidate = false;
131    tc->renderpass_info_recording->cbuf_load |= (~tc->renderpass_info_recording->cbuf_clear) & BITFIELD_MASK(PIPE_MAX_COLOR_BUFS);
132    if (tc->fb_resources[PIPE_MAX_COLOR_BUFS] && !tc_renderpass_info_is_zsbuf_used(tc->renderpass_info_recording))
133       /* this should be a "safe" way to indicate to the driver that both loads and stores are required;
134       * driver can always detect invalidation
135       */
136       tc->renderpass_info_recording->zsbuf_clear_partial = true;
137    if (tc->num_queries_active)
138       tc->renderpass_info_recording->has_query_ends = true;
139 }
140 
141 /* ensure the batch's array of renderpass data is large enough for the current index */
142 static void
143 tc_batch_renderpass_infos_resize(struct threaded_context *tc, struct tc_batch *batch)
144 {
145    unsigned size = batch->renderpass_infos.capacity;
146    unsigned cur_num = MAX2(batch->renderpass_info_idx, 0);
147 
148    if (size / sizeof(struct tc_batch_rp_info) > cur_num)
149       return;
150 
151    struct tc_batch_rp_info *infos = batch->renderpass_infos.data;
152    unsigned old_idx = batch->renderpass_info_idx - 1;
153    bool redo = tc->renderpass_info_recording &&
154                tc->renderpass_info_recording == &infos[old_idx].info;
155    if (!util_dynarray_resize(&batch->renderpass_infos, struct tc_batch_rp_info, cur_num + 10))
156       mesa_loge("tc: memory alloc fail!");
157 
158    if (size != batch->renderpass_infos.capacity) {
159       /* zero new allocation region */
160       uint8_t *data = batch->renderpass_infos.data;
161       memset(data + size, 0, batch->renderpass_infos.capacity - size);
162       unsigned start = size / sizeof(struct tc_batch_rp_info);
163       unsigned count = (batch->renderpass_infos.capacity - size) /
164                        sizeof(struct tc_batch_rp_info);
165       infos = batch->renderpass_infos.data;
166       if (infos->prev)
167          infos->prev->next = infos;
168       for (unsigned i = 0; i < count; i++)
169          util_queue_fence_init(&infos[start + i].ready);
170       /* re-set current recording info on resize */
171       if (redo)
172          tc->renderpass_info_recording = &infos[old_idx].info;
173    }
174 }
175 
176 /* signal that the renderpass info is "ready" for use by drivers and will no longer be updated */
177 static void
178 tc_signal_renderpass_info_ready(struct threaded_context *tc)
179 {
180    if (tc->renderpass_info_recording &&
181        !util_queue_fence_is_signalled(&tc_batch_rp_info(tc->renderpass_info_recording)->ready))
182       util_queue_fence_signal(&tc_batch_rp_info(tc->renderpass_info_recording)->ready);
183 }
184 
185 /* increment the current renderpass info struct for recording
186  * 'full_copy' is used for preserving data across non-blocking tc batch flushes
187  */
188 static void
189 tc_batch_increment_renderpass_info(struct threaded_context *tc, unsigned batch_idx, bool full_copy)
190 {
191    struct tc_batch *batch = &tc->batch_slots[batch_idx];
192    struct tc_batch_rp_info *tc_info = batch->renderpass_infos.data;
193 
194    if (tc_info[0].next || batch->num_total_slots) {
195       /* deadlock condition detected: all batches are in flight, renderpass hasn't ended
196        * (probably a cts case)
197        */
198       struct tc_batch_rp_info *info = tc_batch_rp_info(tc->renderpass_info_recording);
199       if (!util_queue_fence_is_signalled(&info->ready)) {
200          /* this batch is actively executing and the driver is waiting on the recording fence to signal */
201          /* force all buffer usage to avoid data loss */
202          info->info.cbuf_load = ~(BITFIELD_MASK(8) & info->info.cbuf_clear);
203          info->info.zsbuf_clear_partial = true;
204          info->info.has_query_ends = tc->num_queries_active > 0;
205          /* ensure threaded_context_get_renderpass_info() won't deadlock */
206          info->next = NULL;
207          util_queue_fence_signal(&info->ready);
208       }
209       /* always wait on the batch to finish since this will otherwise overwrite thread data */
210       util_queue_fence_wait(&batch->fence);
211    }
212    /* increment rp info and initialize it */
213    batch->renderpass_info_idx++;
214    tc_batch_renderpass_infos_resize(tc, batch);
215    tc_info = batch->renderpass_infos.data;
216 
217    if (full_copy) {
218       /* this should only be called when changing batches */
219       assert(batch->renderpass_info_idx == 0);
220       /* copy the previous data in its entirety: this is still the same renderpass */
221       if (tc->renderpass_info_recording) {
222          tc_info[batch->renderpass_info_idx].info.data = tc->renderpass_info_recording->data;
223          tc_batch_rp_info(tc->renderpass_info_recording)->next = &tc_info[batch->renderpass_info_idx];
224          tc_info[batch->renderpass_info_idx].prev = tc_batch_rp_info(tc->renderpass_info_recording);
225          /* guard against deadlock scenario */
226          assert(&tc_batch_rp_info(tc->renderpass_info_recording)->next->info != tc->renderpass_info_recording);
227       } else {
228          tc_info[batch->renderpass_info_idx].info.data = 0;
229          tc_info[batch->renderpass_info_idx].prev = NULL;
230       }
231    } else {
232       /* selectively copy: only the CSO metadata is copied, and a new framebuffer state will be added later */
233       tc_info[batch->renderpass_info_idx].info.data = 0;
234       if (tc->renderpass_info_recording) {
235          tc_info[batch->renderpass_info_idx].info.data16[2] = tc->renderpass_info_recording->data16[2];
236          tc_batch_rp_info(tc->renderpass_info_recording)->next = NULL;
237          tc_info[batch->renderpass_info_idx].prev = NULL;
238       }
239    }
240 
241    assert(!full_copy || !tc->renderpass_info_recording || tc_batch_rp_info(tc->renderpass_info_recording)->next);
242    /* signal existing info since it will not be used anymore */
243    tc_signal_renderpass_info_ready(tc);
244    util_queue_fence_reset(&tc_info[batch->renderpass_info_idx].ready);
245    /* guard against deadlock scenario */
246    assert(tc->renderpass_info_recording != &tc_info[batch->renderpass_info_idx].info);
247    /* this is now the current recording renderpass info */
248    tc->renderpass_info_recording = &tc_info[batch->renderpass_info_idx].info;
249    batch->max_renderpass_info_idx = batch->renderpass_info_idx;
250 }
251 
252 static ALWAYS_INLINE struct tc_renderpass_info *
253 tc_get_renderpass_info(struct threaded_context *tc)
254 {
255    return tc->renderpass_info_recording;
256 }
257 
258 /* update metadata at draw time */
259 static void
260 tc_parse_draw(struct threaded_context *tc)
261 {
262    struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
263 
264    if (info) {
265       /* all buffers that aren't cleared are considered loaded */
266       info->cbuf_load |= ~info->cbuf_clear;
267       if (!info->zsbuf_clear)
268          info->zsbuf_load = true;
269       /* previous invalidates are no longer relevant */
270       info->cbuf_invalidate = 0;
271       info->zsbuf_invalidate = false;
272       info->has_draw = true;
273       info->has_query_ends |= tc->query_ended;
274    }
275 
276    tc->in_renderpass = true;
277    tc->seen_fb_state = true;
278    tc->query_ended = false;
279 }
280 
281 static void *
282 to_call_check(void *ptr, unsigned num_slots)
283 {
284 #if TC_DEBUG >= 1
285    struct tc_call_base *call = ptr;
286    tc_assert(call->num_slots == num_slots);
287 #endif
288    return ptr;
289 }
290 #define to_call(ptr, type) ((struct type *)to_call_check((void *)(ptr), call_size(type)))
291 
292 #define size_to_slots(size)      DIV_ROUND_UP(size, 8)
293 #define call_size(type)          size_to_slots(sizeof(struct type))
294 #define call_size_with_slots(type, num_slots) size_to_slots( \
295    sizeof(struct type) + sizeof(((struct type*)NULL)->slot[0]) * (num_slots))
296 #define get_next_call(ptr, type) ((struct type*)((uint64_t*)ptr + call_size(type)))
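/* Sizing sketch (illustrative): each queued call occupies a whole number of
 * 8-byte slots in the batch. For example, a call struct with sizeof() == 28
 * bytes takes size_to_slots(28) = DIV_ROUND_UP(28, 8) = 4 slots, and
 * get_next_call() advances the iterator by exactly that slot count.
 */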
297 
298 ALWAYS_INLINE static void
299 tc_set_resource_batch_usage(struct threaded_context *tc, struct pipe_resource *pres)
300 {
301    /* ignore batch usage when persistent */
302    if (threaded_resource(pres)->last_batch_usage != INT8_MAX)
303       threaded_resource(pres)->last_batch_usage = tc->next;
304    threaded_resource(pres)->batch_generation = tc->batch_generation;
305 }
306 
307 ALWAYS_INLINE static void
308 tc_set_resource_batch_usage_persistent(struct threaded_context *tc, struct pipe_resource *pres, bool enable)
309 {
310    if (!pres)
311       return;
312    /* mark with special value to block any unsynchronized access */
313    threaded_resource(pres)->last_batch_usage = enable ? INT8_MAX : tc->next;
314    threaded_resource(pres)->batch_generation = tc->batch_generation;
315 }
316 
317 /* this can ONLY be used to check against the currently recording batch */
318 ALWAYS_INLINE static bool
319 tc_resource_batch_usage_test_busy(const struct threaded_context *tc, const struct pipe_resource *pres)
320 {
321    const struct threaded_resource *tbuf = (const struct threaded_resource*)pres;
322 
323    if (!tc->options.unsynchronized_texture_subdata)
324       return true;
325 
326    /* resource has persistent access: assume always busy */
327    if (tbuf->last_batch_usage == INT8_MAX)
328       return true;
329 
330    /* resource has never been seen */
331    if (tbuf->last_batch_usage == -1)
332       return false;
333 
334    /* resource has been seen but no batches have executed */
335    if (tc->last_completed == -1)
336       return true;
337 
338    /* begin comparisons checking number of times batches have cycled */
339    unsigned diff = tc->batch_generation - tbuf->batch_generation;
340    /* resource has been seen, batches have fully cycled at least once */
341    if (diff > 1)
342       return false;
343 
344    /* resource has been seen in current batch cycle: return whether batch has definitely completed */
345    if (diff == 0)
346       return tc->last_completed >= tbuf->last_batch_usage;
347 
348    /* resource has been seen within one batch cycle: check for batch wrapping */
349    if (tc->last_completed >= tbuf->last_batch_usage)
350       /* this or a subsequent pre-wrap batch was the last to definitely complete: resource is idle */
351       return false;
352 
353    /* batch execution has not definitely wrapped: resource is definitely not idle */
354    if (tc->last_completed > tc->next)
355       return true;
356 
357    /* resource was seen pre-wrap, batch execution has definitely wrapped: idle */
358    if (tbuf->last_batch_usage > tc->last_completed)
359       return false;
360 
361    /* tc->last_completed is not an exact measurement, so anything else is considered busy */
362    return true;
363 }
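/* Usage sketch (an assumption about intended use, not code taken from this
 * file): a caller that wants to touch resource storage directly from the
 * front-end thread can use the result to pick a path, treating "busy" as
 * "must synchronize":
 *
 *    if (!tc_resource_batch_usage_test_busy(tc, pres)) {
 *       // provably idle with respect to queued batches: unsynchronized access
 *    } else {
 *       // possibly still referenced by an executing batch: queue or sync
 *    }
 */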
364 
365 /* Assign src to dst while dst is uninitialized. */
366 static inline void
367 tc_set_resource_reference(struct pipe_resource **dst, struct pipe_resource *src)
368 {
369    *dst = src;
370    pipe_reference(NULL, &src->reference); /* only increment refcount */
371 }
372 
373 /* Assign src to dst while dst is uninitialized. */
374 static inline void
375 tc_set_vertex_state_reference(struct pipe_vertex_state **dst,
376                               struct pipe_vertex_state *src)
377 {
378    *dst = src;
379    pipe_reference(NULL, &src->reference); /* only increment refcount */
380 }
381 
382 /* Unreference dst but don't touch the dst pointer. */
383 static inline void
384 tc_drop_resource_reference(struct pipe_resource *dst)
385 {
386    if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
387       pipe_resource_destroy(dst);
388 }
389 
390 /* Unreference dst but don't touch the dst pointer. */
391 static inline void
392 tc_drop_surface_reference(struct pipe_surface *dst)
393 {
394    if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
395       dst->context->surface_destroy(dst->context, dst);
396 }
397 
398 /* Unreference dst but don't touch the dst pointer. */
399 static inline void
400 tc_drop_so_target_reference(struct pipe_stream_output_target *dst)
401 {
402    if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
403       dst->context->stream_output_target_destroy(dst->context, dst);
404 }
405 
406 /**
407  * Subtract the given number of references.
408  */
409 static inline void
410 tc_drop_vertex_state_references(struct pipe_vertex_state *dst, int num_refs)
411 {
412    int count = p_atomic_add_return(&dst->reference.count, -num_refs);
413 
414    assert(count >= 0);
415    /* Underflows shouldn't happen, but let's be safe. */
416    if (count <= 0)
417       dst->screen->vertex_state_destroy(dst->screen, dst);
418 }
419 
420 /* We don't want to read or write min_index and max_index, because
421  * it shouldn't be needed by drivers at this point.
422  */
423 #define DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX \
424    offsetof(struct pipe_draw_info, min_index)
425 
426 ALWAYS_INLINE static struct tc_renderpass_info *
427 incr_rp_info(struct tc_renderpass_info *tc_info)
428 {
429    struct tc_batch_rp_info *info = tc_batch_rp_info(tc_info);
430    return &info[1].info;
431 }
432 
433 ALWAYS_INLINE static void
434 batch_execute(struct tc_batch *batch, struct pipe_context *pipe, uint64_t *last, bool parsing)
435 {
436    /* if the framebuffer state is persisting from a previous batch,
437     * begin incrementing renderpass info on the first set_framebuffer_state call
438     */
439    bool first = !batch->first_set_fb;
440    const tc_execute *execute_func = batch->tc->execute_func;
441 
442    for (uint64_t *iter = batch->slots; iter != last;) {
443       struct tc_call_base *call = (struct tc_call_base *)iter;
444 
445       tc_assert(call->sentinel == TC_SENTINEL);
446 
447 #if TC_DEBUG >= 3
448       tc_printf("CALL: %s", tc_call_names[call->call_id]);
449 #endif
450 
451       TC_TRACE_SCOPE(call->call_id);
452 
453       iter += execute_func[call->call_id](pipe, call);
454 
455       if (parsing) {
456          if (call->call_id == TC_CALL_flush) {
457             /* always increment renderpass info for non-deferred flushes */
458             batch->tc->renderpass_info = incr_rp_info(batch->tc->renderpass_info);
459             /* if a flush happens, renderpass info is always incremented after */
460             first = false;
461          } else if (call->call_id == TC_CALL_set_framebuffer_state) {
462             /* the renderpass info pointer is already set at the start of the batch,
463              * so don't increment on the first set_framebuffer_state call
464              */
465             if (!first)
466                batch->tc->renderpass_info = incr_rp_info(batch->tc->renderpass_info);
467             first = false;
468          } else if (call->call_id >= TC_CALL_draw_single &&
469                     call->call_id <= TC_CALL_draw_vstate_multi) {
470             /* if a draw happens before a set_framebuffer_state on this batch,
471              * begin incrementing renderpass data
472              */
473             first = false;
474          }
475       }
476    }
477 }
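/* Dispatch note: execute_func[] is indexed by tc_call_id and each handler
 * returns the number of 8-byte slots its call occupied, which is how the loop
 * above advances "iter" without knowing the concrete call type. For example,
 * tc_call_begin_query() (defined later in this file) returns
 * call_size(tc_query_call) after invoking pipe->begin_query().
 */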
478 
479 static void
480 tc_batch_execute(void *job, UNUSED void *gdata, int thread_index)
481 {
482    struct tc_batch *batch = job;
483    struct pipe_context *pipe = batch->tc->pipe;
484    uint64_t *last = &batch->slots[batch->num_total_slots];
485 
486    tc_batch_check(batch);
487    tc_set_driver_thread(batch->tc);
488 
489    assert(!batch->token);
490 
491    /* setup renderpass info */
492    batch->tc->renderpass_info = batch->renderpass_infos.data;
493 
494    if (batch->tc->options.parse_renderpass_info) {
495       batch_execute(batch, pipe, last, true);
496 
497       struct tc_batch_rp_info *info = batch->renderpass_infos.data;
498       for (unsigned i = 0; i < batch->max_renderpass_info_idx + 1; i++) {
499          if (info[i].next)
500             info[i].next->prev = NULL;
501          info[i].next = NULL;
502       }
503    } else {
504       batch_execute(batch, pipe, last, false);
505    }
506 
507    /* Add the fence to the list of fences for the driver to signal at the next
508     * flush, which we use for tracking which buffers are referenced by
509     * an unflushed command buffer.
510     */
511    struct threaded_context *tc = batch->tc;
512    struct util_queue_fence *fence =
513       &tc->buffer_lists[batch->buffer_list_index].driver_flushed_fence;
514 
515    if (tc->options.driver_calls_flush_notify) {
516       tc->signal_fences_next_flush[tc->num_signal_fences_next_flush++] = fence;
517 
518       /* Since our buffer lists are chained as a ring, we need to flush
519        * the context twice as we go around the ring to make the driver signal
520        * the buffer list fences, so that the producer thread can reuse the buffer
521        * list structures for the next batches without waiting.
522        */
523       unsigned half_ring = TC_MAX_BUFFER_LISTS / 2;
524       if (batch->buffer_list_index % half_ring == half_ring - 1)
525          pipe->flush(pipe, NULL, PIPE_FLUSH_ASYNC);
526    } else {
527       util_queue_fence_signal(fence);
528    }
529 
530    tc_clear_driver_thread(batch->tc);
531    tc_batch_check(batch);
532    batch->num_total_slots = 0;
533    batch->last_mergeable_call = NULL;
534    batch->first_set_fb = false;
535    batch->max_renderpass_info_idx = 0;
536    batch->tc->last_completed = batch->batch_idx;
537 }
538 
539 static void
540 tc_begin_next_buffer_list(struct threaded_context *tc)
541 {
542    tc->next_buf_list = (tc->next_buf_list + 1) % TC_MAX_BUFFER_LISTS;
543 
544    tc->batch_slots[tc->next].buffer_list_index = tc->next_buf_list;
545 
546    /* Clear the buffer list in the new empty batch. */
547    struct tc_buffer_list *buf_list = &tc->buffer_lists[tc->next_buf_list];
548    assert(util_queue_fence_is_signalled(&buf_list->driver_flushed_fence));
549    util_queue_fence_reset(&buf_list->driver_flushed_fence); /* set to unsignalled */
550    BITSET_ZERO(buf_list->buffer_list);
551 
552    tc->add_all_gfx_bindings_to_buffer_list = true;
553    tc->add_all_compute_bindings_to_buffer_list = true;
554 }
555 
556 static void
557 tc_add_call_end(struct tc_batch *next)
558 {
559    /* Add a dummy last call that won't be executed, but will indicate the end
560     * of the batch. It's for calls that always look at the next call and this
561     * stops them looking farther ahead.
562     */
563    assert(next->num_total_slots < TC_SLOTS_PER_BATCH);
564    struct tc_call_base *call =
565       (struct tc_call_base*)&next->slots[next->num_total_slots];
566    call->call_id = TC_NUM_CALLS;
567    call->num_slots = 1;
568 }
569 
570 static void
571 tc_batch_flush(struct threaded_context *tc, bool full_copy)
572 {
573    struct tc_batch *next = &tc->batch_slots[tc->next];
574    unsigned next_id = (tc->next + 1) % TC_MAX_BATCHES;
575 
576    tc_assert(next->num_total_slots != 0);
577    tc_add_call_end(next);
578 
579    tc_batch_check(next);
580    tc_debug_check(tc);
581    tc->bytes_mapped_estimate = 0;
582    tc->bytes_replaced_estimate = 0;
583    p_atomic_add(&tc->num_offloaded_slots, next->num_total_slots);
584 
585    if (next->token) {
586       next->token->tc = NULL;
587       tc_unflushed_batch_token_reference(&next->token, NULL);
588    }
589    /* reset renderpass info index for subsequent use */
590    next->renderpass_info_idx = -1;
591 
592    /* always increment renderpass info on batch flush;
593     * renderpass info can only be accessed by its owner batch during execution
594     */
595    if (tc->renderpass_info_recording) {
596       tc->batch_slots[next_id].first_set_fb = full_copy;
597       tc_batch_increment_renderpass_info(tc, next_id, full_copy);
598    }
599 
600    util_queue_add_job(&tc->queue, next, &next->fence, tc_batch_execute,
601                       NULL, 0);
602    tc->last = tc->next;
603    tc->next = next_id;
604    if (next_id == 0)
605       tc->batch_generation++;
606    tc_begin_next_buffer_list(tc);
607 
608 }
609 
610 /* This is the function that adds variable-sized calls into the current
611  * batch. It also flushes the batch if there is not enough space there.
612  * All other higher-level "add" functions use it.
613  */
614 static void *
615 tc_add_sized_call(struct threaded_context *tc, enum tc_call_id id,
616                   unsigned num_slots)
617 {
618    TC_TRACE_SCOPE(id);
619    struct tc_batch *next = &tc->batch_slots[tc->next];
620    assert(num_slots <= TC_SLOTS_PER_BATCH - 1);
621    tc_debug_check(tc);
622 
623    if (unlikely(next->num_total_slots + num_slots > TC_SLOTS_PER_BATCH - 1)) {
624       /* copy existing renderpass info during flush */
625       tc_batch_flush(tc, true);
626       next = &tc->batch_slots[tc->next];
627       tc_assert(next->num_total_slots == 0);
628       tc_assert(next->last_mergeable_call == NULL);
629    }
630 
631    tc_assert(util_queue_fence_is_signalled(&next->fence));
632 
633    struct tc_call_base *call = (struct tc_call_base*)&next->slots[next->num_total_slots];
634    next->num_total_slots += num_slots;
635 
636 #if !defined(NDEBUG) && TC_DEBUG >= 1
637    call->sentinel = TC_SENTINEL;
638 #endif
639    call->call_id = id;
640    call->num_slots = num_slots;
641 
642 #if TC_DEBUG >= 3
643    tc_printf("ENQUEUE: %s", tc_call_names[id]);
644 #endif
645 
646    tc_debug_check(tc);
647    return call;
648 }
649 
650 #define tc_add_call(tc, execute, type) \
651    ((struct type*)tc_add_sized_call(tc, execute, call_size(type)))
652 
653 #define tc_add_slot_based_call(tc, execute, type, num_slots) \
654    ((struct type*)tc_add_sized_call(tc, execute, \
655                                     call_size_with_slots(type, num_slots)))
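/* Usage sketch, mirroring how the query entry points below enqueue work:
 *
 *    struct tc_query_call *p =
 *       tc_add_call(tc, TC_CALL_destroy_query, tc_query_call);
 *    p->query = query;
 *
 * tc_add_sized_call() reserves the slots (flushing the current batch first if
 * it would overflow) and fills in the call header; the caller only writes the
 * payload fields.
 */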
656 
657 /* Returns the last mergeable call that was added to the unflushed
658  * batch, or NULL if the address of that call is not currently known
659  * or no such call exists in the unflushed batch.
660  */
661 static struct tc_call_base *
662 tc_get_last_mergeable_call(struct threaded_context *tc)
663 {
664    struct tc_batch *batch = &tc->batch_slots[tc->next];
665    struct tc_call_base *call = batch->last_mergeable_call;
666 
667    tc_assert(call == NULL || call->num_slots <= batch->num_total_slots);
668 
669    if (call && (uint64_t *)call == &batch->slots[batch->num_total_slots - call->num_slots])
670       return call;
671    else
672       return NULL;
673 }
674 
675 /* Increases the size of the last call in the unflushed batch to the
676  * given number of slots, if possible, without changing the call's data.
677  */
678 static bool
679 tc_enlarge_last_mergeable_call(struct threaded_context *tc, unsigned desired_num_slots)
680 {
681    struct tc_batch *batch = &tc->batch_slots[tc->next];
682    struct tc_call_base *call = tc_get_last_mergeable_call(tc);
683 
684    tc_assert(call);
685    tc_assert(desired_num_slots >= call->num_slots);
686 
687    unsigned added_slots = desired_num_slots - call->num_slots;
688 
689    if (unlikely(batch->num_total_slots + added_slots > TC_SLOTS_PER_BATCH - 1))
690       return false;
691 
692    batch->num_total_slots += added_slots;
693    call->num_slots += added_slots;
694 
695    return true;
696 }
697 
698 static void
699 tc_mark_call_mergeable(struct threaded_context *tc, struct tc_call_base *call)
700 {
701    struct tc_batch *batch = &tc->batch_slots[tc->next];
702    tc_assert(call->num_slots <= batch->num_total_slots);
703    tc_assert((uint64_t *)call == &batch->slots[batch->num_total_slots - call->num_slots]);
704    batch->last_mergeable_call = call;
705 }
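/* Merge pattern (sketch; the concrete call id and payload handling belong to
 * callers outside this excerpt): a producer that can append data to its most
 * recent call is expected to do roughly
 *
 *    struct tc_call_base *last = tc_get_last_mergeable_call(tc);
 *    if (last && last->call_id == <matching id> &&
 *        tc_enlarge_last_mergeable_call(tc, <new slot count>)) {
 *       // append the new payload to the existing call
 *    } else {
 *       // add a fresh call and tc_mark_call_mergeable() it
 *    }
 */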
706 
707 static bool
708 tc_is_sync(struct threaded_context *tc)
709 {
710    struct tc_batch *last = &tc->batch_slots[tc->last];
711    struct tc_batch *next = &tc->batch_slots[tc->next];
712 
713    return util_queue_fence_is_signalled(&last->fence) &&
714           !next->num_total_slots;
715 }
716 
717 static void
718 _tc_sync(struct threaded_context *tc, UNUSED const char *info, UNUSED const char *func)
719 {
720    struct tc_batch *last = &tc->batch_slots[tc->last];
721    struct tc_batch *next = &tc->batch_slots[tc->next];
722    bool synced = false;
723 
724    MESA_TRACE_SCOPE(func);
725 
726    tc_debug_check(tc);
727 
728    if (tc->options.parse_renderpass_info && tc->in_renderpass && !tc->flushing) {
729       /* corner case: if tc syncs for any reason but a driver flush during a renderpass,
730        * then the current renderpass info MUST be signaled to avoid deadlocking the driver
731        *
732        * this is not a "complete" signal operation, however, as it's unknown what calls may
733        * come after this one, which means that framebuffer attachment data is unreliable
734        *
735        * to avoid erroneously passing bad state to the driver (e.g., allowing zsbuf elimination),
736        * force all attachments active and assume the app was going to get bad perf here anyway
737        */
738       tc_sanitize_renderpass_info(tc);
739    }
740    tc_signal_renderpass_info_ready(tc);
741 
742    /* Only wait for queued calls... */
743    if (!util_queue_fence_is_signalled(&last->fence)) {
744       util_queue_fence_wait(&last->fence);
745       synced = true;
746    }
747 
748    tc_debug_check(tc);
749 
750    if (next->token) {
751       next->token->tc = NULL;
752       tc_unflushed_batch_token_reference(&next->token, NULL);
753    }
754 
755    /* .. and execute unflushed calls directly. */
756    if (next->num_total_slots) {
757       p_atomic_add(&tc->num_direct_slots, next->num_total_slots);
758       tc->bytes_mapped_estimate = 0;
759       tc->bytes_replaced_estimate = 0;
760       tc_add_call_end(next);
761       tc_batch_execute(next, NULL, 0);
762       tc_begin_next_buffer_list(tc);
763       synced = true;
764    }
765 
766    if (synced) {
767       p_atomic_inc(&tc->num_syncs);
768 
769       if (tc_strcmp(func, "tc_destroy") != 0) {
770          tc_printf("sync %s %s", func, info);
771       }
772    }
773 
774    tc_debug_check(tc);
775 
776    if (tc->options.parse_renderpass_info) {
777       int renderpass_info_idx = next->renderpass_info_idx;
778       if (renderpass_info_idx > 0) {
779          /* don't reset if fb state is unflushed */
780          bool fb_no_draw = tc->seen_fb_state && !tc->renderpass_info_recording->has_draw;
781          uint32_t fb_info = tc->renderpass_info_recording->data32[0];
782          next->renderpass_info_idx = -1;
783          tc_batch_increment_renderpass_info(tc, tc->next, false);
784          if (fb_no_draw)
785             tc->renderpass_info_recording->data32[0] = fb_info;
786       } else if (tc->renderpass_info_recording->has_draw) {
787          tc->renderpass_info_recording->data32[0] = 0;
788       }
789       tc->seen_fb_state = false;
790       tc->query_ended = false;
791    }
792 }
793 
794 #define tc_sync(tc) _tc_sync(tc, "", __func__)
795 #define tc_sync_msg(tc, info) _tc_sync(tc, info, __func__)
796 
797 /**
798  * Call this from fence_finish for same-context fence waits of deferred fences
799  * that haven't been flushed yet.
800  *
801  * The passed pipe_context must be the one passed to pipe_screen::fence_finish,
802  * i.e., the wrapped one.
803  */
804 void
805 threaded_context_flush(struct pipe_context *_pipe,
806                        struct tc_unflushed_batch_token *token,
807                        bool prefer_async)
808 {
809    struct threaded_context *tc = threaded_context(_pipe);
810 
811    /* This is called from the gallium frontend / application thread. */
812    if (token->tc && token->tc == tc) {
813       struct tc_batch *last = &tc->batch_slots[tc->last];
814 
815       /* Prefer to do the flush in the driver thread if it is already
816        * running. That should be better for cache locality.
817        */
818       if (prefer_async || !util_queue_fence_is_signalled(&last->fence))
819          tc_batch_flush(tc, false);
820       else
821          tc_sync(token->tc);
822    }
823 }
824 
825 static void
826 tc_add_to_buffer_list(struct tc_buffer_list *next, struct pipe_resource *buf)
827 {
828    uint32_t id = threaded_resource(buf)->buffer_id_unique;
829    BITSET_SET(next->buffer_list, id & TC_BUFFER_ID_MASK);
830 }
831 
832 /* Reset a range of buffer binding slots. */
833 static void
834 tc_unbind_buffers(uint32_t *binding, unsigned count)
835 {
836    if (count)
837       memset(binding, 0, sizeof(*binding) * count);
838 }
839 
840 static void
841 tc_add_bindings_to_buffer_list(BITSET_WORD *buffer_list, const uint32_t *bindings,
842                                unsigned count)
843 {
844    for (unsigned i = 0; i < count; i++) {
845       if (bindings[i])
846          BITSET_SET(buffer_list, bindings[i] & TC_BUFFER_ID_MASK);
847    }
848 }
849 
850 static bool
851 tc_rebind_bindings(uint32_t old_id, uint32_t new_id, uint32_t *bindings,
852                    unsigned count)
853 {
854    unsigned rebind_count = 0;
855 
856    for (unsigned i = 0; i < count; i++) {
857       if (bindings[i] == old_id) {
858          bindings[i] = new_id;
859          rebind_count++;
860       }
861    }
862    return rebind_count;
863 }
864 
865 static void
866 tc_add_shader_bindings_to_buffer_list(struct threaded_context *tc,
867                                       BITSET_WORD *buffer_list,
868                                       enum pipe_shader_type shader)
869 {
870    tc_add_bindings_to_buffer_list(buffer_list, tc->const_buffers[shader],
871                                   tc->max_const_buffers);
872    if (tc->seen_shader_buffers[shader]) {
873       tc_add_bindings_to_buffer_list(buffer_list, tc->shader_buffers[shader],
874                                      tc->max_shader_buffers);
875    }
876    if (tc->seen_image_buffers[shader]) {
877       tc_add_bindings_to_buffer_list(buffer_list, tc->image_buffers[shader],
878                                      tc->max_images);
879    }
880    if (tc->seen_sampler_buffers[shader]) {
881       tc_add_bindings_to_buffer_list(buffer_list, tc->sampler_buffers[shader],
882                                      tc->max_samplers);
883    }
884 }
885 
886 static unsigned
887 tc_rebind_shader_bindings(struct threaded_context *tc, uint32_t old_id,
888                           uint32_t new_id, enum pipe_shader_type shader, uint32_t *rebind_mask)
889 {
890    unsigned ubo = 0, ssbo = 0, img = 0, sampler = 0;
891 
892    ubo = tc_rebind_bindings(old_id, new_id, tc->const_buffers[shader],
893                             tc->max_const_buffers);
894    if (ubo)
895       *rebind_mask |= BITFIELD_BIT(TC_BINDING_UBO_VS) << shader;
896    if (tc->seen_shader_buffers[shader]) {
897       ssbo = tc_rebind_bindings(old_id, new_id, tc->shader_buffers[shader],
898                                 tc->max_shader_buffers);
899       if (ssbo)
900          *rebind_mask |= BITFIELD_BIT(TC_BINDING_SSBO_VS) << shader;
901    }
902    if (tc->seen_image_buffers[shader]) {
903       img = tc_rebind_bindings(old_id, new_id, tc->image_buffers[shader],
904                                tc->max_images);
905       if (img)
906          *rebind_mask |= BITFIELD_BIT(TC_BINDING_IMAGE_VS) << shader;
907    }
908    if (tc->seen_sampler_buffers[shader]) {
909       sampler = tc_rebind_bindings(old_id, new_id, tc->sampler_buffers[shader],
910                                    tc->max_samplers);
911       if (sampler)
912          *rebind_mask |= BITFIELD_BIT(TC_BINDING_SAMPLERVIEW_VS) << shader;
913    }
914    return ubo + ssbo + img + sampler;
915 }
916 
917 /* Add all bound buffers used by VS/TCS/TES/GS/FS to the buffer list.
918  * This is called by the first draw call in a batch when we want to inherit
919  * all bindings set by the previous batch.
920  */
921 static void
922 tc_add_all_gfx_bindings_to_buffer_list(struct threaded_context *tc)
923 {
924    BITSET_WORD *buffer_list = tc->buffer_lists[tc->next_buf_list].buffer_list;
925 
926    tc_add_bindings_to_buffer_list(buffer_list, tc->vertex_buffers, tc->num_vertex_buffers);
927    if (tc->seen_streamout_buffers)
928       tc_add_bindings_to_buffer_list(buffer_list, tc->streamout_buffers, PIPE_MAX_SO_BUFFERS);
929 
930    tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_VERTEX);
931    tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_FRAGMENT);
932 
933    if (tc->seen_tcs)
934       tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_TESS_CTRL);
935    if (tc->seen_tes)
936       tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_TESS_EVAL);
937    if (tc->seen_gs)
938       tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_GEOMETRY);
939 
940    tc->add_all_gfx_bindings_to_buffer_list = false;
941 }
942 
943 /* Add all bound buffers used by compute to the buffer list.
944  * This is called by the first compute call in a batch when we want to inherit
945  * all bindings set by the previous batch.
946  */
947 static void
948 tc_add_all_compute_bindings_to_buffer_list(struct threaded_context *tc)
949 {
950    BITSET_WORD *buffer_list = tc->buffer_lists[tc->next_buf_list].buffer_list;
951 
952    tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_COMPUTE);
953    tc->add_all_compute_bindings_to_buffer_list = false;
954 }
955 
956 static unsigned
957 tc_rebind_buffer(struct threaded_context *tc, uint32_t old_id, uint32_t new_id, uint32_t *rebind_mask)
958 {
959    unsigned vbo = 0, so = 0;
960 
961    vbo = tc_rebind_bindings(old_id, new_id, tc->vertex_buffers,
962                             tc->num_vertex_buffers);
963    if (vbo)
964       *rebind_mask |= BITFIELD_BIT(TC_BINDING_VERTEX_BUFFER);
965 
966    if (tc->seen_streamout_buffers) {
967       so = tc_rebind_bindings(old_id, new_id, tc->streamout_buffers,
968                               PIPE_MAX_SO_BUFFERS);
969       if (so)
970          *rebind_mask |= BITFIELD_BIT(TC_BINDING_STREAMOUT_BUFFER);
971    }
972    unsigned rebound = vbo + so;
973 
974    rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_VERTEX, rebind_mask);
975    rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_FRAGMENT, rebind_mask);
976 
977    if (tc->seen_tcs)
978       rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_TESS_CTRL, rebind_mask);
979    if (tc->seen_tes)
980       rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_TESS_EVAL, rebind_mask);
981    if (tc->seen_gs)
982       rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_GEOMETRY, rebind_mask);
983 
984    rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_COMPUTE, rebind_mask);
985 
986    if (rebound)
987       BITSET_SET(tc->buffer_lists[tc->next_buf_list].buffer_list, new_id & TC_BUFFER_ID_MASK);
988    return rebound;
989 }
990 
991 static bool
992 tc_is_buffer_bound_with_mask(uint32_t id, uint32_t *bindings, unsigned binding_mask)
993 {
994    while (binding_mask) {
995       if (bindings[u_bit_scan(&binding_mask)] == id)
996          return true;
997    }
998    return false;
999 }
1000 
1001 static bool
1002 tc_is_buffer_shader_bound_for_write(struct threaded_context *tc, uint32_t id,
1003                                     enum pipe_shader_type shader)
1004 {
1005    if (tc->seen_shader_buffers[shader] &&
1006        tc_is_buffer_bound_with_mask(id, tc->shader_buffers[shader],
1007                                     tc->shader_buffers_writeable_mask[shader]))
1008       return true;
1009 
1010    if (tc->seen_image_buffers[shader] &&
1011        tc_is_buffer_bound_with_mask(id, tc->image_buffers[shader],
1012                                     tc->image_buffers_writeable_mask[shader]))
1013       return true;
1014 
1015    return false;
1016 }
1017 
1018 static bool
1019 tc_is_buffer_bound_for_write(struct threaded_context *tc, uint32_t id)
1020 {
1021    if (tc->seen_streamout_buffers &&
1022        tc_is_buffer_bound_with_mask(id, tc->streamout_buffers,
1023                                     BITFIELD_MASK(PIPE_MAX_SO_BUFFERS)))
1024       return true;
1025 
1026    if (tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_VERTEX) ||
1027        tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_FRAGMENT) ||
1028        tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_COMPUTE))
1029       return true;
1030 
1031    if (tc->seen_tcs &&
1032        tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_TESS_CTRL))
1033       return true;
1034 
1035    if (tc->seen_tes &&
1036        tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_TESS_EVAL))
1037       return true;
1038 
1039    if (tc->seen_gs &&
1040        tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_GEOMETRY))
1041       return true;
1042 
1043    return false;
1044 }
1045 
1046 static bool
1047 tc_is_buffer_busy(struct threaded_context *tc, struct threaded_resource *tbuf,
1048                   unsigned map_usage)
1049 {
1050    if (!tc->options.is_resource_busy)
1051       return true;
1052 
1053    uint32_t id_hash = tbuf->buffer_id_unique & TC_BUFFER_ID_MASK;
1054 
1055    for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) {
1056       struct tc_buffer_list *buf_list = &tc->buffer_lists[i];
1057 
1058       /* If the buffer is referenced by a batch that hasn't been flushed (by tc or the driver),
1059        * then the buffer is considered busy. */
1060       if (!util_queue_fence_is_signalled(&buf_list->driver_flushed_fence) &&
1061           BITSET_TEST(buf_list->buffer_list, id_hash))
1062          return true;
1063    }
1064 
1065    /* The buffer isn't referenced by any unflushed batch: we can safely ask the driver whether
1066     * this buffer is busy or not. */
1067    return tc->options.is_resource_busy(tc->pipe->screen, tbuf->latest, map_usage);
1068 }
1069 
1070 /**
1071  * allow_cpu_storage should be false for user memory and imported buffers.
1072  */
1073 void
1074 threaded_resource_init(struct pipe_resource *res, bool allow_cpu_storage)
1075 {
1076    struct threaded_resource *tres = threaded_resource(res);
1077 
1078    tres->latest = &tres->b;
1079    tres->cpu_storage = NULL;
1080    util_range_init(&tres->valid_buffer_range);
1081    tres->is_shared = false;
1082    tres->is_user_ptr = false;
1083    tres->buffer_id_unique = 0;
1084    tres->pending_staging_uploads = 0;
1085    tres->last_batch_usage = -1;
1086    util_range_init(&tres->pending_staging_uploads_range);
1087 
1088    if (allow_cpu_storage &&
1089        !(res->flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
1090                        PIPE_RESOURCE_FLAG_SPARSE |
1091                        PIPE_RESOURCE_FLAG_ENCRYPTED)) &&
1092        /* We need buffer invalidation and buffer busyness tracking for the CPU
1093         * storage, which aren't supported with pipe_vertex_state. */
1094        !(res->bind & PIPE_BIND_VERTEX_STATE))
1095       tres->allow_cpu_storage = true;
1096    else
1097       tres->allow_cpu_storage = false;
1098 }
1099 
1100 void
1101 threaded_resource_deinit(struct pipe_resource *res)
1102 {
1103    struct threaded_resource *tres = threaded_resource(res);
1104 
1105    if (tres->latest != &tres->b)
1106            pipe_resource_reference(&tres->latest, NULL);
1107    util_range_destroy(&tres->valid_buffer_range);
1108    util_range_destroy(&tres->pending_staging_uploads_range);
1109    align_free(tres->cpu_storage);
1110 }
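/* Lifecycle note (assumption; the call sites live in driver code outside this
 * file): drivers are expected to pair the two helpers above with their own
 * resource management, roughly
 *
 *    threaded_resource_init(&tres->b, allow_cpu_storage);   // at creation
 *    ...
 *    threaded_resource_deinit(&tres->b);                    // at destruction
 *
 * where "tres->b" stands for the pipe_resource embedded in a threaded_resource
 * and is only illustrative.
 */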
1111 
1112 struct pipe_context *
1113 threaded_context_unwrap_sync(struct pipe_context *pipe)
1114 {
1115    if (!pipe || !pipe->priv)
1116       return pipe;
1117 
1118    tc_sync(threaded_context(pipe));
1119    return (struct pipe_context*)pipe->priv;
1120 }
1121 
1122 
1123 /********************************************************************
1124  * simple functions
1125  */
1126 
1127 #define TC_FUNC1(func, qualifier, type, deref, addr, ...) \
1128    struct tc_call_##func { \
1129       struct tc_call_base base; \
1130       type state; \
1131    }; \
1132    \
1133    static uint16_t \
1134    tc_call_##func(struct pipe_context *pipe, void *call) \
1135    { \
1136       pipe->func(pipe, addr(to_call(call, tc_call_##func)->state)); \
1137       return call_size(tc_call_##func); \
1138    } \
1139    \
1140    static void \
1141    tc_##func(struct pipe_context *_pipe, qualifier type deref param) \
1142    { \
1143       struct threaded_context *tc = threaded_context(_pipe); \
1144       struct tc_call_##func *p = (struct tc_call_##func*) \
1145                      tc_add_call(tc, TC_CALL_##func, tc_call_##func); \
1146       p->state = deref(param); \
1147       __VA_ARGS__; \
1148    }
1149 
1150 TC_FUNC1(set_active_query_state, , bool, , )
1151 
1152 TC_FUNC1(set_blend_color, const, struct pipe_blend_color, *, &)
1153 TC_FUNC1(set_stencil_ref, const, struct pipe_stencil_ref, , )
1154 TC_FUNC1(set_clip_state, const, struct pipe_clip_state, *, &)
1155 TC_FUNC1(set_sample_mask, , unsigned, , )
1156 TC_FUNC1(set_min_samples, , unsigned, , )
1157 TC_FUNC1(set_polygon_stipple, const, struct pipe_poly_stipple, *, &)
1158 
1159 TC_FUNC1(texture_barrier, , unsigned, , )
1160 TC_FUNC1(memory_barrier, , unsigned, , )
1161 TC_FUNC1(delete_texture_handle, , uint64_t, , )
1162 TC_FUNC1(delete_image_handle, , uint64_t, , )
1163 TC_FUNC1(set_frontend_noop, , bool, , )
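/* Expansion sketch: TC_FUNC1(set_sample_mask, , unsigned, , ) above generates
 * roughly the following (illustrative, not literal preprocessor output):
 *
 *    struct tc_call_set_sample_mask { struct tc_call_base base; unsigned state; };
 *
 *    static uint16_t
 *    tc_call_set_sample_mask(struct pipe_context *pipe, void *call)
 *    {
 *       pipe->set_sample_mask(pipe, to_call(call, tc_call_set_sample_mask)->state);
 *       return call_size(tc_call_set_sample_mask);
 *    }
 *
 *    static void
 *    tc_set_sample_mask(struct pipe_context *_pipe, unsigned param)
 *    {
 *       struct threaded_context *tc = threaded_context(_pipe);
 *       struct tc_call_set_sample_mask *p =
 *          tc_add_call(tc, TC_CALL_set_sample_mask, tc_call_set_sample_mask);
 *       p->state = param;
 *    }
 */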
1164 
1165 
1166 /********************************************************************
1167  * queries
1168  */
1169 
1170 static struct pipe_query *
1171 tc_create_query(struct pipe_context *_pipe, unsigned query_type,
1172                 unsigned index)
1173 {
1174    struct threaded_context *tc = threaded_context(_pipe);
1175    struct pipe_context *pipe = tc->pipe;
1176 
1177    return pipe->create_query(pipe, query_type, index);
1178 }
1179 
1180 static struct pipe_query *
1181 tc_create_batch_query(struct pipe_context *_pipe, unsigned num_queries,
1182                       unsigned *query_types)
1183 {
1184    struct threaded_context *tc = threaded_context(_pipe);
1185    struct pipe_context *pipe = tc->pipe;
1186 
1187    return pipe->create_batch_query(pipe, num_queries, query_types);
1188 }
1189 
1190 struct tc_query_call {
1191    struct tc_call_base base;
1192    struct pipe_query *query;
1193 };
1194 
1195 static uint16_t
1196 tc_call_destroy_query(struct pipe_context *pipe, void *call)
1197 {
1198    struct pipe_query *query = to_call(call, tc_query_call)->query;
1199    struct threaded_query *tq = threaded_query(query);
1200 
1201    if (list_is_linked(&tq->head_unflushed))
1202       list_del(&tq->head_unflushed);
1203 
1204    pipe->destroy_query(pipe, query);
1205    return call_size(tc_query_call);
1206 }
1207 
1208 static void
1209 tc_destroy_query(struct pipe_context *_pipe, struct pipe_query *query)
1210 {
1211    struct threaded_context *tc = threaded_context(_pipe);
1212 
1213    tc_add_call(tc, TC_CALL_destroy_query, tc_query_call)->query = query;
1214 }
1215 
1216 static uint16_t
1217 tc_call_begin_query(struct pipe_context *pipe, void *call)
1218 {
1219    pipe->begin_query(pipe, to_call(call, tc_query_call)->query);
1220    return call_size(tc_query_call);
1221 }
1222 
1223 static bool
1224 tc_begin_query(struct pipe_context *_pipe, struct pipe_query *query)
1225 {
1226    struct threaded_context *tc = threaded_context(_pipe);
1227    tc->num_queries_active++;
1228 
1229    tc_add_call(tc, TC_CALL_begin_query, tc_query_call)->query = query;
1230    return true; /* we don't care about the return value for this call */
1231 }
1232 
1233 struct tc_end_query_call {
1234    struct tc_call_base base;
1235    struct threaded_context *tc;
1236    struct pipe_query *query;
1237 };
1238 
1239 static uint16_t
1240 tc_call_end_query(struct pipe_context *pipe, void *call)
1241 {
1242    struct tc_end_query_call *p = to_call(call, tc_end_query_call);
1243    struct threaded_query *tq = threaded_query(p->query);
1244 
1245    if (!list_is_linked(&tq->head_unflushed))
1246       list_add(&tq->head_unflushed, &p->tc->unflushed_queries);
1247 
1248    pipe->end_query(pipe, p->query);
1249    return call_size(tc_end_query_call);
1250 }
1251 
1252 static bool
1253 tc_end_query(struct pipe_context *_pipe, struct pipe_query *query)
1254 {
1255    struct threaded_context *tc = threaded_context(_pipe);
1256    struct threaded_query *tq = threaded_query(query);
1257    struct tc_end_query_call *call =
1258       tc_add_call(tc, TC_CALL_end_query, tc_end_query_call);
1259    tc->num_queries_active--;
1260 
1261    call->tc = tc;
1262    call->query = query;
1263 
1264    tq->flushed = false;
1265    tc->query_ended = true;
1266 
1267    return true; /* we don't care about the return value for this call */
1268 }
1269 
1270 static bool
1271 tc_get_query_result(struct pipe_context *_pipe,
1272                     struct pipe_query *query, bool wait,
1273                     union pipe_query_result *result)
1274 {
1275    struct threaded_context *tc = threaded_context(_pipe);
1276    struct threaded_query *tq = threaded_query(query);
1277    struct pipe_context *pipe = tc->pipe;
1278    bool flushed = tq->flushed;
1279 
1280    if (!flushed) {
1281       tc_sync_msg(tc, wait ? "wait" : "nowait");
1282       tc_set_driver_thread(tc);
1283    }
1284 
1285    bool success = pipe->get_query_result(pipe, query, wait, result);
1286 
1287    if (!flushed)
1288       tc_clear_driver_thread(tc);
1289 
1290    if (success) {
1291       tq->flushed = true;
1292       if (list_is_linked(&tq->head_unflushed)) {
1293          /* This is safe because it can only happen after we sync'd. */
1294          list_del(&tq->head_unflushed);
1295       }
1296    }
1297    return success;
1298 }
1299 
1300 struct tc_query_result_resource {
1301    struct tc_call_base base;
1302    enum pipe_query_flags flags:8;
1303    enum pipe_query_value_type result_type:8;
1304    int8_t index; /* it can be -1 */
1305    unsigned offset;
1306    struct pipe_query *query;
1307    struct pipe_resource *resource;
1308 };
1309 
1310 static uint16_t
1311 tc_call_get_query_result_resource(struct pipe_context *pipe, void *call)
1312 {
1313    struct tc_query_result_resource *p = to_call(call, tc_query_result_resource);
1314 
1315    pipe->get_query_result_resource(pipe, p->query, p->flags, p->result_type,
1316                                    p->index, p->resource, p->offset);
1317    tc_drop_resource_reference(p->resource);
1318    return call_size(tc_query_result_resource);
1319 }
1320 
1321 static void
1322 tc_get_query_result_resource(struct pipe_context *_pipe,
1323                              struct pipe_query *query,
1324                              enum pipe_query_flags flags,
1325                              enum pipe_query_value_type result_type, int index,
1326                              struct pipe_resource *resource, unsigned offset)
1327 {
1328    struct threaded_context *tc = threaded_context(_pipe);
1329 
1330    tc_buffer_disable_cpu_storage(resource);
1331 
1332    struct tc_query_result_resource *p =
1333       tc_add_call(tc, TC_CALL_get_query_result_resource,
1334                   tc_query_result_resource);
1335    p->query = query;
1336    p->flags = flags;
1337    p->result_type = result_type;
1338    p->index = index;
1339    tc_set_resource_reference(&p->resource, resource);
1340    tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], resource);
1341    p->offset = offset;
1342 }
1343 
1344 struct tc_render_condition {
1345    struct tc_call_base base;
1346    bool condition;
1347    unsigned mode;
1348    struct pipe_query *query;
1349 };
1350 
1351 static uint16_t
1352 tc_call_render_condition(struct pipe_context *pipe, void *call)
1353 {
1354    struct tc_render_condition *p = to_call(call, tc_render_condition);
1355    pipe->render_condition(pipe, p->query, p->condition, p->mode);
1356    return call_size(tc_render_condition);
1357 }
1358 
1359 static void
1360 tc_render_condition(struct pipe_context *_pipe,
1361                     struct pipe_query *query, bool condition,
1362                     enum pipe_render_cond_flag mode)
1363 {
1364    struct threaded_context *tc = threaded_context(_pipe);
1365    struct tc_render_condition *p =
1366       tc_add_call(tc, TC_CALL_render_condition, tc_render_condition);
1367 
1368    p->query = query;
1369    p->condition = condition;
1370    p->mode = mode;
1371 }
1372 
1373 
1374 /********************************************************************
1375  * constant (immutable) states
1376  */
1377 
1378 #define TC_CSO_CREATE(name, sname) \
1379    static void * \
1380    tc_create_##name##_state(struct pipe_context *_pipe, \
1381                             const struct pipe_##sname##_state *state) \
1382    { \
1383       struct pipe_context *pipe = threaded_context(_pipe)->pipe; \
1384       return pipe->create_##name##_state(pipe, state); \
1385    }
1386 
1387 #define TC_CSO_BIND(name, ...) TC_FUNC1(bind_##name##_state, , void *, , , ##__VA_ARGS__)
1388 #define TC_CSO_DELETE(name) TC_FUNC1(delete_##name##_state, , void *, , )
1389 
1390 #define TC_CSO(name, sname, ...) \
1391    TC_CSO_CREATE(name, sname) \
1392    TC_CSO_BIND(name, ##__VA_ARGS__) \
1393    TC_CSO_DELETE(name)
1394 
1395 #define TC_CSO_WHOLE(name) TC_CSO(name, name)
1396 #define TC_CSO_SHADER(name) TC_CSO(name, shader)
1397 #define TC_CSO_SHADER_TRACK(name) TC_CSO(name, shader, tc->seen_##name = true;)
1398 
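/* For reference, a worked expansion: TC_CSO_CREATE(blend, blend) from
 * TC_CSO_WHOLE(blend) below produces (roughly, after preprocessing):
 *
 *    static void *
 *    tc_create_blend_state(struct pipe_context *_pipe,
 *                          const struct pipe_blend_state *state)
 *    {
 *       struct pipe_context *pipe = threaded_context(_pipe)->pipe;
 *       return pipe->create_blend_state(pipe, state);
 *    }
 *
 * i.e. CSO creation is forwarded straight to the wrapped driver context
 * without queuing, presumably because create functions don't touch context
 * state, while the bind/delete wrappers generated by TC_CSO_BIND and
 * TC_CSO_DELETE (via TC_FUNC1, defined earlier, not shown here) are recorded
 * as queued calls.
 */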
1399 TC_CSO_WHOLE(blend)
1400 TC_CSO_WHOLE(rasterizer)
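/* depth_stencil_alpha and fs get hand-written bind bodies below instead of
 * the plain TC_CSO_BIND expansion: when renderpass info parsing is enabled,
 * binding these states lets the driver-provided dsa_parse/fs_parse callbacks
 * update the recorded renderpass info, and the corresponding fields are reset
 * when the bind happens outside of a renderpass.
 */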
1401 TC_CSO_CREATE(depth_stencil_alpha, depth_stencil_alpha)
1402 TC_CSO_BIND(depth_stencil_alpha,
1403    if (param && tc->options.parse_renderpass_info) {
1404       /* dsa info is only ever added during a renderpass;
1405        * changes outside of a renderpass reset the data
1406        */
1407       if (!tc->in_renderpass) {
1408          tc_get_renderpass_info(tc)->zsbuf_write_dsa = 0;
1409          tc_get_renderpass_info(tc)->zsbuf_read_dsa = 0;
1410       }
1411       /* let the driver parse its own state */
1412       tc->options.dsa_parse(param, tc_get_renderpass_info(tc));
1413    }
1414 )
1415 TC_CSO_DELETE(depth_stencil_alpha)
1416 TC_CSO_WHOLE(compute)
1417 TC_CSO_CREATE(fs, shader)
1418 TC_CSO_BIND(fs,
1419    if (param && tc->options.parse_renderpass_info) {
1420       /* fs info is only ever added during a renderpass;
1421        * changes outside of a renderpass reset the data
1422        */
1423       if (!tc->in_renderpass) {
1424          tc_get_renderpass_info(tc)->cbuf_fbfetch = 0;
1425          tc_get_renderpass_info(tc)->zsbuf_write_fs = 0;
1426       }
1427       /* let the driver parse its own state */
1428       tc->options.fs_parse(param, tc_get_renderpass_info(tc));
1429    }
1430 )
1431 TC_CSO_DELETE(fs)
1432 TC_CSO_SHADER(vs)
1433 TC_CSO_SHADER_TRACK(gs)
1434 TC_CSO_SHADER_TRACK(tcs)
1435 TC_CSO_SHADER_TRACK(tes)
1436 TC_CSO_CREATE(sampler, sampler)
1437 TC_CSO_DELETE(sampler)
1438 TC_CSO_BIND(vertex_elements)
1439 TC_CSO_DELETE(vertex_elements)
1440 
1441 static void *
1442 tc_create_vertex_elements_state(struct pipe_context *_pipe, unsigned count,
1443                                 const struct pipe_vertex_element *elems)
1444 {
1445    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1446 
1447    return pipe->create_vertex_elements_state(pipe, count, elems);
1448 }
1449 
1450 struct tc_sampler_states {
1451    struct tc_call_base base;
1452    uint8_t shader, start, count;
1453    void *slot[0]; /* more will be allocated if needed */
1454 };
1455 
1456 static uint16_t
1457 tc_call_bind_sampler_states(struct pipe_context *pipe, void *call)
1458 {
1459    struct tc_sampler_states *p = (struct tc_sampler_states *)call;
1460 
1461    pipe->bind_sampler_states(pipe, p->shader, p->start, p->count, p->slot);
1462    return p->base.num_slots;
1463 }
1464 
1465 static void
1466 tc_bind_sampler_states(struct pipe_context *_pipe,
1467                        enum pipe_shader_type shader,
1468                        unsigned start, unsigned count, void **states)
1469 {
1470    if (!count)
1471       return;
1472 
1473    struct threaded_context *tc = threaded_context(_pipe);
1474    struct tc_sampler_states *p =
1475       tc_add_slot_based_call(tc, TC_CALL_bind_sampler_states, tc_sampler_states, count);
1476 
1477    p->shader = shader;
1478    p->start = start;
1479    p->count = count;
1480    memcpy(p->slot, states, count * sizeof(states[0]));
1481 }
1482 
1483 static void
1484 tc_link_shader(struct pipe_context *_pipe, void **shaders)
1485 {
1486    struct threaded_context *tc = threaded_context(_pipe);
1487    tc->pipe->link_shader(tc->pipe, shaders);
1488 }
1489 /********************************************************************
1490  * immediate states
1491  */
1492 
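/* set_framebuffer_state is the most involved of the immediate states: besides
 * queuing the framebuffer for the driver thread, it keeps its own references
 * to the attachment resources (tc->fb_resources / tc->fb_resolve) for
 * batch-usage tracking, and, when renderpass info parsing is enabled, it
 * marks the end of the previous renderpass and starts recording a new one.
 */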
1493 struct tc_framebuffer {
1494    struct tc_call_base base;
1495    struct pipe_framebuffer_state state;
1496 };
1497 
1498 static uint16_t
1499 tc_call_set_framebuffer_state(struct pipe_context *pipe, void *call)
1500 {
1501    struct pipe_framebuffer_state *p = &to_call(call, tc_framebuffer)->state;
1502 
1503    pipe->set_framebuffer_state(pipe, p);
1504 
1505    unsigned nr_cbufs = p->nr_cbufs;
1506    for (unsigned i = 0; i < nr_cbufs; i++)
1507       tc_drop_surface_reference(p->cbufs[i]);
1508    tc_drop_surface_reference(p->zsbuf);
1509    tc_drop_resource_reference(p->resolve);
1510    return call_size(tc_framebuffer);
1511 }
1512 
1513 static void
1514 tc_set_framebuffer_state(struct pipe_context *_pipe,
1515                          const struct pipe_framebuffer_state *fb)
1516 {
1517    struct threaded_context *tc = threaded_context(_pipe);
1518    struct tc_framebuffer *p =
1519       tc_add_call(tc, TC_CALL_set_framebuffer_state, tc_framebuffer);
1520    unsigned nr_cbufs = fb->nr_cbufs;
1521 
1522    p->state.width = fb->width;
1523    p->state.height = fb->height;
1524    p->state.samples = fb->samples;
1525    p->state.layers = fb->layers;
1526    p->state.nr_cbufs = nr_cbufs;
1527    p->state.viewmask = fb->viewmask;
1528 
1529    /* when unbinding, mark attachments as used for the current batch */
1530    for (unsigned i = 0; i < tc->nr_cbufs; i++) {
1531       tc_set_resource_batch_usage_persistent(tc, tc->fb_resources[i], false);
1532       pipe_resource_reference(&tc->fb_resources[i], NULL);
1533    }
1534    tc_set_resource_batch_usage_persistent(tc, tc->fb_resources[PIPE_MAX_COLOR_BUFS], false);
1535    tc_set_resource_batch_usage_persistent(tc, tc->fb_resolve, false);
1536 
1537    for (unsigned i = 0; i < nr_cbufs; i++) {
1538       p->state.cbufs[i] = NULL;
1539       pipe_surface_reference(&p->state.cbufs[i], fb->cbufs[i]);
1540       /* full tracking requires storing the fb attachment resources */
1541       if (fb->cbufs[i])
1542          pipe_resource_reference(&tc->fb_resources[i], fb->cbufs[i]->texture);
1543       tc_set_resource_batch_usage_persistent(tc, tc->fb_resources[i], true);
1544    }
1545    tc->nr_cbufs = nr_cbufs;
1546    if (tc->options.parse_renderpass_info) {
1547       /* ensure this is treated as the first fb set if no fb activity has occurred */
1548       if (!tc->renderpass_info_recording->has_draw &&
1549           !tc->renderpass_info_recording->cbuf_clear &&
1550           !tc->renderpass_info_recording->cbuf_load &&
1551           !tc->renderpass_info_recording->zsbuf_load &&
1552           !tc->renderpass_info_recording->zsbuf_clear_partial)
1553          tc->batch_slots[tc->next].first_set_fb = false;
1554       /* store existing zsbuf data for possible persistence */
1555       uint8_t zsbuf = tc->renderpass_info_recording->has_draw ?
1556                       0 :
1557                       tc->renderpass_info_recording->data8[3];
1558       bool zsbuf_changed = tc->fb_resources[PIPE_MAX_COLOR_BUFS] !=
1559                            (fb->zsbuf ? fb->zsbuf->texture : NULL);
1560 
1561       if (tc->seen_fb_state) {
1562          /* this is the end of a renderpass, so increment the renderpass info */
1563          tc_batch_increment_renderpass_info(tc, tc->next, false);
1564          /* if zsbuf hasn't changed (i.e., possibly just adding a color buffer):
1565           * keep zsbuf usage data
1566           */
1567          if (!zsbuf_changed)
1568             tc->renderpass_info_recording->data8[3] = zsbuf;
1569       } else {
1570          /* this is the first time a set_framebuffer_call is triggered;
1571           * just increment the index and keep using the existing info for recording
1572           */
1573          tc->batch_slots[tc->next].renderpass_info_idx = 0;
1574       }
1575       /* future fb state changes will increment the index */
1576       tc->seen_fb_state = true;
1577    }
1578    pipe_resource_reference(&tc->fb_resources[PIPE_MAX_COLOR_BUFS],
1579                            fb->zsbuf ? fb->zsbuf->texture : NULL);
1580    pipe_resource_reference(&tc->fb_resolve, fb->resolve);
1581    tc_set_resource_batch_usage_persistent(tc, tc->fb_resources[PIPE_MAX_COLOR_BUFS], true);
1582    tc_set_resource_batch_usage_persistent(tc, tc->fb_resolve, true);
1583    tc->in_renderpass = false;
1584    p->state.zsbuf = NULL;
1585    pipe_surface_reference(&p->state.zsbuf, fb->zsbuf);
1586    p->state.resolve = NULL;
1587    pipe_resource_reference(&p->state.resolve, fb->resolve);
1588 }
1589 
1590 struct tc_tess_state {
1591    struct tc_call_base base;
1592    float state[6];
1593 };
1594 
1595 static uint16_t
1596 tc_call_set_tess_state(struct pipe_context *pipe, void *call)
1597 {
1598    float *p = to_call(call, tc_tess_state)->state;
1599 
1600    pipe->set_tess_state(pipe, p, p + 4);
1601    return call_size(tc_tess_state);
1602 }
1603 
1604 static void
1605 tc_set_tess_state(struct pipe_context *_pipe,
1606                   const float default_outer_level[4],
1607                   const float default_inner_level[2])
1608 {
1609    struct threaded_context *tc = threaded_context(_pipe);
1610    float *p = tc_add_call(tc, TC_CALL_set_tess_state, tc_tess_state)->state;
1611 
1612    memcpy(p, default_outer_level, 4 * sizeof(float));
1613    memcpy(p + 4, default_inner_level, 2 * sizeof(float));
1614 }
1615 
1616 struct tc_patch_vertices {
1617    struct tc_call_base base;
1618    uint8_t patch_vertices;
1619 };
1620 
1621 static uint16_t
1622 tc_call_set_patch_vertices(struct pipe_context *pipe, void *call)
1623 {
1624    uint8_t patch_vertices = to_call(call, tc_patch_vertices)->patch_vertices;
1625 
1626    pipe->set_patch_vertices(pipe, patch_vertices);
1627    return call_size(tc_patch_vertices);
1628 }
1629 
1630 static void
1631 tc_set_patch_vertices(struct pipe_context *_pipe, uint8_t patch_vertices)
1632 {
1633    struct threaded_context *tc = threaded_context(_pipe);
1634 
1635    tc_add_call(tc, TC_CALL_set_patch_vertices,
1636                tc_patch_vertices)->patch_vertices = patch_vertices;
1637 }
1638 
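/* Constant buffers are queued with one of two differently sized call structs:
 * the small tc_constant_buffer_base when unbinding (is_null), and the full
 * tc_constant_buffer otherwise.  User-pointer constant buffers are copied
 * into const_uploader on this thread first, so the driver thread only ever
 * sees real GPU buffers.
 */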
1639 struct tc_constant_buffer_base {
1640    struct tc_call_base base;
1641    uint8_t shader, index;
1642    bool is_null;
1643 };
1644 
1645 struct tc_constant_buffer {
1646    struct tc_constant_buffer_base base;
1647    struct pipe_constant_buffer cb;
1648 };
1649 
1650 static uint16_t
1651 tc_call_set_constant_buffer(struct pipe_context *pipe, void *call)
1652 {
1653    struct tc_constant_buffer *p = (struct tc_constant_buffer *)call;
1654 
1655    if (unlikely(p->base.is_null)) {
1656       pipe->set_constant_buffer(pipe, p->base.shader, p->base.index, false, NULL);
1657       return call_size(tc_constant_buffer_base);
1658    }
1659 
1660    pipe->set_constant_buffer(pipe, p->base.shader, p->base.index, true, &p->cb);
1661    return call_size(tc_constant_buffer);
1662 }
1663 
1664 static void
1665 tc_set_constant_buffer(struct pipe_context *_pipe,
1666                        enum pipe_shader_type shader, uint index,
1667                        bool take_ownership,
1668                        const struct pipe_constant_buffer *cb)
1669 {
1670    struct threaded_context *tc = threaded_context(_pipe);
1671 
1672    if (unlikely(!cb || (!cb->buffer && !cb->user_buffer))) {
1673       struct tc_constant_buffer_base *p =
1674          tc_add_call(tc, TC_CALL_set_constant_buffer, tc_constant_buffer_base);
1675       p->shader = shader;
1676       p->index = index;
1677       p->is_null = true;
1678       tc_unbind_buffer(&tc->const_buffers[shader][index]);
1679       return;
1680    }
1681 
1682    struct pipe_resource *buffer;
1683    unsigned offset;
1684 
1685    if (cb->user_buffer) {
1686       /* This must be done before adding set_constant_buffer, because it could
1687        * generate e.g. transfer_unmap and flush partially-uninitialized
1688        * set_constant_buffer to the driver if it was done afterwards.
1689        */
1690       buffer = NULL;
1691       u_upload_data(tc->base.const_uploader, 0, cb->buffer_size,
1692                     tc->ubo_alignment, cb->user_buffer, &offset, &buffer);
1693       u_upload_unmap(tc->base.const_uploader);
1694       take_ownership = true;
1695    } else {
1696       buffer = cb->buffer;
1697       offset = cb->buffer_offset;
1698    }
1699 
1700    struct tc_constant_buffer *p =
1701       tc_add_call(tc, TC_CALL_set_constant_buffer, tc_constant_buffer);
1702    p->base.shader = shader;
1703    p->base.index = index;
1704    p->base.is_null = false;
1705    p->cb.user_buffer = NULL;
1706    p->cb.buffer_offset = offset;
1707    p->cb.buffer_size = cb->buffer_size;
1708 
1709    if (take_ownership)
1710       p->cb.buffer = buffer;
1711    else
1712       tc_set_resource_reference(&p->cb.buffer, buffer);
1713 
1714    if (buffer) {
1715       tc_bind_buffer(&tc->const_buffers[shader][index],
1716                      &tc->buffer_lists[tc->next_buf_list], buffer);
1717    } else {
1718       tc_unbind_buffer(&tc->const_buffers[shader][index]);
1719    }
1720 }
1721 
1722 struct tc_inlinable_constants {
1723    struct tc_call_base base;
1724    uint8_t shader;
1725    uint8_t num_values;
1726    uint32_t values[MAX_INLINABLE_UNIFORMS];
1727 };
1728 
1729 static uint16_t
1730 tc_call_set_inlinable_constants(struct pipe_context *pipe, void *call)
1731 {
1732    struct tc_inlinable_constants *p = to_call(call, tc_inlinable_constants);
1733 
1734    pipe->set_inlinable_constants(pipe, p->shader, p->num_values, p->values);
1735    return call_size(tc_inlinable_constants);
1736 }
1737 
1738 static void
1739 tc_set_inlinable_constants(struct pipe_context *_pipe,
1740                            enum pipe_shader_type shader,
1741                            uint num_values, uint32_t *values)
1742 {
1743    struct threaded_context *tc = threaded_context(_pipe);
1744    struct tc_inlinable_constants *p =
1745       tc_add_call(tc, TC_CALL_set_inlinable_constants, tc_inlinable_constants);
1746    p->shader = shader;
1747    p->num_values = num_values;
1748    memcpy(p->values, values, num_values * 4);
1749 }
1750 
1751 struct tc_sample_locations {
1752    struct tc_call_base base;
1753    uint16_t size;
1754    uint8_t slot[0];
1755 };
1756 
1757 
1758 static uint16_t
1759 tc_call_set_sample_locations(struct pipe_context *pipe, void *call)
1760 {
1761    struct tc_sample_locations *p = (struct tc_sample_locations *)call;
1762 
1763    pipe->set_sample_locations(pipe, p->size, p->slot);
1764    return p->base.num_slots;
1765 }
1766 
1767 static void
1768 tc_set_sample_locations(struct pipe_context *_pipe, size_t size, const uint8_t *locations)
1769 {
1770    struct threaded_context *tc = threaded_context(_pipe);
1771    struct tc_sample_locations *p =
1772       tc_add_slot_based_call(tc, TC_CALL_set_sample_locations,
1773                              tc_sample_locations, size);
1774 
1775    p->size = size;
1776    memcpy(p->slot, locations, size);
1777 }
1778 
1779 struct tc_scissors {
1780    struct tc_call_base base;
1781    uint8_t start, count;
1782    struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
1783 };
1784 
1785 static uint16_t
1786 tc_call_set_scissor_states(struct pipe_context *pipe, void *call)
1787 {
1788    struct tc_scissors *p = (struct tc_scissors *)call;
1789 
1790    pipe->set_scissor_states(pipe, p->start, p->count, p->slot);
1791    return p->base.num_slots;
1792 }
1793 
1794 static void
1795 tc_set_scissor_states(struct pipe_context *_pipe,
1796                       unsigned start, unsigned count,
1797                       const struct pipe_scissor_state *states)
1798 {
1799    struct threaded_context *tc = threaded_context(_pipe);
1800    struct tc_scissors *p =
1801       tc_add_slot_based_call(tc, TC_CALL_set_scissor_states, tc_scissors, count);
1802 
1803    p->start = start;
1804    p->count = count;
1805    memcpy(&p->slot, states, count * sizeof(states[0]));
1806 }
1807 
1808 struct tc_viewports {
1809    struct tc_call_base base;
1810    uint8_t start, count;
1811    struct pipe_viewport_state slot[0]; /* more will be allocated if needed */
1812 };
1813 
1814 static uint16_t
1815 tc_call_set_viewport_states(struct pipe_context *pipe, void *call)
1816 {
1817    struct tc_viewports *p = (struct tc_viewports *)call;
1818 
1819    pipe->set_viewport_states(pipe, p->start, p->count, p->slot);
1820    return p->base.num_slots;
1821 }
1822 
1823 static void
1824 tc_set_viewport_states(struct pipe_context *_pipe,
1825                        unsigned start, unsigned count,
1826                        const struct pipe_viewport_state *states)
1827 {
1828    if (!count)
1829       return;
1830 
1831    struct threaded_context *tc = threaded_context(_pipe);
1832    struct tc_viewports *p =
1833       tc_add_slot_based_call(tc, TC_CALL_set_viewport_states, tc_viewports, count);
1834 
1835    p->start = start;
1836    p->count = count;
1837    memcpy(&p->slot, states, count * sizeof(states[0]));
1838 }
1839 
1840 struct tc_window_rects {
1841    struct tc_call_base base;
1842    bool include;
1843    uint8_t count;
1844    struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
1845 };
1846 
1847 static uint16_t
1848 tc_call_set_window_rectangles(struct pipe_context *pipe, void *call)
1849 {
1850    struct tc_window_rects *p = (struct tc_window_rects *)call;
1851 
1852    pipe->set_window_rectangles(pipe, p->include, p->count, p->slot);
1853    return p->base.num_slots;
1854 }
1855 
1856 static void
1857 tc_set_window_rectangles(struct pipe_context *_pipe, bool include,
1858                          unsigned count,
1859                          const struct pipe_scissor_state *rects)
1860 {
1861    struct threaded_context *tc = threaded_context(_pipe);
1862    struct tc_window_rects *p =
1863       tc_add_slot_based_call(tc, TC_CALL_set_window_rectangles, tc_window_rects, count);
1864 
1865    p->include = include;
1866    p->count = count;
1867    memcpy(p->slot, rects, count * sizeof(rects[0]));
1868 }
1869 
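/* For sampler views (and the image/shader-buffer bindings below), bindings
 * that reference buffers are additionally recorded via tc_bind_buffer into
 * the current buffer list, so that later map calls can tell whether a buffer
 * is still busy in a queued batch; non-buffer textures only get batch-usage
 * tracking.
 */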
1870 struct tc_sampler_views {
1871    struct tc_call_base base;
1872    uint8_t shader, start, count, unbind_num_trailing_slots;
1873    struct pipe_sampler_view *slot[0]; /* more will be allocated if needed */
1874 };
1875 
1876 static uint16_t
1877 tc_call_set_sampler_views(struct pipe_context *pipe, void *call)
1878 {
1879    struct tc_sampler_views *p = (struct tc_sampler_views *)call;
1880 
1881    pipe->set_sampler_views(pipe, p->shader, p->start, p->count,
1882                            p->unbind_num_trailing_slots, true, p->slot);
1883    return p->base.num_slots;
1884 }
1885 
1886 static void
1887 tc_set_sampler_views(struct pipe_context *_pipe,
1888                      enum pipe_shader_type shader,
1889                      unsigned start, unsigned count,
1890                      unsigned unbind_num_trailing_slots, bool take_ownership,
1891                      struct pipe_sampler_view **views)
1892 {
1893    if (!count && !unbind_num_trailing_slots)
1894       return;
1895 
1896    struct threaded_context *tc = threaded_context(_pipe);
1897    struct tc_sampler_views *p =
1898       tc_add_slot_based_call(tc, TC_CALL_set_sampler_views, tc_sampler_views,
1899                              views ? count : 0);
1900 
1901    p->shader = shader;
1902    p->start = start;
1903 
1904    if (views) {
1905       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1906 
1907       p->count = count;
1908       p->unbind_num_trailing_slots = unbind_num_trailing_slots;
1909 
1910       if (take_ownership) {
1911          memcpy(p->slot, views, sizeof(*views) * count);
1912 
1913          for (unsigned i = 0; i < count; i++) {
1914             if (views[i]) {
1915                if (views[i]->target == PIPE_BUFFER)
1916                   tc_bind_buffer(&tc->sampler_buffers[shader][start + i], next,
1917                                  views[i]->texture);
1918                else
1919                   tc_set_resource_batch_usage(tc, views[i]->texture);
1920             } else {
1921                tc_unbind_buffer(&tc->sampler_buffers[shader][start + i]);
1922             }
1923          }
1924       } else {
1925          for (unsigned i = 0; i < count; i++) {
1926             p->slot[i] = NULL;
1927             pipe_sampler_view_reference(&p->slot[i], views[i]);
1928 
1929             if (views[i]) {
1930                if (views[i]->target == PIPE_BUFFER)
1931                   tc_bind_buffer(&tc->sampler_buffers[shader][start + i], next,
1932                                  views[i]->texture);
1933                else
1934                   tc_set_resource_batch_usage(tc, views[i]->texture);
1935             } else {
1936                tc_unbind_buffer(&tc->sampler_buffers[shader][start + i]);
1937             }
1938          }
1939       }
1940 
1941       tc_unbind_buffers(&tc->sampler_buffers[shader][start + count],
1942                         unbind_num_trailing_slots);
1943       tc->seen_sampler_buffers[shader] = true;
1944    } else {
1945       p->count = 0;
1946       p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;
1947 
1948       tc_unbind_buffers(&tc->sampler_buffers[shader][start],
1949                         count + unbind_num_trailing_slots);
1950    }
1951 }
1952 
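/* Writable buffer images need extra bookkeeping on bind: the CPU storage
 * shadow is disabled (it can't track GPU writes), the bound range is added to
 * the resource's valid_buffer_range, and the per-shader
 * image_buffers_writeable_mask is updated (presumably consulted when deciding
 * whether a buffer is bound for write).
 */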
1953 struct tc_shader_images {
1954    struct tc_call_base base;
1955    uint8_t shader, start, count;
1956    uint8_t unbind_num_trailing_slots;
1957    struct pipe_image_view slot[0]; /* more will be allocated if needed */
1958 };
1959 
1960 static uint16_t
1961 tc_call_set_shader_images(struct pipe_context *pipe, void *call)
1962 {
1963    struct tc_shader_images *p = (struct tc_shader_images *)call;
1964    unsigned count = p->count;
1965 
1966    if (!p->count) {
1967       pipe->set_shader_images(pipe, p->shader, p->start, 0,
1968                               p->unbind_num_trailing_slots, NULL);
1969       return call_size(tc_shader_images);
1970    }
1971 
1972    pipe->set_shader_images(pipe, p->shader, p->start, p->count,
1973                            p->unbind_num_trailing_slots, p->slot);
1974 
1975    for (unsigned i = 0; i < count; i++)
1976       tc_drop_resource_reference(p->slot[i].resource);
1977 
1978    return p->base.num_slots;
1979 }
1980 
1981 static void
1982 tc_set_shader_images(struct pipe_context *_pipe,
1983                      enum pipe_shader_type shader,
1984                      unsigned start, unsigned count,
1985                      unsigned unbind_num_trailing_slots,
1986                      const struct pipe_image_view *images)
1987 {
1988    if (!count && !unbind_num_trailing_slots)
1989       return;
1990 
1991    struct threaded_context *tc = threaded_context(_pipe);
1992    struct tc_shader_images *p =
1993       tc_add_slot_based_call(tc, TC_CALL_set_shader_images, tc_shader_images,
1994                              images ? count : 0);
1995    unsigned writable_buffers = 0;
1996 
1997    p->shader = shader;
1998    p->start = start;
1999 
2000    if (images) {
2001       p->count = count;
2002       p->unbind_num_trailing_slots = unbind_num_trailing_slots;
2003 
2004       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
2005 
2006       for (unsigned i = 0; i < count; i++) {
2007          struct pipe_resource *resource = images[i].resource;
2008 
2009          tc_set_resource_reference(&p->slot[i].resource, resource);
2010 
2011          if (resource) {
2012             if (resource->target == PIPE_BUFFER) {
2013                tc_bind_buffer(&tc->image_buffers[shader][start + i], next, resource);
2014 
2015                if (images[i].access & PIPE_IMAGE_ACCESS_WRITE) {
2016                   struct threaded_resource *tres = threaded_resource(resource);
2017 
2018                   tc_buffer_disable_cpu_storage(resource);
2019                   util_range_add(&tres->b, &tres->valid_buffer_range,
2020                                  images[i].u.buf.offset,
2021                                  images[i].u.buf.offset + images[i].u.buf.size);
2022                   writable_buffers |= BITFIELD_BIT(start + i);
2023                }
2024             } else {
2025                tc_set_resource_batch_usage(tc, resource);
2026             }
2027          } else {
2028             tc_unbind_buffer(&tc->image_buffers[shader][start + i]);
2029          }
2030       }
2031       memcpy(p->slot, images, count * sizeof(images[0]));
2032 
2033       tc_unbind_buffers(&tc->image_buffers[shader][start + count],
2034                         unbind_num_trailing_slots);
2035       tc->seen_image_buffers[shader] = true;
2036    } else {
2037       p->count = 0;
2038       p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;
2039 
2040       tc_unbind_buffers(&tc->image_buffers[shader][start],
2041                         count + unbind_num_trailing_slots);
2042    }
2043 
2044    tc->image_buffers_writeable_mask[shader] &= ~BITFIELD_RANGE(start, count);
2045    tc->image_buffers_writeable_mask[shader] |= writable_buffers;
2046 }
2047 
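/* Shader (SSBO) bindings mirror the image path: writable slots disable the
 * CPU storage shadow and extend valid_buffer_range, and
 * shader_buffers_writeable_mask tracks which slots are currently writable.
 */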
2048 struct tc_shader_buffers {
2049    struct tc_call_base base;
2050    uint8_t shader, start, count;
2051    bool unbind;
2052    unsigned writable_bitmask;
2053    struct pipe_shader_buffer slot[0]; /* more will be allocated if needed */
2054 };
2055 
2056 static uint16_t
2057 tc_call_set_shader_buffers(struct pipe_context *pipe, void *call)
2058 {
2059    struct tc_shader_buffers *p = (struct tc_shader_buffers *)call;
2060    unsigned count = p->count;
2061 
2062    if (p->unbind) {
2063       pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, NULL, 0);
2064       return call_size(tc_shader_buffers);
2065    }
2066 
2067    pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, p->slot,
2068                             p->writable_bitmask);
2069 
2070    for (unsigned i = 0; i < count; i++)
2071       tc_drop_resource_reference(p->slot[i].buffer);
2072 
2073    return p->base.num_slots;
2074 }
2075 
2076 static void
2077 tc_set_shader_buffers(struct pipe_context *_pipe,
2078                       enum pipe_shader_type shader,
2079                       unsigned start, unsigned count,
2080                       const struct pipe_shader_buffer *buffers,
2081                       unsigned writable_bitmask)
2082 {
2083    if (!count)
2084       return;
2085 
2086    struct threaded_context *tc = threaded_context(_pipe);
2087    struct tc_shader_buffers *p =
2088       tc_add_slot_based_call(tc, TC_CALL_set_shader_buffers, tc_shader_buffers,
2089                              buffers ? count : 0);
2090 
2091    p->shader = shader;
2092    p->start = start;
2093    p->count = count;
2094    p->unbind = buffers == NULL;
2095    p->writable_bitmask = writable_bitmask;
2096 
2097    if (buffers) {
2098       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
2099 
2100       for (unsigned i = 0; i < count; i++) {
2101          struct pipe_shader_buffer *dst = &p->slot[i];
2102          const struct pipe_shader_buffer *src = buffers + i;
2103 
2104          tc_set_resource_reference(&dst->buffer, src->buffer);
2105          dst->buffer_offset = src->buffer_offset;
2106          dst->buffer_size = src->buffer_size;
2107 
2108          if (src->buffer) {
2109             struct threaded_resource *tres = threaded_resource(src->buffer);
2110 
2111             tc_bind_buffer(&tc->shader_buffers[shader][start + i], next, &tres->b);
2112 
2113             if (writable_bitmask & BITFIELD_BIT(i)) {
2114                tc_buffer_disable_cpu_storage(src->buffer);
2115                util_range_add(&tres->b, &tres->valid_buffer_range,
2116                               src->buffer_offset,
2117                               src->buffer_offset + src->buffer_size);
2118             }
2119          } else {
2120             tc_unbind_buffer(&tc->shader_buffers[shader][start + i]);
2121          }
2122       }
2123       tc->seen_shader_buffers[shader] = true;
2124    } else {
2125       tc_unbind_buffers(&tc->shader_buffers[shader][start], count);
2126    }
2127 
2128    tc->shader_buffers_writeable_mask[shader] &= ~BITFIELD_RANGE(start, count);
2129    tc->shader_buffers_writeable_mask[shader] |= writable_bitmask << start;
2130 }
2131 
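/* Vertex buffers: struct tc_vertex_buffers is declared elsewhere (assumed to
 * be in the header), and user buffers must already have been converted to
 * real resources by this point, which is what the tc_assert in the call
 * handler checks.  tc_add_set_vertex_buffers_call below returns the queued
 * slot array so callers can fill it in place.
 */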
2132 static uint16_t
2133 tc_call_set_vertex_buffers(struct pipe_context *pipe, void *call)
2134 {
2135    struct tc_vertex_buffers *p = (struct tc_vertex_buffers *)call;
2136    unsigned count = p->count;
2137 
2138    for (unsigned i = 0; i < count; i++)
2139       tc_assert(!p->slot[i].is_user_buffer);
2140 
2141    pipe->set_vertex_buffers(pipe, count, p->slot);
2142    return p->base.num_slots;
2143 }
2144 
2145 static void
2146 tc_set_vertex_buffers(struct pipe_context *_pipe, unsigned count,
2147                       const struct pipe_vertex_buffer *buffers)
2148 {
2149    struct threaded_context *tc = threaded_context(_pipe);
2150 
2151    assert(!count || buffers);
2152 
2153    if (count) {
2154       struct tc_vertex_buffers *p =
2155          tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, count);
2156       p->count = count;
2157 
2158       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
2159 
2160       memcpy(p->slot, buffers, count * sizeof(struct pipe_vertex_buffer));
2161 
2162       for (unsigned i = 0; i < count; i++) {
2163          struct pipe_resource *buf = buffers[i].buffer.resource;
2164 
2165          if (buf) {
2166             tc_bind_buffer(&tc->vertex_buffers[i], next, buf);
2167          } else {
2168             tc_unbind_buffer(&tc->vertex_buffers[i]);
2169          }
2170       }
2171    } else {
2172       struct tc_vertex_buffers *p =
2173          tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, 0);
2174       p->count = 0;
2175    }
2176 
2177    /* We don't need to unbind trailing buffers because we never touch bindings
2178     * after num_vertex_buffers.
2179     */
2180    tc->num_vertex_buffers = count;
2181 }
2182 
2183 struct pipe_vertex_buffer *
2184 tc_add_set_vertex_buffers_call(struct pipe_context *_pipe, unsigned count)
2185 {
2186    struct threaded_context *tc = threaded_context(_pipe);
2187 
2188    /* We don't need to unbind trailing buffers because we never touch bindings
2189     * after num_vertex_buffers.
2190     */
2191    tc->num_vertex_buffers = count;
2192 
2193    struct tc_vertex_buffers *p =
2194       tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, count);
2195    p->count = count;
2196    return p->slot;
2197 }
2198 
2199 struct tc_stream_outputs {
2200    struct tc_call_base base;
2201    uint8_t count;
2202    uint8_t output_prim;
2203    struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
2204    unsigned offsets[PIPE_MAX_SO_BUFFERS];
2205 };
2206 
2207 static uint16_t
2208 tc_call_set_stream_output_targets(struct pipe_context *pipe, void *call)
2209 {
2210    struct tc_stream_outputs *p = to_call(call, tc_stream_outputs);
2211    unsigned count = p->count;
2212 
2213    pipe->set_stream_output_targets(pipe, count, p->targets, p->offsets,
2214                                    p->output_prim);
2215    for (unsigned i = 0; i < count; i++)
2216       tc_drop_so_target_reference(p->targets[i]);
2217 
2218    return call_size(tc_stream_outputs);
2219 }
2220 
2221 static void
2222 tc_set_stream_output_targets(struct pipe_context *_pipe,
2223                              unsigned count,
2224                              struct pipe_stream_output_target **tgs,
2225                              const unsigned *offsets,
2226                              enum mesa_prim output_prim)
2227 {
2228    struct threaded_context *tc = threaded_context(_pipe);
2229    struct tc_stream_outputs *p =
2230       tc_add_call(tc, TC_CALL_set_stream_output_targets, tc_stream_outputs);
2231    struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
2232 
2233    for (unsigned i = 0; i < count; i++) {
2234       p->targets[i] = NULL;
2235       pipe_so_target_reference(&p->targets[i], tgs[i]);
2236       if (tgs[i]) {
2237          tc_buffer_disable_cpu_storage(tgs[i]->buffer);
2238          tc_bind_buffer(&tc->streamout_buffers[i], next, tgs[i]->buffer);
2239       } else {
2240          tc_unbind_buffer(&tc->streamout_buffers[i]);
2241       }
2242    }
2243    p->count = count;
2244    p->output_prim = output_prim;
2245    memcpy(p->offsets, offsets, count * sizeof(unsigned));
2246 
2247    tc_unbind_buffers(&tc->streamout_buffers[count], PIPE_MAX_SO_BUFFERS - count);
2248    if (count)
2249       tc->seen_streamout_buffers = true;
2250 }
2251 
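/* set_compute_resources and set_global_binding are handled synchronously:
 * the context syncs and forwards the call directly, presumably because these
 * paths are rare and their resource lifetimes aren't tracked by the buffer
 * lists.
 */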
2252 static void
2253 tc_set_compute_resources(struct pipe_context *_pipe, unsigned start,
2254                          unsigned count, struct pipe_surface **resources)
2255 {
2256    struct threaded_context *tc = threaded_context(_pipe);
2257    struct pipe_context *pipe = tc->pipe;
2258 
2259    tc_sync(tc);
2260    pipe->set_compute_resources(pipe, start, count, resources);
2261 }
2262 
2263 static void
2264 tc_set_global_binding(struct pipe_context *_pipe, unsigned first,
2265                       unsigned count, struct pipe_resource **resources,
2266                       uint32_t **handles)
2267 {
2268    struct threaded_context *tc = threaded_context(_pipe);
2269    struct pipe_context *pipe = tc->pipe;
2270 
2271    tc_sync(tc);
2272    pipe->set_global_binding(pipe, first, count, resources, handles);
2273 }
2274 
2275 
2276 /********************************************************************
2277  * views
2278  */
2279 
2280 static struct pipe_surface *
2281 tc_create_surface(struct pipe_context *_pipe,
2282                   struct pipe_resource *resource,
2283                   const struct pipe_surface *surf_tmpl)
2284 {
2285    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2286    struct pipe_surface *view =
2287          pipe->create_surface(pipe, resource, surf_tmpl);
2288 
2289    if (view)
2290       view->context = _pipe;
2291    return view;
2292 }
2293 
2294 static void
2295 tc_surface_destroy(struct pipe_context *_pipe,
2296                    struct pipe_surface *surf)
2297 {
2298    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2299 
2300    pipe->surface_destroy(pipe, surf);
2301 }
2302 
2303 static struct pipe_sampler_view *
2304 tc_create_sampler_view(struct pipe_context *_pipe,
2305                        struct pipe_resource *resource,
2306                        const struct pipe_sampler_view *templ)
2307 {
2308    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2309    struct pipe_sampler_view *view =
2310          pipe->create_sampler_view(pipe, resource, templ);
2311 
2312    if (view)
2313       view->context = _pipe;
2314    return view;
2315 }
2316 
2317 static void
2318 tc_sampler_view_destroy(struct pipe_context *_pipe,
2319                         struct pipe_sampler_view *view)
2320 {
2321    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2322 
2323    pipe->sampler_view_destroy(pipe, view);
2324 }
2325 
2326 static struct pipe_stream_output_target *
2327 tc_create_stream_output_target(struct pipe_context *_pipe,
2328                                struct pipe_resource *res,
2329                                unsigned buffer_offset,
2330                                unsigned buffer_size)
2331 {
2332    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2333    struct threaded_resource *tres = threaded_resource(res);
2334    struct pipe_stream_output_target *view;
2335 
2336    util_range_add(&tres->b, &tres->valid_buffer_range, buffer_offset,
2337                   buffer_offset + buffer_size);
2338 
2339    view = pipe->create_stream_output_target(pipe, res, buffer_offset,
2340                                             buffer_size);
2341    if (view)
2342       view->context = _pipe;
2343    return view;
2344 }
2345 
2346 static void
2347 tc_stream_output_target_destroy(struct pipe_context *_pipe,
2348                                 struct pipe_stream_output_target *target)
2349 {
2350    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2351 
2352    pipe->stream_output_target_destroy(pipe, target);
2353 }
2354 
2355 
2356 /********************************************************************
2357  * bindless
2358  */
2359 
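/* Bindless handles: creating a texture/image handle returns a value to the
 * caller, so it has to sync with the driver thread; toggling residency
 * returns nothing and is queued like any other call.
 */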
2360 static uint64_t
2361 tc_create_texture_handle(struct pipe_context *_pipe,
2362                          struct pipe_sampler_view *view,
2363                          const struct pipe_sampler_state *state)
2364 {
2365    struct threaded_context *tc = threaded_context(_pipe);
2366    struct pipe_context *pipe = tc->pipe;
2367 
2368    tc_sync(tc);
2369    return pipe->create_texture_handle(pipe, view, state);
2370 }
2371 
2372 struct tc_make_texture_handle_resident {
2373    struct tc_call_base base;
2374    bool resident;
2375    uint64_t handle;
2376 };
2377 
2378 static uint16_t
2379 tc_call_make_texture_handle_resident(struct pipe_context *pipe, void *call)
2380 {
2381    struct tc_make_texture_handle_resident *p =
2382       to_call(call, tc_make_texture_handle_resident);
2383 
2384    pipe->make_texture_handle_resident(pipe, p->handle, p->resident);
2385    return call_size(tc_make_texture_handle_resident);
2386 }
2387 
2388 static void
2389 tc_make_texture_handle_resident(struct pipe_context *_pipe, uint64_t handle,
2390                                 bool resident)
2391 {
2392    struct threaded_context *tc = threaded_context(_pipe);
2393    struct tc_make_texture_handle_resident *p =
2394       tc_add_call(tc, TC_CALL_make_texture_handle_resident,
2395                   tc_make_texture_handle_resident);
2396 
2397    p->handle = handle;
2398    p->resident = resident;
2399 }
2400 
2401 static uint64_t
2402 tc_create_image_handle(struct pipe_context *_pipe,
2403                        const struct pipe_image_view *image)
2404 {
2405    struct threaded_context *tc = threaded_context(_pipe);
2406    struct pipe_context *pipe = tc->pipe;
2407    struct pipe_resource *resource = image->resource;
2408 
2409    if (image->access & PIPE_IMAGE_ACCESS_WRITE &&
2410        resource && resource->target == PIPE_BUFFER) {
2411       struct threaded_resource *tres = threaded_resource(resource);
2412 
2413       /* The CPU storage doesn't support writable buffers. */
2414       tc_buffer_disable_cpu_storage(resource);
2415 
2416       util_range_add(&tres->b, &tres->valid_buffer_range,
2417                      image->u.buf.offset,
2418                      image->u.buf.offset + image->u.buf.size);
2419    }
2420 
2421    tc_sync(tc);
2422    return pipe->create_image_handle(pipe, image);
2423 }
2424 
2425 struct tc_make_image_handle_resident {
2426    struct tc_call_base base;
2427    bool resident;
2428    unsigned access;
2429    uint64_t handle;
2430 };
2431 
2432 static uint16_t
2433 tc_call_make_image_handle_resident(struct pipe_context *pipe, void *call)
2434 {
2435    struct tc_make_image_handle_resident *p =
2436       to_call(call, tc_make_image_handle_resident);
2437 
2438    pipe->make_image_handle_resident(pipe, p->handle, p->access, p->resident);
2439    return call_size(tc_make_image_handle_resident);
2440 }
2441 
2442 static void
2443 tc_make_image_handle_resident(struct pipe_context *_pipe, uint64_t handle,
2444                               unsigned access, bool resident)
2445 {
2446    struct threaded_context *tc = threaded_context(_pipe);
2447    struct tc_make_image_handle_resident *p =
2448       tc_add_call(tc, TC_CALL_make_image_handle_resident,
2449                   tc_make_image_handle_resident);
2450 
2451    p->handle = handle;
2452    p->access = access;
2453    p->resident = resident;
2454 }
2455 
2456 
2457 /********************************************************************
2458  * transfer
2459  */
2460 
2461 static void
2462 tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
2463          unsigned flags);
2464 
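/* Buffer invalidation works by allocating fresh storage on this thread and
 * asking the driver (on the driver thread) to swap it in.  The driver's
 * tc_replace_buffer_storage_func callback is invoked as
 *    func(pipe, dst, src, num_rebinds, rebind_mask, delete_buffer_id)
 * (see tc_call_replace_buffer_storage below) and is presumably expected to
 * move src's storage into dst and rebind dst wherever the rebind mask says
 * it is bound.
 */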
2465 struct tc_replace_buffer_storage {
2466    struct tc_call_base base;
2467    uint16_t num_rebinds;
2468    uint32_t rebind_mask;
2469    uint32_t delete_buffer_id;
2470    struct pipe_resource *dst;
2471    struct pipe_resource *src;
2472    tc_replace_buffer_storage_func func;
2473 };
2474 
2475 static uint16_t
2476 tc_call_replace_buffer_storage(struct pipe_context *pipe, void *call)
2477 {
2478    struct tc_replace_buffer_storage *p = to_call(call, tc_replace_buffer_storage);
2479 
2480    p->func(pipe, p->dst, p->src, p->num_rebinds, p->rebind_mask, p->delete_buffer_id);
2481 
2482    tc_drop_resource_reference(p->dst);
2483    tc_drop_resource_reference(p->src);
2484    return call_size(tc_replace_buffer_storage);
2485 }
2486 
2487 /* Return true if the buffer has been invalidated or is idle. */
2488 static bool
2489 tc_invalidate_buffer(struct threaded_context *tc,
2490                      struct threaded_resource *tbuf)
2491 {
2492    if (!tc_is_buffer_busy(tc, tbuf, PIPE_MAP_READ_WRITE)) {
2493       /* It's idle, so reallocating the storage would be a no-op. We still
2494        * clear the valid range because this is technically an invalidation;
2495        * we merely skip the useless reallocation.
2496        *
2497        * If the buffer is bound for write, we can't clear the valid range.
2498        */
2499       if (!tc_is_buffer_bound_for_write(tc, tbuf->buffer_id_unique))
2500          util_range_set_empty(&tbuf->valid_buffer_range);
2501       return true;
2502    }
2503 
2504    struct pipe_screen *screen = tc->base.screen;
2505    struct pipe_resource *new_buf;
2506 
2507    /* Shared, pinned, and sparse buffers can't be reallocated. */
2508    if (tbuf->is_shared ||
2509        tbuf->is_user_ptr ||
2510        tbuf->b.flags & (PIPE_RESOURCE_FLAG_SPARSE | PIPE_RESOURCE_FLAG_UNMAPPABLE))
2511       return false;
2512 
2513    assert(tbuf->b.target == PIPE_BUFFER);
2514    tc->bytes_replaced_estimate += tbuf->b.width0;
2515 
2516    if (tc->bytes_replaced_limit && (tc->bytes_replaced_estimate > tc->bytes_replaced_limit)) {
2517       tc_flush(&tc->base, NULL, PIPE_FLUSH_ASYNC);
2518    }
2519 
2520    /* Allocate a new one. */
2521    new_buf = screen->resource_create(screen, &tbuf->b);
2522    if (!new_buf)
2523       return false;
2524 
2525    /* Replace the "latest" pointer. */
2526    if (tbuf->latest != &tbuf->b)
2527       pipe_resource_reference(&tbuf->latest, NULL);
2528 
2529    tbuf->latest = new_buf;
2530 
2531    uint32_t delete_buffer_id = tbuf->buffer_id_unique;
2532 
2533    /* Enqueue storage replacement of the original buffer. */
2534    struct tc_replace_buffer_storage *p =
2535       tc_add_call(tc, TC_CALL_replace_buffer_storage,
2536                   tc_replace_buffer_storage);
2537 
2538    p->func = tc->replace_buffer_storage;
2539    tc_set_resource_reference(&p->dst, &tbuf->b);
2540    tc_set_resource_reference(&p->src, new_buf);
2541    p->delete_buffer_id = delete_buffer_id;
2542    p->rebind_mask = 0;
2543 
2544    /* Treat the current buffer as the new buffer. */
2545    bool bound_for_write = tc_is_buffer_bound_for_write(tc, tbuf->buffer_id_unique);
2546    p->num_rebinds = tc_rebind_buffer(tc, tbuf->buffer_id_unique,
2547                                      threaded_resource(new_buf)->buffer_id_unique,
2548                                      &p->rebind_mask);
2549 
2550    /* If the buffer is not bound for write, clear the valid range. */
2551    if (!bound_for_write)
2552       util_range_set_empty(&tbuf->valid_buffer_range);
2553 
2554    tbuf->buffer_id_unique = threaded_resource(new_buf)->buffer_id_unique;
2555    threaded_resource(new_buf)->buffer_id_unique = 0;
2556 
2557    return true;
2558 }
2559 
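/* tc_improve_map_buffer_flags upgrades map flags without asking the driver:
 * a range that was never written, or a buffer that isn't busy in any queued
 * batch, can be mapped UNSYNCHRONIZED; a DISCARD_RANGE that covers the whole
 * valid range is promoted to a whole-resource discard, which in turn becomes
 * a buffer invalidation handled entirely inside the threaded context (with
 * DISCARD_RANGE as the fallback when invalidation isn't possible).
 */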
2560 static unsigned
2561 tc_improve_map_buffer_flags(struct threaded_context *tc,
2562                             struct threaded_resource *tres, unsigned usage,
2563                             unsigned offset, unsigned size)
2564 {
2565    /* Never invalidate inside the driver and never infer "unsynchronized". */
2566    unsigned tc_flags = TC_TRANSFER_MAP_NO_INVALIDATE |
2567                        TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED;
2568 
2569    /* Prevent a reentry. */
2570    if (usage & tc_flags)
2571       return usage;
2572 
2573    /* Use the staging upload if it's preferred. */
2574    if (usage & (PIPE_MAP_DISCARD_RANGE |
2575                 PIPE_MAP_DISCARD_WHOLE_RESOURCE) &&
2576        !(usage & PIPE_MAP_PERSISTENT) &&
2577        tres->b.flags & PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY &&
2578        tc->use_forced_staging_uploads) {
2579       usage &= ~(PIPE_MAP_DISCARD_WHOLE_RESOURCE |
2580                  PIPE_MAP_UNSYNCHRONIZED);
2581 
2582       return usage | tc_flags | PIPE_MAP_DISCARD_RANGE;
2583    }
2584 
2585    /* Sparse buffers can't be mapped directly and can't be reallocated
2586     * (fully invalidated). That may just be a radeonsi limitation, but
2587     * the threaded context must obey it with radeonsi.
2588     */
2589    if (tres->b.flags & (PIPE_RESOURCE_FLAG_SPARSE | PIPE_RESOURCE_FLAG_UNMAPPABLE)) {
2590       /* We can use DISCARD_RANGE instead of full discard. This is the only
2591        * fast path for sparse buffers that doesn't need thread synchronization.
2592        */
2593       if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE)
2594          usage |= PIPE_MAP_DISCARD_RANGE;
2595 
2596       /* Allow DISCARD_WHOLE_RESOURCE and inferring UNSYNCHRONIZED in drivers.
2597        * The threaded context doesn't do unsynchronized mappings and
2598        * invalidations of sparse buffers, therefore correct driver behavior
2599        * won't result in incorrect behavior with the threaded context.
2600        */
2601       return usage;
2602    }
2603 
2604    usage |= tc_flags;
2605 
2606    /* Handle CPU reads trivially. */
2607    if (usage & PIPE_MAP_READ) {
2608       if (usage & PIPE_MAP_UNSYNCHRONIZED)
2609          usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* don't sync */
2610 
2611       /* Drivers aren't allowed to do buffer invalidations. */
2612       return usage & ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2613    }
2614 
2615    /* See if the buffer range being mapped has never been initialized or
2616     * the buffer is idle, in which case it can be mapped unsynchronized. */
2617    if (!(usage & PIPE_MAP_UNSYNCHRONIZED) &&
2618        ((!tres->is_shared &&
2619          !util_ranges_intersect(&tres->valid_buffer_range, offset, offset + size)) ||
2620         !tc_is_buffer_busy(tc, tres, usage)))
2621       usage |= PIPE_MAP_UNSYNCHRONIZED;
2622 
2623    if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) {
2624       /* If discarding the entire valid range, discard the whole resource instead. */
2625       if (usage & PIPE_MAP_DISCARD_RANGE &&
2626           util_ranges_covered(&tres->valid_buffer_range, offset, offset + size))
2627          usage |= PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2628 
2629       /* Discard the whole resource if needed. */
2630       if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) {
2631          if (tc_invalidate_buffer(tc, tres))
2632             usage |= PIPE_MAP_UNSYNCHRONIZED;
2633          else
2634             usage |= PIPE_MAP_DISCARD_RANGE; /* fallback */
2635       }
2636    }
2637 
2638    /* We won't need this flag anymore. */
2639    /* TODO: We might not need TC_TRANSFER_MAP_NO_INVALIDATE with this. */
2640    usage &= ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2641 
2642    /* GL_AMD_pinned_memory and persistent mappings can't use staging
2643     * buffers. */
2644    if (usage & (PIPE_MAP_UNSYNCHRONIZED |
2645                 PIPE_MAP_PERSISTENT) ||
2646        tres->is_user_ptr)
2647       usage &= ~PIPE_MAP_DISCARD_RANGE;
2648 
2649    /* Unsynchronized buffer mappings don't have to synchronize the thread. */
2650    if (usage & PIPE_MAP_UNSYNCHRONIZED) {
2651       usage &= ~PIPE_MAP_DISCARD_RANGE;
2652       usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* notify the driver */
2653    }
2654 
2655    return usage;
2656 }
2657 
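/* tc_buffer_map has two fast paths that avoid waking the driver thread:
 * returning a pointer into the malloc'ed CPU storage shadow (flushed back to
 * the GPU elsewhere, assumed on unmap and not shown in this part of the
 * file), and, for DISCARD_RANGE maps, returning a staging allocation from
 * stream_uploader that the driver later applies with resource_copy_region.
 */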
2658 static void *
2659 tc_buffer_map(struct pipe_context *_pipe,
2660               struct pipe_resource *resource, unsigned level,
2661               unsigned usage, const struct pipe_box *box,
2662               struct pipe_transfer **transfer)
2663 {
2664    struct threaded_context *tc = threaded_context(_pipe);
2665    struct threaded_resource *tres = threaded_resource(resource);
2666    struct pipe_context *pipe = tc->pipe;
2667 
2668    /* PIPE_MAP_THREAD_SAFE is used by glthread, which shouldn't use the CPU storage;
2669     * disabling it here shouldn't normally be needed since glthread only uses large buffers.
2670     */
2671    if (usage & PIPE_MAP_THREAD_SAFE)
2672       tc_buffer_disable_cpu_storage(resource);
2673 
2674    usage = tc_improve_map_buffer_flags(tc, tres, usage, box->x, box->width);
2675 
2676    /* If the CPU storage is enabled, return it directly. */
2677    if (tres->allow_cpu_storage && !(usage & TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE)) {
2678       /* We can't let resource_copy_region disable the CPU storage. */
2679       assert(!(tres->b.flags & PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY));
2680 
2681       if (!tres->cpu_storage) {
2682          tres->cpu_storage = align_malloc(resource->width0, tc->map_buffer_alignment);
2683 
2684          if (tres->cpu_storage && tres->valid_buffer_range.end) {
2685             /* The GPU buffer contains valid data. Copy it to the CPU storage. */
2686             struct pipe_box box2;
2687             struct pipe_transfer *transfer2;
2688 
2689             unsigned valid_range_len = tres->valid_buffer_range.end - tres->valid_buffer_range.start;
2690             u_box_1d(tres->valid_buffer_range.start, valid_range_len, &box2);
2691 
2692             tc_sync_msg(tc, "cpu storage GPU -> CPU copy");
2693             tc_set_driver_thread(tc);
2694 
2695             void *ret = pipe->buffer_map(pipe, tres->latest ? tres->latest : resource,
2696                                          0, PIPE_MAP_READ, &box2, &transfer2);
2697             memcpy(&((uint8_t*)tres->cpu_storage)[tres->valid_buffer_range.start],
2698                    ret,
2699                    valid_range_len);
2700             pipe->buffer_unmap(pipe, transfer2);
2701 
2702             tc_clear_driver_thread(tc);
2703          }
2704       }
2705 
2706       if (tres->cpu_storage) {
2707          struct threaded_transfer *ttrans = slab_zalloc(&tc->pool_transfers);
2708          ttrans->b.resource = resource;
2709          ttrans->b.usage = usage;
2710          ttrans->b.box = *box;
2711          ttrans->valid_buffer_range = &tres->valid_buffer_range;
2712          ttrans->cpu_storage_mapped = true;
2713          *transfer = &ttrans->b;
2714 
2715          return (uint8_t*)tres->cpu_storage + box->x;
2716       } else {
2717          tres->allow_cpu_storage = false;
2718       }
2719    }
2720 
2721    /* Do a staging transfer within the threaded context. The driver should
2722     * only get resource_copy_region.
2723     */
2724    if (usage & PIPE_MAP_DISCARD_RANGE) {
2725       struct threaded_transfer *ttrans = slab_zalloc(&tc->pool_transfers);
2726       uint8_t *map;
2727 
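           /* The allocation is padded by (box->x % map_buffer_alignment) and the
            * returned pointer is advanced by the same amount below, so the mapping
            * keeps the same misalignment as box->x in the real buffer.
            * tc_buffer_do_flush_region accounts for this padding when copying back.
            */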
2728       u_upload_alloc(tc->base.stream_uploader, 0,
2729                      box->width + (box->x % tc->map_buffer_alignment),
2730                      tc->map_buffer_alignment, &ttrans->b.offset,
2731                      &ttrans->staging, (void**)&map);
2732       if (!map) {
2733          slab_free(&tc->pool_transfers, ttrans);
2734          return NULL;
2735       }
2736 
2737       ttrans->b.resource = resource;
2738       ttrans->b.level = 0;
2739       ttrans->b.usage = usage;
2740       ttrans->b.box = *box;
2741       ttrans->b.stride = 0;
2742       ttrans->b.layer_stride = 0;
2743       ttrans->valid_buffer_range = &tres->valid_buffer_range;
2744       ttrans->cpu_storage_mapped = false;
2745       *transfer = &ttrans->b;
2746 
2747       p_atomic_inc(&tres->pending_staging_uploads);
2748       util_range_add(resource, &tres->pending_staging_uploads_range,
2749                      box->x, box->x + box->width);
2750 
2751       return map + (box->x % tc->map_buffer_alignment);
2752    }
2753 
2754    if (usage & PIPE_MAP_UNSYNCHRONIZED &&
2755        p_atomic_read(&tres->pending_staging_uploads) &&
2756        util_ranges_intersect(&tres->pending_staging_uploads_range, box->x, box->x + box->width)) {
2757       /* Write conflict detected between a staging transfer and the direct mapping we're
2758        * going to do. Resolve the conflict by ignoring UNSYNCHRONIZED so the direct mapping
2759        * will have to wait for the staging transfer completion.
2760        * Note: The conflict detection is only based on the mapped range, not on the actual
2761        * written range(s).
2762        */
2763       usage &= ~PIPE_MAP_UNSYNCHRONIZED & ~TC_TRANSFER_MAP_THREADED_UNSYNC;
2764       tc->use_forced_staging_uploads = false;
2765    }
2766 
2767    /* Unsynchronized buffer mappings don't have to synchronize the thread. */
2768    if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)) {
2769       tc_sync_msg(tc, usage & PIPE_MAP_DISCARD_RANGE ? "  discard_range" :
2770                       usage & PIPE_MAP_READ ? "  read" : "  staging conflict");
2771       tc_set_driver_thread(tc);
2772    }
2773 
2774    tc->bytes_mapped_estimate += box->width;
2775 
2776    void *ret = pipe->buffer_map(pipe, tres->latest ? tres->latest : resource,
2777                                 level, usage, box, transfer);
2778    threaded_transfer(*transfer)->valid_buffer_range = &tres->valid_buffer_range;
2779    threaded_transfer(*transfer)->cpu_storage_mapped = false;
2780 
2781    if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC))
2782       tc_clear_driver_thread(tc);
2783 
2784    return ret;
2785 }
2786 
2787 static void *
2788 tc_texture_map(struct pipe_context *_pipe,
2789                struct pipe_resource *resource, unsigned level,
2790                unsigned usage, const struct pipe_box *box,
2791                struct pipe_transfer **transfer)
2792 {
2793    struct threaded_context *tc = threaded_context(_pipe);
2794    struct threaded_resource *tres = threaded_resource(resource);
2795    struct pipe_context *pipe = tc->pipe;
2796 
2797    tc_sync_msg(tc, "texture");
2798    tc_set_driver_thread(tc);
2799    /* block all unsync texture subdata during map */
2800    tc_set_resource_batch_usage_persistent(tc, resource, true);
2801 
2802    tc->bytes_mapped_estimate += box->width;
2803 
2804    void *ret = pipe->texture_map(pipe, tres->latest ? tres->latest : resource,
2805                                  level, usage, box, transfer);
2806 
2807    if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC))
2808       tc_clear_driver_thread(tc);
2809 
2810    return ret;
2811 }
2812 
2813 struct tc_transfer_flush_region {
2814    struct tc_call_base base;
2815    struct pipe_box box;
2816    struct pipe_transfer *transfer;
2817 };
2818 
2819 static uint16_t
2820 tc_call_transfer_flush_region(struct pipe_context *pipe, void *call)
2821 {
2822    struct tc_transfer_flush_region *p = to_call(call, tc_transfer_flush_region);
2823 
2824    pipe->transfer_flush_region(pipe, p->transfer, &p->box);
2825    return call_size(tc_transfer_flush_region);
2826 }
2827 
2828 struct tc_resource_copy_region {
2829    struct tc_call_base base;
2830    unsigned dst_level;
2831    unsigned dstx, dsty, dstz;
2832    unsigned src_level;
2833    struct pipe_box src_box;
2834    struct pipe_resource *dst;
2835    struct pipe_resource *src;
2836 };
2837 
2838 static void
2839 tc_resource_copy_region(struct pipe_context *_pipe,
2840                         struct pipe_resource *dst, unsigned dst_level,
2841                         unsigned dstx, unsigned dsty, unsigned dstz,
2842                         struct pipe_resource *src, unsigned src_level,
2843                         const struct pipe_box *src_box);
2844 
2845 static void
2846 tc_buffer_do_flush_region(struct threaded_context *tc,
2847                           struct threaded_transfer *ttrans,
2848                           const struct pipe_box *box)
2849 {
2850    struct threaded_resource *tres = threaded_resource(ttrans->b.resource);
2851 
2852    if (ttrans->staging) {
2853       struct pipe_box src_box;
2854 
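           /* The source offset mirrors the padded staging allocation made in
            * tc_buffer_map: upload offset + alignment padding + the offset of the
            * flushed box within the mapped box.
            */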
2855       u_box_1d(ttrans->b.offset + ttrans->b.box.x % tc->map_buffer_alignment +
2856                (box->x - ttrans->b.box.x),
2857                box->width, &src_box);
2858 
2859       /* Copy the staging buffer into the original one. */
2860       tc_resource_copy_region(&tc->base, ttrans->b.resource, 0, box->x, 0, 0,
2861                               ttrans->staging, 0, &src_box);
2862    }
2863 
2864    /* Don't update the valid range when we're uploading the CPU storage
2865     * because it includes the uninitialized range too.
2866     */
2867    if (!(ttrans->b.usage & TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE)) {
2868       util_range_add(&tres->b, ttrans->valid_buffer_range,
2869                      box->x, box->x + box->width);
2870    }
2871 }
2872 
2873 static void
2874 tc_transfer_flush_region(struct pipe_context *_pipe,
2875                          struct pipe_transfer *transfer,
2876                          const struct pipe_box *rel_box)
2877 {
2878    struct threaded_context *tc = threaded_context(_pipe);
2879    struct threaded_transfer *ttrans = threaded_transfer(transfer);
2880    struct threaded_resource *tres = threaded_resource(transfer->resource);
2881    unsigned required_usage = PIPE_MAP_WRITE |
2882                              PIPE_MAP_FLUSH_EXPLICIT;
2883 
2884    if (tres->b.target == PIPE_BUFFER) {
2885       if ((transfer->usage & required_usage) == required_usage) {
2886          struct pipe_box box;
2887 
2888          u_box_1d(transfer->box.x + rel_box->x, rel_box->width, &box);
2889          tc_buffer_do_flush_region(tc, ttrans, &box);
2890       }
2891 
2892       /* Staging transfers don't send the call to the driver.
2893        *
2894        * Transfers using the CPU storage shouldn't call transfer_flush_region
2895        * in the driver because the buffer is not really mapped on the driver
2896        * side and the CPU storage always re-uploads everything (flush_region
2897        * makes no difference).
2898        */
2899       if (ttrans->staging || ttrans->cpu_storage_mapped)
2900          return;
2901    }
2902 
2903    struct tc_transfer_flush_region *p =
2904       tc_add_call(tc, TC_CALL_transfer_flush_region, tc_transfer_flush_region);
2905    p->transfer = transfer;
2906    p->box = *rel_box;
2907 }
2908 
2909 struct tc_buffer_unmap {
2910    struct tc_call_base base;
2911    bool was_staging_transfer;
2912    union {
2913       struct pipe_transfer *transfer;
2914       struct pipe_resource *resource;
2915    };
2916 };
2917 
2918 static uint16_t
2919 tc_call_buffer_unmap(struct pipe_context *pipe, void *call)
2920 {
2921    struct tc_buffer_unmap *p = to_call(call, tc_buffer_unmap);
2922 
2923    if (p->was_staging_transfer) {
2924       struct threaded_resource *tres = threaded_resource(p->resource);
2925       /* Nothing to do except keeping track of staging uploads */
2926       assert(tres->pending_staging_uploads > 0);
2927       p_atomic_dec(&tres->pending_staging_uploads);
2928       tc_drop_resource_reference(p->resource);
2929    } else {
2930       pipe->buffer_unmap(pipe, p->transfer);
2931    }
2932 
2933    return call_size(tc_buffer_unmap);
2934 }
2935 
2936 static void
2937 tc_buffer_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
2938 {
2939    struct threaded_context *tc = threaded_context(_pipe);
2940    struct threaded_transfer *ttrans = threaded_transfer(transfer);
2941    struct threaded_resource *tres = threaded_resource(transfer->resource);
2942 
2943    /* PIPE_MAP_THREAD_SAFE is only valid with UNSYNCHRONIZED. It can be
2944     * called from any thread and bypasses all multithreaded queues.
2945     */
2946    if (transfer->usage & PIPE_MAP_THREAD_SAFE) {
2947       assert(transfer->usage & PIPE_MAP_UNSYNCHRONIZED);
2948       assert(!(transfer->usage & (PIPE_MAP_FLUSH_EXPLICIT |
2949                                   PIPE_MAP_DISCARD_RANGE)));
2950 
2951       struct pipe_context *pipe = tc->pipe;
2952       util_range_add(&tres->b, ttrans->valid_buffer_range,
2953                       transfer->box.x, transfer->box.x + transfer->box.width);
2954 
2955       pipe->buffer_unmap(pipe, transfer);
2956       return;
2957    }
2958 
2959    if (transfer->usage & PIPE_MAP_WRITE &&
2960        !(transfer->usage & PIPE_MAP_FLUSH_EXPLICIT))
2961       tc_buffer_do_flush_region(tc, ttrans, &transfer->box);
2962 
2963    if (ttrans->cpu_storage_mapped) {
2964       /* GL allows simultaneous GPU stores with mapped buffers as long as GPU stores don't
2965        * touch the mapped range. That's a problem because GPU stores free the CPU storage.
2966        * If that happens, we just ignore the unmap call and don't upload anything to prevent
2967        * a crash.
2968        *
2969        * Disallow the CPU storage in the driver to work around this.
2970        */
2971       assert(tres->cpu_storage);
2972 
2973       if (tres->cpu_storage) {
2974          tc_invalidate_buffer(tc, tres);
2975          tc_buffer_subdata(&tc->base, &tres->b,
2976                            PIPE_MAP_UNSYNCHRONIZED |
2977                            TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE,
2978                            0, tres->b.width0, tres->cpu_storage);
2979          /* This shouldn't have been freed by buffer_subdata. */
2980          assert(tres->cpu_storage);
2981       } else {
2982          static bool warned_once = false;
2983          if (!warned_once) {
2984             fprintf(stderr, "This application is incompatible with cpu_storage.\n");
2985             fprintf(stderr, "Use tc_max_cpu_storage_size=0 to disable it and report this issue to Mesa.\n");
2986             warned_once = true;
2987          }
2988       }
2989 
2990       tc_drop_resource_reference(ttrans->staging);
2991       slab_free(&tc->pool_transfers, ttrans);
2992       return;
2993    }
2994 
2995    bool was_staging_transfer = false;
2996 
2997    if (ttrans->staging) {
2998       was_staging_transfer = true;
2999 
3000       tc_drop_resource_reference(ttrans->staging);
3001       slab_free(&tc->pool_transfers, ttrans);
3002    }
3003 
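        /* Even for staging transfers a call is enqueued: the driver-side callback
         * decrements pending_staging_uploads in submission order and drops the
         * resource reference taken below.
         */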
3004    struct tc_buffer_unmap *p = tc_add_call(tc, TC_CALL_buffer_unmap,
3005                                            tc_buffer_unmap);
3006    if (was_staging_transfer) {
3007       tc_set_resource_reference(&p->resource, &tres->b);
3008       p->was_staging_transfer = true;
3009    } else {
3010       p->transfer = transfer;
3011       p->was_staging_transfer = false;
3012    }
3013 
3014    /* tc_buffer_map directly maps the buffers, but tc_buffer_unmap
3015     * defers the unmap operation to the batch execution.
3016     * bytes_mapped_estimate is an estimation of the map/unmap bytes delta
3017     * and if it goes over an optional limit the current batch is flushed,
3018     * to reclaim some RAM. */
3019    if (!ttrans->staging && tc->bytes_mapped_limit &&
3020        tc->bytes_mapped_estimate > tc->bytes_mapped_limit) {
3021       tc_flush(_pipe, NULL, PIPE_FLUSH_ASYNC);
3022    }
3023 }
3024 
3025 struct tc_texture_unmap {
3026    struct tc_call_base base;
3027    struct pipe_transfer *transfer;
3028 };
3029 
3030 static uint16_t
3031 tc_call_texture_unmap(struct pipe_context *pipe, void *call)
3032 {
3033    struct tc_texture_unmap *p = (struct tc_texture_unmap *) call;
3034 
3035    pipe->texture_unmap(pipe, p->transfer);
3036    return call_size(tc_texture_unmap);
3037 }
3038 
3039 static void
3040 tc_texture_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
3041 {
3042    struct threaded_context *tc = threaded_context(_pipe);
3043    struct threaded_transfer *ttrans = threaded_transfer(transfer);
3044 
3045    /* enable subdata again once resource is no longer mapped */
3046    tc_set_resource_batch_usage_persistent(tc, transfer->resource, false);
3047 
3048    tc_add_call(tc, TC_CALL_texture_unmap, tc_texture_unmap)->transfer = transfer;
3049 
3050    /* tc_texture_map directly maps the textures, but tc_texture_unmap
3051     * defers the unmap operation to the batch execution.
3052     * bytes_mapped_estimate is an estimation of the map/unmap bytes delta
3053     * and if it goes over an optional limit the current batch is flushed,
3054     * to reclaim some RAM. */
3055    if (!ttrans->staging && tc->bytes_mapped_limit &&
3056        tc->bytes_mapped_estimate > tc->bytes_mapped_limit) {
3057       tc_flush(_pipe, NULL, PIPE_FLUSH_ASYNC);
3058    }
3059 }
3060 
3061 struct tc_buffer_subdata {
3062    struct tc_call_base base;
3063    unsigned usage, offset, size;
3064    struct pipe_resource *resource;
3065    char slot[0]; /* more will be allocated if needed */
3066 };
3067 
3068 static uint16_t
3069 tc_call_buffer_subdata(struct pipe_context *pipe, void *call)
3070 {
3071    struct tc_buffer_subdata *p = (struct tc_buffer_subdata *)call;
3072 
3073    pipe->buffer_subdata(pipe, p->resource, p->usage, p->offset, p->size,
3074                         p->slot);
3075    tc_drop_resource_reference(p->resource);
3076    return p->base.num_slots;
3077 }
3078 
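     /* A buffer_subdata call can be merged into the previous call only if that
      * call is also buffer_subdata on the same resource with the same usage and
      * its range ends exactly where the new range starts.
      */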
3079 static bool
3080 is_mergeable_buffer_subdata(const struct tc_call_base *previous_call,
3081                             unsigned usage, unsigned offset,
3082                             struct pipe_resource *resource)
3083 {
3084    if (!previous_call || previous_call->call_id != TC_CALL_buffer_subdata)
3085       return false;
3086 
3087    struct tc_buffer_subdata *subdata = (struct tc_buffer_subdata *)previous_call;
3088 
3089    return subdata->usage == usage && subdata->resource == resource
3090           && (subdata->offset + subdata->size) == offset;
3091 }
3092 
3093 static void
3094 tc_buffer_subdata(struct pipe_context *_pipe,
3095                   struct pipe_resource *resource,
3096                   unsigned usage, unsigned offset,
3097                   unsigned size, const void *data)
3098 {
3099    struct threaded_context *tc = threaded_context(_pipe);
3100    struct threaded_resource *tres = threaded_resource(resource);
3101 
3102    if (!size)
3103       return;
3104 
3105    usage |= PIPE_MAP_WRITE;
3106 
3107    /* PIPE_MAP_DIRECTLY suppresses implicit DISCARD_RANGE. */
3108    if (!(usage & PIPE_MAP_DIRECTLY))
3109       usage |= PIPE_MAP_DISCARD_RANGE;
3110 
3111    usage = tc_improve_map_buffer_flags(tc, tres, usage, offset, size);
3112 
3113    /* Unsynchronized and big transfers should use transfer_map. Also handle
3114     * full invalidations, because drivers aren't allowed to do them.
3115     */
3116    if (usage & (PIPE_MAP_UNSYNCHRONIZED |
3117                 PIPE_MAP_DISCARD_WHOLE_RESOURCE) ||
3118        size > TC_MAX_SUBDATA_BYTES ||
3119        tres->cpu_storage) {
3120       struct pipe_transfer *transfer;
3121       struct pipe_box box;
3122       uint8_t *map = NULL;
3123 
3124       u_box_1d(offset, size, &box);
3125 
3126       /* CPU storage is only useful for partial updates. It can add overhead
3127        * on glBufferData calls so avoid using it.
3128        */
3129       if (!tres->cpu_storage && offset == 0 && size == resource->width0)
3130          usage |= TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE;
3131 
3132       map = tc_buffer_map(_pipe, resource, 0, usage, &box, &transfer);
3133       if (map) {
3134          memcpy(map, data, size);
3135          tc_buffer_unmap(_pipe, transfer);
3136       }
3137       return;
3138    }
3139 
3140    util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size);
3141 
3142    /* We can potentially merge this subdata call with the previous one (if any),
3143     * if the application does a whole-buffer upload piecewise. */
3144    {
3145       struct tc_call_base *last_call = tc_get_last_mergeable_call(tc);
3146       struct tc_buffer_subdata *merge_dest = (struct tc_buffer_subdata *)last_call;
3147 
3148       if (is_mergeable_buffer_subdata(last_call, usage, offset, resource) &&
3149          tc_enlarge_last_mergeable_call(tc, call_size_with_slots(tc_buffer_subdata, merge_dest->size + size))) {
3150          memcpy(merge_dest->slot + merge_dest->size, data, size);
3151          merge_dest->size += size;
3152 
3153          /* TODO: We *could* do an invalidate + upload here if we detect that
3154           * the merged subdata call overwrites the entire buffer. However, that's
3155           * a little complicated since we can't add further calls to our batch
3156           * until we have removed the merged subdata call, which means that
3157           * calling tc_invalidate_buffer before we have removed the call will
3158           * blow things up.
3159           *
3160           * Just leave a large, merged subdata call in the batch for now, which is
3161           * at least better than tons of tiny subdata calls.
3162           */
3163 
3164          return;
3165       }
3166    }
3167 
3168    /* The upload is small. Enqueue it. */
3169    struct tc_buffer_subdata *p =
3170       tc_add_slot_based_call(tc, TC_CALL_buffer_subdata, tc_buffer_subdata, size);
3171 
3172    tc_set_resource_reference(&p->resource, resource);
3173    /* The buffer will always be busy here; otherwise tc_improve_map_buffer_flags
3174     * would have set UNSYNCHRONIZED and we wouldn't get here.
3175     */
3176    tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], resource);
3177    p->usage = usage;
3178    p->offset = offset;
3179    p->size = size;
3180    memcpy(p->slot, data, size);
3181 
3182    tc_mark_call_mergeable(tc, &p->base);
3183 }
3184 
3185 struct tc_texture_subdata {
3186    struct tc_call_base base;
3187    unsigned level, usage, stride;
3188    struct pipe_box box;
3189    struct pipe_resource *resource;
3190    uintptr_t layer_stride;
3191    char slot[0]; /* more will be allocated if needed */
3192 };
3193 
3194 static uint16_t
3195 tc_call_texture_subdata(struct pipe_context *pipe, void *call)
3196 {
3197    struct tc_texture_subdata *p = (struct tc_texture_subdata *)call;
3198 
3199    pipe->texture_subdata(pipe, p->resource, p->level, p->usage, &p->box,
3200                          p->slot, p->stride, p->layer_stride);
3201    tc_drop_resource_reference(p->resource);
3202    return p->base.num_slots;
3203 }
3204 
3205 static void
3206 tc_texture_subdata(struct pipe_context *_pipe,
3207                    struct pipe_resource *resource,
3208                    unsigned level, unsigned usage,
3209                    const struct pipe_box *box,
3210                    const void *data, unsigned stride,
3211                    uintptr_t layer_stride)
3212 {
3213    struct threaded_context *tc = threaded_context(_pipe);
3214    uint64_t size;
3215 
3216    assert(box->height >= 1);
3217    assert(box->depth >= 1);
3218 
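        /* Upper bound on the size of the source data: whole layers, then whole
         * rows of the last layer, then the last row.
         */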
3219    size = (box->depth - 1) * layer_stride +
3220           (box->height - 1) * (uint64_t)stride +
3221           box->width * util_format_get_blocksize(resource->format);
3222    if (!size)
3223       return;
3224 
3225    /* Small uploads can be enqueued, big uploads must sync. */
3226    if (size <= TC_MAX_SUBDATA_BYTES) {
3227       struct tc_texture_subdata *p =
3228          tc_add_slot_based_call(tc, TC_CALL_texture_subdata, tc_texture_subdata, size);
3229 
3230       tc_set_resource_batch_usage(tc, resource);
3231       tc_set_resource_reference(&p->resource, resource);
3232       p->level = level;
3233       p->usage = usage;
3234       p->box = *box;
3235       p->stride = stride;
3236       p->layer_stride = layer_stride;
3237       memcpy(p->slot, data, size);
3238    } else {
3239       struct pipe_context *pipe = tc->pipe;
3240       struct threaded_resource *tres = threaded_resource(resource);
3241       unsigned unsync_usage = TC_TRANSFER_MAP_THREADED_UNSYNC | PIPE_MAP_UNSYNCHRONIZED | PIPE_MAP_WRITE;
3242       bool can_unsync = !tc_resource_batch_usage_test_busy(tc, resource) &&
3243                         tc->options.is_resource_busy &&
3244                         !tc->options.is_resource_busy(tc->pipe->screen, tres->latest, usage | unsync_usage);
3245 
3246       if (!can_unsync && resource->usage != PIPE_USAGE_STAGING &&
3247           tc->options.parse_renderpass_info && tc->in_renderpass) {
3248          enum pipe_format format = resource->format;
3249          if (usage & PIPE_MAP_DEPTH_ONLY)
3250             format = util_format_get_depth_only(format);
3251          else if (usage & PIPE_MAP_STENCIL_ONLY)
3252             format = PIPE_FORMAT_S8_UINT;
3253 
3254          unsigned fmt_stride = util_format_get_stride(format, box->width);
3255          uint64_t fmt_layer_stride = util_format_get_2d_size(format, stride, box->height);
3256          assert(fmt_layer_stride * box->depth <= UINT32_MAX);
3257 
3258          struct pipe_resource *pres = pipe_buffer_create(pipe->screen, 0, PIPE_USAGE_STREAM, layer_stride * box->depth);
3259          pipe->buffer_subdata(pipe, pres, unsync_usage, 0, layer_stride * box->depth, data);
3260          struct pipe_box src_box = *box;
3261          src_box.x = src_box.y = src_box.z = 0;
3262 
3263          if (fmt_stride == stride && fmt_layer_stride == layer_stride) {
3264             /* if stride matches, a single copy is fine */
3265             tc->base.resource_copy_region(&tc->base, resource, level, box->x, box->y, box->z, pres, 0, &src_box);
3266          } else {
3267             /* if stride doesn't match, inline util_copy_box on the GPU and assume the driver will optimize */
3268             src_box.depth = 1;
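                 /* The staging resource is a buffer, so src_box.x advances in
                  * bytes (layer_stride per layer and, in the row-by-row path
                  * below, stride per row), while the destination coordinates
                  * are converted to block units.
                  */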
3269             for (unsigned z = 0; z < box->depth; ++z, src_box.x = z * layer_stride) {
3270                unsigned dst_x = box->x, dst_y = box->y, width = box->width, height = box->height, dst_z = box->z + z;
3271                int blocksize = util_format_get_blocksize(format);
3272                int blockwidth = util_format_get_blockwidth(format);
3273                int blockheight = util_format_get_blockheight(format);
3274 
3275                assert(blocksize > 0);
3276                assert(blockwidth > 0);
3277                assert(blockheight > 0);
3278 
3279                dst_x /= blockwidth;
3280                dst_y /= blockheight;
3281                width = DIV_ROUND_UP(width, blockwidth);
3282                height = DIV_ROUND_UP(height, blockheight);
3283 
3284                width *= blocksize;
3285 
3286                if (width == fmt_stride && width == (unsigned)stride) {
3287                   ASSERTED uint64_t size = (uint64_t)height * width;
3288 
3289                   assert(size <= SIZE_MAX);
3290                   assert(dst_x + src_box.width < u_minify(pres->width0, level));
3291                   assert(dst_y + src_box.height < u_minify(pres->height0, level));
3292                   assert(pres->target != PIPE_TEXTURE_3D ||  z + src_box.depth < u_minify(pres->depth0, level));
3293                   tc->base.resource_copy_region(&tc->base, resource, level, dst_x, dst_y, dst_z, pres, 0, &src_box);
3294                } else {
3295                   src_box.height = 1;
3296                   for (unsigned i = 0; i < height; i++, dst_y++, src_box.x += stride)
3297                      tc->base.resource_copy_region(&tc->base, resource, level, dst_x, dst_y, dst_z, pres, 0, &src_box);
3298                }
3299             }
3300          }
3301 
3302          pipe_resource_reference(&pres, NULL);
3303       } else {
3304          if (can_unsync) {
3305             usage |= unsync_usage;
3306          } else {
3307             tc_sync(tc);
3308             tc_set_driver_thread(tc);
3309          }
3310          pipe->texture_subdata(pipe, resource, level, usage, box, data,
3311                               stride, layer_stride);
3312          if (!can_unsync)
3313             tc_clear_driver_thread(tc);
3314       }
3315    }
3316 }
3317 
3318 
3319 /********************************************************************
3320  * miscellaneous
3321  */
3322 
3323 #define TC_FUNC_SYNC_RET0(ret_type, func) \
3324    static ret_type \
3325    tc_##func(struct pipe_context *_pipe) \
3326    { \
3327       struct threaded_context *tc = threaded_context(_pipe); \
3328       struct pipe_context *pipe = tc->pipe; \
3329       tc_sync(tc); \
3330       return pipe->func(pipe); \
3331    }
3332 
3333 TC_FUNC_SYNC_RET0(uint64_t, get_timestamp)
3334 
3335 static void
3336 tc_get_sample_position(struct pipe_context *_pipe,
3337                        unsigned sample_count, unsigned sample_index,
3338                        float *out_value)
3339 {
3340    struct threaded_context *tc = threaded_context(_pipe);
3341    struct pipe_context *pipe = tc->pipe;
3342 
3343    pipe->get_sample_position(pipe, sample_count, sample_index,
3344                              out_value);
3345 }
3346 
3347 static enum pipe_reset_status
3348 tc_get_device_reset_status(struct pipe_context *_pipe)
3349 {
3350    struct threaded_context *tc = threaded_context(_pipe);
3351    struct pipe_context *pipe = tc->pipe;
3352 
3353    if (!tc->options.unsynchronized_get_device_reset_status)
3354       tc_sync(tc);
3355 
3356    return pipe->get_device_reset_status(pipe);
3357 }
3358 
3359 static void
3360 tc_set_device_reset_callback(struct pipe_context *_pipe,
3361                              const struct pipe_device_reset_callback *cb)
3362 {
3363    struct threaded_context *tc = threaded_context(_pipe);
3364    struct pipe_context *pipe = tc->pipe;
3365 
3366    tc_sync(tc);
3367    pipe->set_device_reset_callback(pipe, cb);
3368 }
3369 
3370 struct tc_string_marker {
3371    struct tc_call_base base;
3372    int len;
3373    char slot[0]; /* more will be allocated if needed */
3374 };
3375 
3376 static uint16_t
3377 tc_call_emit_string_marker(struct pipe_context *pipe, void *call)
3378 {
3379    struct tc_string_marker *p = (struct tc_string_marker *)call;
3380    pipe->emit_string_marker(pipe, p->slot, p->len);
3381    return p->base.num_slots;
3382 }
3383 
3384 static void
3385 tc_emit_string_marker(struct pipe_context *_pipe,
3386                       const char *string, int len)
3387 {
3388    struct threaded_context *tc = threaded_context(_pipe);
3389 
3390    if (len <= TC_MAX_STRING_MARKER_BYTES) {
3391       struct tc_string_marker *p =
3392          tc_add_slot_based_call(tc, TC_CALL_emit_string_marker, tc_string_marker, len);
3393 
3394       memcpy(p->slot, string, len);
3395       p->len = len;
3396    } else {
3397       struct pipe_context *pipe = tc->pipe;
3398 
3399       tc_sync(tc);
3400       tc_set_driver_thread(tc);
3401       pipe->emit_string_marker(pipe, string, len);
3402       tc_clear_driver_thread(tc);
3403    }
3404 }
3405 
3406 static void
3407 tc_dump_debug_state(struct pipe_context *_pipe, FILE *stream,
3408                     unsigned flags)
3409 {
3410    struct threaded_context *tc = threaded_context(_pipe);
3411    struct pipe_context *pipe = tc->pipe;
3412 
3413    tc_sync(tc);
3414    pipe->dump_debug_state(pipe, stream, flags);
3415 }
3416 
3417 static void
3418 tc_set_debug_callback(struct pipe_context *_pipe,
3419                       const struct util_debug_callback *cb)
3420 {
3421    struct threaded_context *tc = threaded_context(_pipe);
3422    struct pipe_context *pipe = tc->pipe;
3423 
3424    tc_sync(tc);
3425 
3426    /* Drop all synchronous debug callbacks. Drivers are expected to be OK
3427     * with this. shader-db will use an environment variable to disable
3428     * the threaded context.
3429     */
3430    if (cb && !cb->async)
3431       pipe->set_debug_callback(pipe, NULL);
3432    else
3433       pipe->set_debug_callback(pipe, cb);
3434 }
3435 
3436 static void
3437 tc_set_log_context(struct pipe_context *_pipe, struct u_log_context *log)
3438 {
3439    struct threaded_context *tc = threaded_context(_pipe);
3440    struct pipe_context *pipe = tc->pipe;
3441 
3442    tc_sync(tc);
3443    pipe->set_log_context(pipe, log);
3444 }
3445 
3446 static void
3447 tc_create_fence_fd(struct pipe_context *_pipe,
3448                    struct pipe_fence_handle **fence, int fd,
3449                    enum pipe_fd_type type)
3450 {
3451    struct threaded_context *tc = threaded_context(_pipe);
3452    struct pipe_context *pipe = tc->pipe;
3453 
3454    if (!tc->options.unsynchronized_create_fence_fd)
3455       tc_sync(tc);
3456 
3457    pipe->create_fence_fd(pipe, fence, fd, type);
3458 }
3459 
3460 struct tc_fence_call {
3461    struct tc_call_base base;
3462    struct pipe_fence_handle *fence;
3463 };
3464 
3465 static uint16_t
3466 tc_call_fence_server_sync(struct pipe_context *pipe, void *call)
3467 {
3468    struct pipe_fence_handle *fence = to_call(call, tc_fence_call)->fence;
3469 
3470    pipe->fence_server_sync(pipe, fence);
3471    pipe->screen->fence_reference(pipe->screen, &fence, NULL);
3472    return call_size(tc_fence_call);
3473 }
3474 
3475 static void
3476 tc_fence_server_sync(struct pipe_context *_pipe,
3477                      struct pipe_fence_handle *fence)
3478 {
3479    struct threaded_context *tc = threaded_context(_pipe);
3480    struct pipe_screen *screen = tc->pipe->screen;
3481    struct tc_fence_call *call = tc_add_call(tc, TC_CALL_fence_server_sync,
3482                                             tc_fence_call);
3483 
3484    call->fence = NULL;
3485    screen->fence_reference(screen, &call->fence, fence);
3486 }
3487 
3488 static void
3489 tc_fence_server_signal(struct pipe_context *_pipe,
3490                            struct pipe_fence_handle *fence)
3491 {
3492    struct threaded_context *tc = threaded_context(_pipe);
3493    struct pipe_context *pipe = tc->pipe;
3494    tc_sync(tc);
3495    pipe->fence_server_signal(pipe, fence);
3496 }
3497 
3498 static struct pipe_video_codec *
3499 tc_create_video_codec(UNUSED struct pipe_context *_pipe,
3500                       UNUSED const struct pipe_video_codec *templ)
3501 {
3502    unreachable("Threaded context should not be enabled for video APIs");
3503    return NULL;
3504 }
3505 
3506 static struct pipe_video_buffer *
3507 tc_create_video_buffer(UNUSED struct pipe_context *_pipe,
3508                        UNUSED const struct pipe_video_buffer *templ)
3509 {
3510    unreachable("Threaded context should not be enabled for video APIs");
3511    return NULL;
3512 }
3513 
3514 struct tc_context_param {
3515    struct tc_call_base base;
3516    enum pipe_context_param param;
3517    unsigned value;
3518 };
3519 
3520 static uint16_t
3521 tc_call_set_context_param(struct pipe_context *pipe, void *call)
3522 {
3523    struct tc_context_param *p = to_call(call, tc_context_param);
3524 
3525    if (pipe->set_context_param)
3526       pipe->set_context_param(pipe, p->param, p->value);
3527 
3528    return call_size(tc_context_param);
3529 }
3530 
3531 static void
3532 tc_set_context_param(struct pipe_context *_pipe,
3533                            enum pipe_context_param param,
3534                            unsigned value)
3535 {
3536    struct threaded_context *tc = threaded_context(_pipe);
3537 
3538    if (param == PIPE_CONTEXT_PARAM_UPDATE_THREAD_SCHEDULING) {
3539       util_thread_sched_apply_policy(tc->queue.threads[0],
3540                                      UTIL_THREAD_THREADED_CONTEXT, value,
3541                                      NULL);
3542 
3543       /* Execute this immediately (without enqueuing).
3544        * It's required to be thread-safe.
3545        */
3546       struct pipe_context *pipe = tc->pipe;
3547       if (pipe->set_context_param)
3548          pipe->set_context_param(pipe, param, value);
3549       return;
3550    }
3551 
3552    if (tc->pipe->set_context_param) {
3553       struct tc_context_param *call =
3554          tc_add_call(tc, TC_CALL_set_context_param, tc_context_param);
3555 
3556       call->param = param;
3557       call->value = value;
3558    }
3559 }
3560 
3561 
3562 /********************************************************************
3563  * draw, launch, clear, blit, copy, flush
3564  */
3565 
3566 struct tc_flush_deferred_call {
3567    struct tc_call_base base;
3568    unsigned flags;
3569    struct pipe_fence_handle *fence;
3570 };
3571 
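     /* Same layout as tc_flush_deferred_call, plus the threaded_context pointer
      * that the non-deferred flush callback uses to mark unflushed queries as
      * flushed (tc_flush_queries).
      */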
3572 struct tc_flush_call {
3573    struct tc_call_base base;
3574    unsigned flags;
3575    struct pipe_fence_handle *fence;
3576    struct threaded_context *tc;
3577 };
3578 
3579 static void
3580 tc_flush_queries(struct threaded_context *tc)
3581 {
3582    struct threaded_query *tq, *tmp;
3583    LIST_FOR_EACH_ENTRY_SAFE(tq, tmp, &tc->unflushed_queries, head_unflushed) {
3584       list_del(&tq->head_unflushed);
3585 
3586       /* Memory release semantics: due to a possible race with
3587        * tc_get_query_result, we must ensure that the linked list changes
3588        * are visible before setting tq->flushed.
3589        */
3590       p_atomic_set(&tq->flushed, true);
3591    }
3592 }
3593 
3594 static uint16_t
3595 tc_call_flush_deferred(struct pipe_context *pipe, void *call)
3596 {
3597    struct tc_flush_deferred_call *p = to_call(call, tc_flush_deferred_call);
3598    struct pipe_screen *screen = pipe->screen;
3599 
3600    pipe->flush(pipe, p->fence ? &p->fence : NULL, p->flags);
3601    screen->fence_reference(screen, &p->fence, NULL);
3602 
3603    return call_size(tc_flush_deferred_call);
3604 }
3605 
3606 static uint16_t
3607 tc_call_flush(struct pipe_context *pipe, void *call)
3608 {
3609    struct tc_flush_call *p = to_call(call, tc_flush_call);
3610    struct pipe_screen *screen = pipe->screen;
3611 
3612    pipe->flush(pipe, p->fence ? &p->fence : NULL, p->flags);
3613    screen->fence_reference(screen, &p->fence, NULL);
3614 
3615    tc_flush_queries(p->tc);
3616 
3617    return call_size(tc_flush_call);
3618 }
3619 
3620 static void
3621 tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
3622          unsigned flags)
3623 {
3624    struct threaded_context *tc = threaded_context(_pipe);
3625    struct pipe_context *pipe = tc->pipe;
3626    struct pipe_screen *screen = pipe->screen;
3627    bool async = flags & (PIPE_FLUSH_DEFERRED | PIPE_FLUSH_ASYNC);
3628    bool deferred = (flags & PIPE_FLUSH_DEFERRED) > 0;
3629 
3630    if (!deferred || !fence)
3631       tc->in_renderpass = false;
3632 
3633    if (async && tc->options.create_fence) {
3634       if (fence) {
3635          struct tc_batch *next = &tc->batch_slots[tc->next];
3636 
3637          if (!next->token) {
3638             next->token = malloc(sizeof(*next->token));
3639             if (!next->token)
3640                goto out_of_memory;
3641 
3642             pipe_reference_init(&next->token->ref, 1);
3643             next->token->tc = tc;
3644          }
3645 
3646          screen->fence_reference(screen, fence,
3647                                  tc->options.create_fence(pipe, next->token));
3648          if (!*fence)
3649             goto out_of_memory;
3650       }
3651 
3652       struct tc_flush_call *p;
3653       if (deferred) {
3654          /* these have identical fields */
3655          p = (struct tc_flush_call *)tc_add_call(tc, TC_CALL_flush_deferred, tc_flush_deferred_call);
3656       } else {
3657          p = tc_add_call(tc, TC_CALL_flush, tc_flush_call);
3658          p->tc = tc;
3659       }
3660       p->fence = fence ? *fence : NULL;
3661       p->flags = flags | TC_FLUSH_ASYNC;
3662 
3663       if (!deferred) {
3664          /* non-deferred async flushes indicate completion of existing renderpass info */
3665          tc_signal_renderpass_info_ready(tc);
3666          tc_batch_flush(tc, false);
3667          tc->seen_fb_state = false;
3668       }
3669 
3670       return;
3671    }
3672 
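        /* Fallback: synchronous flush on the application thread. Used when the
         * flush isn't asynchronous, the driver has no create_fence callback, or
         * the fence/token allocation above failed.
         */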
3673 out_of_memory:
3674    tc->flushing = true;
3675    /* renderpass info is signaled during sync */
3676    tc_sync_msg(tc, flags & PIPE_FLUSH_END_OF_FRAME ? "end of frame" :
3677                    flags & PIPE_FLUSH_DEFERRED ? "deferred fence" : "normal");
3678 
3679    if (!deferred) {
3680       tc_flush_queries(tc);
3681       tc->seen_fb_state = false;
3682       tc->query_ended = false;
3683    }
3684    tc_set_driver_thread(tc);
3685    pipe->flush(pipe, fence, flags);
3686    tc_clear_driver_thread(tc);
3687    tc->flushing = false;
3688 }
3689 
3690 struct tc_draw_single_drawid {
3691    struct tc_draw_single base;
3692    unsigned drawid_offset;
3693 };
3694 
3695 static uint16_t
3696 tc_call_draw_single_drawid(struct pipe_context *pipe, void *call)
3697 {
3698    struct tc_draw_single_drawid *info_drawid = to_call(call, tc_draw_single_drawid);
3699    struct tc_draw_single *info = &info_drawid->base;
3700 
3701    /* u_threaded_context stores start/count in min/max_index for single draws. */
3702    /* Drivers using u_threaded_context shouldn't use min/max_index. */
3703    struct pipe_draw_start_count_bias draw;
3704 
3705    draw.start = info->info.min_index;
3706    draw.count = info->info.max_index;
3707    draw.index_bias = info->index_bias;
3708 
3709    info->info.index_bounds_valid = false;
3710    info->info.has_user_indices = false;
3711    info->info.take_index_buffer_ownership = false;
3712 
3713    pipe->draw_vbo(pipe, &info->info, info_drawid->drawid_offset, NULL, &draw, 1);
3714    if (info->info.index_size)
3715       tc_drop_resource_reference(info->info.index.resource);
3716 
3717    return call_size(tc_draw_single_drawid);
3718 }
3719 
3720 static void
3721 simplify_draw_info(struct pipe_draw_info *info)
3722 {
3723    /* Clear these fields to facilitate draw merging.
3724     * Drivers shouldn't use them.
3725     */
3726    info->has_user_indices = false;
3727    info->index_bounds_valid = false;
3728    info->take_index_buffer_ownership = false;
3729    info->index_bias_varies = false;
3730    info->_pad = 0;
3731 
3732    /* This shouldn't be set when merging single draws. */
3733    info->increment_draw_id = false;
3734 
3735    if (info->index_size) {
3736       if (!info->primitive_restart)
3737          info->restart_index = 0;
3738    } else {
3739       assert(!info->primitive_restart);
3740       info->primitive_restart = false;
3741       info->restart_index = 0;
3742       info->index.resource = NULL;
3743    }
3744 }
3745 
3746 static bool
3747 is_next_call_a_mergeable_draw(struct tc_draw_single *first,
3748                               struct tc_draw_single *next)
3749 {
3750    if (next->base.call_id != TC_CALL_draw_single)
3751       return false;
3752 
3753    STATIC_ASSERT(offsetof(struct pipe_draw_info, min_index) ==
3754                  sizeof(struct pipe_draw_info) - 8);
3755    STATIC_ASSERT(offsetof(struct pipe_draw_info, max_index) ==
3756                  sizeof(struct pipe_draw_info) - 4);
3757    /* All fields must be the same except start and count. */
3758    /* u_threaded_context stores start/count in min/max_index for single draws. */
3759    return memcmp((uint32_t*)&first->info, (uint32_t*)&next->info,
3760                  DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX) == 0;
3761 }
3762 
3763 static uint16_t
3764 tc_call_draw_single(struct pipe_context *pipe, void *call)
3765 {
3766    /* Draw call merging. */
3767    struct tc_draw_single *first = to_call(call, tc_draw_single);
3768    struct tc_draw_single *next = get_next_call(first, tc_draw_single);
3769 
3770    /* If at least 2 consecutive draw calls can be merged... */
3771    if (next->base.call_id == TC_CALL_draw_single) {
3772       if (is_next_call_a_mergeable_draw(first, next)) {
3773          /* The maximum number of merged draws is given by the batch size. */
3774          struct pipe_draw_start_count_bias multi[TC_SLOTS_PER_BATCH / call_size(tc_draw_single)];
3775          unsigned num_draws = 2;
3776          bool index_bias_varies = first->index_bias != next->index_bias;
3777 
3778          /* u_threaded_context stores start/count in min/max_index for single draws. */
3779          multi[0].start = first->info.min_index;
3780          multi[0].count = first->info.max_index;
3781          multi[0].index_bias = first->index_bias;
3782          multi[1].start = next->info.min_index;
3783          multi[1].count = next->info.max_index;
3784          multi[1].index_bias = next->index_bias;
3785 
3786          /* Find how many other draws can be merged. */
3787          next = get_next_call(next, tc_draw_single);
3788          for (; is_next_call_a_mergeable_draw(first, next);
3789               next = get_next_call(next, tc_draw_single), num_draws++) {
3790             /* u_threaded_context stores start/count in min/max_index for single draws. */
3791             multi[num_draws].start = next->info.min_index;
3792             multi[num_draws].count = next->info.max_index;
3793             multi[num_draws].index_bias = next->index_bias;
3794             index_bias_varies |= first->index_bias != next->index_bias;
3795          }
3796 
3797          first->info.index_bias_varies = index_bias_varies;
3798          pipe->draw_vbo(pipe, &first->info, 0, NULL, multi, num_draws);
3799 
3800          /* Since all draws use the same index buffer, drop all references at once. */
3801          if (first->info.index_size)
3802             pipe_drop_resource_references(first->info.index.resource, num_draws);
3803 
3804          return call_size(tc_draw_single) * num_draws;
3805       }
3806    }
3807 
3808    /* u_threaded_context stores start/count in min/max_index for single draws. */
3809    /* Drivers using u_threaded_context shouldn't use min/max_index. */
3810    struct pipe_draw_start_count_bias draw;
3811 
3812    draw.start = first->info.min_index;
3813    draw.count = first->info.max_index;
3814    draw.index_bias = first->index_bias;
3815 
3816    first->info.index_bounds_valid = false;
3817    first->info.has_user_indices = false;
3818    first->info.take_index_buffer_ownership = false;
3819 
3820    pipe->draw_vbo(pipe, &first->info, 0, NULL, &draw, 1);
3821    if (first->info.index_size)
3822       tc_drop_resource_reference(first->info.index.resource);
3823 
3824    return call_size(tc_draw_single);
3825 }
3826 
3827 struct tc_draw_indirect {
3828    struct tc_call_base base;
3829    struct pipe_draw_start_count_bias draw;
3830    struct pipe_draw_info info;
3831    struct pipe_draw_indirect_info indirect;
3832 };
3833 
3834 static uint16_t
3835 tc_call_draw_indirect(struct pipe_context *pipe, void *call)
3836 {
3837    struct tc_draw_indirect *info = to_call(call, tc_draw_indirect);
3838 
3839    info->info.index_bounds_valid = false;
3840    info->info.take_index_buffer_ownership = false;
3841 
3842    pipe->draw_vbo(pipe, &info->info, 0, &info->indirect, &info->draw, 1);
3843    if (info->info.index_size)
3844       tc_drop_resource_reference(info->info.index.resource);
3845 
3846    tc_drop_resource_reference(info->indirect.buffer);
3847    tc_drop_resource_reference(info->indirect.indirect_draw_count);
3848    tc_drop_so_target_reference(info->indirect.count_from_stream_output);
3849    return call_size(tc_draw_indirect);
3850 }
3851 
3852 struct tc_draw_multi {
3853    struct tc_call_base base;
3854    unsigned num_draws;
3855    struct pipe_draw_info info;
3856    struct pipe_draw_start_count_bias slot[]; /* variable-sized array */
3857 };
3858 
3859 static uint16_t
3860 tc_call_draw_multi(struct pipe_context *pipe, void *call)
3861 {
3862    struct tc_draw_multi *info = (struct tc_draw_multi*)call;
3863 
3864    info->info.has_user_indices = false;
3865    info->info.index_bounds_valid = false;
3866    info->info.take_index_buffer_ownership = false;
3867 
3868    pipe->draw_vbo(pipe, &info->info, 0, NULL, info->slot, info->num_draws);
3869    if (info->info.index_size)
3870       tc_drop_resource_reference(info->info.index.resource);
3871 
3872    return info->base.num_slots;
3873 }
3874 
3875 #define DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX \
3876    offsetof(struct pipe_draw_info, index)
3877 
3878 /* Single draw with drawid_offset == 0. */
3879 static void
3880 tc_draw_single(struct pipe_context *_pipe, const struct pipe_draw_info *info,
3881                unsigned drawid_offset,
3882                const struct pipe_draw_indirect_info *indirect,
3883                const struct pipe_draw_start_count_bias *draws,
3884                unsigned num_draws)
3885 {
3886    struct threaded_context *tc = threaded_context(_pipe);
3887    struct tc_draw_single *p =
3888       tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
3889 
3890    if (info->index_size) {
3891       if (!info->take_index_buffer_ownership) {
3892          tc_set_resource_reference(&p->info.index.resource,
3893                                    info->index.resource);
3894       }
3895       tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->index.resource);
3896    }
3897    memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
3898    /* u_threaded_context stores start/count in min/max_index for single draws. */
3899    p->info.min_index = draws[0].start;
3900    p->info.max_index = draws[0].count;
3901    p->index_bias = draws[0].index_bias;
3902    simplify_draw_info(&p->info);
3903 }
3904 
3905 /* Single draw with drawid_offset > 0. */
3906 static void
3907 tc_draw_single_draw_id(struct pipe_context *_pipe,
3908                        const struct pipe_draw_info *info,
3909                        unsigned drawid_offset,
3910                        const struct pipe_draw_indirect_info *indirect,
3911                        const struct pipe_draw_start_count_bias *draws,
3912                        unsigned num_draws)
3913 {
3914    struct threaded_context *tc = threaded_context(_pipe);
3915    struct tc_draw_single *p =
3916       &tc_add_call(tc, TC_CALL_draw_single_drawid, tc_draw_single_drawid)->base;
3917 
3918    if (info->index_size) {
3919       if (!info->take_index_buffer_ownership) {
3920          tc_set_resource_reference(&p->info.index.resource,
3921                                    info->index.resource);
3922       }
3923       tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->index.resource);
3924    }
3925    ((struct tc_draw_single_drawid*)p)->drawid_offset = drawid_offset;
3926    memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
3927    /* u_threaded_context stores start/count in min/max_index for single draws. */
3928    p->info.min_index = draws[0].start;
3929    p->info.max_index = draws[0].count;
3930    p->index_bias = draws[0].index_bias;
3931    simplify_draw_info(&p->info);
3932 }
3933 
3934 /* Single draw with user indices and drawid_offset == 0. */
3935 static void
3936 tc_draw_user_indices_single(struct pipe_context *_pipe,
3937                             const struct pipe_draw_info *info,
3938                             unsigned drawid_offset,
3939                             const struct pipe_draw_indirect_info *indirect,
3940                             const struct pipe_draw_start_count_bias *draws,
3941                             unsigned num_draws)
3942 {
3943    struct threaded_context *tc = threaded_context(_pipe);
3944    unsigned index_size = info->index_size;
3945    unsigned size = draws[0].count * index_size;
3946    struct pipe_resource *buffer = NULL;
3947    unsigned offset;
3948 
3949    if (!size)
3950       return;
3951 
3952    /* This must be done before adding draw_vbo, because it could generate
3953     * e.g. transfer_unmap and flush partially-uninitialized draw_vbo
3954     * to the driver if it was done afterwards.
3955     */
3956    u_upload_data(tc->base.stream_uploader, 0, size, 4,
3957                  (uint8_t*)info->index.user + draws[0].start * index_size,
3958                  &offset, &buffer);
3959    if (unlikely(!buffer))
3960       return;
3961 
3962    struct tc_draw_single *p =
3963       tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
3964    memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX);
3965    p->info.index.resource = buffer;
3966    /* u_threaded_context stores start/count in min/max_index for single draws. */
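        /* The uploader returned a byte offset; convert it to an element index. */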
3967    p->info.min_index = offset >> util_logbase2(index_size);
3968    p->info.max_index = draws[0].count;
3969    p->index_bias = draws[0].index_bias;
3970    simplify_draw_info(&p->info);
3971 }
3972 
3973 /* Single draw with user indices and drawid_offset > 0. */
3974 static void
3975 tc_draw_user_indices_single_draw_id(struct pipe_context *_pipe,
3976                                     const struct pipe_draw_info *info,
3977                                     unsigned drawid_offset,
3978                                     const struct pipe_draw_indirect_info *indirect,
3979                                     const struct pipe_draw_start_count_bias *draws,
3980                                     unsigned num_draws)
3981 {
3982    struct threaded_context *tc = threaded_context(_pipe);
3983    unsigned index_size = info->index_size;
3984    unsigned size = draws[0].count * index_size;
3985    struct pipe_resource *buffer = NULL;
3986    unsigned offset;
3987 
3988    if (!size)
3989       return;
3990 
3991    /* This must be done before adding draw_vbo, because it could generate
3992     * e.g. transfer_unmap and flush partially-uninitialized draw_vbo
3993     * to the driver if it was done afterwards.
3994     */
3995    u_upload_data(tc->base.stream_uploader, 0, size, 4,
3996                  (uint8_t*)info->index.user + draws[0].start * index_size,
3997                  &offset, &buffer);
3998    if (unlikely(!buffer))
3999       return;
4000 
4001    struct tc_draw_single *p =
4002       &tc_add_call(tc, TC_CALL_draw_single_drawid, tc_draw_single_drawid)->base;
4003    memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX);
4004    p->info.index.resource = buffer;
4005    ((struct tc_draw_single_drawid*)p)->drawid_offset = drawid_offset;
4006    /* u_threaded_context stores start/count in min/max_index for single draws. */
4007    p->info.min_index = offset >> util_logbase2(index_size);
4008    p->info.max_index = draws[0].count;
4009    p->index_bias = draws[0].index_bias;
4010    simplify_draw_info(&p->info);
4011 }
4012 
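     /* Sizing helpers for tc_draw_multi: the fixed per-call overhead, the size of
      * one draw entry, and the number of call slots occupied by a call containing
      * a single draw.
      */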
4013 #define DRAW_OVERHEAD_BYTES sizeof(struct tc_draw_multi)
4014 #define ONE_DRAW_SLOT_BYTES sizeof(((struct tc_draw_multi*)NULL)->slot[0])
4015 
4016 #define SLOTS_FOR_ONE_DRAW \
4017    DIV_ROUND_UP(DRAW_OVERHEAD_BYTES + ONE_DRAW_SLOT_BYTES, \
4018                 sizeof(struct tc_call_base))
4019 
4020 static void
4021 tc_draw_multi(struct pipe_context *_pipe, const struct pipe_draw_info *info,
4022               unsigned drawid_offset,
4023               const struct pipe_draw_indirect_info *indirect,
4024               const struct pipe_draw_start_count_bias *draws,
4025               unsigned num_draws)
4026 {
4027    struct threaded_context *tc = threaded_context(_pipe);
4028    int total_offset = 0;
4029    bool take_index_buffer_ownership = info->take_index_buffer_ownership;
4030 
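        /* Split the draw list across as many tc_draw_multi calls (and batches) as
         * needed; each call takes as many draws as fit in the current batch.
         */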
4031    while (num_draws) {
4032       struct tc_batch *next = &tc->batch_slots[tc->next];
4033 
4034       int nb_slots_left = TC_SLOTS_PER_BATCH - 1 - next->num_total_slots;
4035       /* Not enough room for even one draw: the call will go to a fresh batch */
4036       if (nb_slots_left < SLOTS_FOR_ONE_DRAW)
4037          nb_slots_left = TC_SLOTS_PER_BATCH - 1;
4038       const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
4039 
4040       /* How many draws can we fit in the current batch */
4041       const int dr = MIN2(num_draws, (size_left_bytes - DRAW_OVERHEAD_BYTES) /
4042                           ONE_DRAW_SLOT_BYTES);
4043 
4044       /* Non-indexed call or indexed with a real index buffer. */
4045       struct tc_draw_multi *p =
4046          tc_add_slot_based_call(tc, TC_CALL_draw_multi, tc_draw_multi,
4047                                 dr);
4048       if (info->index_size) {
4049          if (!take_index_buffer_ownership) {
4050             tc_set_resource_reference(&p->info.index.resource,
4051                                       info->index.resource);
4052          }
4053          tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->index.resource);
4054       }
4055       take_index_buffer_ownership = false;
4056       memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
4057       p->num_draws = dr;
4058       memcpy(p->slot, &draws[total_offset], sizeof(draws[0]) * dr);
4059       num_draws -= dr;
4060 
4061       total_offset += dr;
4062    }
4063 }
4064 
4065 static void
4066 tc_draw_user_indices_multi(struct pipe_context *_pipe,
4067                            const struct pipe_draw_info *info,
4068                            unsigned drawid_offset,
4069                            const struct pipe_draw_indirect_info *indirect,
4070                            const struct pipe_draw_start_count_bias *draws,
4071                            unsigned num_draws)
4072 {
4073    struct threaded_context *tc = threaded_context(_pipe);
4074    struct pipe_resource *buffer = NULL;
4075    unsigned buffer_offset, total_count = 0;
4076    unsigned index_size_shift = util_logbase2(info->index_size);
4077    uint8_t *ptr = NULL;
4078 
4079    /* Get the total count. */
4080    for (unsigned i = 0; i < num_draws; i++)
4081       total_count += draws[i].count;
4082 
4083    if (!total_count)
4084       return;
4085 
4086    /* Allocate space for all index buffers.
4087     *
4088     * This must be done before adding draw_vbo, because the upload could
4089     * generate e.g. transfer_unmap and flush a partially-initialized
4090     * draw_vbo to the driver if it were done afterwards.
4091     */
4092    u_upload_alloc(tc->base.stream_uploader, 0,
4093                   total_count << index_size_shift, 4,
4094                   &buffer_offset, &buffer, (void**)&ptr);
4095    if (unlikely(!buffer))
4096       return;
4097 
4098    int total_offset = 0;
4099    unsigned offset = 0;
4100    while (num_draws) {
4101       struct tc_batch *next = &tc->batch_slots[tc->next];
4102 
4103       int nb_slots_left = TC_SLOTS_PER_BATCH - 1 - next->num_total_slots;
4104       /* If there isn't enough space left for one draw, start filling the next batch. */
4105       if (nb_slots_left < SLOTS_FOR_ONE_DRAW)
4106          nb_slots_left = TC_SLOTS_PER_BATCH - 1;
4107       const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
4108 
4109       /* How many draws can we fit in the current batch */
4110       const int dr = MIN2(num_draws, (size_left_bytes - DRAW_OVERHEAD_BYTES) /
4111                           ONE_DRAW_SLOT_BYTES);
4112 
4113       struct tc_draw_multi *p =
4114          tc_add_slot_based_call(tc, TC_CALL_draw_multi, tc_draw_multi,
4115                                 dr);
4116       memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX);
4117 
4118       if (total_offset == 0)
4119          /* the first slot inherits the reference from u_upload_alloc() */
4120          p->info.index.resource = buffer;
4121       else
4122          /* all following slots need a new reference */
4123          tc_set_resource_reference(&p->info.index.resource, buffer);
4124 
4125       p->num_draws = dr;
4126 
4127       /* Upload index buffers. */
4128       for (unsigned i = 0; i < dr; i++) {
4129          unsigned count = draws[i + total_offset].count;
4130 
4131          if (!count) {
4132             p->slot[i].start = 0;
4133             p->slot[i].count = 0;
4134             p->slot[i].index_bias = 0;
4135             continue;
4136          }
4137 
4138          unsigned size = count << index_size_shift;
4139          memcpy(ptr + offset,
4140                 (uint8_t*)info->index.user +
4141                 (draws[i + total_offset].start << index_size_shift), size);
4142          p->slot[i].start = (buffer_offset + offset) >> index_size_shift;
4143          p->slot[i].count = count;
4144          p->slot[i].index_bias = draws[i + total_offset].index_bias;
4145          offset += size;
4146       }
4147 
4148       total_offset += dr;
4149       num_draws -= dr;
4150    }
4151 }
4152 
4153 static void
4154 tc_draw_indirect(struct pipe_context *_pipe, const struct pipe_draw_info *info,
4155                  unsigned drawid_offset,
4156                  const struct pipe_draw_indirect_info *indirect,
4157                  const struct pipe_draw_start_count_bias *draws,
4158                  unsigned num_draws)
4159 {
4160    struct threaded_context *tc = threaded_context(_pipe);
4161    assert(!info->has_user_indices);
4162    assert(num_draws == 1);
4163 
4164    struct tc_draw_indirect *p =
4165       tc_add_call(tc, TC_CALL_draw_indirect, tc_draw_indirect);
4166    struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
4167 
4168    if (info->index_size) {
4169       if (!info->take_index_buffer_ownership) {
4170          tc_set_resource_reference(&p->info.index.resource,
4171                                    info->index.resource);
4172       }
4173       tc_add_to_buffer_list(next, info->index.resource);
4174    }
4175    memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
4176 
4177    tc_set_resource_reference(&p->indirect.buffer, indirect->buffer);
4178    tc_set_resource_reference(&p->indirect.indirect_draw_count,
4179                              indirect->indirect_draw_count);
4180    p->indirect.count_from_stream_output = NULL;
4181    pipe_so_target_reference(&p->indirect.count_from_stream_output,
4182                             indirect->count_from_stream_output);
4183 
4184    if (indirect->buffer)
4185       tc_add_to_buffer_list(next, indirect->buffer);
4186    if (indirect->indirect_draw_count)
4187       tc_add_to_buffer_list(next, indirect->indirect_draw_count);
4188    if (indirect->count_from_stream_output)
4189       tc_add_to_buffer_list(next, indirect->count_from_stream_output->buffer);
4190 
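   /* Editor's note: the memcpy below writes the same pointer values that the
    * reference-taking calls above already saw, so copying *indirect wholesale
    * neither gains nor loses any references.
    */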
4191    memcpy(&p->indirect, indirect, sizeof(*indirect));
4192    p->draw.start = draws[0].start;
4193 }
4194 
4195 /* Dispatch table for tc_draw_vbo:
4196  *
4197  * Indexed by:
4198  *    [is_indirect * 8 + index_size_and_has_user_indices * 4 +
4199  *     is_multi_draw * 2 + non_zero_draw_id]
4200  */
4201 static pipe_draw_func draw_funcs[16] = {
4202    tc_draw_single,
4203    tc_draw_single_draw_id,
4204    tc_draw_multi,
4205    tc_draw_multi,
4206    tc_draw_user_indices_single,
4207    tc_draw_user_indices_single_draw_id,
4208    tc_draw_user_indices_multi,
4209    tc_draw_user_indices_multi,
4210    tc_draw_indirect,
4211    tc_draw_indirect,
4212    tc_draw_indirect,
4213    tc_draw_indirect,
4214    tc_draw_indirect,
4215    tc_draw_indirect,
4216    tc_draw_indirect,
4217    tc_draw_indirect,
4218 };
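
/* Editor's worked example for the indexing scheme above: a direct multi-draw
 * with user indices and drawid_offset == 0 computes
 *    0*8 (no indirect) + 1*4 (user indices) + 1*2 (multi) + 0 == 6,
 * which dispatches to tc_draw_user_indices_multi; every indirect draw maps to
 * one of entries 8-15, which all resolve to tc_draw_indirect.
 */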
4219 
4220 void
4221 tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info,
4222             unsigned drawid_offset,
4223             const struct pipe_draw_indirect_info *indirect,
4224             const struct pipe_draw_start_count_bias *draws,
4225             unsigned num_draws)
4226 {
4227    STATIC_ASSERT(DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX +
4228                  sizeof(intptr_t) == offsetof(struct pipe_draw_info, min_index));
4229 
4230    struct threaded_context *tc = threaded_context(_pipe);
4231    if (tc->options.parse_renderpass_info)
4232       tc_parse_draw(tc);
4233 
4234    /* Use a function table to call the desired variant of draw_vbo. */
4235    unsigned index = (indirect != NULL) * 8 +
4236                     (info->index_size && info->has_user_indices) * 4 +
4237                     (num_draws > 1) * 2 + (drawid_offset != 0);
4238    draw_funcs[index](_pipe, info, drawid_offset, indirect, draws, num_draws);
4239 
4240    /* This must be after tc_add_*call, which can flush the batch. */
4241    if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
4242       tc_add_all_gfx_bindings_to_buffer_list(tc);
4243 }
4244 
4245 struct tc_draw_single *
4246 tc_add_draw_single_call(struct pipe_context *_pipe,
4247                         struct pipe_resource *index_bo)
4248 {
4249    struct threaded_context *tc = threaded_context(_pipe);
4250 
4251    if (tc->options.parse_renderpass_info)
4252       tc_parse_draw(tc);
4253 
4254    struct tc_draw_single *p =
4255       tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
4256 
4257    if (index_bo)
4258       tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], index_bo);
4259 
4260    /* This must be after tc_add_*call, which can flush the batch. */
4261    if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
4262       tc_add_all_gfx_bindings_to_buffer_list(tc);
4263 
4264    return p;
4265 }
4266 
4267 struct tc_draw_vstate_single {
4268    struct tc_call_base base;
4269    struct pipe_draw_start_count_bias draw;
4270 
4271    /* The following states must be together without holes because they are
4272     * compared by draw merging.
4273     */
4274    struct pipe_vertex_state *state;
4275    uint32_t partial_velem_mask;
4276    struct pipe_draw_vertex_state_info info;
4277 };
4278 
4279 static bool
4280 is_next_call_a_mergeable_draw_vstate(struct tc_draw_vstate_single *first,
4281                                      struct tc_draw_vstate_single *next)
4282 {
4283    if (next->base.call_id != TC_CALL_draw_vstate_single)
4284       return false;
4285 
4286    return !memcmp(&first->state, &next->state,
4287                   offsetof(struct tc_draw_vstate_single, info) +
4288                   sizeof(struct pipe_draw_vertex_state_info) -
4289                   offsetof(struct tc_draw_vstate_single, state));
4290 }
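
/* Editor's note: the memcmp above spans the contiguous state,
 * partial_velem_mask and info fields of struct tc_draw_vstate_single, so two
 * calls are considered mergeable only when they bind the same vertex state
 * object with the same element mask and draw_vertex_state_info.
 */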
4291 
4292 static uint16_t
4293 tc_call_draw_vstate_single(struct pipe_context *pipe, void *call)
4294 {
4295    /* Draw call merging. */
4296    struct tc_draw_vstate_single *first = to_call(call, tc_draw_vstate_single);
4297    struct tc_draw_vstate_single *next = get_next_call(first, tc_draw_vstate_single);
4298 
4299    /* If at least 2 consecutive draw calls can be merged... */
4300    if (is_next_call_a_mergeable_draw_vstate(first, next)) {
4301       /* The maximum number of merged draws is given by the batch size. */
4302       struct pipe_draw_start_count_bias draws[TC_SLOTS_PER_BATCH /
4303                                               call_size(tc_draw_vstate_single)];
4304       unsigned num_draws = 2;
4305 
4306       draws[0] = first->draw;
4307       draws[1] = next->draw;
4308 
4309       /* Find how many other draws can be merged. */
4310       next = get_next_call(next, tc_draw_vstate_single);
4311       for (; is_next_call_a_mergeable_draw_vstate(first, next);
4312            next = get_next_call(next, tc_draw_vstate_single),
4313            num_draws++)
4314          draws[num_draws] = next->draw;
4315 
4316       pipe->draw_vertex_state(pipe, first->state, first->partial_velem_mask,
4317                               first->info, draws, num_draws);
4318       /* Since all draws use the same state, drop all references at once. */
4319       tc_drop_vertex_state_references(first->state, num_draws);
4320 
4321       return call_size(tc_draw_vstate_single) * num_draws;
4322    }
4323 
4324    pipe->draw_vertex_state(pipe, first->state, first->partial_velem_mask,
4325                            first->info, &first->draw, 1);
4326    tc_drop_vertex_state_references(first->state, 1);
4327    return call_size(tc_draw_vstate_single);
4328 }
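
/* Editor's note (sizes hypothetical): the "draws" array above is sized as
 * TC_SLOTS_PER_BATCH / call_size(tc_draw_vstate_single), i.e. even if every
 * remaining slot in the batch held another mergeable vstate-single call, the
 * merged list would still fit; with a 4-slot call and 1536 slots per batch,
 * that would cap merging at 384 consecutive draws.
 */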
4329 
4330 struct tc_draw_vstate_multi {
4331    struct tc_call_base base;
4332    uint32_t partial_velem_mask;
4333    struct pipe_draw_vertex_state_info info;
4334    unsigned num_draws;
4335    struct pipe_vertex_state *state;
4336    struct pipe_draw_start_count_bias slot[0];
4337 };
4338 
4339 static uint16_t
4340 tc_call_draw_vstate_multi(struct pipe_context *pipe, void *call)
4341 {
4342    struct tc_draw_vstate_multi *info = (struct tc_draw_vstate_multi*)call;
4343 
4344    pipe->draw_vertex_state(pipe, info->state, info->partial_velem_mask,
4345                            info->info, info->slot, info->num_draws);
4346    tc_drop_vertex_state_references(info->state, 1);
4347    return info->base.num_slots;
4348 }
4349 
4350 static void
4351 tc_draw_vertex_state(struct pipe_context *_pipe,
4352                      struct pipe_vertex_state *state,
4353                      uint32_t partial_velem_mask,
4354                      struct pipe_draw_vertex_state_info info,
4355                      const struct pipe_draw_start_count_bias *draws,
4356                      unsigned num_draws)
4357 {
4358    struct threaded_context *tc = threaded_context(_pipe);
4359    if (tc->options.parse_renderpass_info)
4360       tc_parse_draw(tc);
4361 
4362    if (num_draws == 1) {
4363       /* Single draw. */
4364       struct tc_draw_vstate_single *p =
4365          tc_add_call(tc, TC_CALL_draw_vstate_single, tc_draw_vstate_single);
4366       p->partial_velem_mask = partial_velem_mask;
4367       p->draw = draws[0];
4368       p->info.mode = info.mode;
4369       p->info.take_vertex_state_ownership = false;
4370 
4371       /* This should always be 0 for simplicity because we assume that
4372        * index_bias doesn't vary.
4373        */
4374       assert(draws[0].index_bias == 0);
4375 
4376       if (!info.take_vertex_state_ownership)
4377          tc_set_vertex_state_reference(&p->state, state);
4378       else
4379          p->state = state;
4380 
4381 
4382       /* This must be after tc_add_*call, which can flush the batch. */
4383       if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
4384          tc_add_all_gfx_bindings_to_buffer_list(tc);
4385       return;
4386    }
4387 
4388    const int draw_overhead_bytes = sizeof(struct tc_draw_vstate_multi);
4389    const int one_draw_slot_bytes = sizeof(((struct tc_draw_vstate_multi*)NULL)->slot[0]);
4390    const int slots_for_one_draw = DIV_ROUND_UP(draw_overhead_bytes + one_draw_slot_bytes,
4391                                                sizeof(struct tc_call_base));
4392    /* Multi draw. */
4393    int total_offset = 0;
4394    bool take_vertex_state_ownership = info.take_vertex_state_ownership;
4395    while (num_draws) {
4396       struct tc_batch *next = &tc->batch_slots[tc->next];
4397 
4398       int nb_slots_left = TC_SLOTS_PER_BATCH - 1 - next->num_total_slots;
4399       /* If there isn't enough space left for one draw, start filling the next batch. */
4400       if (nb_slots_left < slots_for_one_draw)
4401          nb_slots_left = TC_SLOTS_PER_BATCH - 1;
4402       const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
4403 
4404       /* How many draws can we fit in the current batch */
4405       const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_slot_bytes);
4406 
4407       /* Non-indexed call or indexed with a real index buffer. */
4408       struct tc_draw_vstate_multi *p =
4409          tc_add_slot_based_call(tc, TC_CALL_draw_vstate_multi, tc_draw_vstate_multi, dr);
4410 
4411       if (!take_vertex_state_ownership)
4412          tc_set_vertex_state_reference(&p->state, state);
4413       else
4414          p->state = state;
4415 
4416       take_vertex_state_ownership = false;
4417       p->partial_velem_mask = partial_velem_mask;
4418       p->info.mode = info.mode;
4419       p->info.take_vertex_state_ownership = false;
4420       p->num_draws = dr;
4421       memcpy(p->slot, &draws[total_offset], sizeof(draws[0]) * dr);
4422       num_draws -= dr;
4423 
4424       total_offset += dr;
4425    }
4426 
4427 
4428    /* This must be after tc_add_*call, which can flush the batch. */
4429    if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
4430       tc_add_all_gfx_bindings_to_buffer_list(tc);
4431 }
4432 
4433 struct tc_launch_grid_call {
4434    struct tc_call_base base;
4435    struct pipe_grid_info info;
4436 };
4437 
4438 static uint16_t
4439 tc_call_launch_grid(struct pipe_context *pipe, void *call)
4440 {
4441    struct pipe_grid_info *p = &to_call(call, tc_launch_grid_call)->info;
4442 
4443    pipe->launch_grid(pipe, p);
4444    tc_drop_resource_reference(p->indirect);
4445    return call_size(tc_launch_grid_call);
4446 }
4447 
4448 static void
4449 tc_launch_grid(struct pipe_context *_pipe,
4450                const struct pipe_grid_info *info)
4451 {
4452    struct threaded_context *tc = threaded_context(_pipe);
4453    struct tc_launch_grid_call *p = tc_add_call(tc, TC_CALL_launch_grid,
4454                                                tc_launch_grid_call);
4455    assert(info->input == NULL);
4456 
4457    tc_set_resource_reference(&p->info.indirect, info->indirect);
4458    memcpy(&p->info, info, sizeof(*info));
4459 
4460    if (info->indirect)
4461       tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->indirect);
4462 
4463    /* This must be after tc_add_*call, which can flush the batch. */
4464    if (unlikely(tc->add_all_compute_bindings_to_buffer_list))
4465       tc_add_all_compute_bindings_to_buffer_list(tc);
4466 }
4467 
4468 static uint16_t
4469 tc_call_resource_copy_region(struct pipe_context *pipe, void *call)
4470 {
4471    struct tc_resource_copy_region *p = to_call(call, tc_resource_copy_region);
4472 
4473    pipe->resource_copy_region(pipe, p->dst, p->dst_level, p->dstx, p->dsty,
4474                               p->dstz, p->src, p->src_level, &p->src_box);
4475    tc_drop_resource_reference(p->dst);
4476    tc_drop_resource_reference(p->src);
4477    return call_size(tc_resource_copy_region);
4478 }
4479 
4480 static void
4481 tc_resource_copy_region(struct pipe_context *_pipe,
4482                         struct pipe_resource *dst, unsigned dst_level,
4483                         unsigned dstx, unsigned dsty, unsigned dstz,
4484                         struct pipe_resource *src, unsigned src_level,
4485                         const struct pipe_box *src_box)
4486 {
4487    struct threaded_context *tc = threaded_context(_pipe);
4488    struct threaded_resource *tdst = threaded_resource(dst);
4489    struct tc_resource_copy_region *p =
4490       tc_add_call(tc, TC_CALL_resource_copy_region,
4491                   tc_resource_copy_region);
4492 
4493    if (dst->target == PIPE_BUFFER)
4494       tc_buffer_disable_cpu_storage(dst);
4495 
4496    tc_set_resource_batch_usage(tc, dst);
4497    tc_set_resource_reference(&p->dst, dst);
4498    p->dst_level = dst_level;
4499    p->dstx = dstx;
4500    p->dsty = dsty;
4501    p->dstz = dstz;
4502    tc_set_resource_batch_usage(tc, src);
4503    tc_set_resource_reference(&p->src, src);
4504    p->src_level = src_level;
4505    p->src_box = *src_box;
4506 
4507    if (dst->target == PIPE_BUFFER) {
4508       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
4509 
4510       tc_add_to_buffer_list(next, src);
4511       tc_add_to_buffer_list(next, dst);
4512 
4513       util_range_add(&tdst->b, &tdst->valid_buffer_range,
4514                      dstx, dstx + src_box->width);
4515    }
4516 }
4517 
4518 struct tc_blit_call {
4519    struct tc_call_base base;
4520    struct pipe_blit_info info;
4521 };
4522 
4523 static uint16_t
4524 tc_call_blit(struct pipe_context *pipe, void *call)
4525 {
4526    struct pipe_blit_info *blit = &to_call(call, tc_blit_call)->info;
4527 
4528    pipe->blit(pipe, blit);
4529    tc_drop_resource_reference(blit->dst.resource);
4530    tc_drop_resource_reference(blit->src.resource);
4531    return call_size(tc_blit_call);
4532 }
4533 
4534 static void
4535 tc_blit_enqueue(struct threaded_context *tc, const struct pipe_blit_info *info)
4536 {
4537    struct tc_blit_call *blit = tc_add_call(tc, TC_CALL_blit, tc_blit_call);
4538 
4539    tc_set_resource_batch_usage(tc, info->dst.resource);
4540    tc_set_resource_reference(&blit->info.dst.resource, info->dst.resource);
4541    tc_set_resource_batch_usage(tc, info->src.resource);
4542    tc_set_resource_reference(&blit->info.src.resource, info->src.resource);
4543    memcpy(&blit->info, info, sizeof(*info));
4544 }
4545 
4546 static void
4547 tc_blit(struct pipe_context *_pipe, const struct pipe_blit_info *info)
4548 {
4549    struct threaded_context *tc = threaded_context(_pipe);
4550 
4551    /* filter out untracked non-resolves */
4552    if (!tc->options.parse_renderpass_info ||
4553        info->src.resource->nr_samples <= 1 ||
4554        info->dst.resource->nr_samples > 1) {
4555       tc_blit_enqueue(tc, info);
4556       return;
4557    }
4558 
4559    if (tc->fb_resolve == info->dst.resource) {
4560       /* optimize out this blit entirely */
4561       tc->renderpass_info_recording->has_resolve = true;
4562       return;
4563    }
4564    for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
4565       if (tc->fb_resources[i] == info->src.resource) {
4566          tc->renderpass_info_recording->has_resolve = true;
4567          break;
4568       }
4569    }
4570    tc_blit_enqueue(tc, info);
4571 }
4572 
4573 struct tc_generate_mipmap {
4574    struct tc_call_base base;
4575    enum pipe_format format;
4576    unsigned base_level;
4577    unsigned last_level;
4578    unsigned first_layer;
4579    unsigned last_layer;
4580    struct pipe_resource *res;
4581 };
4582 
4583 static uint16_t
4584 tc_call_generate_mipmap(struct pipe_context *pipe, void *call)
4585 {
4586    struct tc_generate_mipmap *p = to_call(call, tc_generate_mipmap);
4587    ASSERTED bool result = pipe->generate_mipmap(pipe, p->res, p->format,
4588                                                     p->base_level,
4589                                                     p->last_level,
4590                                                     p->first_layer,
4591                                                     p->last_layer);
4592    assert(result);
4593    tc_drop_resource_reference(p->res);
4594    return call_size(tc_generate_mipmap);
4595 }
4596 
4597 static bool
4598 tc_generate_mipmap(struct pipe_context *_pipe,
4599                    struct pipe_resource *res,
4600                    enum pipe_format format,
4601                    unsigned base_level,
4602                    unsigned last_level,
4603                    unsigned first_layer,
4604                    unsigned last_layer)
4605 {
4606    struct threaded_context *tc = threaded_context(_pipe);
4607    struct pipe_context *pipe = tc->pipe;
4608    struct pipe_screen *screen = pipe->screen;
4609    unsigned bind = PIPE_BIND_SAMPLER_VIEW;
4610 
4611    if (util_format_is_depth_or_stencil(format))
4612       bind = PIPE_BIND_DEPTH_STENCIL;
4613    else
4614       bind = PIPE_BIND_RENDER_TARGET;
4615 
4616    if (!screen->is_format_supported(screen, format, res->target,
4617                                     res->nr_samples, res->nr_storage_samples,
4618                                     bind))
4619       return false;
4620 
4621    struct tc_generate_mipmap *p =
4622       tc_add_call(tc, TC_CALL_generate_mipmap, tc_generate_mipmap);
4623 
4624    tc_set_resource_batch_usage(tc, res);
4625    tc_set_resource_reference(&p->res, res);
4626    p->format = format;
4627    p->base_level = base_level;
4628    p->last_level = last_level;
4629    p->first_layer = first_layer;
4630    p->last_layer = last_layer;
4631    return true;
4632 }
4633 
4634 struct tc_resource_call {
4635    struct tc_call_base base;
4636    struct pipe_resource *resource;
4637 };
4638 
4639 static uint16_t
4640 tc_call_flush_resource(struct pipe_context *pipe, void *call)
4641 {
4642    struct pipe_resource *resource = to_call(call, tc_resource_call)->resource;
4643 
4644    pipe->flush_resource(pipe, resource);
4645    tc_drop_resource_reference(resource);
4646    return call_size(tc_resource_call);
4647 }
4648 
4649 static void
4650 tc_flush_resource(struct pipe_context *_pipe, struct pipe_resource *resource)
4651 {
4652    struct threaded_context *tc = threaded_context(_pipe);
4653    struct tc_resource_call *call = tc_add_call(tc, TC_CALL_flush_resource,
4654                                                tc_resource_call);
4655 
4656    tc_set_resource_batch_usage(tc, resource);
4657    tc_set_resource_reference(&call->resource, resource);
4658 }
4659 
4660 static uint16_t
4661 tc_call_invalidate_resource(struct pipe_context *pipe, void *call)
4662 {
4663    struct pipe_resource *resource = to_call(call, tc_resource_call)->resource;
4664 
4665    pipe->invalidate_resource(pipe, resource);
4666    tc_drop_resource_reference(resource);
4667    return call_size(tc_resource_call);
4668 }
4669 
4670 static void
4671 tc_invalidate_resource(struct pipe_context *_pipe,
4672                        struct pipe_resource *resource)
4673 {
4674    struct threaded_context *tc = threaded_context(_pipe);
4675 
4676    if (resource->target == PIPE_BUFFER) {
4677       tc_invalidate_buffer(tc, threaded_resource(resource));
4678       return;
4679    }
4680 
4681    struct tc_resource_call *call = tc_add_call(tc, TC_CALL_invalidate_resource,
4682                                                tc_resource_call);
4683    tc_set_resource_batch_usage(tc, resource);
4684    tc_set_resource_reference(&call->resource, resource);
4685 
4686    struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
4687    if (info) {
4688       if (tc->fb_resources[PIPE_MAX_COLOR_BUFS] == resource) {
4689          info->zsbuf_invalidate = true;
4690       } else {
4691          for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
4692             if (tc->fb_resources[i] == resource)
4693                info->cbuf_invalidate |= BITFIELD_BIT(i);
4694          }
4695       }
4696    }
4697 }
4698 
4699 struct tc_clear {
4700    struct tc_call_base base;
4701    bool scissor_state_set;
4702    uint8_t stencil;
4703    uint16_t buffers;
4704    float depth;
4705    struct pipe_scissor_state scissor_state;
4706    union pipe_color_union color;
4707 };
4708 
4709 static uint16_t
4710 tc_call_clear(struct pipe_context *pipe, void *call)
4711 {
4712    struct tc_clear *p = to_call(call, tc_clear);
4713 
4714    pipe->clear(pipe, p->buffers, p->scissor_state_set ? &p->scissor_state : NULL, &p->color, p->depth, p->stencil);
4715    return call_size(tc_clear);
4716 }
4717 
4718 static void
4719 tc_clear(struct pipe_context *_pipe, unsigned buffers, const struct pipe_scissor_state *scissor_state,
4720          const union pipe_color_union *color, double depth,
4721          unsigned stencil)
4722 {
4723    struct threaded_context *tc = threaded_context(_pipe);
4724    struct tc_clear *p = tc_add_call(tc, TC_CALL_clear, tc_clear);
4725 
4726    p->buffers = buffers;
4727    if (scissor_state) {
4728       p->scissor_state = *scissor_state;
4729       struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
4730       /* partial clear info is useful for drivers to know whether any zs writes occur;
4731        * drivers are responsible for optimizing partial clear -> full clear
4732        */
4733       if (info && buffers & PIPE_CLEAR_DEPTHSTENCIL)
4734          info->zsbuf_clear_partial |= !info->zsbuf_clear;
4735    } else {
4736       struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
4737       if (info) {
4738          /* full clears use a different load operation, but are only valid if draws haven't occurred yet */
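         /* Editor's note: PIPE_CLEAR_COLOR0 is bit 2 of the clear flags, so
          * (buffers >> 2) below yields one bit per color attachment.
          */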
4739          info->cbuf_clear |= (buffers >> 2) & ~info->cbuf_load;
4740          if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
4741             if (!info->zsbuf_load && !info->zsbuf_clear_partial)
4742                info->zsbuf_clear = true;
4743             else if (!info->zsbuf_clear)
4744                /* this is a clear that occurred after a draw: flag as partial to ensure it isn't ignored */
4745                info->zsbuf_clear_partial = true;
4746          }
4747       }
4748    }
4749    p->scissor_state_set = !!scissor_state;
4750    p->color = *color;
4751    p->depth = depth;
4752    p->stencil = stencil;
4753 }
4754 
4755 struct tc_clear_render_target {
4756    struct tc_call_base base;
4757    bool render_condition_enabled;
4758    unsigned dstx;
4759    unsigned dsty;
4760    unsigned width;
4761    unsigned height;
4762    union pipe_color_union color;
4763    struct pipe_surface *dst;
4764 };
4765 
4766 static uint16_t
4767 tc_call_clear_render_target(struct pipe_context *pipe, void *call)
4768 {
4769    struct tc_clear_render_target *p = to_call(call, tc_clear_render_target);
4770 
4771    pipe->clear_render_target(pipe, p->dst, &p->color, p->dstx, p->dsty, p->width, p->height,
4772                              p->render_condition_enabled);
4773    tc_drop_surface_reference(p->dst);
4774    return call_size(tc_clear_render_target);
4775 }
4776 
4777 static void
4778 tc_clear_render_target(struct pipe_context *_pipe,
4779                        struct pipe_surface *dst,
4780                        const union pipe_color_union *color,
4781                        unsigned dstx, unsigned dsty,
4782                        unsigned width, unsigned height,
4783                        bool render_condition_enabled)
4784 {
4785    struct threaded_context *tc = threaded_context(_pipe);
4786    struct tc_clear_render_target *p = tc_add_call(tc, TC_CALL_clear_render_target, tc_clear_render_target);
4787    p->dst = NULL;
4788    pipe_surface_reference(&p->dst, dst);
4789    p->color = *color;
4790    p->dstx = dstx;
4791    p->dsty = dsty;
4792    p->width = width;
4793    p->height = height;
4794    p->render_condition_enabled = render_condition_enabled;
4795 }
4796 
4797 
4798 struct tc_clear_depth_stencil {
4799    struct tc_call_base base;
4800    bool render_condition_enabled;
4801    float depth;
4802    unsigned clear_flags;
4803    unsigned stencil;
4804    unsigned dstx;
4805    unsigned dsty;
4806    unsigned width;
4807    unsigned height;
4808    struct pipe_surface *dst;
4809 };
4810 
4811 
4812 static uint16_t
4813 tc_call_clear_depth_stencil(struct pipe_context *pipe, void *call)
4814 {
4815    struct tc_clear_depth_stencil *p = to_call(call, tc_clear_depth_stencil);
4816 
4817    pipe->clear_depth_stencil(pipe, p->dst, p->clear_flags, p->depth, p->stencil,
4818                              p->dstx, p->dsty, p->width, p->height,
4819                              p->render_condition_enabled);
4820    tc_drop_surface_reference(p->dst);
4821    return call_size(tc_clear_depth_stencil);
4822 }
4823 
4824 static void
4825 tc_clear_depth_stencil(struct pipe_context *_pipe,
4826                        struct pipe_surface *dst, unsigned clear_flags,
4827                        double depth, unsigned stencil, unsigned dstx,
4828                        unsigned dsty, unsigned width, unsigned height,
4829                        bool render_condition_enabled)
4830 {
4831    struct threaded_context *tc = threaded_context(_pipe);
4832    struct tc_clear_depth_stencil *p = tc_add_call(tc, TC_CALL_clear_depth_stencil, tc_clear_depth_stencil);
4833    p->dst = NULL;
4834    pipe_surface_reference(&p->dst, dst);
4835    p->clear_flags = clear_flags;
4836    p->depth = depth;
4837    p->stencil = stencil;
4838    p->dstx = dstx;
4839    p->dsty = dsty;
4840    p->width = width;
4841    p->height = height;
4842    p->render_condition_enabled = render_condition_enabled;
4843 }
4844 
4845 struct tc_clear_buffer {
4846    struct tc_call_base base;
4847    uint8_t clear_value_size;
4848    unsigned offset;
4849    unsigned size;
4850    char clear_value[16];
4851    struct pipe_resource *res;
4852 };
4853 
4854 static uint16_t
4855 tc_call_clear_buffer(struct pipe_context *pipe, void *call)
4856 {
4857    struct tc_clear_buffer *p = to_call(call, tc_clear_buffer);
4858 
4859    pipe->clear_buffer(pipe, p->res, p->offset, p->size, p->clear_value,
4860                       p->clear_value_size);
4861    tc_drop_resource_reference(p->res);
4862    return call_size(tc_clear_buffer);
4863 }
4864 
4865 static void
4866 tc_clear_buffer(struct pipe_context *_pipe, struct pipe_resource *res,
4867                 unsigned offset, unsigned size,
4868                 const void *clear_value, int clear_value_size)
4869 {
4870    struct threaded_context *tc = threaded_context(_pipe);
4871    struct threaded_resource *tres = threaded_resource(res);
4872    struct tc_clear_buffer *p =
4873       tc_add_call(tc, TC_CALL_clear_buffer, tc_clear_buffer);
4874 
4875    tc_buffer_disable_cpu_storage(res);
4876 
4877    tc_set_resource_reference(&p->res, res);
4878    tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], res);
4879    p->offset = offset;
4880    p->size = size;
4881    memcpy(p->clear_value, clear_value, clear_value_size);
4882    p->clear_value_size = clear_value_size;
4883 
4884    util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size);
4885 }
4886 
4887 struct tc_clear_texture {
4888    struct tc_call_base base;
4889    unsigned level;
4890    struct pipe_box box;
4891    char data[16];
4892    struct pipe_resource *res;
4893 };
4894 
4895 static uint16_t
4896 tc_call_clear_texture(struct pipe_context *pipe, void *call)
4897 {
4898    struct tc_clear_texture *p = to_call(call, tc_clear_texture);
4899 
4900    pipe->clear_texture(pipe, p->res, p->level, &p->box, p->data);
4901    tc_drop_resource_reference(p->res);
4902    return call_size(tc_clear_texture);
4903 }
4904 
4905 static void
4906 tc_clear_texture(struct pipe_context *_pipe, struct pipe_resource *res,
4907                  unsigned level, const struct pipe_box *box, const void *data)
4908 {
4909    struct threaded_context *tc = threaded_context(_pipe);
4910    struct tc_clear_texture *p =
4911       tc_add_call(tc, TC_CALL_clear_texture, tc_clear_texture);
4912 
4913    tc_set_resource_batch_usage(tc, res);
4914    tc_set_resource_reference(&p->res, res);
4915    p->level = level;
4916    p->box = *box;
4917    memcpy(p->data, data,
4918           util_format_get_blocksize(res->format));
4919 }
4920 
4921 struct tc_resource_commit {
4922    struct tc_call_base base;
4923    bool commit;
4924    unsigned level;
4925    struct pipe_box box;
4926    struct pipe_resource *res;
4927 };
4928 
4929 static uint16_t
4930 tc_call_resource_commit(struct pipe_context *pipe, void *call)
4931 {
4932    struct tc_resource_commit *p = to_call(call, tc_resource_commit);
4933 
4934    pipe->resource_commit(pipe, p->res, p->level, &p->box, p->commit);
4935    tc_drop_resource_reference(p->res);
4936    return call_size(tc_resource_commit);
4937 }
4938 
4939 static bool
4940 tc_resource_commit(struct pipe_context *_pipe, struct pipe_resource *res,
4941                    unsigned level, struct pipe_box *box, bool commit)
4942 {
4943    struct threaded_context *tc = threaded_context(_pipe);
4944    struct tc_resource_commit *p =
4945       tc_add_call(tc, TC_CALL_resource_commit, tc_resource_commit);
4946 
4947    tc_set_resource_reference(&p->res, res);
4948    tc_set_resource_batch_usage(tc, res);
4949    p->level = level;
4950    p->box = *box;
4951    p->commit = commit;
4952    return true; /* we don't care about the return value for this call */
4953 }
4954 
4955 static unsigned
4956 tc_init_intel_perf_query_info(struct pipe_context *_pipe)
4957 {
4958    struct threaded_context *tc = threaded_context(_pipe);
4959    struct pipe_context *pipe = tc->pipe;
4960 
4961    return pipe->init_intel_perf_query_info(pipe);
4962 }
4963 
4964 static void
4965 tc_get_intel_perf_query_info(struct pipe_context *_pipe,
4966                              unsigned query_index,
4967                              const char **name,
4968                              uint32_t *data_size,
4969                              uint32_t *n_counters,
4970                              uint32_t *n_active)
4971 {
4972    struct threaded_context *tc = threaded_context(_pipe);
4973    struct pipe_context *pipe = tc->pipe;
4974 
4975    tc_sync(tc); /* n_active vs begin/end_intel_perf_query */
4976    pipe->get_intel_perf_query_info(pipe, query_index, name, data_size,
4977          n_counters, n_active);
4978 }
4979 
4980 static void
4981 tc_get_intel_perf_query_counter_info(struct pipe_context *_pipe,
4982                                      unsigned query_index,
4983                                      unsigned counter_index,
4984                                      const char **name,
4985                                      const char **desc,
4986                                      uint32_t *offset,
4987                                      uint32_t *data_size,
4988                                      uint32_t *type_enum,
4989                                      uint32_t *data_type_enum,
4990                                      uint64_t *raw_max)
4991 {
4992    struct threaded_context *tc = threaded_context(_pipe);
4993    struct pipe_context *pipe = tc->pipe;
4994 
4995    pipe->get_intel_perf_query_counter_info(pipe, query_index, counter_index,
4996          name, desc, offset, data_size, type_enum, data_type_enum, raw_max);
4997 }
4998 
4999 static struct pipe_query *
5000 tc_new_intel_perf_query_obj(struct pipe_context *_pipe, unsigned query_index)
5001 {
5002    struct threaded_context *tc = threaded_context(_pipe);
5003    struct pipe_context *pipe = tc->pipe;
5004 
5005    return pipe->new_intel_perf_query_obj(pipe, query_index);
5006 }
5007 
5008 static uint16_t
5009 tc_call_begin_intel_perf_query(struct pipe_context *pipe, void *call)
5010 {
5011    (void)pipe->begin_intel_perf_query(pipe, to_call(call, tc_query_call)->query);
5012    return call_size(tc_query_call);
5013 }
5014 
5015 static bool
5016 tc_begin_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
5017 {
5018    struct threaded_context *tc = threaded_context(_pipe);
5019 
5020    tc_add_call(tc, TC_CALL_begin_intel_perf_query, tc_query_call)->query = q;
5021 
5022    /* assume success, begin failure can be signaled from get_intel_perf_query_data */
5023    return true;
5024 }
5025 
5026 static uint16_t
5027 tc_call_end_intel_perf_query(struct pipe_context *pipe, void *call)
5028 {
5029    pipe->end_intel_perf_query(pipe, to_call(call, tc_query_call)->query);
5030    return call_size(tc_query_call);
5031 }
5032 
5033 static void
5034 tc_end_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
5035 {
5036    struct threaded_context *tc = threaded_context(_pipe);
5037 
5038    tc_add_call(tc, TC_CALL_end_intel_perf_query, tc_query_call)->query = q;
5039 }
5040 
5041 static void
5042 tc_delete_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
5043 {
5044    struct threaded_context *tc = threaded_context(_pipe);
5045    struct pipe_context *pipe = tc->pipe;
5046 
5047    tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
5048    pipe->delete_intel_perf_query(pipe, q);
5049 }
5050 
5051 static void
5052 tc_wait_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
5053 {
5054    struct threaded_context *tc = threaded_context(_pipe);
5055    struct pipe_context *pipe = tc->pipe;
5056 
5057    tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
5058    pipe->wait_intel_perf_query(pipe, q);
5059 }
5060 
5061 static bool
5062 tc_is_intel_perf_query_ready(struct pipe_context *_pipe, struct pipe_query *q)
5063 {
5064    struct threaded_context *tc = threaded_context(_pipe);
5065    struct pipe_context *pipe = tc->pipe;
5066 
5067    tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
5068    return pipe->is_intel_perf_query_ready(pipe, q);
5069 }
5070 
5071 static bool
5072 tc_get_intel_perf_query_data(struct pipe_context *_pipe,
5073                              struct pipe_query *q,
5074                              size_t data_size,
5075                              uint32_t *data,
5076                              uint32_t *bytes_written)
5077 {
5078    struct threaded_context *tc = threaded_context(_pipe);
5079    struct pipe_context *pipe = tc->pipe;
5080 
5081    tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
5082    return pipe->get_intel_perf_query_data(pipe, q, data_size, data, bytes_written);
5083 }
5084 
5085 /********************************************************************
5086  * callback
5087  */
5088 
5089 struct tc_callback_call {
5090    struct tc_call_base base;
5091    void (*fn)(void *data);
5092    void *data;
5093 };
5094 
5095 static uint16_t
5096 tc_call_callback(UNUSED struct pipe_context *pipe, void *call)
5097 {
5098    struct tc_callback_call *p = to_call(call, tc_callback_call);
5099 
5100    p->fn(p->data);
5101    return call_size(tc_callback_call);
5102 }
5103 
5104 static void
5105 tc_callback(struct pipe_context *_pipe, void (*fn)(void *), void *data,
5106             bool asap)
5107 {
5108    struct threaded_context *tc = threaded_context(_pipe);
5109 
5110    if (asap && tc_is_sync(tc)) {
5111       fn(data);
5112       return;
5113    }
5114 
5115    struct tc_callback_call *p =
5116       tc_add_call(tc, TC_CALL_callback, tc_callback_call);
5117    p->fn = fn;
5118    p->data = data;
5119 }
5120 
5121 
5122 /********************************************************************
5123  * create & destroy
5124  */
5125 
5126 static void
5127 tc_destroy(struct pipe_context *_pipe)
5128 {
5129    struct threaded_context *tc = threaded_context(_pipe);
5130    struct pipe_context *pipe = tc->pipe;
5131 
5132    if (tc->base.const_uploader &&
5133        tc->base.stream_uploader != tc->base.const_uploader)
5134       u_upload_destroy(tc->base.const_uploader);
5135 
5136    if (tc->base.stream_uploader)
5137       u_upload_destroy(tc->base.stream_uploader);
5138 
5139    tc_sync(tc);
5140 
5141    if (util_queue_is_initialized(&tc->queue)) {
5142       util_queue_destroy(&tc->queue);
5143 
5144       for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
5145          util_queue_fence_destroy(&tc->batch_slots[i].fence);
5146          util_dynarray_fini(&tc->batch_slots[i].renderpass_infos);
5147          assert(!tc->batch_slots[i].token);
5148       }
5149    }
5150 
5151    slab_destroy_child(&tc->pool_transfers);
5152    assert(tc->batch_slots[tc->next].num_total_slots == 0);
5153    pipe->destroy(pipe);
5154 
5155    for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) {
5156       if (!util_queue_fence_is_signalled(&tc->buffer_lists[i].driver_flushed_fence))
5157          util_queue_fence_signal(&tc->buffer_lists[i].driver_flushed_fence);
5158       util_queue_fence_destroy(&tc->buffer_lists[i].driver_flushed_fence);
5159    }
5160 
5161    for (unsigned i = 0; i < ARRAY_SIZE(tc->fb_resources); i++)
5162       pipe_resource_reference(&tc->fb_resources[i], NULL);
5163    pipe_resource_reference(&tc->fb_resolve, NULL);
5164 
5165    FREE(tc);
5166 }
5167 
5168 void tc_driver_internal_flush_notify(struct threaded_context *tc)
5169 {
5170    /* Allow drivers to call this function even for internal contexts that
5171     * don't have tc. It simplifies drivers.
5172     */
5173    if (!tc)
5174       return;
5175 
5176    /* Signal fences set by tc_batch_execute. */
5177    for (unsigned i = 0; i < tc->num_signal_fences_next_flush; i++)
5178       util_queue_fence_signal(tc->signal_fences_next_flush[i]);
5179 
5180    tc->num_signal_fences_next_flush = 0;
5181 }
5182 
5183 /**
5184  * Wrap an existing pipe_context into a threaded_context.
5185  *
5186  * \param pipe                 pipe_context to wrap
5187  * \param parent_transfer_pool parent slab pool set up for creating pipe_-
5188  *                             transfer objects; the driver should have one
5189  *                             in pipe_screen.
5190  * \param replace_buffer  callback for replacing a pipe_resource's storage
5191  *                        with another pipe_resource's storage.
5192  * \param options         optional TC options/callbacks
5193  * \param out  if successful, the threaded_context will be returned here in
5194  *             addition to the return value if "out" != NULL
5195  */
5196 struct pipe_context *
5197 threaded_context_create(struct pipe_context *pipe,
5198                         struct slab_parent_pool *parent_transfer_pool,
5199                         tc_replace_buffer_storage_func replace_buffer,
5200                         const struct threaded_context_options *options,
5201                         struct threaded_context **out)
5202 {
5203    struct threaded_context *tc;
5204 
5205    if (!pipe)
5206       return NULL;
5207 
5208    if (!debug_get_bool_option("GALLIUM_THREAD", true))
5209       return pipe;
5210 
5211    tc = CALLOC_STRUCT(threaded_context);
5212    if (!tc) {
5213       pipe->destroy(pipe);
5214       return NULL;
5215    }
5216 
5217    if (options) {
5218       /* this is unimplementable */
5219       assert(!(options->parse_renderpass_info && options->driver_calls_flush_notify));
5220       tc->options = *options;
5221    }
5222 
5223    pipe = trace_context_create_threaded(pipe->screen, pipe, &replace_buffer, &tc->options);
5224 
5225    /* The driver context isn't wrapped, so set its "priv" to NULL. */
5226    pipe->priv = NULL;
5227 
5228    tc->pipe = pipe;
5229    tc->replace_buffer_storage = replace_buffer;
5230    tc->map_buffer_alignment =
5231       pipe->screen->caps.min_map_buffer_alignment;
5232    tc->ubo_alignment =
5233       MAX2(pipe->screen->caps.constant_buffer_offset_alignment, 64);
5234    tc->base.priv = pipe; /* priv points to the wrapped driver context */
5235    tc->base.screen = pipe->screen;
5236    tc->base.destroy = tc_destroy;
5237    tc->base.callback = tc_callback;
5238 
5239    tc->base.stream_uploader = u_upload_clone(&tc->base, pipe->stream_uploader);
5240    if (pipe->stream_uploader == pipe->const_uploader)
5241       tc->base.const_uploader = tc->base.stream_uploader;
5242    else
5243       tc->base.const_uploader = u_upload_clone(&tc->base, pipe->const_uploader);
5244 
5245    if (!tc->base.stream_uploader || !tc->base.const_uploader)
5246       goto fail;
5247 
5248    tc->use_forced_staging_uploads = true;
5249 
5250    /* The queue size is the number of batches "waiting". Batches are removed
5251     * from the queue before being executed, so keep one tc_batch slot for that
5252     * execution. Also, keep one unused slot for an unflushed batch.
5253     */
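   /* Editor's illustration (the TC_MAX_BATCHES value is hypothetical): with
    * TC_MAX_BATCHES == 9, the queue holds at most 7 pending batches, leaving
    * one slot for the batch currently executing and one for the batch still
    * being filled by the frontend.
    */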
5254    if (!util_queue_init(&tc->queue, "gdrv", TC_MAX_BATCHES - 2, 1, 0, NULL))
5255       goto fail;
5256 
5257    tc->last_completed = -1;
5258    for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
5259 #if !defined(NDEBUG) && TC_DEBUG >= 1
5260       tc->batch_slots[i].sentinel = TC_SENTINEL;
5261 #endif
5262       tc->batch_slots[i].tc = tc;
5263       tc->batch_slots[i].batch_idx = i;
5264       util_queue_fence_init(&tc->batch_slots[i].fence);
5265       tc->batch_slots[i].renderpass_info_idx = -1;
5266       if (tc->options.parse_renderpass_info) {
5267          util_dynarray_init(&tc->batch_slots[i].renderpass_infos, NULL);
5268          tc_batch_renderpass_infos_resize(tc, &tc->batch_slots[i]);
5269       }
5270    }
5271    for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++)
5272       util_queue_fence_init(&tc->buffer_lists[i].driver_flushed_fence);
5273 
5274    list_inithead(&tc->unflushed_queries);
5275 
5276    slab_create_child(&tc->pool_transfers, parent_transfer_pool);
5277 
5278    /* If you have different limits in each shader stage, set the maximum. */
5279    struct pipe_screen *screen = pipe->screen;
5280    tc->max_const_buffers =
5281       screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
5282                                PIPE_SHADER_CAP_MAX_CONST_BUFFERS);
5283    tc->max_shader_buffers =
5284       screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
5285                                PIPE_SHADER_CAP_MAX_SHADER_BUFFERS);
5286    tc->max_images =
5287       screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
5288                                PIPE_SHADER_CAP_MAX_SHADER_IMAGES);
5289    tc->max_samplers =
5290       screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
5291                                PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS);
5292 
5293    tc->base.set_context_param = tc_set_context_param; /* always set this */
5294 
5295 #define CTX_INIT(_member) \
5296    tc->base._member = tc->pipe->_member ? tc_##_member : NULL
5297 
5298    CTX_INIT(flush);
5299    CTX_INIT(draw_vbo);
5300    CTX_INIT(draw_vertex_state);
5301    CTX_INIT(launch_grid);
5302    CTX_INIT(resource_copy_region);
5303    CTX_INIT(blit);
5304    CTX_INIT(clear);
5305    CTX_INIT(clear_render_target);
5306    CTX_INIT(clear_depth_stencil);
5307    CTX_INIT(clear_buffer);
5308    CTX_INIT(clear_texture);
5309    CTX_INIT(flush_resource);
5310    CTX_INIT(generate_mipmap);
5311    CTX_INIT(render_condition);
5312    CTX_INIT(create_query);
5313    CTX_INIT(create_batch_query);
5314    CTX_INIT(destroy_query);
5315    CTX_INIT(begin_query);
5316    CTX_INIT(end_query);
5317    CTX_INIT(get_query_result);
5318    CTX_INIT(get_query_result_resource);
5319    CTX_INIT(set_active_query_state);
5320    CTX_INIT(create_blend_state);
5321    CTX_INIT(bind_blend_state);
5322    CTX_INIT(delete_blend_state);
5323    CTX_INIT(create_sampler_state);
5324    CTX_INIT(bind_sampler_states);
5325    CTX_INIT(delete_sampler_state);
5326    CTX_INIT(create_rasterizer_state);
5327    CTX_INIT(bind_rasterizer_state);
5328    CTX_INIT(delete_rasterizer_state);
5329    CTX_INIT(create_depth_stencil_alpha_state);
5330    CTX_INIT(bind_depth_stencil_alpha_state);
5331    CTX_INIT(delete_depth_stencil_alpha_state);
5332    CTX_INIT(link_shader);
5333    CTX_INIT(create_fs_state);
5334    CTX_INIT(bind_fs_state);
5335    CTX_INIT(delete_fs_state);
5336    CTX_INIT(create_vs_state);
5337    CTX_INIT(bind_vs_state);
5338    CTX_INIT(delete_vs_state);
5339    CTX_INIT(create_gs_state);
5340    CTX_INIT(bind_gs_state);
5341    CTX_INIT(delete_gs_state);
5342    CTX_INIT(create_tcs_state);
5343    CTX_INIT(bind_tcs_state);
5344    CTX_INIT(delete_tcs_state);
5345    CTX_INIT(create_tes_state);
5346    CTX_INIT(bind_tes_state);
5347    CTX_INIT(delete_tes_state);
5348    CTX_INIT(create_compute_state);
5349    CTX_INIT(bind_compute_state);
5350    CTX_INIT(delete_compute_state);
5351    CTX_INIT(create_vertex_elements_state);
5352    CTX_INIT(bind_vertex_elements_state);
5353    CTX_INIT(delete_vertex_elements_state);
5354    CTX_INIT(set_blend_color);
5355    CTX_INIT(set_stencil_ref);
5356    CTX_INIT(set_sample_mask);
5357    CTX_INIT(set_min_samples);
5358    CTX_INIT(set_clip_state);
5359    CTX_INIT(set_constant_buffer);
5360    CTX_INIT(set_inlinable_constants);
5361    CTX_INIT(set_framebuffer_state);
5362    CTX_INIT(set_polygon_stipple);
5363    CTX_INIT(set_sample_locations);
5364    CTX_INIT(set_scissor_states);
5365    CTX_INIT(set_viewport_states);
5366    CTX_INIT(set_window_rectangles);
5367    CTX_INIT(set_sampler_views);
5368    CTX_INIT(set_tess_state);
5369    CTX_INIT(set_patch_vertices);
5370    CTX_INIT(set_shader_buffers);
5371    CTX_INIT(set_shader_images);
5372    CTX_INIT(set_vertex_buffers);
5373    CTX_INIT(create_stream_output_target);
5374    CTX_INIT(stream_output_target_destroy);
5375    CTX_INIT(set_stream_output_targets);
5376    CTX_INIT(create_sampler_view);
5377    CTX_INIT(sampler_view_destroy);
5378    CTX_INIT(create_surface);
5379    CTX_INIT(surface_destroy);
5380    CTX_INIT(buffer_map);
5381    CTX_INIT(texture_map);
5382    CTX_INIT(transfer_flush_region);
5383    CTX_INIT(buffer_unmap);
5384    CTX_INIT(texture_unmap);
5385    CTX_INIT(buffer_subdata);
5386    CTX_INIT(texture_subdata);
5387    CTX_INIT(texture_barrier);
5388    CTX_INIT(memory_barrier);
5389    CTX_INIT(resource_commit);
5390    CTX_INIT(create_video_codec);
5391    CTX_INIT(create_video_buffer);
5392    CTX_INIT(set_compute_resources);
5393    CTX_INIT(set_global_binding);
5394    CTX_INIT(get_sample_position);
5395    CTX_INIT(invalidate_resource);
5396    CTX_INIT(get_device_reset_status);
5397    CTX_INIT(set_device_reset_callback);
5398    CTX_INIT(dump_debug_state);
5399    CTX_INIT(set_log_context);
5400    CTX_INIT(emit_string_marker);
5401    CTX_INIT(set_debug_callback);
5402    CTX_INIT(create_fence_fd);
5403    CTX_INIT(fence_server_sync);
5404    CTX_INIT(fence_server_signal);
5405    CTX_INIT(get_timestamp);
5406    CTX_INIT(create_texture_handle);
5407    CTX_INIT(delete_texture_handle);
5408    CTX_INIT(make_texture_handle_resident);
5409    CTX_INIT(create_image_handle);
5410    CTX_INIT(delete_image_handle);
5411    CTX_INIT(make_image_handle_resident);
5412    CTX_INIT(set_frontend_noop);
5413    CTX_INIT(init_intel_perf_query_info);
5414    CTX_INIT(get_intel_perf_query_info);
5415    CTX_INIT(get_intel_perf_query_counter_info);
5416    CTX_INIT(new_intel_perf_query_obj);
5417    CTX_INIT(begin_intel_perf_query);
5418    CTX_INIT(end_intel_perf_query);
5419    CTX_INIT(delete_intel_perf_query);
5420    CTX_INIT(wait_intel_perf_query);
5421    CTX_INIT(is_intel_perf_query_ready);
5422    CTX_INIT(get_intel_perf_query_data);
5423 #undef CTX_INIT
5424 
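   /* Build the call-execution dispatch table: the X-macro include expands
    * CALL(name) once per batched call type, so each TC_CALL_* id maps to the
    * tc_call_* function that is run when a batch is replayed on the driver
    * thread.
    */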
5425 #define CALL(name) tc->execute_func[TC_CALL_##name] = tc_call_##name;
5426 #include "u_threaded_context_calls.h"
5427 #undef CALL
5428 
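   /* Final setup: hand the threaded_context back to the driver if requested,
    * start the first buffer list, and, when renderpass parsing is enabled,
    * prepare renderpass-info tracking for the first batch.
    */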
5429    if (out)
5430       *out = tc;
5431 
5432    tc_begin_next_buffer_list(tc);
5433    if (tc->options.parse_renderpass_info)
5434       tc_batch_increment_renderpass_info(tc, tc->next, false);
5435    return &tc->base;
5436 
5437 fail:
5438    tc_destroy(&tc->base);
5439    return NULL;
5440 }
5441 
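/* Set tc->bytes_mapped_limit, the threshold of outstanding mapped buffer
 * memory above which the threaded context starts flushing so mappings can be
 * released: total system RAM divided by `divisor`, additionally clamped to
 * 512 MB on 32-bit builds where address space is the scarcer resource.
 *
 * Usage sketch (the divisor is illustrative, not a recommended value):
 *
 *    threaded_context_init_bytes_mapped_limit(tc, 4);  // allow ~1/4 of RAM
 */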
5442 void
5443 threaded_context_init_bytes_mapped_limit(struct threaded_context *tc, unsigned divisor)
5444 {
5445    uint64_t total_ram;
5446    if (os_get_total_physical_memory(&total_ram)) {
5447       tc->bytes_mapped_limit = total_ram / divisor;
5448       if (sizeof(void*) == 4)
5449          tc->bytes_mapped_limit = MIN2(tc->bytes_mapped_limit, 512*1024*1024UL);
5450    }
5451 }
5452 
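/* Return the renderpass info parsed for the current renderpass: wait on each
 * entry's ready fence and, whenever the info was continued in a later batch
 * (info->next is set), follow the chain so that the last entry in the chain
 * is what gets returned.
 */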
5453 const struct tc_renderpass_info *
5454 threaded_context_get_renderpass_info(struct threaded_context *tc)
5455 {
5456    assert(tc->renderpass_info && tc->options.parse_renderpass_info);
5457    struct tc_batch_rp_info *info = tc_batch_rp_info(tc->renderpass_info);
5458    while (1) {
5459       util_queue_fence_wait(&info->ready);
5460       if (!info->next)
5461          return &info->info;
5462       info = info->next;
5463    }
5464 }
5465