1 /**************************************************************************
2  *
3  * Copyright 2017 Advanced Micro Devices, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * on the rights to use, copy, modify, merge, publish, distribute, sub
10  * license, and/or sell copies of the Software, and to permit persons to whom
11  * the Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23  * USE OR OTHER DEALINGS IN THE SOFTWARE.
24  *
25  **************************************************************************/
26 
27 #include "util/u_threaded_context.h"
28 #include "util/u_cpu_detect.h"
29 #include "util/format/u_format.h"
30 #include "util/u_inlines.h"
31 #include "util/u_memory.h"
32 #include "util/u_upload_mgr.h"
33 #include "driver_trace/tr_context.h"
34 #include "util/log.h"
35 #include "util/perf/cpu_trace.h"
36 #include "util/thread_sched.h"
37 #include "compiler/shader_info.h"
38 
39 #if TC_DEBUG >= 1
40 #define tc_assert assert
41 #else
42 #define tc_assert(x)
43 #endif
44 
45 #if TC_DEBUG >= 2
46 #define tc_printf mesa_logi
47 #define tc_asprintf asprintf
48 #define tc_strcmp strcmp
49 #else
50 #define tc_printf(...)
51 #define tc_asprintf(...) 0
52 #define tc_strcmp(...) 0
53 #endif
54 
55 #define TC_SENTINEL 0x5ca1ab1e
56 
57 #if TC_DEBUG >= 3 || defined(TC_TRACE)
58 static const char *tc_call_names[] = {
59 #define CALL(name) #name,
60 #include "u_threaded_context_calls.h"
61 #undef CALL
62 };
63 #endif
64 
65 #ifdef TC_TRACE
66 #  define TC_TRACE_SCOPE(call_id) MESA_TRACE_SCOPE(tc_call_names[call_id])
67 #else
68 #  define TC_TRACE_SCOPE(call_id)
69 #endif
70 
71 static void
72 tc_buffer_subdata(struct pipe_context *_pipe,
73                   struct pipe_resource *resource,
74                   unsigned usage, unsigned offset,
75                   unsigned size, const void *data);
76 
77 static void
78 tc_batch_check(UNUSED struct tc_batch *batch)
79 {
80    tc_assert(batch->sentinel == TC_SENTINEL);
81    tc_assert(batch->num_total_slots <= TC_SLOTS_PER_BATCH);
82 }
83 
84 static void
85 tc_debug_check(struct threaded_context *tc)
86 {
87    for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
88       tc_batch_check(&tc->batch_slots[i]);
89       tc_assert(tc->batch_slots[i].tc == tc);
90    }
91 }
92 
93 static void
94 tc_set_driver_thread(struct threaded_context *tc)
95 {
96 #ifndef NDEBUG
97    tc->driver_thread = thrd_current();
98 #endif
99 }
100 
101 static void
102 tc_clear_driver_thread(struct threaded_context *tc)
103 {
104 #ifndef NDEBUG
105    memset(&tc->driver_thread, 0, sizeof(tc->driver_thread));
106 #endif
107 }
108 
109 struct tc_batch_rp_info {
110    /* this is what drivers can see */
111    struct tc_renderpass_info info;
112    /* determines whether the info can be "safely" read by drivers or if it may still be in use */
113    struct util_queue_fence ready;
114    /* when a batch is full, the rp info rolls over onto 'next' */
115    struct tc_batch_rp_info *next;
116    /* when rp info has rolled over onto this struct, 'prev' is used to update pointers for realloc */
117    struct tc_batch_rp_info *prev;
118 };
119 
120 static struct tc_batch_rp_info *
121 tc_batch_rp_info(struct tc_renderpass_info *info)
122 {
123    return (struct tc_batch_rp_info *)info;
124 }
125 
126 static void
127 tc_sanitize_renderpass_info(struct threaded_context *tc)
128 {
129    tc->renderpass_info_recording->cbuf_invalidate = 0;
130    tc->renderpass_info_recording->zsbuf_invalidate = false;
131    tc->renderpass_info_recording->cbuf_load |= (~tc->renderpass_info_recording->cbuf_clear) & BITFIELD_MASK(PIPE_MAX_COLOR_BUFS);
132    if (tc->fb_resources[PIPE_MAX_COLOR_BUFS] && !tc_renderpass_info_is_zsbuf_used(tc->renderpass_info_recording))
133       /* this should be a "safe" way to indicate to the driver that both loads and stores are required;
134       * driver can always detect invalidation
135       */
136       tc->renderpass_info_recording->zsbuf_clear_partial = true;
137    if (tc->num_queries_active)
138       tc->renderpass_info_recording->has_query_ends = true;
139 }
140 
141 /* ensure the batch's array of renderpass data is large enough for the current index */
142 static void
143 tc_batch_renderpass_infos_resize(struct threaded_context *tc, struct tc_batch *batch)
144 {
145    unsigned size = batch->renderpass_infos.capacity;
146    unsigned cur_num = MAX2(batch->renderpass_info_idx, 0);
147 
148    if (size / sizeof(struct tc_batch_rp_info) > cur_num)
149       return;
150 
151    struct tc_batch_rp_info *infos = batch->renderpass_infos.data;
152    unsigned old_idx = batch->renderpass_info_idx - 1;
153    bool redo = tc->renderpass_info_recording &&
154                tc->renderpass_info_recording == &infos[old_idx].info;
155    if (!util_dynarray_resize(&batch->renderpass_infos, struct tc_batch_rp_info, cur_num + 10))
156       mesa_loge("tc: memory alloc fail!");
157 
158    if (size != batch->renderpass_infos.capacity) {
159       /* zero new allocation region */
160       uint8_t *data = batch->renderpass_infos.data;
161       memset(data + size, 0, batch->renderpass_infos.capacity - size);
162       unsigned start = size / sizeof(struct tc_batch_rp_info);
163       unsigned count = (batch->renderpass_infos.capacity - size) /
164                        sizeof(struct tc_batch_rp_info);
165       infos = batch->renderpass_infos.data;
166       if (infos->prev)
167          infos->prev->next = infos;
168       for (unsigned i = 0; i < count; i++)
169          util_queue_fence_init(&infos[start + i].ready);
170       /* re-set current recording info on resize */
171       if (redo)
172          tc->renderpass_info_recording = &infos[old_idx].info;
173    }
174 }
175 
176 /* signal that the renderpass info is "ready" for use by drivers and will no longer be updated */
177 static void
178 tc_signal_renderpass_info_ready(struct threaded_context *tc)
179 {
180    if (tc->renderpass_info_recording &&
181        !util_queue_fence_is_signalled(&tc_batch_rp_info(tc->renderpass_info_recording)->ready))
182       util_queue_fence_signal(&tc_batch_rp_info(tc->renderpass_info_recording)->ready);
183 }
184 
185 /* increment the current renderpass info struct for recording
186  * 'full_copy' is used for preserving data across non-blocking tc batch flushes
187  */
188 static void
189 tc_batch_increment_renderpass_info(struct threaded_context *tc, unsigned batch_idx, bool full_copy)
190 {
191    struct tc_batch *batch = &tc->batch_slots[batch_idx];
192    struct tc_batch_rp_info *tc_info = batch->renderpass_infos.data;
193 
194    if (tc_info[0].next || batch->num_total_slots) {
195       /* deadlock condition detected: all batches are in flight, renderpass hasn't ended
196        * (probably a CTS case)
197        */
198       struct tc_batch_rp_info *info = tc_batch_rp_info(tc->renderpass_info_recording);
199       if (!util_queue_fence_is_signalled(&info->ready)) {
200          /* this batch is actively executing and the driver is waiting on the recording fence to signal */
201          /* force all buffer usage to avoid data loss */
202          info->info.cbuf_load = ~(BITFIELD_MASK(8) & info->info.cbuf_clear);
203          info->info.zsbuf_clear_partial = true;
204          info->info.has_query_ends = tc->num_queries_active > 0;
205          /* ensure threaded_context_get_renderpass_info() won't deadlock */
206          info->next = NULL;
207          util_queue_fence_signal(&info->ready);
208       }
209       /* always wait on the batch to finish since this will otherwise overwrite thread data */
210       util_queue_fence_wait(&batch->fence);
211    }
212    /* increment rp info and initialize it */
213    batch->renderpass_info_idx++;
214    tc_batch_renderpass_infos_resize(tc, batch);
215    tc_info = batch->renderpass_infos.data;
216 
217    if (full_copy) {
218       /* this should only be called when changing batches */
219       assert(batch->renderpass_info_idx == 0);
220       /* copy the previous data in its entirety: this is still the same renderpass */
221       if (tc->renderpass_info_recording) {
222          tc_info[batch->renderpass_info_idx].info.data = tc->renderpass_info_recording->data;
223          tc_batch_rp_info(tc->renderpass_info_recording)->next = &tc_info[batch->renderpass_info_idx];
224          tc_info[batch->renderpass_info_idx].prev = tc_batch_rp_info(tc->renderpass_info_recording);
225          /* guard against deadlock scenario */
226          assert(&tc_batch_rp_info(tc->renderpass_info_recording)->next->info != tc->renderpass_info_recording);
227       } else {
228          tc_info[batch->renderpass_info_idx].info.data = 0;
229          tc_info[batch->renderpass_info_idx].prev = NULL;
230       }
231    } else {
232       /* selectively copy: only the CSO metadata is copied, and a new framebuffer state will be added later */
233       tc_info[batch->renderpass_info_idx].info.data = 0;
234       if (tc->renderpass_info_recording) {
235          tc_info[batch->renderpass_info_idx].info.data16[2] = tc->renderpass_info_recording->data16[2];
236          tc_batch_rp_info(tc->renderpass_info_recording)->next = NULL;
237          tc_info[batch->renderpass_info_idx].prev = NULL;
238       }
239    }
240 
241    assert(!full_copy || !tc->renderpass_info_recording || tc_batch_rp_info(tc->renderpass_info_recording)->next);
242    /* signal existing info since it will not be used anymore */
243    tc_signal_renderpass_info_ready(tc);
244    util_queue_fence_reset(&tc_info[batch->renderpass_info_idx].ready);
245    /* guard against deadlock scenario */
246    assert(tc->renderpass_info_recording != &tc_info[batch->renderpass_info_idx].info);
247    /* this is now the current recording renderpass info */
248    tc->renderpass_info_recording = &tc_info[batch->renderpass_info_idx].info;
249    batch->max_renderpass_info_idx = batch->renderpass_info_idx;
250 }
251 
252 static ALWAYS_INLINE struct tc_renderpass_info *
253 tc_get_renderpass_info(struct threaded_context *tc)
254 {
255    return tc->renderpass_info_recording;
256 }
257 
258 /* update metadata at draw time */
259 static void
260 tc_parse_draw(struct threaded_context *tc)
261 {
262    struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
263 
264    if (info) {
265       /* all buffers that aren't cleared are considered loaded */
266       info->cbuf_load |= ~info->cbuf_clear;
267       if (!info->zsbuf_clear)
268          info->zsbuf_load = true;
269       /* previous invalidates are no longer relevant */
270       info->cbuf_invalidate = 0;
271       info->zsbuf_invalidate = false;
272       info->has_draw = true;
273       info->has_query_ends |= tc->query_ended;
274    }
275 
276    tc->in_renderpass = true;
277    tc->seen_fb_state = true;
278    tc->query_ended = false;
279 }
280 
281 static void *
282 to_call_check(void *ptr, unsigned num_slots)
283 {
284 #if TC_DEBUG >= 1
285    struct tc_call_base *call = ptr;
286    tc_assert(call->num_slots == num_slots);
287 #endif
288    return ptr;
289 }
290 #define to_call(ptr, type) ((struct type *)to_call_check((void *)(ptr), call_size(type)))
291 
292 #define size_to_slots(size)      DIV_ROUND_UP(size, 8)
293 #define call_size(type)          size_to_slots(sizeof(struct type))
294 #define call_size_with_slots(type, num_slots) size_to_slots( \
295    sizeof(struct type) + sizeof(((struct type*)NULL)->slot[0]) * (num_slots))
296 #define get_next_call(ptr, type) ((struct type*)((uint64_t*)ptr + call_size(type)))
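
/* Sizing example (illustrative only, not generated by the macros above):
 * calls are measured in 8-byte slots.  For a hypothetical payload
 *
 *    struct tc_call_set_foo {
 *       struct tc_call_base base;
 *       uint32_t value;
 *       uint32_t slot[0];    // variable-length tail
 *    };
 *
 * call_size(tc_call_set_foo) is DIV_ROUND_UP(sizeof(struct tc_call_set_foo), 8),
 * and call_size_with_slots(tc_call_set_foo, 4) additionally reserves room for
 * four tail elements.  tc_call_set_foo is a made-up name used only to show
 * the arithmetic.
 */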
297 
298 ALWAYS_INLINE static void
299 tc_set_resource_batch_usage(struct threaded_context *tc, struct pipe_resource *pres)
300 {
301    /* ignore batch usage when persistent */
302    if (threaded_resource(pres)->last_batch_usage != INT8_MAX)
303       threaded_resource(pres)->last_batch_usage = tc->next;
304    threaded_resource(pres)->batch_generation = tc->batch_generation;
305 }
306 
307 ALWAYS_INLINE static void
308 tc_set_resource_batch_usage_persistent(struct threaded_context *tc, struct pipe_resource *pres, bool enable)
309 {
310    if (!pres)
311       return;
312    /* mark with special value to block any unsynchronized access */
313    threaded_resource(pres)->last_batch_usage = enable ? INT8_MAX : tc->next;
314    threaded_resource(pres)->batch_generation = tc->batch_generation;
315 }
316 
317 /* this can ONLY be used to check against the currently recording batch */
318 ALWAYS_INLINE static bool
319 tc_resource_batch_usage_test_busy(const struct threaded_context *tc, const struct pipe_resource *pres)
320 {
321    const struct threaded_resource *tbuf = (const struct threaded_resource*)pres;
322 
323    if (!tc->options.unsynchronized_texture_subdata)
324       return true;
325 
326    /* resource has persistent access: assume always busy */
327    if (tbuf->last_batch_usage == INT8_MAX)
328       return true;
329 
330    /* resource has never been seen */
331    if (tbuf->last_batch_usage == -1)
332       return false;
333 
334    /* resource has been seen but no batches have executed */
335    if (tc->last_completed == -1)
336       return true;
337 
338    /* begin comparisons checking number of times batches have cycled */
339    unsigned diff = tc->batch_generation - tbuf->batch_generation;
340    /* resource has been seen, batches have fully cycled at least once */
341    if (diff > 1)
342       return false;
343 
344    /* resource has been seen in current batch cycle: return whether batch has definitely completed */
345    if (diff == 0)
346       return tc->last_completed >= tbuf->last_batch_usage;
347 
348    /* resource has been seen within one batch cycle: check for batch wrapping */
349    if (tc->last_completed >= tbuf->last_batch_usage)
350       /* this or a subsequent pre-wrap batch was the last to definitely complete: resource is idle */
351       return false;
352 
353    /* batch execution has not definitely wrapped: resource is definitely not idle */
354    if (tc->last_completed > tc->next)
355       return true;
356 
357    /* resource was seen pre-wrap, batch execution has definitely wrapped: idle */
358    if (tbuf->last_batch_usage > tc->last_completed)
359       return false;
360 
361    /* tc->last_completed is not an exact measurement, so anything else is considered busy */
362    return true;
363 }
364 
365 /* Assign src to dst while dst is uninitialized. */
366 static inline void
367 tc_set_resource_reference(struct pipe_resource **dst, struct pipe_resource *src)
368 {
369    *dst = src;
370    pipe_reference(NULL, &src->reference); /* only increment refcount */
371 }
372 
373 /* Assign src to dst while dst is uninitialized. */
374 static inline void
375 tc_set_vertex_state_reference(struct pipe_vertex_state **dst,
376                               struct pipe_vertex_state *src)
377 {
378    *dst = src;
379    pipe_reference(NULL, &src->reference); /* only increment refcount */
380 }
381 
382 /* Unreference dst but don't touch the dst pointer. */
383 static inline void
384 tc_drop_resource_reference(struct pipe_resource *dst)
385 {
386    if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
387       pipe_resource_destroy(dst);
388 }
389 
390 /* Unreference dst but don't touch the dst pointer. */
391 static inline void
392 tc_drop_surface_reference(struct pipe_surface *dst)
393 {
394    if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
395       dst->context->surface_destroy(dst->context, dst);
396 }
397 
398 /* Unreference dst but don't touch the dst pointer. */
399 static inline void
400 tc_drop_so_target_reference(struct pipe_stream_output_target *dst)
401 {
402    if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
403       dst->context->stream_output_target_destroy(dst->context, dst);
404 }
405 
406 /**
407  * Subtract the given number of references.
408  */
409 static inline void
410 tc_drop_vertex_state_references(struct pipe_vertex_state *dst, int num_refs)
411 {
412    int count = p_atomic_add_return(&dst->reference.count, -num_refs);
413 
414    assert(count >= 0);
415    /* Underflows shouldn't happen, but let's be safe. */
416    if (count <= 0)
417       dst->screen->vertex_state_destroy(dst->screen, dst);
418 }
419 
420 /* We don't want to read or write min_index and max_index, because
421  * they shouldn't be needed by drivers at this point.
422  */
423 #define DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX \
424    offsetof(struct pipe_draw_info, min_index)
425 
426 ALWAYS_INLINE static struct tc_renderpass_info *
427 incr_rp_info(struct tc_renderpass_info *tc_info)
428 {
429    struct tc_batch_rp_info *info = tc_batch_rp_info(tc_info);
430    return &info[1].info;
431 }
432 
433 ALWAYS_INLINE static void
434 batch_execute(struct tc_batch *batch, struct pipe_context *pipe, uint64_t *last, bool parsing)
435 {
436    /* if the framebuffer state is persisting from a previous batch,
437     * begin incrementing renderpass info on the first set_framebuffer_state call
438     */
439    bool first = !batch->first_set_fb;
440    const tc_execute *execute_func = batch->tc->execute_func;
441 
442    for (uint64_t *iter = batch->slots; iter != last;) {
443       struct tc_call_base *call = (struct tc_call_base *)iter;
444 
445       tc_assert(call->sentinel == TC_SENTINEL);
446 
447 #if TC_DEBUG >= 3
448       tc_printf("CALL: %s", tc_call_names[call->call_id]);
449 #endif
450 
451       TC_TRACE_SCOPE(call->call_id);
452 
453       iter += execute_func[call->call_id](pipe, call);
454 
455       if (parsing) {
456          if (call->call_id == TC_CALL_flush) {
457             /* always increment renderpass info for non-deferred flushes */
458             batch->tc->renderpass_info = incr_rp_info(batch->tc->renderpass_info);
459             /* if a flush happens, renderpass info is always incremented after */
460             first = false;
461          } else if (call->call_id == TC_CALL_set_framebuffer_state) {
462             /* the renderpass info pointer is already set at the start of the batch,
463              * so don't increment on the first set_framebuffer_state call
464              */
465             if (!first)
466                batch->tc->renderpass_info = incr_rp_info(batch->tc->renderpass_info);
467             first = false;
468          } else if (call->call_id >= TC_CALL_draw_single &&
469                     call->call_id <= TC_CALL_draw_vstate_multi) {
470             /* if a draw happens before a set_framebuffer_state on this batch,
471              * begin incrementing renderpass data
472              */
473             first = false;
474          }
475       }
476    }
477 }
478 
479 static void
480 tc_batch_execute(void *job, UNUSED void *gdata, int thread_index)
481 {
482    struct tc_batch *batch = job;
483    struct pipe_context *pipe = batch->tc->pipe;
484    uint64_t *last = &batch->slots[batch->num_total_slots];
485 
486    tc_batch_check(batch);
487    tc_set_driver_thread(batch->tc);
488 
489    assert(!batch->token);
490 
491    /* setup renderpass info */
492    batch->tc->renderpass_info = batch->renderpass_infos.data;
493 
494    if (batch->tc->options.parse_renderpass_info) {
495       batch_execute(batch, pipe, last, true);
496 
497       struct tc_batch_rp_info *info = batch->renderpass_infos.data;
498       for (unsigned i = 0; i < batch->max_renderpass_info_idx + 1; i++) {
499          if (info[i].next)
500             info[i].next->prev = NULL;
501          info[i].next = NULL;
502       }
503    } else {
504       batch_execute(batch, pipe, last, false);
505    }
506 
507    /* Add the fence to the list of fences for the driver to signal at the next
508     * flush, which we use for tracking which buffers are referenced by
509     * an unflushed command buffer.
510     */
511    struct threaded_context *tc = batch->tc;
512    struct util_queue_fence *fence =
513       &tc->buffer_lists[batch->buffer_list_index].driver_flushed_fence;
514 
515    if (tc->options.driver_calls_flush_notify) {
516       tc->signal_fences_next_flush[tc->num_signal_fences_next_flush++] = fence;
517 
518       /* Since our buffer lists are chained as a ring, we need to flush
519        * the context twice as we go around the ring to make the driver signal
520        * the buffer list fences, so that the producer thread can reuse the buffer
521        * list structures for the next batches without waiting.
522        */
523       unsigned half_ring = TC_MAX_BUFFER_LISTS / 2;
524       if (batch->buffer_list_index % half_ring == half_ring - 1)
525          pipe->flush(pipe, NULL, PIPE_FLUSH_ASYNC);
526    } else {
527       util_queue_fence_signal(fence);
528    }
529 
530    tc_clear_driver_thread(batch->tc);
531    tc_batch_check(batch);
532    batch->num_total_slots = 0;
533    batch->last_mergeable_call = NULL;
534    batch->first_set_fb = false;
535    batch->max_renderpass_info_idx = 0;
536    batch->tc->last_completed = batch->batch_idx;
537 }
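
/* Illustration of the half-ring flush above (assumed small numbers, not the
 * real TC_MAX_BUFFER_LISTS value): with 4 buffer lists, half_ring is 2 and
 * the driver is asked to flush when buffer_list_index is 1 or 3, i.e. twice
 * per trip around the ring.  That way the fences of the older half of the
 * ring are signaled before the producer thread wraps around and wants to
 * reuse those buffer list structures.
 */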
538 
539 static void
540 tc_begin_next_buffer_list(struct threaded_context *tc)
541 {
542    tc->next_buf_list = (tc->next_buf_list + 1) % TC_MAX_BUFFER_LISTS;
543 
544    tc->batch_slots[tc->next].buffer_list_index = tc->next_buf_list;
545 
546    /* Clear the buffer list in the new empty batch. */
547    struct tc_buffer_list *buf_list = &tc->buffer_lists[tc->next_buf_list];
548    assert(util_queue_fence_is_signalled(&buf_list->driver_flushed_fence));
549    util_queue_fence_reset(&buf_list->driver_flushed_fence); /* set to unsignalled */
550    BITSET_ZERO(buf_list->buffer_list);
551 
552    tc->add_all_gfx_bindings_to_buffer_list = true;
553    tc->add_all_compute_bindings_to_buffer_list = true;
554 }
555 
556 static void
557 tc_add_call_end(struct tc_batch *next)
558 {
559    /* Add a dummy last call that won't be executed, but will indicate the end
560     * of the batch. It's for calls that always look at the next call, and this
561     * stops them from looking farther ahead.
562     */
563    assert(next->num_total_slots < TC_SLOTS_PER_BATCH);
564    struct tc_call_base *call =
565       (struct tc_call_base*)&next->slots[next->num_total_slots];
566    call->call_id = TC_NUM_CALLS;
567    call->num_slots = 1;
568 }
569 
570 static void
571 tc_batch_flush(struct threaded_context *tc, bool full_copy)
572 {
573    struct tc_batch *next = &tc->batch_slots[tc->next];
574    unsigned next_id = (tc->next + 1) % TC_MAX_BATCHES;
575 
576    tc_assert(next->num_total_slots != 0);
577    tc_add_call_end(next);
578 
579    tc_batch_check(next);
580    tc_debug_check(tc);
581    tc->bytes_mapped_estimate = 0;
582    p_atomic_add(&tc->num_offloaded_slots, next->num_total_slots);
583 
584    if (next->token) {
585       next->token->tc = NULL;
586       tc_unflushed_batch_token_reference(&next->token, NULL);
587    }
588    /* reset renderpass info index for subsequent use */
589    next->renderpass_info_idx = -1;
590 
591    /* always increment renderpass info on batch flush;
592     * renderpass info can only be accessed by its owner batch during execution
593     */
594    if (tc->renderpass_info_recording) {
595       tc->batch_slots[next_id].first_set_fb = full_copy;
596       tc_batch_increment_renderpass_info(tc, next_id, full_copy);
597    }
598 
599    util_queue_add_job(&tc->queue, next, &next->fence, tc_batch_execute,
600                       NULL, 0);
601    tc->last = tc->next;
602    tc->next = next_id;
603    if (next_id == 0)
604       tc->batch_generation++;
605    tc_begin_next_buffer_list(tc);
606 
607 }
608 
609 /* This is the function that adds variable-sized calls into the current
610  * batch. It also flushes the batch if there is not enough space left in it.
611  * All other higher-level "add" functions use it.
612  */
613 static void *
614 tc_add_sized_call(struct threaded_context *tc, enum tc_call_id id,
615                   unsigned num_slots)
616 {
617    TC_TRACE_SCOPE(id);
618    struct tc_batch *next = &tc->batch_slots[tc->next];
619    assert(num_slots <= TC_SLOTS_PER_BATCH - 1);
620    tc_debug_check(tc);
621 
622    if (unlikely(next->num_total_slots + num_slots > TC_SLOTS_PER_BATCH - 1)) {
623       /* copy existing renderpass info during flush */
624       tc_batch_flush(tc, true);
625       next = &tc->batch_slots[tc->next];
626       tc_assert(next->num_total_slots == 0);
627       tc_assert(next->last_mergeable_call == NULL);
628    }
629 
630    tc_assert(util_queue_fence_is_signalled(&next->fence));
631 
632    struct tc_call_base *call = (struct tc_call_base*)&next->slots[next->num_total_slots];
633    next->num_total_slots += num_slots;
634 
635 #if !defined(NDEBUG) && TC_DEBUG >= 1
636    call->sentinel = TC_SENTINEL;
637 #endif
638    call->call_id = id;
639    call->num_slots = num_slots;
640 
641 #if TC_DEBUG >= 3
642    tc_printf("ENQUEUE: %s", tc_call_names[id]);
643 #endif
644 
645    tc_debug_check(tc);
646    return call;
647 }
648 
649 #define tc_add_call(tc, execute, type) \
650    ((struct type*)tc_add_sized_call(tc, execute, call_size(type)))
651 
652 #define tc_add_slot_based_call(tc, execute, type, num_slots) \
653    ((struct type*)tc_add_sized_call(tc, execute, \
654                                     call_size_with_slots(type, num_slots)))
655 
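/* Usage sketch of the helpers above (hypothetical call type, shown only to
 * illustrate the enqueue pattern used throughout this file):
 *
 *    struct tc_set_foo {
 *       struct tc_call_base base;
 *       unsigned value;
 *    };
 *
 *    static uint16_t
 *    tc_call_set_foo(struct pipe_context *pipe, void *call)
 *    {
 *       pipe->set_foo(pipe, to_call(call, tc_set_foo)->value);
 *       return call_size(tc_set_foo);
 *    }
 *
 *    static void
 *    tc_set_foo(struct pipe_context *_pipe, unsigned value)
 *    {
 *       struct threaded_context *tc = threaded_context(_pipe);
 *       struct tc_set_foo *p = tc_add_call(tc, TC_CALL_set_foo, tc_set_foo);
 *       p->value = value;
 *    }
 *
 * TC_CALL_set_foo, pipe->set_foo and tc_set_foo are made-up names; real call
 * ids come from u_threaded_context_calls.h, and the simplest wrappers are
 * generated by the TC_FUNC1 macro further below.
 */
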
656 /* Returns the last mergeable call that was added to the unflushed
657  * batch, or NULL if the address of that call is not currently known
658  * or no such call exists in the unflushed batch.
659  */
660 static struct tc_call_base *
661 tc_get_last_mergeable_call(struct threaded_context *tc)
662 {
663    struct tc_batch *batch = &tc->batch_slots[tc->next];
664    struct tc_call_base *call = batch->last_mergeable_call;
665 
666    tc_assert(call == NULL || call->num_slots <= batch->num_total_slots);
667 
668    if (call && (uint64_t *)call == &batch->slots[batch->num_total_slots - call->num_slots])
669       return call;
670    else
671       return NULL;
672 }
673 
674 /* Increases the size of the last call in the unflushed batch to the
675  * given number of slots, if possible, without changing the call's data.
676  */
677 static bool
678 tc_enlarge_last_mergeable_call(struct threaded_context *tc, unsigned desired_num_slots)
679 {
680    struct tc_batch *batch = &tc->batch_slots[tc->next];
681    struct tc_call_base *call = tc_get_last_mergeable_call(tc);
682 
683    tc_assert(call);
684    tc_assert(desired_num_slots >= call->num_slots);
685 
686    unsigned added_slots = desired_num_slots - call->num_slots;
687 
688    if (unlikely(batch->num_total_slots + added_slots > TC_SLOTS_PER_BATCH - 1))
689       return false;
690 
691    batch->num_total_slots += added_slots;
692    call->num_slots += added_slots;
693 
694    return true;
695 }
696 
697 static void
698 tc_mark_call_mergeable(struct threaded_context *tc, struct tc_call_base *call)
699 {
700    struct tc_batch *batch = &tc->batch_slots[tc->next];
701    tc_assert(call->num_slots <= batch->num_total_slots);
702    tc_assert((uint64_t *)call == &batch->slots[batch->num_total_slots - call->num_slots]);
703    batch->last_mergeable_call = call;
704 }
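
/* Sketch of how the mergeable-call helpers are meant to be combined
 * (simplified, with hypothetical names; buffer_subdata uses this pattern to
 * coalesce adjacent uploads):
 *
 *    struct tc_call_base *last = tc_get_last_mergeable_call(tc);
 *    if (last && last->call_id == TC_CALL_something &&
 *        tc_enlarge_last_mergeable_call(tc, last->num_slots + extra_slots)) {
 *       // append the new data to the tail of the existing call
 *    } else {
 *       struct tc_something *p =
 *          tc_add_slot_based_call(tc, TC_CALL_something, tc_something, count);
 *       // fill in p, then allow future merging into it:
 *       tc_mark_call_mergeable(tc, &p->base);
 *    }
 *
 * TC_CALL_something / tc_something are placeholders, not real call types.
 */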
705 
706 static bool
707 tc_is_sync(struct threaded_context *tc)
708 {
709    struct tc_batch *last = &tc->batch_slots[tc->last];
710    struct tc_batch *next = &tc->batch_slots[tc->next];
711 
712    return util_queue_fence_is_signalled(&last->fence) &&
713           !next->num_total_slots;
714 }
715 
716 static void
717 _tc_sync(struct threaded_context *tc, UNUSED const char *info, UNUSED const char *func)
718 {
719    struct tc_batch *last = &tc->batch_slots[tc->last];
720    struct tc_batch *next = &tc->batch_slots[tc->next];
721    bool synced = false;
722 
723    MESA_TRACE_SCOPE(func);
724 
725    tc_debug_check(tc);
726 
727    if (tc->options.parse_renderpass_info && tc->in_renderpass && !tc->flushing) {
728       /* corner case: if tc syncs for any reason but a driver flush during a renderpass,
729        * then the current renderpass info MUST be signaled to avoid deadlocking the driver
730        *
731        * this is not a "complete" signal operation, however, as it's unknown what calls may
732        * come after this one, which means that framebuffer attachment data is unreliable
733        *
734        * to avoid erroneously passing bad state to the driver (e.g., allowing zsbuf elimination),
735        * force all attachments active and assume the app was going to get bad perf here anyway
736        */
737       tc_sanitize_renderpass_info(tc);
738    }
739    tc_signal_renderpass_info_ready(tc);
740 
741    /* Only wait for queued calls... */
742    if (!util_queue_fence_is_signalled(&last->fence)) {
743       util_queue_fence_wait(&last->fence);
744       synced = true;
745    }
746 
747    tc_debug_check(tc);
748 
749    if (next->token) {
750       next->token->tc = NULL;
751       tc_unflushed_batch_token_reference(&next->token, NULL);
752    }
753 
754    /* .. and execute unflushed calls directly. */
755    if (next->num_total_slots) {
756       p_atomic_add(&tc->num_direct_slots, next->num_total_slots);
757       tc->bytes_mapped_estimate = 0;
758       tc_add_call_end(next);
759       tc_batch_execute(next, NULL, 0);
760       tc_begin_next_buffer_list(tc);
761       synced = true;
762    }
763 
764    if (synced) {
765       p_atomic_inc(&tc->num_syncs);
766 
767       if (tc_strcmp(func, "tc_destroy") != 0) {
768          tc_printf("sync %s %s", func, info);
769       }
770    }
771 
772    tc_debug_check(tc);
773 
774    if (tc->options.parse_renderpass_info) {
775       int renderpass_info_idx = next->renderpass_info_idx;
776       if (renderpass_info_idx > 0) {
777          /* don't reset if fb state is unflushed */
778          bool fb_no_draw = tc->seen_fb_state && !tc->renderpass_info_recording->has_draw;
779          uint32_t fb_info = tc->renderpass_info_recording->data32[0];
780          next->renderpass_info_idx = -1;
781          tc_batch_increment_renderpass_info(tc, tc->next, false);
782          if (fb_no_draw)
783             tc->renderpass_info_recording->data32[0] = fb_info;
784       } else if (tc->renderpass_info_recording->has_draw) {
785          tc->renderpass_info_recording->data32[0] = 0;
786       }
787       tc->seen_fb_state = false;
788       tc->query_ended = false;
789    }
790 }
791 
792 #define tc_sync(tc) _tc_sync(tc, "", __func__)
793 #define tc_sync_msg(tc, info) _tc_sync(tc, info, __func__)
794 
795 /**
796  * Call this from fence_finish for same-context fence waits of deferred fences
797  * that haven't been flushed yet.
798  *
799  * The passed pipe_context must be the one passed to pipe_screen::fence_finish,
800  * i.e., the wrapped one.
801  */
802 void
803 threaded_context_flush(struct pipe_context *_pipe,
804                        struct tc_unflushed_batch_token *token,
805                        bool prefer_async)
806 {
807    struct threaded_context *tc = threaded_context(_pipe);
808 
809    /* This is called from the gallium frontend / application thread. */
810    if (token->tc && token->tc == tc) {
811       struct tc_batch *last = &tc->batch_slots[tc->last];
812 
813       /* Prefer to do the flush in the driver thread if it is already
814        * running. That should be better for cache locality.
815        */
816       if (prefer_async || !util_queue_fence_is_signalled(&last->fence))
817          tc_batch_flush(tc, false);
818       else
819          tc_sync(token->tc);
820    }
821 }
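
/* Caller sketch (assumed driver code, not part of this file): a driver's
 * fence_finish hook typically does roughly
 *
 *    if (ctx && fence->tc_token)
 *       threaded_context_flush(ctx, fence->tc_token, timeout == 0);
 *
 * before waiting on its winsys fence, where fence->tc_token is whatever
 * tc_unflushed_batch_token the driver stored at fence creation time; the
 * field name here is hypothetical.
 */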
822 
823 static void
824 tc_add_to_buffer_list(struct tc_buffer_list *next, struct pipe_resource *buf)
825 {
826    uint32_t id = threaded_resource(buf)->buffer_id_unique;
827    BITSET_SET(next->buffer_list, id & TC_BUFFER_ID_MASK);
828 }
829 
830 /* Reset a range of buffer binding slots. */
831 static void
832 tc_unbind_buffers(uint32_t *binding, unsigned count)
833 {
834    if (count)
835       memset(binding, 0, sizeof(*binding) * count);
836 }
837 
838 static void
839 tc_add_bindings_to_buffer_list(BITSET_WORD *buffer_list, const uint32_t *bindings,
840                                unsigned count)
841 {
842    for (unsigned i = 0; i < count; i++) {
843       if (bindings[i])
844          BITSET_SET(buffer_list, bindings[i] & TC_BUFFER_ID_MASK);
845    }
846 }
847 
848 static bool
849 tc_rebind_bindings(uint32_t old_id, uint32_t new_id, uint32_t *bindings,
850                    unsigned count)
851 {
852    unsigned rebind_count = 0;
853 
854    for (unsigned i = 0; i < count; i++) {
855       if (bindings[i] == old_id) {
856          bindings[i] = new_id;
857          rebind_count++;
858       }
859    }
860    return rebind_count;
861 }
862 
863 static void
864 tc_add_shader_bindings_to_buffer_list(struct threaded_context *tc,
865                                       BITSET_WORD *buffer_list,
866                                       enum pipe_shader_type shader)
867 {
868    tc_add_bindings_to_buffer_list(buffer_list, tc->const_buffers[shader],
869                                   tc->max_const_buffers);
870    if (tc->seen_shader_buffers[shader]) {
871       tc_add_bindings_to_buffer_list(buffer_list, tc->shader_buffers[shader],
872                                      tc->max_shader_buffers);
873    }
874    if (tc->seen_image_buffers[shader]) {
875       tc_add_bindings_to_buffer_list(buffer_list, tc->image_buffers[shader],
876                                      tc->max_images);
877    }
878    if (tc->seen_sampler_buffers[shader]) {
879       tc_add_bindings_to_buffer_list(buffer_list, tc->sampler_buffers[shader],
880                                      tc->max_samplers);
881    }
882 }
883 
884 static unsigned
885 tc_rebind_shader_bindings(struct threaded_context *tc, uint32_t old_id,
886                           uint32_t new_id, enum pipe_shader_type shader, uint32_t *rebind_mask)
887 {
888    unsigned ubo = 0, ssbo = 0, img = 0, sampler = 0;
889 
890    ubo = tc_rebind_bindings(old_id, new_id, tc->const_buffers[shader],
891                             tc->max_const_buffers);
892    if (ubo)
893       *rebind_mask |= BITFIELD_BIT(TC_BINDING_UBO_VS) << shader;
894    if (tc->seen_shader_buffers[shader]) {
895       ssbo = tc_rebind_bindings(old_id, new_id, tc->shader_buffers[shader],
896                                 tc->max_shader_buffers);
897       if (ssbo)
898          *rebind_mask |= BITFIELD_BIT(TC_BINDING_SSBO_VS) << shader;
899    }
900    if (tc->seen_image_buffers[shader]) {
901       img = tc_rebind_bindings(old_id, new_id, tc->image_buffers[shader],
902                                tc->max_images);
903       if (img)
904          *rebind_mask |= BITFIELD_BIT(TC_BINDING_IMAGE_VS) << shader;
905    }
906    if (tc->seen_sampler_buffers[shader]) {
907       sampler = tc_rebind_bindings(old_id, new_id, tc->sampler_buffers[shader],
908                                    tc->max_samplers);
909       if (sampler)
910          *rebind_mask |= BITFIELD_BIT(TC_BINDING_SAMPLERVIEW_VS) << shader;
911    }
912    return ubo + ssbo + img + sampler;
913 }
914 
915 /* Add all bound buffers used by VS/TCS/TES/GS/FS to the buffer list.
916  * This is called by the first draw call in a batch when we want to inherit
917  * all bindings set by the previous batch.
918  */
919 static void
920 tc_add_all_gfx_bindings_to_buffer_list(struct threaded_context *tc)
921 {
922    BITSET_WORD *buffer_list = tc->buffer_lists[tc->next_buf_list].buffer_list;
923 
924    tc_add_bindings_to_buffer_list(buffer_list, tc->vertex_buffers, tc->num_vertex_buffers);
925    if (tc->seen_streamout_buffers)
926       tc_add_bindings_to_buffer_list(buffer_list, tc->streamout_buffers, PIPE_MAX_SO_BUFFERS);
927 
928    tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_VERTEX);
929    tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_FRAGMENT);
930 
931    if (tc->seen_tcs)
932       tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_TESS_CTRL);
933    if (tc->seen_tes)
934       tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_TESS_EVAL);
935    if (tc->seen_gs)
936       tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_GEOMETRY);
937 
938    tc->add_all_gfx_bindings_to_buffer_list = false;
939 }
940 
941 /* Add all bound buffers used by compute to the buffer list.
942  * This is called by the first compute call in a batch when we want to inherit
943  * all bindings set by the previous batch.
944  */
945 static void
946 tc_add_all_compute_bindings_to_buffer_list(struct threaded_context *tc)
947 {
948    BITSET_WORD *buffer_list = tc->buffer_lists[tc->next_buf_list].buffer_list;
949 
950    tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_COMPUTE);
951    tc->add_all_compute_bindings_to_buffer_list = false;
952 }
953 
954 static unsigned
955 tc_rebind_buffer(struct threaded_context *tc, uint32_t old_id, uint32_t new_id, uint32_t *rebind_mask)
956 {
957    unsigned vbo = 0, so = 0;
958 
959    vbo = tc_rebind_bindings(old_id, new_id, tc->vertex_buffers,
960                             tc->num_vertex_buffers);
961    if (vbo)
962       *rebind_mask |= BITFIELD_BIT(TC_BINDING_VERTEX_BUFFER);
963 
964    if (tc->seen_streamout_buffers) {
965       so = tc_rebind_bindings(old_id, new_id, tc->streamout_buffers,
966                               PIPE_MAX_SO_BUFFERS);
967       if (so)
968          *rebind_mask |= BITFIELD_BIT(TC_BINDING_STREAMOUT_BUFFER);
969    }
970    unsigned rebound = vbo + so;
971 
972    rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_VERTEX, rebind_mask);
973    rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_FRAGMENT, rebind_mask);
974 
975    if (tc->seen_tcs)
976       rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_TESS_CTRL, rebind_mask);
977    if (tc->seen_tes)
978       rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_TESS_EVAL, rebind_mask);
979    if (tc->seen_gs)
980       rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_GEOMETRY, rebind_mask);
981 
982    rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_COMPUTE, rebind_mask);
983 
984    if (rebound)
985       BITSET_SET(tc->buffer_lists[tc->next_buf_list].buffer_list, new_id & TC_BUFFER_ID_MASK);
986    return rebound;
987 }
988 
989 static bool
990 tc_is_buffer_bound_with_mask(uint32_t id, uint32_t *bindings, unsigned binding_mask)
991 {
992    while (binding_mask) {
993       if (bindings[u_bit_scan(&binding_mask)] == id)
994          return true;
995    }
996    return false;
997 }
998 
999 static bool
1000 tc_is_buffer_shader_bound_for_write(struct threaded_context *tc, uint32_t id,
1001                                     enum pipe_shader_type shader)
1002 {
1003    if (tc->seen_shader_buffers[shader] &&
1004        tc_is_buffer_bound_with_mask(id, tc->shader_buffers[shader],
1005                                     tc->shader_buffers_writeable_mask[shader]))
1006       return true;
1007 
1008    if (tc->seen_image_buffers[shader] &&
1009        tc_is_buffer_bound_with_mask(id, tc->image_buffers[shader],
1010                                     tc->image_buffers_writeable_mask[shader]))
1011       return true;
1012 
1013    return false;
1014 }
1015 
1016 static bool
1017 tc_is_buffer_bound_for_write(struct threaded_context *tc, uint32_t id)
1018 {
1019    if (tc->seen_streamout_buffers &&
1020        tc_is_buffer_bound_with_mask(id, tc->streamout_buffers,
1021                                     BITFIELD_MASK(PIPE_MAX_SO_BUFFERS)))
1022       return true;
1023 
1024    if (tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_VERTEX) ||
1025        tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_FRAGMENT) ||
1026        tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_COMPUTE))
1027       return true;
1028 
1029    if (tc->seen_tcs &&
1030        tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_TESS_CTRL))
1031       return true;
1032 
1033    if (tc->seen_tes &&
1034        tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_TESS_EVAL))
1035       return true;
1036 
1037    if (tc->seen_gs &&
1038        tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_GEOMETRY))
1039       return true;
1040 
1041    return false;
1042 }
1043 
1044 static bool
1045 tc_is_buffer_busy(struct threaded_context *tc, struct threaded_resource *tbuf,
1046                   unsigned map_usage)
1047 {
1048    if (!tc->options.is_resource_busy)
1049       return true;
1050 
1051    uint32_t id_hash = tbuf->buffer_id_unique & TC_BUFFER_ID_MASK;
1052 
1053    for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) {
1054       struct tc_buffer_list *buf_list = &tc->buffer_lists[i];
1055 
1056       /* If the buffer is referenced by a batch that hasn't been flushed (by tc or the driver),
1057        * then the buffer is considered busy. */
1058       if (!util_queue_fence_is_signalled(&buf_list->driver_flushed_fence) &&
1059           BITSET_TEST(buf_list->buffer_list, id_hash))
1060          return true;
1061    }
1062 
1063    /* The buffer isn't referenced by any unflushed batch: we can safely ask the driver whether
1064     * this buffer is busy or not. */
1065    return tc->options.is_resource_busy(tc->pipe->screen, tbuf->latest, map_usage);
1066 }
1067 
1068 /**
1069  * allow_cpu_storage should be false for user memory and imported buffers.
1070  */
1071 void
1072 threaded_resource_init(struct pipe_resource *res, bool allow_cpu_storage)
1073 {
1074    struct threaded_resource *tres = threaded_resource(res);
1075 
1076    tres->latest = &tres->b;
1077    tres->cpu_storage = NULL;
1078    util_range_init(&tres->valid_buffer_range);
1079    tres->is_shared = false;
1080    tres->is_user_ptr = false;
1081    tres->buffer_id_unique = 0;
1082    tres->pending_staging_uploads = 0;
1083    tres->last_batch_usage = -1;
1084    util_range_init(&tres->pending_staging_uploads_range);
1085 
1086    if (allow_cpu_storage &&
1087        !(res->flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
1088                        PIPE_RESOURCE_FLAG_SPARSE |
1089                        PIPE_RESOURCE_FLAG_ENCRYPTED)) &&
1090        /* We need buffer invalidation and buffer busyness tracking for the CPU
1091         * storage, which aren't supported with pipe_vertex_state. */
1092        !(res->bind & PIPE_BIND_VERTEX_STATE))
1093       tres->allow_cpu_storage = true;
1094    else
1095       tres->allow_cpu_storage = false;
1096 }
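
/* Caller sketch (assumed driver code): drivers that use u_threaded_context
 * call threaded_resource_init() on every new resource, e.g. at the end of
 * their resource_create hook:
 *
 *    struct foo_resource *res = ...allocate and fill in res->b...;
 *    threaded_resource_init(&res->b,
 *                           !res->is_user_ptr && !res->is_imported);
 *    return &res->b;
 *
 * foo_resource and its fields are placeholders; the second argument follows
 * the rule in the comment above (no CPU storage for user memory or imported
 * buffers).
 */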
1097 
1098 void
1099 threaded_resource_deinit(struct pipe_resource *res)
1100 {
1101    struct threaded_resource *tres = threaded_resource(res);
1102 
1103    if (tres->latest != &tres->b)
1104            pipe_resource_reference(&tres->latest, NULL);
1105    util_range_destroy(&tres->valid_buffer_range);
1106    util_range_destroy(&tres->pending_staging_uploads_range);
1107    align_free(tres->cpu_storage);
1108 }
1109 
1110 struct pipe_context *
1111 threaded_context_unwrap_sync(struct pipe_context *pipe)
1112 {
1113    if (!pipe || !pipe->priv)
1114       return pipe;
1115 
1116    tc_sync(threaded_context(pipe));
1117    return (struct pipe_context*)pipe->priv;
1118 }
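
/* Usage sketch (assumed caller, for illustration): code that must talk to
 * the wrapped driver context directly first syncs and unwraps, e.g.
 *
 *    struct pipe_context *driver_ctx = threaded_context_unwrap_sync(ctx);
 *    driver_ctx->some_driver_only_hook(driver_ctx, ...);
 *
 * some_driver_only_hook is a placeholder name.
 */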
1119 
1120 
1121 /********************************************************************
1122  * simple functions
1123  */
1124 
1125 #define TC_FUNC1(func, qualifier, type, deref, addr, ...) \
1126    struct tc_call_##func { \
1127       struct tc_call_base base; \
1128       type state; \
1129    }; \
1130    \
1131    static uint16_t \
1132    tc_call_##func(struct pipe_context *pipe, void *call) \
1133    { \
1134       pipe->func(pipe, addr(to_call(call, tc_call_##func)->state)); \
1135       return call_size(tc_call_##func); \
1136    } \
1137    \
1138    static void \
1139    tc_##func(struct pipe_context *_pipe, qualifier type deref param) \
1140    { \
1141       struct threaded_context *tc = threaded_context(_pipe); \
1142       struct tc_call_##func *p = (struct tc_call_##func*) \
1143                      tc_add_call(tc, TC_CALL_##func, tc_call_##func); \
1144       p->state = deref(param); \
1145       __VA_ARGS__; \
1146    }
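
/* Expansion sketch (simplified): TC_FUNC1(set_sample_mask, , unsigned, , )
 * below generates roughly
 *
 *    struct tc_call_set_sample_mask {
 *       struct tc_call_base base;
 *       unsigned state;
 *    };
 *
 *    static uint16_t
 *    tc_call_set_sample_mask(struct pipe_context *pipe, void *call)
 *    {
 *       pipe->set_sample_mask(pipe, to_call(call, tc_call_set_sample_mask)->state);
 *       return call_size(tc_call_set_sample_mask);
 *    }
 *
 *    static void
 *    tc_set_sample_mask(struct pipe_context *_pipe, unsigned param)
 *    {
 *       struct threaded_context *tc = threaded_context(_pipe);
 *       struct tc_call_set_sample_mask *p = (struct tc_call_set_sample_mask*)
 *                      tc_add_call(tc, TC_CALL_set_sample_mask, tc_call_set_sample_mask);
 *       p->state = param;
 *    }
 *
 * i.e. the producer thread only records the value, and the driver thread
 * replays it later through pipe->set_sample_mask().
 */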
1147 
1148 TC_FUNC1(set_active_query_state, , bool, , )
1149 
1150 TC_FUNC1(set_blend_color, const, struct pipe_blend_color, *, &)
1151 TC_FUNC1(set_stencil_ref, const, struct pipe_stencil_ref, , )
1152 TC_FUNC1(set_clip_state, const, struct pipe_clip_state, *, &)
1153 TC_FUNC1(set_sample_mask, , unsigned, , )
1154 TC_FUNC1(set_min_samples, , unsigned, , )
1155 TC_FUNC1(set_polygon_stipple, const, struct pipe_poly_stipple, *, &)
1156 
1157 TC_FUNC1(texture_barrier, , unsigned, , )
1158 TC_FUNC1(memory_barrier, , unsigned, , )
1159 TC_FUNC1(delete_texture_handle, , uint64_t, , )
1160 TC_FUNC1(delete_image_handle, , uint64_t, , )
1161 TC_FUNC1(set_frontend_noop, , bool, , )
1162 
1163 
1164 /********************************************************************
1165  * queries
1166  */
1167 
1168 static struct pipe_query *
1169 tc_create_query(struct pipe_context *_pipe, unsigned query_type,
1170                 unsigned index)
1171 {
1172    struct threaded_context *tc = threaded_context(_pipe);
1173    struct pipe_context *pipe = tc->pipe;
1174 
1175    return pipe->create_query(pipe, query_type, index);
1176 }
1177 
1178 static struct pipe_query *
1179 tc_create_batch_query(struct pipe_context *_pipe, unsigned num_queries,
1180                       unsigned *query_types)
1181 {
1182    struct threaded_context *tc = threaded_context(_pipe);
1183    struct pipe_context *pipe = tc->pipe;
1184 
1185    return pipe->create_batch_query(pipe, num_queries, query_types);
1186 }
1187 
1188 struct tc_query_call {
1189    struct tc_call_base base;
1190    struct pipe_query *query;
1191 };
1192 
1193 static uint16_t
1194 tc_call_destroy_query(struct pipe_context *pipe, void *call)
1195 {
1196    struct pipe_query *query = to_call(call, tc_query_call)->query;
1197    struct threaded_query *tq = threaded_query(query);
1198 
1199    if (list_is_linked(&tq->head_unflushed))
1200       list_del(&tq->head_unflushed);
1201 
1202    pipe->destroy_query(pipe, query);
1203    return call_size(tc_query_call);
1204 }
1205 
1206 static void
1207 tc_destroy_query(struct pipe_context *_pipe, struct pipe_query *query)
1208 {
1209    struct threaded_context *tc = threaded_context(_pipe);
1210 
1211    tc_add_call(tc, TC_CALL_destroy_query, tc_query_call)->query = query;
1212 }
1213 
1214 static uint16_t
1215 tc_call_begin_query(struct pipe_context *pipe, void *call)
1216 {
1217    pipe->begin_query(pipe, to_call(call, tc_query_call)->query);
1218    return call_size(tc_query_call);
1219 }
1220 
1221 static bool
1222 tc_begin_query(struct pipe_context *_pipe, struct pipe_query *query)
1223 {
1224    struct threaded_context *tc = threaded_context(_pipe);
1225    tc->num_queries_active++;
1226 
1227    tc_add_call(tc, TC_CALL_begin_query, tc_query_call)->query = query;
1228    return true; /* we don't care about the return value for this call */
1229 }
1230 
1231 struct tc_end_query_call {
1232    struct tc_call_base base;
1233    struct threaded_context *tc;
1234    struct pipe_query *query;
1235 };
1236 
1237 static uint16_t
1238 tc_call_end_query(struct pipe_context *pipe, void *call)
1239 {
1240    struct tc_end_query_call *p = to_call(call, tc_end_query_call);
1241    struct threaded_query *tq = threaded_query(p->query);
1242 
1243    if (!list_is_linked(&tq->head_unflushed))
1244       list_add(&tq->head_unflushed, &p->tc->unflushed_queries);
1245 
1246    pipe->end_query(pipe, p->query);
1247    return call_size(tc_end_query_call);
1248 }
1249 
1250 static bool
1251 tc_end_query(struct pipe_context *_pipe, struct pipe_query *query)
1252 {
1253    struct threaded_context *tc = threaded_context(_pipe);
1254    struct threaded_query *tq = threaded_query(query);
1255    struct tc_end_query_call *call =
1256       tc_add_call(tc, TC_CALL_end_query, tc_end_query_call);
1257    tc->num_queries_active--;
1258 
1259    call->tc = tc;
1260    call->query = query;
1261 
1262    tq->flushed = false;
1263    tc->query_ended = true;
1264 
1265    return true; /* we don't care about the return value for this call */
1266 }
1267 
1268 static bool
1269 tc_get_query_result(struct pipe_context *_pipe,
1270                     struct pipe_query *query, bool wait,
1271                     union pipe_query_result *result)
1272 {
1273    struct threaded_context *tc = threaded_context(_pipe);
1274    struct threaded_query *tq = threaded_query(query);
1275    struct pipe_context *pipe = tc->pipe;
1276    bool flushed = tq->flushed;
1277 
1278    if (!flushed) {
1279       tc_sync_msg(tc, wait ? "wait" : "nowait");
1280       tc_set_driver_thread(tc);
1281    }
1282 
1283    bool success = pipe->get_query_result(pipe, query, wait, result);
1284 
1285    if (!flushed)
1286       tc_clear_driver_thread(tc);
1287 
1288    if (success) {
1289       tq->flushed = true;
1290       if (list_is_linked(&tq->head_unflushed)) {
1291          /* This is safe because it can only happen after we sync'd. */
1292          list_del(&tq->head_unflushed);
1293       }
1294    }
1295    return success;
1296 }
1297 
1298 struct tc_query_result_resource {
1299    struct tc_call_base base;
1300    enum pipe_query_flags flags:8;
1301    enum pipe_query_value_type result_type:8;
1302    int8_t index; /* it can be -1 */
1303    unsigned offset;
1304    struct pipe_query *query;
1305    struct pipe_resource *resource;
1306 };
1307 
1308 static uint16_t
1309 tc_call_get_query_result_resource(struct pipe_context *pipe, void *call)
1310 {
1311    struct tc_query_result_resource *p = to_call(call, tc_query_result_resource);
1312 
1313    pipe->get_query_result_resource(pipe, p->query, p->flags, p->result_type,
1314                                    p->index, p->resource, p->offset);
1315    tc_drop_resource_reference(p->resource);
1316    return call_size(tc_query_result_resource);
1317 }
1318 
1319 static void
1320 tc_get_query_result_resource(struct pipe_context *_pipe,
1321                              struct pipe_query *query,
1322                              enum pipe_query_flags flags,
1323                              enum pipe_query_value_type result_type, int index,
1324                              struct pipe_resource *resource, unsigned offset)
1325 {
1326    struct threaded_context *tc = threaded_context(_pipe);
1327 
1328    tc_buffer_disable_cpu_storage(resource);
1329 
1330    struct tc_query_result_resource *p =
1331       tc_add_call(tc, TC_CALL_get_query_result_resource,
1332                   tc_query_result_resource);
1333    p->query = query;
1334    p->flags = flags;
1335    p->result_type = result_type;
1336    p->index = index;
1337    tc_set_resource_reference(&p->resource, resource);
1338    tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], resource);
1339    p->offset = offset;
1340 }
1341 
1342 struct tc_render_condition {
1343    struct tc_call_base base;
1344    bool condition;
1345    unsigned mode;
1346    struct pipe_query *query;
1347 };
1348 
1349 static uint16_t
1350 tc_call_render_condition(struct pipe_context *pipe, void *call)
1351 {
1352    struct tc_render_condition *p = to_call(call, tc_render_condition);
1353    pipe->render_condition(pipe, p->query, p->condition, p->mode);
1354    return call_size(tc_render_condition);
1355 }
1356 
1357 static void
1358 tc_render_condition(struct pipe_context *_pipe,
1359                     struct pipe_query *query, bool condition,
1360                     enum pipe_render_cond_flag mode)
1361 {
1362    struct threaded_context *tc = threaded_context(_pipe);
1363    struct tc_render_condition *p =
1364       tc_add_call(tc, TC_CALL_render_condition, tc_render_condition);
1365 
1366    p->query = query;
1367    p->condition = condition;
1368    p->mode = mode;
1369 }
1370 
1371 
1372 /********************************************************************
1373  * constant (immutable) states
1374  */
1375 
1376 #define TC_CSO_CREATE(name, sname) \
1377    static void * \
1378    tc_create_##name##_state(struct pipe_context *_pipe, \
1379                             const struct pipe_##sname##_state *state) \
1380    { \
1381       struct pipe_context *pipe = threaded_context(_pipe)->pipe; \
1382       return pipe->create_##name##_state(pipe, state); \
1383    }
1384 
1385 #define TC_CSO_BIND(name, ...) TC_FUNC1(bind_##name##_state, , void *, , , ##__VA_ARGS__)
1386 #define TC_CSO_DELETE(name) TC_FUNC1(delete_##name##_state, , void *, , )
1387 
1388 #define TC_CSO(name, sname, ...) \
1389    TC_CSO_CREATE(name, sname) \
1390    TC_CSO_BIND(name, ##__VA_ARGS__) \
1391    TC_CSO_DELETE(name)
1392 
1393 #define TC_CSO_WHOLE(name) TC_CSO(name, name)
1394 #define TC_CSO_SHADER(name) TC_CSO(name, shader)
1395 #define TC_CSO_SHADER_TRACK(name) TC_CSO(name, shader, tc->seen_##name = true;)
1396 
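/* Illustrative expansion (a sketch, not compiled): TC_CSO_WHOLE(blend) below
 * generates three wrappers. State creation runs synchronously on the real
 * driver context, while bind/delete go through TC_FUNC1 (a helper macro
 * defined earlier in this file) and are merely queued for the driver thread:
 *
 *    static void *
 *    tc_create_blend_state(struct pipe_context *_pipe,
 *                          const struct pipe_blend_state *state)
 *    {
 *       struct pipe_context *pipe = threaded_context(_pipe)->pipe;
 *       return pipe->create_blend_state(pipe, state);
 *    }
 *
 *    // tc_bind_blend_state()/tc_delete_blend_state() come from TC_CSO_BIND
 *    // and TC_CSO_DELETE and only enqueue the CSO pointer for later use.
 */
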
1397 TC_CSO_WHOLE(blend)
1398 TC_CSO_WHOLE(rasterizer)
1399 TC_CSO_CREATE(depth_stencil_alpha, depth_stencil_alpha)
1400 TC_CSO_BIND(depth_stencil_alpha,
1401    if (param && tc->options.parse_renderpass_info) {
1402       /* dsa info is only ever added during a renderpass;
1403        * changes outside of a renderpass reset the data
1404        */
1405       if (!tc->in_renderpass) {
1406          tc_get_renderpass_info(tc)->zsbuf_write_dsa = 0;
1407          tc_get_renderpass_info(tc)->zsbuf_read_dsa = 0;
1408       }
1409       /* let the driver parse its own state */
1410       tc->options.dsa_parse(param, tc_get_renderpass_info(tc));
1411    }
1412 )
1413 TC_CSO_DELETE(depth_stencil_alpha)
1414 TC_CSO_WHOLE(compute)
1415 TC_CSO_CREATE(fs, shader)
1416 TC_CSO_BIND(fs,
1417    if (param && tc->options.parse_renderpass_info) {
1418       /* fs info is only ever added during a renderpass;
1419        * changes outside of a renderpass reset the data
1420        */
1421       if (!tc->in_renderpass) {
1422          tc_get_renderpass_info(tc)->cbuf_fbfetch = 0;
1423          tc_get_renderpass_info(tc)->zsbuf_write_fs = 0;
1424       }
1425       /* let the driver parse its own state */
1426       tc->options.fs_parse(param, tc_get_renderpass_info(tc));
1427    }
1428 )
1429 TC_CSO_DELETE(fs)
1430 TC_CSO_SHADER(vs)
1431 TC_CSO_SHADER_TRACK(gs)
1432 TC_CSO_SHADER_TRACK(tcs)
1433 TC_CSO_SHADER_TRACK(tes)
1434 TC_CSO_CREATE(sampler, sampler)
1435 TC_CSO_DELETE(sampler)
1436 TC_CSO_BIND(vertex_elements)
1437 TC_CSO_DELETE(vertex_elements)
1438 
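/* Driver-side sketch (hypothetical, not part of this file): when
 * options.parse_renderpass_info is set, the dsa_parse/fs_parse hooks used by
 * the depth_stencil_alpha and fs bind wrappers above let the driver record
 * zsbuf usage from its own CSO layout. The driver struct and field names here
 * are assumptions for illustration only:
 *
 *    static void
 *    mydrv_dsa_parse(void *cso, struct tc_renderpass_info *info)
 *    {
 *       struct mydrv_dsa_state *dsa = cso;
 *
 *       // record whether this DSA state writes or reads the zs attachment
 *       if (dsa->depth_write_enabled || dsa->stencil_write_enabled)
 *          info->zsbuf_write_dsa = true;
 *       if (dsa->depth_test_enabled || dsa->stencil_test_enabled)
 *          info->zsbuf_read_dsa = true;
 *    }
 */
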
1439 static void *
1440 tc_create_vertex_elements_state(struct pipe_context *_pipe, unsigned count,
1441                                 const struct pipe_vertex_element *elems)
1442 {
1443    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1444 
1445    return pipe->create_vertex_elements_state(pipe, count, elems);
1446 }
1447 
1448 struct tc_sampler_states {
1449    struct tc_call_base base;
1450    uint8_t shader, start, count;
1451    void *slot[0]; /* more will be allocated if needed */
1452 };
1453 
1454 static uint16_t
1455 tc_call_bind_sampler_states(struct pipe_context *pipe, void *call)
1456 {
1457    struct tc_sampler_states *p = (struct tc_sampler_states *)call;
1458 
1459    pipe->bind_sampler_states(pipe, p->shader, p->start, p->count, p->slot);
1460    return p->base.num_slots;
1461 }
1462 
1463 static void
1464 tc_bind_sampler_states(struct pipe_context *_pipe,
1465                        enum pipe_shader_type shader,
1466                        unsigned start, unsigned count, void **states)
1467 {
1468    if (!count)
1469       return;
1470 
1471    struct threaded_context *tc = threaded_context(_pipe);
1472    struct tc_sampler_states *p =
1473       tc_add_slot_based_call(tc, TC_CALL_bind_sampler_states, tc_sampler_states, count);
1474 
1475    p->shader = shader;
1476    p->start = start;
1477    p->count = count;
1478    memcpy(p->slot, states, count * sizeof(states[0]));
1479 }
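
/* Note (informational): tc_sampler_states ends in a flexible slot[] array, so
 * tc_add_slot_based_call() sizes the queued call to hold 'count' pointers and
 * the handler above returns p->base.num_slots (the number of call slots the
 * variable-length payload actually occupies) instead of a fixed call_size().
 */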
1480 
1481 static void
1482 tc_link_shader(struct pipe_context *_pipe, void **shaders)
1483 {
1484    struct threaded_context *tc = threaded_context(_pipe);
1485    tc->pipe->link_shader(tc->pipe, shaders);
1486 }
1487 /********************************************************************
1488  * immediate states
1489  */
1490 
1491 struct tc_framebuffer {
1492    struct tc_call_base base;
1493    struct pipe_framebuffer_state state;
1494 };
1495 
1496 static uint16_t
1497 tc_call_set_framebuffer_state(struct pipe_context *pipe, void *call)
1498 {
1499    struct pipe_framebuffer_state *p = &to_call(call, tc_framebuffer)->state;
1500 
1501    pipe->set_framebuffer_state(pipe, p);
1502 
1503    unsigned nr_cbufs = p->nr_cbufs;
1504    for (unsigned i = 0; i < nr_cbufs; i++)
1505       tc_drop_surface_reference(p->cbufs[i]);
1506    tc_drop_surface_reference(p->zsbuf);
1507    tc_drop_resource_reference(p->resolve);
1508    return call_size(tc_framebuffer);
1509 }
1510 
1511 static void
1512 tc_set_framebuffer_state(struct pipe_context *_pipe,
1513                          const struct pipe_framebuffer_state *fb)
1514 {
1515    struct threaded_context *tc = threaded_context(_pipe);
1516    struct tc_framebuffer *p =
1517       tc_add_call(tc, TC_CALL_set_framebuffer_state, tc_framebuffer);
1518    unsigned nr_cbufs = fb->nr_cbufs;
1519 
1520    p->state.width = fb->width;
1521    p->state.height = fb->height;
1522    p->state.samples = fb->samples;
1523    p->state.layers = fb->layers;
1524    p->state.nr_cbufs = nr_cbufs;
1525 
1526    /* when unbinding, mark attachments as used for the current batch */
1527    for (unsigned i = 0; i < tc->nr_cbufs; i++) {
1528       tc_set_resource_batch_usage_persistent(tc, tc->fb_resources[i], false);
1529       pipe_resource_reference(&tc->fb_resources[i], NULL);
1530    }
1531    tc_set_resource_batch_usage_persistent(tc, tc->fb_resources[PIPE_MAX_COLOR_BUFS], false);
1532    tc_set_resource_batch_usage_persistent(tc, tc->fb_resolve, false);
1533 
1534    for (unsigned i = 0; i < nr_cbufs; i++) {
1535       p->state.cbufs[i] = NULL;
1536       pipe_surface_reference(&p->state.cbufs[i], fb->cbufs[i]);
1537       /* full tracking requires storing the fb attachment resources */
1538       if (fb->cbufs[i])
1539          pipe_resource_reference(&tc->fb_resources[i], fb->cbufs[i]->texture);
1540       tc_set_resource_batch_usage_persistent(tc, tc->fb_resources[i], true);
1541    }
1542    tc->nr_cbufs = nr_cbufs;
1543    if (tc->options.parse_renderpass_info) {
1544       /* ensure this is treated as the first fb set if no fb activity has occurred */
1545       if (!tc->renderpass_info_recording->has_draw &&
1546           !tc->renderpass_info_recording->cbuf_clear &&
1547           !tc->renderpass_info_recording->cbuf_load &&
1548           !tc->renderpass_info_recording->zsbuf_load &&
1549           !tc->renderpass_info_recording->zsbuf_clear_partial)
1550          tc->batch_slots[tc->next].first_set_fb = false;
1551       /* store existing zsbuf data for possible persistence */
1552       uint8_t zsbuf = tc->renderpass_info_recording->has_draw ?
1553                       0 :
1554                       tc->renderpass_info_recording->data8[3];
1555       bool zsbuf_changed = tc->fb_resources[PIPE_MAX_COLOR_BUFS] !=
1556                            (fb->zsbuf ? fb->zsbuf->texture : NULL);
1557 
1558       if (tc->seen_fb_state) {
1559          /* this is the end of a renderpass, so increment the renderpass info */
1560          tc_batch_increment_renderpass_info(tc, tc->next, false);
1561          /* if zsbuf hasn't changed (i.e., possibly just adding a color buffer):
1562           * keep zsbuf usage data
1563           */
1564          if (!zsbuf_changed)
1565             tc->renderpass_info_recording->data8[3] = zsbuf;
1566       } else {
1567          /* this is the first time a set_framebuffer_state call is triggered;
1568           * just increment the index and keep using the existing info for recording
1569           */
1570          tc->batch_slots[tc->next].renderpass_info_idx = 0;
1571       }
1572       /* future fb state changes will increment the index */
1573       tc->seen_fb_state = true;
1574    }
1575    pipe_resource_reference(&tc->fb_resources[PIPE_MAX_COLOR_BUFS],
1576                            fb->zsbuf ? fb->zsbuf->texture : NULL);
1577    pipe_resource_reference(&tc->fb_resolve, fb->resolve);
1578    tc_set_resource_batch_usage_persistent(tc, tc->fb_resources[PIPE_MAX_COLOR_BUFS], true);
1579    tc_set_resource_batch_usage_persistent(tc, tc->fb_resolve, true);
1580    tc->in_renderpass = false;
1581    p->state.zsbuf = NULL;
1582    pipe_surface_reference(&p->state.zsbuf, fb->zsbuf);
1583    p->state.resolve = NULL;
1584    pipe_resource_reference(&p->state.resolve, fb->resolve);
1585 }
1586 
1587 struct tc_tess_state {
1588    struct tc_call_base base;
1589    float state[6];
1590 };
1591 
1592 static uint16_t
1593 tc_call_set_tess_state(struct pipe_context *pipe, void *call)
1594 {
1595    float *p = to_call(call, tc_tess_state)->state;
1596 
1597    pipe->set_tess_state(pipe, p, p + 4);
1598    return call_size(tc_tess_state);
1599 }
1600 
1601 static void
1602 tc_set_tess_state(struct pipe_context *_pipe,
1603                   const float default_outer_level[4],
1604                   const float default_inner_level[2])
1605 {
1606    struct threaded_context *tc = threaded_context(_pipe);
1607    float *p = tc_add_call(tc, TC_CALL_set_tess_state, tc_tess_state)->state;
1608 
1609    memcpy(p, default_outer_level, 4 * sizeof(float));
1610    memcpy(p + 4, default_inner_level, 2 * sizeof(float));
1611 }
1612 
1613 struct tc_patch_vertices {
1614    struct tc_call_base base;
1615    uint8_t patch_vertices;
1616 };
1617 
1618 static uint16_t
1619 tc_call_set_patch_vertices(struct pipe_context *pipe, void *call)
1620 {
1621    uint8_t patch_vertices = to_call(call, tc_patch_vertices)->patch_vertices;
1622 
1623    pipe->set_patch_vertices(pipe, patch_vertices);
1624    return call_size(tc_patch_vertices);
1625 }
1626 
1627 static void
1628 tc_set_patch_vertices(struct pipe_context *_pipe, uint8_t patch_vertices)
1629 {
1630    struct threaded_context *tc = threaded_context(_pipe);
1631 
1632    tc_add_call(tc, TC_CALL_set_patch_vertices,
1633                tc_patch_vertices)->patch_vertices = patch_vertices;
1634 }
1635 
1636 struct tc_constant_buffer_base {
1637    struct tc_call_base base;
1638    uint8_t shader, index;
1639    bool is_null;
1640 };
1641 
1642 struct tc_constant_buffer {
1643    struct tc_constant_buffer_base base;
1644    struct pipe_constant_buffer cb;
1645 };
1646 
1647 static uint16_t
1648 tc_call_set_constant_buffer(struct pipe_context *pipe, void *call)
1649 {
1650    struct tc_constant_buffer *p = (struct tc_constant_buffer *)call;
1651 
1652    if (unlikely(p->base.is_null)) {
1653       pipe->set_constant_buffer(pipe, p->base.shader, p->base.index, false, NULL);
1654       return call_size(tc_constant_buffer_base);
1655    }
1656 
1657    pipe->set_constant_buffer(pipe, p->base.shader, p->base.index, true, &p->cb);
1658    return call_size(tc_constant_buffer);
1659 }
1660 
1661 static void
1662 tc_set_constant_buffer(struct pipe_context *_pipe,
1663                        enum pipe_shader_type shader, uint index,
1664                        bool take_ownership,
1665                        const struct pipe_constant_buffer *cb)
1666 {
1667    struct threaded_context *tc = threaded_context(_pipe);
1668 
1669    if (unlikely(!cb || (!cb->buffer && !cb->user_buffer))) {
1670       struct tc_constant_buffer_base *p =
1671          tc_add_call(tc, TC_CALL_set_constant_buffer, tc_constant_buffer_base);
1672       p->shader = shader;
1673       p->index = index;
1674       p->is_null = true;
1675       tc_unbind_buffer(&tc->const_buffers[shader][index]);
1676       return;
1677    }
1678 
1679    struct pipe_resource *buffer;
1680    unsigned offset;
1681 
1682    if (cb->user_buffer) {
1683       /* This must be done before adding set_constant_buffer, because it could
1684        * generate e.g. transfer_unmap and flush partially-uninitialized
1685        * set_constant_buffer to the driver if it was done afterwards.
1686        */
1687       buffer = NULL;
1688       u_upload_data(tc->base.const_uploader, 0, cb->buffer_size,
1689                     tc->ubo_alignment, cb->user_buffer, &offset, &buffer);
1690       u_upload_unmap(tc->base.const_uploader);
1691       take_ownership = true;
1692    } else {
1693       buffer = cb->buffer;
1694       offset = cb->buffer_offset;
1695    }
1696 
1697    struct tc_constant_buffer *p =
1698       tc_add_call(tc, TC_CALL_set_constant_buffer, tc_constant_buffer);
1699    p->base.shader = shader;
1700    p->base.index = index;
1701    p->base.is_null = false;
1702    p->cb.user_buffer = NULL;
1703    p->cb.buffer_offset = offset;
1704    p->cb.buffer_size = cb->buffer_size;
1705 
1706    if (take_ownership)
1707       p->cb.buffer = buffer;
1708    else
1709       tc_set_resource_reference(&p->cb.buffer, buffer);
1710 
1711    if (buffer) {
1712       tc_bind_buffer(&tc->const_buffers[shader][index],
1713                      &tc->buffer_lists[tc->next_buf_list], buffer);
1714    } else {
1715       tc_unbind_buffer(&tc->const_buffers[shader][index]);
1716    }
1717 }
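
/* Caller-side sketch (hypothetical example; the constant values and shader
 * stage are arbitrary): a user-pointer constant buffer is copied into
 * const_uploader before the call is queued, so the driver thread only ever
 * sees a real buffer plus an offset:
 *
 *    float consts[8] = {0};
 *    struct pipe_constant_buffer cb = {
 *       .user_buffer = consts,
 *       .buffer_size = sizeof(consts),
 *    };
 *    pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, false, &cb);
 */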
1718 
1719 struct tc_inlinable_constants {
1720    struct tc_call_base base;
1721    uint8_t shader;
1722    uint8_t num_values;
1723    uint32_t values[MAX_INLINABLE_UNIFORMS];
1724 };
1725 
1726 static uint16_t
1727 tc_call_set_inlinable_constants(struct pipe_context *pipe, void *call)
1728 {
1729    struct tc_inlinable_constants *p = to_call(call, tc_inlinable_constants);
1730 
1731    pipe->set_inlinable_constants(pipe, p->shader, p->num_values, p->values);
1732    return call_size(tc_inlinable_constants);
1733 }
1734 
1735 static void
1736 tc_set_inlinable_constants(struct pipe_context *_pipe,
1737                            enum pipe_shader_type shader,
1738                            uint num_values, uint32_t *values)
1739 {
1740    struct threaded_context *tc = threaded_context(_pipe);
1741    struct tc_inlinable_constants *p =
1742       tc_add_call(tc, TC_CALL_set_inlinable_constants, tc_inlinable_constants);
1743    p->shader = shader;
1744    p->num_values = num_values;
1745    memcpy(p->values, values, num_values * 4);
1746 }
1747 
1748 struct tc_sample_locations {
1749    struct tc_call_base base;
1750    uint16_t size;
1751    uint8_t slot[0];
1752 };
1753 
1754 
1755 static uint16_t
1756 tc_call_set_sample_locations(struct pipe_context *pipe, void *call)
1757 {
1758    struct tc_sample_locations *p = (struct tc_sample_locations *)call;
1759 
1760    pipe->set_sample_locations(pipe, p->size, p->slot);
1761    return p->base.num_slots;
1762 }
1763 
1764 static void
1765 tc_set_sample_locations(struct pipe_context *_pipe, size_t size, const uint8_t *locations)
1766 {
1767    struct threaded_context *tc = threaded_context(_pipe);
1768    struct tc_sample_locations *p =
1769       tc_add_slot_based_call(tc, TC_CALL_set_sample_locations,
1770                              tc_sample_locations, size);
1771 
1772    p->size = size;
1773    memcpy(p->slot, locations, size);
1774 }
1775 
1776 struct tc_scissors {
1777    struct tc_call_base base;
1778    uint8_t start, count;
1779    struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
1780 };
1781 
1782 static uint16_t
1783 tc_call_set_scissor_states(struct pipe_context *pipe, void *call)
1784 {
1785    struct tc_scissors *p = (struct tc_scissors *)call;
1786 
1787    pipe->set_scissor_states(pipe, p->start, p->count, p->slot);
1788    return p->base.num_slots;
1789 }
1790 
1791 static void
1792 tc_set_scissor_states(struct pipe_context *_pipe,
1793                       unsigned start, unsigned count,
1794                       const struct pipe_scissor_state *states)
1795 {
1796    struct threaded_context *tc = threaded_context(_pipe);
1797    struct tc_scissors *p =
1798       tc_add_slot_based_call(tc, TC_CALL_set_scissor_states, tc_scissors, count);
1799 
1800    p->start = start;
1801    p->count = count;
1802    memcpy(&p->slot, states, count * sizeof(states[0]));
1803 }
1804 
1805 struct tc_viewports {
1806    struct tc_call_base base;
1807    uint8_t start, count;
1808    struct pipe_viewport_state slot[0]; /* more will be allocated if needed */
1809 };
1810 
1811 static uint16_t
1812 tc_call_set_viewport_states(struct pipe_context *pipe, void *call)
1813 {
1814    struct tc_viewports *p = (struct tc_viewports *)call;
1815 
1816    pipe->set_viewport_states(pipe, p->start, p->count, p->slot);
1817    return p->base.num_slots;
1818 }
1819 
1820 static void
1821 tc_set_viewport_states(struct pipe_context *_pipe,
1822                        unsigned start, unsigned count,
1823                        const struct pipe_viewport_state *states)
1824 {
1825    if (!count)
1826       return;
1827 
1828    struct threaded_context *tc = threaded_context(_pipe);
1829    struct tc_viewports *p =
1830       tc_add_slot_based_call(tc, TC_CALL_set_viewport_states, tc_viewports, count);
1831 
1832    p->start = start;
1833    p->count = count;
1834    memcpy(&p->slot, states, count * sizeof(states[0]));
1835 }
1836 
1837 struct tc_window_rects {
1838    struct tc_call_base base;
1839    bool include;
1840    uint8_t count;
1841    struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
1842 };
1843 
1844 static uint16_t
1845 tc_call_set_window_rectangles(struct pipe_context *pipe, void *call)
1846 {
1847    struct tc_window_rects *p = (struct tc_window_rects *)call;
1848 
1849    pipe->set_window_rectangles(pipe, p->include, p->count, p->slot);
1850    return p->base.num_slots;
1851 }
1852 
1853 static void
1854 tc_set_window_rectangles(struct pipe_context *_pipe, bool include,
1855                          unsigned count,
1856                          const struct pipe_scissor_state *rects)
1857 {
1858    struct threaded_context *tc = threaded_context(_pipe);
1859    struct tc_window_rects *p =
1860       tc_add_slot_based_call(tc, TC_CALL_set_window_rectangles, tc_window_rects, count);
1861 
1862    p->include = include;
1863    p->count = count;
1864    memcpy(p->slot, rects, count * sizeof(rects[0]));
1865 }
1866 
1867 struct tc_sampler_views {
1868    struct tc_call_base base;
1869    uint8_t shader, start, count, unbind_num_trailing_slots;
1870    struct pipe_sampler_view *slot[0]; /* more will be allocated if needed */
1871 };
1872 
1873 static uint16_t
1874 tc_call_set_sampler_views(struct pipe_context *pipe, void *call)
1875 {
1876    struct tc_sampler_views *p = (struct tc_sampler_views *)call;
1877 
1878    pipe->set_sampler_views(pipe, p->shader, p->start, p->count,
1879                            p->unbind_num_trailing_slots, true, p->slot);
1880    return p->base.num_slots;
1881 }
1882 
1883 static void
1884 tc_set_sampler_views(struct pipe_context *_pipe,
1885                      enum pipe_shader_type shader,
1886                      unsigned start, unsigned count,
1887                      unsigned unbind_num_trailing_slots, bool take_ownership,
1888                      struct pipe_sampler_view **views)
1889 {
1890    if (!count && !unbind_num_trailing_slots)
1891       return;
1892 
1893    struct threaded_context *tc = threaded_context(_pipe);
1894    struct tc_sampler_views *p =
1895       tc_add_slot_based_call(tc, TC_CALL_set_sampler_views, tc_sampler_views,
1896                              views ? count : 0);
1897 
1898    p->shader = shader;
1899    p->start = start;
1900 
1901    if (views) {
1902       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1903 
1904       p->count = count;
1905       p->unbind_num_trailing_slots = unbind_num_trailing_slots;
1906 
1907       if (take_ownership) {
1908          memcpy(p->slot, views, sizeof(*views) * count);
1909 
1910          for (unsigned i = 0; i < count; i++) {
1911             if (views[i]) {
1912                if (views[i]->target == PIPE_BUFFER)
1913                   tc_bind_buffer(&tc->sampler_buffers[shader][start + i], next,
1914                                  views[i]->texture);
1915                else
1916                   tc_set_resource_batch_usage(tc, views[i]->texture);
1917             } else {
1918                tc_unbind_buffer(&tc->sampler_buffers[shader][start + i]);
1919             }
1920          }
1921       } else {
1922          for (unsigned i = 0; i < count; i++) {
1923             p->slot[i] = NULL;
1924             pipe_sampler_view_reference(&p->slot[i], views[i]);
1925 
1926             if (views[i]) {
1927                if (views[i]->target == PIPE_BUFFER)
1928                   tc_bind_buffer(&tc->sampler_buffers[shader][start + i], next,
1929                                  views[i]->texture);
1930                else
1931                   tc_set_resource_batch_usage(tc, views[i]->texture);
1932             } else {
1933                tc_unbind_buffer(&tc->sampler_buffers[shader][start + i]);
1934             }
1935          }
1936       }
1937 
1938       tc_unbind_buffers(&tc->sampler_buffers[shader][start + count],
1939                         unbind_num_trailing_slots);
1940       tc->seen_sampler_buffers[shader] = true;
1941    } else {
1942       p->count = 0;
1943       p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;
1944 
1945       tc_unbind_buffers(&tc->sampler_buffers[shader][start],
1946                         count + unbind_num_trailing_slots);
1947    }
1948 }
1949 
1950 struct tc_shader_images {
1951    struct tc_call_base base;
1952    uint8_t shader, start, count;
1953    uint8_t unbind_num_trailing_slots;
1954    struct pipe_image_view slot[0]; /* more will be allocated if needed */
1955 };
1956 
1957 static uint16_t
1958 tc_call_set_shader_images(struct pipe_context *pipe, void *call)
1959 {
1960    struct tc_shader_images *p = (struct tc_shader_images *)call;
1961    unsigned count = p->count;
1962 
1963    if (!p->count) {
1964       pipe->set_shader_images(pipe, p->shader, p->start, 0,
1965                               p->unbind_num_trailing_slots, NULL);
1966       return call_size(tc_shader_images);
1967    }
1968 
1969    pipe->set_shader_images(pipe, p->shader, p->start, p->count,
1970                            p->unbind_num_trailing_slots, p->slot);
1971 
1972    for (unsigned i = 0; i < count; i++)
1973       tc_drop_resource_reference(p->slot[i].resource);
1974 
1975    return p->base.num_slots;
1976 }
1977 
1978 static void
1979 tc_set_shader_images(struct pipe_context *_pipe,
1980                      enum pipe_shader_type shader,
1981                      unsigned start, unsigned count,
1982                      unsigned unbind_num_trailing_slots,
1983                      const struct pipe_image_view *images)
1984 {
1985    if (!count && !unbind_num_trailing_slots)
1986       return;
1987 
1988    struct threaded_context *tc = threaded_context(_pipe);
1989    struct tc_shader_images *p =
1990       tc_add_slot_based_call(tc, TC_CALL_set_shader_images, tc_shader_images,
1991                              images ? count : 0);
1992    unsigned writable_buffers = 0;
1993 
1994    p->shader = shader;
1995    p->start = start;
1996 
1997    if (images) {
1998       p->count = count;
1999       p->unbind_num_trailing_slots = unbind_num_trailing_slots;
2000 
2001       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
2002 
2003       for (unsigned i = 0; i < count; i++) {
2004          struct pipe_resource *resource = images[i].resource;
2005 
2006          tc_set_resource_reference(&p->slot[i].resource, resource);
2007 
2008          if (resource) {
2009             if (resource->target == PIPE_BUFFER) {
2010                tc_bind_buffer(&tc->image_buffers[shader][start + i], next, resource);
2011 
2012                if (images[i].access & PIPE_IMAGE_ACCESS_WRITE) {
2013                   struct threaded_resource *tres = threaded_resource(resource);
2014 
2015                   tc_buffer_disable_cpu_storage(resource);
2016                   util_range_add(&tres->b, &tres->valid_buffer_range,
2017                                  images[i].u.buf.offset,
2018                                  images[i].u.buf.offset + images[i].u.buf.size);
2019                   writable_buffers |= BITFIELD_BIT(start + i);
2020                }
2021             } else {
2022                tc_set_resource_batch_usage(tc, resource);
2023             }
2024          } else {
2025             tc_unbind_buffer(&tc->image_buffers[shader][start + i]);
2026          }
2027       }
2028       memcpy(p->slot, images, count * sizeof(images[0]));
2029 
2030       tc_unbind_buffers(&tc->image_buffers[shader][start + count],
2031                         unbind_num_trailing_slots);
2032       tc->seen_image_buffers[shader] = true;
2033    } else {
2034       p->count = 0;
2035       p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;
2036 
2037       tc_unbind_buffers(&tc->image_buffers[shader][start],
2038                         count + unbind_num_trailing_slots);
2039    }
2040 
2041    tc->image_buffers_writeable_mask[shader] &= ~BITFIELD_RANGE(start, count);
2042    tc->image_buffers_writeable_mask[shader] |= writable_buffers;
2043 }
2044 
2045 struct tc_shader_buffers {
2046    struct tc_call_base base;
2047    uint8_t shader, start, count;
2048    bool unbind;
2049    unsigned writable_bitmask;
2050    struct pipe_shader_buffer slot[0]; /* more will be allocated if needed */
2051 };
2052 
2053 static uint16_t
2054 tc_call_set_shader_buffers(struct pipe_context *pipe, void *call)
2055 {
2056    struct tc_shader_buffers *p = (struct tc_shader_buffers *)call;
2057    unsigned count = p->count;
2058 
2059    if (p->unbind) {
2060       pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, NULL, 0);
2061       return call_size(tc_shader_buffers);
2062    }
2063 
2064    pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, p->slot,
2065                             p->writable_bitmask);
2066 
2067    for (unsigned i = 0; i < count; i++)
2068       tc_drop_resource_reference(p->slot[i].buffer);
2069 
2070    return p->base.num_slots;
2071 }
2072 
2073 static void
2074 tc_set_shader_buffers(struct pipe_context *_pipe,
2075                       enum pipe_shader_type shader,
2076                       unsigned start, unsigned count,
2077                       const struct pipe_shader_buffer *buffers,
2078                       unsigned writable_bitmask)
2079 {
2080    if (!count)
2081       return;
2082 
2083    struct threaded_context *tc = threaded_context(_pipe);
2084    struct tc_shader_buffers *p =
2085       tc_add_slot_based_call(tc, TC_CALL_set_shader_buffers, tc_shader_buffers,
2086                              buffers ? count : 0);
2087 
2088    p->shader = shader;
2089    p->start = start;
2090    p->count = count;
2091    p->unbind = buffers == NULL;
2092    p->writable_bitmask = writable_bitmask;
2093 
2094    if (buffers) {
2095       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
2096 
2097       for (unsigned i = 0; i < count; i++) {
2098          struct pipe_shader_buffer *dst = &p->slot[i];
2099          const struct pipe_shader_buffer *src = buffers + i;
2100 
2101          tc_set_resource_reference(&dst->buffer, src->buffer);
2102          dst->buffer_offset = src->buffer_offset;
2103          dst->buffer_size = src->buffer_size;
2104 
2105          if (src->buffer) {
2106             struct threaded_resource *tres = threaded_resource(src->buffer);
2107 
2108             tc_bind_buffer(&tc->shader_buffers[shader][start + i], next, &tres->b);
2109 
2110             if (writable_bitmask & BITFIELD_BIT(i)) {
2111                tc_buffer_disable_cpu_storage(src->buffer);
2112                util_range_add(&tres->b, &tres->valid_buffer_range,
2113                               src->buffer_offset,
2114                               src->buffer_offset + src->buffer_size);
2115             }
2116          } else {
2117             tc_unbind_buffer(&tc->shader_buffers[shader][start + i]);
2118          }
2119       }
2120       tc->seen_shader_buffers[shader] = true;
2121    } else {
2122       tc_unbind_buffers(&tc->shader_buffers[shader][start], count);
2123    }
2124 
2125    tc->shader_buffers_writeable_mask[shader] &= ~BITFIELD_RANGE(start, count);
2126    tc->shader_buffers_writeable_mask[shader] |= writable_bitmask << start;
2127 }
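
/* Worked example (illustrative): binding two SSBOs at start = 3 with only the
 * second one writable (writable_bitmask = 0x2) clears bits 3..4 of
 * shader_buffers_writeable_mask[shader] and then ORs in 0x2 << 3, so only
 * bit 4 ends up set.
 */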
2128 
2129 static uint16_t
2130 tc_call_set_vertex_buffers(struct pipe_context *pipe, void *call)
2131 {
2132    struct tc_vertex_buffers *p = (struct tc_vertex_buffers *)call;
2133    unsigned count = p->count;
2134 
2135    for (unsigned i = 0; i < count; i++)
2136       tc_assert(!p->slot[i].is_user_buffer);
2137 
2138    pipe->set_vertex_buffers(pipe, count, p->slot);
2139    return p->base.num_slots;
2140 }
2141 
2142 static void
2143 tc_set_vertex_buffers(struct pipe_context *_pipe, unsigned count,
2144                       const struct pipe_vertex_buffer *buffers)
2145 {
2146    struct threaded_context *tc = threaded_context(_pipe);
2147 
2148    assert(!count || buffers);
2149 
2150    if (count) {
2151       struct tc_vertex_buffers *p =
2152          tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, count);
2153       p->count = count;
2154 
2155       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
2156 
2157       memcpy(p->slot, buffers, count * sizeof(struct pipe_vertex_buffer));
2158 
2159       for (unsigned i = 0; i < count; i++) {
2160          struct pipe_resource *buf = buffers[i].buffer.resource;
2161 
2162          if (buf) {
2163             tc_bind_buffer(&tc->vertex_buffers[i], next, buf);
2164          } else {
2165             tc_unbind_buffer(&tc->vertex_buffers[i]);
2166          }
2167       }
2168    } else {
2169       struct tc_vertex_buffers *p =
2170          tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, 0);
2171       p->count = 0;
2172    }
2173 
2174    /* We don't need to unbind trailing buffers because we never touch bindings
2175     * after num_vertex_buffers.
2176     */
2177    tc->num_vertex_buffers = count;
2178 }
2179 
2180 struct pipe_vertex_buffer *
2181 tc_add_set_vertex_buffers_call(struct pipe_context *_pipe, unsigned count)
2182 {
2183    struct threaded_context *tc = threaded_context(_pipe);
2184 
2185    /* We don't need to unbind trailing buffers because we never touch bindings
2186     * after num_vertex_buffers.
2187     */
2188    tc->num_vertex_buffers = count;
2189 
2190    struct tc_vertex_buffers *p =
2191       tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, count);
2192    p->count = count;
2193    return p->slot;
2194 }
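
/* Usage sketch (hypothetical caller; reference/ownership semantics follow
 * whatever set_vertex_buffers expects in this tree): fill the queued slots in
 * place instead of building a temporary array:
 *
 *    struct pipe_vertex_buffer *vb = tc_add_set_vertex_buffers_call(pipe, 1);
 *    vb[0].buffer.resource = buf;       // must not be a user buffer
 *    vb[0].is_user_buffer = false;
 */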
2195 
2196 struct tc_stream_outputs {
2197    struct tc_call_base base;
2198    unsigned count;
2199    struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
2200    unsigned offsets[PIPE_MAX_SO_BUFFERS];
2201 };
2202 
2203 static uint16_t
2204 tc_call_set_stream_output_targets(struct pipe_context *pipe, void *call)
2205 {
2206    struct tc_stream_outputs *p = to_call(call, tc_stream_outputs);
2207    unsigned count = p->count;
2208 
2209    pipe->set_stream_output_targets(pipe, count, p->targets, p->offsets);
2210    for (unsigned i = 0; i < count; i++)
2211       tc_drop_so_target_reference(p->targets[i]);
2212 
2213    return call_size(tc_stream_outputs);
2214 }
2215 
2216 static void
2217 tc_set_stream_output_targets(struct pipe_context *_pipe,
2218                              unsigned count,
2219                              struct pipe_stream_output_target **tgs,
2220                              const unsigned *offsets)
2221 {
2222    struct threaded_context *tc = threaded_context(_pipe);
2223    struct tc_stream_outputs *p =
2224       tc_add_call(tc, TC_CALL_set_stream_output_targets, tc_stream_outputs);
2225    struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
2226 
2227    for (unsigned i = 0; i < count; i++) {
2228       p->targets[i] = NULL;
2229       pipe_so_target_reference(&p->targets[i], tgs[i]);
2230       if (tgs[i]) {
2231          tc_buffer_disable_cpu_storage(tgs[i]->buffer);
2232          tc_bind_buffer(&tc->streamout_buffers[i], next, tgs[i]->buffer);
2233       } else {
2234          tc_unbind_buffer(&tc->streamout_buffers[i]);
2235       }
2236    }
2237    p->count = count;
2238    memcpy(p->offsets, offsets, count * sizeof(unsigned));
2239 
2240    tc_unbind_buffers(&tc->streamout_buffers[count], PIPE_MAX_SO_BUFFERS - count);
2241    if (count)
2242       tc->seen_streamout_buffers = true;
2243 }
2244 
2245 static void
2246 tc_set_compute_resources(struct pipe_context *_pipe, unsigned start,
2247                          unsigned count, struct pipe_surface **resources)
2248 {
2249    struct threaded_context *tc = threaded_context(_pipe);
2250    struct pipe_context *pipe = tc->pipe;
2251 
2252    tc_sync(tc);
2253    pipe->set_compute_resources(pipe, start, count, resources);
2254 }
2255 
2256 static void
2257 tc_set_global_binding(struct pipe_context *_pipe, unsigned first,
2258                       unsigned count, struct pipe_resource **resources,
2259                       uint32_t **handles)
2260 {
2261    struct threaded_context *tc = threaded_context(_pipe);
2262    struct pipe_context *pipe = tc->pipe;
2263 
2264    tc_sync(tc);
2265    pipe->set_global_binding(pipe, first, count, resources, handles);
2266 }
2267 
2268 
2269 /********************************************************************
2270  * views
2271  */
2272 
2273 static struct pipe_surface *
2274 tc_create_surface(struct pipe_context *_pipe,
2275                   struct pipe_resource *resource,
2276                   const struct pipe_surface *surf_tmpl)
2277 {
2278    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2279    struct pipe_surface *view =
2280          pipe->create_surface(pipe, resource, surf_tmpl);
2281 
2282    if (view)
2283       view->context = _pipe;
2284    return view;
2285 }
2286 
2287 static void
2288 tc_surface_destroy(struct pipe_context *_pipe,
2289                    struct pipe_surface *surf)
2290 {
2291    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2292 
2293    pipe->surface_destroy(pipe, surf);
2294 }
2295 
2296 static struct pipe_sampler_view *
2297 tc_create_sampler_view(struct pipe_context *_pipe,
2298                        struct pipe_resource *resource,
2299                        const struct pipe_sampler_view *templ)
2300 {
2301    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2302    struct pipe_sampler_view *view =
2303          pipe->create_sampler_view(pipe, resource, templ);
2304 
2305    if (view)
2306       view->context = _pipe;
2307    return view;
2308 }
2309 
2310 static void
2311 tc_sampler_view_destroy(struct pipe_context *_pipe,
2312                         struct pipe_sampler_view *view)
2313 {
2314    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2315 
2316    pipe->sampler_view_destroy(pipe, view);
2317 }
2318 
2319 static struct pipe_stream_output_target *
2320 tc_create_stream_output_target(struct pipe_context *_pipe,
2321                                struct pipe_resource *res,
2322                                unsigned buffer_offset,
2323                                unsigned buffer_size)
2324 {
2325    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2326    struct threaded_resource *tres = threaded_resource(res);
2327    struct pipe_stream_output_target *view;
2328 
2329    util_range_add(&tres->b, &tres->valid_buffer_range, buffer_offset,
2330                   buffer_offset + buffer_size);
2331 
2332    view = pipe->create_stream_output_target(pipe, res, buffer_offset,
2333                                             buffer_size);
2334    if (view)
2335       view->context = _pipe;
2336    return view;
2337 }
2338 
2339 static void
2340 tc_stream_output_target_destroy(struct pipe_context *_pipe,
2341                                 struct pipe_stream_output_target *target)
2342 {
2343    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
2344 
2345    pipe->stream_output_target_destroy(pipe, target);
2346 }
2347 
2348 
2349 /********************************************************************
2350  * bindless
2351  */
2352 
2353 static uint64_t
2354 tc_create_texture_handle(struct pipe_context *_pipe,
2355                          struct pipe_sampler_view *view,
2356                          const struct pipe_sampler_state *state)
2357 {
2358    struct threaded_context *tc = threaded_context(_pipe);
2359    struct pipe_context *pipe = tc->pipe;
2360 
2361    tc_sync(tc);
2362    return pipe->create_texture_handle(pipe, view, state);
2363 }
2364 
2365 struct tc_make_texture_handle_resident {
2366    struct tc_call_base base;
2367    bool resident;
2368    uint64_t handle;
2369 };
2370 
2371 static uint16_t
2372 tc_call_make_texture_handle_resident(struct pipe_context *pipe, void *call)
2373 {
2374    struct tc_make_texture_handle_resident *p =
2375       to_call(call, tc_make_texture_handle_resident);
2376 
2377    pipe->make_texture_handle_resident(pipe, p->handle, p->resident);
2378    return call_size(tc_make_texture_handle_resident);
2379 }
2380 
2381 static void
2382 tc_make_texture_handle_resident(struct pipe_context *_pipe, uint64_t handle,
2383                                 bool resident)
2384 {
2385    struct threaded_context *tc = threaded_context(_pipe);
2386    struct tc_make_texture_handle_resident *p =
2387       tc_add_call(tc, TC_CALL_make_texture_handle_resident,
2388                   tc_make_texture_handle_resident);
2389 
2390    p->handle = handle;
2391    p->resident = resident;
2392 }
2393 
2394 static uint64_t
2395 tc_create_image_handle(struct pipe_context *_pipe,
2396                        const struct pipe_image_view *image)
2397 {
2398    struct threaded_context *tc = threaded_context(_pipe);
2399    struct pipe_context *pipe = tc->pipe;
2400 
2401    if (image->resource->target == PIPE_BUFFER)
2402       tc_buffer_disable_cpu_storage(image->resource);
2403 
2404    tc_sync(tc);
2405    return pipe->create_image_handle(pipe, image);
2406 }
2407 
2408 struct tc_make_image_handle_resident {
2409    struct tc_call_base base;
2410    bool resident;
2411    unsigned access;
2412    uint64_t handle;
2413 };
2414 
2415 static uint16_t
2416 tc_call_make_image_handle_resident(struct pipe_context *pipe, void *call)
2417 {
2418    struct tc_make_image_handle_resident *p =
2419       to_call(call, tc_make_image_handle_resident);
2420 
2421    pipe->make_image_handle_resident(pipe, p->handle, p->access, p->resident);
2422    return call_size(tc_make_image_handle_resident);
2423 }
2424 
2425 static void
2426 tc_make_image_handle_resident(struct pipe_context *_pipe, uint64_t handle,
2427                               unsigned access, bool resident)
2428 {
2429    struct threaded_context *tc = threaded_context(_pipe);
2430    struct tc_make_image_handle_resident *p =
2431       tc_add_call(tc, TC_CALL_make_image_handle_resident,
2432                   tc_make_image_handle_resident);
2433 
2434    p->handle = handle;
2435    p->access = access;
2436    p->resident = resident;
2437 }
2438 
2439 
2440 /********************************************************************
2441  * transfer
2442  */
2443 
2444 struct tc_replace_buffer_storage {
2445    struct tc_call_base base;
2446    uint16_t num_rebinds;
2447    uint32_t rebind_mask;
2448    uint32_t delete_buffer_id;
2449    struct pipe_resource *dst;
2450    struct pipe_resource *src;
2451    tc_replace_buffer_storage_func func;
2452 };
2453 
2454 static uint16_t
2455 tc_call_replace_buffer_storage(struct pipe_context *pipe, void *call)
2456 {
2457    struct tc_replace_buffer_storage *p = to_call(call, tc_replace_buffer_storage);
2458 
2459    p->func(pipe, p->dst, p->src, p->num_rebinds, p->rebind_mask, p->delete_buffer_id);
2460 
2461    tc_drop_resource_reference(p->dst);
2462    tc_drop_resource_reference(p->src);
2463    return call_size(tc_replace_buffer_storage);
2464 }
2465 
2466 /* Return true if the buffer has been invalidated or is idle. */
2467 static bool
2468 tc_invalidate_buffer(struct threaded_context *tc,
2469                      struct threaded_resource *tbuf)
2470 {
2471    if (!tc_is_buffer_busy(tc, tbuf, PIPE_MAP_READ_WRITE)) {
2472       /* It's idle, so invalidation would be a no-op, but we can still clear
2473        * the valid range, since we are technically doing an invalidation and
2474        * merely skipping the useless reallocation.
2475        *
2476        * If the buffer is bound for write, we can't invalidate the range.
2477        */
2478       if (!tc_is_buffer_bound_for_write(tc, tbuf->buffer_id_unique))
2479          util_range_set_empty(&tbuf->valid_buffer_range);
2480       return true;
2481    }
2482 
2483    struct pipe_screen *screen = tc->base.screen;
2484    struct pipe_resource *new_buf;
2485 
2486    /* Shared, pinned, and sparse buffers can't be reallocated. */
2487    if (tbuf->is_shared ||
2488        tbuf->is_user_ptr ||
2489        tbuf->b.flags & (PIPE_RESOURCE_FLAG_SPARSE | PIPE_RESOURCE_FLAG_UNMAPPABLE))
2490       return false;
2491 
2492    /* Allocate a new one. */
2493    new_buf = screen->resource_create(screen, &tbuf->b);
2494    if (!new_buf)
2495       return false;
2496 
2497    /* Replace the "latest" pointer. */
2498    if (tbuf->latest != &tbuf->b)
2499       pipe_resource_reference(&tbuf->latest, NULL);
2500 
2501    tbuf->latest = new_buf;
2502 
2503    uint32_t delete_buffer_id = tbuf->buffer_id_unique;
2504 
2505    /* Enqueue storage replacement of the original buffer. */
2506    struct tc_replace_buffer_storage *p =
2507       tc_add_call(tc, TC_CALL_replace_buffer_storage,
2508                   tc_replace_buffer_storage);
2509 
2510    p->func = tc->replace_buffer_storage;
2511    tc_set_resource_reference(&p->dst, &tbuf->b);
2512    tc_set_resource_reference(&p->src, new_buf);
2513    p->delete_buffer_id = delete_buffer_id;
2514    p->rebind_mask = 0;
2515 
2516    /* Treat the current buffer as the new buffer. */
2517    bool bound_for_write = tc_is_buffer_bound_for_write(tc, tbuf->buffer_id_unique);
2518    p->num_rebinds = tc_rebind_buffer(tc, tbuf->buffer_id_unique,
2519                                      threaded_resource(new_buf)->buffer_id_unique,
2520                                      &p->rebind_mask);
2521 
2522    /* If the buffer is not bound for write, clear the valid range. */
2523    if (!bound_for_write)
2524       util_range_set_empty(&tbuf->valid_buffer_range);
2525 
2526    tbuf->buffer_id_unique = threaded_resource(new_buf)->buffer_id_unique;
2527    threaded_resource(new_buf)->buffer_id_unique = 0;
2528 
2529    return true;
2530 }
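
/* Driver-side sketch (hypothetical): the callback installed as
 * tc->replace_buffer_storage runs on the driver thread with the arguments
 * shown in tc_call_replace_buffer_storage() above; it typically swaps dst's
 * backing storage for src's and updates driver-internal bindings according
 * to rebind_mask:
 *
 *    static void
 *    mydrv_replace_buffer_storage(struct pipe_context *ctx,
 *                                 struct pipe_resource *dst,
 *                                 struct pipe_resource *src,
 *                                 unsigned num_rebinds, uint32_t rebind_mask,
 *                                 uint32_t delete_buffer_id)
 *    {
 *       // driver-specific: move src's allocation into dst and release the
 *       // old storage identified by delete_buffer_id.
 *    }
 */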
2531 
2532 static unsigned
2533 tc_improve_map_buffer_flags(struct threaded_context *tc,
2534                             struct threaded_resource *tres, unsigned usage,
2535                             unsigned offset, unsigned size)
2536 {
2537    /* Never invalidate inside the driver and never infer "unsynchronized". */
2538    unsigned tc_flags = TC_TRANSFER_MAP_NO_INVALIDATE |
2539                        TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED;
2540 
2541    /* Prevent a reentry. */
2542    if (usage & tc_flags)
2543       return usage;
2544 
2545    /* Use the staging upload if it's preferred. */
2546    if (usage & (PIPE_MAP_DISCARD_RANGE |
2547                 PIPE_MAP_DISCARD_WHOLE_RESOURCE) &&
2548        !(usage & PIPE_MAP_PERSISTENT) &&
2549        tres->b.flags & PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY &&
2550        tc->use_forced_staging_uploads) {
2551       usage &= ~(PIPE_MAP_DISCARD_WHOLE_RESOURCE |
2552                  PIPE_MAP_UNSYNCHRONIZED);
2553 
2554       return usage | tc_flags | PIPE_MAP_DISCARD_RANGE;
2555    }
2556 
2557    /* Sparse buffers can't be mapped directly and can't be reallocated
2558     * (fully invalidated). That may just be a radeonsi limitation, but
2559     * the threaded context must obey it with radeonsi.
2560     */
2561    if (tres->b.flags & (PIPE_RESOURCE_FLAG_SPARSE | PIPE_RESOURCE_FLAG_UNMAPPABLE)) {
2562       /* We can use DISCARD_RANGE instead of full discard. This is the only
2563        * fast path for sparse buffers that doesn't need thread synchronization.
2564        */
2565       if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE)
2566          usage |= PIPE_MAP_DISCARD_RANGE;
2567 
2568       /* Allow DISCARD_WHOLE_RESOURCE and inferring UNSYNCHRONIZED in drivers.
2569        * The threaded context doesn't do unsynchronized mappings and
2570        * invalidations of sparse buffers, so correct driver behavior won't
2571        * result in incorrect behavior with the threaded context.
2572        */
2573       return usage;
2574    }
2575 
2576    usage |= tc_flags;
2577 
2578    /* Handle CPU reads trivially. */
2579    if (usage & PIPE_MAP_READ) {
2580       if (usage & PIPE_MAP_UNSYNCHRONIZED)
2581          usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* don't sync */
2582 
2583       /* Drivers aren't allowed to do buffer invalidations. */
2584       return usage & ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2585    }
2586 
2587    /* See if the buffer range being mapped has never been initialized or
2588     * the buffer is idle, in which case it can be mapped unsynchronized. */
2589    if (!(usage & PIPE_MAP_UNSYNCHRONIZED) &&
2590        ((!tres->is_shared &&
2591          !util_ranges_intersect(&tres->valid_buffer_range, offset, offset + size)) ||
2592         !tc_is_buffer_busy(tc, tres, usage)))
2593       usage |= PIPE_MAP_UNSYNCHRONIZED;
2594 
2595    if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) {
2596       /* If discarding the entire range, discard the whole resource instead. */
2597       if (usage & PIPE_MAP_DISCARD_RANGE &&
2598           offset == 0 && size == tres->b.width0)
2599          usage |= PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2600 
2601       /* Discard the whole resource if needed. */
2602       if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) {
2603          if (tc_invalidate_buffer(tc, tres))
2604             usage |= PIPE_MAP_UNSYNCHRONIZED;
2605          else
2606             usage |= PIPE_MAP_DISCARD_RANGE; /* fallback */
2607       }
2608    }
2609 
2610    /* We won't need this flag anymore. */
2611    /* TODO: We might not need TC_TRANSFER_MAP_NO_INVALIDATE with this. */
2612    usage &= ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2613 
2614    /* GL_AMD_pinned_memory and persistent mappings can't use staging
2615     * buffers. */
2616    if (usage & (PIPE_MAP_UNSYNCHRONIZED |
2617                 PIPE_MAP_PERSISTENT) ||
2618        tres->is_user_ptr)
2619       usage &= ~PIPE_MAP_DISCARD_RANGE;
2620 
2621    /* Unsynchronized buffer mappings don't have to synchronize the thread. */
2622    if (usage & PIPE_MAP_UNSYNCHRONIZED) {
2623       usage &= ~PIPE_MAP_DISCARD_RANGE;
2624       usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* notify the driver */
2625    }
2626 
2627    return usage;
2628 }
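
/* Worked example (a trace under assumptions: the buffer is idle, not shared,
 * not sparse, not a user pointer, and not flagged DONT_MAP_DIRECTLY):
 *
 *    unsigned in  = PIPE_MAP_WRITE | PIPE_MAP_DISCARD_RANGE;
 *    unsigned out = tc_improve_map_buffer_flags(tc, tres, in, 0, 64);
 *    // out == PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED |
 *    //        TC_TRANSFER_MAP_THREADED_UNSYNC |
 *    //        TC_TRANSFER_MAP_NO_INVALIDATE |
 *    //        TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED
 *    // DISCARD_RANGE is dropped because the unsynchronized path needs no
 *    // staging copy, so tc_buffer_map() below never syncs the driver thread.
 */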
2629 
2630 static void *
2631 tc_buffer_map(struct pipe_context *_pipe,
2632               struct pipe_resource *resource, unsigned level,
2633               unsigned usage, const struct pipe_box *box,
2634               struct pipe_transfer **transfer)
2635 {
2636    struct threaded_context *tc = threaded_context(_pipe);
2637    struct threaded_resource *tres = threaded_resource(resource);
2638    struct pipe_context *pipe = tc->pipe;
2639 
2640    /* PIPE_MAP_THREAD_SAFE is for glthread, which shouldn't use the CPU storage;
2641     * this shouldn't normally be necessary because glthread only uses large buffers.
2642     */
2643    if (usage & PIPE_MAP_THREAD_SAFE)
2644       tc_buffer_disable_cpu_storage(resource);
2645 
2646    usage = tc_improve_map_buffer_flags(tc, tres, usage, box->x, box->width);
2647 
2648    /* If the CPU storage is enabled, return it directly. */
2649    if (tres->allow_cpu_storage && !(usage & TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE)) {
2650       /* We can't let resource_copy_region disable the CPU storage. */
2651       assert(!(tres->b.flags & PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY));
2652 
2653       if (!tres->cpu_storage) {
2654          tres->cpu_storage = align_malloc(resource->width0, tc->map_buffer_alignment);
2655 
2656          if (tres->cpu_storage && tres->valid_buffer_range.end) {
2657             /* The GPU buffer contains valid data. Copy it to the CPU storage. */
2658             struct pipe_box box2;
2659             struct pipe_transfer *transfer2;
2660 
2661             unsigned valid_range_len = tres->valid_buffer_range.end - tres->valid_buffer_range.start;
2662             u_box_1d(tres->valid_buffer_range.start, valid_range_len, &box2);
2663 
2664             tc_sync_msg(tc, "cpu storage GPU -> CPU copy");
2665             tc_set_driver_thread(tc);
2666 
2667             void *ret = pipe->buffer_map(pipe, tres->latest ? tres->latest : resource,
2668                                          0, PIPE_MAP_READ, &box2, &transfer2);
2669             memcpy(&((uint8_t*)tres->cpu_storage)[tres->valid_buffer_range.start],
2670                    ret,
2671                    valid_range_len);
2672             pipe->buffer_unmap(pipe, transfer2);
2673 
2674             tc_clear_driver_thread(tc);
2675          }
2676       }
2677 
2678       if (tres->cpu_storage) {
2679          struct threaded_transfer *ttrans = slab_zalloc(&tc->pool_transfers);
2680          ttrans->b.resource = resource;
2681          ttrans->b.usage = usage;
2682          ttrans->b.box = *box;
2683          ttrans->valid_buffer_range = &tres->valid_buffer_range;
2684          ttrans->cpu_storage_mapped = true;
2685          *transfer = &ttrans->b;
2686 
2687          return (uint8_t*)tres->cpu_storage + box->x;
2688       } else {
2689          tres->allow_cpu_storage = false;
2690       }
2691    }
2692 
2693    /* Do a staging transfer within the threaded context. The driver should
2694     * only get resource_copy_region.
2695     */
2696    if (usage & PIPE_MAP_DISCARD_RANGE) {
2697       struct threaded_transfer *ttrans = slab_zalloc(&tc->pool_transfers);
2698       uint8_t *map;
2699 
2700       u_upload_alloc(tc->base.stream_uploader, 0,
2701                      box->width + (box->x % tc->map_buffer_alignment),
2702                      tc->map_buffer_alignment, &ttrans->b.offset,
2703                      &ttrans->staging, (void**)&map);
2704       if (!map) {
2705          slab_free(&tc->pool_transfers, ttrans);
2706          return NULL;
2707       }
2708 
2709       ttrans->b.resource = resource;
2710       ttrans->b.level = 0;
2711       ttrans->b.usage = usage;
2712       ttrans->b.box = *box;
2713       ttrans->b.stride = 0;
2714       ttrans->b.layer_stride = 0;
2715       ttrans->valid_buffer_range = &tres->valid_buffer_range;
2716       ttrans->cpu_storage_mapped = false;
2717       *transfer = &ttrans->b;
2718 
2719       p_atomic_inc(&tres->pending_staging_uploads);
2720       util_range_add(resource, &tres->pending_staging_uploads_range,
2721                      box->x, box->x + box->width);
2722 
2723       return map + (box->x % tc->map_buffer_alignment);
2724    }
2725 
2726    if (usage & PIPE_MAP_UNSYNCHRONIZED &&
2727        p_atomic_read(&tres->pending_staging_uploads) &&
2728        util_ranges_intersect(&tres->pending_staging_uploads_range, box->x, box->x + box->width)) {
2729       /* Write conflict detected between a staging transfer and the direct mapping we're
2730        * going to do. Resolve the conflict by ignoring UNSYNCHRONIZED so the direct mapping
2731        * will have to wait for the staging transfer completion.
2732        * Note: The conflict detection is only based on the mapped range, not on the actual
2733        * written range(s).
2734        */
2735       usage &= ~PIPE_MAP_UNSYNCHRONIZED & ~TC_TRANSFER_MAP_THREADED_UNSYNC;
2736       tc->use_forced_staging_uploads = false;
2737    }
2738 
2739    /* Unsynchronized buffer mappings don't have to synchronize the thread. */
2740    if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)) {
2741       tc_sync_msg(tc, usage & PIPE_MAP_DISCARD_RANGE ? "  discard_range" :
2742                       usage & PIPE_MAP_READ ? "  read" : "  staging conflict");
2743       tc_set_driver_thread(tc);
2744    }
2745 
2746    tc->bytes_mapped_estimate += box->width;
2747 
2748    void *ret = pipe->buffer_map(pipe, tres->latest ? tres->latest : resource,
2749                                 level, usage, box, transfer);
2750    threaded_transfer(*transfer)->valid_buffer_range = &tres->valid_buffer_range;
2751    threaded_transfer(*transfer)->cpu_storage_mapped = false;
2752 
2753    if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC))
2754       tc_clear_driver_thread(tc);
2755 
2756    return ret;
2757 }
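
/* Illustrative usage sketch (not part of the driver): callers use the
 * threaded context exactly like a regular pipe_context, e.g.
 *
 *    struct pipe_box box;
 *    struct pipe_transfer *xfer;
 *    u_box_1d(offset, size, &box);
 *    void *ptr = ctx->buffer_map(ctx, buf, 0,
 *                                PIPE_MAP_WRITE | PIPE_MAP_DISCARD_RANGE,
 *                                &box, &xfer);
 *    memcpy(ptr, data, size);
 *    ctx->buffer_unmap(ctx, xfer);
 *
 * Whether the returned pointer is the CPU storage, a staging upload, or a
 * direct driver mapping is decided above and is transparent to the caller.
 */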
2758 
2759 static void *
2760 tc_texture_map(struct pipe_context *_pipe,
2761                struct pipe_resource *resource, unsigned level,
2762                unsigned usage, const struct pipe_box *box,
2763                struct pipe_transfer **transfer)
2764 {
2765    struct threaded_context *tc = threaded_context(_pipe);
2766    struct threaded_resource *tres = threaded_resource(resource);
2767    struct pipe_context *pipe = tc->pipe;
2768 
2769    tc_sync_msg(tc, "texture");
2770    tc_set_driver_thread(tc);
2771    /* block all unsync texture subdata during map */
2772    tc_set_resource_batch_usage_persistent(tc, resource, true);
2773 
2774    tc->bytes_mapped_estimate += box->width;
2775 
2776    void *ret = pipe->texture_map(pipe, tres->latest ? tres->latest : resource,
2777                                  level, usage, box, transfer);
2778 
2779    if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC))
2780       tc_clear_driver_thread(tc);
2781 
2782    return ret;
2783 }
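
/* Note: unlike buffer maps, texture maps always synchronize with the driver
 * thread, and the persistent batch-usage flag set above keeps unsynchronized
 * texture_subdata calls away from the resource while it is mapped.
 */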
2784 
2785 struct tc_transfer_flush_region {
2786    struct tc_call_base base;
2787    struct pipe_box box;
2788    struct pipe_transfer *transfer;
2789 };
2790 
2791 static uint16_t
2792 tc_call_transfer_flush_region(struct pipe_context *pipe, void *call)
2793 {
2794    struct tc_transfer_flush_region *p = to_call(call, tc_transfer_flush_region);
2795 
2796    pipe->transfer_flush_region(pipe, p->transfer, &p->box);
2797    return call_size(tc_transfer_flush_region);
2798 }
2799 
2800 struct tc_resource_copy_region {
2801    struct tc_call_base base;
2802    unsigned dst_level;
2803    unsigned dstx, dsty, dstz;
2804    unsigned src_level;
2805    struct pipe_box src_box;
2806    struct pipe_resource *dst;
2807    struct pipe_resource *src;
2808 };
2809 
2810 static void
2811 tc_resource_copy_region(struct pipe_context *_pipe,
2812                         struct pipe_resource *dst, unsigned dst_level,
2813                         unsigned dstx, unsigned dsty, unsigned dstz,
2814                         struct pipe_resource *src, unsigned src_level,
2815                         const struct pipe_box *src_box);
2816 
2817 static void
2818 tc_buffer_do_flush_region(struct threaded_context *tc,
2819                           struct threaded_transfer *ttrans,
2820                           const struct pipe_box *box)
2821 {
2822    struct threaded_resource *tres = threaded_resource(ttrans->b.resource);
2823 
2824    if (ttrans->staging) {
2825       struct pipe_box src_box;
2826 
2827       u_box_1d(ttrans->b.offset + ttrans->b.box.x % tc->map_buffer_alignment +
2828                (box->x - ttrans->b.box.x),
2829                box->width, &src_box);
2830 
2831       /* Copy the staging buffer into the original one. */
2832       tc_resource_copy_region(&tc->base, ttrans->b.resource, 0, box->x, 0, 0,
2833                               ttrans->staging, 0, &src_box);
2834    }
2835 
2836    /* Don't update the valid range when we're uploading the CPU storage
2837     * because it includes the uninitialized range too.
2838     */
2839    if (!(ttrans->b.usage & TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE)) {
2840       util_range_add(&tres->b, ttrans->valid_buffer_range,
2841                      box->x, box->x + box->width);
2842    }
2843 }
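
/* Worked example (illustrative numbers): with map_buffer_alignment = 256,
 * a mapping with box.x = 100 stores buffer byte 100 at staging byte
 * ttrans->b.offset + (100 % 256).  Flushing the sub-range [120, 140) then
 * copies 20 bytes starting at staging offset ttrans->b.offset + 100 + 20
 * into the real buffer at x = 120.
 */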
2844 
2845 static void
2846 tc_transfer_flush_region(struct pipe_context *_pipe,
2847                          struct pipe_transfer *transfer,
2848                          const struct pipe_box *rel_box)
2849 {
2850    struct threaded_context *tc = threaded_context(_pipe);
2851    struct threaded_transfer *ttrans = threaded_transfer(transfer);
2852    struct threaded_resource *tres = threaded_resource(transfer->resource);
2853    unsigned required_usage = PIPE_MAP_WRITE |
2854                              PIPE_MAP_FLUSH_EXPLICIT;
2855 
2856    if (tres->b.target == PIPE_BUFFER) {
2857       if ((transfer->usage & required_usage) == required_usage) {
2858          struct pipe_box box;
2859 
2860          u_box_1d(transfer->box.x + rel_box->x, rel_box->width, &box);
2861          tc_buffer_do_flush_region(tc, ttrans, &box);
2862       }
2863 
2864       /* Staging transfers don't send the call to the driver.
2865        *
2866        * Transfers using the CPU storage shouldn't call transfer_flush_region
2867        * in the driver because the buffer is not really mapped on the driver
2868        * side and the CPU storage always re-uploads everything (flush_region
2869        * makes no difference).
2870        */
2871       if (ttrans->staging || ttrans->cpu_storage_mapped)
2872          return;
2873    }
2874 
2875    struct tc_transfer_flush_region *p =
2876       tc_add_call(tc, TC_CALL_transfer_flush_region, tc_transfer_flush_region);
2877    p->transfer = transfer;
2878    p->box = *rel_box;
2879 }
2880 
2881 static void
2882 tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
2883          unsigned flags);
2884 
2885 struct tc_buffer_unmap {
2886    struct tc_call_base base;
2887    bool was_staging_transfer;
2888    union {
2889       struct pipe_transfer *transfer;
2890       struct pipe_resource *resource;
2891    };
2892 };
2893 
2894 static uint16_t
2895 tc_call_buffer_unmap(struct pipe_context *pipe, void *call)
2896 {
2897    struct tc_buffer_unmap *p = to_call(call, tc_buffer_unmap);
2898 
2899    if (p->was_staging_transfer) {
2900       struct threaded_resource *tres = threaded_resource(p->resource);
2901       /* Nothing to do except keeping track of staging uploads */
2902       assert(tres->pending_staging_uploads > 0);
2903       p_atomic_dec(&tres->pending_staging_uploads);
2904       tc_drop_resource_reference(p->resource);
2905    } else {
2906       pipe->buffer_unmap(pipe, p->transfer);
2907    }
2908 
2909    return call_size(tc_buffer_unmap);
2910 }
2911 
2912 static void
2913 tc_buffer_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
2914 {
2915    struct threaded_context *tc = threaded_context(_pipe);
2916    struct threaded_transfer *ttrans = threaded_transfer(transfer);
2917    struct threaded_resource *tres = threaded_resource(transfer->resource);
2918 
2919    /* PIPE_MAP_THREAD_SAFE is only valid with UNSYNCHRONIZED. It can be
2920     * called from any thread and bypasses all multithreaded queues.
2921     */
2922    if (transfer->usage & PIPE_MAP_THREAD_SAFE) {
2923       assert(transfer->usage & PIPE_MAP_UNSYNCHRONIZED);
2924       assert(!(transfer->usage & (PIPE_MAP_FLUSH_EXPLICIT |
2925                                   PIPE_MAP_DISCARD_RANGE)));
2926 
2927       struct pipe_context *pipe = tc->pipe;
2928       util_range_add(&tres->b, ttrans->valid_buffer_range,
2929                       transfer->box.x, transfer->box.x + transfer->box.width);
2930 
2931       pipe->buffer_unmap(pipe, transfer);
2932       return;
2933    }
2934 
2935    if (transfer->usage & PIPE_MAP_WRITE &&
2936        !(transfer->usage & PIPE_MAP_FLUSH_EXPLICIT))
2937       tc_buffer_do_flush_region(tc, ttrans, &transfer->box);
2938 
2939    if (ttrans->cpu_storage_mapped) {
2940       /* GL allows simultaneous GPU stores with mapped buffers as long as GPU stores don't
2941        * touch the mapped range. That's a problem because GPU stores free the CPU storage.
2942        * If that happens, we just ignore the unmap call and don't upload anything to prevent
2943        * a crash.
2944        *
2945        * Disallow the CPU storage in the driver to work around this.
2946        */
2947       assert(tres->cpu_storage);
2948 
2949       if (tres->cpu_storage) {
2950          tc_invalidate_buffer(tc, tres);
2951          tc_buffer_subdata(&tc->base, &tres->b,
2952                            PIPE_MAP_UNSYNCHRONIZED |
2953                            TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE,
2954                            0, tres->b.width0, tres->cpu_storage);
2955          /* This shouldn't have been freed by buffer_subdata. */
2956          assert(tres->cpu_storage);
2957       } else {
2958          static bool warned_once = false;
2959          if (!warned_once) {
2960             fprintf(stderr, "This application is incompatible with cpu_storage.\n");
2961             fprintf(stderr, "Use tc_max_cpu_storage_size=0 to disable it and report this issue to Mesa.\n");
2962             warned_once = true;
2963          }
2964       }
2965 
2966       tc_drop_resource_reference(ttrans->staging);
2967       slab_free(&tc->pool_transfers, ttrans);
2968       return;
2969    }
2970 
2971    bool was_staging_transfer = false;
2972 
2973    if (ttrans->staging) {
2974       was_staging_transfer = true;
2975 
2976       tc_drop_resource_reference(ttrans->staging);
2977       slab_free(&tc->pool_transfers, ttrans);
2978    }
2979 
2980    struct tc_buffer_unmap *p = tc_add_call(tc, TC_CALL_buffer_unmap,
2981                                            tc_buffer_unmap);
2982    if (was_staging_transfer) {
2983       tc_set_resource_reference(&p->resource, &tres->b);
2984       p->was_staging_transfer = true;
2985    } else {
2986       p->transfer = transfer;
2987       p->was_staging_transfer = false;
2988    }
2989 
2990    /* tc_buffer_map maps buffers directly, but tc_buffer_unmap
2991     * defers the unmap operation to batch execution.
2992     * bytes_mapped_estimate is an estimate of the map/unmap byte delta;
2993     * if it exceeds an optional limit, the current batch is flushed
2994     * to reclaim some RAM. */
2995    if (!ttrans->staging && tc->bytes_mapped_limit &&
2996        tc->bytes_mapped_estimate > tc->bytes_mapped_limit) {
2997       tc_flush(_pipe, NULL, PIPE_FLUSH_ASYNC);
2998    }
2999 }
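
/* Summary: a CPU-storage mapping is "unmapped" by invalidating the buffer and
 * re-uploading the whole shadow copy with an unsynchronized buffer_subdata;
 * a staging mapping only decrements the pending-upload counter on the driver
 * thread; a direct driver mapping is unmapped asynchronously in the batch.
 */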
3000 
3001 struct tc_texture_unmap {
3002    struct tc_call_base base;
3003    struct pipe_transfer *transfer;
3004 };
3005 
3006 static uint16_t
3007 tc_call_texture_unmap(struct pipe_context *pipe, void *call)
3008 {
3009    struct tc_texture_unmap *p = (struct tc_texture_unmap *) call;
3010 
3011    pipe->texture_unmap(pipe, p->transfer);
3012    return call_size(tc_texture_unmap);
3013 }
3014 
3015 static void
3016 tc_texture_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
3017 {
3018    struct threaded_context *tc = threaded_context(_pipe);
3019    struct threaded_transfer *ttrans = threaded_transfer(transfer);
3020 
3021    /* enable subdata again once resource is no longer mapped */
3022    tc_set_resource_batch_usage_persistent(tc, transfer->resource, false);
3023 
3024    tc_add_call(tc, TC_CALL_texture_unmap, tc_texture_unmap)->transfer = transfer;
3025 
3026    /* tc_texture_map maps textures directly, but tc_texture_unmap
3027     * defers the unmap operation to batch execution.
3028     * bytes_mapped_estimate is an estimate of the map/unmap byte delta;
3029     * if it exceeds an optional limit, the current batch is flushed
3030     * to reclaim some RAM. */
3031    if (!ttrans->staging && tc->bytes_mapped_limit &&
3032        tc->bytes_mapped_estimate > tc->bytes_mapped_limit) {
3033       tc_flush(_pipe, NULL, PIPE_FLUSH_ASYNC);
3034    }
3035 }
3036 
3037 struct tc_buffer_subdata {
3038    struct tc_call_base base;
3039    unsigned usage, offset, size;
3040    struct pipe_resource *resource;
3041    char slot[0]; /* more will be allocated if needed */
3042 };
3043 
3044 static uint16_t
3045 tc_call_buffer_subdata(struct pipe_context *pipe, void *call)
3046 {
3047    struct tc_buffer_subdata *p = (struct tc_buffer_subdata *)call;
3048 
3049    pipe->buffer_subdata(pipe, p->resource, p->usage, p->offset, p->size,
3050                         p->slot);
3051    tc_drop_resource_reference(p->resource);
3052    return p->base.num_slots;
3053 }
3054 
3055 static bool
3056 is_mergeable_buffer_subdata(const struct tc_call_base *previous_call,
3057                             unsigned usage, unsigned offset,
3058                             struct pipe_resource *resource)
3059 {
3060    if (!previous_call || previous_call->call_id != TC_CALL_buffer_subdata)
3061       return false;
3062 
3063    struct tc_buffer_subdata *subdata = (struct tc_buffer_subdata *)previous_call;
3064 
3065    return subdata->usage == usage && subdata->resource == resource
3066           && (subdata->offset + subdata->size) == offset;
3067 }
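
/* Worked example (illustrative): an application uploading a buffer piecewise,
 * e.g.
 *
 *    glBufferSubData(GL_ARRAY_BUFFER, 0,   256, chunk0);
 *    glBufferSubData(GL_ARRAY_BUFFER, 256, 256, chunk1);
 *
 * yields two subdata calls with the same usage where the second offset equals
 * the first offset + size, so the second upload is appended to the first
 * call's inline data instead of occupying a new call.
 */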
3068 
3069 static void
3070 tc_buffer_subdata(struct pipe_context *_pipe,
3071                   struct pipe_resource *resource,
3072                   unsigned usage, unsigned offset,
3073                   unsigned size, const void *data)
3074 {
3075    struct threaded_context *tc = threaded_context(_pipe);
3076    struct threaded_resource *tres = threaded_resource(resource);
3077 
3078    if (!size)
3079       return;
3080 
3081    usage |= PIPE_MAP_WRITE;
3082 
3083    /* PIPE_MAP_DIRECTLY suppresses implicit DISCARD_RANGE. */
3084    if (!(usage & PIPE_MAP_DIRECTLY))
3085       usage |= PIPE_MAP_DISCARD_RANGE;
3086 
3087    usage = tc_improve_map_buffer_flags(tc, tres, usage, offset, size);
3088 
3089    /* Unsynchronized and big transfers should use transfer_map. Also handle
3090     * full invalidations, because drivers aren't allowed to do them.
3091     */
3092    if (usage & (PIPE_MAP_UNSYNCHRONIZED |
3093                 PIPE_MAP_DISCARD_WHOLE_RESOURCE) ||
3094        size > TC_MAX_SUBDATA_BYTES ||
3095        tres->cpu_storage) {
3096       struct pipe_transfer *transfer;
3097       struct pipe_box box;
3098       uint8_t *map = NULL;
3099 
3100       u_box_1d(offset, size, &box);
3101 
3102       /* CPU storage is only useful for partial updates. It can add overhead
3103        * on glBufferData calls so avoid using it.
3104        */
3105       if (!tres->cpu_storage && offset == 0 && size == resource->width0)
3106          usage |= TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE;
3107 
3108       map = tc_buffer_map(_pipe, resource, 0, usage, &box, &transfer);
3109       if (map) {
3110          memcpy(map, data, size);
3111          tc_buffer_unmap(_pipe, transfer);
3112       }
3113       return;
3114    }
3115 
3116    util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size);
3117 
3118    /* We can potentially merge this subdata call with the previous one (if any),
3119     * if the application does a whole-buffer upload piecewise. */
3120    {
3121       struct tc_call_base *last_call = tc_get_last_mergeable_call(tc);
3122       struct tc_buffer_subdata *merge_dest = (struct tc_buffer_subdata *)last_call;
3123 
3124       if (is_mergeable_buffer_subdata(last_call, usage, offset, resource) &&
3125          tc_enlarge_last_mergeable_call(tc, call_size_with_slots(tc_buffer_subdata, merge_dest->size + size))) {
3126          memcpy(merge_dest->slot + merge_dest->size, data, size);
3127          merge_dest->size += size;
3128 
3129          /* TODO: We *could* do an invalidate + upload here if we detect that
3130           * the merged subdata call overwrites the entire buffer. However, that's
3131           * a little complicated since we can't add further calls to our batch
3132           * until we have removed the merged subdata call, which means that
3133           * calling tc_invalidate_buffer before we have removed the call will
3134           * blow things up.
3135           *
3136           * Just leave a large, merged subdata call in the batch for now, which is
3137           * at least better than tons of tiny subdata calls.
3138           */
3139 
3140          return;
3141       }
3142    }
3143 
3144    /* The upload is small. Enqueue it. */
3145    struct tc_buffer_subdata *p =
3146       tc_add_slot_based_call(tc, TC_CALL_buffer_subdata, tc_buffer_subdata, size);
3147 
3148    tc_set_resource_reference(&p->resource, resource);
3149    /* This will always be busy because if it wasn't,
3150     * tc_improve_map_buffer_flags would set UNSYNCHRONIZED and we wouldn't get here.
3151     */
3152    tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], resource);
3153    p->usage = usage;
3154    p->offset = offset;
3155    p->size = size;
3156    memcpy(p->slot, data, size);
3157 
3158    tc_mark_call_mergeable(tc, &p->base);
3159 }
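
/* Summary: small synchronized updates are copied inline into the batch (and
 * merged with an immediately preceding contiguous subdata when possible);
 * unsynchronized, whole-resource, larger-than-TC_MAX_SUBDATA_BYTES, or
 * CPU-storage-backed updates are routed through tc_buffer_map/tc_buffer_unmap.
 */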
3160 
3161 struct tc_texture_subdata {
3162    struct tc_call_base base;
3163    unsigned level, usage, stride;
3164    struct pipe_box box;
3165    struct pipe_resource *resource;
3166    uintptr_t layer_stride;
3167    char slot[0]; /* more will be allocated if needed */
3168 };
3169 
3170 static uint16_t
3171 tc_call_texture_subdata(struct pipe_context *pipe, void *call)
3172 {
3173    struct tc_texture_subdata *p = (struct tc_texture_subdata *)call;
3174 
3175    pipe->texture_subdata(pipe, p->resource, p->level, p->usage, &p->box,
3176                          p->slot, p->stride, p->layer_stride);
3177    tc_drop_resource_reference(p->resource);
3178    return p->base.num_slots;
3179 }
3180 
3181 static void
3182 tc_texture_subdata(struct pipe_context *_pipe,
3183                    struct pipe_resource *resource,
3184                    unsigned level, unsigned usage,
3185                    const struct pipe_box *box,
3186                    const void *data, unsigned stride,
3187                    uintptr_t layer_stride)
3188 {
3189    struct threaded_context *tc = threaded_context(_pipe);
3190    uint64_t size;
3191 
3192    assert(box->height >= 1);
3193    assert(box->depth >= 1);
3194 
3195    size = (box->depth - 1) * layer_stride +
3196           (box->height - 1) * (uint64_t)stride +
3197           box->width * util_format_get_blocksize(resource->format);
3198    if (!size)
3199       return;
3200 
3201    /* Small uploads can be enqueued, big uploads must sync. */
3202    if (size <= TC_MAX_SUBDATA_BYTES) {
3203       struct tc_texture_subdata *p =
3204          tc_add_slot_based_call(tc, TC_CALL_texture_subdata, tc_texture_subdata, size);
3205 
3206       tc_set_resource_batch_usage(tc, resource);
3207       tc_set_resource_reference(&p->resource, resource);
3208       p->level = level;
3209       p->usage = usage;
3210       p->box = *box;
3211       p->stride = stride;
3212       p->layer_stride = layer_stride;
3213       memcpy(p->slot, data, size);
3214    } else {
3215       struct pipe_context *pipe = tc->pipe;
3216       struct threaded_resource *tres = threaded_resource(resource);
3217       unsigned unsync_usage = TC_TRANSFER_MAP_THREADED_UNSYNC | PIPE_MAP_UNSYNCHRONIZED | PIPE_MAP_WRITE;
3218       bool can_unsync = !tc_resource_batch_usage_test_busy(tc, resource) &&
3219                         tc->options.is_resource_busy &&
3220                         !tc->options.is_resource_busy(tc->pipe->screen, tres->latest, usage | unsync_usage);
3221 
3222       if (!can_unsync && resource->usage != PIPE_USAGE_STAGING &&
3223           tc->options.parse_renderpass_info && tc->in_renderpass) {
3224          enum pipe_format format = resource->format;
3225          if (usage & PIPE_MAP_DEPTH_ONLY)
3226             format = util_format_get_depth_only(format);
3227          else if (usage & PIPE_MAP_STENCIL_ONLY)
3228             format = PIPE_FORMAT_S8_UINT;
3229 
3230          unsigned fmt_stride = util_format_get_stride(format, box->width);
3231          uint64_t fmt_layer_stride = util_format_get_2d_size(format, stride, box->height);
3232          assert(fmt_layer_stride * box->depth <= UINT32_MAX);
3233 
3234          struct pipe_resource *pres = pipe_buffer_create(pipe->screen, 0, PIPE_USAGE_STREAM, layer_stride * box->depth);
3235          pipe->buffer_subdata(pipe, pres, unsync_usage, 0, layer_stride * box->depth, data);
3236          struct pipe_box src_box = *box;
3237          src_box.x = src_box.y = src_box.z = 0;
3238 
3239          if (fmt_stride == stride && fmt_layer_stride == layer_stride) {
3240             /* if stride matches, a single copy is fine */
3241             tc->base.resource_copy_region(&tc->base, resource, level, box->x, box->y, box->z, pres, 0, &src_box);
3242          } else {
3243             /* if stride doesn't match, inline util_copy_box on the GPU and assume the driver will optimize */
3244             src_box.depth = 1;
3245             for (unsigned z = 0; z < box->depth; ++z, src_box.x = z * layer_stride) {
3246                unsigned dst_x = box->x, dst_y = box->y, width = box->width, height = box->height, dst_z = box->z + z;
3247                int blocksize = util_format_get_blocksize(format);
3248                int blockwidth = util_format_get_blockwidth(format);
3249                int blockheight = util_format_get_blockheight(format);
3250 
3251                assert(blocksize > 0);
3252                assert(blockwidth > 0);
3253                assert(blockheight > 0);
3254 
3255                dst_x /= blockwidth;
3256                dst_y /= blockheight;
3257                width = DIV_ROUND_UP(width, blockwidth);
3258                height = DIV_ROUND_UP(height, blockheight);
3259 
3260                width *= blocksize;
3261 
3262                if (width == fmt_stride && width == (unsigned)stride) {
3263                   ASSERTED uint64_t size = (uint64_t)height * width;
3264 
3265                   assert(size <= SIZE_MAX);
3266                   assert(dst_x + src_box.width < u_minify(pres->width0, level));
3267                   assert(dst_y + src_box.height < u_minify(pres->height0, level));
3268                   assert(pres->target != PIPE_TEXTURE_3D ||  z + src_box.depth < u_minify(pres->depth0, level));
3269                   tc->base.resource_copy_region(&tc->base, resource, level, dst_x, dst_y, dst_z, pres, 0, &src_box);
3270                } else {
3271                   src_box.height = 1;
3272                   for (unsigned i = 0; i < height; i++, dst_y++, src_box.x += stride)
3273                      tc->base.resource_copy_region(&tc->base, resource, level, dst_x, dst_y, dst_z, pres, 0, &src_box);
3274                }
3275             }
3276          }
3277 
3278          pipe_resource_reference(&pres, NULL);
3279       } else {
3280          if (can_unsync) {
3281             usage |= unsync_usage;
3282          } else {
3283             tc_sync(tc);
3284             tc_set_driver_thread(tc);
3285          }
3286          pipe->texture_subdata(pipe, resource, level, usage, box, data,
3287                               stride, layer_stride);
3288          if (!can_unsync)
3289             tc_clear_driver_thread(tc);
3290       }
3291    }
3292 }
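
/* Summary: uploads up to TC_MAX_SUBDATA_BYTES are enqueued with the pixel data
 * copied into the batch.  Larger uploads either go to the driver
 * unsynchronized (when the resource is provably idle), are streamed through a
 * temporary PIPE_USAGE_STREAM buffer plus resource_copy_region so an active
 * renderpass isn't interrupted, or fall back to a full sync before calling
 * texture_subdata directly.
 */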
3293 
3294 
3295 /********************************************************************
3296  * miscellaneous
3297  */
3298 
3299 #define TC_FUNC_SYNC_RET0(ret_type, func) \
3300    static ret_type \
3301    tc_##func(struct pipe_context *_pipe) \
3302    { \
3303       struct threaded_context *tc = threaded_context(_pipe); \
3304       struct pipe_context *pipe = tc->pipe; \
3305       tc_sync(tc); \
3306       return pipe->func(pipe); \
3307    }
3308 
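/* For reference, the invocation below expands (roughly) to:
 *
 *    static uint64_t
 *    tc_get_timestamp(struct pipe_context *_pipe)
 *    {
 *       struct threaded_context *tc = threaded_context(_pipe);
 *       struct pipe_context *pipe = tc->pipe;
 *       tc_sync(tc);
 *       return pipe->get_timestamp(pipe);
 *    }
 */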
3309 TC_FUNC_SYNC_RET0(uint64_t, get_timestamp)
3310 
3311 static void
3312 tc_get_sample_position(struct pipe_context *_pipe,
3313                        unsigned sample_count, unsigned sample_index,
3314                        float *out_value)
3315 {
3316    struct threaded_context *tc = threaded_context(_pipe);
3317    struct pipe_context *pipe = tc->pipe;
3318 
3319    pipe->get_sample_position(pipe, sample_count, sample_index,
3320                              out_value);
3321 }
3322 
3323 static enum pipe_reset_status
3324 tc_get_device_reset_status(struct pipe_context *_pipe)
3325 {
3326    struct threaded_context *tc = threaded_context(_pipe);
3327    struct pipe_context *pipe = tc->pipe;
3328 
3329    if (!tc->options.unsynchronized_get_device_reset_status)
3330       tc_sync(tc);
3331 
3332    return pipe->get_device_reset_status(pipe);
3333 }
3334 
3335 static void
3336 tc_set_device_reset_callback(struct pipe_context *_pipe,
3337                              const struct pipe_device_reset_callback *cb)
3338 {
3339    struct threaded_context *tc = threaded_context(_pipe);
3340    struct pipe_context *pipe = tc->pipe;
3341 
3342    tc_sync(tc);
3343    pipe->set_device_reset_callback(pipe, cb);
3344 }
3345 
3346 struct tc_string_marker {
3347    struct tc_call_base base;
3348    int len;
3349    char slot[0]; /* more will be allocated if needed */
3350 };
3351 
3352 static uint16_t
3353 tc_call_emit_string_marker(struct pipe_context *pipe, void *call)
3354 {
3355    struct tc_string_marker *p = (struct tc_string_marker *)call;
3356    pipe->emit_string_marker(pipe, p->slot, p->len);
3357    return p->base.num_slots;
3358 }
3359 
3360 static void
3361 tc_emit_string_marker(struct pipe_context *_pipe,
3362                       const char *string, int len)
3363 {
3364    struct threaded_context *tc = threaded_context(_pipe);
3365 
3366    if (len <= TC_MAX_STRING_MARKER_BYTES) {
3367       struct tc_string_marker *p =
3368          tc_add_slot_based_call(tc, TC_CALL_emit_string_marker, tc_string_marker, len);
3369 
3370       memcpy(p->slot, string, len);
3371       p->len = len;
3372    } else {
3373       struct pipe_context *pipe = tc->pipe;
3374 
3375       tc_sync(tc);
3376       tc_set_driver_thread(tc);
3377       pipe->emit_string_marker(pipe, string, len);
3378       tc_clear_driver_thread(tc);
3379    }
3380 }
3381 
3382 static void
3383 tc_dump_debug_state(struct pipe_context *_pipe, FILE *stream,
3384                     unsigned flags)
3385 {
3386    struct threaded_context *tc = threaded_context(_pipe);
3387    struct pipe_context *pipe = tc->pipe;
3388 
3389    tc_sync(tc);
3390    pipe->dump_debug_state(pipe, stream, flags);
3391 }
3392 
3393 static void
3394 tc_set_debug_callback(struct pipe_context *_pipe,
3395                       const struct util_debug_callback *cb)
3396 {
3397    struct threaded_context *tc = threaded_context(_pipe);
3398    struct pipe_context *pipe = tc->pipe;
3399 
3400    tc_sync(tc);
3401 
3402    /* Drop all synchronous debug callbacks. Drivers are expected to be OK
3403     * with this. shader-db will use an environment variable to disable
3404     * the threaded context.
3405     */
3406    if (cb && !cb->async)
3407       pipe->set_debug_callback(pipe, NULL);
3408    else
3409       pipe->set_debug_callback(pipe, cb);
3410 }
3411 
3412 static void
3413 tc_set_log_context(struct pipe_context *_pipe, struct u_log_context *log)
3414 {
3415    struct threaded_context *tc = threaded_context(_pipe);
3416    struct pipe_context *pipe = tc->pipe;
3417 
3418    tc_sync(tc);
3419    pipe->set_log_context(pipe, log);
3420 }
3421 
3422 static void
3423 tc_create_fence_fd(struct pipe_context *_pipe,
3424                    struct pipe_fence_handle **fence, int fd,
3425                    enum pipe_fd_type type)
3426 {
3427    struct threaded_context *tc = threaded_context(_pipe);
3428    struct pipe_context *pipe = tc->pipe;
3429 
3430    if (!tc->options.unsynchronized_create_fence_fd)
3431       tc_sync(tc);
3432 
3433    pipe->create_fence_fd(pipe, fence, fd, type);
3434 }
3435 
3436 struct tc_fence_call {
3437    struct tc_call_base base;
3438    struct pipe_fence_handle *fence;
3439 };
3440 
3441 static uint16_t
3442 tc_call_fence_server_sync(struct pipe_context *pipe, void *call)
3443 {
3444    struct pipe_fence_handle *fence = to_call(call, tc_fence_call)->fence;
3445 
3446    pipe->fence_server_sync(pipe, fence);
3447    pipe->screen->fence_reference(pipe->screen, &fence, NULL);
3448    return call_size(tc_fence_call);
3449 }
3450 
3451 static void
3452 tc_fence_server_sync(struct pipe_context *_pipe,
3453                      struct pipe_fence_handle *fence)
3454 {
3455    struct threaded_context *tc = threaded_context(_pipe);
3456    struct pipe_screen *screen = tc->pipe->screen;
3457    struct tc_fence_call *call = tc_add_call(tc, TC_CALL_fence_server_sync,
3458                                             tc_fence_call);
3459 
3460    call->fence = NULL;
3461    screen->fence_reference(screen, &call->fence, fence);
3462 }
3463 
3464 static void
3465 tc_fence_server_signal(struct pipe_context *_pipe,
3466                            struct pipe_fence_handle *fence)
3467 {
3468    struct threaded_context *tc = threaded_context(_pipe);
3469    struct pipe_context *pipe = tc->pipe;
3470    tc_sync(tc);
3471    pipe->fence_server_signal(pipe, fence);
3472 }
3473 
3474 static struct pipe_video_codec *
3475 tc_create_video_codec(UNUSED struct pipe_context *_pipe,
3476                       UNUSED const struct pipe_video_codec *templ)
3477 {
3478    unreachable("Threaded context should not be enabled for video APIs");
3479    return NULL;
3480 }
3481 
3482 static struct pipe_video_buffer *
3483 tc_create_video_buffer(UNUSED struct pipe_context *_pipe,
3484                        UNUSED const struct pipe_video_buffer *templ)
3485 {
3486    unreachable("Threaded context should not be enabled for video APIs");
3487    return NULL;
3488 }
3489 
3490 struct tc_context_param {
3491    struct tc_call_base base;
3492    enum pipe_context_param param;
3493    unsigned value;
3494 };
3495 
3496 static uint16_t
3497 tc_call_set_context_param(struct pipe_context *pipe, void *call)
3498 {
3499    struct tc_context_param *p = to_call(call, tc_context_param);
3500 
3501    if (pipe->set_context_param)
3502       pipe->set_context_param(pipe, p->param, p->value);
3503 
3504    return call_size(tc_context_param);
3505 }
3506 
3507 static void
3508 tc_set_context_param(struct pipe_context *_pipe,
3509                            enum pipe_context_param param,
3510                            unsigned value)
3511 {
3512    struct threaded_context *tc = threaded_context(_pipe);
3513 
3514    if (param == PIPE_CONTEXT_PARAM_UPDATE_THREAD_SCHEDULING) {
3515       util_thread_sched_apply_policy(tc->queue.threads[0],
3516                                      UTIL_THREAD_THREADED_CONTEXT, value,
3517                                      NULL);
3518 
3519       /* Execute this immediately (without enqueuing).
3520        * It's required to be thread-safe.
3521        */
3522       struct pipe_context *pipe = tc->pipe;
3523       if (pipe->set_context_param)
3524          pipe->set_context_param(pipe, param, value);
3525       return;
3526    }
3527 
3528    if (tc->pipe->set_context_param) {
3529       struct tc_context_param *call =
3530          tc_add_call(tc, TC_CALL_set_context_param, tc_context_param);
3531 
3532       call->param = param;
3533       call->value = value;
3534    }
3535 }
3536 
3537 
3538 /********************************************************************
3539  * draw, launch, clear, blit, copy, flush
3540  */
3541 
3542 struct tc_flush_deferred_call {
3543    struct tc_call_base base;
3544    unsigned flags;
3545    struct pipe_fence_handle *fence;
3546 };
3547 
3548 struct tc_flush_call {
3549    struct tc_call_base base;
3550    unsigned flags;
3551    struct pipe_fence_handle *fence;
3552    struct threaded_context *tc;
3553 };
3554 
3555 static void
3556 tc_flush_queries(struct threaded_context *tc)
3557 {
3558    struct threaded_query *tq, *tmp;
3559    LIST_FOR_EACH_ENTRY_SAFE(tq, tmp, &tc->unflushed_queries, head_unflushed) {
3560       list_del(&tq->head_unflushed);
3561 
3562       /* Memory release semantics: due to a possible race with
3563        * tc_get_query_result, we must ensure that the linked list changes
3564        * are visible before setting tq->flushed.
3565        */
3566       p_atomic_set(&tq->flushed, true);
3567    }
3568 }
3569 
3570 static uint16_t
3571 tc_call_flush_deferred(struct pipe_context *pipe, void *call)
3572 {
3573    struct tc_flush_deferred_call *p = to_call(call, tc_flush_deferred_call);
3574    struct pipe_screen *screen = pipe->screen;
3575 
3576    pipe->flush(pipe, p->fence ? &p->fence : NULL, p->flags);
3577    screen->fence_reference(screen, &p->fence, NULL);
3578 
3579    return call_size(tc_flush_deferred_call);
3580 }
3581 
3582 static uint16_t
3583 tc_call_flush(struct pipe_context *pipe, void *call)
3584 {
3585    struct tc_flush_call *p = to_call(call, tc_flush_call);
3586    struct pipe_screen *screen = pipe->screen;
3587 
3588    pipe->flush(pipe, p->fence ? &p->fence : NULL, p->flags);
3589    screen->fence_reference(screen, &p->fence, NULL);
3590 
3591    tc_flush_queries(p->tc);
3592 
3593    return call_size(tc_flush_call);
3594 }
3595 
3596 static void
3597 tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
3598          unsigned flags)
3599 {
3600    struct threaded_context *tc = threaded_context(_pipe);
3601    struct pipe_context *pipe = tc->pipe;
3602    struct pipe_screen *screen = pipe->screen;
3603    bool async = flags & (PIPE_FLUSH_DEFERRED | PIPE_FLUSH_ASYNC);
3604    bool deferred = (flags & PIPE_FLUSH_DEFERRED) > 0;
3605 
3606    if (!deferred || !fence)
3607       tc->in_renderpass = false;
3608 
3609    if (async && tc->options.create_fence) {
3610       if (fence) {
3611          struct tc_batch *next = &tc->batch_slots[tc->next];
3612 
3613          if (!next->token) {
3614             next->token = malloc(sizeof(*next->token));
3615             if (!next->token)
3616                goto out_of_memory;
3617 
3618             pipe_reference_init(&next->token->ref, 1);
3619             next->token->tc = tc;
3620          }
3621 
3622          screen->fence_reference(screen, fence,
3623                                  tc->options.create_fence(pipe, next->token));
3624          if (!*fence)
3625             goto out_of_memory;
3626       }
3627 
3628       struct tc_flush_call *p;
3629       if (deferred) {
3630          /* these have identical fields */
3631          p = (struct tc_flush_call *)tc_add_call(tc, TC_CALL_flush_deferred, tc_flush_deferred_call);
3632       } else {
3633          p = tc_add_call(tc, TC_CALL_flush, tc_flush_call);
3634          p->tc = tc;
3635       }
3636       p->fence = fence ? *fence : NULL;
3637       p->flags = flags | TC_FLUSH_ASYNC;
3638 
3639       if (!deferred) {
3640          /* non-deferred async flushes indicate completion of existing renderpass info */
3641          tc_signal_renderpass_info_ready(tc);
3642          tc_batch_flush(tc, false);
3643          tc->seen_fb_state = false;
3644       }
3645 
3646       return;
3647    }
3648 
3649 out_of_memory:
3650    tc->flushing = true;
3651    /* renderpass info is signaled during sync */
3652    tc_sync_msg(tc, flags & PIPE_FLUSH_END_OF_FRAME ? "end of frame" :
3653                    flags & PIPE_FLUSH_DEFERRED ? "deferred fence" : "normal");
3654 
3655    if (!deferred) {
3656       tc_flush_queries(tc);
3657       tc->seen_fb_state = false;
3658       tc->query_ended = false;
3659    }
3660    tc_set_driver_thread(tc);
3661    pipe->flush(pipe, fence, flags);
3662    tc_clear_driver_thread(tc);
3663    tc->flushing = false;
3664 }
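
/* Summary: when the driver supplies options.create_fence, async flushes return
 * a fence created from the batch token and the flush itself is enqueued
 * (tc_call_flush / tc_call_flush_deferred).  Otherwise, or if token/fence
 * allocation fails, the context syncs and flushes on the driver thread.
 */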
3665 
3666 struct tc_draw_single_drawid {
3667    struct tc_draw_single base;
3668    unsigned drawid_offset;
3669 };
3670 
3671 static uint16_t
3672 tc_call_draw_single_drawid(struct pipe_context *pipe, void *call)
3673 {
3674    struct tc_draw_single_drawid *info_drawid = to_call(call, tc_draw_single_drawid);
3675    struct tc_draw_single *info = &info_drawid->base;
3676 
3677    /* u_threaded_context stores start/count in min/max_index for single draws. */
3678    /* Drivers using u_threaded_context shouldn't use min/max_index. */
3679    struct pipe_draw_start_count_bias draw;
3680 
3681    draw.start = info->info.min_index;
3682    draw.count = info->info.max_index;
3683    draw.index_bias = info->index_bias;
3684 
3685    info->info.index_bounds_valid = false;
3686    info->info.has_user_indices = false;
3687    info->info.take_index_buffer_ownership = false;
3688 
3689    pipe->draw_vbo(pipe, &info->info, info_drawid->drawid_offset, NULL, &draw, 1);
3690    if (info->info.index_size)
3691       tc_drop_resource_reference(info->info.index.resource);
3692 
3693    return call_size(tc_draw_single_drawid);
3694 }
3695 
3696 static void
3697 simplify_draw_info(struct pipe_draw_info *info)
3698 {
3699    /* Clear these fields to facilitate draw merging.
3700     * Drivers shouldn't use them.
3701     */
3702    info->has_user_indices = false;
3703    info->index_bounds_valid = false;
3704    info->take_index_buffer_ownership = false;
3705    info->index_bias_varies = false;
3706    info->_pad = 0;
3707 
3708    /* This shouldn't be set when merging single draws. */
3709    info->increment_draw_id = false;
3710 
3711    if (info->index_size) {
3712       if (!info->primitive_restart)
3713          info->restart_index = 0;
3714    } else {
3715       assert(!info->primitive_restart);
3716       info->primitive_restart = false;
3717       info->restart_index = 0;
3718       info->index.resource = NULL;
3719    }
3720 }
3721 
3722 static bool
3723 is_next_call_a_mergeable_draw(struct tc_draw_single *first,
3724                               struct tc_draw_single *next)
3725 {
3726    if (next->base.call_id != TC_CALL_draw_single)
3727       return false;
3728 
3729    STATIC_ASSERT(offsetof(struct pipe_draw_info, min_index) ==
3730                  sizeof(struct pipe_draw_info) - 8);
3731    STATIC_ASSERT(offsetof(struct pipe_draw_info, max_index) ==
3732                  sizeof(struct pipe_draw_info) - 4);
3733    /* All fields must be the same except start and count. */
3734    /* u_threaded_context stores start/count in min/max_index for single draws. */
3735    return memcmp((uint32_t*)&first->info, (uint32_t*)&next->info,
3736                  DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX) == 0;
3737 }
3738 
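/* Worked example (illustrative): a run of draws that differ only in their
 * index range, e.g. many glDrawElements calls sharing the same index buffer
 * and state, is recorded as consecutive tc_draw_single calls.  The merging
 * below collapses such a run (up to the batch size) into one pipe->draw_vbo
 * with num_draws > 1, which is the multi-draw path drivers optimize for.
 */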
3739 static uint16_t
3740 tc_call_draw_single(struct pipe_context *pipe, void *call)
3741 {
3742    /* Draw call merging. */
3743    struct tc_draw_single *first = to_call(call, tc_draw_single);
3744    struct tc_draw_single *next = get_next_call(first, tc_draw_single);
3745 
3746    /* If at least 2 consecutive draw calls can be merged... */
3747    if (next->base.call_id == TC_CALL_draw_single) {
3748       if (is_next_call_a_mergeable_draw(first, next)) {
3749          /* The maximum number of merged draws is given by the batch size. */
3750          struct pipe_draw_start_count_bias multi[TC_SLOTS_PER_BATCH / call_size(tc_draw_single)];
3751          unsigned num_draws = 2;
3752          bool index_bias_varies = first->index_bias != next->index_bias;
3753 
3754          /* u_threaded_context stores start/count in min/max_index for single draws. */
3755          multi[0].start = first->info.min_index;
3756          multi[0].count = first->info.max_index;
3757          multi[0].index_bias = first->index_bias;
3758          multi[1].start = next->info.min_index;
3759          multi[1].count = next->info.max_index;
3760          multi[1].index_bias = next->index_bias;
3761 
3762          /* Find how many other draws can be merged. */
3763          next = get_next_call(next, tc_draw_single);
3764          for (; is_next_call_a_mergeable_draw(first, next);
3765               next = get_next_call(next, tc_draw_single), num_draws++) {
3766             /* u_threaded_context stores start/count in min/max_index for single draws. */
3767             multi[num_draws].start = next->info.min_index;
3768             multi[num_draws].count = next->info.max_index;
3769             multi[num_draws].index_bias = next->index_bias;
3770             index_bias_varies |= first->index_bias != next->index_bias;
3771          }
3772 
3773          first->info.index_bias_varies = index_bias_varies;
3774          pipe->draw_vbo(pipe, &first->info, 0, NULL, multi, num_draws);
3775 
3776          /* Since all draws use the same index buffer, drop all references at once. */
3777          if (first->info.index_size)
3778             pipe_drop_resource_references(first->info.index.resource, num_draws);
3779 
3780          return call_size(tc_draw_single) * num_draws;
3781       }
3782    }
3783 
3784    /* u_threaded_context stores start/count in min/max_index for single draws. */
3785    /* Drivers using u_threaded_context shouldn't use min/max_index. */
3786    struct pipe_draw_start_count_bias draw;
3787 
3788    draw.start = first->info.min_index;
3789    draw.count = first->info.max_index;
3790    draw.index_bias = first->index_bias;
3791 
3792    first->info.index_bounds_valid = false;
3793    first->info.has_user_indices = false;
3794    first->info.take_index_buffer_ownership = false;
3795 
3796    pipe->draw_vbo(pipe, &first->info, 0, NULL, &draw, 1);
3797    if (first->info.index_size)
3798       tc_drop_resource_reference(first->info.index.resource);
3799 
3800    return call_size(tc_draw_single);
3801 }
3802 
3803 struct tc_draw_indirect {
3804    struct tc_call_base base;
3805    struct pipe_draw_start_count_bias draw;
3806    struct pipe_draw_info info;
3807    struct pipe_draw_indirect_info indirect;
3808 };
3809 
3810 static uint16_t
3811 tc_call_draw_indirect(struct pipe_context *pipe, void *call)
3812 {
3813    struct tc_draw_indirect *info = to_call(call, tc_draw_indirect);
3814 
3815    info->info.index_bounds_valid = false;
3816    info->info.take_index_buffer_ownership = false;
3817 
3818    pipe->draw_vbo(pipe, &info->info, 0, &info->indirect, &info->draw, 1);
3819    if (info->info.index_size)
3820       tc_drop_resource_reference(info->info.index.resource);
3821 
3822    tc_drop_resource_reference(info->indirect.buffer);
3823    tc_drop_resource_reference(info->indirect.indirect_draw_count);
3824    tc_drop_so_target_reference(info->indirect.count_from_stream_output);
3825    return call_size(tc_draw_indirect);
3826 }
3827 
3828 struct tc_draw_multi {
3829    struct tc_call_base base;
3830    unsigned num_draws;
3831    struct pipe_draw_info info;
3832    struct pipe_draw_start_count_bias slot[]; /* variable-sized array */
3833 };
3834 
3835 static uint16_t
3836 tc_call_draw_multi(struct pipe_context *pipe, void *call)
3837 {
3838    struct tc_draw_multi *info = (struct tc_draw_multi*)call;
3839 
3840    info->info.has_user_indices = false;
3841    info->info.index_bounds_valid = false;
3842    info->info.take_index_buffer_ownership = false;
3843 
3844    pipe->draw_vbo(pipe, &info->info, 0, NULL, info->slot, info->num_draws);
3845    if (info->info.index_size)
3846       tc_drop_resource_reference(info->info.index.resource);
3847 
3848    return info->base.num_slots;
3849 }
3850 
3851 #define DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX \
3852    offsetof(struct pipe_draw_info, index)
3853 
3854 /* Single draw with drawid_offset == 0. */
3855 static void
3856 tc_draw_single(struct pipe_context *_pipe, const struct pipe_draw_info *info,
3857                unsigned drawid_offset,
3858                const struct pipe_draw_indirect_info *indirect,
3859                const struct pipe_draw_start_count_bias *draws,
3860                unsigned num_draws)
3861 {
3862    struct threaded_context *tc = threaded_context(_pipe);
3863    struct tc_draw_single *p =
3864       tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
3865 
3866    if (info->index_size) {
3867       if (!info->take_index_buffer_ownership) {
3868          tc_set_resource_reference(&p->info.index.resource,
3869                                    info->index.resource);
3870       }
3871       tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->index.resource);
3872    }
3873    memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
3874    /* u_threaded_context stores start/count in min/max_index for single draws. */
3875    p->info.min_index = draws[0].start;
3876    p->info.max_index = draws[0].count;
3877    p->index_bias = draws[0].index_bias;
3878    simplify_draw_info(&p->info);
3879 }
3880 
3881 /* Single draw with drawid_offset > 0. */
3882 static void
3883 tc_draw_single_draw_id(struct pipe_context *_pipe,
3884                        const struct pipe_draw_info *info,
3885                        unsigned drawid_offset,
3886                        const struct pipe_draw_indirect_info *indirect,
3887                        const struct pipe_draw_start_count_bias *draws,
3888                        unsigned num_draws)
3889 {
3890    struct threaded_context *tc = threaded_context(_pipe);
3891    struct tc_draw_single *p =
3892       &tc_add_call(tc, TC_CALL_draw_single_drawid, tc_draw_single_drawid)->base;
3893 
3894    if (info->index_size) {
3895       if (!info->take_index_buffer_ownership) {
3896          tc_set_resource_reference(&p->info.index.resource,
3897                                    info->index.resource);
3898       }
3899       tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->index.resource);
3900    }
3901    ((struct tc_draw_single_drawid*)p)->drawid_offset = drawid_offset;
3902    memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
3903    /* u_threaded_context stores start/count in min/max_index for single draws. */
3904    p->info.min_index = draws[0].start;
3905    p->info.max_index = draws[0].count;
3906    p->index_bias = draws[0].index_bias;
3907    simplify_draw_info(&p->info);
3908 }
3909 
3910 /* Single draw with user indices and drawid_offset == 0. */
3911 static void
3912 tc_draw_user_indices_single(struct pipe_context *_pipe,
3913                             const struct pipe_draw_info *info,
3914                             unsigned drawid_offset,
3915                             const struct pipe_draw_indirect_info *indirect,
3916                             const struct pipe_draw_start_count_bias *draws,
3917                             unsigned num_draws)
3918 {
3919    struct threaded_context *tc = threaded_context(_pipe);
3920    unsigned index_size = info->index_size;
3921    unsigned size = draws[0].count * index_size;
3922    struct pipe_resource *buffer = NULL;
3923    unsigned offset;
3924 
3925    if (!size)
3926       return;
3927 
3928    /* This must be done before adding draw_vbo, because it could generate
3929     * e.g. transfer_unmap and flush partially-uninitialized draw_vbo
3930     * to the driver if it was done afterwards.
3931     */
3932    u_upload_data(tc->base.stream_uploader, 0, size, 4,
3933                  (uint8_t*)info->index.user + draws[0].start * index_size,
3934                  &offset, &buffer);
3935    if (unlikely(!buffer))
3936       return;
3937 
3938    struct tc_draw_single *p =
3939       tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
3940    memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX);
3941    p->info.index.resource = buffer;
3942    /* u_threaded_context stores start/count in min/max_index for single draws. */
3943    p->info.min_index = offset >> util_logbase2(index_size);
3944    p->info.max_index = draws[0].count;
3945    p->index_bias = draws[0].index_bias;
3946    simplify_draw_info(&p->info);
3947 }
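
/* Worked example (illustrative): with 4-byte user indices uploaded to byte
 * offset 1024 of the uploader buffer, the stored start is
 * 1024 >> util_logbase2(4) = 256, i.e. the start is expressed in indices
 * relative to the uploaded index buffer rather than the user pointer.
 */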
3948 
3949 /* Single draw with user indices and drawid_offset > 0. */
3950 static void
3951 tc_draw_user_indices_single_draw_id(struct pipe_context *_pipe,
3952                                     const struct pipe_draw_info *info,
3953                                     unsigned drawid_offset,
3954                                     const struct pipe_draw_indirect_info *indirect,
3955                                     const struct pipe_draw_start_count_bias *draws,
3956                                     unsigned num_draws)
3957 {
3958    struct threaded_context *tc = threaded_context(_pipe);
3959    unsigned index_size = info->index_size;
3960    unsigned size = draws[0].count * index_size;
3961    struct pipe_resource *buffer = NULL;
3962    unsigned offset;
3963 
3964    if (!size)
3965       return;
3966 
3967    /* This must be done before adding draw_vbo, because it could generate
3968     * e.g. transfer_unmap and flush partially-uninitialized draw_vbo
3969     * to the driver if it was done afterwards.
3970     */
3971    u_upload_data(tc->base.stream_uploader, 0, size, 4,
3972                  (uint8_t*)info->index.user + draws[0].start * index_size,
3973                  &offset, &buffer);
3974    if (unlikely(!buffer))
3975       return;
3976 
3977    struct tc_draw_single *p =
3978       &tc_add_call(tc, TC_CALL_draw_single_drawid, tc_draw_single_drawid)->base;
3979    memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX);
3980    p->info.index.resource = buffer;
3981    ((struct tc_draw_single_drawid*)p)->drawid_offset = drawid_offset;
3982    /* u_threaded_context stores start/count in min/max_index for single draws. */
3983    p->info.min_index = offset >> util_logbase2(index_size);
3984    p->info.max_index = draws[0].count;
3985    p->index_bias = draws[0].index_bias;
3986    simplify_draw_info(&p->info);
3987 }
3988 
3989 #define DRAW_OVERHEAD_BYTES sizeof(struct tc_draw_multi)
3990 #define ONE_DRAW_SLOT_BYTES sizeof(((struct tc_draw_multi*)NULL)->slot[0])
3991 
3992 #define SLOTS_FOR_ONE_DRAW \
3993    DIV_ROUND_UP(DRAW_OVERHEAD_BYTES + ONE_DRAW_SLOT_BYTES, \
3994                 sizeof(struct tc_call_base))
3995 
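/* Worked example (purely illustrative numbers): tc_draw_multi below splits a
 * large multi-draw across batches.  A batch with size_left_bytes of free call
 * space holds up to
 *
 *    dr = (size_left_bytes - DRAW_OVERHEAD_BYTES) / ONE_DRAW_SLOT_BYTES
 *
 * draws; e.g. 4096 bytes left with a 64-byte overhead and 16-byte slots gives
 * room for 252 draws, and the remainder starts a new tc_draw_multi in the
 * next batch.
 */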
3996 static void
3997 tc_draw_multi(struct pipe_context *_pipe, const struct pipe_draw_info *info,
3998               unsigned drawid_offset,
3999               const struct pipe_draw_indirect_info *indirect,
4000               const struct pipe_draw_start_count_bias *draws,
4001               unsigned num_draws)
4002 {
4003    struct threaded_context *tc = threaded_context(_pipe);
4004    int total_offset = 0;
4005    bool take_index_buffer_ownership = info->take_index_buffer_ownership;
4006 
4007    while (num_draws) {
4008       struct tc_batch *next = &tc->batch_slots[tc->next];
4009 
4010       int nb_slots_left = TC_SLOTS_PER_BATCH - 1 - next->num_total_slots;
4011       /* If there isn't enough room for even one draw, start filling the next batch */
4012       if (nb_slots_left < SLOTS_FOR_ONE_DRAW)
4013          nb_slots_left = TC_SLOTS_PER_BATCH - 1;
4014       const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
4015 
4016       /* How many draws can we fit in the current batch */
4017       const int dr = MIN2(num_draws, (size_left_bytes - DRAW_OVERHEAD_BYTES) /
4018                           ONE_DRAW_SLOT_BYTES);
4019 
4020       /* Non-indexed call or indexed with a real index buffer. */
4021       struct tc_draw_multi *p =
4022          tc_add_slot_based_call(tc, TC_CALL_draw_multi, tc_draw_multi,
4023                                 dr);
4024       if (info->index_size) {
4025          if (!take_index_buffer_ownership) {
4026             tc_set_resource_reference(&p->info.index.resource,
4027                                       info->index.resource);
4028          }
4029          tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->index.resource);
4030       }
4031       take_index_buffer_ownership = false;
4032       memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
4033       p->num_draws = dr;
4034       memcpy(p->slot, &draws[total_offset], sizeof(draws[0]) * dr);
4035       num_draws -= dr;
4036 
4037       total_offset += dr;
4038    }
4039 }
4040 
4041 static void
4042 tc_draw_user_indices_multi(struct pipe_context *_pipe,
4043                            const struct pipe_draw_info *info,
4044                            unsigned drawid_offset,
4045                            const struct pipe_draw_indirect_info *indirect,
4046                            const struct pipe_draw_start_count_bias *draws,
4047                            unsigned num_draws)
4048 {
4049    struct threaded_context *tc = threaded_context(_pipe);
4050    struct pipe_resource *buffer = NULL;
4051    unsigned buffer_offset, total_count = 0;
4052    unsigned index_size_shift = util_logbase2(info->index_size);
4053    uint8_t *ptr = NULL;
4054 
4055    /* Get the total count. */
4056    for (unsigned i = 0; i < num_draws; i++)
4057       total_count += draws[i].count;
4058 
4059    if (!total_count)
4060       return;
4061 
4062    /* Allocate space for all index buffers.
4063     *
4064     * This must be done before adding draw_vbo: the allocation itself can
4065     * add calls (e.g. transfer_unmap) and flush the batch, which would send
4066     * a partially-initialized draw_vbo to the driver if it were done after.
4067     */
4068    u_upload_alloc(tc->base.stream_uploader, 0,
4069                   total_count << index_size_shift, 4,
4070                   &buffer_offset, &buffer, (void**)&ptr);
4071    if (unlikely(!buffer))
4072       return;
4073 
4074    int total_offset = 0;
4075    unsigned offset = 0;
4076    while (num_draws) {
4077       struct tc_batch *next = &tc->batch_slots[tc->next];
4078 
4079       int nb_slots_left = TC_SLOTS_PER_BATCH - 1 - next->num_total_slots;
4080       /* If there isn't enough room for even one draw, start filling the next batch. */
4081       if (nb_slots_left < SLOTS_FOR_ONE_DRAW)
4082          nb_slots_left = TC_SLOTS_PER_BATCH - 1;
4083       const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
4084 
4085       /* How many draws can we fit in the current batch */
4086       const int dr = MIN2(num_draws, (size_left_bytes - DRAW_OVERHEAD_BYTES) /
4087                           ONE_DRAW_SLOT_BYTES);
4088 
4089       struct tc_draw_multi *p =
4090          tc_add_slot_based_call(tc, TC_CALL_draw_multi, tc_draw_multi,
4091                                 dr);
4092       memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX);
4093 
4094       if (total_offset == 0)
4095          /* the first slot inherits the reference from u_upload_alloc() */
4096          p->info.index.resource = buffer;
4097       else
4098          /* all following slots need a new reference */
4099          tc_set_resource_reference(&p->info.index.resource, buffer);
4100 
4101       p->num_draws = dr;
4102 
4103       /* Upload index buffers. */
4104       for (unsigned i = 0; i < dr; i++) {
4105          unsigned count = draws[i + total_offset].count;
4106 
4107          if (!count) {
4108             p->slot[i].start = 0;
4109             p->slot[i].count = 0;
4110             p->slot[i].index_bias = 0;
4111             continue;
4112          }
4113 
4114          unsigned size = count << index_size_shift;
4115          memcpy(ptr + offset,
4116                 (uint8_t*)info->index.user +
4117                 (draws[i + total_offset].start << index_size_shift), size);
4118          p->slot[i].start = (buffer_offset + offset) >> index_size_shift;
4119          p->slot[i].count = count;
4120          p->slot[i].index_bias = draws[i + total_offset].index_bias;
4121          offset += size;
4122       }
4123 
4124       total_offset += dr;
4125       num_draws -= dr;
4126    }
4127 }
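
/* Worked example of the rebasing above (values illustrative only): with
 * 16-bit indices (index_size_shift == 1), buffer_offset == 256 and two draws
 * of 3 and 5 indices, the copies land at byte offsets 0 and 6 within the
 * allocation, so the recorded draws become
 *
 *    slot[0].start = (256 + 0) >> 1 = 128
 *    slot[1].start = (256 + 6) >> 1 = 131
 *
 * i.e. 'start' is re-expressed in index units relative to the beginning of
 * the uploaded buffer.
 */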
4128 
4129 static void
4130 tc_draw_indirect(struct pipe_context *_pipe, const struct pipe_draw_info *info,
4131                  unsigned drawid_offset,
4132                  const struct pipe_draw_indirect_info *indirect,
4133                  const struct pipe_draw_start_count_bias *draws,
4134                  unsigned num_draws)
4135 {
4136    struct threaded_context *tc = threaded_context(_pipe);
4137    assert(!info->has_user_indices);
4138    assert(num_draws == 1);
4139 
4140    struct tc_draw_indirect *p =
4141       tc_add_call(tc, TC_CALL_draw_indirect, tc_draw_indirect);
4142    struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
4143 
4144    if (info->index_size) {
4145       if (!info->take_index_buffer_ownership) {
4146          tc_set_resource_reference(&p->info.index.resource,
4147                                    info->index.resource);
4148       }
4149       tc_add_to_buffer_list(next, info->index.resource);
4150    }
4151    memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
4152 
4153    tc_set_resource_reference(&p->indirect.buffer, indirect->buffer);
4154    tc_set_resource_reference(&p->indirect.indirect_draw_count,
4155                              indirect->indirect_draw_count);
4156    p->indirect.count_from_stream_output = NULL;
4157    pipe_so_target_reference(&p->indirect.count_from_stream_output,
4158                             indirect->count_from_stream_output);
4159 
4160    if (indirect->buffer)
4161       tc_add_to_buffer_list(next, indirect->buffer);
4162    if (indirect->indirect_draw_count)
4163       tc_add_to_buffer_list(next, indirect->indirect_draw_count);
4164    if (indirect->count_from_stream_output)
4165       tc_add_to_buffer_list(next, indirect->count_from_stream_output->buffer);
4166 
4167    memcpy(&p->indirect, indirect, sizeof(*indirect));
4168    p->draw.start = draws[0].start;
4169 }
4170 
4171 /* Dispatch table for tc_draw_vbo:
4172  *
4173  * Indexed by:
4174  *    [is_indirect * 8 + index_size_and_has_user_indices * 4 +
4175  *     is_multi_draw * 2 + non_zero_draw_id]
4176  */
4177 static pipe_draw_func draw_funcs[16] = {
4178    tc_draw_single,
4179    tc_draw_single_draw_id,
4180    tc_draw_multi,
4181    tc_draw_multi,
4182    tc_draw_user_indices_single,
4183    tc_draw_user_indices_single_draw_id,
4184    tc_draw_user_indices_multi,
4185    tc_draw_user_indices_multi,
4186    tc_draw_indirect,
4187    tc_draw_indirect,
4188    tc_draw_indirect,
4189    tc_draw_indirect,
4190    tc_draw_indirect,
4191    tc_draw_indirect,
4192    tc_draw_indirect,
4193    tc_draw_indirect,
4194 };
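
/* Example of the indexing scheme above: an indexed draw with user indices,
 * num_draws == 1, drawid_offset != 0 and no indirect buffer yields
 * 0*8 + 1*4 + 0*2 + 1 = 5, i.e. tc_draw_user_indices_single_draw_id.
 * Any indirect draw selects the top half of the table (index >= 8), so all
 * eight indirect entries resolve to tc_draw_indirect regardless of the
 * remaining bits.
 */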
4195 
4196 void
4197 tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info,
4198             unsigned drawid_offset,
4199             const struct pipe_draw_indirect_info *indirect,
4200             const struct pipe_draw_start_count_bias *draws,
4201             unsigned num_draws)
4202 {
4203    STATIC_ASSERT(DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX +
4204                  sizeof(intptr_t) == offsetof(struct pipe_draw_info, min_index));
4205 
4206    struct threaded_context *tc = threaded_context(_pipe);
4207    if (tc->options.parse_renderpass_info)
4208       tc_parse_draw(tc);
4209 
4210    /* Use a function table to call the desired variant of draw_vbo. */
4211    unsigned index = (indirect != NULL) * 8 +
4212                     (info->index_size && info->has_user_indices) * 4 +
4213                     (num_draws > 1) * 2 + (drawid_offset != 0);
4214    draw_funcs[index](_pipe, info, drawid_offset, indirect, draws, num_draws);
4215 
4216    /* This must be after tc_add_*call, which can flush the batch. */
4217    if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
4218       tc_add_all_gfx_bindings_to_buffer_list(tc);
4219 }
4220 
4221 struct tc_draw_single *
4222 tc_add_draw_single_call(struct pipe_context *_pipe,
4223                         struct pipe_resource *index_bo)
4224 {
4225    struct threaded_context *tc = threaded_context(_pipe);
4226 
4227    if (tc->options.parse_renderpass_info)
4228       tc_parse_draw(tc);
4229 
4230    struct tc_draw_single *p =
4231       tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
4232 
4233    if (index_bo)
4234       tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], index_bo);
4235 
4236    /* This must be after tc_add_*call, which can flush the batch. */
4237    if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
4238       tc_add_all_gfx_bindings_to_buffer_list(tc);
4239 
4240    return p;
4241 }
4242 
4243 struct tc_draw_vstate_single {
4244    struct tc_call_base base;
4245    struct pipe_draw_start_count_bias draw;
4246 
4247    /* The following fields must be contiguous, with no padding holes,
4248     * because draw merging compares them with a single memcmp.
4249     */
4250    struct pipe_vertex_state *state;
4251    uint32_t partial_velem_mask;
4252    struct pipe_draw_vertex_state_info info;
4253 };
4254 
4255 static bool
4256 is_next_call_a_mergeable_draw_vstate(struct tc_draw_vstate_single *first,
4257                                      struct tc_draw_vstate_single *next)
4258 {
4259    if (next->base.call_id != TC_CALL_draw_vstate_single)
4260       return false;
4261 
4262    return !memcmp(&first->state, &next->state,
4263                   offsetof(struct tc_draw_vstate_single, info) +
4264                   sizeof(struct pipe_draw_vertex_state_info) -
4265                   offsetof(struct tc_draw_vstate_single, state));
4266 }
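
/* The memcmp above spans the bytes from 'state' through 'info' inclusive
 * (state pointer, partial_velem_mask and the vertex-state info), which is
 * why those fields must stay contiguous in tc_draw_vstate_single. The
 * call_id check prevents comparing against an unrelated call that merely
 * follows in the batch.
 */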
4267 
4268 static uint16_t
4269 tc_call_draw_vstate_single(struct pipe_context *pipe, void *call)
4270 {
4271    /* Draw call merging. */
4272    struct tc_draw_vstate_single *first = to_call(call, tc_draw_vstate_single);
4273    struct tc_draw_vstate_single *next = get_next_call(first, tc_draw_vstate_single);
4274 
4275    /* If at least 2 consecutive draw calls can be merged... */
4276    if (is_next_call_a_mergeable_draw_vstate(first, next)) {
4277       /* The maximum number of merged draws is given by the batch size. */
4278       struct pipe_draw_start_count_bias draws[TC_SLOTS_PER_BATCH /
4279                                               call_size(tc_draw_vstate_single)];
4280       unsigned num_draws = 2;
4281 
4282       draws[0] = first->draw;
4283       draws[1] = next->draw;
4284 
4285       /* Find how many other draws can be merged. */
4286       next = get_next_call(next, tc_draw_vstate_single);
4287       for (; is_next_call_a_mergeable_draw_vstate(first, next);
4288            next = get_next_call(next, tc_draw_vstate_single),
4289            num_draws++)
4290          draws[num_draws] = next->draw;
4291 
4292       pipe->draw_vertex_state(pipe, first->state, first->partial_velem_mask,
4293                               first->info, draws, num_draws);
4294       /* Since all draws use the same state, drop all references at once. */
4295       tc_drop_vertex_state_references(first->state, num_draws);
4296 
4297       return call_size(tc_draw_vstate_single) * num_draws;
4298    }
4299 
4300    pipe->draw_vertex_state(pipe, first->state, first->partial_velem_mask,
4301                            first->info, &first->draw, 1);
4302    tc_drop_vertex_state_references(first->state, 1);
4303    return call_size(tc_draw_vstate_single);
4304 }
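
/* Note on the draws[] bound above: a batch holds at most
 * TC_SLOTS_PER_BATCH / call_size(tc_draw_vstate_single) calls of this type,
 * so num_draws cannot exceed the size of the on-stack array even if the
 * entire batch consists of mergeable draws.
 */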
4305 
4306 struct tc_draw_vstate_multi {
4307    struct tc_call_base base;
4308    uint32_t partial_velem_mask;
4309    struct pipe_draw_vertex_state_info info;
4310    unsigned num_draws;
4311    struct pipe_vertex_state *state;
4312    struct pipe_draw_start_count_bias slot[0];
4313 };
4314 
4315 static uint16_t
4316 tc_call_draw_vstate_multi(struct pipe_context *pipe, void *call)
4317 {
4318    struct tc_draw_vstate_multi *info = (struct tc_draw_vstate_multi*)call;
4319 
4320    pipe->draw_vertex_state(pipe, info->state, info->partial_velem_mask,
4321                            info->info, info->slot, info->num_draws);
4322    tc_drop_vertex_state_references(info->state, 1);
4323    return info->base.num_slots;
4324 }
4325 
4326 static void
4327 tc_draw_vertex_state(struct pipe_context *_pipe,
4328                      struct pipe_vertex_state *state,
4329                      uint32_t partial_velem_mask,
4330                      struct pipe_draw_vertex_state_info info,
4331                      const struct pipe_draw_start_count_bias *draws,
4332                      unsigned num_draws)
4333 {
4334    struct threaded_context *tc = threaded_context(_pipe);
4335    if (tc->options.parse_renderpass_info)
4336       tc_parse_draw(tc);
4337 
4338    if (num_draws == 1) {
4339       /* Single draw. */
4340       struct tc_draw_vstate_single *p =
4341          tc_add_call(tc, TC_CALL_draw_vstate_single, tc_draw_vstate_single);
4342       p->partial_velem_mask = partial_velem_mask;
4343       p->draw = draws[0];
4344       p->info.mode = info.mode;
4345       p->info.take_vertex_state_ownership = false;
4346 
4347       /* This should always be 0 for simplicity because we assume that
4348        * index_bias doesn't vary.
4349        */
4350       assert(draws[0].index_bias == 0);
4351 
4352       if (!info.take_vertex_state_ownership)
4353          tc_set_vertex_state_reference(&p->state, state);
4354       else
4355          p->state = state;
4356 
4357 
4358       /* This must be after tc_add_*call, which can flush the batch. */
4359       if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
4360          tc_add_all_gfx_bindings_to_buffer_list(tc);
4361       return;
4362    }
4363 
4364    const int draw_overhead_bytes = sizeof(struct tc_draw_vstate_multi);
4365    const int one_draw_slot_bytes = sizeof(((struct tc_draw_vstate_multi*)NULL)->slot[0]);
4366    const int slots_for_one_draw = DIV_ROUND_UP(draw_overhead_bytes + one_draw_slot_bytes,
4367                                                sizeof(struct tc_call_base));
4368    /* Multi draw. */
4369    int total_offset = 0;
4370    bool take_vertex_state_ownership = info.take_vertex_state_ownership;
4371    while (num_draws) {
4372       struct tc_batch *next = &tc->batch_slots[tc->next];
4373 
4374       int nb_slots_left = TC_SLOTS_PER_BATCH - 1 - next->num_total_slots;
4375       /* If there isn't enough room for even one draw, start filling the next batch. */
4376       if (nb_slots_left < slots_for_one_draw)
4377          nb_slots_left = TC_SLOTS_PER_BATCH - 1;
4378       const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
4379 
4380       /* How many draws can we fit in the current batch */
4381       const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_slot_bytes);
4382 
4383       /* Non-indexed call or indexed with a real index buffer. */
4384       struct tc_draw_vstate_multi *p =
4385          tc_add_slot_based_call(tc, TC_CALL_draw_vstate_multi, tc_draw_vstate_multi, dr);
4386 
4387       if (!take_vertex_state_ownership)
4388          tc_set_vertex_state_reference(&p->state, state);
4389       else
4390          p->state = state;
4391 
4392       take_vertex_state_ownership = false;
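      /* Only the first chunk may consume the caller's reference; every later
       * chunk produced by this loop takes its own reference above, mirroring
       * how tc_draw_multi handles index-buffer ownership.
       */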
4393       p->partial_velem_mask = partial_velem_mask;
4394       p->info.mode = info.mode;
4395       p->info.take_vertex_state_ownership = false;
4396       p->num_draws = dr;
4397       memcpy(p->slot, &draws[total_offset], sizeof(draws[0]) * dr);
4398       num_draws -= dr;
4399 
4400       total_offset += dr;
4401    }
4402 
4403 
4404    /* This must be after tc_add_*call, which can flush the batch. */
4405    if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
4406       tc_add_all_gfx_bindings_to_buffer_list(tc);
4407 }
4408 
4409 struct tc_launch_grid_call {
4410    struct tc_call_base base;
4411    struct pipe_grid_info info;
4412 };
4413 
4414 static uint16_t
4415 tc_call_launch_grid(struct pipe_context *pipe, void *call)
4416 {
4417    struct pipe_grid_info *p = &to_call(call, tc_launch_grid_call)->info;
4418 
4419    pipe->launch_grid(pipe, p);
4420    tc_drop_resource_reference(p->indirect);
4421    return call_size(tc_launch_grid_call);
4422 }
4423 
4424 static void
4425 tc_launch_grid(struct pipe_context *_pipe,
4426                const struct pipe_grid_info *info)
4427 {
4428    struct threaded_context *tc = threaded_context(_pipe);
4429    struct tc_launch_grid_call *p = tc_add_call(tc, TC_CALL_launch_grid,
4430                                                tc_launch_grid_call);
4431    assert(info->input == NULL);
4432 
4433    tc_set_resource_reference(&p->info.indirect, info->indirect);
4434    memcpy(&p->info, info, sizeof(*info));
4435 
4436    if (info->indirect)
4437       tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->indirect);
4438 
4439    /* This must be after tc_add_*call, which can flush the batch. */
4440    if (unlikely(tc->add_all_compute_bindings_to_buffer_list))
4441       tc_add_all_compute_bindings_to_buffer_list(tc);
4442 }
4443 
4444 static uint16_t
4445 tc_call_resource_copy_region(struct pipe_context *pipe, void *call)
4446 {
4447    struct tc_resource_copy_region *p = to_call(call, tc_resource_copy_region);
4448 
4449    pipe->resource_copy_region(pipe, p->dst, p->dst_level, p->dstx, p->dsty,
4450                               p->dstz, p->src, p->src_level, &p->src_box);
4451    tc_drop_resource_reference(p->dst);
4452    tc_drop_resource_reference(p->src);
4453    return call_size(tc_resource_copy_region);
4454 }
4455 
4456 static void
4457 tc_resource_copy_region(struct pipe_context *_pipe,
4458                         struct pipe_resource *dst, unsigned dst_level,
4459                         unsigned dstx, unsigned dsty, unsigned dstz,
4460                         struct pipe_resource *src, unsigned src_level,
4461                         const struct pipe_box *src_box)
4462 {
4463    struct threaded_context *tc = threaded_context(_pipe);
4464    struct threaded_resource *tdst = threaded_resource(dst);
4465    struct tc_resource_copy_region *p =
4466       tc_add_call(tc, TC_CALL_resource_copy_region,
4467                   tc_resource_copy_region);
4468 
4469    if (dst->target == PIPE_BUFFER)
4470       tc_buffer_disable_cpu_storage(dst);
4471 
4472    tc_set_resource_batch_usage(tc, dst);
4473    tc_set_resource_reference(&p->dst, dst);
4474    p->dst_level = dst_level;
4475    p->dstx = dstx;
4476    p->dsty = dsty;
4477    p->dstz = dstz;
4478    tc_set_resource_batch_usage(tc, src);
4479    tc_set_resource_reference(&p->src, src);
4480    p->src_level = src_level;
4481    p->src_box = *src_box;
4482 
4483    if (dst->target == PIPE_BUFFER) {
4484       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
4485 
4486       tc_add_to_buffer_list(next, src);
4487       tc_add_to_buffer_list(next, dst);
4488 
4489       util_range_add(&tdst->b, &tdst->valid_buffer_range,
4490                      dstx, dstx + src_box->width);
4491    }
4492 }
4493 
4494 struct tc_blit_call {
4495    struct tc_call_base base;
4496    struct pipe_blit_info info;
4497 };
4498 
4499 static uint16_t
4500 tc_call_blit(struct pipe_context *pipe, void *call)
4501 {
4502    struct pipe_blit_info *blit = &to_call(call, tc_blit_call)->info;
4503 
4504    pipe->blit(pipe, blit);
4505    tc_drop_resource_reference(blit->dst.resource);
4506    tc_drop_resource_reference(blit->src.resource);
4507    return call_size(tc_blit_call);
4508 }
4509 
4510 static void
4511 tc_blit(struct pipe_context *_pipe, const struct pipe_blit_info *info)
4512 {
4513    struct threaded_context *tc = threaded_context(_pipe);
4514    struct tc_blit_call *blit = tc_add_call(tc, TC_CALL_blit, tc_blit_call);
4515 
4516    tc_set_resource_batch_usage(tc, info->dst.resource);
4517    tc_set_resource_reference(&blit->info.dst.resource, info->dst.resource);
4518    tc_set_resource_batch_usage(tc, info->src.resource);
4519    tc_set_resource_reference(&blit->info.src.resource, info->src.resource);
4520    memcpy(&blit->info, info, sizeof(*info));
4521    if (tc->options.parse_renderpass_info) {
4522       tc->renderpass_info_recording->has_resolve = info->src.resource->nr_samples > 1 &&
4523                                                    info->dst.resource->nr_samples <= 1 &&
4524                                                    tc->fb_resolve == info->dst.resource;
4525    }
4526 }
4527 
4528 struct tc_generate_mipmap {
4529    struct tc_call_base base;
4530    enum pipe_format format;
4531    unsigned base_level;
4532    unsigned last_level;
4533    unsigned first_layer;
4534    unsigned last_layer;
4535    struct pipe_resource *res;
4536 };
4537 
4538 static uint16_t
4539 tc_call_generate_mipmap(struct pipe_context *pipe, void *call)
4540 {
4541    struct tc_generate_mipmap *p = to_call(call, tc_generate_mipmap);
4542    ASSERTED bool result = pipe->generate_mipmap(pipe, p->res, p->format,
4543                                                     p->base_level,
4544                                                     p->last_level,
4545                                                     p->first_layer,
4546                                                     p->last_layer);
4547    assert(result);
4548    tc_drop_resource_reference(p->res);
4549    return call_size(tc_generate_mipmap);
4550 }
4551 
4552 static bool
4553 tc_generate_mipmap(struct pipe_context *_pipe,
4554                    struct pipe_resource *res,
4555                    enum pipe_format format,
4556                    unsigned base_level,
4557                    unsigned last_level,
4558                    unsigned first_layer,
4559                    unsigned last_layer)
4560 {
4561    struct threaded_context *tc = threaded_context(_pipe);
4562    struct pipe_context *pipe = tc->pipe;
4563    struct pipe_screen *screen = pipe->screen;
4564    unsigned bind = PIPE_BIND_SAMPLER_VIEW;
4565 
4566    if (util_format_is_depth_or_stencil(format))
4567       bind = PIPE_BIND_DEPTH_STENCIL;
4568    else
4569       bind = PIPE_BIND_RENDER_TARGET;
4570 
4571    if (!screen->is_format_supported(screen, format, res->target,
4572                                     res->nr_samples, res->nr_storage_samples,
4573                                     bind))
4574       return false;
4575 
4576    struct tc_generate_mipmap *p =
4577       tc_add_call(tc, TC_CALL_generate_mipmap, tc_generate_mipmap);
4578 
4579    tc_set_resource_batch_usage(tc, res);
4580    tc_set_resource_reference(&p->res, res);
4581    p->format = format;
4582    p->base_level = base_level;
4583    p->last_level = last_level;
4584    p->first_layer = first_layer;
4585    p->last_layer = last_layer;
4586    return true;
4587 }
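
/* The is_format_supported() check runs synchronously on the application
 * thread so the boolean result can be returned right away; only the actual
 * mipmap generation is deferred. The assert() in tc_call_generate_mipmap
 * relies on the driver agreeing with its own format-support answer.
 */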
4588 
4589 struct tc_resource_call {
4590    struct tc_call_base base;
4591    struct pipe_resource *resource;
4592 };
4593 
4594 static uint16_t
4595 tc_call_flush_resource(struct pipe_context *pipe, void *call)
4596 {
4597    struct pipe_resource *resource = to_call(call, tc_resource_call)->resource;
4598 
4599    pipe->flush_resource(pipe, resource);
4600    tc_drop_resource_reference(resource);
4601    return call_size(tc_resource_call);
4602 }
4603 
4604 static void
4605 tc_flush_resource(struct pipe_context *_pipe, struct pipe_resource *resource)
4606 {
4607    struct threaded_context *tc = threaded_context(_pipe);
4608    struct tc_resource_call *call = tc_add_call(tc, TC_CALL_flush_resource,
4609                                                tc_resource_call);
4610 
4611    tc_set_resource_batch_usage(tc, resource);
4612    tc_set_resource_reference(&call->resource, resource);
4613 }
4614 
4615 static uint16_t
4616 tc_call_invalidate_resource(struct pipe_context *pipe, void *call)
4617 {
4618    struct pipe_resource *resource = to_call(call, tc_resource_call)->resource;
4619 
4620    pipe->invalidate_resource(pipe, resource);
4621    tc_drop_resource_reference(resource);
4622    return call_size(tc_resource_call);
4623 }
4624 
4625 static void
4626 tc_invalidate_resource(struct pipe_context *_pipe,
4627                        struct pipe_resource *resource)
4628 {
4629    struct threaded_context *tc = threaded_context(_pipe);
4630 
4631    if (resource->target == PIPE_BUFFER) {
4632       tc_invalidate_buffer(tc, threaded_resource(resource));
4633       return;
4634    }
4635 
4636    struct tc_resource_call *call = tc_add_call(tc, TC_CALL_invalidate_resource,
4637                                                tc_resource_call);
4638    tc_set_resource_batch_usage(tc, resource);
4639    tc_set_resource_reference(&call->resource, resource);
4640 
4641    struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
4642    if (info) {
4643       if (tc->fb_resources[PIPE_MAX_COLOR_BUFS] == resource) {
4644          info->zsbuf_invalidate = true;
4645       } else {
4646          for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
4647             if (tc->fb_resources[i] == resource)
4648                info->cbuf_invalidate |= BITFIELD_BIT(i);
4649          }
4650       }
4651    }
4652 }
4653 
4654 struct tc_clear {
4655    struct tc_call_base base;
4656    bool scissor_state_set;
4657    uint8_t stencil;
4658    uint16_t buffers;
4659    float depth;
4660    struct pipe_scissor_state scissor_state;
4661    union pipe_color_union color;
4662 };
4663 
4664 static uint16_t
4665 tc_call_clear(struct pipe_context *pipe, void *call)
4666 {
4667    struct tc_clear *p = to_call(call, tc_clear);
4668 
4669    pipe->clear(pipe, p->buffers, p->scissor_state_set ? &p->scissor_state : NULL, &p->color, p->depth, p->stencil);
4670    return call_size(tc_clear);
4671 }
4672 
4673 static void
4674 tc_clear(struct pipe_context *_pipe, unsigned buffers, const struct pipe_scissor_state *scissor_state,
4675          const union pipe_color_union *color, double depth,
4676          unsigned stencil)
4677 {
4678    struct threaded_context *tc = threaded_context(_pipe);
4679    struct tc_clear *p = tc_add_call(tc, TC_CALL_clear, tc_clear);
4680 
4681    p->buffers = buffers;
4682    if (scissor_state) {
4683       p->scissor_state = *scissor_state;
4684       struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
4685       /* partial clear info is useful for drivers to know whether any zs writes occur;
4686        * drivers are responsible for optimizing partial clear -> full clear
4687        */
4688       if (info && buffers & PIPE_CLEAR_DEPTHSTENCIL)
4689          info->zsbuf_clear_partial |= !info->zsbuf_clear;
4690    } else {
4691       struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
4692       if (info) {
4693          /* full clears use a different load operation, but are only valid if draws haven't occurred yet */
4694          info->cbuf_clear |= (buffers >> 2) & ~info->cbuf_load;
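            /* The '>> 2' above relies on PIPE_CLEAR_COLOR0 being bit 2
             * (depth and stencil occupy bits 0-1), turning the clear mask
             * into a per-color-buffer bitmask.
             */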
4695          if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
4696             if (!info->zsbuf_load && !info->zsbuf_clear_partial)
4697                info->zsbuf_clear = true;
4698             else if (!info->zsbuf_clear)
4699                /* this is a clear that occurred after a draw: flag as partial to ensure it isn't ignored */
4700                info->zsbuf_clear_partial = true;
4701          }
4702       }
4703    }
4704    p->scissor_state_set = !!scissor_state;
4705    p->color = *color;
4706    p->depth = depth;
4707    p->stencil = stencil;
4708 }
4709 
4710 struct tc_clear_render_target {
4711    struct tc_call_base base;
4712    bool render_condition_enabled;
4713    unsigned dstx;
4714    unsigned dsty;
4715    unsigned width;
4716    unsigned height;
4717    union pipe_color_union color;
4718    struct pipe_surface *dst;
4719 };
4720 
4721 static uint16_t
4722 tc_call_clear_render_target(struct pipe_context *pipe, void *call)
4723 {
4724    struct tc_clear_render_target *p = to_call(call, tc_clear_render_target);
4725 
4726    pipe->clear_render_target(pipe, p->dst, &p->color, p->dstx, p->dsty, p->width, p->height,
4727                              p->render_condition_enabled);
4728    tc_drop_surface_reference(p->dst);
4729    return call_size(tc_clear_render_target);
4730 }
4731 
4732 static void
4733 tc_clear_render_target(struct pipe_context *_pipe,
4734                        struct pipe_surface *dst,
4735                        const union pipe_color_union *color,
4736                        unsigned dstx, unsigned dsty,
4737                        unsigned width, unsigned height,
4738                        bool render_condition_enabled)
4739 {
4740    struct threaded_context *tc = threaded_context(_pipe);
4741    struct tc_clear_render_target *p = tc_add_call(tc, TC_CALL_clear_render_target, tc_clear_render_target);
4742    p->dst = NULL;
4743    pipe_surface_reference(&p->dst, dst);
4744    p->color = *color;
4745    p->dstx = dstx;
4746    p->dsty = dsty;
4747    p->width = width;
4748    p->height = height;
4749    p->render_condition_enabled = render_condition_enabled;
4750 }
4751 
4752 
4753 struct tc_clear_depth_stencil {
4754    struct tc_call_base base;
4755    bool render_condition_enabled;
4756    float depth;
4757    unsigned clear_flags;
4758    unsigned stencil;
4759    unsigned dstx;
4760    unsigned dsty;
4761    unsigned width;
4762    unsigned height;
4763    struct pipe_surface *dst;
4764 };
4765 
4766 
4767 static uint16_t
4768 tc_call_clear_depth_stencil(struct pipe_context *pipe, void *call)
4769 {
4770    struct tc_clear_depth_stencil *p = to_call(call, tc_clear_depth_stencil);
4771 
4772    pipe->clear_depth_stencil(pipe, p->dst, p->clear_flags, p->depth, p->stencil,
4773                              p->dstx, p->dsty, p->width, p->height,
4774                              p->render_condition_enabled);
4775    tc_drop_surface_reference(p->dst);
4776    return call_size(tc_clear_depth_stencil);
4777 }
4778 
4779 static void
4780 tc_clear_depth_stencil(struct pipe_context *_pipe,
4781                        struct pipe_surface *dst, unsigned clear_flags,
4782                        double depth, unsigned stencil, unsigned dstx,
4783                        unsigned dsty, unsigned width, unsigned height,
4784                        bool render_condition_enabled)
4785 {
4786    struct threaded_context *tc = threaded_context(_pipe);
4787    struct tc_clear_depth_stencil *p = tc_add_call(tc, TC_CALL_clear_depth_stencil, tc_clear_depth_stencil);
4788    p->dst = NULL;
4789    pipe_surface_reference(&p->dst, dst);
4790    p->clear_flags = clear_flags;
4791    p->depth = depth;
4792    p->stencil = stencil;
4793    p->dstx = dstx;
4794    p->dsty = dsty;
4795    p->width = width;
4796    p->height = height;
4797    p->render_condition_enabled = render_condition_enabled;
4798 }
4799 
4800 struct tc_clear_buffer {
4801    struct tc_call_base base;
4802    uint8_t clear_value_size;
4803    unsigned offset;
4804    unsigned size;
4805    char clear_value[16];
4806    struct pipe_resource *res;
4807 };
4808 
4809 static uint16_t
4810 tc_call_clear_buffer(struct pipe_context *pipe, void *call)
4811 {
4812    struct tc_clear_buffer *p = to_call(call, tc_clear_buffer);
4813 
4814    pipe->clear_buffer(pipe, p->res, p->offset, p->size, p->clear_value,
4815                       p->clear_value_size);
4816    tc_drop_resource_reference(p->res);
4817    return call_size(tc_clear_buffer);
4818 }
4819 
4820 static void
4821 tc_clear_buffer(struct pipe_context *_pipe, struct pipe_resource *res,
4822                 unsigned offset, unsigned size,
4823                 const void *clear_value, int clear_value_size)
4824 {
4825    struct threaded_context *tc = threaded_context(_pipe);
4826    struct threaded_resource *tres = threaded_resource(res);
4827    struct tc_clear_buffer *p =
4828       tc_add_call(tc, TC_CALL_clear_buffer, tc_clear_buffer);
4829 
4830    tc_buffer_disable_cpu_storage(res);
4831 
4832    tc_set_resource_reference(&p->res, res);
4833    tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], res);
4834    p->offset = offset;
4835    p->size = size;
4836    memcpy(p->clear_value, clear_value, clear_value_size);
4837    p->clear_value_size = clear_value_size;
4838 
4839    util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size);
4840 }
4841 
4842 struct tc_clear_texture {
4843    struct tc_call_base base;
4844    unsigned level;
4845    struct pipe_box box;
4846    char data[16];
4847    struct pipe_resource *res;
4848 };
4849 
4850 static uint16_t
4851 tc_call_clear_texture(struct pipe_context *pipe, void *call)
4852 {
4853    struct tc_clear_texture *p = to_call(call, tc_clear_texture);
4854 
4855    pipe->clear_texture(pipe, p->res, p->level, &p->box, p->data);
4856    tc_drop_resource_reference(p->res);
4857    return call_size(tc_clear_texture);
4858 }
4859 
4860 static void
4861 tc_clear_texture(struct pipe_context *_pipe, struct pipe_resource *res,
4862                  unsigned level, const struct pipe_box *box, const void *data)
4863 {
4864    struct threaded_context *tc = threaded_context(_pipe);
4865    struct tc_clear_texture *p =
4866       tc_add_call(tc, TC_CALL_clear_texture, tc_clear_texture);
4867 
4868    tc_set_resource_batch_usage(tc, res);
4869    tc_set_resource_reference(&p->res, res);
4870    p->level = level;
4871    p->box = *box;
4872    memcpy(p->data, data,
4873           util_format_get_blocksize(res->format));
4874 }
4875 
4876 struct tc_resource_commit {
4877    struct tc_call_base base;
4878    bool commit;
4879    unsigned level;
4880    struct pipe_box box;
4881    struct pipe_resource *res;
4882 };
4883 
4884 static uint16_t
4885 tc_call_resource_commit(struct pipe_context *pipe, void *call)
4886 {
4887    struct tc_resource_commit *p = to_call(call, tc_resource_commit);
4888 
4889    pipe->resource_commit(pipe, p->res, p->level, &p->box, p->commit);
4890    tc_drop_resource_reference(p->res);
4891    return call_size(tc_resource_commit);
4892 }
4893 
4894 static bool
4895 tc_resource_commit(struct pipe_context *_pipe, struct pipe_resource *res,
4896                    unsigned level, struct pipe_box *box, bool commit)
4897 {
4898    struct threaded_context *tc = threaded_context(_pipe);
4899    struct tc_resource_commit *p =
4900       tc_add_call(tc, TC_CALL_resource_commit, tc_resource_commit);
4901 
4902    tc_set_resource_reference(&p->res, res);
4903    tc_set_resource_batch_usage(tc, res);
4904    p->level = level;
4905    p->box = *box;
4906    p->commit = commit;
4907    return true; /* we don't care about the return value for this call */
4908 }
4909 
4910 static unsigned
4911 tc_init_intel_perf_query_info(struct pipe_context *_pipe)
4912 {
4913    struct threaded_context *tc = threaded_context(_pipe);
4914    struct pipe_context *pipe = tc->pipe;
4915 
4916    return pipe->init_intel_perf_query_info(pipe);
4917 }
4918 
4919 static void
4920 tc_get_intel_perf_query_info(struct pipe_context *_pipe,
4921                              unsigned query_index,
4922                              const char **name,
4923                              uint32_t *data_size,
4924                              uint32_t *n_counters,
4925                              uint32_t *n_active)
4926 {
4927    struct threaded_context *tc = threaded_context(_pipe);
4928    struct pipe_context *pipe = tc->pipe;
4929 
4930    tc_sync(tc); /* n_active vs begin/end_intel_perf_query */
4931    pipe->get_intel_perf_query_info(pipe, query_index, name, data_size,
4932          n_counters, n_active);
4933 }
4934 
4935 static void
4936 tc_get_intel_perf_query_counter_info(struct pipe_context *_pipe,
4937                                      unsigned query_index,
4938                                      unsigned counter_index,
4939                                      const char **name,
4940                                      const char **desc,
4941                                      uint32_t *offset,
4942                                      uint32_t *data_size,
4943                                      uint32_t *type_enum,
4944                                      uint32_t *data_type_enum,
4945                                      uint64_t *raw_max)
4946 {
4947    struct threaded_context *tc = threaded_context(_pipe);
4948    struct pipe_context *pipe = tc->pipe;
4949 
4950    pipe->get_intel_perf_query_counter_info(pipe, query_index, counter_index,
4951          name, desc, offset, data_size, type_enum, data_type_enum, raw_max);
4952 }
4953 
4954 static struct pipe_query *
4955 tc_new_intel_perf_query_obj(struct pipe_context *_pipe, unsigned query_index)
4956 {
4957    struct threaded_context *tc = threaded_context(_pipe);
4958    struct pipe_context *pipe = tc->pipe;
4959 
4960    return pipe->new_intel_perf_query_obj(pipe, query_index);
4961 }
4962 
4963 static uint16_t
4964 tc_call_begin_intel_perf_query(struct pipe_context *pipe, void *call)
4965 {
4966    (void)pipe->begin_intel_perf_query(pipe, to_call(call, tc_query_call)->query);
4967    return call_size(tc_query_call);
4968 }
4969 
4970 static bool
4971 tc_begin_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
4972 {
4973    struct threaded_context *tc = threaded_context(_pipe);
4974 
4975    tc_add_call(tc, TC_CALL_begin_intel_perf_query, tc_query_call)->query = q;
4976 
4977    /* assume success; begin failure can still be signaled from get_intel_perf_query_data */
4978    return true;
4979 }
4980 
4981 static uint16_t
4982 tc_call_end_intel_perf_query(struct pipe_context *pipe, void *call)
4983 {
4984    pipe->end_intel_perf_query(pipe, to_call(call, tc_query_call)->query);
4985    return call_size(tc_query_call);
4986 }
4987 
4988 static void
4989 tc_end_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
4990 {
4991    struct threaded_context *tc = threaded_context(_pipe);
4992 
4993    tc_add_call(tc, TC_CALL_end_intel_perf_query, tc_query_call)->query = q;
4994 }
4995 
4996 static void
4997 tc_delete_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
4998 {
4999    struct threaded_context *tc = threaded_context(_pipe);
5000    struct pipe_context *pipe = tc->pipe;
5001 
5002    tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
5003    pipe->delete_intel_perf_query(pipe, q);
5004 }
5005 
5006 static void
5007 tc_wait_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
5008 {
5009    struct threaded_context *tc = threaded_context(_pipe);
5010    struct pipe_context *pipe = tc->pipe;
5011 
5012    tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
5013    pipe->wait_intel_perf_query(pipe, q);
5014 }
5015 
5016 static bool
5017 tc_is_intel_perf_query_ready(struct pipe_context *_pipe, struct pipe_query *q)
5018 {
5019    struct threaded_context *tc = threaded_context(_pipe);
5020    struct pipe_context *pipe = tc->pipe;
5021 
5022    tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
5023    return pipe->is_intel_perf_query_ready(pipe, q);
5024 }
5025 
5026 static bool
5027 tc_get_intel_perf_query_data(struct pipe_context *_pipe,
5028                              struct pipe_query *q,
5029                              size_t data_size,
5030                              uint32_t *data,
5031                              uint32_t *bytes_written)
5032 {
5033    struct threaded_context *tc = threaded_context(_pipe);
5034    struct pipe_context *pipe = tc->pipe;
5035 
5036    tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
5037    return pipe->get_intel_perf_query_data(pipe, q, data_size, data, bytes_written);
5038 }
5039 
5040 /********************************************************************
5041  * callback
5042  */
5043 
5044 struct tc_callback_call {
5045    struct tc_call_base base;
5046    void (*fn)(void *data);
5047    void *data;
5048 };
5049 
5050 static uint16_t
5051 tc_call_callback(UNUSED struct pipe_context *pipe, void *call)
5052 {
5053    struct tc_callback_call *p = to_call(call, tc_callback_call);
5054 
5055    p->fn(p->data);
5056    return call_size(tc_callback_call);
5057 }
5058 
5059 static void
5060 tc_callback(struct pipe_context *_pipe, void (*fn)(void *), void *data,
5061             bool asap)
5062 {
5063    struct threaded_context *tc = threaded_context(_pipe);
5064 
5065    if (asap && tc_is_sync(tc)) {
5066       fn(data);
5067       return;
5068    }
5069 
5070    struct tc_callback_call *p =
5071       tc_add_call(tc, TC_CALL_callback, tc_callback_call);
5072    p->fn = fn;
5073    p->data = data;
5074 }
5075 
5076 
5077 /********************************************************************
5078  * create & destroy
5079  */
5080 
5081 static void
5082 tc_destroy(struct pipe_context *_pipe)
5083 {
5084    struct threaded_context *tc = threaded_context(_pipe);
5085    struct pipe_context *pipe = tc->pipe;
5086 
5087    if (tc->base.const_uploader &&
5088        tc->base.stream_uploader != tc->base.const_uploader)
5089       u_upload_destroy(tc->base.const_uploader);
5090 
5091    if (tc->base.stream_uploader)
5092       u_upload_destroy(tc->base.stream_uploader);
5093 
5094    tc_sync(tc);
5095 
5096    if (util_queue_is_initialized(&tc->queue)) {
5097       util_queue_destroy(&tc->queue);
5098 
5099       for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
5100          util_queue_fence_destroy(&tc->batch_slots[i].fence);
5101          util_dynarray_fini(&tc->batch_slots[i].renderpass_infos);
5102          assert(!tc->batch_slots[i].token);
5103       }
5104    }
5105 
5106    slab_destroy_child(&tc->pool_transfers);
5107    assert(tc->batch_slots[tc->next].num_total_slots == 0);
5108    pipe->destroy(pipe);
5109 
5110    for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) {
5111       if (!util_queue_fence_is_signalled(&tc->buffer_lists[i].driver_flushed_fence))
5112          util_queue_fence_signal(&tc->buffer_lists[i].driver_flushed_fence);
5113       util_queue_fence_destroy(&tc->buffer_lists[i].driver_flushed_fence);
5114    }
5115 
5116    for (unsigned i = 0; i < ARRAY_SIZE(tc->fb_resources); i++)
5117       pipe_resource_reference(&tc->fb_resources[i], NULL);
5118    pipe_resource_reference(&tc->fb_resolve, NULL);
5119 
5120    FREE(tc);
5121 }
5122 
5123 void tc_driver_internal_flush_notify(struct threaded_context *tc)
5124 {
5125    /* Allow drivers to call this function even for internal contexts that
5126     * don't have tc. It simplifies drivers.
5127     */
5128    if (!tc)
5129       return;
5130 
5131    /* Signal fences set by tc_batch_execute. */
5132    for (unsigned i = 0; i < tc->num_signal_fences_next_flush; i++)
5133       util_queue_fence_signal(tc->signal_fences_next_flush[i]);
5134 
5135    tc->num_signal_fences_next_flush = 0;
5136 }
5137 
5138 /**
5139  * Wrap an existing pipe_context into a threaded_context.
5140  *
5141  * \param pipe                 pipe_context to wrap
5142  * \param parent_transfer_pool parent slab pool set up for creating pipe_-
5143  *                             transfer objects; the driver should have one
5144  *                             in pipe_screen.
5145  * \param replace_buffer  callback for replacing a pipe_resource's storage
5146  *                        with another pipe_resource's storage.
5147  * \param options         optional TC options/callbacks
5148  * \param out  if successful, the threaded_context will be returned here in
5149  *             addition to the return value if "out" != NULL
5150  */
5151 struct pipe_context *
5152 threaded_context_create(struct pipe_context *pipe,
5153                         struct slab_parent_pool *parent_transfer_pool,
5154                         tc_replace_buffer_storage_func replace_buffer,
5155                         const struct threaded_context_options *options,
5156                         struct threaded_context **out)
5157 {
5158    struct threaded_context *tc;
5159 
5160    if (!pipe)
5161       return NULL;
5162 
5163    if (!debug_get_bool_option("GALLIUM_THREAD", true))
5164       return pipe;
5165 
5166    tc = CALLOC_STRUCT(threaded_context);
5167    if (!tc) {
5168       pipe->destroy(pipe);
5169       return NULL;
5170    }
5171 
5172    if (options) {
5173       /* this is unimplementable */
5174       assert(!(options->parse_renderpass_info && options->driver_calls_flush_notify));
5175       tc->options = *options;
5176    }
5177 
5178    pipe = trace_context_create_threaded(pipe->screen, pipe, &replace_buffer, &tc->options);
5179 
5180    /* The driver context isn't wrapped, so set its "priv" to NULL. */
5181    pipe->priv = NULL;
5182 
5183    tc->pipe = pipe;
5184    tc->replace_buffer_storage = replace_buffer;
5185    tc->map_buffer_alignment =
5186       pipe->screen->get_param(pipe->screen, PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT);
5187    tc->ubo_alignment =
5188       MAX2(pipe->screen->get_param(pipe->screen, PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT), 64);
5189    tc->base.priv = pipe; /* priv points to the wrapped driver context */
5190    tc->base.screen = pipe->screen;
5191    tc->base.destroy = tc_destroy;
5192    tc->base.callback = tc_callback;
5193 
5194    tc->base.stream_uploader = u_upload_clone(&tc->base, pipe->stream_uploader);
5195    if (pipe->stream_uploader == pipe->const_uploader)
5196       tc->base.const_uploader = tc->base.stream_uploader;
5197    else
5198       tc->base.const_uploader = u_upload_clone(&tc->base, pipe->const_uploader);
5199 
5200    if (!tc->base.stream_uploader || !tc->base.const_uploader)
5201       goto fail;
5202 
5203    tc->use_forced_staging_uploads = true;
5204 
5205    /* The queue size is the number of batches "waiting". Batches are removed
5206     * from the queue before being executed, so keep one tc_batch slot for that
5207     * execution. Also, keep one unused slot for an unflushed batch.
5208     */
5209    if (!util_queue_init(&tc->queue, "gdrv", TC_MAX_BATCHES - 2, 1, 0, NULL))
5210       goto fail;
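   /* For example, if TC_MAX_BATCHES were 4, the queue would hold at most two
    * batches waiting to run, leaving one slot for the batch currently being
    * executed and one for the batch currently being recorded.
    */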
5211 
5212    tc->last_completed = -1;
5213    for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
5214 #if !defined(NDEBUG) && TC_DEBUG >= 1
5215       tc->batch_slots[i].sentinel = TC_SENTINEL;
5216 #endif
5217       tc->batch_slots[i].tc = tc;
5218       tc->batch_slots[i].batch_idx = i;
5219       util_queue_fence_init(&tc->batch_slots[i].fence);
5220       tc->batch_slots[i].renderpass_info_idx = -1;
5221       if (tc->options.parse_renderpass_info) {
5222          util_dynarray_init(&tc->batch_slots[i].renderpass_infos, NULL);
5223          tc_batch_renderpass_infos_resize(tc, &tc->batch_slots[i]);
5224       }
5225    }
5226    for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++)
5227       util_queue_fence_init(&tc->buffer_lists[i].driver_flushed_fence);
5228 
5229    list_inithead(&tc->unflushed_queries);
5230 
5231    slab_create_child(&tc->pool_transfers, parent_transfer_pool);
5232 
5233    /* If you have different limits in each shader stage, set the maximum. */
5234    struct pipe_screen *screen = pipe->screen;
5235    tc->max_const_buffers =
5236       screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
5237                                PIPE_SHADER_CAP_MAX_CONST_BUFFERS);
5238    tc->max_shader_buffers =
5239       screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
5240                                PIPE_SHADER_CAP_MAX_SHADER_BUFFERS);
5241    tc->max_images =
5242       screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
5243                                PIPE_SHADER_CAP_MAX_SHADER_IMAGES);
5244    tc->max_samplers =
5245       screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
5246                                PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS);
5247 
5248    tc->base.set_context_param = tc_set_context_param; /* always set this */
5249 
5250 #define CTX_INIT(_member) \
5251    tc->base._member = tc->pipe->_member ? tc_##_member : NULL
5252 
5253    CTX_INIT(flush);
5254    CTX_INIT(draw_vbo);
5255    CTX_INIT(draw_vertex_state);
5256    CTX_INIT(launch_grid);
5257    CTX_INIT(resource_copy_region);
5258    CTX_INIT(blit);
5259    CTX_INIT(clear);
5260    CTX_INIT(clear_render_target);
5261    CTX_INIT(clear_depth_stencil);
5262    CTX_INIT(clear_buffer);
5263    CTX_INIT(clear_texture);
5264    CTX_INIT(flush_resource);
5265    CTX_INIT(generate_mipmap);
5266    CTX_INIT(render_condition);
5267    CTX_INIT(create_query);
5268    CTX_INIT(create_batch_query);
5269    CTX_INIT(destroy_query);
5270    CTX_INIT(begin_query);
5271    CTX_INIT(end_query);
5272    CTX_INIT(get_query_result);
5273    CTX_INIT(get_query_result_resource);
5274    CTX_INIT(set_active_query_state);
5275    CTX_INIT(create_blend_state);
5276    CTX_INIT(bind_blend_state);
5277    CTX_INIT(delete_blend_state);
5278    CTX_INIT(create_sampler_state);
5279    CTX_INIT(bind_sampler_states);
5280    CTX_INIT(delete_sampler_state);
5281    CTX_INIT(create_rasterizer_state);
5282    CTX_INIT(bind_rasterizer_state);
5283    CTX_INIT(delete_rasterizer_state);
5284    CTX_INIT(create_depth_stencil_alpha_state);
5285    CTX_INIT(bind_depth_stencil_alpha_state);
5286    CTX_INIT(delete_depth_stencil_alpha_state);
5287    CTX_INIT(link_shader);
5288    CTX_INIT(create_fs_state);
5289    CTX_INIT(bind_fs_state);
5290    CTX_INIT(delete_fs_state);
5291    CTX_INIT(create_vs_state);
5292    CTX_INIT(bind_vs_state);
5293    CTX_INIT(delete_vs_state);
5294    CTX_INIT(create_gs_state);
5295    CTX_INIT(bind_gs_state);
5296    CTX_INIT(delete_gs_state);
5297    CTX_INIT(create_tcs_state);
5298    CTX_INIT(bind_tcs_state);
5299    CTX_INIT(delete_tcs_state);
5300    CTX_INIT(create_tes_state);
5301    CTX_INIT(bind_tes_state);
5302    CTX_INIT(delete_tes_state);
5303    CTX_INIT(create_compute_state);
5304    CTX_INIT(bind_compute_state);
5305    CTX_INIT(delete_compute_state);
5306    CTX_INIT(create_vertex_elements_state);
5307    CTX_INIT(bind_vertex_elements_state);
5308    CTX_INIT(delete_vertex_elements_state);
5309    CTX_INIT(set_blend_color);
5310    CTX_INIT(set_stencil_ref);
5311    CTX_INIT(set_sample_mask);
5312    CTX_INIT(set_min_samples);
5313    CTX_INIT(set_clip_state);
5314    CTX_INIT(set_constant_buffer);
5315    CTX_INIT(set_inlinable_constants);
5316    CTX_INIT(set_framebuffer_state);
5317    CTX_INIT(set_polygon_stipple);
5318    CTX_INIT(set_sample_locations);
5319    CTX_INIT(set_scissor_states);
5320    CTX_INIT(set_viewport_states);
5321    CTX_INIT(set_window_rectangles);
5322    CTX_INIT(set_sampler_views);
5323    CTX_INIT(set_tess_state);
5324    CTX_INIT(set_patch_vertices);
5325    CTX_INIT(set_shader_buffers);
5326    CTX_INIT(set_shader_images);
5327    CTX_INIT(set_vertex_buffers);
5328    CTX_INIT(create_stream_output_target);
5329    CTX_INIT(stream_output_target_destroy);
5330    CTX_INIT(set_stream_output_targets);
5331    CTX_INIT(create_sampler_view);
5332    CTX_INIT(sampler_view_destroy);
5333    CTX_INIT(create_surface);
5334    CTX_INIT(surface_destroy);
5335    CTX_INIT(buffer_map);
5336    CTX_INIT(texture_map);
5337    CTX_INIT(transfer_flush_region);
5338    CTX_INIT(buffer_unmap);
5339    CTX_INIT(texture_unmap);
5340    CTX_INIT(buffer_subdata);
5341    CTX_INIT(texture_subdata);
5342    CTX_INIT(texture_barrier);
5343    CTX_INIT(memory_barrier);
5344    CTX_INIT(resource_commit);
5345    CTX_INIT(create_video_codec);
5346    CTX_INIT(create_video_buffer);
5347    CTX_INIT(set_compute_resources);
5348    CTX_INIT(set_global_binding);
5349    CTX_INIT(get_sample_position);
5350    CTX_INIT(invalidate_resource);
5351    CTX_INIT(get_device_reset_status);
5352    CTX_INIT(set_device_reset_callback);
5353    CTX_INIT(dump_debug_state);
5354    CTX_INIT(set_log_context);
5355    CTX_INIT(emit_string_marker);
5356    CTX_INIT(set_debug_callback);
5357    CTX_INIT(create_fence_fd);
5358    CTX_INIT(fence_server_sync);
5359    CTX_INIT(fence_server_signal);
5360    CTX_INIT(get_timestamp);
5361    CTX_INIT(create_texture_handle);
5362    CTX_INIT(delete_texture_handle);
5363    CTX_INIT(make_texture_handle_resident);
5364    CTX_INIT(create_image_handle);
5365    CTX_INIT(delete_image_handle);
5366    CTX_INIT(make_image_handle_resident);
5367    CTX_INIT(set_frontend_noop);
5368    CTX_INIT(init_intel_perf_query_info);
5369    CTX_INIT(get_intel_perf_query_info);
5370    CTX_INIT(get_intel_perf_query_counter_info);
5371    CTX_INIT(new_intel_perf_query_obj);
5372    CTX_INIT(begin_intel_perf_query);
5373    CTX_INIT(end_intel_perf_query);
5374    CTX_INIT(delete_intel_perf_query);
5375    CTX_INIT(wait_intel_perf_query);
5376    CTX_INIT(is_intel_perf_query_ready);
5377    CTX_INIT(get_intel_perf_query_data);
5378 #undef CTX_INIT
5379 
5380 #define CALL(name) tc->execute_func[TC_CALL_##name] = tc_call_##name;
5381 #include "u_threaded_context_calls.h"
5382 #undef CALL
5383 
5384    if (out)
5385       *out = tc;
5386 
5387    tc_begin_next_buffer_list(tc);
5388    if (tc->options.parse_renderpass_info)
5389       tc_batch_increment_renderpass_info(tc, tc->next, false);
5390    return &tc->base;
5391 
5392 fail:
5393    tc_destroy(&tc->base);
5394    return NULL;
5395 }
5396 
5397 void
5398 threaded_context_init_bytes_mapped_limit(struct threaded_context *tc, unsigned divisor)
5399 {
5400    uint64_t total_ram;
5401    if (os_get_total_physical_memory(&total_ram)) {
5402       tc->bytes_mapped_limit = total_ram / divisor;
5403       if (sizeof(void*) == 4)
5404          tc->bytes_mapped_limit = MIN2(tc->bytes_mapped_limit, 512*1024*1024UL);
5405    }
5406 }
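
/* Usage example (numbers illustrative): with 16 GiB of system RAM, a divisor
 * of 4 yields a 4 GiB bytes_mapped_limit, which the code above then clamps
 * to 512 MiB on 32-bit builds, presumably because address space rather than
 * RAM is the limiting factor there.
 */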
5407 
5408 const struct tc_renderpass_info *
5409 threaded_context_get_renderpass_info(struct threaded_context *tc)
5410 {
5411    assert(tc->renderpass_info && tc->options.parse_renderpass_info);
5412    struct tc_batch_rp_info *info = tc_batch_rp_info(tc->renderpass_info);
5413    while (1) {
5414       util_queue_fence_wait(&info->ready);
5415       if (!info->next)
5416          return &info->info;
5417       info = info->next;
5418    }
5419 }
5420